diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,238448 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 34058, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 2.936167713899818e-05, + "grad_norm": 0.0, + "learning_rate": 1.9569471624266147e-08, + "loss": 2.2598, + "step": 1 + }, + { + "epoch": 5.872335427799636e-05, + "grad_norm": 0.0, + "learning_rate": 3.9138943248532294e-08, + "loss": 2.2451, + "step": 2 + }, + { + "epoch": 8.808503141699454e-05, + "grad_norm": 0.0, + "learning_rate": 5.870841487279844e-08, + "loss": 2.2461, + "step": 3 + }, + { + "epoch": 0.00011744670855599271, + "grad_norm": 0.0, + "learning_rate": 7.827788649706459e-08, + "loss": 2.2344, + "step": 4 + }, + { + "epoch": 0.0001468083856949909, + "grad_norm": 0.0, + "learning_rate": 9.784735812133072e-08, + "loss": 2.1484, + "step": 5 + }, + { + "epoch": 0.00017617006283398909, + "grad_norm": 0.0, + "learning_rate": 1.1741682974559687e-07, + "loss": 2.2324, + "step": 6 + }, + { + "epoch": 0.00020553173997298724, + "grad_norm": 0.0, + "learning_rate": 1.36986301369863e-07, + "loss": 2.1338, + "step": 7 + }, + { + "epoch": 0.00023489341711198543, + "grad_norm": 0.0, + "learning_rate": 1.5655577299412917e-07, + "loss": 2.2461, + "step": 8 + }, + { + "epoch": 0.0002642550942509836, + "grad_norm": 0.0, + "learning_rate": 1.761252446183953e-07, + "loss": 2.3486, + "step": 9 + }, + { + "epoch": 0.0002936167713899818, + "grad_norm": 0.0, + "learning_rate": 1.9569471624266145e-07, + "loss": 2.2461, + "step": 10 + }, + { + "epoch": 0.00032297844852898, + "grad_norm": 0.0, + "learning_rate": 2.152641878669276e-07, + "loss": 2.1377, + "step": 11 + }, + { + "epoch": 0.00035234012566797817, + "grad_norm": 0.0, + "learning_rate": 2.3483365949119375e-07, + "loss": 2.2236, + "step": 12 + }, + { + "epoch": 0.00038170180280697636, + "grad_norm": 0.0, + "learning_rate": 2.544031311154599e-07, + "loss": 2.0938, + "step": 13 + }, + { + "epoch": 0.0004110634799459745, + "grad_norm": 0.0, + "learning_rate": 2.73972602739726e-07, + "loss": 2.1182, + "step": 14 + }, + { + "epoch": 0.0004404251570849727, + "grad_norm": 0.0, + "learning_rate": 2.9354207436399216e-07, + "loss": 2.2012, + "step": 15 + }, + { + "epoch": 0.00046978683422397086, + "grad_norm": 0.0, + "learning_rate": 3.1311154598825835e-07, + "loss": 2.0645, + "step": 16 + }, + { + "epoch": 0.000499148511362969, + "grad_norm": 0.0, + "learning_rate": 3.326810176125245e-07, + "loss": 2.1201, + "step": 17 + }, + { + "epoch": 0.0005285101885019672, + "grad_norm": 0.0, + "learning_rate": 3.522504892367906e-07, + "loss": 1.9824, + "step": 18 + }, + { + "epoch": 0.0005578718656409654, + "grad_norm": 0.0, + "learning_rate": 3.7181996086105676e-07, + "loss": 2.1006, + "step": 19 + }, + { + "epoch": 0.0005872335427799636, + "grad_norm": 0.0, + "learning_rate": 3.913894324853229e-07, + "loss": 1.9834, + "step": 20 + }, + { + "epoch": 0.0006165952199189618, + "grad_norm": 0.0, + "learning_rate": 4.1095890410958903e-07, + "loss": 1.8633, + "step": 21 + }, + { + "epoch": 0.00064595689705796, + "grad_norm": 0.0, + "learning_rate": 4.305283757338552e-07, + "loss": 2.0, + "step": 22 + }, + { + "epoch": 0.0006753185741969582, + "grad_norm": 0.0, + "learning_rate": 4.5009784735812136e-07, + "loss": 1.9756, + "step": 23 + }, + { + "epoch": 0.0007046802513359563, + "grad_norm": 0.0, + "learning_rate": 4.696673189823875e-07, + "loss": 2.0586, + "step": 24 + }, + { + "epoch": 0.0007340419284749545, + "grad_norm": 0.0, + "learning_rate": 4.892367906066536e-07, + "loss": 1.9658, + "step": 25 + }, + { + "epoch": 0.0007634036056139527, + "grad_norm": 0.0, + "learning_rate": 5.088062622309198e-07, + "loss": 1.9688, + "step": 26 + }, + { + "epoch": 0.0007927652827529509, + "grad_norm": 0.0, + "learning_rate": 5.283757338551859e-07, + "loss": 1.8594, + "step": 27 + }, + { + "epoch": 0.000822126959891949, + "grad_norm": 0.0, + "learning_rate": 5.47945205479452e-07, + "loss": 1.9131, + "step": 28 + }, + { + "epoch": 0.0008514886370309472, + "grad_norm": 0.0, + "learning_rate": 5.675146771037182e-07, + "loss": 1.9023, + "step": 29 + }, + { + "epoch": 0.0008808503141699453, + "grad_norm": 0.0, + "learning_rate": 5.870841487279843e-07, + "loss": 1.8662, + "step": 30 + }, + { + "epoch": 0.0009102119913089435, + "grad_norm": 0.0, + "learning_rate": 6.066536203522505e-07, + "loss": 1.8086, + "step": 31 + }, + { + "epoch": 0.0009395736684479417, + "grad_norm": 0.0, + "learning_rate": 6.262230919765167e-07, + "loss": 1.873, + "step": 32 + }, + { + "epoch": 0.0009689353455869399, + "grad_norm": 0.0, + "learning_rate": 6.457925636007828e-07, + "loss": 1.875, + "step": 33 + }, + { + "epoch": 0.000998297022725938, + "grad_norm": 0.0, + "learning_rate": 6.65362035225049e-07, + "loss": 1.7793, + "step": 34 + }, + { + "epoch": 0.0010276586998649363, + "grad_norm": 0.0, + "learning_rate": 6.849315068493151e-07, + "loss": 1.8086, + "step": 35 + }, + { + "epoch": 0.0010570203770039345, + "grad_norm": 0.0, + "learning_rate": 7.045009784735812e-07, + "loss": 1.8047, + "step": 36 + }, + { + "epoch": 0.0010863820541429326, + "grad_norm": 0.0, + "learning_rate": 7.240704500978474e-07, + "loss": 1.7256, + "step": 37 + }, + { + "epoch": 0.0011157437312819308, + "grad_norm": 0.0, + "learning_rate": 7.436399217221135e-07, + "loss": 1.6152, + "step": 38 + }, + { + "epoch": 0.001145105408420929, + "grad_norm": 0.0, + "learning_rate": 7.632093933463797e-07, + "loss": 1.6104, + "step": 39 + }, + { + "epoch": 0.0011744670855599272, + "grad_norm": 0.0, + "learning_rate": 7.827788649706458e-07, + "loss": 1.708, + "step": 40 + }, + { + "epoch": 0.0012038287626989254, + "grad_norm": 0.0, + "learning_rate": 8.023483365949119e-07, + "loss": 1.7031, + "step": 41 + }, + { + "epoch": 0.0012331904398379236, + "grad_norm": 0.0, + "learning_rate": 8.219178082191781e-07, + "loss": 1.6582, + "step": 42 + }, + { + "epoch": 0.0012625521169769218, + "grad_norm": 0.0, + "learning_rate": 8.414872798434442e-07, + "loss": 1.8291, + "step": 43 + }, + { + "epoch": 0.00129191379411592, + "grad_norm": 0.0, + "learning_rate": 8.610567514677104e-07, + "loss": 1.751, + "step": 44 + }, + { + "epoch": 0.0013212754712549181, + "grad_norm": 0.0, + "learning_rate": 8.806262230919766e-07, + "loss": 1.7949, + "step": 45 + }, + { + "epoch": 0.0013506371483939163, + "grad_norm": 0.0, + "learning_rate": 9.001956947162427e-07, + "loss": 1.7227, + "step": 46 + }, + { + "epoch": 0.0013799988255329145, + "grad_norm": 0.0, + "learning_rate": 9.197651663405089e-07, + "loss": 1.7354, + "step": 47 + }, + { + "epoch": 0.0014093605026719127, + "grad_norm": 0.0, + "learning_rate": 9.39334637964775e-07, + "loss": 1.6592, + "step": 48 + }, + { + "epoch": 0.0014387221798109109, + "grad_norm": 0.0, + "learning_rate": 9.589041095890411e-07, + "loss": 1.7832, + "step": 49 + }, + { + "epoch": 0.001468083856949909, + "grad_norm": 0.0, + "learning_rate": 9.784735812133073e-07, + "loss": 1.7822, + "step": 50 + }, + { + "epoch": 0.0014974455340889072, + "grad_norm": 0.0, + "learning_rate": 9.980430528375734e-07, + "loss": 1.6436, + "step": 51 + }, + { + "epoch": 0.0015268072112279054, + "grad_norm": 0.0, + "learning_rate": 1.0176125244618395e-06, + "loss": 1.6973, + "step": 52 + }, + { + "epoch": 0.0015561688883669036, + "grad_norm": 0.0, + "learning_rate": 1.0371819960861057e-06, + "loss": 1.6133, + "step": 53 + }, + { + "epoch": 0.0015855305655059018, + "grad_norm": 0.0, + "learning_rate": 1.0567514677103718e-06, + "loss": 1.5918, + "step": 54 + }, + { + "epoch": 0.0016148922426449, + "grad_norm": 0.0, + "learning_rate": 1.076320939334638e-06, + "loss": 1.6914, + "step": 55 + }, + { + "epoch": 0.001644253919783898, + "grad_norm": 0.0, + "learning_rate": 1.095890410958904e-06, + "loss": 1.6318, + "step": 56 + }, + { + "epoch": 0.0016736155969228961, + "grad_norm": 0.0, + "learning_rate": 1.1154598825831702e-06, + "loss": 1.6826, + "step": 57 + }, + { + "epoch": 0.0017029772740618943, + "grad_norm": 0.0, + "learning_rate": 1.1350293542074364e-06, + "loss": 1.6543, + "step": 58 + }, + { + "epoch": 0.0017323389512008925, + "grad_norm": 0.0, + "learning_rate": 1.1545988258317025e-06, + "loss": 1.6523, + "step": 59 + }, + { + "epoch": 0.0017617006283398907, + "grad_norm": 0.0, + "learning_rate": 1.1741682974559686e-06, + "loss": 1.6396, + "step": 60 + }, + { + "epoch": 0.0017910623054788889, + "grad_norm": 0.0, + "learning_rate": 1.1937377690802348e-06, + "loss": 1.6777, + "step": 61 + }, + { + "epoch": 0.001820423982617887, + "grad_norm": 0.0, + "learning_rate": 1.213307240704501e-06, + "loss": 1.5918, + "step": 62 + }, + { + "epoch": 0.0018497856597568852, + "grad_norm": 0.0, + "learning_rate": 1.2328767123287673e-06, + "loss": 1.6855, + "step": 63 + }, + { + "epoch": 0.0018791473368958834, + "grad_norm": 0.0, + "learning_rate": 1.2524461839530334e-06, + "loss": 1.6133, + "step": 64 + }, + { + "epoch": 0.0019085090140348816, + "grad_norm": 0.0, + "learning_rate": 1.2720156555772995e-06, + "loss": 1.6699, + "step": 65 + }, + { + "epoch": 0.0019378706911738798, + "grad_norm": 0.0, + "learning_rate": 1.2915851272015657e-06, + "loss": 1.6367, + "step": 66 + }, + { + "epoch": 0.001967232368312878, + "grad_norm": 0.0, + "learning_rate": 1.3111545988258318e-06, + "loss": 1.6895, + "step": 67 + }, + { + "epoch": 0.001996594045451876, + "grad_norm": 0.0, + "learning_rate": 1.330724070450098e-06, + "loss": 1.6572, + "step": 68 + }, + { + "epoch": 0.0020259557225908746, + "grad_norm": 0.0, + "learning_rate": 1.350293542074364e-06, + "loss": 1.7549, + "step": 69 + }, + { + "epoch": 0.0020553173997298725, + "grad_norm": 0.0, + "learning_rate": 1.3698630136986302e-06, + "loss": 1.5371, + "step": 70 + }, + { + "epoch": 0.002084679076868871, + "grad_norm": 0.0, + "learning_rate": 1.3894324853228964e-06, + "loss": 1.7383, + "step": 71 + }, + { + "epoch": 0.002114040754007869, + "grad_norm": 0.0, + "learning_rate": 1.4090019569471625e-06, + "loss": 1.665, + "step": 72 + }, + { + "epoch": 0.0021434024311468673, + "grad_norm": 0.0, + "learning_rate": 1.4285714285714286e-06, + "loss": 1.6211, + "step": 73 + }, + { + "epoch": 0.0021727641082858653, + "grad_norm": 0.0, + "learning_rate": 1.4481409001956948e-06, + "loss": 1.6777, + "step": 74 + }, + { + "epoch": 0.0022021257854248633, + "grad_norm": 0.0, + "learning_rate": 1.467710371819961e-06, + "loss": 1.6367, + "step": 75 + }, + { + "epoch": 0.0022314874625638617, + "grad_norm": 0.0, + "learning_rate": 1.487279843444227e-06, + "loss": 1.6924, + "step": 76 + }, + { + "epoch": 0.0022608491397028596, + "grad_norm": 0.0, + "learning_rate": 1.5068493150684932e-06, + "loss": 1.6318, + "step": 77 + }, + { + "epoch": 0.002290210816841858, + "grad_norm": 0.0, + "learning_rate": 1.5264187866927593e-06, + "loss": 1.6504, + "step": 78 + }, + { + "epoch": 0.002319572493980856, + "grad_norm": 0.0, + "learning_rate": 1.5459882583170254e-06, + "loss": 1.4863, + "step": 79 + }, + { + "epoch": 0.0023489341711198544, + "grad_norm": 0.0, + "learning_rate": 1.5655577299412916e-06, + "loss": 1.7607, + "step": 80 + }, + { + "epoch": 0.0023782958482588524, + "grad_norm": 0.0, + "learning_rate": 1.5851272015655577e-06, + "loss": 1.6113, + "step": 81 + }, + { + "epoch": 0.0024076575253978508, + "grad_norm": 0.0, + "learning_rate": 1.6046966731898239e-06, + "loss": 1.5332, + "step": 82 + }, + { + "epoch": 0.0024370192025368487, + "grad_norm": 0.0, + "learning_rate": 1.62426614481409e-06, + "loss": 1.5322, + "step": 83 + }, + { + "epoch": 0.002466380879675847, + "grad_norm": 0.0, + "learning_rate": 1.6438356164383561e-06, + "loss": 1.5186, + "step": 84 + }, + { + "epoch": 0.002495742556814845, + "grad_norm": 0.0, + "learning_rate": 1.6634050880626223e-06, + "loss": 1.5674, + "step": 85 + }, + { + "epoch": 0.0025251042339538435, + "grad_norm": 0.0, + "learning_rate": 1.6829745596868884e-06, + "loss": 1.4863, + "step": 86 + }, + { + "epoch": 0.0025544659110928415, + "grad_norm": 0.0, + "learning_rate": 1.7025440313111545e-06, + "loss": 1.4736, + "step": 87 + }, + { + "epoch": 0.00258382758823184, + "grad_norm": 0.0, + "learning_rate": 1.7221135029354209e-06, + "loss": 1.6768, + "step": 88 + }, + { + "epoch": 0.002613189265370838, + "grad_norm": 0.0, + "learning_rate": 1.741682974559687e-06, + "loss": 1.4307, + "step": 89 + }, + { + "epoch": 0.0026425509425098363, + "grad_norm": 0.0, + "learning_rate": 1.7612524461839532e-06, + "loss": 1.5332, + "step": 90 + }, + { + "epoch": 0.0026719126196488342, + "grad_norm": 0.0, + "learning_rate": 1.7808219178082193e-06, + "loss": 1.5957, + "step": 91 + }, + { + "epoch": 0.0027012742967878326, + "grad_norm": 0.0, + "learning_rate": 1.8003913894324854e-06, + "loss": 1.541, + "step": 92 + }, + { + "epoch": 0.0027306359739268306, + "grad_norm": 0.0, + "learning_rate": 1.8199608610567516e-06, + "loss": 1.5283, + "step": 93 + }, + { + "epoch": 0.002759997651065829, + "grad_norm": 0.0, + "learning_rate": 1.8395303326810177e-06, + "loss": 1.6338, + "step": 94 + }, + { + "epoch": 0.002789359328204827, + "grad_norm": 0.0, + "learning_rate": 1.8590998043052839e-06, + "loss": 1.5059, + "step": 95 + }, + { + "epoch": 0.0028187210053438254, + "grad_norm": 0.0, + "learning_rate": 1.87866927592955e-06, + "loss": 1.5566, + "step": 96 + }, + { + "epoch": 0.0028480826824828233, + "grad_norm": 0.0, + "learning_rate": 1.8982387475538161e-06, + "loss": 1.6816, + "step": 97 + }, + { + "epoch": 0.0028774443596218217, + "grad_norm": 0.0, + "learning_rate": 1.9178082191780823e-06, + "loss": 1.6445, + "step": 98 + }, + { + "epoch": 0.0029068060367608197, + "grad_norm": 0.0, + "learning_rate": 1.937377690802348e-06, + "loss": 1.5352, + "step": 99 + }, + { + "epoch": 0.002936167713899818, + "grad_norm": 0.0, + "learning_rate": 1.9569471624266145e-06, + "loss": 1.5215, + "step": 100 + }, + { + "epoch": 0.002965529391038816, + "grad_norm": 0.0, + "learning_rate": 1.976516634050881e-06, + "loss": 1.5879, + "step": 101 + }, + { + "epoch": 0.0029948910681778145, + "grad_norm": 0.0, + "learning_rate": 1.996086105675147e-06, + "loss": 1.4863, + "step": 102 + }, + { + "epoch": 0.0030242527453168125, + "grad_norm": 0.0, + "learning_rate": 2.015655577299413e-06, + "loss": 1.6475, + "step": 103 + }, + { + "epoch": 0.003053614422455811, + "grad_norm": 0.0, + "learning_rate": 2.035225048923679e-06, + "loss": 1.6934, + "step": 104 + }, + { + "epoch": 0.003082976099594809, + "grad_norm": 0.0, + "learning_rate": 2.0547945205479454e-06, + "loss": 1.5977, + "step": 105 + }, + { + "epoch": 0.0031123377767338072, + "grad_norm": 0.0, + "learning_rate": 2.0743639921722114e-06, + "loss": 1.5977, + "step": 106 + }, + { + "epoch": 0.003141699453872805, + "grad_norm": 0.0, + "learning_rate": 2.0939334637964777e-06, + "loss": 1.6533, + "step": 107 + }, + { + "epoch": 0.0031710611310118036, + "grad_norm": 0.0, + "learning_rate": 2.1135029354207436e-06, + "loss": 1.5684, + "step": 108 + }, + { + "epoch": 0.0032004228081508016, + "grad_norm": 0.0, + "learning_rate": 2.13307240704501e-06, + "loss": 1.667, + "step": 109 + }, + { + "epoch": 0.0032297844852898, + "grad_norm": 0.0, + "learning_rate": 2.152641878669276e-06, + "loss": 1.5361, + "step": 110 + }, + { + "epoch": 0.003259146162428798, + "grad_norm": 0.0, + "learning_rate": 2.1722113502935423e-06, + "loss": 1.5771, + "step": 111 + }, + { + "epoch": 0.003288507839567796, + "grad_norm": 0.0, + "learning_rate": 2.191780821917808e-06, + "loss": 1.5762, + "step": 112 + }, + { + "epoch": 0.0033178695167067943, + "grad_norm": 0.0, + "learning_rate": 2.2113502935420745e-06, + "loss": 1.5762, + "step": 113 + }, + { + "epoch": 0.0033472311938457923, + "grad_norm": 0.0, + "learning_rate": 2.2309197651663405e-06, + "loss": 1.5615, + "step": 114 + }, + { + "epoch": 0.0033765928709847907, + "grad_norm": 0.0, + "learning_rate": 2.250489236790607e-06, + "loss": 1.4961, + "step": 115 + }, + { + "epoch": 0.0034059545481237886, + "grad_norm": 0.0, + "learning_rate": 2.2700587084148727e-06, + "loss": 1.6113, + "step": 116 + }, + { + "epoch": 0.003435316225262787, + "grad_norm": 0.0, + "learning_rate": 2.289628180039139e-06, + "loss": 1.7334, + "step": 117 + }, + { + "epoch": 0.003464677902401785, + "grad_norm": 0.0, + "learning_rate": 2.309197651663405e-06, + "loss": 1.5986, + "step": 118 + }, + { + "epoch": 0.0034940395795407834, + "grad_norm": 0.0, + "learning_rate": 2.3287671232876713e-06, + "loss": 1.4717, + "step": 119 + }, + { + "epoch": 0.0035234012566797814, + "grad_norm": 0.0, + "learning_rate": 2.3483365949119373e-06, + "loss": 1.5537, + "step": 120 + }, + { + "epoch": 0.00355276293381878, + "grad_norm": 0.0, + "learning_rate": 2.3679060665362036e-06, + "loss": 1.5781, + "step": 121 + }, + { + "epoch": 0.0035821246109577778, + "grad_norm": 0.0, + "learning_rate": 2.3874755381604695e-06, + "loss": 1.6514, + "step": 122 + }, + { + "epoch": 0.003611486288096776, + "grad_norm": 0.0, + "learning_rate": 2.407045009784736e-06, + "loss": 1.5537, + "step": 123 + }, + { + "epoch": 0.003640847965235774, + "grad_norm": 0.0, + "learning_rate": 2.426614481409002e-06, + "loss": 1.5615, + "step": 124 + }, + { + "epoch": 0.0036702096423747725, + "grad_norm": 0.0, + "learning_rate": 2.446183953033268e-06, + "loss": 1.5508, + "step": 125 + }, + { + "epoch": 0.0036995713195137705, + "grad_norm": 0.0, + "learning_rate": 2.4657534246575345e-06, + "loss": 1.5352, + "step": 126 + }, + { + "epoch": 0.003728932996652769, + "grad_norm": 0.0, + "learning_rate": 2.4853228962818004e-06, + "loss": 1.5566, + "step": 127 + }, + { + "epoch": 0.003758294673791767, + "grad_norm": 0.0, + "learning_rate": 2.504892367906067e-06, + "loss": 1.4561, + "step": 128 + }, + { + "epoch": 0.0037876563509307653, + "grad_norm": 0.0, + "learning_rate": 2.5244618395303327e-06, + "loss": 1.5693, + "step": 129 + }, + { + "epoch": 0.0038170180280697632, + "grad_norm": 0.0, + "learning_rate": 2.544031311154599e-06, + "loss": 1.5518, + "step": 130 + }, + { + "epoch": 0.0038463797052087616, + "grad_norm": 0.0, + "learning_rate": 2.563600782778865e-06, + "loss": 1.6719, + "step": 131 + }, + { + "epoch": 0.0038757413823477596, + "grad_norm": 0.0, + "learning_rate": 2.5831702544031313e-06, + "loss": 1.6113, + "step": 132 + }, + { + "epoch": 0.003905103059486758, + "grad_norm": 0.0, + "learning_rate": 2.6027397260273973e-06, + "loss": 1.5352, + "step": 133 + }, + { + "epoch": 0.003934464736625756, + "grad_norm": 0.0, + "learning_rate": 2.6223091976516636e-06, + "loss": 1.4883, + "step": 134 + }, + { + "epoch": 0.003963826413764754, + "grad_norm": 0.0, + "learning_rate": 2.6418786692759295e-06, + "loss": 1.5059, + "step": 135 + }, + { + "epoch": 0.003993188090903752, + "grad_norm": 0.0, + "learning_rate": 2.661448140900196e-06, + "loss": 1.5078, + "step": 136 + }, + { + "epoch": 0.00402254976804275, + "grad_norm": 0.0, + "learning_rate": 2.681017612524462e-06, + "loss": 1.4619, + "step": 137 + }, + { + "epoch": 0.004051911445181749, + "grad_norm": 0.0, + "learning_rate": 2.700587084148728e-06, + "loss": 1.5225, + "step": 138 + }, + { + "epoch": 0.004081273122320747, + "grad_norm": 0.0, + "learning_rate": 2.720156555772994e-06, + "loss": 1.4287, + "step": 139 + }, + { + "epoch": 0.004110634799459745, + "grad_norm": 0.0, + "learning_rate": 2.7397260273972604e-06, + "loss": 1.6768, + "step": 140 + }, + { + "epoch": 0.004139996476598743, + "grad_norm": 0.0, + "learning_rate": 2.7592954990215264e-06, + "loss": 1.582, + "step": 141 + }, + { + "epoch": 0.004169358153737742, + "grad_norm": 0.0, + "learning_rate": 2.7788649706457927e-06, + "loss": 1.4971, + "step": 142 + }, + { + "epoch": 0.00419871983087674, + "grad_norm": 0.0, + "learning_rate": 2.7984344422700586e-06, + "loss": 1.5869, + "step": 143 + }, + { + "epoch": 0.004228081508015738, + "grad_norm": 0.0, + "learning_rate": 2.818003913894325e-06, + "loss": 1.5928, + "step": 144 + }, + { + "epoch": 0.004257443185154736, + "grad_norm": 0.0, + "learning_rate": 2.837573385518591e-06, + "loss": 1.5127, + "step": 145 + }, + { + "epoch": 0.004286804862293735, + "grad_norm": 0.0, + "learning_rate": 2.8571428571428573e-06, + "loss": 1.5586, + "step": 146 + }, + { + "epoch": 0.004316166539432733, + "grad_norm": 0.0, + "learning_rate": 2.876712328767123e-06, + "loss": 1.5488, + "step": 147 + }, + { + "epoch": 0.004345528216571731, + "grad_norm": 0.0, + "learning_rate": 2.8962818003913895e-06, + "loss": 1.4971, + "step": 148 + }, + { + "epoch": 0.0043748898937107285, + "grad_norm": 0.0, + "learning_rate": 2.9158512720156555e-06, + "loss": 1.5498, + "step": 149 + }, + { + "epoch": 0.0044042515708497265, + "grad_norm": 0.0, + "learning_rate": 2.935420743639922e-06, + "loss": 1.5527, + "step": 150 + }, + { + "epoch": 0.004433613247988725, + "grad_norm": 0.0, + "learning_rate": 2.954990215264188e-06, + "loss": 1.6865, + "step": 151 + }, + { + "epoch": 0.004462974925127723, + "grad_norm": 0.0, + "learning_rate": 2.974559686888454e-06, + "loss": 1.5176, + "step": 152 + }, + { + "epoch": 0.004492336602266721, + "grad_norm": 0.0, + "learning_rate": 2.9941291585127204e-06, + "loss": 1.5947, + "step": 153 + }, + { + "epoch": 0.004521698279405719, + "grad_norm": 0.0, + "learning_rate": 3.0136986301369864e-06, + "loss": 1.5645, + "step": 154 + }, + { + "epoch": 0.004551059956544718, + "grad_norm": 0.0, + "learning_rate": 3.0332681017612527e-06, + "loss": 1.5645, + "step": 155 + }, + { + "epoch": 0.004580421633683716, + "grad_norm": 0.0, + "learning_rate": 3.0528375733855186e-06, + "loss": 1.5225, + "step": 156 + }, + { + "epoch": 0.004609783310822714, + "grad_norm": 0.0, + "learning_rate": 3.072407045009785e-06, + "loss": 1.5879, + "step": 157 + }, + { + "epoch": 0.004639144987961712, + "grad_norm": 0.0, + "learning_rate": 3.091976516634051e-06, + "loss": 1.6436, + "step": 158 + }, + { + "epoch": 0.004668506665100711, + "grad_norm": 0.0, + "learning_rate": 3.1115459882583172e-06, + "loss": 1.5068, + "step": 159 + }, + { + "epoch": 0.004697868342239709, + "grad_norm": 0.0, + "learning_rate": 3.131115459882583e-06, + "loss": 1.5322, + "step": 160 + }, + { + "epoch": 0.004727230019378707, + "grad_norm": 0.0, + "learning_rate": 3.1506849315068495e-06, + "loss": 1.543, + "step": 161 + }, + { + "epoch": 0.004756591696517705, + "grad_norm": 0.0, + "learning_rate": 3.1702544031311154e-06, + "loss": 1.5234, + "step": 162 + }, + { + "epoch": 0.004785953373656704, + "grad_norm": 0.0, + "learning_rate": 3.189823874755382e-06, + "loss": 1.4688, + "step": 163 + }, + { + "epoch": 0.0048153150507957015, + "grad_norm": 0.0, + "learning_rate": 3.2093933463796477e-06, + "loss": 1.5352, + "step": 164 + }, + { + "epoch": 0.0048446767279346995, + "grad_norm": 0.0, + "learning_rate": 3.228962818003914e-06, + "loss": 1.6055, + "step": 165 + }, + { + "epoch": 0.0048740384050736975, + "grad_norm": 0.0, + "learning_rate": 3.24853228962818e-06, + "loss": 1.5381, + "step": 166 + }, + { + "epoch": 0.004903400082212696, + "grad_norm": 0.0, + "learning_rate": 3.2681017612524463e-06, + "loss": 1.5322, + "step": 167 + }, + { + "epoch": 0.004932761759351694, + "grad_norm": 0.0, + "learning_rate": 3.2876712328767123e-06, + "loss": 1.5781, + "step": 168 + }, + { + "epoch": 0.004962123436490692, + "grad_norm": 0.0, + "learning_rate": 3.3072407045009786e-06, + "loss": 1.4971, + "step": 169 + }, + { + "epoch": 0.00499148511362969, + "grad_norm": 0.0, + "learning_rate": 3.3268101761252445e-06, + "loss": 1.582, + "step": 170 + }, + { + "epoch": 0.005020846790768689, + "grad_norm": 0.0, + "learning_rate": 3.346379647749511e-06, + "loss": 1.5693, + "step": 171 + }, + { + "epoch": 0.005050208467907687, + "grad_norm": 0.0, + "learning_rate": 3.365949119373777e-06, + "loss": 1.5088, + "step": 172 + }, + { + "epoch": 0.005079570145046685, + "grad_norm": 0.0, + "learning_rate": 3.385518590998043e-06, + "loss": 1.6025, + "step": 173 + }, + { + "epoch": 0.005108931822185683, + "grad_norm": 0.0, + "learning_rate": 3.405088062622309e-06, + "loss": 1.5596, + "step": 174 + }, + { + "epoch": 0.005138293499324682, + "grad_norm": 0.0, + "learning_rate": 3.4246575342465754e-06, + "loss": 1.4629, + "step": 175 + }, + { + "epoch": 0.00516765517646368, + "grad_norm": 0.0, + "learning_rate": 3.4442270058708418e-06, + "loss": 1.5947, + "step": 176 + }, + { + "epoch": 0.005197016853602678, + "grad_norm": 0.0, + "learning_rate": 3.4637964774951077e-06, + "loss": 1.5723, + "step": 177 + }, + { + "epoch": 0.005226378530741676, + "grad_norm": 0.0, + "learning_rate": 3.483365949119374e-06, + "loss": 1.4883, + "step": 178 + }, + { + "epoch": 0.0052557402078806745, + "grad_norm": 0.0, + "learning_rate": 3.50293542074364e-06, + "loss": 1.4238, + "step": 179 + }, + { + "epoch": 0.0052851018850196725, + "grad_norm": 0.0, + "learning_rate": 3.5225048923679063e-06, + "loss": 1.5488, + "step": 180 + }, + { + "epoch": 0.0053144635621586705, + "grad_norm": 0.0, + "learning_rate": 3.5420743639921723e-06, + "loss": 1.5029, + "step": 181 + }, + { + "epoch": 0.0053438252392976685, + "grad_norm": 0.0, + "learning_rate": 3.5616438356164386e-06, + "loss": 1.5518, + "step": 182 + }, + { + "epoch": 0.005373186916436667, + "grad_norm": 0.0, + "learning_rate": 3.5812133072407045e-06, + "loss": 1.5576, + "step": 183 + }, + { + "epoch": 0.005402548593575665, + "grad_norm": 0.0, + "learning_rate": 3.600782778864971e-06, + "loss": 1.6045, + "step": 184 + }, + { + "epoch": 0.005431910270714663, + "grad_norm": 0.0, + "learning_rate": 3.620352250489237e-06, + "loss": 1.5371, + "step": 185 + }, + { + "epoch": 0.005461271947853661, + "grad_norm": 0.0, + "learning_rate": 3.639921722113503e-06, + "loss": 1.6123, + "step": 186 + }, + { + "epoch": 0.005490633624992659, + "grad_norm": 0.0, + "learning_rate": 3.659491193737769e-06, + "loss": 1.4639, + "step": 187 + }, + { + "epoch": 0.005519995302131658, + "grad_norm": 0.0, + "learning_rate": 3.6790606653620354e-06, + "loss": 1.4482, + "step": 188 + }, + { + "epoch": 0.005549356979270656, + "grad_norm": 0.0, + "learning_rate": 3.6986301369863014e-06, + "loss": 1.5234, + "step": 189 + }, + { + "epoch": 0.005578718656409654, + "grad_norm": 0.0, + "learning_rate": 3.7181996086105677e-06, + "loss": 1.3457, + "step": 190 + }, + { + "epoch": 0.005608080333548652, + "grad_norm": 0.0, + "learning_rate": 3.7377690802348336e-06, + "loss": 1.4971, + "step": 191 + }, + { + "epoch": 0.005637442010687651, + "grad_norm": 0.0, + "learning_rate": 3.7573385518591e-06, + "loss": 1.5879, + "step": 192 + }, + { + "epoch": 0.005666803687826649, + "grad_norm": 0.0, + "learning_rate": 3.776908023483366e-06, + "loss": 1.4971, + "step": 193 + }, + { + "epoch": 0.005696165364965647, + "grad_norm": 0.0, + "learning_rate": 3.7964774951076322e-06, + "loss": 1.6562, + "step": 194 + }, + { + "epoch": 0.005725527042104645, + "grad_norm": 0.0, + "learning_rate": 3.816046966731898e-06, + "loss": 1.5205, + "step": 195 + }, + { + "epoch": 0.0057548887192436435, + "grad_norm": 0.0, + "learning_rate": 3.8356164383561645e-06, + "loss": 1.4854, + "step": 196 + }, + { + "epoch": 0.0057842503963826414, + "grad_norm": 0.0, + "learning_rate": 3.855185909980431e-06, + "loss": 1.4111, + "step": 197 + }, + { + "epoch": 0.005813612073521639, + "grad_norm": 0.0, + "learning_rate": 3.874755381604696e-06, + "loss": 1.3887, + "step": 198 + }, + { + "epoch": 0.005842973750660637, + "grad_norm": 0.0, + "learning_rate": 3.894324853228963e-06, + "loss": 1.4551, + "step": 199 + }, + { + "epoch": 0.005872335427799636, + "grad_norm": 0.0, + "learning_rate": 3.913894324853229e-06, + "loss": 1.4746, + "step": 200 + }, + { + "epoch": 0.005901697104938634, + "grad_norm": 0.0, + "learning_rate": 3.933463796477495e-06, + "loss": 1.5117, + "step": 201 + }, + { + "epoch": 0.005931058782077632, + "grad_norm": 0.0, + "learning_rate": 3.953033268101762e-06, + "loss": 1.668, + "step": 202 + }, + { + "epoch": 0.00596042045921663, + "grad_norm": 0.0, + "learning_rate": 3.972602739726027e-06, + "loss": 1.5615, + "step": 203 + }, + { + "epoch": 0.005989782136355629, + "grad_norm": 0.0, + "learning_rate": 3.992172211350294e-06, + "loss": 1.5811, + "step": 204 + }, + { + "epoch": 0.006019143813494627, + "grad_norm": 0.0, + "learning_rate": 4.01174168297456e-06, + "loss": 1.46, + "step": 205 + }, + { + "epoch": 0.006048505490633625, + "grad_norm": 0.0, + "learning_rate": 4.031311154598826e-06, + "loss": 1.5439, + "step": 206 + }, + { + "epoch": 0.006077867167772623, + "grad_norm": 0.0, + "learning_rate": 4.050880626223092e-06, + "loss": 1.4697, + "step": 207 + }, + { + "epoch": 0.006107228844911622, + "grad_norm": 0.0, + "learning_rate": 4.070450097847358e-06, + "loss": 1.4268, + "step": 208 + }, + { + "epoch": 0.00613659052205062, + "grad_norm": 0.0, + "learning_rate": 4.0900195694716245e-06, + "loss": 1.5664, + "step": 209 + }, + { + "epoch": 0.006165952199189618, + "grad_norm": 0.0, + "learning_rate": 4.109589041095891e-06, + "loss": 1.4844, + "step": 210 + }, + { + "epoch": 0.006195313876328616, + "grad_norm": 0.0, + "learning_rate": 4.129158512720156e-06, + "loss": 1.4766, + "step": 211 + }, + { + "epoch": 0.0062246755534676144, + "grad_norm": 0.0, + "learning_rate": 4.148727984344423e-06, + "loss": 1.6641, + "step": 212 + }, + { + "epoch": 0.006254037230606612, + "grad_norm": 0.0, + "learning_rate": 4.168297455968689e-06, + "loss": 1.5527, + "step": 213 + }, + { + "epoch": 0.00628339890774561, + "grad_norm": 0.0, + "learning_rate": 4.187866927592955e-06, + "loss": 1.4453, + "step": 214 + }, + { + "epoch": 0.006312760584884608, + "grad_norm": 0.0, + "learning_rate": 4.207436399217221e-06, + "loss": 1.501, + "step": 215 + }, + { + "epoch": 0.006342122262023607, + "grad_norm": 0.0, + "learning_rate": 4.227005870841487e-06, + "loss": 1.5811, + "step": 216 + }, + { + "epoch": 0.006371483939162605, + "grad_norm": 0.0, + "learning_rate": 4.246575342465754e-06, + "loss": 1.5693, + "step": 217 + }, + { + "epoch": 0.006400845616301603, + "grad_norm": 0.0, + "learning_rate": 4.26614481409002e-06, + "loss": 1.4395, + "step": 218 + }, + { + "epoch": 0.006430207293440601, + "grad_norm": 0.0, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.6191, + "step": 219 + }, + { + "epoch": 0.0064595689705796, + "grad_norm": 0.0, + "learning_rate": 4.305283757338552e-06, + "loss": 1.415, + "step": 220 + }, + { + "epoch": 0.006488930647718598, + "grad_norm": 0.0, + "learning_rate": 4.324853228962818e-06, + "loss": 1.5049, + "step": 221 + }, + { + "epoch": 0.006518292324857596, + "grad_norm": 0.0, + "learning_rate": 4.3444227005870845e-06, + "loss": 1.4873, + "step": 222 + }, + { + "epoch": 0.006547654001996594, + "grad_norm": 0.0, + "learning_rate": 4.36399217221135e-06, + "loss": 1.5693, + "step": 223 + }, + { + "epoch": 0.006577015679135592, + "grad_norm": 0.0, + "learning_rate": 4.383561643835616e-06, + "loss": 1.5547, + "step": 224 + }, + { + "epoch": 0.006606377356274591, + "grad_norm": 0.0, + "learning_rate": 4.403131115459883e-06, + "loss": 1.5469, + "step": 225 + }, + { + "epoch": 0.006635739033413589, + "grad_norm": 0.0, + "learning_rate": 4.422700587084149e-06, + "loss": 1.46, + "step": 226 + }, + { + "epoch": 0.006665100710552587, + "grad_norm": 0.0, + "learning_rate": 4.442270058708415e-06, + "loss": 1.5254, + "step": 227 + }, + { + "epoch": 0.0066944623876915845, + "grad_norm": 0.0, + "learning_rate": 4.461839530332681e-06, + "loss": 1.5498, + "step": 228 + }, + { + "epoch": 0.006723824064830583, + "grad_norm": 0.0, + "learning_rate": 4.481409001956947e-06, + "loss": 1.6152, + "step": 229 + }, + { + "epoch": 0.006753185741969581, + "grad_norm": 0.0, + "learning_rate": 4.500978473581214e-06, + "loss": 1.4941, + "step": 230 + }, + { + "epoch": 0.006782547419108579, + "grad_norm": 0.0, + "learning_rate": 4.52054794520548e-06, + "loss": 1.543, + "step": 231 + }, + { + "epoch": 0.006811909096247577, + "grad_norm": 0.0, + "learning_rate": 4.5401174168297455e-06, + "loss": 1.6055, + "step": 232 + }, + { + "epoch": 0.006841270773386576, + "grad_norm": 0.0, + "learning_rate": 4.559686888454012e-06, + "loss": 1.5039, + "step": 233 + }, + { + "epoch": 0.006870632450525574, + "grad_norm": 0.0, + "learning_rate": 4.579256360078278e-06, + "loss": 1.5205, + "step": 234 + }, + { + "epoch": 0.006899994127664572, + "grad_norm": 0.0, + "learning_rate": 4.5988258317025445e-06, + "loss": 1.4883, + "step": 235 + }, + { + "epoch": 0.00692935580480357, + "grad_norm": 0.0, + "learning_rate": 4.61839530332681e-06, + "loss": 1.4219, + "step": 236 + }, + { + "epoch": 0.006958717481942569, + "grad_norm": 0.0, + "learning_rate": 4.637964774951076e-06, + "loss": 1.4248, + "step": 237 + }, + { + "epoch": 0.006988079159081567, + "grad_norm": 0.0, + "learning_rate": 4.657534246575343e-06, + "loss": 1.5127, + "step": 238 + }, + { + "epoch": 0.007017440836220565, + "grad_norm": 0.0, + "learning_rate": 4.677103718199609e-06, + "loss": 1.5186, + "step": 239 + }, + { + "epoch": 0.007046802513359563, + "grad_norm": 0.0, + "learning_rate": 4.6966731898238745e-06, + "loss": 1.4795, + "step": 240 + }, + { + "epoch": 0.007076164190498562, + "grad_norm": 0.0, + "learning_rate": 4.716242661448141e-06, + "loss": 1.5723, + "step": 241 + }, + { + "epoch": 0.00710552586763756, + "grad_norm": 0.0, + "learning_rate": 4.735812133072407e-06, + "loss": 1.4727, + "step": 242 + }, + { + "epoch": 0.0071348875447765575, + "grad_norm": 0.0, + "learning_rate": 4.755381604696674e-06, + "loss": 1.5098, + "step": 243 + }, + { + "epoch": 0.0071642492219155555, + "grad_norm": 0.0, + "learning_rate": 4.774951076320939e-06, + "loss": 1.5547, + "step": 244 + }, + { + "epoch": 0.007193610899054554, + "grad_norm": 0.0, + "learning_rate": 4.7945205479452054e-06, + "loss": 1.4971, + "step": 245 + }, + { + "epoch": 0.007222972576193552, + "grad_norm": 0.0, + "learning_rate": 4.814090019569472e-06, + "loss": 1.5488, + "step": 246 + }, + { + "epoch": 0.00725233425333255, + "grad_norm": 0.0, + "learning_rate": 4.833659491193738e-06, + "loss": 1.502, + "step": 247 + }, + { + "epoch": 0.007281695930471548, + "grad_norm": 0.0, + "learning_rate": 4.853228962818004e-06, + "loss": 1.4629, + "step": 248 + }, + { + "epoch": 0.007311057607610547, + "grad_norm": 0.0, + "learning_rate": 4.87279843444227e-06, + "loss": 1.5381, + "step": 249 + }, + { + "epoch": 0.007340419284749545, + "grad_norm": 0.0, + "learning_rate": 4.892367906066536e-06, + "loss": 1.3262, + "step": 250 + }, + { + "epoch": 0.007369780961888543, + "grad_norm": 0.0, + "learning_rate": 4.911937377690803e-06, + "loss": 1.4795, + "step": 251 + }, + { + "epoch": 0.007399142639027541, + "grad_norm": 0.0, + "learning_rate": 4.931506849315069e-06, + "loss": 1.6445, + "step": 252 + }, + { + "epoch": 0.00742850431616654, + "grad_norm": 0.0, + "learning_rate": 4.9510763209393345e-06, + "loss": 1.5039, + "step": 253 + }, + { + "epoch": 0.007457865993305538, + "grad_norm": 0.0, + "learning_rate": 4.970645792563601e-06, + "loss": 1.4023, + "step": 254 + }, + { + "epoch": 0.007487227670444536, + "grad_norm": 0.0, + "learning_rate": 4.990215264187867e-06, + "loss": 1.6191, + "step": 255 + }, + { + "epoch": 0.007516589347583534, + "grad_norm": 0.0, + "learning_rate": 5.009784735812134e-06, + "loss": 1.5078, + "step": 256 + }, + { + "epoch": 0.007545951024722533, + "grad_norm": 0.0, + "learning_rate": 5.0293542074364e-06, + "loss": 1.5381, + "step": 257 + }, + { + "epoch": 0.0075753127018615305, + "grad_norm": 0.0, + "learning_rate": 5.0489236790606654e-06, + "loss": 1.5615, + "step": 258 + }, + { + "epoch": 0.0076046743790005285, + "grad_norm": 0.0, + "learning_rate": 5.068493150684932e-06, + "loss": 1.6475, + "step": 259 + }, + { + "epoch": 0.0076340360561395265, + "grad_norm": 0.0, + "learning_rate": 5.088062622309198e-06, + "loss": 1.4688, + "step": 260 + }, + { + "epoch": 0.0076633977332785244, + "grad_norm": 0.0, + "learning_rate": 5.1076320939334645e-06, + "loss": 1.4941, + "step": 261 + }, + { + "epoch": 0.007692759410417523, + "grad_norm": 0.0, + "learning_rate": 5.12720156555773e-06, + "loss": 1.3838, + "step": 262 + }, + { + "epoch": 0.007722121087556521, + "grad_norm": 0.0, + "learning_rate": 5.146771037181997e-06, + "loss": 1.543, + "step": 263 + }, + { + "epoch": 0.007751482764695519, + "grad_norm": 0.0, + "learning_rate": 5.166340508806263e-06, + "loss": 1.5137, + "step": 264 + }, + { + "epoch": 0.007780844441834517, + "grad_norm": 0.0, + "learning_rate": 5.185909980430529e-06, + "loss": 1.5352, + "step": 265 + }, + { + "epoch": 0.007810206118973516, + "grad_norm": 0.0, + "learning_rate": 5.2054794520547945e-06, + "loss": 1.5303, + "step": 266 + }, + { + "epoch": 0.007839567796112513, + "grad_norm": 0.0, + "learning_rate": 5.225048923679062e-06, + "loss": 1.3916, + "step": 267 + }, + { + "epoch": 0.007868929473251513, + "grad_norm": 0.0, + "learning_rate": 5.244618395303327e-06, + "loss": 1.4766, + "step": 268 + }, + { + "epoch": 0.00789829115039051, + "grad_norm": 0.0, + "learning_rate": 5.2641878669275936e-06, + "loss": 1.4404, + "step": 269 + }, + { + "epoch": 0.007927652827529509, + "grad_norm": 0.0, + "learning_rate": 5.283757338551859e-06, + "loss": 1.5469, + "step": 270 + }, + { + "epoch": 0.007957014504668507, + "grad_norm": 0.0, + "learning_rate": 5.303326810176126e-06, + "loss": 1.4473, + "step": 271 + }, + { + "epoch": 0.007986376181807505, + "grad_norm": 0.0, + "learning_rate": 5.322896281800392e-06, + "loss": 1.5732, + "step": 272 + }, + { + "epoch": 0.008015737858946503, + "grad_norm": 0.0, + "learning_rate": 5.342465753424658e-06, + "loss": 1.583, + "step": 273 + }, + { + "epoch": 0.0080450995360855, + "grad_norm": 0.0, + "learning_rate": 5.362035225048924e-06, + "loss": 1.4668, + "step": 274 + }, + { + "epoch": 0.008074461213224499, + "grad_norm": 0.0, + "learning_rate": 5.381604696673191e-06, + "loss": 1.6201, + "step": 275 + }, + { + "epoch": 0.008103822890363498, + "grad_norm": 0.0, + "learning_rate": 5.401174168297456e-06, + "loss": 1.5381, + "step": 276 + }, + { + "epoch": 0.008133184567502496, + "grad_norm": 0.0, + "learning_rate": 5.420743639921723e-06, + "loss": 1.5283, + "step": 277 + }, + { + "epoch": 0.008162546244641494, + "grad_norm": 0.0, + "learning_rate": 5.440313111545988e-06, + "loss": 1.6045, + "step": 278 + }, + { + "epoch": 0.008191907921780492, + "grad_norm": 0.0, + "learning_rate": 5.459882583170255e-06, + "loss": 1.4756, + "step": 279 + }, + { + "epoch": 0.00822126959891949, + "grad_norm": 0.0, + "learning_rate": 5.479452054794521e-06, + "loss": 1.625, + "step": 280 + }, + { + "epoch": 0.008250631276058488, + "grad_norm": 0.0, + "learning_rate": 5.499021526418787e-06, + "loss": 1.5283, + "step": 281 + }, + { + "epoch": 0.008279992953197486, + "grad_norm": 0.0, + "learning_rate": 5.518590998043053e-06, + "loss": 1.5664, + "step": 282 + }, + { + "epoch": 0.008309354630336484, + "grad_norm": 0.0, + "learning_rate": 5.53816046966732e-06, + "loss": 1.4307, + "step": 283 + }, + { + "epoch": 0.008338716307475484, + "grad_norm": 0.0, + "learning_rate": 5.557729941291585e-06, + "loss": 1.5361, + "step": 284 + }, + { + "epoch": 0.008368077984614482, + "grad_norm": 0.0, + "learning_rate": 5.577299412915852e-06, + "loss": 1.4609, + "step": 285 + }, + { + "epoch": 0.00839743966175348, + "grad_norm": 0.0, + "learning_rate": 5.596868884540117e-06, + "loss": 1.5322, + "step": 286 + }, + { + "epoch": 0.008426801338892478, + "grad_norm": 0.0, + "learning_rate": 5.6164383561643845e-06, + "loss": 1.4883, + "step": 287 + }, + { + "epoch": 0.008456163016031476, + "grad_norm": 0.0, + "learning_rate": 5.63600782778865e-06, + "loss": 1.502, + "step": 288 + }, + { + "epoch": 0.008485524693170474, + "grad_norm": 0.0, + "learning_rate": 5.655577299412916e-06, + "loss": 1.4082, + "step": 289 + }, + { + "epoch": 0.008514886370309472, + "grad_norm": 0.0, + "learning_rate": 5.675146771037182e-06, + "loss": 1.5088, + "step": 290 + }, + { + "epoch": 0.00854424804744847, + "grad_norm": 0.0, + "learning_rate": 5.694716242661449e-06, + "loss": 1.4453, + "step": 291 + }, + { + "epoch": 0.00857360972458747, + "grad_norm": 0.0, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.4834, + "step": 292 + }, + { + "epoch": 0.008602971401726467, + "grad_norm": 0.0, + "learning_rate": 5.733855185909981e-06, + "loss": 1.3662, + "step": 293 + }, + { + "epoch": 0.008632333078865465, + "grad_norm": 0.0, + "learning_rate": 5.753424657534246e-06, + "loss": 1.5127, + "step": 294 + }, + { + "epoch": 0.008661694756004463, + "grad_norm": 0.0, + "learning_rate": 5.7729941291585136e-06, + "loss": 1.4541, + "step": 295 + }, + { + "epoch": 0.008691056433143461, + "grad_norm": 0.0, + "learning_rate": 5.792563600782779e-06, + "loss": 1.5625, + "step": 296 + }, + { + "epoch": 0.008720418110282459, + "grad_norm": 0.0, + "learning_rate": 5.812133072407045e-06, + "loss": 1.5498, + "step": 297 + }, + { + "epoch": 0.008749779787421457, + "grad_norm": 0.0, + "learning_rate": 5.831702544031311e-06, + "loss": 1.5625, + "step": 298 + }, + { + "epoch": 0.008779141464560455, + "grad_norm": 0.0, + "learning_rate": 5.851272015655578e-06, + "loss": 1.4258, + "step": 299 + }, + { + "epoch": 0.008808503141699453, + "grad_norm": 0.0, + "learning_rate": 5.870841487279844e-06, + "loss": 1.502, + "step": 300 + }, + { + "epoch": 0.008837864818838453, + "grad_norm": 0.0, + "learning_rate": 5.89041095890411e-06, + "loss": 1.46, + "step": 301 + }, + { + "epoch": 0.00886722649597745, + "grad_norm": 0.0, + "learning_rate": 5.909980430528376e-06, + "loss": 1.4111, + "step": 302 + }, + { + "epoch": 0.008896588173116449, + "grad_norm": 0.0, + "learning_rate": 5.929549902152643e-06, + "loss": 1.4248, + "step": 303 + }, + { + "epoch": 0.008925949850255447, + "grad_norm": 0.0, + "learning_rate": 5.949119373776908e-06, + "loss": 1.416, + "step": 304 + }, + { + "epoch": 0.008955311527394445, + "grad_norm": 0.0, + "learning_rate": 5.9686888454011745e-06, + "loss": 1.5508, + "step": 305 + }, + { + "epoch": 0.008984673204533443, + "grad_norm": 0.0, + "learning_rate": 5.988258317025441e-06, + "loss": 1.417, + "step": 306 + }, + { + "epoch": 0.00901403488167244, + "grad_norm": 0.0, + "learning_rate": 6.007827788649707e-06, + "loss": 1.4971, + "step": 307 + }, + { + "epoch": 0.009043396558811439, + "grad_norm": 0.0, + "learning_rate": 6.027397260273973e-06, + "loss": 1.4805, + "step": 308 + }, + { + "epoch": 0.009072758235950438, + "grad_norm": 0.0, + "learning_rate": 6.046966731898239e-06, + "loss": 1.5771, + "step": 309 + }, + { + "epoch": 0.009102119913089436, + "grad_norm": 0.0, + "learning_rate": 6.066536203522505e-06, + "loss": 1.4385, + "step": 310 + }, + { + "epoch": 0.009131481590228434, + "grad_norm": 0.0, + "learning_rate": 6.086105675146772e-06, + "loss": 1.5332, + "step": 311 + }, + { + "epoch": 0.009160843267367432, + "grad_norm": 0.0, + "learning_rate": 6.105675146771037e-06, + "loss": 1.4766, + "step": 312 + }, + { + "epoch": 0.00919020494450643, + "grad_norm": 0.0, + "learning_rate": 6.1252446183953044e-06, + "loss": 1.4863, + "step": 313 + }, + { + "epoch": 0.009219566621645428, + "grad_norm": 0.0, + "learning_rate": 6.14481409001957e-06, + "loss": 1.4951, + "step": 314 + }, + { + "epoch": 0.009248928298784426, + "grad_norm": 0.0, + "learning_rate": 6.164383561643836e-06, + "loss": 1.4385, + "step": 315 + }, + { + "epoch": 0.009278289975923424, + "grad_norm": 0.0, + "learning_rate": 6.183953033268102e-06, + "loss": 1.4561, + "step": 316 + }, + { + "epoch": 0.009307651653062424, + "grad_norm": 0.0, + "learning_rate": 6.203522504892369e-06, + "loss": 1.4971, + "step": 317 + }, + { + "epoch": 0.009337013330201422, + "grad_norm": 0.0, + "learning_rate": 6.2230919765166345e-06, + "loss": 1.5049, + "step": 318 + }, + { + "epoch": 0.00936637500734042, + "grad_norm": 0.0, + "learning_rate": 6.242661448140901e-06, + "loss": 1.335, + "step": 319 + }, + { + "epoch": 0.009395736684479418, + "grad_norm": 0.0, + "learning_rate": 6.262230919765166e-06, + "loss": 1.3955, + "step": 320 + }, + { + "epoch": 0.009425098361618416, + "grad_norm": 0.0, + "learning_rate": 6.2818003913894335e-06, + "loss": 1.4922, + "step": 321 + }, + { + "epoch": 0.009454460038757414, + "grad_norm": 0.0, + "learning_rate": 6.301369863013699e-06, + "loss": 1.5332, + "step": 322 + }, + { + "epoch": 0.009483821715896412, + "grad_norm": 0.0, + "learning_rate": 6.320939334637965e-06, + "loss": 1.4062, + "step": 323 + }, + { + "epoch": 0.00951318339303541, + "grad_norm": 0.0, + "learning_rate": 6.340508806262231e-06, + "loss": 1.6758, + "step": 324 + }, + { + "epoch": 0.00954254507017441, + "grad_norm": 0.0, + "learning_rate": 6.360078277886498e-06, + "loss": 1.5078, + "step": 325 + }, + { + "epoch": 0.009571906747313407, + "grad_norm": 0.0, + "learning_rate": 6.379647749510764e-06, + "loss": 1.5, + "step": 326 + }, + { + "epoch": 0.009601268424452405, + "grad_norm": 0.0, + "learning_rate": 6.39921722113503e-06, + "loss": 1.5332, + "step": 327 + }, + { + "epoch": 0.009630630101591403, + "grad_norm": 0.0, + "learning_rate": 6.4187866927592954e-06, + "loss": 1.4326, + "step": 328 + }, + { + "epoch": 0.009659991778730401, + "grad_norm": 0.0, + "learning_rate": 6.438356164383563e-06, + "loss": 1.5918, + "step": 329 + }, + { + "epoch": 0.009689353455869399, + "grad_norm": 0.0, + "learning_rate": 6.457925636007828e-06, + "loss": 1.5244, + "step": 330 + }, + { + "epoch": 0.009718715133008397, + "grad_norm": 0.0, + "learning_rate": 6.4774951076320945e-06, + "loss": 1.4092, + "step": 331 + }, + { + "epoch": 0.009748076810147395, + "grad_norm": 0.0, + "learning_rate": 6.49706457925636e-06, + "loss": 1.457, + "step": 332 + }, + { + "epoch": 0.009777438487286393, + "grad_norm": 0.0, + "learning_rate": 6.516634050880627e-06, + "loss": 1.4268, + "step": 333 + }, + { + "epoch": 0.009806800164425393, + "grad_norm": 0.0, + "learning_rate": 6.536203522504893e-06, + "loss": 1.5215, + "step": 334 + }, + { + "epoch": 0.00983616184156439, + "grad_norm": 0.0, + "learning_rate": 6.555772994129159e-06, + "loss": 1.4551, + "step": 335 + }, + { + "epoch": 0.009865523518703389, + "grad_norm": 0.0, + "learning_rate": 6.5753424657534245e-06, + "loss": 1.5215, + "step": 336 + }, + { + "epoch": 0.009894885195842387, + "grad_norm": 0.0, + "learning_rate": 6.594911937377692e-06, + "loss": 1.4854, + "step": 337 + }, + { + "epoch": 0.009924246872981385, + "grad_norm": 0.0, + "learning_rate": 6.614481409001957e-06, + "loss": 1.5234, + "step": 338 + }, + { + "epoch": 0.009953608550120382, + "grad_norm": 0.0, + "learning_rate": 6.634050880626224e-06, + "loss": 1.4209, + "step": 339 + }, + { + "epoch": 0.00998297022725938, + "grad_norm": 0.0, + "learning_rate": 6.653620352250489e-06, + "loss": 1.4629, + "step": 340 + }, + { + "epoch": 0.010012331904398378, + "grad_norm": 0.0, + "learning_rate": 6.673189823874756e-06, + "loss": 1.5947, + "step": 341 + }, + { + "epoch": 0.010041693581537378, + "grad_norm": 0.0, + "learning_rate": 6.692759295499022e-06, + "loss": 1.5723, + "step": 342 + }, + { + "epoch": 0.010071055258676376, + "grad_norm": 0.0, + "learning_rate": 6.712328767123288e-06, + "loss": 1.4248, + "step": 343 + }, + { + "epoch": 0.010100416935815374, + "grad_norm": 0.0, + "learning_rate": 6.731898238747554e-06, + "loss": 1.5098, + "step": 344 + }, + { + "epoch": 0.010129778612954372, + "grad_norm": 0.0, + "learning_rate": 6.751467710371821e-06, + "loss": 1.4238, + "step": 345 + }, + { + "epoch": 0.01015914029009337, + "grad_norm": 0.0, + "learning_rate": 6.771037181996086e-06, + "loss": 1.4629, + "step": 346 + }, + { + "epoch": 0.010188501967232368, + "grad_norm": 0.0, + "learning_rate": 6.790606653620353e-06, + "loss": 1.4912, + "step": 347 + }, + { + "epoch": 0.010217863644371366, + "grad_norm": 0.0, + "learning_rate": 6.810176125244618e-06, + "loss": 1.4902, + "step": 348 + }, + { + "epoch": 0.010247225321510364, + "grad_norm": 0.0, + "learning_rate": 6.829745596868885e-06, + "loss": 1.5352, + "step": 349 + }, + { + "epoch": 0.010276586998649364, + "grad_norm": 0.0, + "learning_rate": 6.849315068493151e-06, + "loss": 1.4492, + "step": 350 + }, + { + "epoch": 0.010305948675788362, + "grad_norm": 0.0, + "learning_rate": 6.868884540117417e-06, + "loss": 1.6543, + "step": 351 + }, + { + "epoch": 0.01033531035292736, + "grad_norm": 0.0, + "learning_rate": 6.8884540117416836e-06, + "loss": 1.4453, + "step": 352 + }, + { + "epoch": 0.010364672030066358, + "grad_norm": 0.0, + "learning_rate": 6.90802348336595e-06, + "loss": 1.5908, + "step": 353 + }, + { + "epoch": 0.010394033707205355, + "grad_norm": 0.0, + "learning_rate": 6.927592954990215e-06, + "loss": 1.5146, + "step": 354 + }, + { + "epoch": 0.010423395384344353, + "grad_norm": 0.0, + "learning_rate": 6.947162426614482e-06, + "loss": 1.5205, + "step": 355 + }, + { + "epoch": 0.010452757061483351, + "grad_norm": 0.0, + "learning_rate": 6.966731898238748e-06, + "loss": 1.5908, + "step": 356 + }, + { + "epoch": 0.01048211873862235, + "grad_norm": 0.0, + "learning_rate": 6.9863013698630145e-06, + "loss": 1.5273, + "step": 357 + }, + { + "epoch": 0.010511480415761349, + "grad_norm": 0.0, + "learning_rate": 7.00587084148728e-06, + "loss": 1.4717, + "step": 358 + }, + { + "epoch": 0.010540842092900347, + "grad_norm": 0.0, + "learning_rate": 7.025440313111546e-06, + "loss": 1.4941, + "step": 359 + }, + { + "epoch": 0.010570203770039345, + "grad_norm": 0.0, + "learning_rate": 7.045009784735813e-06, + "loss": 1.5088, + "step": 360 + }, + { + "epoch": 0.010599565447178343, + "grad_norm": 0.0, + "learning_rate": 7.064579256360079e-06, + "loss": 1.4775, + "step": 361 + }, + { + "epoch": 0.010628927124317341, + "grad_norm": 0.0, + "learning_rate": 7.0841487279843445e-06, + "loss": 1.4854, + "step": 362 + }, + { + "epoch": 0.010658288801456339, + "grad_norm": 0.0, + "learning_rate": 7.103718199608612e-06, + "loss": 1.5127, + "step": 363 + }, + { + "epoch": 0.010687650478595337, + "grad_norm": 0.0, + "learning_rate": 7.123287671232877e-06, + "loss": 1.4912, + "step": 364 + }, + { + "epoch": 0.010717012155734335, + "grad_norm": 0.0, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.502, + "step": 365 + }, + { + "epoch": 0.010746373832873335, + "grad_norm": 0.0, + "learning_rate": 7.162426614481409e-06, + "loss": 1.3857, + "step": 366 + }, + { + "epoch": 0.010775735510012333, + "grad_norm": 0.0, + "learning_rate": 7.181996086105676e-06, + "loss": 1.4443, + "step": 367 + }, + { + "epoch": 0.01080509718715133, + "grad_norm": 0.0, + "learning_rate": 7.201565557729942e-06, + "loss": 1.5957, + "step": 368 + }, + { + "epoch": 0.010834458864290328, + "grad_norm": 0.0, + "learning_rate": 7.221135029354208e-06, + "loss": 1.46, + "step": 369 + }, + { + "epoch": 0.010863820541429326, + "grad_norm": 0.0, + "learning_rate": 7.240704500978474e-06, + "loss": 1.459, + "step": 370 + }, + { + "epoch": 0.010893182218568324, + "grad_norm": 0.0, + "learning_rate": 7.260273972602741e-06, + "loss": 1.5654, + "step": 371 + }, + { + "epoch": 0.010922543895707322, + "grad_norm": 0.0, + "learning_rate": 7.279843444227006e-06, + "loss": 1.4258, + "step": 372 + }, + { + "epoch": 0.01095190557284632, + "grad_norm": 0.0, + "learning_rate": 7.299412915851273e-06, + "loss": 1.4355, + "step": 373 + }, + { + "epoch": 0.010981267249985318, + "grad_norm": 0.0, + "learning_rate": 7.318982387475538e-06, + "loss": 1.4893, + "step": 374 + }, + { + "epoch": 0.011010628927124318, + "grad_norm": 0.0, + "learning_rate": 7.338551859099805e-06, + "loss": 1.4971, + "step": 375 + }, + { + "epoch": 0.011039990604263316, + "grad_norm": 0.0, + "learning_rate": 7.358121330724071e-06, + "loss": 1.416, + "step": 376 + }, + { + "epoch": 0.011069352281402314, + "grad_norm": 0.0, + "learning_rate": 7.377690802348337e-06, + "loss": 1.4512, + "step": 377 + }, + { + "epoch": 0.011098713958541312, + "grad_norm": 0.0, + "learning_rate": 7.397260273972603e-06, + "loss": 1.542, + "step": 378 + }, + { + "epoch": 0.01112807563568031, + "grad_norm": 0.0, + "learning_rate": 7.41682974559687e-06, + "loss": 1.4248, + "step": 379 + }, + { + "epoch": 0.011157437312819308, + "grad_norm": 0.0, + "learning_rate": 7.436399217221135e-06, + "loss": 1.4834, + "step": 380 + }, + { + "epoch": 0.011186798989958306, + "grad_norm": 0.0, + "learning_rate": 7.455968688845402e-06, + "loss": 1.4922, + "step": 381 + }, + { + "epoch": 0.011216160667097304, + "grad_norm": 0.0, + "learning_rate": 7.475538160469667e-06, + "loss": 1.4736, + "step": 382 + }, + { + "epoch": 0.011245522344236304, + "grad_norm": 0.0, + "learning_rate": 7.4951076320939344e-06, + "loss": 1.3691, + "step": 383 + }, + { + "epoch": 0.011274884021375301, + "grad_norm": 0.0, + "learning_rate": 7.5146771037182e-06, + "loss": 1.5547, + "step": 384 + }, + { + "epoch": 0.0113042456985143, + "grad_norm": 0.0, + "learning_rate": 7.534246575342466e-06, + "loss": 1.3838, + "step": 385 + }, + { + "epoch": 0.011333607375653297, + "grad_norm": 0.0, + "learning_rate": 7.553816046966732e-06, + "loss": 1.415, + "step": 386 + }, + { + "epoch": 0.011362969052792295, + "grad_norm": 0.0, + "learning_rate": 7.573385518590999e-06, + "loss": 1.4209, + "step": 387 + }, + { + "epoch": 0.011392330729931293, + "grad_norm": 0.0, + "learning_rate": 7.5929549902152645e-06, + "loss": 1.4922, + "step": 388 + }, + { + "epoch": 0.011421692407070291, + "grad_norm": 0.0, + "learning_rate": 7.612524461839531e-06, + "loss": 1.499, + "step": 389 + }, + { + "epoch": 0.01145105408420929, + "grad_norm": 0.0, + "learning_rate": 7.632093933463796e-06, + "loss": 1.6113, + "step": 390 + }, + { + "epoch": 0.011480415761348289, + "grad_norm": 0.0, + "learning_rate": 7.651663405088063e-06, + "loss": 1.6016, + "step": 391 + }, + { + "epoch": 0.011509777438487287, + "grad_norm": 0.0, + "learning_rate": 7.671232876712329e-06, + "loss": 1.5693, + "step": 392 + }, + { + "epoch": 0.011539139115626285, + "grad_norm": 0.0, + "learning_rate": 7.690802348336595e-06, + "loss": 1.4424, + "step": 393 + }, + { + "epoch": 0.011568500792765283, + "grad_norm": 0.0, + "learning_rate": 7.710371819960862e-06, + "loss": 1.54, + "step": 394 + }, + { + "epoch": 0.01159786246990428, + "grad_norm": 0.0, + "learning_rate": 7.729941291585128e-06, + "loss": 1.5166, + "step": 395 + }, + { + "epoch": 0.011627224147043279, + "grad_norm": 0.0, + "learning_rate": 7.749510763209393e-06, + "loss": 1.4932, + "step": 396 + }, + { + "epoch": 0.011656585824182277, + "grad_norm": 0.0, + "learning_rate": 7.76908023483366e-06, + "loss": 1.4434, + "step": 397 + }, + { + "epoch": 0.011685947501321275, + "grad_norm": 0.0, + "learning_rate": 7.788649706457925e-06, + "loss": 1.5205, + "step": 398 + }, + { + "epoch": 0.011715309178460274, + "grad_norm": 0.0, + "learning_rate": 7.808219178082192e-06, + "loss": 1.4258, + "step": 399 + }, + { + "epoch": 0.011744670855599272, + "grad_norm": 0.0, + "learning_rate": 7.827788649706458e-06, + "loss": 1.4824, + "step": 400 + }, + { + "epoch": 0.01177403253273827, + "grad_norm": 0.0, + "learning_rate": 7.847358121330724e-06, + "loss": 1.5762, + "step": 401 + }, + { + "epoch": 0.011803394209877268, + "grad_norm": 0.0, + "learning_rate": 7.86692759295499e-06, + "loss": 1.4102, + "step": 402 + }, + { + "epoch": 0.011832755887016266, + "grad_norm": 0.0, + "learning_rate": 7.886497064579257e-06, + "loss": 1.4434, + "step": 403 + }, + { + "epoch": 0.011862117564155264, + "grad_norm": 0.0, + "learning_rate": 7.906066536203524e-06, + "loss": 1.4756, + "step": 404 + }, + { + "epoch": 0.011891479241294262, + "grad_norm": 0.0, + "learning_rate": 7.92563600782779e-06, + "loss": 1.6318, + "step": 405 + }, + { + "epoch": 0.01192084091843326, + "grad_norm": 0.0, + "learning_rate": 7.945205479452055e-06, + "loss": 1.4746, + "step": 406 + }, + { + "epoch": 0.011950202595572258, + "grad_norm": 0.0, + "learning_rate": 7.964774951076321e-06, + "loss": 1.6133, + "step": 407 + }, + { + "epoch": 0.011979564272711258, + "grad_norm": 0.0, + "learning_rate": 7.984344422700587e-06, + "loss": 1.4775, + "step": 408 + }, + { + "epoch": 0.012008925949850256, + "grad_norm": 0.0, + "learning_rate": 8.003913894324854e-06, + "loss": 1.4561, + "step": 409 + }, + { + "epoch": 0.012038287626989254, + "grad_norm": 0.0, + "learning_rate": 8.02348336594912e-06, + "loss": 1.4658, + "step": 410 + }, + { + "epoch": 0.012067649304128252, + "grad_norm": 0.0, + "learning_rate": 8.043052837573386e-06, + "loss": 1.5615, + "step": 411 + }, + { + "epoch": 0.01209701098126725, + "grad_norm": 0.0, + "learning_rate": 8.062622309197653e-06, + "loss": 1.542, + "step": 412 + }, + { + "epoch": 0.012126372658406248, + "grad_norm": 0.0, + "learning_rate": 8.082191780821919e-06, + "loss": 1.5254, + "step": 413 + }, + { + "epoch": 0.012155734335545246, + "grad_norm": 0.0, + "learning_rate": 8.101761252446184e-06, + "loss": 1.4795, + "step": 414 + }, + { + "epoch": 0.012185096012684244, + "grad_norm": 0.0, + "learning_rate": 8.121330724070452e-06, + "loss": 1.3926, + "step": 415 + }, + { + "epoch": 0.012214457689823243, + "grad_norm": 0.0, + "learning_rate": 8.140900195694716e-06, + "loss": 1.4639, + "step": 416 + }, + { + "epoch": 0.012243819366962241, + "grad_norm": 0.0, + "learning_rate": 8.160469667318983e-06, + "loss": 1.5576, + "step": 417 + }, + { + "epoch": 0.01227318104410124, + "grad_norm": 0.0, + "learning_rate": 8.180039138943249e-06, + "loss": 1.5039, + "step": 418 + }, + { + "epoch": 0.012302542721240237, + "grad_norm": 0.0, + "learning_rate": 8.199608610567515e-06, + "loss": 1.4297, + "step": 419 + }, + { + "epoch": 0.012331904398379235, + "grad_norm": 0.0, + "learning_rate": 8.219178082191782e-06, + "loss": 1.4238, + "step": 420 + }, + { + "epoch": 0.012361266075518233, + "grad_norm": 0.0, + "learning_rate": 8.238747553816048e-06, + "loss": 1.5879, + "step": 421 + }, + { + "epoch": 0.012390627752657231, + "grad_norm": 0.0, + "learning_rate": 8.258317025440313e-06, + "loss": 1.5508, + "step": 422 + }, + { + "epoch": 0.01241998942979623, + "grad_norm": 0.0, + "learning_rate": 8.27788649706458e-06, + "loss": 1.5439, + "step": 423 + }, + { + "epoch": 0.012449351106935229, + "grad_norm": 0.0, + "learning_rate": 8.297455968688845e-06, + "loss": 1.4717, + "step": 424 + }, + { + "epoch": 0.012478712784074227, + "grad_norm": 0.0, + "learning_rate": 8.317025440313112e-06, + "loss": 1.4961, + "step": 425 + }, + { + "epoch": 0.012508074461213225, + "grad_norm": 0.0, + "learning_rate": 8.336594911937378e-06, + "loss": 1.4834, + "step": 426 + }, + { + "epoch": 0.012537436138352223, + "grad_norm": 0.0, + "learning_rate": 8.356164383561644e-06, + "loss": 1.5361, + "step": 427 + }, + { + "epoch": 0.01256679781549122, + "grad_norm": 0.0, + "learning_rate": 8.37573385518591e-06, + "loss": 1.5449, + "step": 428 + }, + { + "epoch": 0.012596159492630219, + "grad_norm": 0.0, + "learning_rate": 8.395303326810177e-06, + "loss": 1.5518, + "step": 429 + }, + { + "epoch": 0.012625521169769217, + "grad_norm": 0.0, + "learning_rate": 8.414872798434442e-06, + "loss": 1.5527, + "step": 430 + }, + { + "epoch": 0.012654882846908215, + "grad_norm": 0.0, + "learning_rate": 8.43444227005871e-06, + "loss": 1.5039, + "step": 431 + }, + { + "epoch": 0.012684244524047214, + "grad_norm": 0.0, + "learning_rate": 8.454011741682975e-06, + "loss": 1.5635, + "step": 432 + }, + { + "epoch": 0.012713606201186212, + "grad_norm": 0.0, + "learning_rate": 8.473581213307241e-06, + "loss": 1.5283, + "step": 433 + }, + { + "epoch": 0.01274296787832521, + "grad_norm": 0.0, + "learning_rate": 8.493150684931507e-06, + "loss": 1.5879, + "step": 434 + }, + { + "epoch": 0.012772329555464208, + "grad_norm": 0.0, + "learning_rate": 8.512720156555774e-06, + "loss": 1.5449, + "step": 435 + }, + { + "epoch": 0.012801691232603206, + "grad_norm": 0.0, + "learning_rate": 8.53228962818004e-06, + "loss": 1.4873, + "step": 436 + }, + { + "epoch": 0.012831052909742204, + "grad_norm": 0.0, + "learning_rate": 8.551859099804306e-06, + "loss": 1.4141, + "step": 437 + }, + { + "epoch": 0.012860414586881202, + "grad_norm": 0.0, + "learning_rate": 8.571428571428571e-06, + "loss": 1.5186, + "step": 438 + }, + { + "epoch": 0.0128897762640202, + "grad_norm": 0.0, + "learning_rate": 8.590998043052839e-06, + "loss": 1.6221, + "step": 439 + }, + { + "epoch": 0.0129191379411592, + "grad_norm": 0.0, + "learning_rate": 8.610567514677104e-06, + "loss": 1.5166, + "step": 440 + }, + { + "epoch": 0.012948499618298198, + "grad_norm": 0.0, + "learning_rate": 8.63013698630137e-06, + "loss": 1.4902, + "step": 441 + }, + { + "epoch": 0.012977861295437196, + "grad_norm": 0.0, + "learning_rate": 8.649706457925636e-06, + "loss": 1.4883, + "step": 442 + }, + { + "epoch": 0.013007222972576194, + "grad_norm": 0.0, + "learning_rate": 8.669275929549903e-06, + "loss": 1.5303, + "step": 443 + }, + { + "epoch": 0.013036584649715192, + "grad_norm": 0.0, + "learning_rate": 8.688845401174169e-06, + "loss": 1.5547, + "step": 444 + }, + { + "epoch": 0.01306594632685419, + "grad_norm": 0.0, + "learning_rate": 8.708414872798435e-06, + "loss": 1.5029, + "step": 445 + }, + { + "epoch": 0.013095308003993188, + "grad_norm": 0.0, + "learning_rate": 8.7279843444227e-06, + "loss": 1.4092, + "step": 446 + }, + { + "epoch": 0.013124669681132186, + "grad_norm": 0.0, + "learning_rate": 8.747553816046968e-06, + "loss": 1.4463, + "step": 447 + }, + { + "epoch": 0.013154031358271184, + "grad_norm": 0.0, + "learning_rate": 8.767123287671233e-06, + "loss": 1.5137, + "step": 448 + }, + { + "epoch": 0.013183393035410183, + "grad_norm": 0.0, + "learning_rate": 8.786692759295499e-06, + "loss": 1.4697, + "step": 449 + }, + { + "epoch": 0.013212754712549181, + "grad_norm": 0.0, + "learning_rate": 8.806262230919765e-06, + "loss": 1.4775, + "step": 450 + }, + { + "epoch": 0.01324211638968818, + "grad_norm": 0.0, + "learning_rate": 8.825831702544032e-06, + "loss": 1.5713, + "step": 451 + }, + { + "epoch": 0.013271478066827177, + "grad_norm": 0.0, + "learning_rate": 8.845401174168298e-06, + "loss": 1.5234, + "step": 452 + }, + { + "epoch": 0.013300839743966175, + "grad_norm": 0.0, + "learning_rate": 8.864970645792564e-06, + "loss": 1.5176, + "step": 453 + }, + { + "epoch": 0.013330201421105173, + "grad_norm": 0.0, + "learning_rate": 8.88454011741683e-06, + "loss": 1.5166, + "step": 454 + }, + { + "epoch": 0.013359563098244171, + "grad_norm": 0.0, + "learning_rate": 8.904109589041097e-06, + "loss": 1.4736, + "step": 455 + }, + { + "epoch": 0.013388924775383169, + "grad_norm": 0.0, + "learning_rate": 8.923679060665362e-06, + "loss": 1.6152, + "step": 456 + }, + { + "epoch": 0.013418286452522169, + "grad_norm": 0.0, + "learning_rate": 8.943248532289628e-06, + "loss": 1.5322, + "step": 457 + }, + { + "epoch": 0.013447648129661167, + "grad_norm": 0.0, + "learning_rate": 8.962818003913895e-06, + "loss": 1.4746, + "step": 458 + }, + { + "epoch": 0.013477009806800165, + "grad_norm": 0.0, + "learning_rate": 8.982387475538161e-06, + "loss": 1.4355, + "step": 459 + }, + { + "epoch": 0.013506371483939163, + "grad_norm": 0.0, + "learning_rate": 9.001956947162427e-06, + "loss": 1.5537, + "step": 460 + }, + { + "epoch": 0.01353573316107816, + "grad_norm": 0.0, + "learning_rate": 9.021526418786694e-06, + "loss": 1.4307, + "step": 461 + }, + { + "epoch": 0.013565094838217159, + "grad_norm": 0.0, + "learning_rate": 9.04109589041096e-06, + "loss": 1.5645, + "step": 462 + }, + { + "epoch": 0.013594456515356157, + "grad_norm": 0.0, + "learning_rate": 9.060665362035226e-06, + "loss": 1.5234, + "step": 463 + }, + { + "epoch": 0.013623818192495155, + "grad_norm": 0.0, + "learning_rate": 9.080234833659491e-06, + "loss": 1.5752, + "step": 464 + }, + { + "epoch": 0.013653179869634154, + "grad_norm": 0.0, + "learning_rate": 9.099804305283759e-06, + "loss": 1.5225, + "step": 465 + }, + { + "epoch": 0.013682541546773152, + "grad_norm": 0.0, + "learning_rate": 9.119373776908024e-06, + "loss": 1.5371, + "step": 466 + }, + { + "epoch": 0.01371190322391215, + "grad_norm": 0.0, + "learning_rate": 9.13894324853229e-06, + "loss": 1.5254, + "step": 467 + }, + { + "epoch": 0.013741264901051148, + "grad_norm": 0.0, + "learning_rate": 9.158512720156556e-06, + "loss": 1.4941, + "step": 468 + }, + { + "epoch": 0.013770626578190146, + "grad_norm": 0.0, + "learning_rate": 9.178082191780823e-06, + "loss": 1.6455, + "step": 469 + }, + { + "epoch": 0.013799988255329144, + "grad_norm": 0.0, + "learning_rate": 9.197651663405089e-06, + "loss": 1.501, + "step": 470 + }, + { + "epoch": 0.013829349932468142, + "grad_norm": 0.0, + "learning_rate": 9.217221135029355e-06, + "loss": 1.5703, + "step": 471 + }, + { + "epoch": 0.01385871160960714, + "grad_norm": 0.0, + "learning_rate": 9.23679060665362e-06, + "loss": 1.6299, + "step": 472 + }, + { + "epoch": 0.01388807328674614, + "grad_norm": 0.0, + "learning_rate": 9.256360078277888e-06, + "loss": 1.4844, + "step": 473 + }, + { + "epoch": 0.013917434963885138, + "grad_norm": 0.0, + "learning_rate": 9.275929549902153e-06, + "loss": 1.5771, + "step": 474 + }, + { + "epoch": 0.013946796641024136, + "grad_norm": 0.0, + "learning_rate": 9.295499021526419e-06, + "loss": 1.54, + "step": 475 + }, + { + "epoch": 0.013976158318163134, + "grad_norm": 0.0, + "learning_rate": 9.315068493150685e-06, + "loss": 1.5, + "step": 476 + }, + { + "epoch": 0.014005519995302132, + "grad_norm": 0.0, + "learning_rate": 9.334637964774952e-06, + "loss": 1.4395, + "step": 477 + }, + { + "epoch": 0.01403488167244113, + "grad_norm": 0.0, + "learning_rate": 9.354207436399218e-06, + "loss": 1.6045, + "step": 478 + }, + { + "epoch": 0.014064243349580128, + "grad_norm": 0.0, + "learning_rate": 9.373776908023484e-06, + "loss": 1.4707, + "step": 479 + }, + { + "epoch": 0.014093605026719126, + "grad_norm": 0.0, + "learning_rate": 9.393346379647749e-06, + "loss": 1.5723, + "step": 480 + }, + { + "epoch": 0.014122966703858124, + "grad_norm": 0.0, + "learning_rate": 9.412915851272017e-06, + "loss": 1.6084, + "step": 481 + }, + { + "epoch": 0.014152328380997123, + "grad_norm": 0.0, + "learning_rate": 9.432485322896282e-06, + "loss": 1.4844, + "step": 482 + }, + { + "epoch": 0.014181690058136121, + "grad_norm": 0.0, + "learning_rate": 9.452054794520548e-06, + "loss": 1.498, + "step": 483 + }, + { + "epoch": 0.01421105173527512, + "grad_norm": 0.0, + "learning_rate": 9.471624266144814e-06, + "loss": 1.4014, + "step": 484 + }, + { + "epoch": 0.014240413412414117, + "grad_norm": 0.0, + "learning_rate": 9.49119373776908e-06, + "loss": 1.5596, + "step": 485 + }, + { + "epoch": 0.014269775089553115, + "grad_norm": 0.0, + "learning_rate": 9.510763209393347e-06, + "loss": 1.5166, + "step": 486 + }, + { + "epoch": 0.014299136766692113, + "grad_norm": 0.0, + "learning_rate": 9.530332681017614e-06, + "loss": 1.5498, + "step": 487 + }, + { + "epoch": 0.014328498443831111, + "grad_norm": 0.0, + "learning_rate": 9.549902152641878e-06, + "loss": 1.5791, + "step": 488 + }, + { + "epoch": 0.014357860120970109, + "grad_norm": 0.0, + "learning_rate": 9.569471624266146e-06, + "loss": 1.458, + "step": 489 + }, + { + "epoch": 0.014387221798109109, + "grad_norm": 0.0, + "learning_rate": 9.589041095890411e-06, + "loss": 1.4688, + "step": 490 + }, + { + "epoch": 0.014416583475248107, + "grad_norm": 0.0, + "learning_rate": 9.608610567514677e-06, + "loss": 1.6807, + "step": 491 + }, + { + "epoch": 0.014445945152387105, + "grad_norm": 0.0, + "learning_rate": 9.628180039138944e-06, + "loss": 1.5332, + "step": 492 + }, + { + "epoch": 0.014475306829526103, + "grad_norm": 0.0, + "learning_rate": 9.64774951076321e-06, + "loss": 1.5645, + "step": 493 + }, + { + "epoch": 0.0145046685066651, + "grad_norm": 0.0, + "learning_rate": 9.667318982387476e-06, + "loss": 1.6211, + "step": 494 + }, + { + "epoch": 0.014534030183804099, + "grad_norm": 0.0, + "learning_rate": 9.686888454011743e-06, + "loss": 1.4209, + "step": 495 + }, + { + "epoch": 0.014563391860943097, + "grad_norm": 0.0, + "learning_rate": 9.706457925636007e-06, + "loss": 1.501, + "step": 496 + }, + { + "epoch": 0.014592753538082094, + "grad_norm": 0.0, + "learning_rate": 9.726027397260275e-06, + "loss": 1.4766, + "step": 497 + }, + { + "epoch": 0.014622115215221094, + "grad_norm": 0.0, + "learning_rate": 9.74559686888454e-06, + "loss": 1.582, + "step": 498 + }, + { + "epoch": 0.014651476892360092, + "grad_norm": 0.0, + "learning_rate": 9.765166340508806e-06, + "loss": 1.5127, + "step": 499 + }, + { + "epoch": 0.01468083856949909, + "grad_norm": 0.0, + "learning_rate": 9.784735812133073e-06, + "loss": 1.4082, + "step": 500 + }, + { + "epoch": 0.014710200246638088, + "grad_norm": 0.0, + "learning_rate": 9.804305283757339e-06, + "loss": 1.4707, + "step": 501 + }, + { + "epoch": 0.014739561923777086, + "grad_norm": 0.0, + "learning_rate": 9.823874755381605e-06, + "loss": 1.5098, + "step": 502 + }, + { + "epoch": 0.014768923600916084, + "grad_norm": 0.0, + "learning_rate": 9.843444227005872e-06, + "loss": 1.4043, + "step": 503 + }, + { + "epoch": 0.014798285278055082, + "grad_norm": 0.0, + "learning_rate": 9.863013698630138e-06, + "loss": 1.4795, + "step": 504 + }, + { + "epoch": 0.01482764695519408, + "grad_norm": 0.0, + "learning_rate": 9.882583170254404e-06, + "loss": 1.5283, + "step": 505 + }, + { + "epoch": 0.01485700863233308, + "grad_norm": 0.0, + "learning_rate": 9.902152641878669e-06, + "loss": 1.501, + "step": 506 + }, + { + "epoch": 0.014886370309472078, + "grad_norm": 0.0, + "learning_rate": 9.921722113502935e-06, + "loss": 1.5273, + "step": 507 + }, + { + "epoch": 0.014915731986611076, + "grad_norm": 0.0, + "learning_rate": 9.941291585127202e-06, + "loss": 1.4355, + "step": 508 + }, + { + "epoch": 0.014945093663750074, + "grad_norm": 0.0, + "learning_rate": 9.960861056751468e-06, + "loss": 1.4307, + "step": 509 + }, + { + "epoch": 0.014974455340889072, + "grad_norm": 0.0, + "learning_rate": 9.980430528375734e-06, + "loss": 1.5918, + "step": 510 + }, + { + "epoch": 0.01500381701802807, + "grad_norm": 0.0, + "learning_rate": 1e-05, + "loss": 1.5234, + "step": 511 + }, + { + "epoch": 0.015033178695167067, + "grad_norm": 0.0, + "learning_rate": 1.0019569471624267e-05, + "loss": 1.5928, + "step": 512 + }, + { + "epoch": 0.015062540372306065, + "grad_norm": 0.0, + "learning_rate": 1.0039138943248534e-05, + "loss": 1.5205, + "step": 513 + }, + { + "epoch": 0.015091902049445065, + "grad_norm": 0.0, + "learning_rate": 1.00587084148728e-05, + "loss": 1.4629, + "step": 514 + }, + { + "epoch": 0.015121263726584063, + "grad_norm": 0.0, + "learning_rate": 1.0078277886497066e-05, + "loss": 1.6309, + "step": 515 + }, + { + "epoch": 0.015150625403723061, + "grad_norm": 0.0, + "learning_rate": 1.0097847358121331e-05, + "loss": 1.3818, + "step": 516 + }, + { + "epoch": 0.015179987080862059, + "grad_norm": 0.0, + "learning_rate": 1.0117416829745599e-05, + "loss": 1.4727, + "step": 517 + }, + { + "epoch": 0.015209348758001057, + "grad_norm": 0.0, + "learning_rate": 1.0136986301369864e-05, + "loss": 1.5225, + "step": 518 + }, + { + "epoch": 0.015238710435140055, + "grad_norm": 0.0, + "learning_rate": 1.015655577299413e-05, + "loss": 1.5039, + "step": 519 + }, + { + "epoch": 0.015268072112279053, + "grad_norm": 0.0, + "learning_rate": 1.0176125244618396e-05, + "loss": 1.5488, + "step": 520 + }, + { + "epoch": 0.015297433789418051, + "grad_norm": 0.0, + "learning_rate": 1.0195694716242663e-05, + "loss": 1.4893, + "step": 521 + }, + { + "epoch": 0.015326795466557049, + "grad_norm": 0.0, + "learning_rate": 1.0215264187866929e-05, + "loss": 1.3711, + "step": 522 + }, + { + "epoch": 0.015356157143696049, + "grad_norm": 0.0, + "learning_rate": 1.0234833659491195e-05, + "loss": 1.4951, + "step": 523 + }, + { + "epoch": 0.015385518820835047, + "grad_norm": 0.0, + "learning_rate": 1.025440313111546e-05, + "loss": 1.5049, + "step": 524 + }, + { + "epoch": 0.015414880497974045, + "grad_norm": 0.0, + "learning_rate": 1.0273972602739728e-05, + "loss": 1.6465, + "step": 525 + }, + { + "epoch": 0.015444242175113043, + "grad_norm": 0.0, + "learning_rate": 1.0293542074363994e-05, + "loss": 1.4629, + "step": 526 + }, + { + "epoch": 0.01547360385225204, + "grad_norm": 0.0, + "learning_rate": 1.0313111545988259e-05, + "loss": 1.5068, + "step": 527 + }, + { + "epoch": 0.015502965529391038, + "grad_norm": 0.0, + "learning_rate": 1.0332681017612525e-05, + "loss": 1.4922, + "step": 528 + }, + { + "epoch": 0.015532327206530036, + "grad_norm": 0.0, + "learning_rate": 1.0352250489236792e-05, + "loss": 1.5498, + "step": 529 + }, + { + "epoch": 0.015561688883669034, + "grad_norm": 0.0, + "learning_rate": 1.0371819960861058e-05, + "loss": 1.4814, + "step": 530 + }, + { + "epoch": 0.015591050560808034, + "grad_norm": 0.0, + "learning_rate": 1.0391389432485324e-05, + "loss": 1.4629, + "step": 531 + }, + { + "epoch": 0.015620412237947032, + "grad_norm": 0.0, + "learning_rate": 1.0410958904109589e-05, + "loss": 1.5801, + "step": 532 + }, + { + "epoch": 0.01564977391508603, + "grad_norm": 0.0, + "learning_rate": 1.0430528375733857e-05, + "loss": 1.54, + "step": 533 + }, + { + "epoch": 0.015679135592225026, + "grad_norm": 0.0, + "learning_rate": 1.0450097847358123e-05, + "loss": 1.4619, + "step": 534 + }, + { + "epoch": 0.015708497269364026, + "grad_norm": 0.0, + "learning_rate": 1.0469667318982388e-05, + "loss": 1.6006, + "step": 535 + }, + { + "epoch": 0.015737858946503026, + "grad_norm": 0.0, + "learning_rate": 1.0489236790606654e-05, + "loss": 1.46, + "step": 536 + }, + { + "epoch": 0.015767220623642022, + "grad_norm": 0.0, + "learning_rate": 1.0508806262230922e-05, + "loss": 1.3467, + "step": 537 + }, + { + "epoch": 0.01579658230078102, + "grad_norm": 0.0, + "learning_rate": 1.0528375733855187e-05, + "loss": 1.6621, + "step": 538 + }, + { + "epoch": 0.015825943977920018, + "grad_norm": 0.0, + "learning_rate": 1.0547945205479453e-05, + "loss": 1.457, + "step": 539 + }, + { + "epoch": 0.015855305655059018, + "grad_norm": 0.0, + "learning_rate": 1.0567514677103718e-05, + "loss": 1.4883, + "step": 540 + }, + { + "epoch": 0.015884667332198014, + "grad_norm": 0.0, + "learning_rate": 1.0587084148727986e-05, + "loss": 1.4502, + "step": 541 + }, + { + "epoch": 0.015914029009337013, + "grad_norm": 0.0, + "learning_rate": 1.0606653620352253e-05, + "loss": 1.5479, + "step": 542 + }, + { + "epoch": 0.015943390686476013, + "grad_norm": 0.0, + "learning_rate": 1.0626223091976517e-05, + "loss": 1.4648, + "step": 543 + }, + { + "epoch": 0.01597275236361501, + "grad_norm": 0.0, + "learning_rate": 1.0645792563600784e-05, + "loss": 1.5684, + "step": 544 + }, + { + "epoch": 0.01600211404075401, + "grad_norm": 0.0, + "learning_rate": 1.0665362035225052e-05, + "loss": 1.4609, + "step": 545 + }, + { + "epoch": 0.016031475717893005, + "grad_norm": 0.0, + "learning_rate": 1.0684931506849316e-05, + "loss": 1.5869, + "step": 546 + }, + { + "epoch": 0.016060837395032005, + "grad_norm": 0.0, + "learning_rate": 1.0704500978473583e-05, + "loss": 1.5977, + "step": 547 + }, + { + "epoch": 0.016090199072171, + "grad_norm": 0.0, + "learning_rate": 1.0724070450097847e-05, + "loss": 1.376, + "step": 548 + }, + { + "epoch": 0.01611956074931, + "grad_norm": 0.0, + "learning_rate": 1.0743639921722115e-05, + "loss": 1.5342, + "step": 549 + }, + { + "epoch": 0.016148922426448997, + "grad_norm": 0.0, + "learning_rate": 1.0763209393346382e-05, + "loss": 1.5244, + "step": 550 + }, + { + "epoch": 0.016178284103587997, + "grad_norm": 0.0, + "learning_rate": 1.0782778864970646e-05, + "loss": 1.418, + "step": 551 + }, + { + "epoch": 0.016207645780726997, + "grad_norm": 0.0, + "learning_rate": 1.0802348336594913e-05, + "loss": 1.5029, + "step": 552 + }, + { + "epoch": 0.016237007457865993, + "grad_norm": 0.0, + "learning_rate": 1.082191780821918e-05, + "loss": 1.501, + "step": 553 + }, + { + "epoch": 0.016266369135004993, + "grad_norm": 0.0, + "learning_rate": 1.0841487279843445e-05, + "loss": 1.4375, + "step": 554 + }, + { + "epoch": 0.01629573081214399, + "grad_norm": 0.0, + "learning_rate": 1.0861056751467712e-05, + "loss": 1.4922, + "step": 555 + }, + { + "epoch": 0.01632509248928299, + "grad_norm": 0.0, + "learning_rate": 1.0880626223091976e-05, + "loss": 1.5137, + "step": 556 + }, + { + "epoch": 0.016354454166421985, + "grad_norm": 0.0, + "learning_rate": 1.0900195694716244e-05, + "loss": 1.5381, + "step": 557 + }, + { + "epoch": 0.016383815843560984, + "grad_norm": 0.0, + "learning_rate": 1.091976516634051e-05, + "loss": 1.5225, + "step": 558 + }, + { + "epoch": 0.01641317752069998, + "grad_norm": 0.0, + "learning_rate": 1.0939334637964775e-05, + "loss": 1.4658, + "step": 559 + }, + { + "epoch": 0.01644253919783898, + "grad_norm": 0.0, + "learning_rate": 1.0958904109589042e-05, + "loss": 1.5322, + "step": 560 + }, + { + "epoch": 0.01647190087497798, + "grad_norm": 0.0, + "learning_rate": 1.097847358121331e-05, + "loss": 1.4219, + "step": 561 + }, + { + "epoch": 0.016501262552116976, + "grad_norm": 0.0, + "learning_rate": 1.0998043052837574e-05, + "loss": 1.5273, + "step": 562 + }, + { + "epoch": 0.016530624229255976, + "grad_norm": 0.0, + "learning_rate": 1.101761252446184e-05, + "loss": 1.4775, + "step": 563 + }, + { + "epoch": 0.016559985906394972, + "grad_norm": 0.0, + "learning_rate": 1.1037181996086105e-05, + "loss": 1.5625, + "step": 564 + }, + { + "epoch": 0.016589347583533972, + "grad_norm": 0.0, + "learning_rate": 1.1056751467710373e-05, + "loss": 1.4902, + "step": 565 + }, + { + "epoch": 0.016618709260672968, + "grad_norm": 0.0, + "learning_rate": 1.107632093933464e-05, + "loss": 1.4541, + "step": 566 + }, + { + "epoch": 0.016648070937811968, + "grad_norm": 0.0, + "learning_rate": 1.1095890410958904e-05, + "loss": 1.5801, + "step": 567 + }, + { + "epoch": 0.016677432614950968, + "grad_norm": 0.0, + "learning_rate": 1.111545988258317e-05, + "loss": 1.4854, + "step": 568 + }, + { + "epoch": 0.016706794292089964, + "grad_norm": 0.0, + "learning_rate": 1.1135029354207439e-05, + "loss": 1.5352, + "step": 569 + }, + { + "epoch": 0.016736155969228964, + "grad_norm": 0.0, + "learning_rate": 1.1154598825831704e-05, + "loss": 1.5908, + "step": 570 + }, + { + "epoch": 0.01676551764636796, + "grad_norm": 0.0, + "learning_rate": 1.117416829745597e-05, + "loss": 1.5127, + "step": 571 + }, + { + "epoch": 0.01679487932350696, + "grad_norm": 0.0, + "learning_rate": 1.1193737769080235e-05, + "loss": 1.5645, + "step": 572 + }, + { + "epoch": 0.016824241000645956, + "grad_norm": 0.0, + "learning_rate": 1.1213307240704503e-05, + "loss": 1.5654, + "step": 573 + }, + { + "epoch": 0.016853602677784955, + "grad_norm": 0.0, + "learning_rate": 1.1232876712328769e-05, + "loss": 1.6035, + "step": 574 + }, + { + "epoch": 0.01688296435492395, + "grad_norm": 0.0, + "learning_rate": 1.1252446183953034e-05, + "loss": 1.4629, + "step": 575 + }, + { + "epoch": 0.01691232603206295, + "grad_norm": 0.0, + "learning_rate": 1.12720156555773e-05, + "loss": 1.4648, + "step": 576 + }, + { + "epoch": 0.01694168770920195, + "grad_norm": 0.0, + "learning_rate": 1.1291585127201568e-05, + "loss": 1.6396, + "step": 577 + }, + { + "epoch": 0.016971049386340947, + "grad_norm": 0.0, + "learning_rate": 1.1311154598825833e-05, + "loss": 1.4531, + "step": 578 + }, + { + "epoch": 0.017000411063479947, + "grad_norm": 0.0, + "learning_rate": 1.1330724070450099e-05, + "loss": 1.4121, + "step": 579 + }, + { + "epoch": 0.017029772740618943, + "grad_norm": 0.0, + "learning_rate": 1.1350293542074364e-05, + "loss": 1.5576, + "step": 580 + }, + { + "epoch": 0.017059134417757943, + "grad_norm": 0.0, + "learning_rate": 1.1369863013698632e-05, + "loss": 1.54, + "step": 581 + }, + { + "epoch": 0.01708849609489694, + "grad_norm": 0.0, + "learning_rate": 1.1389432485322898e-05, + "loss": 1.5195, + "step": 582 + }, + { + "epoch": 0.01711785777203594, + "grad_norm": 0.0, + "learning_rate": 1.1409001956947163e-05, + "loss": 1.5576, + "step": 583 + }, + { + "epoch": 0.01714721944917494, + "grad_norm": 0.0, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.627, + "step": 584 + }, + { + "epoch": 0.017176581126313935, + "grad_norm": 0.0, + "learning_rate": 1.1448140900195697e-05, + "loss": 1.5596, + "step": 585 + }, + { + "epoch": 0.017205942803452935, + "grad_norm": 0.0, + "learning_rate": 1.1467710371819962e-05, + "loss": 1.4141, + "step": 586 + }, + { + "epoch": 0.01723530448059193, + "grad_norm": 0.0, + "learning_rate": 1.1487279843444228e-05, + "loss": 1.5908, + "step": 587 + }, + { + "epoch": 0.01726466615773093, + "grad_norm": 0.0, + "learning_rate": 1.1506849315068493e-05, + "loss": 1.499, + "step": 588 + }, + { + "epoch": 0.017294027834869927, + "grad_norm": 0.0, + "learning_rate": 1.152641878669276e-05, + "loss": 1.5986, + "step": 589 + }, + { + "epoch": 0.017323389512008926, + "grad_norm": 0.0, + "learning_rate": 1.1545988258317027e-05, + "loss": 1.4697, + "step": 590 + }, + { + "epoch": 0.017352751189147923, + "grad_norm": 0.0, + "learning_rate": 1.1565557729941292e-05, + "loss": 1.5752, + "step": 591 + }, + { + "epoch": 0.017382112866286922, + "grad_norm": 0.0, + "learning_rate": 1.1585127201565558e-05, + "loss": 1.5771, + "step": 592 + }, + { + "epoch": 0.017411474543425922, + "grad_norm": 0.0, + "learning_rate": 1.1604696673189826e-05, + "loss": 1.5098, + "step": 593 + }, + { + "epoch": 0.017440836220564918, + "grad_norm": 0.0, + "learning_rate": 1.162426614481409e-05, + "loss": 1.5508, + "step": 594 + }, + { + "epoch": 0.017470197897703918, + "grad_norm": 0.0, + "learning_rate": 1.1643835616438357e-05, + "loss": 1.4316, + "step": 595 + }, + { + "epoch": 0.017499559574842914, + "grad_norm": 0.0, + "learning_rate": 1.1663405088062622e-05, + "loss": 1.6523, + "step": 596 + }, + { + "epoch": 0.017528921251981914, + "grad_norm": 0.0, + "learning_rate": 1.168297455968689e-05, + "loss": 1.4717, + "step": 597 + }, + { + "epoch": 0.01755828292912091, + "grad_norm": 0.0, + "learning_rate": 1.1702544031311156e-05, + "loss": 1.4834, + "step": 598 + }, + { + "epoch": 0.01758764460625991, + "grad_norm": 0.0, + "learning_rate": 1.1722113502935421e-05, + "loss": 1.3701, + "step": 599 + }, + { + "epoch": 0.017617006283398906, + "grad_norm": 0.0, + "learning_rate": 1.1741682974559687e-05, + "loss": 1.4414, + "step": 600 + }, + { + "epoch": 0.017646367960537906, + "grad_norm": 0.0, + "learning_rate": 1.1761252446183955e-05, + "loss": 1.498, + "step": 601 + }, + { + "epoch": 0.017675729637676905, + "grad_norm": 0.0, + "learning_rate": 1.178082191780822e-05, + "loss": 1.583, + "step": 602 + }, + { + "epoch": 0.0177050913148159, + "grad_norm": 0.0, + "learning_rate": 1.1800391389432486e-05, + "loss": 1.4219, + "step": 603 + }, + { + "epoch": 0.0177344529919549, + "grad_norm": 0.0, + "learning_rate": 1.1819960861056753e-05, + "loss": 1.4697, + "step": 604 + }, + { + "epoch": 0.017763814669093898, + "grad_norm": 0.0, + "learning_rate": 1.1839530332681019e-05, + "loss": 1.501, + "step": 605 + }, + { + "epoch": 0.017793176346232897, + "grad_norm": 0.0, + "learning_rate": 1.1859099804305285e-05, + "loss": 1.4873, + "step": 606 + }, + { + "epoch": 0.017822538023371894, + "grad_norm": 0.0, + "learning_rate": 1.187866927592955e-05, + "loss": 1.4922, + "step": 607 + }, + { + "epoch": 0.017851899700510893, + "grad_norm": 0.0, + "learning_rate": 1.1898238747553816e-05, + "loss": 1.4658, + "step": 608 + }, + { + "epoch": 0.017881261377649893, + "grad_norm": 0.0, + "learning_rate": 1.1917808219178084e-05, + "loss": 1.4639, + "step": 609 + }, + { + "epoch": 0.01791062305478889, + "grad_norm": 0.0, + "learning_rate": 1.1937377690802349e-05, + "loss": 1.627, + "step": 610 + }, + { + "epoch": 0.01793998473192789, + "grad_norm": 0.0, + "learning_rate": 1.1956947162426615e-05, + "loss": 1.5586, + "step": 611 + }, + { + "epoch": 0.017969346409066885, + "grad_norm": 0.0, + "learning_rate": 1.1976516634050882e-05, + "loss": 1.4795, + "step": 612 + }, + { + "epoch": 0.017998708086205885, + "grad_norm": 0.0, + "learning_rate": 1.1996086105675148e-05, + "loss": 1.3848, + "step": 613 + }, + { + "epoch": 0.01802806976334488, + "grad_norm": 0.0, + "learning_rate": 1.2015655577299414e-05, + "loss": 1.5127, + "step": 614 + }, + { + "epoch": 0.01805743144048388, + "grad_norm": 0.0, + "learning_rate": 1.203522504892368e-05, + "loss": 1.5156, + "step": 615 + }, + { + "epoch": 0.018086793117622877, + "grad_norm": 0.0, + "learning_rate": 1.2054794520547945e-05, + "loss": 1.5459, + "step": 616 + }, + { + "epoch": 0.018116154794761877, + "grad_norm": 0.0, + "learning_rate": 1.2074363992172213e-05, + "loss": 1.2852, + "step": 617 + }, + { + "epoch": 0.018145516471900876, + "grad_norm": 0.0, + "learning_rate": 1.2093933463796478e-05, + "loss": 1.5557, + "step": 618 + }, + { + "epoch": 0.018174878149039873, + "grad_norm": 0.0, + "learning_rate": 1.2113502935420744e-05, + "loss": 1.4102, + "step": 619 + }, + { + "epoch": 0.018204239826178872, + "grad_norm": 0.0, + "learning_rate": 1.213307240704501e-05, + "loss": 1.5273, + "step": 620 + }, + { + "epoch": 0.01823360150331787, + "grad_norm": 0.0, + "learning_rate": 1.2152641878669277e-05, + "loss": 1.5137, + "step": 621 + }, + { + "epoch": 0.01826296318045687, + "grad_norm": 0.0, + "learning_rate": 1.2172211350293543e-05, + "loss": 1.5439, + "step": 622 + }, + { + "epoch": 0.018292324857595865, + "grad_norm": 0.0, + "learning_rate": 1.219178082191781e-05, + "loss": 1.5449, + "step": 623 + }, + { + "epoch": 0.018321686534734864, + "grad_norm": 0.0, + "learning_rate": 1.2211350293542074e-05, + "loss": 1.5605, + "step": 624 + }, + { + "epoch": 0.018351048211873864, + "grad_norm": 0.0, + "learning_rate": 1.2230919765166343e-05, + "loss": 1.4619, + "step": 625 + }, + { + "epoch": 0.01838040988901286, + "grad_norm": 0.0, + "learning_rate": 1.2250489236790609e-05, + "loss": 1.4414, + "step": 626 + }, + { + "epoch": 0.01840977156615186, + "grad_norm": 0.0, + "learning_rate": 1.2270058708414874e-05, + "loss": 1.4482, + "step": 627 + }, + { + "epoch": 0.018439133243290856, + "grad_norm": 0.0, + "learning_rate": 1.228962818003914e-05, + "loss": 1.5996, + "step": 628 + }, + { + "epoch": 0.018468494920429856, + "grad_norm": 0.0, + "learning_rate": 1.2309197651663406e-05, + "loss": 1.5088, + "step": 629 + }, + { + "epoch": 0.018497856597568852, + "grad_norm": 0.0, + "learning_rate": 1.2328767123287673e-05, + "loss": 1.5361, + "step": 630 + }, + { + "epoch": 0.018527218274707852, + "grad_norm": 0.0, + "learning_rate": 1.2348336594911939e-05, + "loss": 1.5449, + "step": 631 + }, + { + "epoch": 0.018556579951846848, + "grad_norm": 0.0, + "learning_rate": 1.2367906066536204e-05, + "loss": 1.6143, + "step": 632 + }, + { + "epoch": 0.018585941628985848, + "grad_norm": 0.0, + "learning_rate": 1.2387475538160472e-05, + "loss": 1.5225, + "step": 633 + }, + { + "epoch": 0.018615303306124847, + "grad_norm": 0.0, + "learning_rate": 1.2407045009784738e-05, + "loss": 1.4277, + "step": 634 + }, + { + "epoch": 0.018644664983263844, + "grad_norm": 0.0, + "learning_rate": 1.2426614481409003e-05, + "loss": 1.6162, + "step": 635 + }, + { + "epoch": 0.018674026660402843, + "grad_norm": 0.0, + "learning_rate": 1.2446183953033269e-05, + "loss": 1.5449, + "step": 636 + }, + { + "epoch": 0.01870338833754184, + "grad_norm": 0.0, + "learning_rate": 1.2465753424657537e-05, + "loss": 1.5693, + "step": 637 + }, + { + "epoch": 0.01873275001468084, + "grad_norm": 0.0, + "learning_rate": 1.2485322896281802e-05, + "loss": 1.4609, + "step": 638 + }, + { + "epoch": 0.018762111691819836, + "grad_norm": 0.0, + "learning_rate": 1.2504892367906068e-05, + "loss": 1.5098, + "step": 639 + }, + { + "epoch": 0.018791473368958835, + "grad_norm": 0.0, + "learning_rate": 1.2524461839530333e-05, + "loss": 1.6211, + "step": 640 + }, + { + "epoch": 0.01882083504609783, + "grad_norm": 0.0, + "learning_rate": 1.25440313111546e-05, + "loss": 1.4014, + "step": 641 + }, + { + "epoch": 0.01885019672323683, + "grad_norm": 0.0, + "learning_rate": 1.2563600782778867e-05, + "loss": 1.5498, + "step": 642 + }, + { + "epoch": 0.01887955840037583, + "grad_norm": 0.0, + "learning_rate": 1.2583170254403132e-05, + "loss": 1.5098, + "step": 643 + }, + { + "epoch": 0.018908920077514827, + "grad_norm": 0.0, + "learning_rate": 1.2602739726027398e-05, + "loss": 1.4639, + "step": 644 + }, + { + "epoch": 0.018938281754653827, + "grad_norm": 0.0, + "learning_rate": 1.2622309197651666e-05, + "loss": 1.4961, + "step": 645 + }, + { + "epoch": 0.018967643431792823, + "grad_norm": 0.0, + "learning_rate": 1.264187866927593e-05, + "loss": 1.4268, + "step": 646 + }, + { + "epoch": 0.018997005108931823, + "grad_norm": 0.0, + "learning_rate": 1.2661448140900197e-05, + "loss": 1.5039, + "step": 647 + }, + { + "epoch": 0.01902636678607082, + "grad_norm": 0.0, + "learning_rate": 1.2681017612524462e-05, + "loss": 1.5303, + "step": 648 + }, + { + "epoch": 0.01905572846320982, + "grad_norm": 0.0, + "learning_rate": 1.270058708414873e-05, + "loss": 1.4521, + "step": 649 + }, + { + "epoch": 0.01908509014034882, + "grad_norm": 0.0, + "learning_rate": 1.2720156555772996e-05, + "loss": 1.4766, + "step": 650 + }, + { + "epoch": 0.019114451817487815, + "grad_norm": 0.0, + "learning_rate": 1.273972602739726e-05, + "loss": 1.4951, + "step": 651 + }, + { + "epoch": 0.019143813494626814, + "grad_norm": 0.0, + "learning_rate": 1.2759295499021527e-05, + "loss": 1.6279, + "step": 652 + }, + { + "epoch": 0.01917317517176581, + "grad_norm": 0.0, + "learning_rate": 1.2778864970645795e-05, + "loss": 1.5225, + "step": 653 + }, + { + "epoch": 0.01920253684890481, + "grad_norm": 0.0, + "learning_rate": 1.279843444227006e-05, + "loss": 1.4785, + "step": 654 + }, + { + "epoch": 0.019231898526043806, + "grad_norm": 0.0, + "learning_rate": 1.2818003913894326e-05, + "loss": 1.5029, + "step": 655 + }, + { + "epoch": 0.019261260203182806, + "grad_norm": 0.0, + "learning_rate": 1.2837573385518591e-05, + "loss": 1.4736, + "step": 656 + }, + { + "epoch": 0.019290621880321802, + "grad_norm": 0.0, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.4893, + "step": 657 + }, + { + "epoch": 0.019319983557460802, + "grad_norm": 0.0, + "learning_rate": 1.2876712328767125e-05, + "loss": 1.4111, + "step": 658 + }, + { + "epoch": 0.019349345234599802, + "grad_norm": 0.0, + "learning_rate": 1.289628180039139e-05, + "loss": 1.6123, + "step": 659 + }, + { + "epoch": 0.019378706911738798, + "grad_norm": 0.0, + "learning_rate": 1.2915851272015656e-05, + "loss": 1.5137, + "step": 660 + }, + { + "epoch": 0.019408068588877798, + "grad_norm": 0.0, + "learning_rate": 1.2935420743639924e-05, + "loss": 1.5762, + "step": 661 + }, + { + "epoch": 0.019437430266016794, + "grad_norm": 0.0, + "learning_rate": 1.2954990215264189e-05, + "loss": 1.5098, + "step": 662 + }, + { + "epoch": 0.019466791943155794, + "grad_norm": 0.0, + "learning_rate": 1.2974559686888455e-05, + "loss": 1.4795, + "step": 663 + }, + { + "epoch": 0.01949615362029479, + "grad_norm": 0.0, + "learning_rate": 1.299412915851272e-05, + "loss": 1.4785, + "step": 664 + }, + { + "epoch": 0.01952551529743379, + "grad_norm": 0.0, + "learning_rate": 1.3013698630136988e-05, + "loss": 1.501, + "step": 665 + }, + { + "epoch": 0.019554876974572786, + "grad_norm": 0.0, + "learning_rate": 1.3033268101761254e-05, + "loss": 1.499, + "step": 666 + }, + { + "epoch": 0.019584238651711786, + "grad_norm": 0.0, + "learning_rate": 1.3052837573385519e-05, + "loss": 1.4355, + "step": 667 + }, + { + "epoch": 0.019613600328850785, + "grad_norm": 0.0, + "learning_rate": 1.3072407045009785e-05, + "loss": 1.5234, + "step": 668 + }, + { + "epoch": 0.01964296200598978, + "grad_norm": 0.0, + "learning_rate": 1.3091976516634053e-05, + "loss": 1.5518, + "step": 669 + }, + { + "epoch": 0.01967232368312878, + "grad_norm": 0.0, + "learning_rate": 1.3111545988258318e-05, + "loss": 1.6611, + "step": 670 + }, + { + "epoch": 0.019701685360267777, + "grad_norm": 0.0, + "learning_rate": 1.3131115459882584e-05, + "loss": 1.4629, + "step": 671 + }, + { + "epoch": 0.019731047037406777, + "grad_norm": 0.0, + "learning_rate": 1.3150684931506849e-05, + "loss": 1.5078, + "step": 672 + }, + { + "epoch": 0.019760408714545773, + "grad_norm": 0.0, + "learning_rate": 1.3170254403131117e-05, + "loss": 1.3809, + "step": 673 + }, + { + "epoch": 0.019789770391684773, + "grad_norm": 0.0, + "learning_rate": 1.3189823874755383e-05, + "loss": 1.5469, + "step": 674 + }, + { + "epoch": 0.019819132068823773, + "grad_norm": 0.0, + "learning_rate": 1.3209393346379648e-05, + "loss": 1.5107, + "step": 675 + }, + { + "epoch": 0.01984849374596277, + "grad_norm": 0.0, + "learning_rate": 1.3228962818003914e-05, + "loss": 1.4736, + "step": 676 + }, + { + "epoch": 0.01987785542310177, + "grad_norm": 0.0, + "learning_rate": 1.3248532289628183e-05, + "loss": 1.4893, + "step": 677 + }, + { + "epoch": 0.019907217100240765, + "grad_norm": 0.0, + "learning_rate": 1.3268101761252447e-05, + "loss": 1.5732, + "step": 678 + }, + { + "epoch": 0.019936578777379765, + "grad_norm": 0.0, + "learning_rate": 1.3287671232876714e-05, + "loss": 1.5635, + "step": 679 + }, + { + "epoch": 0.01996594045451876, + "grad_norm": 0.0, + "learning_rate": 1.3307240704500978e-05, + "loss": 1.5322, + "step": 680 + }, + { + "epoch": 0.01999530213165776, + "grad_norm": 0.0, + "learning_rate": 1.3326810176125246e-05, + "loss": 1.5303, + "step": 681 + }, + { + "epoch": 0.020024663808796757, + "grad_norm": 0.0, + "learning_rate": 1.3346379647749513e-05, + "loss": 1.5322, + "step": 682 + }, + { + "epoch": 0.020054025485935757, + "grad_norm": 0.0, + "learning_rate": 1.3365949119373777e-05, + "loss": 1.5332, + "step": 683 + }, + { + "epoch": 0.020083387163074756, + "grad_norm": 0.0, + "learning_rate": 1.3385518590998044e-05, + "loss": 1.5049, + "step": 684 + }, + { + "epoch": 0.020112748840213752, + "grad_norm": 0.0, + "learning_rate": 1.3405088062622312e-05, + "loss": 1.4902, + "step": 685 + }, + { + "epoch": 0.020142110517352752, + "grad_norm": 0.0, + "learning_rate": 1.3424657534246576e-05, + "loss": 1.4873, + "step": 686 + }, + { + "epoch": 0.02017147219449175, + "grad_norm": 0.0, + "learning_rate": 1.3444227005870843e-05, + "loss": 1.5732, + "step": 687 + }, + { + "epoch": 0.020200833871630748, + "grad_norm": 0.0, + "learning_rate": 1.3463796477495107e-05, + "loss": 1.6094, + "step": 688 + }, + { + "epoch": 0.020230195548769744, + "grad_norm": 0.0, + "learning_rate": 1.3483365949119375e-05, + "loss": 1.5303, + "step": 689 + }, + { + "epoch": 0.020259557225908744, + "grad_norm": 0.0, + "learning_rate": 1.3502935420743642e-05, + "loss": 1.5186, + "step": 690 + }, + { + "epoch": 0.020288918903047744, + "grad_norm": 0.0, + "learning_rate": 1.3522504892367906e-05, + "loss": 1.5811, + "step": 691 + }, + { + "epoch": 0.02031828058018674, + "grad_norm": 0.0, + "learning_rate": 1.3542074363992173e-05, + "loss": 1.6738, + "step": 692 + }, + { + "epoch": 0.02034764225732574, + "grad_norm": 0.0, + "learning_rate": 1.356164383561644e-05, + "loss": 1.5762, + "step": 693 + }, + { + "epoch": 0.020377003934464736, + "grad_norm": 0.0, + "learning_rate": 1.3581213307240705e-05, + "loss": 1.5967, + "step": 694 + }, + { + "epoch": 0.020406365611603736, + "grad_norm": 0.0, + "learning_rate": 1.3600782778864972e-05, + "loss": 1.4688, + "step": 695 + }, + { + "epoch": 0.020435727288742732, + "grad_norm": 0.0, + "learning_rate": 1.3620352250489236e-05, + "loss": 1.5645, + "step": 696 + }, + { + "epoch": 0.02046508896588173, + "grad_norm": 0.0, + "learning_rate": 1.3639921722113504e-05, + "loss": 1.6172, + "step": 697 + }, + { + "epoch": 0.020494450643020728, + "grad_norm": 0.0, + "learning_rate": 1.365949119373777e-05, + "loss": 1.5537, + "step": 698 + }, + { + "epoch": 0.020523812320159728, + "grad_norm": 0.0, + "learning_rate": 1.3679060665362035e-05, + "loss": 1.6162, + "step": 699 + }, + { + "epoch": 0.020553173997298727, + "grad_norm": 0.0, + "learning_rate": 1.3698630136986302e-05, + "loss": 1.5703, + "step": 700 + }, + { + "epoch": 0.020582535674437723, + "grad_norm": 0.0, + "learning_rate": 1.371819960861057e-05, + "loss": 1.4697, + "step": 701 + }, + { + "epoch": 0.020611897351576723, + "grad_norm": 0.0, + "learning_rate": 1.3737769080234834e-05, + "loss": 1.4902, + "step": 702 + }, + { + "epoch": 0.02064125902871572, + "grad_norm": 0.0, + "learning_rate": 1.37573385518591e-05, + "loss": 1.4492, + "step": 703 + }, + { + "epoch": 0.02067062070585472, + "grad_norm": 0.0, + "learning_rate": 1.3776908023483367e-05, + "loss": 1.4834, + "step": 704 + }, + { + "epoch": 0.020699982382993715, + "grad_norm": 0.0, + "learning_rate": 1.3796477495107633e-05, + "loss": 1.6055, + "step": 705 + }, + { + "epoch": 0.020729344060132715, + "grad_norm": 0.0, + "learning_rate": 1.38160469667319e-05, + "loss": 1.542, + "step": 706 + }, + { + "epoch": 0.02075870573727171, + "grad_norm": 0.0, + "learning_rate": 1.3835616438356164e-05, + "loss": 1.5039, + "step": 707 + }, + { + "epoch": 0.02078806741441071, + "grad_norm": 0.0, + "learning_rate": 1.385518590998043e-05, + "loss": 1.5742, + "step": 708 + }, + { + "epoch": 0.02081742909154971, + "grad_norm": 0.0, + "learning_rate": 1.3874755381604699e-05, + "loss": 1.5547, + "step": 709 + }, + { + "epoch": 0.020846790768688707, + "grad_norm": 0.0, + "learning_rate": 1.3894324853228964e-05, + "loss": 1.4004, + "step": 710 + }, + { + "epoch": 0.020876152445827707, + "grad_norm": 0.0, + "learning_rate": 1.391389432485323e-05, + "loss": 1.4434, + "step": 711 + }, + { + "epoch": 0.020905514122966703, + "grad_norm": 0.0, + "learning_rate": 1.3933463796477496e-05, + "loss": 1.5674, + "step": 712 + }, + { + "epoch": 0.020934875800105703, + "grad_norm": 0.0, + "learning_rate": 1.3953033268101763e-05, + "loss": 1.5566, + "step": 713 + }, + { + "epoch": 0.0209642374772447, + "grad_norm": 0.0, + "learning_rate": 1.3972602739726029e-05, + "loss": 1.6602, + "step": 714 + }, + { + "epoch": 0.0209935991543837, + "grad_norm": 0.0, + "learning_rate": 1.3992172211350295e-05, + "loss": 1.5391, + "step": 715 + }, + { + "epoch": 0.021022960831522698, + "grad_norm": 0.0, + "learning_rate": 1.401174168297456e-05, + "loss": 1.4658, + "step": 716 + }, + { + "epoch": 0.021052322508661694, + "grad_norm": 0.0, + "learning_rate": 1.4031311154598828e-05, + "loss": 1.4707, + "step": 717 + }, + { + "epoch": 0.021081684185800694, + "grad_norm": 0.0, + "learning_rate": 1.4050880626223093e-05, + "loss": 1.5303, + "step": 718 + }, + { + "epoch": 0.02111104586293969, + "grad_norm": 0.0, + "learning_rate": 1.4070450097847359e-05, + "loss": 1.5898, + "step": 719 + }, + { + "epoch": 0.02114040754007869, + "grad_norm": 0.0, + "learning_rate": 1.4090019569471625e-05, + "loss": 1.5215, + "step": 720 + }, + { + "epoch": 0.021169769217217686, + "grad_norm": 0.0, + "learning_rate": 1.4109589041095892e-05, + "loss": 1.5508, + "step": 721 + }, + { + "epoch": 0.021199130894356686, + "grad_norm": 0.0, + "learning_rate": 1.4129158512720158e-05, + "loss": 1.416, + "step": 722 + }, + { + "epoch": 0.021228492571495682, + "grad_norm": 0.0, + "learning_rate": 1.4148727984344424e-05, + "loss": 1.5635, + "step": 723 + }, + { + "epoch": 0.021257854248634682, + "grad_norm": 0.0, + "learning_rate": 1.4168297455968689e-05, + "loss": 1.5811, + "step": 724 + }, + { + "epoch": 0.02128721592577368, + "grad_norm": 0.0, + "learning_rate": 1.4187866927592957e-05, + "loss": 1.5273, + "step": 725 + }, + { + "epoch": 0.021316577602912678, + "grad_norm": 0.0, + "learning_rate": 1.4207436399217223e-05, + "loss": 1.4756, + "step": 726 + }, + { + "epoch": 0.021345939280051678, + "grad_norm": 0.0, + "learning_rate": 1.4227005870841488e-05, + "loss": 1.5068, + "step": 727 + }, + { + "epoch": 0.021375300957190674, + "grad_norm": 0.0, + "learning_rate": 1.4246575342465754e-05, + "loss": 1.5166, + "step": 728 + }, + { + "epoch": 0.021404662634329674, + "grad_norm": 0.0, + "learning_rate": 1.426614481409002e-05, + "loss": 1.4629, + "step": 729 + }, + { + "epoch": 0.02143402431146867, + "grad_norm": 0.0, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.5225, + "step": 730 + }, + { + "epoch": 0.02146338598860767, + "grad_norm": 0.0, + "learning_rate": 1.4305283757338553e-05, + "loss": 1.4619, + "step": 731 + }, + { + "epoch": 0.02149274766574667, + "grad_norm": 0.0, + "learning_rate": 1.4324853228962818e-05, + "loss": 1.5977, + "step": 732 + }, + { + "epoch": 0.021522109342885665, + "grad_norm": 0.0, + "learning_rate": 1.4344422700587086e-05, + "loss": 1.5479, + "step": 733 + }, + { + "epoch": 0.021551471020024665, + "grad_norm": 0.0, + "learning_rate": 1.4363992172211353e-05, + "loss": 1.5801, + "step": 734 + }, + { + "epoch": 0.02158083269716366, + "grad_norm": 0.0, + "learning_rate": 1.4383561643835617e-05, + "loss": 1.5293, + "step": 735 + }, + { + "epoch": 0.02161019437430266, + "grad_norm": 0.0, + "learning_rate": 1.4403131115459884e-05, + "loss": 1.4795, + "step": 736 + }, + { + "epoch": 0.021639556051441657, + "grad_norm": 0.0, + "learning_rate": 1.4422700587084152e-05, + "loss": 1.6416, + "step": 737 + }, + { + "epoch": 0.021668917728580657, + "grad_norm": 0.0, + "learning_rate": 1.4442270058708416e-05, + "loss": 1.6123, + "step": 738 + }, + { + "epoch": 0.021698279405719653, + "grad_norm": 0.0, + "learning_rate": 1.4461839530332683e-05, + "loss": 1.4961, + "step": 739 + }, + { + "epoch": 0.021727641082858653, + "grad_norm": 0.0, + "learning_rate": 1.4481409001956947e-05, + "loss": 1.5684, + "step": 740 + }, + { + "epoch": 0.021757002759997653, + "grad_norm": 0.0, + "learning_rate": 1.4500978473581215e-05, + "loss": 1.5469, + "step": 741 + }, + { + "epoch": 0.02178636443713665, + "grad_norm": 0.0, + "learning_rate": 1.4520547945205482e-05, + "loss": 1.4609, + "step": 742 + }, + { + "epoch": 0.02181572611427565, + "grad_norm": 0.0, + "learning_rate": 1.4540117416829746e-05, + "loss": 1.5205, + "step": 743 + }, + { + "epoch": 0.021845087791414645, + "grad_norm": 0.0, + "learning_rate": 1.4559686888454013e-05, + "loss": 1.626, + "step": 744 + }, + { + "epoch": 0.021874449468553644, + "grad_norm": 0.0, + "learning_rate": 1.457925636007828e-05, + "loss": 1.5537, + "step": 745 + }, + { + "epoch": 0.02190381114569264, + "grad_norm": 0.0, + "learning_rate": 1.4598825831702545e-05, + "loss": 1.4912, + "step": 746 + }, + { + "epoch": 0.02193317282283164, + "grad_norm": 0.0, + "learning_rate": 1.4618395303326812e-05, + "loss": 1.4736, + "step": 747 + }, + { + "epoch": 0.021962534499970637, + "grad_norm": 0.0, + "learning_rate": 1.4637964774951076e-05, + "loss": 1.4873, + "step": 748 + }, + { + "epoch": 0.021991896177109636, + "grad_norm": 0.0, + "learning_rate": 1.4657534246575344e-05, + "loss": 1.4775, + "step": 749 + }, + { + "epoch": 0.022021257854248636, + "grad_norm": 0.0, + "learning_rate": 1.467710371819961e-05, + "loss": 1.5859, + "step": 750 + }, + { + "epoch": 0.022050619531387632, + "grad_norm": 0.0, + "learning_rate": 1.4696673189823875e-05, + "loss": 1.543, + "step": 751 + }, + { + "epoch": 0.022079981208526632, + "grad_norm": 0.0, + "learning_rate": 1.4716242661448142e-05, + "loss": 1.458, + "step": 752 + }, + { + "epoch": 0.022109342885665628, + "grad_norm": 0.0, + "learning_rate": 1.473581213307241e-05, + "loss": 1.5723, + "step": 753 + }, + { + "epoch": 0.022138704562804628, + "grad_norm": 0.0, + "learning_rate": 1.4755381604696674e-05, + "loss": 1.4609, + "step": 754 + }, + { + "epoch": 0.022168066239943624, + "grad_norm": 0.0, + "learning_rate": 1.477495107632094e-05, + "loss": 1.6533, + "step": 755 + }, + { + "epoch": 0.022197427917082624, + "grad_norm": 0.0, + "learning_rate": 1.4794520547945205e-05, + "loss": 1.5391, + "step": 756 + }, + { + "epoch": 0.022226789594221624, + "grad_norm": 0.0, + "learning_rate": 1.4814090019569473e-05, + "loss": 1.542, + "step": 757 + }, + { + "epoch": 0.02225615127136062, + "grad_norm": 0.0, + "learning_rate": 1.483365949119374e-05, + "loss": 1.4082, + "step": 758 + }, + { + "epoch": 0.02228551294849962, + "grad_norm": 0.0, + "learning_rate": 1.4853228962818004e-05, + "loss": 1.6152, + "step": 759 + }, + { + "epoch": 0.022314874625638616, + "grad_norm": 0.0, + "learning_rate": 1.487279843444227e-05, + "loss": 1.5312, + "step": 760 + }, + { + "epoch": 0.022344236302777615, + "grad_norm": 0.0, + "learning_rate": 1.4892367906066539e-05, + "loss": 1.5605, + "step": 761 + }, + { + "epoch": 0.02237359797991661, + "grad_norm": 0.0, + "learning_rate": 1.4911937377690804e-05, + "loss": 1.4893, + "step": 762 + }, + { + "epoch": 0.02240295965705561, + "grad_norm": 0.0, + "learning_rate": 1.493150684931507e-05, + "loss": 1.4404, + "step": 763 + }, + { + "epoch": 0.022432321334194608, + "grad_norm": 0.0, + "learning_rate": 1.4951076320939335e-05, + "loss": 1.4141, + "step": 764 + }, + { + "epoch": 0.022461683011333607, + "grad_norm": 0.0, + "learning_rate": 1.4970645792563603e-05, + "loss": 1.5635, + "step": 765 + }, + { + "epoch": 0.022491044688472607, + "grad_norm": 0.0, + "learning_rate": 1.4990215264187869e-05, + "loss": 1.4814, + "step": 766 + }, + { + "epoch": 0.022520406365611603, + "grad_norm": 0.0, + "learning_rate": 1.5009784735812134e-05, + "loss": 1.5078, + "step": 767 + }, + { + "epoch": 0.022549768042750603, + "grad_norm": 0.0, + "learning_rate": 1.50293542074364e-05, + "loss": 1.502, + "step": 768 + }, + { + "epoch": 0.0225791297198896, + "grad_norm": 0.0, + "learning_rate": 1.5048923679060668e-05, + "loss": 1.4736, + "step": 769 + }, + { + "epoch": 0.0226084913970286, + "grad_norm": 0.0, + "learning_rate": 1.5068493150684933e-05, + "loss": 1.4785, + "step": 770 + }, + { + "epoch": 0.022637853074167595, + "grad_norm": 0.0, + "learning_rate": 1.5088062622309199e-05, + "loss": 1.5371, + "step": 771 + }, + { + "epoch": 0.022667214751306595, + "grad_norm": 0.0, + "learning_rate": 1.5107632093933464e-05, + "loss": 1.332, + "step": 772 + }, + { + "epoch": 0.022696576428445595, + "grad_norm": 0.0, + "learning_rate": 1.5127201565557732e-05, + "loss": 1.5908, + "step": 773 + }, + { + "epoch": 0.02272593810558459, + "grad_norm": 0.0, + "learning_rate": 1.5146771037181998e-05, + "loss": 1.5459, + "step": 774 + }, + { + "epoch": 0.02275529978272359, + "grad_norm": 0.0, + "learning_rate": 1.5166340508806263e-05, + "loss": 1.5176, + "step": 775 + }, + { + "epoch": 0.022784661459862587, + "grad_norm": 0.0, + "learning_rate": 1.5185909980430529e-05, + "loss": 1.5205, + "step": 776 + }, + { + "epoch": 0.022814023137001586, + "grad_norm": 0.0, + "learning_rate": 1.5205479452054797e-05, + "loss": 1.5195, + "step": 777 + }, + { + "epoch": 0.022843384814140583, + "grad_norm": 0.0, + "learning_rate": 1.5225048923679062e-05, + "loss": 1.627, + "step": 778 + }, + { + "epoch": 0.022872746491279582, + "grad_norm": 0.0, + "learning_rate": 1.5244618395303328e-05, + "loss": 1.5303, + "step": 779 + }, + { + "epoch": 0.02290210816841858, + "grad_norm": 0.0, + "learning_rate": 1.5264187866927593e-05, + "loss": 1.3945, + "step": 780 + }, + { + "epoch": 0.02293146984555758, + "grad_norm": 0.0, + "learning_rate": 1.5283757338551862e-05, + "loss": 1.5049, + "step": 781 + }, + { + "epoch": 0.022960831522696578, + "grad_norm": 0.0, + "learning_rate": 1.5303326810176125e-05, + "loss": 1.457, + "step": 782 + }, + { + "epoch": 0.022990193199835574, + "grad_norm": 0.0, + "learning_rate": 1.5322896281800392e-05, + "loss": 1.5439, + "step": 783 + }, + { + "epoch": 0.023019554876974574, + "grad_norm": 0.0, + "learning_rate": 1.5342465753424658e-05, + "loss": 1.4814, + "step": 784 + }, + { + "epoch": 0.02304891655411357, + "grad_norm": 0.0, + "learning_rate": 1.5362035225048924e-05, + "loss": 1.5908, + "step": 785 + }, + { + "epoch": 0.02307827823125257, + "grad_norm": 0.0, + "learning_rate": 1.538160469667319e-05, + "loss": 1.499, + "step": 786 + }, + { + "epoch": 0.023107639908391566, + "grad_norm": 0.0, + "learning_rate": 1.5401174168297457e-05, + "loss": 1.6504, + "step": 787 + }, + { + "epoch": 0.023137001585530566, + "grad_norm": 0.0, + "learning_rate": 1.5420743639921723e-05, + "loss": 1.4941, + "step": 788 + }, + { + "epoch": 0.023166363262669562, + "grad_norm": 0.0, + "learning_rate": 1.544031311154599e-05, + "loss": 1.5244, + "step": 789 + }, + { + "epoch": 0.02319572493980856, + "grad_norm": 0.0, + "learning_rate": 1.5459882583170256e-05, + "loss": 1.4795, + "step": 790 + }, + { + "epoch": 0.02322508661694756, + "grad_norm": 0.0, + "learning_rate": 1.5479452054794523e-05, + "loss": 1.4883, + "step": 791 + }, + { + "epoch": 0.023254448294086558, + "grad_norm": 0.0, + "learning_rate": 1.5499021526418785e-05, + "loss": 1.5703, + "step": 792 + }, + { + "epoch": 0.023283809971225557, + "grad_norm": 0.0, + "learning_rate": 1.5518590998043055e-05, + "loss": 1.5723, + "step": 793 + }, + { + "epoch": 0.023313171648364554, + "grad_norm": 0.0, + "learning_rate": 1.553816046966732e-05, + "loss": 1.5762, + "step": 794 + }, + { + "epoch": 0.023342533325503553, + "grad_norm": 0.0, + "learning_rate": 1.5557729941291585e-05, + "loss": 1.5723, + "step": 795 + }, + { + "epoch": 0.02337189500264255, + "grad_norm": 0.0, + "learning_rate": 1.557729941291585e-05, + "loss": 1.5312, + "step": 796 + }, + { + "epoch": 0.02340125667978155, + "grad_norm": 0.0, + "learning_rate": 1.559686888454012e-05, + "loss": 1.5, + "step": 797 + }, + { + "epoch": 0.02343061835692055, + "grad_norm": 0.0, + "learning_rate": 1.5616438356164384e-05, + "loss": 1.6143, + "step": 798 + }, + { + "epoch": 0.023459980034059545, + "grad_norm": 0.0, + "learning_rate": 1.563600782778865e-05, + "loss": 1.6416, + "step": 799 + }, + { + "epoch": 0.023489341711198545, + "grad_norm": 0.0, + "learning_rate": 1.5655577299412916e-05, + "loss": 1.6514, + "step": 800 + }, + { + "epoch": 0.02351870338833754, + "grad_norm": 0.0, + "learning_rate": 1.5675146771037183e-05, + "loss": 1.4395, + "step": 801 + }, + { + "epoch": 0.02354806506547654, + "grad_norm": 0.0, + "learning_rate": 1.569471624266145e-05, + "loss": 1.5947, + "step": 802 + }, + { + "epoch": 0.023577426742615537, + "grad_norm": 0.0, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.415, + "step": 803 + }, + { + "epoch": 0.023606788419754537, + "grad_norm": 0.0, + "learning_rate": 1.573385518590998e-05, + "loss": 1.5264, + "step": 804 + }, + { + "epoch": 0.023636150096893533, + "grad_norm": 0.0, + "learning_rate": 1.5753424657534248e-05, + "loss": 1.6719, + "step": 805 + }, + { + "epoch": 0.023665511774032533, + "grad_norm": 0.0, + "learning_rate": 1.5772994129158514e-05, + "loss": 1.6123, + "step": 806 + }, + { + "epoch": 0.023694873451171532, + "grad_norm": 0.0, + "learning_rate": 1.579256360078278e-05, + "loss": 1.6582, + "step": 807 + }, + { + "epoch": 0.02372423512831053, + "grad_norm": 0.0, + "learning_rate": 1.5812133072407047e-05, + "loss": 1.501, + "step": 808 + }, + { + "epoch": 0.02375359680544953, + "grad_norm": 0.0, + "learning_rate": 1.5831702544031313e-05, + "loss": 1.4775, + "step": 809 + }, + { + "epoch": 0.023782958482588525, + "grad_norm": 0.0, + "learning_rate": 1.585127201565558e-05, + "loss": 1.6055, + "step": 810 + }, + { + "epoch": 0.023812320159727524, + "grad_norm": 0.0, + "learning_rate": 1.5870841487279843e-05, + "loss": 1.582, + "step": 811 + }, + { + "epoch": 0.02384168183686652, + "grad_norm": 0.0, + "learning_rate": 1.589041095890411e-05, + "loss": 1.5156, + "step": 812 + }, + { + "epoch": 0.02387104351400552, + "grad_norm": 0.0, + "learning_rate": 1.590998043052838e-05, + "loss": 1.6768, + "step": 813 + }, + { + "epoch": 0.023900405191144516, + "grad_norm": 0.0, + "learning_rate": 1.5929549902152642e-05, + "loss": 1.5518, + "step": 814 + }, + { + "epoch": 0.023929766868283516, + "grad_norm": 0.0, + "learning_rate": 1.5949119373776908e-05, + "loss": 1.5781, + "step": 815 + }, + { + "epoch": 0.023959128545422516, + "grad_norm": 0.0, + "learning_rate": 1.5968688845401174e-05, + "loss": 1.5615, + "step": 816 + }, + { + "epoch": 0.023988490222561512, + "grad_norm": 0.0, + "learning_rate": 1.598825831702544e-05, + "loss": 1.5381, + "step": 817 + }, + { + "epoch": 0.024017851899700512, + "grad_norm": 0.0, + "learning_rate": 1.6007827788649707e-05, + "loss": 1.5674, + "step": 818 + }, + { + "epoch": 0.024047213576839508, + "grad_norm": 0.0, + "learning_rate": 1.6027397260273974e-05, + "loss": 1.5488, + "step": 819 + }, + { + "epoch": 0.024076575253978508, + "grad_norm": 0.0, + "learning_rate": 1.604696673189824e-05, + "loss": 1.6211, + "step": 820 + }, + { + "epoch": 0.024105936931117504, + "grad_norm": 0.0, + "learning_rate": 1.6066536203522506e-05, + "loss": 1.457, + "step": 821 + }, + { + "epoch": 0.024135298608256504, + "grad_norm": 0.0, + "learning_rate": 1.6086105675146773e-05, + "loss": 1.5078, + "step": 822 + }, + { + "epoch": 0.024164660285395503, + "grad_norm": 0.0, + "learning_rate": 1.610567514677104e-05, + "loss": 1.4141, + "step": 823 + }, + { + "epoch": 0.0241940219625345, + "grad_norm": 0.0, + "learning_rate": 1.6125244618395305e-05, + "loss": 1.6084, + "step": 824 + }, + { + "epoch": 0.0242233836396735, + "grad_norm": 0.0, + "learning_rate": 1.614481409001957e-05, + "loss": 1.6279, + "step": 825 + }, + { + "epoch": 0.024252745316812496, + "grad_norm": 0.0, + "learning_rate": 1.6164383561643838e-05, + "loss": 1.5283, + "step": 826 + }, + { + "epoch": 0.024282106993951495, + "grad_norm": 0.0, + "learning_rate": 1.6183953033268104e-05, + "loss": 1.3955, + "step": 827 + }, + { + "epoch": 0.02431146867109049, + "grad_norm": 0.0, + "learning_rate": 1.6203522504892367e-05, + "loss": 1.5566, + "step": 828 + }, + { + "epoch": 0.02434083034822949, + "grad_norm": 0.0, + "learning_rate": 1.6223091976516637e-05, + "loss": 1.5791, + "step": 829 + }, + { + "epoch": 0.024370192025368487, + "grad_norm": 0.0, + "learning_rate": 1.6242661448140903e-05, + "loss": 1.4502, + "step": 830 + }, + { + "epoch": 0.024399553702507487, + "grad_norm": 0.0, + "learning_rate": 1.6262230919765166e-05, + "loss": 1.6123, + "step": 831 + }, + { + "epoch": 0.024428915379646487, + "grad_norm": 0.0, + "learning_rate": 1.6281800391389433e-05, + "loss": 1.54, + "step": 832 + }, + { + "epoch": 0.024458277056785483, + "grad_norm": 0.0, + "learning_rate": 1.6301369863013702e-05, + "loss": 1.583, + "step": 833 + }, + { + "epoch": 0.024487638733924483, + "grad_norm": 0.0, + "learning_rate": 1.6320939334637965e-05, + "loss": 1.4883, + "step": 834 + }, + { + "epoch": 0.02451700041106348, + "grad_norm": 0.0, + "learning_rate": 1.6340508806262232e-05, + "loss": 1.5469, + "step": 835 + }, + { + "epoch": 0.02454636208820248, + "grad_norm": 0.0, + "learning_rate": 1.6360078277886498e-05, + "loss": 1.624, + "step": 836 + }, + { + "epoch": 0.024575723765341475, + "grad_norm": 0.0, + "learning_rate": 1.6379647749510764e-05, + "loss": 1.5498, + "step": 837 + }, + { + "epoch": 0.024605085442480475, + "grad_norm": 0.0, + "learning_rate": 1.639921722113503e-05, + "loss": 1.4844, + "step": 838 + }, + { + "epoch": 0.024634447119619474, + "grad_norm": 0.0, + "learning_rate": 1.6418786692759297e-05, + "loss": 1.4434, + "step": 839 + }, + { + "epoch": 0.02466380879675847, + "grad_norm": 0.0, + "learning_rate": 1.6438356164383563e-05, + "loss": 1.5732, + "step": 840 + }, + { + "epoch": 0.02469317047389747, + "grad_norm": 0.0, + "learning_rate": 1.645792563600783e-05, + "loss": 1.6133, + "step": 841 + }, + { + "epoch": 0.024722532151036467, + "grad_norm": 0.0, + "learning_rate": 1.6477495107632096e-05, + "loss": 1.5703, + "step": 842 + }, + { + "epoch": 0.024751893828175466, + "grad_norm": 0.0, + "learning_rate": 1.6497064579256362e-05, + "loss": 1.5107, + "step": 843 + }, + { + "epoch": 0.024781255505314462, + "grad_norm": 0.0, + "learning_rate": 1.6516634050880625e-05, + "loss": 1.5449, + "step": 844 + }, + { + "epoch": 0.024810617182453462, + "grad_norm": 0.0, + "learning_rate": 1.6536203522504895e-05, + "loss": 1.5361, + "step": 845 + }, + { + "epoch": 0.02483997885959246, + "grad_norm": 0.0, + "learning_rate": 1.655577299412916e-05, + "loss": 1.4863, + "step": 846 + }, + { + "epoch": 0.024869340536731458, + "grad_norm": 0.0, + "learning_rate": 1.6575342465753425e-05, + "loss": 1.5293, + "step": 847 + }, + { + "epoch": 0.024898702213870458, + "grad_norm": 0.0, + "learning_rate": 1.659491193737769e-05, + "loss": 1.4678, + "step": 848 + }, + { + "epoch": 0.024928063891009454, + "grad_norm": 0.0, + "learning_rate": 1.661448140900196e-05, + "loss": 1.5928, + "step": 849 + }, + { + "epoch": 0.024957425568148454, + "grad_norm": 0.0, + "learning_rate": 1.6634050880626224e-05, + "loss": 1.5068, + "step": 850 + }, + { + "epoch": 0.02498678724528745, + "grad_norm": 0.0, + "learning_rate": 1.665362035225049e-05, + "loss": 1.5293, + "step": 851 + }, + { + "epoch": 0.02501614892242645, + "grad_norm": 0.0, + "learning_rate": 1.6673189823874756e-05, + "loss": 1.5771, + "step": 852 + }, + { + "epoch": 0.025045510599565446, + "grad_norm": 0.0, + "learning_rate": 1.6692759295499023e-05, + "loss": 1.4922, + "step": 853 + }, + { + "epoch": 0.025074872276704446, + "grad_norm": 0.0, + "learning_rate": 1.671232876712329e-05, + "loss": 1.5459, + "step": 854 + }, + { + "epoch": 0.025104233953843442, + "grad_norm": 0.0, + "learning_rate": 1.6731898238747555e-05, + "loss": 1.5869, + "step": 855 + }, + { + "epoch": 0.02513359563098244, + "grad_norm": 0.0, + "learning_rate": 1.675146771037182e-05, + "loss": 1.5264, + "step": 856 + }, + { + "epoch": 0.02516295730812144, + "grad_norm": 0.0, + "learning_rate": 1.6771037181996088e-05, + "loss": 1.4648, + "step": 857 + }, + { + "epoch": 0.025192318985260437, + "grad_norm": 0.0, + "learning_rate": 1.6790606653620354e-05, + "loss": 1.4805, + "step": 858 + }, + { + "epoch": 0.025221680662399437, + "grad_norm": 0.0, + "learning_rate": 1.681017612524462e-05, + "loss": 1.5117, + "step": 859 + }, + { + "epoch": 0.025251042339538433, + "grad_norm": 0.0, + "learning_rate": 1.6829745596868884e-05, + "loss": 1.4814, + "step": 860 + }, + { + "epoch": 0.025280404016677433, + "grad_norm": 0.0, + "learning_rate": 1.6849315068493153e-05, + "loss": 1.4961, + "step": 861 + }, + { + "epoch": 0.02530976569381643, + "grad_norm": 0.0, + "learning_rate": 1.686888454011742e-05, + "loss": 1.543, + "step": 862 + }, + { + "epoch": 0.02533912737095543, + "grad_norm": 0.0, + "learning_rate": 1.6888454011741683e-05, + "loss": 1.5293, + "step": 863 + }, + { + "epoch": 0.02536848904809443, + "grad_norm": 0.0, + "learning_rate": 1.690802348336595e-05, + "loss": 1.624, + "step": 864 + }, + { + "epoch": 0.025397850725233425, + "grad_norm": 0.0, + "learning_rate": 1.692759295499022e-05, + "loss": 1.4795, + "step": 865 + }, + { + "epoch": 0.025427212402372425, + "grad_norm": 0.0, + "learning_rate": 1.6947162426614482e-05, + "loss": 1.4541, + "step": 866 + }, + { + "epoch": 0.02545657407951142, + "grad_norm": 0.0, + "learning_rate": 1.6966731898238748e-05, + "loss": 1.585, + "step": 867 + }, + { + "epoch": 0.02548593575665042, + "grad_norm": 0.0, + "learning_rate": 1.6986301369863014e-05, + "loss": 1.5391, + "step": 868 + }, + { + "epoch": 0.025515297433789417, + "grad_norm": 0.0, + "learning_rate": 1.700587084148728e-05, + "loss": 1.6416, + "step": 869 + }, + { + "epoch": 0.025544659110928417, + "grad_norm": 0.0, + "learning_rate": 1.7025440313111547e-05, + "loss": 1.4795, + "step": 870 + }, + { + "epoch": 0.025574020788067413, + "grad_norm": 0.0, + "learning_rate": 1.7045009784735813e-05, + "loss": 1.5332, + "step": 871 + }, + { + "epoch": 0.025603382465206413, + "grad_norm": 0.0, + "learning_rate": 1.706457925636008e-05, + "loss": 1.5137, + "step": 872 + }, + { + "epoch": 0.025632744142345412, + "grad_norm": 0.0, + "learning_rate": 1.7084148727984346e-05, + "loss": 1.6562, + "step": 873 + }, + { + "epoch": 0.02566210581948441, + "grad_norm": 0.0, + "learning_rate": 1.7103718199608613e-05, + "loss": 1.5049, + "step": 874 + }, + { + "epoch": 0.025691467496623408, + "grad_norm": 0.0, + "learning_rate": 1.712328767123288e-05, + "loss": 1.4189, + "step": 875 + }, + { + "epoch": 0.025720829173762404, + "grad_norm": 0.0, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.4961, + "step": 876 + }, + { + "epoch": 0.025750190850901404, + "grad_norm": 0.0, + "learning_rate": 1.716242661448141e-05, + "loss": 1.6777, + "step": 877 + }, + { + "epoch": 0.0257795525280404, + "grad_norm": 0.0, + "learning_rate": 1.7181996086105678e-05, + "loss": 1.6348, + "step": 878 + }, + { + "epoch": 0.0258089142051794, + "grad_norm": 0.0, + "learning_rate": 1.720156555772994e-05, + "loss": 1.5957, + "step": 879 + }, + { + "epoch": 0.0258382758823184, + "grad_norm": 0.0, + "learning_rate": 1.7221135029354207e-05, + "loss": 1.5967, + "step": 880 + }, + { + "epoch": 0.025867637559457396, + "grad_norm": 0.0, + "learning_rate": 1.7240704500978477e-05, + "loss": 1.5342, + "step": 881 + }, + { + "epoch": 0.025896999236596396, + "grad_norm": 0.0, + "learning_rate": 1.726027397260274e-05, + "loss": 1.54, + "step": 882 + }, + { + "epoch": 0.025926360913735392, + "grad_norm": 0.0, + "learning_rate": 1.7279843444227006e-05, + "loss": 1.5234, + "step": 883 + }, + { + "epoch": 0.02595572259087439, + "grad_norm": 0.0, + "learning_rate": 1.7299412915851273e-05, + "loss": 1.5068, + "step": 884 + }, + { + "epoch": 0.025985084268013388, + "grad_norm": 0.0, + "learning_rate": 1.731898238747554e-05, + "loss": 1.5537, + "step": 885 + }, + { + "epoch": 0.026014445945152388, + "grad_norm": 0.0, + "learning_rate": 1.7338551859099805e-05, + "loss": 1.4893, + "step": 886 + }, + { + "epoch": 0.026043807622291384, + "grad_norm": 0.0, + "learning_rate": 1.735812133072407e-05, + "loss": 1.6504, + "step": 887 + }, + { + "epoch": 0.026073169299430383, + "grad_norm": 0.0, + "learning_rate": 1.7377690802348338e-05, + "loss": 1.6133, + "step": 888 + }, + { + "epoch": 0.026102530976569383, + "grad_norm": 0.0, + "learning_rate": 1.7397260273972604e-05, + "loss": 1.5244, + "step": 889 + }, + { + "epoch": 0.02613189265370838, + "grad_norm": 0.0, + "learning_rate": 1.741682974559687e-05, + "loss": 1.6777, + "step": 890 + }, + { + "epoch": 0.02616125433084738, + "grad_norm": 0.0, + "learning_rate": 1.7436399217221137e-05, + "loss": 1.3857, + "step": 891 + }, + { + "epoch": 0.026190616007986375, + "grad_norm": 0.0, + "learning_rate": 1.74559686888454e-05, + "loss": 1.5342, + "step": 892 + }, + { + "epoch": 0.026219977685125375, + "grad_norm": 0.0, + "learning_rate": 1.747553816046967e-05, + "loss": 1.5332, + "step": 893 + }, + { + "epoch": 0.02624933936226437, + "grad_norm": 0.0, + "learning_rate": 1.7495107632093936e-05, + "loss": 1.5684, + "step": 894 + }, + { + "epoch": 0.02627870103940337, + "grad_norm": 0.0, + "learning_rate": 1.75146771037182e-05, + "loss": 1.498, + "step": 895 + }, + { + "epoch": 0.026308062716542367, + "grad_norm": 0.0, + "learning_rate": 1.7534246575342465e-05, + "loss": 1.4219, + "step": 896 + }, + { + "epoch": 0.026337424393681367, + "grad_norm": 0.0, + "learning_rate": 1.7553816046966735e-05, + "loss": 1.6514, + "step": 897 + }, + { + "epoch": 0.026366786070820367, + "grad_norm": 0.0, + "learning_rate": 1.7573385518590998e-05, + "loss": 1.4424, + "step": 898 + }, + { + "epoch": 0.026396147747959363, + "grad_norm": 0.0, + "learning_rate": 1.7592954990215264e-05, + "loss": 1.5713, + "step": 899 + }, + { + "epoch": 0.026425509425098363, + "grad_norm": 0.0, + "learning_rate": 1.761252446183953e-05, + "loss": 1.4766, + "step": 900 + }, + { + "epoch": 0.02645487110223736, + "grad_norm": 0.0, + "learning_rate": 1.7632093933463797e-05, + "loss": 1.6465, + "step": 901 + }, + { + "epoch": 0.02648423277937636, + "grad_norm": 0.0, + "learning_rate": 1.7651663405088064e-05, + "loss": 1.499, + "step": 902 + }, + { + "epoch": 0.026513594456515355, + "grad_norm": 0.0, + "learning_rate": 1.767123287671233e-05, + "loss": 1.5049, + "step": 903 + }, + { + "epoch": 0.026542956133654354, + "grad_norm": 0.0, + "learning_rate": 1.7690802348336596e-05, + "loss": 1.5498, + "step": 904 + }, + { + "epoch": 0.026572317810793354, + "grad_norm": 0.0, + "learning_rate": 1.7710371819960863e-05, + "loss": 1.5225, + "step": 905 + }, + { + "epoch": 0.02660167948793235, + "grad_norm": 0.0, + "learning_rate": 1.772994129158513e-05, + "loss": 1.5801, + "step": 906 + }, + { + "epoch": 0.02663104116507135, + "grad_norm": 0.0, + "learning_rate": 1.7749510763209395e-05, + "loss": 1.627, + "step": 907 + }, + { + "epoch": 0.026660402842210346, + "grad_norm": 0.0, + "learning_rate": 1.776908023483366e-05, + "loss": 1.6455, + "step": 908 + }, + { + "epoch": 0.026689764519349346, + "grad_norm": 0.0, + "learning_rate": 1.7788649706457928e-05, + "loss": 1.5459, + "step": 909 + }, + { + "epoch": 0.026719126196488342, + "grad_norm": 0.0, + "learning_rate": 1.7808219178082194e-05, + "loss": 1.4883, + "step": 910 + }, + { + "epoch": 0.026748487873627342, + "grad_norm": 0.0, + "learning_rate": 1.7827788649706457e-05, + "loss": 1.5254, + "step": 911 + }, + { + "epoch": 0.026777849550766338, + "grad_norm": 0.0, + "learning_rate": 1.7847358121330724e-05, + "loss": 1.5605, + "step": 912 + }, + { + "epoch": 0.026807211227905338, + "grad_norm": 0.0, + "learning_rate": 1.7866927592954993e-05, + "loss": 1.6172, + "step": 913 + }, + { + "epoch": 0.026836572905044338, + "grad_norm": 0.0, + "learning_rate": 1.7886497064579256e-05, + "loss": 1.4756, + "step": 914 + }, + { + "epoch": 0.026865934582183334, + "grad_norm": 0.0, + "learning_rate": 1.7906066536203523e-05, + "loss": 1.6504, + "step": 915 + }, + { + "epoch": 0.026895296259322334, + "grad_norm": 0.0, + "learning_rate": 1.792563600782779e-05, + "loss": 1.5762, + "step": 916 + }, + { + "epoch": 0.02692465793646133, + "grad_norm": 0.0, + "learning_rate": 1.7945205479452055e-05, + "loss": 1.6562, + "step": 917 + }, + { + "epoch": 0.02695401961360033, + "grad_norm": 0.0, + "learning_rate": 1.7964774951076322e-05, + "loss": 1.502, + "step": 918 + }, + { + "epoch": 0.026983381290739326, + "grad_norm": 0.0, + "learning_rate": 1.7984344422700588e-05, + "loss": 1.5352, + "step": 919 + }, + { + "epoch": 0.027012742967878325, + "grad_norm": 0.0, + "learning_rate": 1.8003913894324854e-05, + "loss": 1.4424, + "step": 920 + }, + { + "epoch": 0.027042104645017325, + "grad_norm": 0.0, + "learning_rate": 1.802348336594912e-05, + "loss": 1.5693, + "step": 921 + }, + { + "epoch": 0.02707146632215632, + "grad_norm": 0.0, + "learning_rate": 1.8043052837573387e-05, + "loss": 1.5352, + "step": 922 + }, + { + "epoch": 0.02710082799929532, + "grad_norm": 0.0, + "learning_rate": 1.8062622309197653e-05, + "loss": 1.5703, + "step": 923 + }, + { + "epoch": 0.027130189676434317, + "grad_norm": 0.0, + "learning_rate": 1.808219178082192e-05, + "loss": 1.6367, + "step": 924 + }, + { + "epoch": 0.027159551353573317, + "grad_norm": 0.0, + "learning_rate": 1.8101761252446186e-05, + "loss": 1.583, + "step": 925 + }, + { + "epoch": 0.027188913030712313, + "grad_norm": 0.0, + "learning_rate": 1.8121330724070452e-05, + "loss": 1.6445, + "step": 926 + }, + { + "epoch": 0.027218274707851313, + "grad_norm": 0.0, + "learning_rate": 1.814090019569472e-05, + "loss": 1.5576, + "step": 927 + }, + { + "epoch": 0.02724763638499031, + "grad_norm": 0.0, + "learning_rate": 1.8160469667318982e-05, + "loss": 1.6123, + "step": 928 + }, + { + "epoch": 0.02727699806212931, + "grad_norm": 0.0, + "learning_rate": 1.818003913894325e-05, + "loss": 1.6152, + "step": 929 + }, + { + "epoch": 0.02730635973926831, + "grad_norm": 0.0, + "learning_rate": 1.8199608610567518e-05, + "loss": 1.4873, + "step": 930 + }, + { + "epoch": 0.027335721416407305, + "grad_norm": 0.0, + "learning_rate": 1.821917808219178e-05, + "loss": 1.4932, + "step": 931 + }, + { + "epoch": 0.027365083093546304, + "grad_norm": 0.0, + "learning_rate": 1.8238747553816047e-05, + "loss": 1.5361, + "step": 932 + }, + { + "epoch": 0.0273944447706853, + "grad_norm": 0.0, + "learning_rate": 1.8258317025440317e-05, + "loss": 1.6943, + "step": 933 + }, + { + "epoch": 0.0274238064478243, + "grad_norm": 0.0, + "learning_rate": 1.827788649706458e-05, + "loss": 1.5342, + "step": 934 + }, + { + "epoch": 0.027453168124963297, + "grad_norm": 0.0, + "learning_rate": 1.8297455968688846e-05, + "loss": 1.5996, + "step": 935 + }, + { + "epoch": 0.027482529802102296, + "grad_norm": 0.0, + "learning_rate": 1.8317025440313113e-05, + "loss": 1.5322, + "step": 936 + }, + { + "epoch": 0.027511891479241293, + "grad_norm": 0.0, + "learning_rate": 1.833659491193738e-05, + "loss": 1.5312, + "step": 937 + }, + { + "epoch": 0.027541253156380292, + "grad_norm": 0.0, + "learning_rate": 1.8356164383561645e-05, + "loss": 1.8447, + "step": 938 + }, + { + "epoch": 0.027570614833519292, + "grad_norm": 0.0, + "learning_rate": 1.837573385518591e-05, + "loss": 1.499, + "step": 939 + }, + { + "epoch": 0.027599976510658288, + "grad_norm": 0.0, + "learning_rate": 1.8395303326810178e-05, + "loss": 1.5693, + "step": 940 + }, + { + "epoch": 0.027629338187797288, + "grad_norm": 0.0, + "learning_rate": 1.8414872798434444e-05, + "loss": 1.4648, + "step": 941 + }, + { + "epoch": 0.027658699864936284, + "grad_norm": 0.0, + "learning_rate": 1.843444227005871e-05, + "loss": 1.6211, + "step": 942 + }, + { + "epoch": 0.027688061542075284, + "grad_norm": 0.0, + "learning_rate": 1.8454011741682977e-05, + "loss": 1.4951, + "step": 943 + }, + { + "epoch": 0.02771742321921428, + "grad_norm": 0.0, + "learning_rate": 1.847358121330724e-05, + "loss": 1.582, + "step": 944 + }, + { + "epoch": 0.02774678489635328, + "grad_norm": 0.0, + "learning_rate": 1.849315068493151e-05, + "loss": 1.4932, + "step": 945 + }, + { + "epoch": 0.02777614657349228, + "grad_norm": 0.0, + "learning_rate": 1.8512720156555776e-05, + "loss": 1.5547, + "step": 946 + }, + { + "epoch": 0.027805508250631276, + "grad_norm": 0.0, + "learning_rate": 1.853228962818004e-05, + "loss": 1.5605, + "step": 947 + }, + { + "epoch": 0.027834869927770275, + "grad_norm": 0.0, + "learning_rate": 1.8551859099804305e-05, + "loss": 1.4062, + "step": 948 + }, + { + "epoch": 0.02786423160490927, + "grad_norm": 0.0, + "learning_rate": 1.8571428571428575e-05, + "loss": 1.4932, + "step": 949 + }, + { + "epoch": 0.02789359328204827, + "grad_norm": 0.0, + "learning_rate": 1.8590998043052838e-05, + "loss": 1.5078, + "step": 950 + }, + { + "epoch": 0.027922954959187268, + "grad_norm": 0.0, + "learning_rate": 1.8610567514677104e-05, + "loss": 1.6133, + "step": 951 + }, + { + "epoch": 0.027952316636326267, + "grad_norm": 0.0, + "learning_rate": 1.863013698630137e-05, + "loss": 1.5117, + "step": 952 + }, + { + "epoch": 0.027981678313465264, + "grad_norm": 0.0, + "learning_rate": 1.8649706457925637e-05, + "loss": 1.5312, + "step": 953 + }, + { + "epoch": 0.028011039990604263, + "grad_norm": 0.0, + "learning_rate": 1.8669275929549903e-05, + "loss": 1.5371, + "step": 954 + }, + { + "epoch": 0.028040401667743263, + "grad_norm": 0.0, + "learning_rate": 1.868884540117417e-05, + "loss": 1.4707, + "step": 955 + }, + { + "epoch": 0.02806976334488226, + "grad_norm": 0.0, + "learning_rate": 1.8708414872798436e-05, + "loss": 1.582, + "step": 956 + }, + { + "epoch": 0.02809912502202126, + "grad_norm": 0.0, + "learning_rate": 1.8727984344422703e-05, + "loss": 1.3301, + "step": 957 + }, + { + "epoch": 0.028128486699160255, + "grad_norm": 0.0, + "learning_rate": 1.874755381604697e-05, + "loss": 1.6631, + "step": 958 + }, + { + "epoch": 0.028157848376299255, + "grad_norm": 0.0, + "learning_rate": 1.8767123287671235e-05, + "loss": 1.5342, + "step": 959 + }, + { + "epoch": 0.02818721005343825, + "grad_norm": 0.0, + "learning_rate": 1.8786692759295498e-05, + "loss": 1.5186, + "step": 960 + }, + { + "epoch": 0.02821657173057725, + "grad_norm": 0.0, + "learning_rate": 1.8806262230919768e-05, + "loss": 1.5303, + "step": 961 + }, + { + "epoch": 0.028245933407716247, + "grad_norm": 0.0, + "learning_rate": 1.8825831702544034e-05, + "loss": 1.6162, + "step": 962 + }, + { + "epoch": 0.028275295084855247, + "grad_norm": 0.0, + "learning_rate": 1.8845401174168297e-05, + "loss": 1.6035, + "step": 963 + }, + { + "epoch": 0.028304656761994246, + "grad_norm": 0.0, + "learning_rate": 1.8864970645792564e-05, + "loss": 1.6172, + "step": 964 + }, + { + "epoch": 0.028334018439133243, + "grad_norm": 0.0, + "learning_rate": 1.8884540117416833e-05, + "loss": 1.5986, + "step": 965 + }, + { + "epoch": 0.028363380116272242, + "grad_norm": 0.0, + "learning_rate": 1.8904109589041096e-05, + "loss": 1.6377, + "step": 966 + }, + { + "epoch": 0.02839274179341124, + "grad_norm": 0.0, + "learning_rate": 1.8923679060665363e-05, + "loss": 1.5186, + "step": 967 + }, + { + "epoch": 0.02842210347055024, + "grad_norm": 0.0, + "learning_rate": 1.894324853228963e-05, + "loss": 1.6113, + "step": 968 + }, + { + "epoch": 0.028451465147689235, + "grad_norm": 0.0, + "learning_rate": 1.8962818003913895e-05, + "loss": 1.5664, + "step": 969 + }, + { + "epoch": 0.028480826824828234, + "grad_norm": 0.0, + "learning_rate": 1.898238747553816e-05, + "loss": 1.6602, + "step": 970 + }, + { + "epoch": 0.028510188501967234, + "grad_norm": 0.0, + "learning_rate": 1.9001956947162428e-05, + "loss": 1.6436, + "step": 971 + }, + { + "epoch": 0.02853955017910623, + "grad_norm": 0.0, + "learning_rate": 1.9021526418786694e-05, + "loss": 1.5645, + "step": 972 + }, + { + "epoch": 0.02856891185624523, + "grad_norm": 0.0, + "learning_rate": 1.904109589041096e-05, + "loss": 1.6348, + "step": 973 + }, + { + "epoch": 0.028598273533384226, + "grad_norm": 0.0, + "learning_rate": 1.9060665362035227e-05, + "loss": 1.6221, + "step": 974 + }, + { + "epoch": 0.028627635210523226, + "grad_norm": 0.0, + "learning_rate": 1.9080234833659493e-05, + "loss": 1.5732, + "step": 975 + }, + { + "epoch": 0.028656996887662222, + "grad_norm": 0.0, + "learning_rate": 1.9099804305283756e-05, + "loss": 1.5615, + "step": 976 + }, + { + "epoch": 0.028686358564801222, + "grad_norm": 0.0, + "learning_rate": 1.9119373776908026e-05, + "loss": 1.5186, + "step": 977 + }, + { + "epoch": 0.028715720241940218, + "grad_norm": 0.0, + "learning_rate": 1.9138943248532292e-05, + "loss": 1.584, + "step": 978 + }, + { + "epoch": 0.028745081919079218, + "grad_norm": 0.0, + "learning_rate": 1.9158512720156555e-05, + "loss": 1.5674, + "step": 979 + }, + { + "epoch": 0.028774443596218217, + "grad_norm": 0.0, + "learning_rate": 1.9178082191780822e-05, + "loss": 1.4619, + "step": 980 + }, + { + "epoch": 0.028803805273357214, + "grad_norm": 0.0, + "learning_rate": 1.919765166340509e-05, + "loss": 1.5449, + "step": 981 + }, + { + "epoch": 0.028833166950496213, + "grad_norm": 0.0, + "learning_rate": 1.9217221135029354e-05, + "loss": 1.4912, + "step": 982 + }, + { + "epoch": 0.02886252862763521, + "grad_norm": 0.0, + "learning_rate": 1.923679060665362e-05, + "loss": 1.5332, + "step": 983 + }, + { + "epoch": 0.02889189030477421, + "grad_norm": 0.0, + "learning_rate": 1.9256360078277887e-05, + "loss": 1.5605, + "step": 984 + }, + { + "epoch": 0.028921251981913206, + "grad_norm": 0.0, + "learning_rate": 1.9275929549902154e-05, + "loss": 1.5859, + "step": 985 + }, + { + "epoch": 0.028950613659052205, + "grad_norm": 0.0, + "learning_rate": 1.929549902152642e-05, + "loss": 1.5264, + "step": 986 + }, + { + "epoch": 0.028979975336191205, + "grad_norm": 0.0, + "learning_rate": 1.9315068493150686e-05, + "loss": 1.4805, + "step": 987 + }, + { + "epoch": 0.0290093370133302, + "grad_norm": 0.0, + "learning_rate": 1.9334637964774953e-05, + "loss": 1.5039, + "step": 988 + }, + { + "epoch": 0.0290386986904692, + "grad_norm": 0.0, + "learning_rate": 1.935420743639922e-05, + "loss": 1.5664, + "step": 989 + }, + { + "epoch": 0.029068060367608197, + "grad_norm": 0.0, + "learning_rate": 1.9373776908023485e-05, + "loss": 1.5615, + "step": 990 + }, + { + "epoch": 0.029097422044747197, + "grad_norm": 0.0, + "learning_rate": 1.939334637964775e-05, + "loss": 1.5605, + "step": 991 + }, + { + "epoch": 0.029126783721886193, + "grad_norm": 0.0, + "learning_rate": 1.9412915851272015e-05, + "loss": 1.4902, + "step": 992 + }, + { + "epoch": 0.029156145399025193, + "grad_norm": 0.0, + "learning_rate": 1.9432485322896284e-05, + "loss": 1.5654, + "step": 993 + }, + { + "epoch": 0.02918550707616419, + "grad_norm": 0.0, + "learning_rate": 1.945205479452055e-05, + "loss": 1.6465, + "step": 994 + }, + { + "epoch": 0.02921486875330319, + "grad_norm": 0.0, + "learning_rate": 1.9471624266144814e-05, + "loss": 1.4678, + "step": 995 + }, + { + "epoch": 0.02924423043044219, + "grad_norm": 0.0, + "learning_rate": 1.949119373776908e-05, + "loss": 1.5469, + "step": 996 + }, + { + "epoch": 0.029273592107581185, + "grad_norm": 0.0, + "learning_rate": 1.951076320939335e-05, + "loss": 1.5498, + "step": 997 + }, + { + "epoch": 0.029302953784720184, + "grad_norm": 0.0, + "learning_rate": 1.9530332681017613e-05, + "loss": 1.6055, + "step": 998 + }, + { + "epoch": 0.02933231546185918, + "grad_norm": 0.0, + "learning_rate": 1.954990215264188e-05, + "loss": 1.5039, + "step": 999 + }, + { + "epoch": 0.02936167713899818, + "grad_norm": 0.0, + "learning_rate": 1.9569471624266145e-05, + "loss": 1.4424, + "step": 1000 + }, + { + "epoch": 0.029391038816137176, + "grad_norm": 0.0, + "learning_rate": 1.9589041095890412e-05, + "loss": 1.4619, + "step": 1001 + }, + { + "epoch": 0.029420400493276176, + "grad_norm": 0.0, + "learning_rate": 1.9608610567514678e-05, + "loss": 1.6201, + "step": 1002 + }, + { + "epoch": 0.029449762170415172, + "grad_norm": 0.0, + "learning_rate": 1.9628180039138944e-05, + "loss": 1.5752, + "step": 1003 + }, + { + "epoch": 0.029479123847554172, + "grad_norm": 0.0, + "learning_rate": 1.964774951076321e-05, + "loss": 1.5635, + "step": 1004 + }, + { + "epoch": 0.029508485524693172, + "grad_norm": 0.0, + "learning_rate": 1.9667318982387477e-05, + "loss": 1.4951, + "step": 1005 + }, + { + "epoch": 0.029537847201832168, + "grad_norm": 0.0, + "learning_rate": 1.9686888454011743e-05, + "loss": 1.5127, + "step": 1006 + }, + { + "epoch": 0.029567208878971168, + "grad_norm": 0.0, + "learning_rate": 1.970645792563601e-05, + "loss": 1.5791, + "step": 1007 + }, + { + "epoch": 0.029596570556110164, + "grad_norm": 0.0, + "learning_rate": 1.9726027397260276e-05, + "loss": 1.6143, + "step": 1008 + }, + { + "epoch": 0.029625932233249164, + "grad_norm": 0.0, + "learning_rate": 1.9745596868884542e-05, + "loss": 1.6396, + "step": 1009 + }, + { + "epoch": 0.02965529391038816, + "grad_norm": 0.0, + "learning_rate": 1.976516634050881e-05, + "loss": 1.583, + "step": 1010 + }, + { + "epoch": 0.02968465558752716, + "grad_norm": 0.0, + "learning_rate": 1.9784735812133072e-05, + "loss": 1.624, + "step": 1011 + }, + { + "epoch": 0.02971401726466616, + "grad_norm": 0.0, + "learning_rate": 1.9804305283757338e-05, + "loss": 1.5566, + "step": 1012 + }, + { + "epoch": 0.029743378941805156, + "grad_norm": 0.0, + "learning_rate": 1.9823874755381608e-05, + "loss": 1.5723, + "step": 1013 + }, + { + "epoch": 0.029772740618944155, + "grad_norm": 0.0, + "learning_rate": 1.984344422700587e-05, + "loss": 1.5029, + "step": 1014 + }, + { + "epoch": 0.02980210229608315, + "grad_norm": 0.0, + "learning_rate": 1.9863013698630137e-05, + "loss": 1.5312, + "step": 1015 + }, + { + "epoch": 0.02983146397322215, + "grad_norm": 0.0, + "learning_rate": 1.9882583170254404e-05, + "loss": 1.501, + "step": 1016 + }, + { + "epoch": 0.029860825650361147, + "grad_norm": 0.0, + "learning_rate": 1.990215264187867e-05, + "loss": 1.5996, + "step": 1017 + }, + { + "epoch": 0.029890187327500147, + "grad_norm": 0.0, + "learning_rate": 1.9921722113502936e-05, + "loss": 1.5029, + "step": 1018 + }, + { + "epoch": 0.029919549004639143, + "grad_norm": 0.0, + "learning_rate": 1.9941291585127203e-05, + "loss": 1.5186, + "step": 1019 + }, + { + "epoch": 0.029948910681778143, + "grad_norm": 0.0, + "learning_rate": 1.996086105675147e-05, + "loss": 1.5293, + "step": 1020 + }, + { + "epoch": 0.029978272358917143, + "grad_norm": 0.0, + "learning_rate": 1.9980430528375735e-05, + "loss": 1.5156, + "step": 1021 + }, + { + "epoch": 0.03000763403605614, + "grad_norm": 0.0, + "learning_rate": 2e-05, + "loss": 1.5664, + "step": 1022 + }, + { + "epoch": 0.03003699571319514, + "grad_norm": 0.0, + "learning_rate": 1.9999999954783723e-05, + "loss": 1.502, + "step": 1023 + }, + { + "epoch": 0.030066357390334135, + "grad_norm": 0.0, + "learning_rate": 1.999999981913488e-05, + "loss": 1.4453, + "step": 1024 + }, + { + "epoch": 0.030095719067473135, + "grad_norm": 0.0, + "learning_rate": 1.999999959305348e-05, + "loss": 1.5283, + "step": 1025 + }, + { + "epoch": 0.03012508074461213, + "grad_norm": 0.0, + "learning_rate": 1.9999999276539522e-05, + "loss": 1.7217, + "step": 1026 + }, + { + "epoch": 0.03015444242175113, + "grad_norm": 0.0, + "learning_rate": 1.999999886959301e-05, + "loss": 1.5723, + "step": 1027 + }, + { + "epoch": 0.03018380409889013, + "grad_norm": 0.0, + "learning_rate": 1.9999998372213944e-05, + "loss": 1.6572, + "step": 1028 + }, + { + "epoch": 0.030213165776029127, + "grad_norm": 0.0, + "learning_rate": 1.9999997784402335e-05, + "loss": 1.5967, + "step": 1029 + }, + { + "epoch": 0.030242527453168126, + "grad_norm": 0.0, + "learning_rate": 1.9999997106158185e-05, + "loss": 1.5273, + "step": 1030 + }, + { + "epoch": 0.030271889130307122, + "grad_norm": 0.0, + "learning_rate": 1.99999963374815e-05, + "loss": 1.5967, + "step": 1031 + }, + { + "epoch": 0.030301250807446122, + "grad_norm": 0.0, + "learning_rate": 1.9999995478372287e-05, + "loss": 1.5049, + "step": 1032 + }, + { + "epoch": 0.03033061248458512, + "grad_norm": 0.0, + "learning_rate": 1.9999994528830552e-05, + "loss": 1.6396, + "step": 1033 + }, + { + "epoch": 0.030359974161724118, + "grad_norm": 0.0, + "learning_rate": 1.999999348885631e-05, + "loss": 1.4814, + "step": 1034 + }, + { + "epoch": 0.030389335838863114, + "grad_norm": 0.0, + "learning_rate": 1.999999235844956e-05, + "loss": 1.6748, + "step": 1035 + }, + { + "epoch": 0.030418697516002114, + "grad_norm": 0.0, + "learning_rate": 1.999999113761032e-05, + "loss": 1.6318, + "step": 1036 + }, + { + "epoch": 0.030448059193141114, + "grad_norm": 0.0, + "learning_rate": 1.9999989826338602e-05, + "loss": 1.4736, + "step": 1037 + }, + { + "epoch": 0.03047742087028011, + "grad_norm": 0.0, + "learning_rate": 1.9999988424634414e-05, + "loss": 1.6182, + "step": 1038 + }, + { + "epoch": 0.03050678254741911, + "grad_norm": 0.0, + "learning_rate": 1.999998693249777e-05, + "loss": 1.6299, + "step": 1039 + }, + { + "epoch": 0.030536144224558106, + "grad_norm": 0.0, + "learning_rate": 1.9999985349928678e-05, + "loss": 1.5088, + "step": 1040 + }, + { + "epoch": 0.030565505901697106, + "grad_norm": 0.0, + "learning_rate": 1.999998367692716e-05, + "loss": 1.5693, + "step": 1041 + }, + { + "epoch": 0.030594867578836102, + "grad_norm": 0.0, + "learning_rate": 1.9999981913493232e-05, + "loss": 1.4277, + "step": 1042 + }, + { + "epoch": 0.0306242292559751, + "grad_norm": 0.0, + "learning_rate": 1.9999980059626905e-05, + "loss": 1.6494, + "step": 1043 + }, + { + "epoch": 0.030653590933114098, + "grad_norm": 0.0, + "learning_rate": 1.9999978115328196e-05, + "loss": 1.627, + "step": 1044 + }, + { + "epoch": 0.030682952610253098, + "grad_norm": 0.0, + "learning_rate": 1.9999976080597126e-05, + "loss": 1.708, + "step": 1045 + }, + { + "epoch": 0.030712314287392097, + "grad_norm": 0.0, + "learning_rate": 1.9999973955433708e-05, + "loss": 1.5049, + "step": 1046 + }, + { + "epoch": 0.030741675964531093, + "grad_norm": 0.0, + "learning_rate": 1.9999971739837965e-05, + "loss": 1.5498, + "step": 1047 + }, + { + "epoch": 0.030771037641670093, + "grad_norm": 0.0, + "learning_rate": 1.999996943380992e-05, + "loss": 1.5234, + "step": 1048 + }, + { + "epoch": 0.03080039931880909, + "grad_norm": 0.0, + "learning_rate": 1.9999967037349587e-05, + "loss": 1.625, + "step": 1049 + }, + { + "epoch": 0.03082976099594809, + "grad_norm": 0.0, + "learning_rate": 1.9999964550456995e-05, + "loss": 1.5127, + "step": 1050 + }, + { + "epoch": 0.030859122673087085, + "grad_norm": 0.0, + "learning_rate": 1.9999961973132157e-05, + "loss": 1.4922, + "step": 1051 + }, + { + "epoch": 0.030888484350226085, + "grad_norm": 0.0, + "learning_rate": 1.9999959305375106e-05, + "loss": 1.6035, + "step": 1052 + }, + { + "epoch": 0.030917846027365085, + "grad_norm": 0.0, + "learning_rate": 1.999995654718586e-05, + "loss": 1.4209, + "step": 1053 + }, + { + "epoch": 0.03094720770450408, + "grad_norm": 0.0, + "learning_rate": 1.9999953698564446e-05, + "loss": 1.4756, + "step": 1054 + }, + { + "epoch": 0.03097656938164308, + "grad_norm": 0.0, + "learning_rate": 1.9999950759510888e-05, + "loss": 1.5547, + "step": 1055 + }, + { + "epoch": 0.031005931058782077, + "grad_norm": 0.0, + "learning_rate": 1.999994773002522e-05, + "loss": 1.709, + "step": 1056 + }, + { + "epoch": 0.031035292735921077, + "grad_norm": 0.0, + "learning_rate": 1.999994461010746e-05, + "loss": 1.6094, + "step": 1057 + }, + { + "epoch": 0.031064654413060073, + "grad_norm": 0.0, + "learning_rate": 1.9999941399757637e-05, + "loss": 1.4834, + "step": 1058 + }, + { + "epoch": 0.031094016090199073, + "grad_norm": 0.0, + "learning_rate": 1.9999938098975785e-05, + "loss": 1.5146, + "step": 1059 + }, + { + "epoch": 0.03112337776733807, + "grad_norm": 0.0, + "learning_rate": 1.9999934707761934e-05, + "loss": 1.501, + "step": 1060 + }, + { + "epoch": 0.03115273944447707, + "grad_norm": 0.0, + "learning_rate": 1.9999931226116112e-05, + "loss": 1.5713, + "step": 1061 + }, + { + "epoch": 0.031182101121616068, + "grad_norm": 0.0, + "learning_rate": 1.999992765403835e-05, + "loss": 1.4531, + "step": 1062 + }, + { + "epoch": 0.031211462798755064, + "grad_norm": 0.0, + "learning_rate": 1.999992399152868e-05, + "loss": 1.5732, + "step": 1063 + }, + { + "epoch": 0.031240824475894064, + "grad_norm": 0.0, + "learning_rate": 1.9999920238587137e-05, + "loss": 1.4678, + "step": 1064 + }, + { + "epoch": 0.031270186153033064, + "grad_norm": 0.0, + "learning_rate": 1.9999916395213757e-05, + "loss": 1.6221, + "step": 1065 + }, + { + "epoch": 0.03129954783017206, + "grad_norm": 0.0, + "learning_rate": 1.9999912461408568e-05, + "loss": 1.3867, + "step": 1066 + }, + { + "epoch": 0.031328909507311056, + "grad_norm": 0.0, + "learning_rate": 1.999990843717161e-05, + "loss": 1.5107, + "step": 1067 + }, + { + "epoch": 0.03135827118445005, + "grad_norm": 0.0, + "learning_rate": 1.999990432250292e-05, + "loss": 1.5674, + "step": 1068 + }, + { + "epoch": 0.031387632861589056, + "grad_norm": 0.0, + "learning_rate": 1.9999900117402536e-05, + "loss": 1.6504, + "step": 1069 + }, + { + "epoch": 0.03141699453872805, + "grad_norm": 0.0, + "learning_rate": 1.9999895821870493e-05, + "loss": 1.541, + "step": 1070 + }, + { + "epoch": 0.03144635621586705, + "grad_norm": 0.0, + "learning_rate": 1.9999891435906828e-05, + "loss": 1.6172, + "step": 1071 + }, + { + "epoch": 0.03147571789300605, + "grad_norm": 0.0, + "learning_rate": 1.9999886959511587e-05, + "loss": 1.5918, + "step": 1072 + }, + { + "epoch": 0.03150507957014505, + "grad_norm": 0.0, + "learning_rate": 1.999988239268481e-05, + "loss": 1.7129, + "step": 1073 + }, + { + "epoch": 0.031534441247284044, + "grad_norm": 0.0, + "learning_rate": 1.9999877735426533e-05, + "loss": 1.5977, + "step": 1074 + }, + { + "epoch": 0.03156380292442304, + "grad_norm": 0.0, + "learning_rate": 1.9999872987736797e-05, + "loss": 1.4795, + "step": 1075 + }, + { + "epoch": 0.03159316460156204, + "grad_norm": 0.0, + "learning_rate": 1.999986814961565e-05, + "loss": 1.583, + "step": 1076 + }, + { + "epoch": 0.03162252627870104, + "grad_norm": 0.0, + "learning_rate": 1.9999863221063134e-05, + "loss": 1.5586, + "step": 1077 + }, + { + "epoch": 0.031651887955840036, + "grad_norm": 0.0, + "learning_rate": 1.99998582020793e-05, + "loss": 1.5117, + "step": 1078 + }, + { + "epoch": 0.03168124963297904, + "grad_norm": 0.0, + "learning_rate": 1.999985309266418e-05, + "loss": 1.5781, + "step": 1079 + }, + { + "epoch": 0.031710611310118035, + "grad_norm": 0.0, + "learning_rate": 1.9999847892817832e-05, + "loss": 1.5195, + "step": 1080 + }, + { + "epoch": 0.03173997298725703, + "grad_norm": 0.0, + "learning_rate": 1.9999842602540295e-05, + "loss": 1.5996, + "step": 1081 + }, + { + "epoch": 0.03176933466439603, + "grad_norm": 0.0, + "learning_rate": 1.9999837221831623e-05, + "loss": 1.5986, + "step": 1082 + }, + { + "epoch": 0.03179869634153503, + "grad_norm": 0.0, + "learning_rate": 1.9999831750691863e-05, + "loss": 1.4678, + "step": 1083 + }, + { + "epoch": 0.03182805801867403, + "grad_norm": 0.0, + "learning_rate": 1.999982618912106e-05, + "loss": 1.6426, + "step": 1084 + }, + { + "epoch": 0.03185741969581302, + "grad_norm": 0.0, + "learning_rate": 1.999982053711927e-05, + "loss": 1.5723, + "step": 1085 + }, + { + "epoch": 0.031886781372952026, + "grad_norm": 0.0, + "learning_rate": 1.9999814794686544e-05, + "loss": 1.6396, + "step": 1086 + }, + { + "epoch": 0.03191614305009102, + "grad_norm": 0.0, + "learning_rate": 1.9999808961822927e-05, + "loss": 1.5557, + "step": 1087 + }, + { + "epoch": 0.03194550472723002, + "grad_norm": 0.0, + "learning_rate": 1.999980303852848e-05, + "loss": 1.6562, + "step": 1088 + }, + { + "epoch": 0.031974866404369015, + "grad_norm": 0.0, + "learning_rate": 1.999979702480325e-05, + "loss": 1.6191, + "step": 1089 + }, + { + "epoch": 0.03200422808150802, + "grad_norm": 0.0, + "learning_rate": 1.9999790920647297e-05, + "loss": 1.624, + "step": 1090 + }, + { + "epoch": 0.032033589758647014, + "grad_norm": 0.0, + "learning_rate": 1.9999784726060675e-05, + "loss": 1.3604, + "step": 1091 + }, + { + "epoch": 0.03206295143578601, + "grad_norm": 0.0, + "learning_rate": 1.9999778441043433e-05, + "loss": 1.5029, + "step": 1092 + }, + { + "epoch": 0.03209231311292501, + "grad_norm": 0.0, + "learning_rate": 1.999977206559564e-05, + "loss": 1.6064, + "step": 1093 + }, + { + "epoch": 0.03212167479006401, + "grad_norm": 0.0, + "learning_rate": 1.9999765599717343e-05, + "loss": 1.5, + "step": 1094 + }, + { + "epoch": 0.032151036467203006, + "grad_norm": 0.0, + "learning_rate": 1.9999759043408608e-05, + "loss": 1.5508, + "step": 1095 + }, + { + "epoch": 0.032180398144342, + "grad_norm": 0.0, + "learning_rate": 1.9999752396669486e-05, + "loss": 1.6084, + "step": 1096 + }, + { + "epoch": 0.032209759821481006, + "grad_norm": 0.0, + "learning_rate": 1.9999745659500047e-05, + "loss": 1.5566, + "step": 1097 + }, + { + "epoch": 0.03223912149862, + "grad_norm": 0.0, + "learning_rate": 1.9999738831900346e-05, + "loss": 1.5342, + "step": 1098 + }, + { + "epoch": 0.032268483175759, + "grad_norm": 0.0, + "learning_rate": 1.9999731913870446e-05, + "loss": 1.4619, + "step": 1099 + }, + { + "epoch": 0.032297844852897994, + "grad_norm": 0.0, + "learning_rate": 1.999972490541041e-05, + "loss": 1.4717, + "step": 1100 + }, + { + "epoch": 0.032327206530037, + "grad_norm": 0.0, + "learning_rate": 1.9999717806520297e-05, + "loss": 1.5811, + "step": 1101 + }, + { + "epoch": 0.032356568207175994, + "grad_norm": 0.0, + "learning_rate": 1.9999710617200178e-05, + "loss": 1.5088, + "step": 1102 + }, + { + "epoch": 0.03238592988431499, + "grad_norm": 0.0, + "learning_rate": 1.9999703337450112e-05, + "loss": 1.4199, + "step": 1103 + }, + { + "epoch": 0.03241529156145399, + "grad_norm": 0.0, + "learning_rate": 1.9999695967270173e-05, + "loss": 1.5723, + "step": 1104 + }, + { + "epoch": 0.03244465323859299, + "grad_norm": 0.0, + "learning_rate": 1.9999688506660416e-05, + "loss": 1.502, + "step": 1105 + }, + { + "epoch": 0.032474014915731986, + "grad_norm": 0.0, + "learning_rate": 1.999968095562092e-05, + "loss": 1.625, + "step": 1106 + }, + { + "epoch": 0.03250337659287098, + "grad_norm": 0.0, + "learning_rate": 1.9999673314151747e-05, + "loss": 1.5508, + "step": 1107 + }, + { + "epoch": 0.032532738270009985, + "grad_norm": 0.0, + "learning_rate": 1.9999665582252964e-05, + "loss": 1.4189, + "step": 1108 + }, + { + "epoch": 0.03256209994714898, + "grad_norm": 0.0, + "learning_rate": 1.9999657759924648e-05, + "loss": 1.7559, + "step": 1109 + }, + { + "epoch": 0.03259146162428798, + "grad_norm": 0.0, + "learning_rate": 1.9999649847166868e-05, + "loss": 1.6533, + "step": 1110 + }, + { + "epoch": 0.03262082330142698, + "grad_norm": 0.0, + "learning_rate": 1.9999641843979688e-05, + "loss": 1.6533, + "step": 1111 + }, + { + "epoch": 0.03265018497856598, + "grad_norm": 0.0, + "learning_rate": 1.999963375036319e-05, + "loss": 1.4824, + "step": 1112 + }, + { + "epoch": 0.03267954665570497, + "grad_norm": 0.0, + "learning_rate": 1.999962556631744e-05, + "loss": 1.5293, + "step": 1113 + }, + { + "epoch": 0.03270890833284397, + "grad_norm": 0.0, + "learning_rate": 1.9999617291842516e-05, + "loss": 1.542, + "step": 1114 + }, + { + "epoch": 0.03273827000998297, + "grad_norm": 0.0, + "learning_rate": 1.999960892693849e-05, + "loss": 1.4873, + "step": 1115 + }, + { + "epoch": 0.03276763168712197, + "grad_norm": 0.0, + "learning_rate": 1.9999600471605442e-05, + "loss": 1.6025, + "step": 1116 + }, + { + "epoch": 0.032796993364260965, + "grad_norm": 0.0, + "learning_rate": 1.9999591925843447e-05, + "loss": 1.625, + "step": 1117 + }, + { + "epoch": 0.03282635504139996, + "grad_norm": 0.0, + "learning_rate": 1.999958328965258e-05, + "loss": 1.6348, + "step": 1118 + }, + { + "epoch": 0.032855716718538965, + "grad_norm": 0.0, + "learning_rate": 1.999957456303292e-05, + "loss": 1.5293, + "step": 1119 + }, + { + "epoch": 0.03288507839567796, + "grad_norm": 0.0, + "learning_rate": 1.9999565745984548e-05, + "loss": 1.6455, + "step": 1120 + }, + { + "epoch": 0.03291444007281696, + "grad_norm": 0.0, + "learning_rate": 1.9999556838507537e-05, + "loss": 1.6436, + "step": 1121 + }, + { + "epoch": 0.03294380174995596, + "grad_norm": 0.0, + "learning_rate": 1.9999547840601977e-05, + "loss": 1.4648, + "step": 1122 + }, + { + "epoch": 0.032973163427094956, + "grad_norm": 0.0, + "learning_rate": 1.9999538752267942e-05, + "loss": 1.4902, + "step": 1123 + }, + { + "epoch": 0.03300252510423395, + "grad_norm": 0.0, + "learning_rate": 1.9999529573505524e-05, + "loss": 1.583, + "step": 1124 + }, + { + "epoch": 0.03303188678137295, + "grad_norm": 0.0, + "learning_rate": 1.9999520304314793e-05, + "loss": 1.4932, + "step": 1125 + }, + { + "epoch": 0.03306124845851195, + "grad_norm": 0.0, + "learning_rate": 1.999951094469584e-05, + "loss": 1.501, + "step": 1126 + }, + { + "epoch": 0.03309061013565095, + "grad_norm": 0.0, + "learning_rate": 1.999950149464875e-05, + "loss": 1.4346, + "step": 1127 + }, + { + "epoch": 0.033119971812789945, + "grad_norm": 0.0, + "learning_rate": 1.9999491954173605e-05, + "loss": 1.5615, + "step": 1128 + }, + { + "epoch": 0.03314933348992895, + "grad_norm": 0.0, + "learning_rate": 1.9999482323270495e-05, + "loss": 1.5986, + "step": 1129 + }, + { + "epoch": 0.033178695167067944, + "grad_norm": 0.0, + "learning_rate": 1.9999472601939506e-05, + "loss": 1.5713, + "step": 1130 + }, + { + "epoch": 0.03320805684420694, + "grad_norm": 0.0, + "learning_rate": 1.9999462790180723e-05, + "loss": 1.5879, + "step": 1131 + }, + { + "epoch": 0.033237418521345936, + "grad_norm": 0.0, + "learning_rate": 1.999945288799424e-05, + "loss": 1.5459, + "step": 1132 + }, + { + "epoch": 0.03326678019848494, + "grad_norm": 0.0, + "learning_rate": 1.999944289538014e-05, + "loss": 1.5166, + "step": 1133 + }, + { + "epoch": 0.033296141875623936, + "grad_norm": 0.0, + "learning_rate": 1.999943281233852e-05, + "loss": 1.4805, + "step": 1134 + }, + { + "epoch": 0.03332550355276293, + "grad_norm": 0.0, + "learning_rate": 1.9999422638869465e-05, + "loss": 1.5244, + "step": 1135 + }, + { + "epoch": 0.033354865229901935, + "grad_norm": 0.0, + "learning_rate": 1.9999412374973076e-05, + "loss": 1.6318, + "step": 1136 + }, + { + "epoch": 0.03338422690704093, + "grad_norm": 0.0, + "learning_rate": 1.9999402020649436e-05, + "loss": 1.5889, + "step": 1137 + }, + { + "epoch": 0.03341358858417993, + "grad_norm": 0.0, + "learning_rate": 1.9999391575898642e-05, + "loss": 1.5586, + "step": 1138 + }, + { + "epoch": 0.033442950261318924, + "grad_norm": 0.0, + "learning_rate": 1.999938104072079e-05, + "loss": 1.5146, + "step": 1139 + }, + { + "epoch": 0.03347231193845793, + "grad_norm": 0.0, + "learning_rate": 1.9999370415115975e-05, + "loss": 1.5098, + "step": 1140 + }, + { + "epoch": 0.03350167361559692, + "grad_norm": 0.0, + "learning_rate": 1.9999359699084292e-05, + "loss": 1.4912, + "step": 1141 + }, + { + "epoch": 0.03353103529273592, + "grad_norm": 0.0, + "learning_rate": 1.999934889262584e-05, + "loss": 1.5957, + "step": 1142 + }, + { + "epoch": 0.03356039696987492, + "grad_norm": 0.0, + "learning_rate": 1.999933799574071e-05, + "loss": 1.5498, + "step": 1143 + }, + { + "epoch": 0.03358975864701392, + "grad_norm": 0.0, + "learning_rate": 1.9999327008429012e-05, + "loss": 1.4707, + "step": 1144 + }, + { + "epoch": 0.033619120324152915, + "grad_norm": 0.0, + "learning_rate": 1.9999315930690835e-05, + "loss": 1.6338, + "step": 1145 + }, + { + "epoch": 0.03364848200129191, + "grad_norm": 0.0, + "learning_rate": 1.999930476252628e-05, + "loss": 1.5361, + "step": 1146 + }, + { + "epoch": 0.033677843678430915, + "grad_norm": 0.0, + "learning_rate": 1.9999293503935456e-05, + "loss": 1.5947, + "step": 1147 + }, + { + "epoch": 0.03370720535556991, + "grad_norm": 0.0, + "learning_rate": 1.999928215491846e-05, + "loss": 1.5898, + "step": 1148 + }, + { + "epoch": 0.03373656703270891, + "grad_norm": 0.0, + "learning_rate": 1.9999270715475396e-05, + "loss": 1.4932, + "step": 1149 + }, + { + "epoch": 0.0337659287098479, + "grad_norm": 0.0, + "learning_rate": 1.9999259185606364e-05, + "loss": 1.5645, + "step": 1150 + }, + { + "epoch": 0.033795290386986906, + "grad_norm": 0.0, + "learning_rate": 1.9999247565311468e-05, + "loss": 1.584, + "step": 1151 + }, + { + "epoch": 0.0338246520641259, + "grad_norm": 0.0, + "learning_rate": 1.9999235854590818e-05, + "loss": 1.542, + "step": 1152 + }, + { + "epoch": 0.0338540137412649, + "grad_norm": 0.0, + "learning_rate": 1.999922405344452e-05, + "loss": 1.5146, + "step": 1153 + }, + { + "epoch": 0.0338833754184039, + "grad_norm": 0.0, + "learning_rate": 1.9999212161872675e-05, + "loss": 1.5879, + "step": 1154 + }, + { + "epoch": 0.0339127370955429, + "grad_norm": 0.0, + "learning_rate": 1.999920017987539e-05, + "loss": 1.4502, + "step": 1155 + }, + { + "epoch": 0.033942098772681895, + "grad_norm": 0.0, + "learning_rate": 1.9999188107452784e-05, + "loss": 1.6055, + "step": 1156 + }, + { + "epoch": 0.03397146044982089, + "grad_norm": 0.0, + "learning_rate": 1.9999175944604958e-05, + "loss": 1.5889, + "step": 1157 + }, + { + "epoch": 0.034000822126959894, + "grad_norm": 0.0, + "learning_rate": 1.9999163691332017e-05, + "loss": 1.6416, + "step": 1158 + }, + { + "epoch": 0.03403018380409889, + "grad_norm": 0.0, + "learning_rate": 1.999915134763408e-05, + "loss": 1.5283, + "step": 1159 + }, + { + "epoch": 0.034059545481237886, + "grad_norm": 0.0, + "learning_rate": 1.999913891351126e-05, + "loss": 1.6221, + "step": 1160 + }, + { + "epoch": 0.03408890715837689, + "grad_norm": 0.0, + "learning_rate": 1.9999126388963663e-05, + "loss": 1.4932, + "step": 1161 + }, + { + "epoch": 0.034118268835515886, + "grad_norm": 0.0, + "learning_rate": 1.999911377399141e-05, + "loss": 1.457, + "step": 1162 + }, + { + "epoch": 0.03414763051265488, + "grad_norm": 0.0, + "learning_rate": 1.9999101068594604e-05, + "loss": 1.5312, + "step": 1163 + }, + { + "epoch": 0.03417699218979388, + "grad_norm": 0.0, + "learning_rate": 1.999908827277337e-05, + "loss": 1.4697, + "step": 1164 + }, + { + "epoch": 0.03420635386693288, + "grad_norm": 0.0, + "learning_rate": 1.999907538652782e-05, + "loss": 1.5508, + "step": 1165 + }, + { + "epoch": 0.03423571554407188, + "grad_norm": 0.0, + "learning_rate": 1.9999062409858066e-05, + "loss": 1.3594, + "step": 1166 + }, + { + "epoch": 0.034265077221210874, + "grad_norm": 0.0, + "learning_rate": 1.9999049342764235e-05, + "loss": 1.6494, + "step": 1167 + }, + { + "epoch": 0.03429443889834988, + "grad_norm": 0.0, + "learning_rate": 1.999903618524644e-05, + "loss": 1.5254, + "step": 1168 + }, + { + "epoch": 0.03432380057548887, + "grad_norm": 0.0, + "learning_rate": 1.99990229373048e-05, + "loss": 1.5225, + "step": 1169 + }, + { + "epoch": 0.03435316225262787, + "grad_norm": 0.0, + "learning_rate": 1.999900959893943e-05, + "loss": 1.4668, + "step": 1170 + }, + { + "epoch": 0.034382523929766866, + "grad_norm": 0.0, + "learning_rate": 1.999899617015046e-05, + "loss": 1.6582, + "step": 1171 + }, + { + "epoch": 0.03441188560690587, + "grad_norm": 0.0, + "learning_rate": 1.9998982650938007e-05, + "loss": 1.5078, + "step": 1172 + }, + { + "epoch": 0.034441247284044865, + "grad_norm": 0.0, + "learning_rate": 1.999896904130219e-05, + "loss": 1.4453, + "step": 1173 + }, + { + "epoch": 0.03447060896118386, + "grad_norm": 0.0, + "learning_rate": 1.9998955341243137e-05, + "loss": 1.5723, + "step": 1174 + }, + { + "epoch": 0.03449997063832286, + "grad_norm": 0.0, + "learning_rate": 1.999894155076097e-05, + "loss": 1.5098, + "step": 1175 + }, + { + "epoch": 0.03452933231546186, + "grad_norm": 0.0, + "learning_rate": 1.9998927669855813e-05, + "loss": 1.4688, + "step": 1176 + }, + { + "epoch": 0.03455869399260086, + "grad_norm": 0.0, + "learning_rate": 1.9998913698527792e-05, + "loss": 1.5771, + "step": 1177 + }, + { + "epoch": 0.03458805566973985, + "grad_norm": 0.0, + "learning_rate": 1.9998899636777035e-05, + "loss": 1.4473, + "step": 1178 + }, + { + "epoch": 0.034617417346878857, + "grad_norm": 0.0, + "learning_rate": 1.9998885484603668e-05, + "loss": 1.5781, + "step": 1179 + }, + { + "epoch": 0.03464677902401785, + "grad_norm": 0.0, + "learning_rate": 1.999887124200782e-05, + "loss": 1.5938, + "step": 1180 + }, + { + "epoch": 0.03467614070115685, + "grad_norm": 0.0, + "learning_rate": 1.9998856908989615e-05, + "loss": 1.6465, + "step": 1181 + }, + { + "epoch": 0.034705502378295845, + "grad_norm": 0.0, + "learning_rate": 1.999884248554919e-05, + "loss": 1.6182, + "step": 1182 + }, + { + "epoch": 0.03473486405543485, + "grad_norm": 0.0, + "learning_rate": 1.9998827971686666e-05, + "loss": 1.5801, + "step": 1183 + }, + { + "epoch": 0.034764225732573845, + "grad_norm": 0.0, + "learning_rate": 1.9998813367402185e-05, + "loss": 1.5635, + "step": 1184 + }, + { + "epoch": 0.03479358740971284, + "grad_norm": 0.0, + "learning_rate": 1.9998798672695874e-05, + "loss": 1.5732, + "step": 1185 + }, + { + "epoch": 0.034822949086851844, + "grad_norm": 0.0, + "learning_rate": 1.9998783887567864e-05, + "loss": 1.5479, + "step": 1186 + }, + { + "epoch": 0.03485231076399084, + "grad_norm": 0.0, + "learning_rate": 1.999876901201829e-05, + "loss": 1.5674, + "step": 1187 + }, + { + "epoch": 0.034881672441129837, + "grad_norm": 0.0, + "learning_rate": 1.999875404604729e-05, + "loss": 1.4609, + "step": 1188 + }, + { + "epoch": 0.03491103411826883, + "grad_norm": 0.0, + "learning_rate": 1.9998738989654992e-05, + "loss": 1.5195, + "step": 1189 + }, + { + "epoch": 0.034940395795407836, + "grad_norm": 0.0, + "learning_rate": 1.9998723842841542e-05, + "loss": 1.5205, + "step": 1190 + }, + { + "epoch": 0.03496975747254683, + "grad_norm": 0.0, + "learning_rate": 1.9998708605607067e-05, + "loss": 1.4746, + "step": 1191 + }, + { + "epoch": 0.03499911914968583, + "grad_norm": 0.0, + "learning_rate": 1.999869327795171e-05, + "loss": 1.5342, + "step": 1192 + }, + { + "epoch": 0.03502848082682483, + "grad_norm": 0.0, + "learning_rate": 1.999867785987561e-05, + "loss": 1.5361, + "step": 1193 + }, + { + "epoch": 0.03505784250396383, + "grad_norm": 0.0, + "learning_rate": 1.9998662351378907e-05, + "loss": 1.5732, + "step": 1194 + }, + { + "epoch": 0.035087204181102824, + "grad_norm": 0.0, + "learning_rate": 1.999864675246174e-05, + "loss": 1.499, + "step": 1195 + }, + { + "epoch": 0.03511656585824182, + "grad_norm": 0.0, + "learning_rate": 1.9998631063124246e-05, + "loss": 1.5967, + "step": 1196 + }, + { + "epoch": 0.03514592753538082, + "grad_norm": 0.0, + "learning_rate": 1.9998615283366575e-05, + "loss": 1.5283, + "step": 1197 + }, + { + "epoch": 0.03517528921251982, + "grad_norm": 0.0, + "learning_rate": 1.999859941318886e-05, + "loss": 1.5352, + "step": 1198 + }, + { + "epoch": 0.035204650889658816, + "grad_norm": 0.0, + "learning_rate": 1.999858345259125e-05, + "loss": 1.5371, + "step": 1199 + }, + { + "epoch": 0.03523401256679781, + "grad_norm": 0.0, + "learning_rate": 1.9998567401573897e-05, + "loss": 1.5361, + "step": 1200 + }, + { + "epoch": 0.035263374243936815, + "grad_norm": 0.0, + "learning_rate": 1.999855126013693e-05, + "loss": 1.5635, + "step": 1201 + }, + { + "epoch": 0.03529273592107581, + "grad_norm": 0.0, + "learning_rate": 1.9998535028280506e-05, + "loss": 1.5645, + "step": 1202 + }, + { + "epoch": 0.03532209759821481, + "grad_norm": 0.0, + "learning_rate": 1.999851870600477e-05, + "loss": 1.5537, + "step": 1203 + }, + { + "epoch": 0.03535145927535381, + "grad_norm": 0.0, + "learning_rate": 1.9998502293309868e-05, + "loss": 1.6289, + "step": 1204 + }, + { + "epoch": 0.03538082095249281, + "grad_norm": 0.0, + "learning_rate": 1.9998485790195953e-05, + "loss": 1.5029, + "step": 1205 + }, + { + "epoch": 0.0354101826296318, + "grad_norm": 0.0, + "learning_rate": 1.9998469196663165e-05, + "loss": 1.5361, + "step": 1206 + }, + { + "epoch": 0.0354395443067708, + "grad_norm": 0.0, + "learning_rate": 1.999845251271166e-05, + "loss": 1.5068, + "step": 1207 + }, + { + "epoch": 0.0354689059839098, + "grad_norm": 0.0, + "learning_rate": 1.999843573834159e-05, + "loss": 1.501, + "step": 1208 + }, + { + "epoch": 0.0354982676610488, + "grad_norm": 0.0, + "learning_rate": 1.9998418873553102e-05, + "loss": 1.7051, + "step": 1209 + }, + { + "epoch": 0.035527629338187795, + "grad_norm": 0.0, + "learning_rate": 1.9998401918346356e-05, + "loss": 1.5547, + "step": 1210 + }, + { + "epoch": 0.0355569910153268, + "grad_norm": 0.0, + "learning_rate": 1.99983848727215e-05, + "loss": 1.5557, + "step": 1211 + }, + { + "epoch": 0.035586352692465795, + "grad_norm": 0.0, + "learning_rate": 1.9998367736678684e-05, + "loss": 1.4795, + "step": 1212 + }, + { + "epoch": 0.03561571436960479, + "grad_norm": 0.0, + "learning_rate": 1.9998350510218073e-05, + "loss": 1.5049, + "step": 1213 + }, + { + "epoch": 0.03564507604674379, + "grad_norm": 0.0, + "learning_rate": 1.999833319333982e-05, + "loss": 1.5459, + "step": 1214 + }, + { + "epoch": 0.03567443772388279, + "grad_norm": 0.0, + "learning_rate": 1.999831578604407e-05, + "loss": 1.5869, + "step": 1215 + }, + { + "epoch": 0.03570379940102179, + "grad_norm": 0.0, + "learning_rate": 1.9998298288330997e-05, + "loss": 1.5566, + "step": 1216 + }, + { + "epoch": 0.03573316107816078, + "grad_norm": 0.0, + "learning_rate": 1.999828070020075e-05, + "loss": 1.3711, + "step": 1217 + }, + { + "epoch": 0.035762522755299786, + "grad_norm": 0.0, + "learning_rate": 1.999826302165349e-05, + "loss": 1.6826, + "step": 1218 + }, + { + "epoch": 0.03579188443243878, + "grad_norm": 0.0, + "learning_rate": 1.999824525268938e-05, + "loss": 1.5137, + "step": 1219 + }, + { + "epoch": 0.03582124610957778, + "grad_norm": 0.0, + "learning_rate": 1.9998227393308573e-05, + "loss": 1.5029, + "step": 1220 + }, + { + "epoch": 0.035850607786716775, + "grad_norm": 0.0, + "learning_rate": 1.9998209443511238e-05, + "loss": 1.5068, + "step": 1221 + }, + { + "epoch": 0.03587996946385578, + "grad_norm": 0.0, + "learning_rate": 1.9998191403297533e-05, + "loss": 1.5527, + "step": 1222 + }, + { + "epoch": 0.035909331140994774, + "grad_norm": 0.0, + "learning_rate": 1.999817327266762e-05, + "loss": 1.4619, + "step": 1223 + }, + { + "epoch": 0.03593869281813377, + "grad_norm": 0.0, + "learning_rate": 1.9998155051621672e-05, + "loss": 1.5713, + "step": 1224 + }, + { + "epoch": 0.03596805449527277, + "grad_norm": 0.0, + "learning_rate": 1.9998136740159842e-05, + "loss": 1.6221, + "step": 1225 + }, + { + "epoch": 0.03599741617241177, + "grad_norm": 0.0, + "learning_rate": 1.9998118338282303e-05, + "loss": 1.418, + "step": 1226 + }, + { + "epoch": 0.036026777849550766, + "grad_norm": 0.0, + "learning_rate": 1.999809984598922e-05, + "loss": 1.4463, + "step": 1227 + }, + { + "epoch": 0.03605613952668976, + "grad_norm": 0.0, + "learning_rate": 1.9998081263280756e-05, + "loss": 1.6338, + "step": 1228 + }, + { + "epoch": 0.036085501203828765, + "grad_norm": 0.0, + "learning_rate": 1.9998062590157087e-05, + "loss": 1.5459, + "step": 1229 + }, + { + "epoch": 0.03611486288096776, + "grad_norm": 0.0, + "learning_rate": 1.9998043826618377e-05, + "loss": 1.624, + "step": 1230 + }, + { + "epoch": 0.03614422455810676, + "grad_norm": 0.0, + "learning_rate": 1.9998024972664792e-05, + "loss": 1.5586, + "step": 1231 + }, + { + "epoch": 0.036173586235245754, + "grad_norm": 0.0, + "learning_rate": 1.999800602829651e-05, + "loss": 1.4795, + "step": 1232 + }, + { + "epoch": 0.03620294791238476, + "grad_norm": 0.0, + "learning_rate": 1.9997986993513698e-05, + "loss": 1.6787, + "step": 1233 + }, + { + "epoch": 0.03623230958952375, + "grad_norm": 0.0, + "learning_rate": 1.999796786831653e-05, + "loss": 1.6387, + "step": 1234 + }, + { + "epoch": 0.03626167126666275, + "grad_norm": 0.0, + "learning_rate": 1.9997948652705176e-05, + "loss": 1.6182, + "step": 1235 + }, + { + "epoch": 0.03629103294380175, + "grad_norm": 0.0, + "learning_rate": 1.9997929346679814e-05, + "loss": 1.6748, + "step": 1236 + }, + { + "epoch": 0.03632039462094075, + "grad_norm": 0.0, + "learning_rate": 1.9997909950240614e-05, + "loss": 1.6006, + "step": 1237 + }, + { + "epoch": 0.036349756298079745, + "grad_norm": 0.0, + "learning_rate": 1.9997890463387758e-05, + "loss": 1.666, + "step": 1238 + }, + { + "epoch": 0.03637911797521874, + "grad_norm": 0.0, + "learning_rate": 1.9997870886121415e-05, + "loss": 1.6689, + "step": 1239 + }, + { + "epoch": 0.036408479652357745, + "grad_norm": 0.0, + "learning_rate": 1.9997851218441767e-05, + "loss": 1.5527, + "step": 1240 + }, + { + "epoch": 0.03643784132949674, + "grad_norm": 0.0, + "learning_rate": 1.9997831460348988e-05, + "loss": 1.5811, + "step": 1241 + }, + { + "epoch": 0.03646720300663574, + "grad_norm": 0.0, + "learning_rate": 1.999781161184326e-05, + "loss": 1.5615, + "step": 1242 + }, + { + "epoch": 0.03649656468377474, + "grad_norm": 0.0, + "learning_rate": 1.9997791672924758e-05, + "loss": 1.5293, + "step": 1243 + }, + { + "epoch": 0.03652592636091374, + "grad_norm": 0.0, + "learning_rate": 1.999777164359367e-05, + "loss": 1.6611, + "step": 1244 + }, + { + "epoch": 0.03655528803805273, + "grad_norm": 0.0, + "learning_rate": 1.999775152385017e-05, + "loss": 1.582, + "step": 1245 + }, + { + "epoch": 0.03658464971519173, + "grad_norm": 0.0, + "learning_rate": 1.9997731313694442e-05, + "loss": 1.4668, + "step": 1246 + }, + { + "epoch": 0.03661401139233073, + "grad_norm": 0.0, + "learning_rate": 1.999771101312667e-05, + "loss": 1.6484, + "step": 1247 + }, + { + "epoch": 0.03664337306946973, + "grad_norm": 0.0, + "learning_rate": 1.9997690622147037e-05, + "loss": 1.5957, + "step": 1248 + }, + { + "epoch": 0.036672734746608725, + "grad_norm": 0.0, + "learning_rate": 1.999767014075573e-05, + "loss": 1.5732, + "step": 1249 + }, + { + "epoch": 0.03670209642374773, + "grad_norm": 0.0, + "learning_rate": 1.9997649568952922e-05, + "loss": 1.6504, + "step": 1250 + }, + { + "epoch": 0.036731458100886724, + "grad_norm": 0.0, + "learning_rate": 1.999762890673882e-05, + "loss": 1.5879, + "step": 1251 + }, + { + "epoch": 0.03676081977802572, + "grad_norm": 0.0, + "learning_rate": 1.999760815411359e-05, + "loss": 1.5195, + "step": 1252 + }, + { + "epoch": 0.03679018145516472, + "grad_norm": 0.0, + "learning_rate": 1.9997587311077438e-05, + "loss": 1.5176, + "step": 1253 + }, + { + "epoch": 0.03681954313230372, + "grad_norm": 0.0, + "learning_rate": 1.9997566377630537e-05, + "loss": 1.5869, + "step": 1254 + }, + { + "epoch": 0.036848904809442716, + "grad_norm": 0.0, + "learning_rate": 1.9997545353773088e-05, + "loss": 1.5186, + "step": 1255 + }, + { + "epoch": 0.03687826648658171, + "grad_norm": 0.0, + "learning_rate": 1.999752423950527e-05, + "loss": 1.6045, + "step": 1256 + }, + { + "epoch": 0.03690762816372071, + "grad_norm": 0.0, + "learning_rate": 1.9997503034827282e-05, + "loss": 1.6221, + "step": 1257 + }, + { + "epoch": 0.03693698984085971, + "grad_norm": 0.0, + "learning_rate": 1.9997481739739315e-05, + "loss": 1.6309, + "step": 1258 + }, + { + "epoch": 0.03696635151799871, + "grad_norm": 0.0, + "learning_rate": 1.999746035424156e-05, + "loss": 1.5977, + "step": 1259 + }, + { + "epoch": 0.036995713195137704, + "grad_norm": 0.0, + "learning_rate": 1.999743887833421e-05, + "loss": 1.5312, + "step": 1260 + }, + { + "epoch": 0.03702507487227671, + "grad_norm": 0.0, + "learning_rate": 1.999741731201746e-05, + "loss": 1.4473, + "step": 1261 + }, + { + "epoch": 0.037054436549415704, + "grad_norm": 0.0, + "learning_rate": 1.9997395655291504e-05, + "loss": 1.5693, + "step": 1262 + }, + { + "epoch": 0.0370837982265547, + "grad_norm": 0.0, + "learning_rate": 1.999737390815654e-05, + "loss": 1.5674, + "step": 1263 + }, + { + "epoch": 0.037113159903693696, + "grad_norm": 0.0, + "learning_rate": 1.9997352070612764e-05, + "loss": 1.5508, + "step": 1264 + }, + { + "epoch": 0.0371425215808327, + "grad_norm": 0.0, + "learning_rate": 1.999733014266037e-05, + "loss": 1.4697, + "step": 1265 + }, + { + "epoch": 0.037171883257971695, + "grad_norm": 0.0, + "learning_rate": 1.9997308124299565e-05, + "loss": 1.5469, + "step": 1266 + }, + { + "epoch": 0.03720124493511069, + "grad_norm": 0.0, + "learning_rate": 1.9997286015530536e-05, + "loss": 1.5449, + "step": 1267 + }, + { + "epoch": 0.037230606612249695, + "grad_norm": 0.0, + "learning_rate": 1.9997263816353493e-05, + "loss": 1.584, + "step": 1268 + }, + { + "epoch": 0.03725996828938869, + "grad_norm": 0.0, + "learning_rate": 1.999724152676863e-05, + "loss": 1.4678, + "step": 1269 + }, + { + "epoch": 0.03728932996652769, + "grad_norm": 0.0, + "learning_rate": 1.9997219146776154e-05, + "loss": 1.4209, + "step": 1270 + }, + { + "epoch": 0.037318691643666684, + "grad_norm": 0.0, + "learning_rate": 1.9997196676376264e-05, + "loss": 1.5498, + "step": 1271 + }, + { + "epoch": 0.03734805332080569, + "grad_norm": 0.0, + "learning_rate": 1.9997174115569163e-05, + "loss": 1.3887, + "step": 1272 + }, + { + "epoch": 0.03737741499794468, + "grad_norm": 0.0, + "learning_rate": 1.9997151464355057e-05, + "loss": 1.5264, + "step": 1273 + }, + { + "epoch": 0.03740677667508368, + "grad_norm": 0.0, + "learning_rate": 1.999712872273415e-05, + "loss": 1.5439, + "step": 1274 + }, + { + "epoch": 0.03743613835222268, + "grad_norm": 0.0, + "learning_rate": 1.999710589070665e-05, + "loss": 1.499, + "step": 1275 + }, + { + "epoch": 0.03746550002936168, + "grad_norm": 0.0, + "learning_rate": 1.9997082968272757e-05, + "loss": 1.5859, + "step": 1276 + }, + { + "epoch": 0.037494861706500675, + "grad_norm": 0.0, + "learning_rate": 1.9997059955432686e-05, + "loss": 1.6523, + "step": 1277 + }, + { + "epoch": 0.03752422338363967, + "grad_norm": 0.0, + "learning_rate": 1.9997036852186638e-05, + "loss": 1.5879, + "step": 1278 + }, + { + "epoch": 0.037553585060778674, + "grad_norm": 0.0, + "learning_rate": 1.999701365853483e-05, + "loss": 1.4766, + "step": 1279 + }, + { + "epoch": 0.03758294673791767, + "grad_norm": 0.0, + "learning_rate": 1.9996990374477465e-05, + "loss": 1.4238, + "step": 1280 + }, + { + "epoch": 0.03761230841505667, + "grad_norm": 0.0, + "learning_rate": 1.9996967000014757e-05, + "loss": 1.4395, + "step": 1281 + }, + { + "epoch": 0.03764167009219566, + "grad_norm": 0.0, + "learning_rate": 1.9996943535146914e-05, + "loss": 1.5596, + "step": 1282 + }, + { + "epoch": 0.037671031769334666, + "grad_norm": 0.0, + "learning_rate": 1.999691997987415e-05, + "loss": 1.5762, + "step": 1283 + }, + { + "epoch": 0.03770039344647366, + "grad_norm": 0.0, + "learning_rate": 1.999689633419668e-05, + "loss": 1.4961, + "step": 1284 + }, + { + "epoch": 0.03772975512361266, + "grad_norm": 0.0, + "learning_rate": 1.9996872598114713e-05, + "loss": 1.6592, + "step": 1285 + }, + { + "epoch": 0.03775911680075166, + "grad_norm": 0.0, + "learning_rate": 1.999684877162847e-05, + "loss": 1.5166, + "step": 1286 + }, + { + "epoch": 0.03778847847789066, + "grad_norm": 0.0, + "learning_rate": 1.9996824854738162e-05, + "loss": 1.4951, + "step": 1287 + }, + { + "epoch": 0.037817840155029654, + "grad_norm": 0.0, + "learning_rate": 1.999680084744401e-05, + "loss": 1.5342, + "step": 1288 + }, + { + "epoch": 0.03784720183216865, + "grad_norm": 0.0, + "learning_rate": 1.9996776749746225e-05, + "loss": 1.5996, + "step": 1289 + }, + { + "epoch": 0.037876563509307654, + "grad_norm": 0.0, + "learning_rate": 1.9996752561645026e-05, + "loss": 1.5986, + "step": 1290 + }, + { + "epoch": 0.03790592518644665, + "grad_norm": 0.0, + "learning_rate": 1.9996728283140635e-05, + "loss": 1.5654, + "step": 1291 + }, + { + "epoch": 0.037935286863585646, + "grad_norm": 0.0, + "learning_rate": 1.9996703914233274e-05, + "loss": 1.5547, + "step": 1292 + }, + { + "epoch": 0.03796464854072465, + "grad_norm": 0.0, + "learning_rate": 1.9996679454923155e-05, + "loss": 1.5771, + "step": 1293 + }, + { + "epoch": 0.037994010217863645, + "grad_norm": 0.0, + "learning_rate": 1.9996654905210503e-05, + "loss": 1.5977, + "step": 1294 + }, + { + "epoch": 0.03802337189500264, + "grad_norm": 0.0, + "learning_rate": 1.9996630265095542e-05, + "loss": 1.6816, + "step": 1295 + }, + { + "epoch": 0.03805273357214164, + "grad_norm": 0.0, + "learning_rate": 1.999660553457849e-05, + "loss": 1.585, + "step": 1296 + }, + { + "epoch": 0.03808209524928064, + "grad_norm": 0.0, + "learning_rate": 1.999658071365958e-05, + "loss": 1.6055, + "step": 1297 + }, + { + "epoch": 0.03811145692641964, + "grad_norm": 0.0, + "learning_rate": 1.9996555802339025e-05, + "loss": 1.5547, + "step": 1298 + }, + { + "epoch": 0.038140818603558634, + "grad_norm": 0.0, + "learning_rate": 1.999653080061706e-05, + "loss": 1.3164, + "step": 1299 + }, + { + "epoch": 0.03817018028069764, + "grad_norm": 0.0, + "learning_rate": 1.9996505708493903e-05, + "loss": 1.5107, + "step": 1300 + }, + { + "epoch": 0.03819954195783663, + "grad_norm": 0.0, + "learning_rate": 1.9996480525969787e-05, + "loss": 1.5352, + "step": 1301 + }, + { + "epoch": 0.03822890363497563, + "grad_norm": 0.0, + "learning_rate": 1.9996455253044934e-05, + "loss": 1.5117, + "step": 1302 + }, + { + "epoch": 0.038258265312114625, + "grad_norm": 0.0, + "learning_rate": 1.9996429889719582e-05, + "loss": 1.5361, + "step": 1303 + }, + { + "epoch": 0.03828762698925363, + "grad_norm": 0.0, + "learning_rate": 1.999640443599395e-05, + "loss": 1.6484, + "step": 1304 + }, + { + "epoch": 0.038316988666392625, + "grad_norm": 0.0, + "learning_rate": 1.9996378891868272e-05, + "loss": 1.4141, + "step": 1305 + }, + { + "epoch": 0.03834635034353162, + "grad_norm": 0.0, + "learning_rate": 1.9996353257342784e-05, + "loss": 1.4814, + "step": 1306 + }, + { + "epoch": 0.03837571202067062, + "grad_norm": 0.0, + "learning_rate": 1.9996327532417708e-05, + "loss": 1.4561, + "step": 1307 + }, + { + "epoch": 0.03840507369780962, + "grad_norm": 0.0, + "learning_rate": 1.999630171709329e-05, + "loss": 1.4688, + "step": 1308 + }, + { + "epoch": 0.03843443537494862, + "grad_norm": 0.0, + "learning_rate": 1.999627581136975e-05, + "loss": 1.5361, + "step": 1309 + }, + { + "epoch": 0.03846379705208761, + "grad_norm": 0.0, + "learning_rate": 1.9996249815247325e-05, + "loss": 1.6279, + "step": 1310 + }, + { + "epoch": 0.038493158729226616, + "grad_norm": 0.0, + "learning_rate": 1.999622372872626e-05, + "loss": 1.5176, + "step": 1311 + }, + { + "epoch": 0.03852252040636561, + "grad_norm": 0.0, + "learning_rate": 1.9996197551806778e-05, + "loss": 1.4326, + "step": 1312 + }, + { + "epoch": 0.03855188208350461, + "grad_norm": 0.0, + "learning_rate": 1.9996171284489125e-05, + "loss": 1.5859, + "step": 1313 + }, + { + "epoch": 0.038581243760643605, + "grad_norm": 0.0, + "learning_rate": 1.999614492677353e-05, + "loss": 1.4795, + "step": 1314 + }, + { + "epoch": 0.03861060543778261, + "grad_norm": 0.0, + "learning_rate": 1.999611847866024e-05, + "loss": 1.4736, + "step": 1315 + }, + { + "epoch": 0.038639967114921604, + "grad_norm": 0.0, + "learning_rate": 1.999609194014949e-05, + "loss": 1.5898, + "step": 1316 + }, + { + "epoch": 0.0386693287920606, + "grad_norm": 0.0, + "learning_rate": 1.9996065311241522e-05, + "loss": 1.4844, + "step": 1317 + }, + { + "epoch": 0.038698690469199604, + "grad_norm": 0.0, + "learning_rate": 1.999603859193657e-05, + "loss": 1.4834, + "step": 1318 + }, + { + "epoch": 0.0387280521463386, + "grad_norm": 0.0, + "learning_rate": 1.999601178223489e-05, + "loss": 1.5195, + "step": 1319 + }, + { + "epoch": 0.038757413823477596, + "grad_norm": 0.0, + "learning_rate": 1.999598488213671e-05, + "loss": 1.5166, + "step": 1320 + }, + { + "epoch": 0.03878677550061659, + "grad_norm": 0.0, + "learning_rate": 1.9995957891642278e-05, + "loss": 1.584, + "step": 1321 + }, + { + "epoch": 0.038816137177755596, + "grad_norm": 0.0, + "learning_rate": 1.999593081075184e-05, + "loss": 1.5381, + "step": 1322 + }, + { + "epoch": 0.03884549885489459, + "grad_norm": 0.0, + "learning_rate": 1.999590363946564e-05, + "loss": 1.5293, + "step": 1323 + }, + { + "epoch": 0.03887486053203359, + "grad_norm": 0.0, + "learning_rate": 1.9995876377783922e-05, + "loss": 1.3857, + "step": 1324 + }, + { + "epoch": 0.03890422220917259, + "grad_norm": 0.0, + "learning_rate": 1.9995849025706935e-05, + "loss": 1.5381, + "step": 1325 + }, + { + "epoch": 0.03893358388631159, + "grad_norm": 0.0, + "learning_rate": 1.9995821583234924e-05, + "loss": 1.5283, + "step": 1326 + }, + { + "epoch": 0.038962945563450584, + "grad_norm": 0.0, + "learning_rate": 1.9995794050368143e-05, + "loss": 1.5918, + "step": 1327 + }, + { + "epoch": 0.03899230724058958, + "grad_norm": 0.0, + "learning_rate": 1.9995766427106832e-05, + "loss": 1.5518, + "step": 1328 + }, + { + "epoch": 0.03902166891772858, + "grad_norm": 0.0, + "learning_rate": 1.9995738713451243e-05, + "loss": 1.5957, + "step": 1329 + }, + { + "epoch": 0.03905103059486758, + "grad_norm": 0.0, + "learning_rate": 1.9995710909401636e-05, + "loss": 1.5352, + "step": 1330 + }, + { + "epoch": 0.039080392272006576, + "grad_norm": 0.0, + "learning_rate": 1.999568301495825e-05, + "loss": 1.5977, + "step": 1331 + }, + { + "epoch": 0.03910975394914557, + "grad_norm": 0.0, + "learning_rate": 1.9995655030121345e-05, + "loss": 1.6006, + "step": 1332 + }, + { + "epoch": 0.039139115626284575, + "grad_norm": 0.0, + "learning_rate": 1.999562695489117e-05, + "loss": 1.5146, + "step": 1333 + }, + { + "epoch": 0.03916847730342357, + "grad_norm": 0.0, + "learning_rate": 1.9995598789267984e-05, + "loss": 1.626, + "step": 1334 + }, + { + "epoch": 0.03919783898056257, + "grad_norm": 0.0, + "learning_rate": 1.9995570533252035e-05, + "loss": 1.4521, + "step": 1335 + }, + { + "epoch": 0.03922720065770157, + "grad_norm": 0.0, + "learning_rate": 1.9995542186843583e-05, + "loss": 1.4912, + "step": 1336 + }, + { + "epoch": 0.03925656233484057, + "grad_norm": 0.0, + "learning_rate": 1.999551375004288e-05, + "loss": 1.4463, + "step": 1337 + }, + { + "epoch": 0.03928592401197956, + "grad_norm": 0.0, + "learning_rate": 1.999548522285019e-05, + "loss": 1.6025, + "step": 1338 + }, + { + "epoch": 0.03931528568911856, + "grad_norm": 0.0, + "learning_rate": 1.9995456605265764e-05, + "loss": 1.6807, + "step": 1339 + }, + { + "epoch": 0.03934464736625756, + "grad_norm": 0.0, + "learning_rate": 1.9995427897289867e-05, + "loss": 1.5039, + "step": 1340 + }, + { + "epoch": 0.03937400904339656, + "grad_norm": 0.0, + "learning_rate": 1.9995399098922754e-05, + "loss": 1.5527, + "step": 1341 + }, + { + "epoch": 0.039403370720535555, + "grad_norm": 0.0, + "learning_rate": 1.999537021016469e-05, + "loss": 1.6211, + "step": 1342 + }, + { + "epoch": 0.03943273239767456, + "grad_norm": 0.0, + "learning_rate": 1.9995341231015928e-05, + "loss": 1.5137, + "step": 1343 + }, + { + "epoch": 0.039462094074813554, + "grad_norm": 0.0, + "learning_rate": 1.999531216147674e-05, + "loss": 1.5654, + "step": 1344 + }, + { + "epoch": 0.03949145575195255, + "grad_norm": 0.0, + "learning_rate": 1.9995283001547382e-05, + "loss": 1.4922, + "step": 1345 + }, + { + "epoch": 0.03952081742909155, + "grad_norm": 0.0, + "learning_rate": 1.999525375122812e-05, + "loss": 1.5664, + "step": 1346 + }, + { + "epoch": 0.03955017910623055, + "grad_norm": 0.0, + "learning_rate": 1.999522441051922e-05, + "loss": 1.583, + "step": 1347 + }, + { + "epoch": 0.039579540783369546, + "grad_norm": 0.0, + "learning_rate": 1.9995194979420946e-05, + "loss": 1.4609, + "step": 1348 + }, + { + "epoch": 0.03960890246050854, + "grad_norm": 0.0, + "learning_rate": 1.9995165457933562e-05, + "loss": 1.5439, + "step": 1349 + }, + { + "epoch": 0.039638264137647546, + "grad_norm": 0.0, + "learning_rate": 1.9995135846057338e-05, + "loss": 1.6182, + "step": 1350 + }, + { + "epoch": 0.03966762581478654, + "grad_norm": 0.0, + "learning_rate": 1.999510614379254e-05, + "loss": 1.5195, + "step": 1351 + }, + { + "epoch": 0.03969698749192554, + "grad_norm": 0.0, + "learning_rate": 1.9995076351139438e-05, + "loss": 1.6201, + "step": 1352 + }, + { + "epoch": 0.039726349169064534, + "grad_norm": 0.0, + "learning_rate": 1.99950464680983e-05, + "loss": 1.6074, + "step": 1353 + }, + { + "epoch": 0.03975571084620354, + "grad_norm": 0.0, + "learning_rate": 1.99950164946694e-05, + "loss": 1.5049, + "step": 1354 + }, + { + "epoch": 0.039785072523342534, + "grad_norm": 0.0, + "learning_rate": 1.9994986430853e-05, + "loss": 1.498, + "step": 1355 + }, + { + "epoch": 0.03981443420048153, + "grad_norm": 0.0, + "learning_rate": 1.9994956276649384e-05, + "loss": 1.6816, + "step": 1356 + }, + { + "epoch": 0.03984379587762053, + "grad_norm": 0.0, + "learning_rate": 1.999492603205882e-05, + "loss": 1.3926, + "step": 1357 + }, + { + "epoch": 0.03987315755475953, + "grad_norm": 0.0, + "learning_rate": 1.9994895697081577e-05, + "loss": 1.5801, + "step": 1358 + }, + { + "epoch": 0.039902519231898526, + "grad_norm": 0.0, + "learning_rate": 1.9994865271717932e-05, + "loss": 1.6416, + "step": 1359 + }, + { + "epoch": 0.03993188090903752, + "grad_norm": 0.0, + "learning_rate": 1.999483475596816e-05, + "loss": 1.6523, + "step": 1360 + }, + { + "epoch": 0.039961242586176525, + "grad_norm": 0.0, + "learning_rate": 1.9994804149832537e-05, + "loss": 1.5322, + "step": 1361 + }, + { + "epoch": 0.03999060426331552, + "grad_norm": 0.0, + "learning_rate": 1.9994773453311342e-05, + "loss": 1.5645, + "step": 1362 + }, + { + "epoch": 0.04001996594045452, + "grad_norm": 0.0, + "learning_rate": 1.999474266640485e-05, + "loss": 1.5195, + "step": 1363 + }, + { + "epoch": 0.040049327617593514, + "grad_norm": 0.0, + "learning_rate": 1.9994711789113343e-05, + "loss": 1.5117, + "step": 1364 + }, + { + "epoch": 0.04007868929473252, + "grad_norm": 0.0, + "learning_rate": 1.9994680821437094e-05, + "loss": 1.582, + "step": 1365 + }, + { + "epoch": 0.04010805097187151, + "grad_norm": 0.0, + "learning_rate": 1.999464976337639e-05, + "loss": 1.5166, + "step": 1366 + }, + { + "epoch": 0.04013741264901051, + "grad_norm": 0.0, + "learning_rate": 1.9994618614931506e-05, + "loss": 1.4668, + "step": 1367 + }, + { + "epoch": 0.04016677432614951, + "grad_norm": 0.0, + "learning_rate": 1.9994587376102728e-05, + "loss": 1.5107, + "step": 1368 + }, + { + "epoch": 0.04019613600328851, + "grad_norm": 0.0, + "learning_rate": 1.9994556046890334e-05, + "loss": 1.5156, + "step": 1369 + }, + { + "epoch": 0.040225497680427505, + "grad_norm": 0.0, + "learning_rate": 1.9994524627294615e-05, + "loss": 1.5449, + "step": 1370 + }, + { + "epoch": 0.0402548593575665, + "grad_norm": 0.0, + "learning_rate": 1.9994493117315846e-05, + "loss": 1.5791, + "step": 1371 + }, + { + "epoch": 0.040284221034705504, + "grad_norm": 0.0, + "learning_rate": 1.9994461516954318e-05, + "loss": 1.6094, + "step": 1372 + }, + { + "epoch": 0.0403135827118445, + "grad_norm": 0.0, + "learning_rate": 1.999442982621031e-05, + "loss": 1.4824, + "step": 1373 + }, + { + "epoch": 0.0403429443889835, + "grad_norm": 0.0, + "learning_rate": 1.999439804508412e-05, + "loss": 1.5713, + "step": 1374 + }, + { + "epoch": 0.0403723060661225, + "grad_norm": 0.0, + "learning_rate": 1.9994366173576028e-05, + "loss": 1.6885, + "step": 1375 + }, + { + "epoch": 0.040401667743261496, + "grad_norm": 0.0, + "learning_rate": 1.999433421168632e-05, + "loss": 1.5244, + "step": 1376 + }, + { + "epoch": 0.04043102942040049, + "grad_norm": 0.0, + "learning_rate": 1.9994302159415292e-05, + "loss": 1.5508, + "step": 1377 + }, + { + "epoch": 0.04046039109753949, + "grad_norm": 0.0, + "learning_rate": 1.9994270016763226e-05, + "loss": 1.6016, + "step": 1378 + }, + { + "epoch": 0.04048975277467849, + "grad_norm": 0.0, + "learning_rate": 1.9994237783730418e-05, + "loss": 1.3818, + "step": 1379 + }, + { + "epoch": 0.04051911445181749, + "grad_norm": 0.0, + "learning_rate": 1.9994205460317158e-05, + "loss": 1.7061, + "step": 1380 + }, + { + "epoch": 0.040548476128956484, + "grad_norm": 0.0, + "learning_rate": 1.999417304652374e-05, + "loss": 1.5762, + "step": 1381 + }, + { + "epoch": 0.04057783780609549, + "grad_norm": 0.0, + "learning_rate": 1.9994140542350454e-05, + "loss": 1.5498, + "step": 1382 + }, + { + "epoch": 0.040607199483234484, + "grad_norm": 0.0, + "learning_rate": 1.9994107947797597e-05, + "loss": 1.6738, + "step": 1383 + }, + { + "epoch": 0.04063656116037348, + "grad_norm": 0.0, + "learning_rate": 1.9994075262865458e-05, + "loss": 1.3955, + "step": 1384 + }, + { + "epoch": 0.040665922837512476, + "grad_norm": 0.0, + "learning_rate": 1.999404248755434e-05, + "loss": 1.4922, + "step": 1385 + }, + { + "epoch": 0.04069528451465148, + "grad_norm": 0.0, + "learning_rate": 1.999400962186454e-05, + "loss": 1.5664, + "step": 1386 + }, + { + "epoch": 0.040724646191790476, + "grad_norm": 0.0, + "learning_rate": 1.9993976665796346e-05, + "loss": 1.5566, + "step": 1387 + }, + { + "epoch": 0.04075400786892947, + "grad_norm": 0.0, + "learning_rate": 1.9993943619350064e-05, + "loss": 1.5391, + "step": 1388 + }, + { + "epoch": 0.04078336954606847, + "grad_norm": 0.0, + "learning_rate": 1.999391048252599e-05, + "loss": 1.4941, + "step": 1389 + }, + { + "epoch": 0.04081273122320747, + "grad_norm": 0.0, + "learning_rate": 1.9993877255324423e-05, + "loss": 1.5273, + "step": 1390 + }, + { + "epoch": 0.04084209290034647, + "grad_norm": 0.0, + "learning_rate": 1.9993843937745665e-05, + "loss": 1.3984, + "step": 1391 + }, + { + "epoch": 0.040871454577485464, + "grad_norm": 0.0, + "learning_rate": 1.999381052979002e-05, + "loss": 1.501, + "step": 1392 + }, + { + "epoch": 0.04090081625462447, + "grad_norm": 0.0, + "learning_rate": 1.9993777031457782e-05, + "loss": 1.4238, + "step": 1393 + }, + { + "epoch": 0.04093017793176346, + "grad_norm": 0.0, + "learning_rate": 1.999374344274926e-05, + "loss": 1.5879, + "step": 1394 + }, + { + "epoch": 0.04095953960890246, + "grad_norm": 0.0, + "learning_rate": 1.999370976366476e-05, + "loss": 1.4189, + "step": 1395 + }, + { + "epoch": 0.040988901286041456, + "grad_norm": 0.0, + "learning_rate": 1.999367599420458e-05, + "loss": 1.6719, + "step": 1396 + }, + { + "epoch": 0.04101826296318046, + "grad_norm": 0.0, + "learning_rate": 1.999364213436903e-05, + "loss": 1.5977, + "step": 1397 + }, + { + "epoch": 0.041047624640319455, + "grad_norm": 0.0, + "learning_rate": 1.9993608184158413e-05, + "loss": 1.5635, + "step": 1398 + }, + { + "epoch": 0.04107698631745845, + "grad_norm": 0.0, + "learning_rate": 1.9993574143573037e-05, + "loss": 1.4814, + "step": 1399 + }, + { + "epoch": 0.041106347994597454, + "grad_norm": 0.0, + "learning_rate": 1.9993540012613212e-05, + "loss": 1.3691, + "step": 1400 + }, + { + "epoch": 0.04113570967173645, + "grad_norm": 0.0, + "learning_rate": 1.9993505791279248e-05, + "loss": 1.4912, + "step": 1401 + }, + { + "epoch": 0.04116507134887545, + "grad_norm": 0.0, + "learning_rate": 1.999347147957145e-05, + "loss": 1.585, + "step": 1402 + }, + { + "epoch": 0.04119443302601444, + "grad_norm": 0.0, + "learning_rate": 1.999343707749013e-05, + "loss": 1.4434, + "step": 1403 + }, + { + "epoch": 0.041223794703153446, + "grad_norm": 0.0, + "learning_rate": 1.9993402585035597e-05, + "loss": 1.4678, + "step": 1404 + }, + { + "epoch": 0.04125315638029244, + "grad_norm": 0.0, + "learning_rate": 1.9993368002208167e-05, + "loss": 1.3916, + "step": 1405 + }, + { + "epoch": 0.04128251805743144, + "grad_norm": 0.0, + "learning_rate": 1.999333332900815e-05, + "loss": 1.6084, + "step": 1406 + }, + { + "epoch": 0.04131187973457044, + "grad_norm": 0.0, + "learning_rate": 1.999329856543586e-05, + "loss": 1.5312, + "step": 1407 + }, + { + "epoch": 0.04134124141170944, + "grad_norm": 0.0, + "learning_rate": 1.9993263711491612e-05, + "loss": 1.5967, + "step": 1408 + }, + { + "epoch": 0.041370603088848434, + "grad_norm": 0.0, + "learning_rate": 1.999322876717572e-05, + "loss": 1.6758, + "step": 1409 + }, + { + "epoch": 0.04139996476598743, + "grad_norm": 0.0, + "learning_rate": 1.9993193732488507e-05, + "loss": 1.4707, + "step": 1410 + }, + { + "epoch": 0.041429326443126434, + "grad_norm": 0.0, + "learning_rate": 1.999315860743028e-05, + "loss": 1.5596, + "step": 1411 + }, + { + "epoch": 0.04145868812026543, + "grad_norm": 0.0, + "learning_rate": 1.999312339200136e-05, + "loss": 1.415, + "step": 1412 + }, + { + "epoch": 0.041488049797404426, + "grad_norm": 0.0, + "learning_rate": 1.9993088086202066e-05, + "loss": 1.5508, + "step": 1413 + }, + { + "epoch": 0.04151741147454342, + "grad_norm": 0.0, + "learning_rate": 1.9993052690032716e-05, + "loss": 1.5498, + "step": 1414 + }, + { + "epoch": 0.041546773151682426, + "grad_norm": 0.0, + "learning_rate": 1.999301720349363e-05, + "loss": 1.5957, + "step": 1415 + }, + { + "epoch": 0.04157613482882142, + "grad_norm": 0.0, + "learning_rate": 1.9992981626585136e-05, + "loss": 1.4854, + "step": 1416 + }, + { + "epoch": 0.04160549650596042, + "grad_norm": 0.0, + "learning_rate": 1.9992945959307547e-05, + "loss": 1.3691, + "step": 1417 + }, + { + "epoch": 0.04163485818309942, + "grad_norm": 0.0, + "learning_rate": 1.9992910201661188e-05, + "loss": 1.6084, + "step": 1418 + }, + { + "epoch": 0.04166421986023842, + "grad_norm": 0.0, + "learning_rate": 1.999287435364638e-05, + "loss": 1.5771, + "step": 1419 + }, + { + "epoch": 0.041693581537377414, + "grad_norm": 0.0, + "learning_rate": 1.9992838415263452e-05, + "loss": 1.3955, + "step": 1420 + }, + { + "epoch": 0.04172294321451641, + "grad_norm": 0.0, + "learning_rate": 1.999280238651273e-05, + "loss": 1.4209, + "step": 1421 + }, + { + "epoch": 0.04175230489165541, + "grad_norm": 0.0, + "learning_rate": 1.9992766267394532e-05, + "loss": 1.4609, + "step": 1422 + }, + { + "epoch": 0.04178166656879441, + "grad_norm": 0.0, + "learning_rate": 1.999273005790919e-05, + "loss": 1.5176, + "step": 1423 + }, + { + "epoch": 0.041811028245933406, + "grad_norm": 0.0, + "learning_rate": 1.9992693758057035e-05, + "loss": 1.5898, + "step": 1424 + }, + { + "epoch": 0.04184038992307241, + "grad_norm": 0.0, + "learning_rate": 1.9992657367838387e-05, + "loss": 1.626, + "step": 1425 + }, + { + "epoch": 0.041869751600211405, + "grad_norm": 0.0, + "learning_rate": 1.9992620887253582e-05, + "loss": 1.5703, + "step": 1426 + }, + { + "epoch": 0.0418991132773504, + "grad_norm": 0.0, + "learning_rate": 1.9992584316302948e-05, + "loss": 1.4961, + "step": 1427 + }, + { + "epoch": 0.0419284749544894, + "grad_norm": 0.0, + "learning_rate": 1.999254765498681e-05, + "loss": 1.5244, + "step": 1428 + }, + { + "epoch": 0.0419578366316284, + "grad_norm": 0.0, + "learning_rate": 1.999251090330551e-05, + "loss": 1.457, + "step": 1429 + }, + { + "epoch": 0.0419871983087674, + "grad_norm": 0.0, + "learning_rate": 1.9992474061259372e-05, + "loss": 1.5918, + "step": 1430 + }, + { + "epoch": 0.04201655998590639, + "grad_norm": 0.0, + "learning_rate": 1.9992437128848734e-05, + "loss": 1.5166, + "step": 1431 + }, + { + "epoch": 0.042045921663045396, + "grad_norm": 0.0, + "learning_rate": 1.9992400106073924e-05, + "loss": 1.4443, + "step": 1432 + }, + { + "epoch": 0.04207528334018439, + "grad_norm": 0.0, + "learning_rate": 1.9992362992935285e-05, + "loss": 1.5625, + "step": 1433 + }, + { + "epoch": 0.04210464501732339, + "grad_norm": 0.0, + "learning_rate": 1.9992325789433145e-05, + "loss": 1.5137, + "step": 1434 + }, + { + "epoch": 0.042134006694462385, + "grad_norm": 0.0, + "learning_rate": 1.9992288495567846e-05, + "loss": 1.5068, + "step": 1435 + }, + { + "epoch": 0.04216336837160139, + "grad_norm": 0.0, + "learning_rate": 1.999225111133972e-05, + "loss": 1.4258, + "step": 1436 + }, + { + "epoch": 0.042192730048740384, + "grad_norm": 0.0, + "learning_rate": 1.999221363674911e-05, + "loss": 1.5684, + "step": 1437 + }, + { + "epoch": 0.04222209172587938, + "grad_norm": 0.0, + "learning_rate": 1.9992176071796355e-05, + "loss": 1.4902, + "step": 1438 + }, + { + "epoch": 0.042251453403018384, + "grad_norm": 0.0, + "learning_rate": 1.9992138416481796e-05, + "loss": 1.6025, + "step": 1439 + }, + { + "epoch": 0.04228081508015738, + "grad_norm": 0.0, + "learning_rate": 1.9992100670805762e-05, + "loss": 1.5469, + "step": 1440 + }, + { + "epoch": 0.042310176757296376, + "grad_norm": 0.0, + "learning_rate": 1.9992062834768605e-05, + "loss": 1.4141, + "step": 1441 + }, + { + "epoch": 0.04233953843443537, + "grad_norm": 0.0, + "learning_rate": 1.9992024908370665e-05, + "loss": 1.5186, + "step": 1442 + }, + { + "epoch": 0.042368900111574376, + "grad_norm": 0.0, + "learning_rate": 1.999198689161229e-05, + "loss": 1.5312, + "step": 1443 + }, + { + "epoch": 0.04239826178871337, + "grad_norm": 0.0, + "learning_rate": 1.999194878449381e-05, + "loss": 1.5869, + "step": 1444 + }, + { + "epoch": 0.04242762346585237, + "grad_norm": 0.0, + "learning_rate": 1.9991910587015586e-05, + "loss": 1.5703, + "step": 1445 + }, + { + "epoch": 0.042456985142991364, + "grad_norm": 0.0, + "learning_rate": 1.9991872299177954e-05, + "loss": 1.5869, + "step": 1446 + }, + { + "epoch": 0.04248634682013037, + "grad_norm": 0.0, + "learning_rate": 1.9991833920981255e-05, + "loss": 1.5664, + "step": 1447 + }, + { + "epoch": 0.042515708497269364, + "grad_norm": 0.0, + "learning_rate": 1.9991795452425847e-05, + "loss": 1.4473, + "step": 1448 + }, + { + "epoch": 0.04254507017440836, + "grad_norm": 0.0, + "learning_rate": 1.9991756893512078e-05, + "loss": 1.4424, + "step": 1449 + }, + { + "epoch": 0.04257443185154736, + "grad_norm": 0.0, + "learning_rate": 1.9991718244240286e-05, + "loss": 1.6016, + "step": 1450 + }, + { + "epoch": 0.04260379352868636, + "grad_norm": 0.0, + "learning_rate": 1.999167950461083e-05, + "loss": 1.5508, + "step": 1451 + }, + { + "epoch": 0.042633155205825356, + "grad_norm": 0.0, + "learning_rate": 1.999164067462406e-05, + "loss": 1.5332, + "step": 1452 + }, + { + "epoch": 0.04266251688296435, + "grad_norm": 0.0, + "learning_rate": 1.9991601754280322e-05, + "loss": 1.6104, + "step": 1453 + }, + { + "epoch": 0.042691878560103355, + "grad_norm": 0.0, + "learning_rate": 1.999156274357997e-05, + "loss": 1.4961, + "step": 1454 + }, + { + "epoch": 0.04272124023724235, + "grad_norm": 0.0, + "learning_rate": 1.9991523642523357e-05, + "loss": 1.5615, + "step": 1455 + }, + { + "epoch": 0.04275060191438135, + "grad_norm": 0.0, + "learning_rate": 1.9991484451110838e-05, + "loss": 1.4736, + "step": 1456 + }, + { + "epoch": 0.04277996359152035, + "grad_norm": 0.0, + "learning_rate": 1.9991445169342767e-05, + "loss": 1.5117, + "step": 1457 + }, + { + "epoch": 0.04280932526865935, + "grad_norm": 0.0, + "learning_rate": 1.9991405797219495e-05, + "loss": 1.6484, + "step": 1458 + }, + { + "epoch": 0.04283868694579834, + "grad_norm": 0.0, + "learning_rate": 1.9991366334741383e-05, + "loss": 1.5938, + "step": 1459 + }, + { + "epoch": 0.04286804862293734, + "grad_norm": 0.0, + "learning_rate": 1.999132678190879e-05, + "loss": 1.4951, + "step": 1460 + }, + { + "epoch": 0.04289741030007634, + "grad_norm": 0.0, + "learning_rate": 1.9991287138722066e-05, + "loss": 1.3613, + "step": 1461 + }, + { + "epoch": 0.04292677197721534, + "grad_norm": 0.0, + "learning_rate": 1.9991247405181573e-05, + "loss": 1.4941, + "step": 1462 + }, + { + "epoch": 0.042956133654354335, + "grad_norm": 0.0, + "learning_rate": 1.9991207581287675e-05, + "loss": 1.5615, + "step": 1463 + }, + { + "epoch": 0.04298549533149334, + "grad_norm": 0.0, + "learning_rate": 1.9991167667040722e-05, + "loss": 1.5205, + "step": 1464 + }, + { + "epoch": 0.043014857008632335, + "grad_norm": 0.0, + "learning_rate": 1.9991127662441086e-05, + "loss": 1.6035, + "step": 1465 + }, + { + "epoch": 0.04304421868577133, + "grad_norm": 0.0, + "learning_rate": 1.9991087567489124e-05, + "loss": 1.5244, + "step": 1466 + }, + { + "epoch": 0.04307358036291033, + "grad_norm": 0.0, + "learning_rate": 1.9991047382185197e-05, + "loss": 1.4609, + "step": 1467 + }, + { + "epoch": 0.04310294204004933, + "grad_norm": 0.0, + "learning_rate": 1.9991007106529668e-05, + "loss": 1.6016, + "step": 1468 + }, + { + "epoch": 0.043132303717188326, + "grad_norm": 0.0, + "learning_rate": 1.9990966740522907e-05, + "loss": 1.4775, + "step": 1469 + }, + { + "epoch": 0.04316166539432732, + "grad_norm": 0.0, + "learning_rate": 1.999092628416527e-05, + "loss": 1.6162, + "step": 1470 + }, + { + "epoch": 0.04319102707146632, + "grad_norm": 0.0, + "learning_rate": 1.9990885737457134e-05, + "loss": 1.6338, + "step": 1471 + }, + { + "epoch": 0.04322038874860532, + "grad_norm": 0.0, + "learning_rate": 1.9990845100398854e-05, + "loss": 1.5186, + "step": 1472 + }, + { + "epoch": 0.04324975042574432, + "grad_norm": 0.0, + "learning_rate": 1.9990804372990805e-05, + "loss": 1.6484, + "step": 1473 + }, + { + "epoch": 0.043279112102883315, + "grad_norm": 0.0, + "learning_rate": 1.9990763555233353e-05, + "loss": 1.584, + "step": 1474 + }, + { + "epoch": 0.04330847378002232, + "grad_norm": 0.0, + "learning_rate": 1.999072264712687e-05, + "loss": 1.5859, + "step": 1475 + }, + { + "epoch": 0.043337835457161314, + "grad_norm": 0.0, + "learning_rate": 1.999068164867172e-05, + "loss": 1.5068, + "step": 1476 + }, + { + "epoch": 0.04336719713430031, + "grad_norm": 0.0, + "learning_rate": 1.999064055986828e-05, + "loss": 1.4863, + "step": 1477 + }, + { + "epoch": 0.043396558811439306, + "grad_norm": 0.0, + "learning_rate": 1.9990599380716917e-05, + "loss": 1.6221, + "step": 1478 + }, + { + "epoch": 0.04342592048857831, + "grad_norm": 0.0, + "learning_rate": 1.9990558111218008e-05, + "loss": 1.5469, + "step": 1479 + }, + { + "epoch": 0.043455282165717306, + "grad_norm": 0.0, + "learning_rate": 1.9990516751371918e-05, + "loss": 1.5098, + "step": 1480 + }, + { + "epoch": 0.0434846438428563, + "grad_norm": 0.0, + "learning_rate": 1.999047530117903e-05, + "loss": 1.5723, + "step": 1481 + }, + { + "epoch": 0.043514005519995305, + "grad_norm": 0.0, + "learning_rate": 1.9990433760639715e-05, + "loss": 1.5654, + "step": 1482 + }, + { + "epoch": 0.0435433671971343, + "grad_norm": 0.0, + "learning_rate": 1.999039212975435e-05, + "loss": 1.582, + "step": 1483 + }, + { + "epoch": 0.0435727288742733, + "grad_norm": 0.0, + "learning_rate": 1.9990350408523307e-05, + "loss": 1.4521, + "step": 1484 + }, + { + "epoch": 0.043602090551412294, + "grad_norm": 0.0, + "learning_rate": 1.9990308596946967e-05, + "loss": 1.5146, + "step": 1485 + }, + { + "epoch": 0.0436314522285513, + "grad_norm": 0.0, + "learning_rate": 1.999026669502571e-05, + "loss": 1.584, + "step": 1486 + }, + { + "epoch": 0.04366081390569029, + "grad_norm": 0.0, + "learning_rate": 1.9990224702759912e-05, + "loss": 1.4902, + "step": 1487 + }, + { + "epoch": 0.04369017558282929, + "grad_norm": 0.0, + "learning_rate": 1.9990182620149956e-05, + "loss": 1.5713, + "step": 1488 + }, + { + "epoch": 0.04371953725996829, + "grad_norm": 0.0, + "learning_rate": 1.9990140447196218e-05, + "loss": 1.6406, + "step": 1489 + }, + { + "epoch": 0.04374889893710729, + "grad_norm": 0.0, + "learning_rate": 1.999009818389908e-05, + "loss": 1.4932, + "step": 1490 + }, + { + "epoch": 0.043778260614246285, + "grad_norm": 0.0, + "learning_rate": 1.9990055830258927e-05, + "loss": 1.5947, + "step": 1491 + }, + { + "epoch": 0.04380762229138528, + "grad_norm": 0.0, + "learning_rate": 1.999001338627614e-05, + "loss": 1.5244, + "step": 1492 + }, + { + "epoch": 0.043836983968524285, + "grad_norm": 0.0, + "learning_rate": 1.9989970851951103e-05, + "loss": 1.4961, + "step": 1493 + }, + { + "epoch": 0.04386634564566328, + "grad_norm": 0.0, + "learning_rate": 1.9989928227284205e-05, + "loss": 1.5439, + "step": 1494 + }, + { + "epoch": 0.04389570732280228, + "grad_norm": 0.0, + "learning_rate": 1.9989885512275825e-05, + "loss": 1.624, + "step": 1495 + }, + { + "epoch": 0.04392506899994127, + "grad_norm": 0.0, + "learning_rate": 1.998984270692635e-05, + "loss": 1.5215, + "step": 1496 + }, + { + "epoch": 0.043954430677080276, + "grad_norm": 0.0, + "learning_rate": 1.998979981123617e-05, + "loss": 1.5947, + "step": 1497 + }, + { + "epoch": 0.04398379235421927, + "grad_norm": 0.0, + "learning_rate": 1.9989756825205673e-05, + "loss": 1.5645, + "step": 1498 + }, + { + "epoch": 0.04401315403135827, + "grad_norm": 0.0, + "learning_rate": 1.9989713748835244e-05, + "loss": 1.376, + "step": 1499 + }, + { + "epoch": 0.04404251570849727, + "grad_norm": 0.0, + "learning_rate": 1.9989670582125277e-05, + "loss": 1.5098, + "step": 1500 + }, + { + "epoch": 0.04407187738563627, + "grad_norm": 0.0, + "learning_rate": 1.998962732507616e-05, + "loss": 1.6582, + "step": 1501 + }, + { + "epoch": 0.044101239062775265, + "grad_norm": 0.0, + "learning_rate": 1.9989583977688287e-05, + "loss": 1.5557, + "step": 1502 + }, + { + "epoch": 0.04413060073991426, + "grad_norm": 0.0, + "learning_rate": 1.9989540539962045e-05, + "loss": 1.5205, + "step": 1503 + }, + { + "epoch": 0.044159962417053264, + "grad_norm": 0.0, + "learning_rate": 1.9989497011897832e-05, + "loss": 1.5049, + "step": 1504 + }, + { + "epoch": 0.04418932409419226, + "grad_norm": 0.0, + "learning_rate": 1.9989453393496037e-05, + "loss": 1.5117, + "step": 1505 + }, + { + "epoch": 0.044218685771331256, + "grad_norm": 0.0, + "learning_rate": 1.9989409684757057e-05, + "loss": 1.4912, + "step": 1506 + }, + { + "epoch": 0.04424804744847026, + "grad_norm": 0.0, + "learning_rate": 1.998936588568129e-05, + "loss": 1.54, + "step": 1507 + }, + { + "epoch": 0.044277409125609256, + "grad_norm": 0.0, + "learning_rate": 1.9989321996269123e-05, + "loss": 1.4385, + "step": 1508 + }, + { + "epoch": 0.04430677080274825, + "grad_norm": 0.0, + "learning_rate": 1.9989278016520966e-05, + "loss": 1.5225, + "step": 1509 + }, + { + "epoch": 0.04433613247988725, + "grad_norm": 0.0, + "learning_rate": 1.9989233946437205e-05, + "loss": 1.4912, + "step": 1510 + }, + { + "epoch": 0.04436549415702625, + "grad_norm": 0.0, + "learning_rate": 1.9989189786018245e-05, + "loss": 1.4492, + "step": 1511 + }, + { + "epoch": 0.04439485583416525, + "grad_norm": 0.0, + "learning_rate": 1.998914553526448e-05, + "loss": 1.5703, + "step": 1512 + }, + { + "epoch": 0.044424217511304244, + "grad_norm": 0.0, + "learning_rate": 1.9989101194176317e-05, + "loss": 1.4072, + "step": 1513 + }, + { + "epoch": 0.04445357918844325, + "grad_norm": 0.0, + "learning_rate": 1.9989056762754153e-05, + "loss": 1.4551, + "step": 1514 + }, + { + "epoch": 0.04448294086558224, + "grad_norm": 0.0, + "learning_rate": 1.9989012240998387e-05, + "loss": 1.4805, + "step": 1515 + }, + { + "epoch": 0.04451230254272124, + "grad_norm": 0.0, + "learning_rate": 1.998896762890943e-05, + "loss": 1.542, + "step": 1516 + }, + { + "epoch": 0.044541664219860236, + "grad_norm": 0.0, + "learning_rate": 1.9988922926487678e-05, + "loss": 1.4277, + "step": 1517 + }, + { + "epoch": 0.04457102589699924, + "grad_norm": 0.0, + "learning_rate": 1.998887813373354e-05, + "loss": 1.5293, + "step": 1518 + }, + { + "epoch": 0.044600387574138235, + "grad_norm": 0.0, + "learning_rate": 1.9988833250647418e-05, + "loss": 1.7188, + "step": 1519 + }, + { + "epoch": 0.04462974925127723, + "grad_norm": 0.0, + "learning_rate": 1.9988788277229718e-05, + "loss": 1.4922, + "step": 1520 + }, + { + "epoch": 0.04465911092841623, + "grad_norm": 0.0, + "learning_rate": 1.998874321348085e-05, + "loss": 1.6406, + "step": 1521 + }, + { + "epoch": 0.04468847260555523, + "grad_norm": 0.0, + "learning_rate": 1.9988698059401216e-05, + "loss": 1.5469, + "step": 1522 + }, + { + "epoch": 0.04471783428269423, + "grad_norm": 0.0, + "learning_rate": 1.998865281499123e-05, + "loss": 1.4668, + "step": 1523 + }, + { + "epoch": 0.04474719595983322, + "grad_norm": 0.0, + "learning_rate": 1.9988607480251292e-05, + "loss": 1.4238, + "step": 1524 + }, + { + "epoch": 0.044776557636972227, + "grad_norm": 0.0, + "learning_rate": 1.998856205518182e-05, + "loss": 1.5527, + "step": 1525 + }, + { + "epoch": 0.04480591931411122, + "grad_norm": 0.0, + "learning_rate": 1.998851653978323e-05, + "loss": 1.5889, + "step": 1526 + }, + { + "epoch": 0.04483528099125022, + "grad_norm": 0.0, + "learning_rate": 1.998847093405592e-05, + "loss": 1.4814, + "step": 1527 + }, + { + "epoch": 0.044864642668389215, + "grad_norm": 0.0, + "learning_rate": 1.9988425238000312e-05, + "loss": 1.4805, + "step": 1528 + }, + { + "epoch": 0.04489400434552822, + "grad_norm": 0.0, + "learning_rate": 1.9988379451616818e-05, + "loss": 1.6006, + "step": 1529 + }, + { + "epoch": 0.044923366022667215, + "grad_norm": 0.0, + "learning_rate": 1.998833357490585e-05, + "loss": 1.5654, + "step": 1530 + }, + { + "epoch": 0.04495272769980621, + "grad_norm": 0.0, + "learning_rate": 1.9988287607867822e-05, + "loss": 1.5635, + "step": 1531 + }, + { + "epoch": 0.044982089376945214, + "grad_norm": 0.0, + "learning_rate": 1.998824155050315e-05, + "loss": 1.4502, + "step": 1532 + }, + { + "epoch": 0.04501145105408421, + "grad_norm": 0.0, + "learning_rate": 1.9988195402812253e-05, + "loss": 1.5107, + "step": 1533 + }, + { + "epoch": 0.045040812731223206, + "grad_norm": 0.0, + "learning_rate": 1.998814916479555e-05, + "loss": 1.4736, + "step": 1534 + }, + { + "epoch": 0.0450701744083622, + "grad_norm": 0.0, + "learning_rate": 1.9988102836453453e-05, + "loss": 1.6172, + "step": 1535 + }, + { + "epoch": 0.045099536085501206, + "grad_norm": 0.0, + "learning_rate": 1.9988056417786383e-05, + "loss": 1.6494, + "step": 1536 + }, + { + "epoch": 0.0451288977626402, + "grad_norm": 0.0, + "learning_rate": 1.998800990879476e-05, + "loss": 1.4932, + "step": 1537 + }, + { + "epoch": 0.0451582594397792, + "grad_norm": 0.0, + "learning_rate": 1.9987963309479013e-05, + "loss": 1.5059, + "step": 1538 + }, + { + "epoch": 0.0451876211169182, + "grad_norm": 0.0, + "learning_rate": 1.998791661983955e-05, + "loss": 1.3281, + "step": 1539 + }, + { + "epoch": 0.0452169827940572, + "grad_norm": 0.0, + "learning_rate": 1.99878698398768e-05, + "loss": 1.4297, + "step": 1540 + }, + { + "epoch": 0.045246344471196194, + "grad_norm": 0.0, + "learning_rate": 1.9987822969591183e-05, + "loss": 1.5801, + "step": 1541 + }, + { + "epoch": 0.04527570614833519, + "grad_norm": 0.0, + "learning_rate": 1.9987776008983126e-05, + "loss": 1.4893, + "step": 1542 + }, + { + "epoch": 0.04530506782547419, + "grad_norm": 0.0, + "learning_rate": 1.9987728958053056e-05, + "loss": 1.6318, + "step": 1543 + }, + { + "epoch": 0.04533442950261319, + "grad_norm": 0.0, + "learning_rate": 1.9987681816801392e-05, + "loss": 1.5146, + "step": 1544 + }, + { + "epoch": 0.045363791179752186, + "grad_norm": 0.0, + "learning_rate": 1.9987634585228563e-05, + "loss": 1.5791, + "step": 1545 + }, + { + "epoch": 0.04539315285689119, + "grad_norm": 0.0, + "learning_rate": 1.9987587263334997e-05, + "loss": 1.5322, + "step": 1546 + }, + { + "epoch": 0.045422514534030185, + "grad_norm": 0.0, + "learning_rate": 1.9987539851121123e-05, + "loss": 1.5723, + "step": 1547 + }, + { + "epoch": 0.04545187621116918, + "grad_norm": 0.0, + "learning_rate": 1.9987492348587366e-05, + "loss": 1.5342, + "step": 1548 + }, + { + "epoch": 0.04548123788830818, + "grad_norm": 0.0, + "learning_rate": 1.9987444755734157e-05, + "loss": 1.4912, + "step": 1549 + }, + { + "epoch": 0.04551059956544718, + "grad_norm": 0.0, + "learning_rate": 1.998739707256193e-05, + "loss": 1.5117, + "step": 1550 + }, + { + "epoch": 0.04553996124258618, + "grad_norm": 0.0, + "learning_rate": 1.9987349299071113e-05, + "loss": 1.6064, + "step": 1551 + }, + { + "epoch": 0.04556932291972517, + "grad_norm": 0.0, + "learning_rate": 1.9987301435262137e-05, + "loss": 1.5986, + "step": 1552 + }, + { + "epoch": 0.04559868459686417, + "grad_norm": 0.0, + "learning_rate": 1.998725348113544e-05, + "loss": 1.5264, + "step": 1553 + }, + { + "epoch": 0.04562804627400317, + "grad_norm": 0.0, + "learning_rate": 1.998720543669145e-05, + "loss": 1.5293, + "step": 1554 + }, + { + "epoch": 0.04565740795114217, + "grad_norm": 0.0, + "learning_rate": 1.9987157301930603e-05, + "loss": 1.5947, + "step": 1555 + }, + { + "epoch": 0.045686769628281165, + "grad_norm": 0.0, + "learning_rate": 1.9987109076853334e-05, + "loss": 1.5205, + "step": 1556 + }, + { + "epoch": 0.04571613130542017, + "grad_norm": 0.0, + "learning_rate": 1.9987060761460083e-05, + "loss": 1.5117, + "step": 1557 + }, + { + "epoch": 0.045745492982559165, + "grad_norm": 0.0, + "learning_rate": 1.998701235575128e-05, + "loss": 1.5234, + "step": 1558 + }, + { + "epoch": 0.04577485465969816, + "grad_norm": 0.0, + "learning_rate": 1.9986963859727368e-05, + "loss": 1.6602, + "step": 1559 + }, + { + "epoch": 0.04580421633683716, + "grad_norm": 0.0, + "learning_rate": 1.9986915273388785e-05, + "loss": 1.5186, + "step": 1560 + }, + { + "epoch": 0.04583357801397616, + "grad_norm": 0.0, + "learning_rate": 1.998686659673597e-05, + "loss": 1.5283, + "step": 1561 + }, + { + "epoch": 0.04586293969111516, + "grad_norm": 0.0, + "learning_rate": 1.998681782976936e-05, + "loss": 1.6367, + "step": 1562 + }, + { + "epoch": 0.04589230136825415, + "grad_norm": 0.0, + "learning_rate": 1.9986768972489398e-05, + "loss": 1.5293, + "step": 1563 + }, + { + "epoch": 0.045921663045393156, + "grad_norm": 0.0, + "learning_rate": 1.9986720024896532e-05, + "loss": 1.5303, + "step": 1564 + }, + { + "epoch": 0.04595102472253215, + "grad_norm": 0.0, + "learning_rate": 1.9986670986991194e-05, + "loss": 1.4473, + "step": 1565 + }, + { + "epoch": 0.04598038639967115, + "grad_norm": 0.0, + "learning_rate": 1.9986621858773834e-05, + "loss": 1.5488, + "step": 1566 + }, + { + "epoch": 0.046009748076810145, + "grad_norm": 0.0, + "learning_rate": 1.9986572640244896e-05, + "loss": 1.6768, + "step": 1567 + }, + { + "epoch": 0.04603910975394915, + "grad_norm": 0.0, + "learning_rate": 1.9986523331404824e-05, + "loss": 1.6768, + "step": 1568 + }, + { + "epoch": 0.046068471431088144, + "grad_norm": 0.0, + "learning_rate": 1.9986473932254062e-05, + "loss": 1.4766, + "step": 1569 + }, + { + "epoch": 0.04609783310822714, + "grad_norm": 0.0, + "learning_rate": 1.998642444279306e-05, + "loss": 1.5635, + "step": 1570 + }, + { + "epoch": 0.046127194785366143, + "grad_norm": 0.0, + "learning_rate": 1.9986374863022267e-05, + "loss": 1.4531, + "step": 1571 + }, + { + "epoch": 0.04615655646250514, + "grad_norm": 0.0, + "learning_rate": 1.9986325192942125e-05, + "loss": 1.626, + "step": 1572 + }, + { + "epoch": 0.046185918139644136, + "grad_norm": 0.0, + "learning_rate": 1.9986275432553086e-05, + "loss": 1.6191, + "step": 1573 + }, + { + "epoch": 0.04621527981678313, + "grad_norm": 0.0, + "learning_rate": 1.9986225581855602e-05, + "loss": 1.5244, + "step": 1574 + }, + { + "epoch": 0.046244641493922135, + "grad_norm": 0.0, + "learning_rate": 1.9986175640850125e-05, + "loss": 1.4697, + "step": 1575 + }, + { + "epoch": 0.04627400317106113, + "grad_norm": 0.0, + "learning_rate": 1.99861256095371e-05, + "loss": 1.5029, + "step": 1576 + }, + { + "epoch": 0.04630336484820013, + "grad_norm": 0.0, + "learning_rate": 1.9986075487916988e-05, + "loss": 1.5361, + "step": 1577 + }, + { + "epoch": 0.046332726525339124, + "grad_norm": 0.0, + "learning_rate": 1.9986025275990232e-05, + "loss": 1.4932, + "step": 1578 + }, + { + "epoch": 0.04636208820247813, + "grad_norm": 0.0, + "learning_rate": 1.99859749737573e-05, + "loss": 1.5215, + "step": 1579 + }, + { + "epoch": 0.04639144987961712, + "grad_norm": 0.0, + "learning_rate": 1.9985924581218633e-05, + "loss": 1.3896, + "step": 1580 + }, + { + "epoch": 0.04642081155675612, + "grad_norm": 0.0, + "learning_rate": 1.998587409837469e-05, + "loss": 1.4111, + "step": 1581 + }, + { + "epoch": 0.04645017323389512, + "grad_norm": 0.0, + "learning_rate": 1.9985823525225937e-05, + "loss": 1.4609, + "step": 1582 + }, + { + "epoch": 0.04647953491103412, + "grad_norm": 0.0, + "learning_rate": 1.998577286177282e-05, + "loss": 1.541, + "step": 1583 + }, + { + "epoch": 0.046508896588173115, + "grad_norm": 0.0, + "learning_rate": 1.9985722108015804e-05, + "loss": 1.5752, + "step": 1584 + }, + { + "epoch": 0.04653825826531211, + "grad_norm": 0.0, + "learning_rate": 1.9985671263955346e-05, + "loss": 1.4346, + "step": 1585 + }, + { + "epoch": 0.046567619942451115, + "grad_norm": 0.0, + "learning_rate": 1.9985620329591902e-05, + "loss": 1.6504, + "step": 1586 + }, + { + "epoch": 0.04659698161959011, + "grad_norm": 0.0, + "learning_rate": 1.998556930492594e-05, + "loss": 1.5967, + "step": 1587 + }, + { + "epoch": 0.04662634329672911, + "grad_norm": 0.0, + "learning_rate": 1.9985518189957912e-05, + "loss": 1.4033, + "step": 1588 + }, + { + "epoch": 0.04665570497386811, + "grad_norm": 0.0, + "learning_rate": 1.9985466984688294e-05, + "loss": 1.5146, + "step": 1589 + }, + { + "epoch": 0.04668506665100711, + "grad_norm": 0.0, + "learning_rate": 1.9985415689117533e-05, + "loss": 1.4248, + "step": 1590 + }, + { + "epoch": 0.0467144283281461, + "grad_norm": 0.0, + "learning_rate": 1.9985364303246104e-05, + "loss": 1.5039, + "step": 1591 + }, + { + "epoch": 0.0467437900052851, + "grad_norm": 0.0, + "learning_rate": 1.998531282707447e-05, + "loss": 1.4111, + "step": 1592 + }, + { + "epoch": 0.0467731516824241, + "grad_norm": 0.0, + "learning_rate": 1.998526126060309e-05, + "loss": 1.5713, + "step": 1593 + }, + { + "epoch": 0.0468025133595631, + "grad_norm": 0.0, + "learning_rate": 1.998520960383244e-05, + "loss": 1.6055, + "step": 1594 + }, + { + "epoch": 0.046831875036702095, + "grad_norm": 0.0, + "learning_rate": 1.998515785676298e-05, + "loss": 1.5049, + "step": 1595 + }, + { + "epoch": 0.0468612367138411, + "grad_norm": 0.0, + "learning_rate": 1.998510601939518e-05, + "loss": 1.6328, + "step": 1596 + }, + { + "epoch": 0.046890598390980094, + "grad_norm": 0.0, + "learning_rate": 1.998505409172951e-05, + "loss": 1.5986, + "step": 1597 + }, + { + "epoch": 0.04691996006811909, + "grad_norm": 0.0, + "learning_rate": 1.9985002073766437e-05, + "loss": 1.5078, + "step": 1598 + }, + { + "epoch": 0.04694932174525809, + "grad_norm": 0.0, + "learning_rate": 1.9984949965506436e-05, + "loss": 1.4834, + "step": 1599 + }, + { + "epoch": 0.04697868342239709, + "grad_norm": 0.0, + "learning_rate": 1.998489776694997e-05, + "loss": 1.5117, + "step": 1600 + }, + { + "epoch": 0.047008045099536086, + "grad_norm": 0.0, + "learning_rate": 1.998484547809752e-05, + "loss": 1.5859, + "step": 1601 + }, + { + "epoch": 0.04703740677667508, + "grad_norm": 0.0, + "learning_rate": 1.998479309894956e-05, + "loss": 1.6074, + "step": 1602 + }, + { + "epoch": 0.04706676845381408, + "grad_norm": 0.0, + "learning_rate": 1.998474062950655e-05, + "loss": 1.4023, + "step": 1603 + }, + { + "epoch": 0.04709613013095308, + "grad_norm": 0.0, + "learning_rate": 1.9984688069768978e-05, + "loss": 1.542, + "step": 1604 + }, + { + "epoch": 0.04712549180809208, + "grad_norm": 0.0, + "learning_rate": 1.998463541973731e-05, + "loss": 1.6328, + "step": 1605 + }, + { + "epoch": 0.047154853485231074, + "grad_norm": 0.0, + "learning_rate": 1.9984582679412035e-05, + "loss": 1.4873, + "step": 1606 + }, + { + "epoch": 0.04718421516237008, + "grad_norm": 0.0, + "learning_rate": 1.9984529848793618e-05, + "loss": 1.5508, + "step": 1607 + }, + { + "epoch": 0.047213576839509074, + "grad_norm": 0.0, + "learning_rate": 1.9984476927882536e-05, + "loss": 1.6465, + "step": 1608 + }, + { + "epoch": 0.04724293851664807, + "grad_norm": 0.0, + "learning_rate": 1.998442391667928e-05, + "loss": 1.5234, + "step": 1609 + }, + { + "epoch": 0.047272300193787066, + "grad_norm": 0.0, + "learning_rate": 1.9984370815184315e-05, + "loss": 1.543, + "step": 1610 + }, + { + "epoch": 0.04730166187092607, + "grad_norm": 0.0, + "learning_rate": 1.998431762339813e-05, + "loss": 1.5312, + "step": 1611 + }, + { + "epoch": 0.047331023548065065, + "grad_norm": 0.0, + "learning_rate": 1.9984264341321205e-05, + "loss": 1.5391, + "step": 1612 + }, + { + "epoch": 0.04736038522520406, + "grad_norm": 0.0, + "learning_rate": 1.9984210968954017e-05, + "loss": 1.5264, + "step": 1613 + }, + { + "epoch": 0.047389746902343065, + "grad_norm": 0.0, + "learning_rate": 1.9984157506297058e-05, + "loss": 1.4844, + "step": 1614 + }, + { + "epoch": 0.04741910857948206, + "grad_norm": 0.0, + "learning_rate": 1.99841039533508e-05, + "loss": 1.4922, + "step": 1615 + }, + { + "epoch": 0.04744847025662106, + "grad_norm": 0.0, + "learning_rate": 1.9984050310115734e-05, + "loss": 1.4746, + "step": 1616 + }, + { + "epoch": 0.047477831933760054, + "grad_norm": 0.0, + "learning_rate": 1.998399657659235e-05, + "loss": 1.4961, + "step": 1617 + }, + { + "epoch": 0.04750719361089906, + "grad_norm": 0.0, + "learning_rate": 1.998394275278112e-05, + "loss": 1.4883, + "step": 1618 + }, + { + "epoch": 0.04753655528803805, + "grad_norm": 0.0, + "learning_rate": 1.998388883868254e-05, + "loss": 1.4971, + "step": 1619 + }, + { + "epoch": 0.04756591696517705, + "grad_norm": 0.0, + "learning_rate": 1.9983834834297102e-05, + "loss": 1.5039, + "step": 1620 + }, + { + "epoch": 0.04759527864231605, + "grad_norm": 0.0, + "learning_rate": 1.9983780739625285e-05, + "loss": 1.5439, + "step": 1621 + }, + { + "epoch": 0.04762464031945505, + "grad_norm": 0.0, + "learning_rate": 1.9983726554667583e-05, + "loss": 1.5557, + "step": 1622 + }, + { + "epoch": 0.047654001996594045, + "grad_norm": 0.0, + "learning_rate": 1.9983672279424483e-05, + "loss": 1.5205, + "step": 1623 + }, + { + "epoch": 0.04768336367373304, + "grad_norm": 0.0, + "learning_rate": 1.9983617913896477e-05, + "loss": 1.4736, + "step": 1624 + }, + { + "epoch": 0.047712725350872044, + "grad_norm": 0.0, + "learning_rate": 1.998356345808406e-05, + "loss": 1.5146, + "step": 1625 + }, + { + "epoch": 0.04774208702801104, + "grad_norm": 0.0, + "learning_rate": 1.998350891198772e-05, + "loss": 1.5615, + "step": 1626 + }, + { + "epoch": 0.04777144870515004, + "grad_norm": 0.0, + "learning_rate": 1.9983454275607953e-05, + "loss": 1.4883, + "step": 1627 + }, + { + "epoch": 0.04780081038228903, + "grad_norm": 0.0, + "learning_rate": 1.998339954894525e-05, + "loss": 1.4541, + "step": 1628 + }, + { + "epoch": 0.047830172059428036, + "grad_norm": 0.0, + "learning_rate": 1.9983344732000112e-05, + "loss": 1.4932, + "step": 1629 + }, + { + "epoch": 0.04785953373656703, + "grad_norm": 0.0, + "learning_rate": 1.9983289824773032e-05, + "loss": 1.5098, + "step": 1630 + }, + { + "epoch": 0.04788889541370603, + "grad_norm": 0.0, + "learning_rate": 1.99832348272645e-05, + "loss": 1.541, + "step": 1631 + }, + { + "epoch": 0.04791825709084503, + "grad_norm": 0.0, + "learning_rate": 1.998317973947502e-05, + "loss": 1.5166, + "step": 1632 + }, + { + "epoch": 0.04794761876798403, + "grad_norm": 0.0, + "learning_rate": 1.998312456140509e-05, + "loss": 1.5547, + "step": 1633 + }, + { + "epoch": 0.047976980445123024, + "grad_norm": 0.0, + "learning_rate": 1.998306929305521e-05, + "loss": 1.6416, + "step": 1634 + }, + { + "epoch": 0.04800634212226202, + "grad_norm": 0.0, + "learning_rate": 1.9983013934425874e-05, + "loss": 1.5986, + "step": 1635 + }, + { + "epoch": 0.048035703799401024, + "grad_norm": 0.0, + "learning_rate": 1.9982958485517588e-05, + "loss": 1.5811, + "step": 1636 + }, + { + "epoch": 0.04806506547654002, + "grad_norm": 0.0, + "learning_rate": 1.998290294633085e-05, + "loss": 1.5156, + "step": 1637 + }, + { + "epoch": 0.048094427153679016, + "grad_norm": 0.0, + "learning_rate": 1.998284731686617e-05, + "loss": 1.4307, + "step": 1638 + }, + { + "epoch": 0.04812378883081802, + "grad_norm": 0.0, + "learning_rate": 1.9982791597124038e-05, + "loss": 1.5771, + "step": 1639 + }, + { + "epoch": 0.048153150507957015, + "grad_norm": 0.0, + "learning_rate": 1.9982735787104966e-05, + "loss": 1.4746, + "step": 1640 + }, + { + "epoch": 0.04818251218509601, + "grad_norm": 0.0, + "learning_rate": 1.998267988680946e-05, + "loss": 1.5166, + "step": 1641 + }, + { + "epoch": 0.04821187386223501, + "grad_norm": 0.0, + "learning_rate": 1.9982623896238023e-05, + "loss": 1.5479, + "step": 1642 + }, + { + "epoch": 0.04824123553937401, + "grad_norm": 0.0, + "learning_rate": 1.998256781539116e-05, + "loss": 1.4932, + "step": 1643 + }, + { + "epoch": 0.04827059721651301, + "grad_norm": 0.0, + "learning_rate": 1.998251164426938e-05, + "loss": 1.4561, + "step": 1644 + }, + { + "epoch": 0.048299958893652004, + "grad_norm": 0.0, + "learning_rate": 1.998245538287319e-05, + "loss": 1.5059, + "step": 1645 + }, + { + "epoch": 0.04832932057079101, + "grad_norm": 0.0, + "learning_rate": 1.9982399031203102e-05, + "loss": 1.5127, + "step": 1646 + }, + { + "epoch": 0.04835868224793, + "grad_norm": 0.0, + "learning_rate": 1.9982342589259624e-05, + "loss": 1.5195, + "step": 1647 + }, + { + "epoch": 0.048388043925069, + "grad_norm": 0.0, + "learning_rate": 1.998228605704326e-05, + "loss": 1.5049, + "step": 1648 + }, + { + "epoch": 0.048417405602207995, + "grad_norm": 0.0, + "learning_rate": 1.9982229434554532e-05, + "loss": 1.5898, + "step": 1649 + }, + { + "epoch": 0.048446767279347, + "grad_norm": 0.0, + "learning_rate": 1.9982172721793945e-05, + "loss": 1.5166, + "step": 1650 + }, + { + "epoch": 0.048476128956485995, + "grad_norm": 0.0, + "learning_rate": 1.9982115918762012e-05, + "loss": 1.4688, + "step": 1651 + }, + { + "epoch": 0.04850549063362499, + "grad_norm": 0.0, + "learning_rate": 1.998205902545925e-05, + "loss": 1.626, + "step": 1652 + }, + { + "epoch": 0.048534852310763994, + "grad_norm": 0.0, + "learning_rate": 1.998200204188617e-05, + "loss": 1.5771, + "step": 1653 + }, + { + "epoch": 0.04856421398790299, + "grad_norm": 0.0, + "learning_rate": 1.9981944968043292e-05, + "loss": 1.6416, + "step": 1654 + }, + { + "epoch": 0.04859357566504199, + "grad_norm": 0.0, + "learning_rate": 1.9981887803931127e-05, + "loss": 1.582, + "step": 1655 + }, + { + "epoch": 0.04862293734218098, + "grad_norm": 0.0, + "learning_rate": 1.9981830549550193e-05, + "loss": 1.6006, + "step": 1656 + }, + { + "epoch": 0.048652299019319986, + "grad_norm": 0.0, + "learning_rate": 1.9981773204901012e-05, + "loss": 1.623, + "step": 1657 + }, + { + "epoch": 0.04868166069645898, + "grad_norm": 0.0, + "learning_rate": 1.99817157699841e-05, + "loss": 1.5527, + "step": 1658 + }, + { + "epoch": 0.04871102237359798, + "grad_norm": 0.0, + "learning_rate": 1.9981658244799976e-05, + "loss": 1.623, + "step": 1659 + }, + { + "epoch": 0.048740384050736975, + "grad_norm": 0.0, + "learning_rate": 1.9981600629349154e-05, + "loss": 1.4941, + "step": 1660 + }, + { + "epoch": 0.04876974572787598, + "grad_norm": 0.0, + "learning_rate": 1.998154292363217e-05, + "loss": 1.4775, + "step": 1661 + }, + { + "epoch": 0.048799107405014974, + "grad_norm": 0.0, + "learning_rate": 1.998148512764953e-05, + "loss": 1.5811, + "step": 1662 + }, + { + "epoch": 0.04882846908215397, + "grad_norm": 0.0, + "learning_rate": 1.9981427241401766e-05, + "loss": 1.6523, + "step": 1663 + }, + { + "epoch": 0.048857830759292974, + "grad_norm": 0.0, + "learning_rate": 1.99813692648894e-05, + "loss": 1.4775, + "step": 1664 + }, + { + "epoch": 0.04888719243643197, + "grad_norm": 0.0, + "learning_rate": 1.9981311198112958e-05, + "loss": 1.4043, + "step": 1665 + }, + { + "epoch": 0.048916554113570966, + "grad_norm": 0.0, + "learning_rate": 1.9981253041072956e-05, + "loss": 1.5293, + "step": 1666 + }, + { + "epoch": 0.04894591579070996, + "grad_norm": 0.0, + "learning_rate": 1.9981194793769928e-05, + "loss": 1.543, + "step": 1667 + }, + { + "epoch": 0.048975277467848966, + "grad_norm": 0.0, + "learning_rate": 1.9981136456204403e-05, + "loss": 1.4473, + "step": 1668 + }, + { + "epoch": 0.04900463914498796, + "grad_norm": 0.0, + "learning_rate": 1.9981078028376902e-05, + "loss": 1.6514, + "step": 1669 + }, + { + "epoch": 0.04903400082212696, + "grad_norm": 0.0, + "learning_rate": 1.998101951028796e-05, + "loss": 1.5723, + "step": 1670 + }, + { + "epoch": 0.04906336249926596, + "grad_norm": 0.0, + "learning_rate": 1.99809609019381e-05, + "loss": 1.5137, + "step": 1671 + }, + { + "epoch": 0.04909272417640496, + "grad_norm": 0.0, + "learning_rate": 1.9980902203327856e-05, + "loss": 1.6211, + "step": 1672 + }, + { + "epoch": 0.049122085853543954, + "grad_norm": 0.0, + "learning_rate": 1.9980843414457752e-05, + "loss": 1.5342, + "step": 1673 + }, + { + "epoch": 0.04915144753068295, + "grad_norm": 0.0, + "learning_rate": 1.998078453532833e-05, + "loss": 1.5244, + "step": 1674 + }, + { + "epoch": 0.04918080920782195, + "grad_norm": 0.0, + "learning_rate": 1.9980725565940112e-05, + "loss": 1.6113, + "step": 1675 + }, + { + "epoch": 0.04921017088496095, + "grad_norm": 0.0, + "learning_rate": 1.9980666506293643e-05, + "loss": 1.6064, + "step": 1676 + }, + { + "epoch": 0.049239532562099946, + "grad_norm": 0.0, + "learning_rate": 1.9980607356389447e-05, + "loss": 1.4277, + "step": 1677 + }, + { + "epoch": 0.04926889423923895, + "grad_norm": 0.0, + "learning_rate": 1.9980548116228063e-05, + "loss": 1.5781, + "step": 1678 + }, + { + "epoch": 0.049298255916377945, + "grad_norm": 0.0, + "learning_rate": 1.9980488785810026e-05, + "loss": 1.6611, + "step": 1679 + }, + { + "epoch": 0.04932761759351694, + "grad_norm": 0.0, + "learning_rate": 1.9980429365135875e-05, + "loss": 1.6133, + "step": 1680 + }, + { + "epoch": 0.04935697927065594, + "grad_norm": 0.0, + "learning_rate": 1.9980369854206142e-05, + "loss": 1.4951, + "step": 1681 + }, + { + "epoch": 0.04938634094779494, + "grad_norm": 0.0, + "learning_rate": 1.9980310253021372e-05, + "loss": 1.4805, + "step": 1682 + }, + { + "epoch": 0.04941570262493394, + "grad_norm": 0.0, + "learning_rate": 1.99802505615821e-05, + "loss": 1.542, + "step": 1683 + }, + { + "epoch": 0.04944506430207293, + "grad_norm": 0.0, + "learning_rate": 1.9980190779888864e-05, + "loss": 1.4902, + "step": 1684 + }, + { + "epoch": 0.04947442597921193, + "grad_norm": 0.0, + "learning_rate": 1.9980130907942206e-05, + "loss": 1.4141, + "step": 1685 + }, + { + "epoch": 0.04950378765635093, + "grad_norm": 0.0, + "learning_rate": 1.998007094574267e-05, + "loss": 1.4004, + "step": 1686 + }, + { + "epoch": 0.04953314933348993, + "grad_norm": 0.0, + "learning_rate": 1.9980010893290798e-05, + "loss": 1.5947, + "step": 1687 + }, + { + "epoch": 0.049562511010628925, + "grad_norm": 0.0, + "learning_rate": 1.9979950750587128e-05, + "loss": 1.5586, + "step": 1688 + }, + { + "epoch": 0.04959187268776793, + "grad_norm": 0.0, + "learning_rate": 1.997989051763221e-05, + "loss": 1.5557, + "step": 1689 + }, + { + "epoch": 0.049621234364906924, + "grad_norm": 0.0, + "learning_rate": 1.9979830194426588e-05, + "loss": 1.4854, + "step": 1690 + }, + { + "epoch": 0.04965059604204592, + "grad_norm": 0.0, + "learning_rate": 1.99797697809708e-05, + "loss": 1.4268, + "step": 1691 + }, + { + "epoch": 0.04967995771918492, + "grad_norm": 0.0, + "learning_rate": 1.9979709277265403e-05, + "loss": 1.3838, + "step": 1692 + }, + { + "epoch": 0.04970931939632392, + "grad_norm": 0.0, + "learning_rate": 1.9979648683310937e-05, + "loss": 1.5771, + "step": 1693 + }, + { + "epoch": 0.049738681073462916, + "grad_norm": 0.0, + "learning_rate": 1.9979587999107954e-05, + "loss": 1.4814, + "step": 1694 + }, + { + "epoch": 0.04976804275060191, + "grad_norm": 0.0, + "learning_rate": 1.9979527224656998e-05, + "loss": 1.3516, + "step": 1695 + }, + { + "epoch": 0.049797404427740916, + "grad_norm": 0.0, + "learning_rate": 1.9979466359958622e-05, + "loss": 1.5273, + "step": 1696 + }, + { + "epoch": 0.04982676610487991, + "grad_norm": 0.0, + "learning_rate": 1.997940540501338e-05, + "loss": 1.4072, + "step": 1697 + }, + { + "epoch": 0.04985612778201891, + "grad_norm": 0.0, + "learning_rate": 1.9979344359821816e-05, + "loss": 1.5322, + "step": 1698 + }, + { + "epoch": 0.049885489459157904, + "grad_norm": 0.0, + "learning_rate": 1.9979283224384483e-05, + "loss": 1.4111, + "step": 1699 + }, + { + "epoch": 0.04991485113629691, + "grad_norm": 0.0, + "learning_rate": 1.997922199870194e-05, + "loss": 1.4346, + "step": 1700 + }, + { + "epoch": 0.049944212813435904, + "grad_norm": 0.0, + "learning_rate": 1.9979160682774733e-05, + "loss": 1.5801, + "step": 1701 + }, + { + "epoch": 0.0499735744905749, + "grad_norm": 0.0, + "learning_rate": 1.9979099276603424e-05, + "loss": 1.5332, + "step": 1702 + }, + { + "epoch": 0.0500029361677139, + "grad_norm": 0.0, + "learning_rate": 1.997903778018856e-05, + "loss": 1.3008, + "step": 1703 + }, + { + "epoch": 0.0500322978448529, + "grad_norm": 0.0, + "learning_rate": 1.9978976193530705e-05, + "loss": 1.5146, + "step": 1704 + }, + { + "epoch": 0.050061659521991896, + "grad_norm": 0.0, + "learning_rate": 1.9978914516630414e-05, + "loss": 1.3926, + "step": 1705 + }, + { + "epoch": 0.05009102119913089, + "grad_norm": 0.0, + "learning_rate": 1.997885274948824e-05, + "loss": 1.5137, + "step": 1706 + }, + { + "epoch": 0.050120382876269895, + "grad_norm": 0.0, + "learning_rate": 1.9978790892104748e-05, + "loss": 1.5039, + "step": 1707 + }, + { + "epoch": 0.05014974455340889, + "grad_norm": 0.0, + "learning_rate": 1.9978728944480492e-05, + "loss": 1.4658, + "step": 1708 + }, + { + "epoch": 0.05017910623054789, + "grad_norm": 0.0, + "learning_rate": 1.9978666906616036e-05, + "loss": 1.6123, + "step": 1709 + }, + { + "epoch": 0.050208467907686884, + "grad_norm": 0.0, + "learning_rate": 1.997860477851194e-05, + "loss": 1.4463, + "step": 1710 + }, + { + "epoch": 0.05023782958482589, + "grad_norm": 0.0, + "learning_rate": 1.9978542560168763e-05, + "loss": 1.6631, + "step": 1711 + }, + { + "epoch": 0.05026719126196488, + "grad_norm": 0.0, + "learning_rate": 1.997848025158707e-05, + "loss": 1.5176, + "step": 1712 + }, + { + "epoch": 0.05029655293910388, + "grad_norm": 0.0, + "learning_rate": 1.9978417852767426e-05, + "loss": 1.5645, + "step": 1713 + }, + { + "epoch": 0.05032591461624288, + "grad_norm": 0.0, + "learning_rate": 1.9978355363710396e-05, + "loss": 1.5791, + "step": 1714 + }, + { + "epoch": 0.05035527629338188, + "grad_norm": 0.0, + "learning_rate": 1.997829278441654e-05, + "loss": 1.4443, + "step": 1715 + }, + { + "epoch": 0.050384637970520875, + "grad_norm": 0.0, + "learning_rate": 1.997823011488643e-05, + "loss": 1.5176, + "step": 1716 + }, + { + "epoch": 0.05041399964765987, + "grad_norm": 0.0, + "learning_rate": 1.9978167355120628e-05, + "loss": 1.6533, + "step": 1717 + }, + { + "epoch": 0.050443361324798874, + "grad_norm": 0.0, + "learning_rate": 1.99781045051197e-05, + "loss": 1.4678, + "step": 1718 + }, + { + "epoch": 0.05047272300193787, + "grad_norm": 0.0, + "learning_rate": 1.9978041564884222e-05, + "loss": 1.5186, + "step": 1719 + }, + { + "epoch": 0.05050208467907687, + "grad_norm": 0.0, + "learning_rate": 1.9977978534414758e-05, + "loss": 1.4209, + "step": 1720 + }, + { + "epoch": 0.05053144635621587, + "grad_norm": 0.0, + "learning_rate": 1.9977915413711876e-05, + "loss": 1.4609, + "step": 1721 + }, + { + "epoch": 0.050560808033354866, + "grad_norm": 0.0, + "learning_rate": 1.997785220277615e-05, + "loss": 1.6895, + "step": 1722 + }, + { + "epoch": 0.05059016971049386, + "grad_norm": 0.0, + "learning_rate": 1.9977788901608152e-05, + "loss": 1.627, + "step": 1723 + }, + { + "epoch": 0.05061953138763286, + "grad_norm": 0.0, + "learning_rate": 1.9977725510208455e-05, + "loss": 1.5088, + "step": 1724 + }, + { + "epoch": 0.05064889306477186, + "grad_norm": 0.0, + "learning_rate": 1.9977662028577627e-05, + "loss": 1.4766, + "step": 1725 + }, + { + "epoch": 0.05067825474191086, + "grad_norm": 0.0, + "learning_rate": 1.9977598456716253e-05, + "loss": 1.5439, + "step": 1726 + }, + { + "epoch": 0.050707616419049854, + "grad_norm": 0.0, + "learning_rate": 1.9977534794624894e-05, + "loss": 1.541, + "step": 1727 + }, + { + "epoch": 0.05073697809618886, + "grad_norm": 0.0, + "learning_rate": 1.9977471042304136e-05, + "loss": 1.4795, + "step": 1728 + }, + { + "epoch": 0.050766339773327854, + "grad_norm": 0.0, + "learning_rate": 1.9977407199754552e-05, + "loss": 1.5664, + "step": 1729 + }, + { + "epoch": 0.05079570145046685, + "grad_norm": 0.0, + "learning_rate": 1.9977343266976718e-05, + "loss": 1.5068, + "step": 1730 + }, + { + "epoch": 0.050825063127605846, + "grad_norm": 0.0, + "learning_rate": 1.9977279243971213e-05, + "loss": 1.3887, + "step": 1731 + }, + { + "epoch": 0.05085442480474485, + "grad_norm": 0.0, + "learning_rate": 1.9977215130738617e-05, + "loss": 1.5303, + "step": 1732 + }, + { + "epoch": 0.050883786481883846, + "grad_norm": 0.0, + "learning_rate": 1.9977150927279513e-05, + "loss": 1.5518, + "step": 1733 + }, + { + "epoch": 0.05091314815902284, + "grad_norm": 0.0, + "learning_rate": 1.9977086633594472e-05, + "loss": 1.5693, + "step": 1734 + }, + { + "epoch": 0.050942509836161845, + "grad_norm": 0.0, + "learning_rate": 1.9977022249684085e-05, + "loss": 1.5098, + "step": 1735 + }, + { + "epoch": 0.05097187151330084, + "grad_norm": 0.0, + "learning_rate": 1.997695777554893e-05, + "loss": 1.4551, + "step": 1736 + }, + { + "epoch": 0.05100123319043984, + "grad_norm": 0.0, + "learning_rate": 1.9976893211189592e-05, + "loss": 1.5684, + "step": 1737 + }, + { + "epoch": 0.051030594867578834, + "grad_norm": 0.0, + "learning_rate": 1.9976828556606652e-05, + "loss": 1.5693, + "step": 1738 + }, + { + "epoch": 0.05105995654471784, + "grad_norm": 0.0, + "learning_rate": 1.9976763811800697e-05, + "loss": 1.5557, + "step": 1739 + }, + { + "epoch": 0.05108931822185683, + "grad_norm": 0.0, + "learning_rate": 1.9976698976772312e-05, + "loss": 1.6221, + "step": 1740 + }, + { + "epoch": 0.05111867989899583, + "grad_norm": 0.0, + "learning_rate": 1.997663405152208e-05, + "loss": 1.4551, + "step": 1741 + }, + { + "epoch": 0.051148041576134826, + "grad_norm": 0.0, + "learning_rate": 1.9976569036050596e-05, + "loss": 1.5752, + "step": 1742 + }, + { + "epoch": 0.05117740325327383, + "grad_norm": 0.0, + "learning_rate": 1.997650393035844e-05, + "loss": 1.4502, + "step": 1743 + }, + { + "epoch": 0.051206764930412825, + "grad_norm": 0.0, + "learning_rate": 1.9976438734446207e-05, + "loss": 1.5107, + "step": 1744 + }, + { + "epoch": 0.05123612660755182, + "grad_norm": 0.0, + "learning_rate": 1.997637344831448e-05, + "loss": 1.6094, + "step": 1745 + }, + { + "epoch": 0.051265488284690824, + "grad_norm": 0.0, + "learning_rate": 1.9976308071963854e-05, + "loss": 1.5918, + "step": 1746 + }, + { + "epoch": 0.05129484996182982, + "grad_norm": 0.0, + "learning_rate": 1.997624260539492e-05, + "loss": 1.499, + "step": 1747 + }, + { + "epoch": 0.05132421163896882, + "grad_norm": 0.0, + "learning_rate": 1.9976177048608267e-05, + "loss": 1.5557, + "step": 1748 + }, + { + "epoch": 0.05135357331610781, + "grad_norm": 0.0, + "learning_rate": 1.9976111401604494e-05, + "loss": 1.4629, + "step": 1749 + }, + { + "epoch": 0.051382934993246816, + "grad_norm": 0.0, + "learning_rate": 1.997604566438419e-05, + "loss": 1.4883, + "step": 1750 + }, + { + "epoch": 0.05141229667038581, + "grad_norm": 0.0, + "learning_rate": 1.9975979836947945e-05, + "loss": 1.4805, + "step": 1751 + }, + { + "epoch": 0.05144165834752481, + "grad_norm": 0.0, + "learning_rate": 1.9975913919296362e-05, + "loss": 1.6152, + "step": 1752 + }, + { + "epoch": 0.05147102002466381, + "grad_norm": 0.0, + "learning_rate": 1.9975847911430038e-05, + "loss": 1.458, + "step": 1753 + }, + { + "epoch": 0.05150038170180281, + "grad_norm": 0.0, + "learning_rate": 1.9975781813349565e-05, + "loss": 1.502, + "step": 1754 + }, + { + "epoch": 0.051529743378941804, + "grad_norm": 0.0, + "learning_rate": 1.997571562505554e-05, + "loss": 1.46, + "step": 1755 + }, + { + "epoch": 0.0515591050560808, + "grad_norm": 0.0, + "learning_rate": 1.9975649346548565e-05, + "loss": 1.4883, + "step": 1756 + }, + { + "epoch": 0.051588466733219804, + "grad_norm": 0.0, + "learning_rate": 1.9975582977829237e-05, + "loss": 1.5273, + "step": 1757 + }, + { + "epoch": 0.0516178284103588, + "grad_norm": 0.0, + "learning_rate": 1.997551651889816e-05, + "loss": 1.4824, + "step": 1758 + }, + { + "epoch": 0.051647190087497796, + "grad_norm": 0.0, + "learning_rate": 1.9975449969755928e-05, + "loss": 1.5576, + "step": 1759 + }, + { + "epoch": 0.0516765517646368, + "grad_norm": 0.0, + "learning_rate": 1.997538333040315e-05, + "loss": 1.4922, + "step": 1760 + }, + { + "epoch": 0.051705913441775796, + "grad_norm": 0.0, + "learning_rate": 1.9975316600840428e-05, + "loss": 1.5156, + "step": 1761 + }, + { + "epoch": 0.05173527511891479, + "grad_norm": 0.0, + "learning_rate": 1.997524978106836e-05, + "loss": 1.5273, + "step": 1762 + }, + { + "epoch": 0.05176463679605379, + "grad_norm": 0.0, + "learning_rate": 1.9975182871087554e-05, + "loss": 1.5654, + "step": 1763 + }, + { + "epoch": 0.05179399847319279, + "grad_norm": 0.0, + "learning_rate": 1.9975115870898614e-05, + "loss": 1.6836, + "step": 1764 + }, + { + "epoch": 0.05182336015033179, + "grad_norm": 0.0, + "learning_rate": 1.997504878050215e-05, + "loss": 1.6143, + "step": 1765 + }, + { + "epoch": 0.051852721827470784, + "grad_norm": 0.0, + "learning_rate": 1.9974981599898765e-05, + "loss": 1.5977, + "step": 1766 + }, + { + "epoch": 0.05188208350460978, + "grad_norm": 0.0, + "learning_rate": 1.9974914329089064e-05, + "loss": 1.4805, + "step": 1767 + }, + { + "epoch": 0.05191144518174878, + "grad_norm": 0.0, + "learning_rate": 1.9974846968073657e-05, + "loss": 1.4424, + "step": 1768 + }, + { + "epoch": 0.05194080685888778, + "grad_norm": 0.0, + "learning_rate": 1.9974779516853158e-05, + "loss": 1.4902, + "step": 1769 + }, + { + "epoch": 0.051970168536026776, + "grad_norm": 0.0, + "learning_rate": 1.997471197542817e-05, + "loss": 1.543, + "step": 1770 + }, + { + "epoch": 0.05199953021316578, + "grad_norm": 0.0, + "learning_rate": 1.9974644343799307e-05, + "loss": 1.5146, + "step": 1771 + }, + { + "epoch": 0.052028891890304775, + "grad_norm": 0.0, + "learning_rate": 1.9974576621967184e-05, + "loss": 1.4893, + "step": 1772 + }, + { + "epoch": 0.05205825356744377, + "grad_norm": 0.0, + "learning_rate": 1.9974508809932408e-05, + "loss": 1.4775, + "step": 1773 + }, + { + "epoch": 0.05208761524458277, + "grad_norm": 0.0, + "learning_rate": 1.9974440907695593e-05, + "loss": 1.4922, + "step": 1774 + }, + { + "epoch": 0.05211697692172177, + "grad_norm": 0.0, + "learning_rate": 1.9974372915257358e-05, + "loss": 1.5166, + "step": 1775 + }, + { + "epoch": 0.05214633859886077, + "grad_norm": 0.0, + "learning_rate": 1.997430483261831e-05, + "loss": 1.6416, + "step": 1776 + }, + { + "epoch": 0.05217570027599976, + "grad_norm": 0.0, + "learning_rate": 1.9974236659779073e-05, + "loss": 1.5303, + "step": 1777 + }, + { + "epoch": 0.052205061953138766, + "grad_norm": 0.0, + "learning_rate": 1.9974168396740256e-05, + "loss": 1.4404, + "step": 1778 + }, + { + "epoch": 0.05223442363027776, + "grad_norm": 0.0, + "learning_rate": 1.9974100043502483e-05, + "loss": 1.5215, + "step": 1779 + }, + { + "epoch": 0.05226378530741676, + "grad_norm": 0.0, + "learning_rate": 1.9974031600066367e-05, + "loss": 1.5693, + "step": 1780 + }, + { + "epoch": 0.052293146984555755, + "grad_norm": 0.0, + "learning_rate": 1.9973963066432527e-05, + "loss": 1.5518, + "step": 1781 + }, + { + "epoch": 0.05232250866169476, + "grad_norm": 0.0, + "learning_rate": 1.997389444260159e-05, + "loss": 1.5107, + "step": 1782 + }, + { + "epoch": 0.052351870338833754, + "grad_norm": 0.0, + "learning_rate": 1.9973825728574167e-05, + "loss": 1.5088, + "step": 1783 + }, + { + "epoch": 0.05238123201597275, + "grad_norm": 0.0, + "learning_rate": 1.9973756924350885e-05, + "loss": 1.5088, + "step": 1784 + }, + { + "epoch": 0.052410593693111754, + "grad_norm": 0.0, + "learning_rate": 1.9973688029932364e-05, + "loss": 1.3994, + "step": 1785 + }, + { + "epoch": 0.05243995537025075, + "grad_norm": 0.0, + "learning_rate": 1.9973619045319227e-05, + "loss": 1.6387, + "step": 1786 + }, + { + "epoch": 0.052469317047389746, + "grad_norm": 0.0, + "learning_rate": 1.9973549970512098e-05, + "loss": 1.4727, + "step": 1787 + }, + { + "epoch": 0.05249867872452874, + "grad_norm": 0.0, + "learning_rate": 1.9973480805511605e-05, + "loss": 1.5166, + "step": 1788 + }, + { + "epoch": 0.052528040401667746, + "grad_norm": 0.0, + "learning_rate": 1.9973411550318367e-05, + "loss": 1.5742, + "step": 1789 + }, + { + "epoch": 0.05255740207880674, + "grad_norm": 0.0, + "learning_rate": 1.997334220493302e-05, + "loss": 1.5547, + "step": 1790 + }, + { + "epoch": 0.05258676375594574, + "grad_norm": 0.0, + "learning_rate": 1.997327276935618e-05, + "loss": 1.5918, + "step": 1791 + }, + { + "epoch": 0.052616125433084734, + "grad_norm": 0.0, + "learning_rate": 1.9973203243588487e-05, + "loss": 1.4551, + "step": 1792 + }, + { + "epoch": 0.05264548711022374, + "grad_norm": 0.0, + "learning_rate": 1.9973133627630557e-05, + "loss": 1.6562, + "step": 1793 + }, + { + "epoch": 0.052674848787362734, + "grad_norm": 0.0, + "learning_rate": 1.9973063921483026e-05, + "loss": 1.5371, + "step": 1794 + }, + { + "epoch": 0.05270421046450173, + "grad_norm": 0.0, + "learning_rate": 1.9972994125146526e-05, + "loss": 1.4824, + "step": 1795 + }, + { + "epoch": 0.05273357214164073, + "grad_norm": 0.0, + "learning_rate": 1.9972924238621686e-05, + "loss": 1.4004, + "step": 1796 + }, + { + "epoch": 0.05276293381877973, + "grad_norm": 0.0, + "learning_rate": 1.9972854261909134e-05, + "loss": 1.6396, + "step": 1797 + }, + { + "epoch": 0.052792295495918726, + "grad_norm": 0.0, + "learning_rate": 1.9972784195009508e-05, + "loss": 1.5205, + "step": 1798 + }, + { + "epoch": 0.05282165717305772, + "grad_norm": 0.0, + "learning_rate": 1.997271403792344e-05, + "loss": 1.46, + "step": 1799 + }, + { + "epoch": 0.052851018850196725, + "grad_norm": 0.0, + "learning_rate": 1.997264379065157e-05, + "loss": 1.5439, + "step": 1800 + }, + { + "epoch": 0.05288038052733572, + "grad_norm": 0.0, + "learning_rate": 1.9972573453194525e-05, + "loss": 1.4893, + "step": 1801 + }, + { + "epoch": 0.05290974220447472, + "grad_norm": 0.0, + "learning_rate": 1.9972503025552945e-05, + "loss": 1.5156, + "step": 1802 + }, + { + "epoch": 0.05293910388161372, + "grad_norm": 0.0, + "learning_rate": 1.9972432507727462e-05, + "loss": 1.627, + "step": 1803 + }, + { + "epoch": 0.05296846555875272, + "grad_norm": 0.0, + "learning_rate": 1.9972361899718724e-05, + "loss": 1.4893, + "step": 1804 + }, + { + "epoch": 0.05299782723589171, + "grad_norm": 0.0, + "learning_rate": 1.997229120152736e-05, + "loss": 1.5537, + "step": 1805 + }, + { + "epoch": 0.05302718891303071, + "grad_norm": 0.0, + "learning_rate": 1.9972220413154015e-05, + "loss": 1.5, + "step": 1806 + }, + { + "epoch": 0.05305655059016971, + "grad_norm": 0.0, + "learning_rate": 1.9972149534599324e-05, + "loss": 1.4756, + "step": 1807 + }, + { + "epoch": 0.05308591226730871, + "grad_norm": 0.0, + "learning_rate": 1.9972078565863933e-05, + "loss": 1.4902, + "step": 1808 + }, + { + "epoch": 0.053115273944447705, + "grad_norm": 0.0, + "learning_rate": 1.997200750694848e-05, + "loss": 1.373, + "step": 1809 + }, + { + "epoch": 0.05314463562158671, + "grad_norm": 0.0, + "learning_rate": 1.9971936357853608e-05, + "loss": 1.4893, + "step": 1810 + }, + { + "epoch": 0.053173997298725705, + "grad_norm": 0.0, + "learning_rate": 1.9971865118579966e-05, + "loss": 1.626, + "step": 1811 + }, + { + "epoch": 0.0532033589758647, + "grad_norm": 0.0, + "learning_rate": 1.997179378912819e-05, + "loss": 1.4893, + "step": 1812 + }, + { + "epoch": 0.0532327206530037, + "grad_norm": 0.0, + "learning_rate": 1.997172236949893e-05, + "loss": 1.4492, + "step": 1813 + }, + { + "epoch": 0.0532620823301427, + "grad_norm": 0.0, + "learning_rate": 1.9971650859692834e-05, + "loss": 1.5732, + "step": 1814 + }, + { + "epoch": 0.053291444007281696, + "grad_norm": 0.0, + "learning_rate": 1.9971579259710544e-05, + "loss": 1.6143, + "step": 1815 + }, + { + "epoch": 0.05332080568442069, + "grad_norm": 0.0, + "learning_rate": 1.9971507569552705e-05, + "loss": 1.5547, + "step": 1816 + }, + { + "epoch": 0.05335016736155969, + "grad_norm": 0.0, + "learning_rate": 1.997143578921997e-05, + "loss": 1.626, + "step": 1817 + }, + { + "epoch": 0.05337952903869869, + "grad_norm": 0.0, + "learning_rate": 1.997136391871299e-05, + "loss": 1.5225, + "step": 1818 + }, + { + "epoch": 0.05340889071583769, + "grad_norm": 0.0, + "learning_rate": 1.9971291958032415e-05, + "loss": 1.499, + "step": 1819 + }, + { + "epoch": 0.053438252392976685, + "grad_norm": 0.0, + "learning_rate": 1.9971219907178888e-05, + "loss": 1.6582, + "step": 1820 + }, + { + "epoch": 0.05346761407011569, + "grad_norm": 0.0, + "learning_rate": 1.997114776615307e-05, + "loss": 1.5615, + "step": 1821 + }, + { + "epoch": 0.053496975747254684, + "grad_norm": 0.0, + "learning_rate": 1.9971075534955605e-05, + "loss": 1.4463, + "step": 1822 + }, + { + "epoch": 0.05352633742439368, + "grad_norm": 0.0, + "learning_rate": 1.997100321358715e-05, + "loss": 1.4238, + "step": 1823 + }, + { + "epoch": 0.053555699101532676, + "grad_norm": 0.0, + "learning_rate": 1.9970930802048363e-05, + "loss": 1.5273, + "step": 1824 + }, + { + "epoch": 0.05358506077867168, + "grad_norm": 0.0, + "learning_rate": 1.9970858300339896e-05, + "loss": 1.3472, + "step": 1825 + }, + { + "epoch": 0.053614422455810676, + "grad_norm": 0.0, + "learning_rate": 1.99707857084624e-05, + "loss": 1.375, + "step": 1826 + }, + { + "epoch": 0.05364378413294967, + "grad_norm": 0.0, + "learning_rate": 1.9970713026416536e-05, + "loss": 1.4912, + "step": 1827 + }, + { + "epoch": 0.053673145810088675, + "grad_norm": 0.0, + "learning_rate": 1.997064025420296e-05, + "loss": 1.6289, + "step": 1828 + }, + { + "epoch": 0.05370250748722767, + "grad_norm": 0.0, + "learning_rate": 1.9970567391822335e-05, + "loss": 1.5547, + "step": 1829 + }, + { + "epoch": 0.05373186916436667, + "grad_norm": 0.0, + "learning_rate": 1.997049443927531e-05, + "loss": 1.5098, + "step": 1830 + }, + { + "epoch": 0.053761230841505664, + "grad_norm": 0.0, + "learning_rate": 1.9970421396562552e-05, + "loss": 1.5332, + "step": 1831 + }, + { + "epoch": 0.05379059251864467, + "grad_norm": 0.0, + "learning_rate": 1.9970348263684723e-05, + "loss": 1.5088, + "step": 1832 + }, + { + "epoch": 0.05381995419578366, + "grad_norm": 0.0, + "learning_rate": 1.997027504064248e-05, + "loss": 1.3818, + "step": 1833 + }, + { + "epoch": 0.05384931587292266, + "grad_norm": 0.0, + "learning_rate": 1.9970201727436486e-05, + "loss": 1.6113, + "step": 1834 + }, + { + "epoch": 0.05387867755006166, + "grad_norm": 0.0, + "learning_rate": 1.9970128324067403e-05, + "loss": 1.5947, + "step": 1835 + }, + { + "epoch": 0.05390803922720066, + "grad_norm": 0.0, + "learning_rate": 1.9970054830535894e-05, + "loss": 1.4551, + "step": 1836 + }, + { + "epoch": 0.053937400904339655, + "grad_norm": 0.0, + "learning_rate": 1.996998124684263e-05, + "loss": 1.5742, + "step": 1837 + }, + { + "epoch": 0.05396676258147865, + "grad_norm": 0.0, + "learning_rate": 1.996990757298827e-05, + "loss": 1.6699, + "step": 1838 + }, + { + "epoch": 0.053996124258617655, + "grad_norm": 0.0, + "learning_rate": 1.9969833808973484e-05, + "loss": 1.5547, + "step": 1839 + }, + { + "epoch": 0.05402548593575665, + "grad_norm": 0.0, + "learning_rate": 1.9969759954798938e-05, + "loss": 1.4971, + "step": 1840 + }, + { + "epoch": 0.05405484761289565, + "grad_norm": 0.0, + "learning_rate": 1.9969686010465297e-05, + "loss": 1.4277, + "step": 1841 + }, + { + "epoch": 0.05408420929003465, + "grad_norm": 0.0, + "learning_rate": 1.996961197597323e-05, + "loss": 1.4668, + "step": 1842 + }, + { + "epoch": 0.054113570967173646, + "grad_norm": 0.0, + "learning_rate": 1.9969537851323413e-05, + "loss": 1.5732, + "step": 1843 + }, + { + "epoch": 0.05414293264431264, + "grad_norm": 0.0, + "learning_rate": 1.9969463636516516e-05, + "loss": 1.6797, + "step": 1844 + }, + { + "epoch": 0.05417229432145164, + "grad_norm": 0.0, + "learning_rate": 1.99693893315532e-05, + "loss": 1.6611, + "step": 1845 + }, + { + "epoch": 0.05420165599859064, + "grad_norm": 0.0, + "learning_rate": 1.9969314936434145e-05, + "loss": 1.5605, + "step": 1846 + }, + { + "epoch": 0.05423101767572964, + "grad_norm": 0.0, + "learning_rate": 1.996924045116002e-05, + "loss": 1.4395, + "step": 1847 + }, + { + "epoch": 0.054260379352868635, + "grad_norm": 0.0, + "learning_rate": 1.99691658757315e-05, + "loss": 1.54, + "step": 1848 + }, + { + "epoch": 0.05428974103000763, + "grad_norm": 0.0, + "learning_rate": 1.9969091210149264e-05, + "loss": 1.5449, + "step": 1849 + }, + { + "epoch": 0.054319102707146634, + "grad_norm": 0.0, + "learning_rate": 1.9969016454413978e-05, + "loss": 1.46, + "step": 1850 + }, + { + "epoch": 0.05434846438428563, + "grad_norm": 0.0, + "learning_rate": 1.9968941608526328e-05, + "loss": 1.4404, + "step": 1851 + }, + { + "epoch": 0.054377826061424626, + "grad_norm": 0.0, + "learning_rate": 1.996886667248698e-05, + "loss": 1.3916, + "step": 1852 + }, + { + "epoch": 0.05440718773856363, + "grad_norm": 0.0, + "learning_rate": 1.9968791646296625e-05, + "loss": 1.6064, + "step": 1853 + }, + { + "epoch": 0.054436549415702626, + "grad_norm": 0.0, + "learning_rate": 1.996871652995593e-05, + "loss": 1.5, + "step": 1854 + }, + { + "epoch": 0.05446591109284162, + "grad_norm": 0.0, + "learning_rate": 1.9968641323465578e-05, + "loss": 1.501, + "step": 1855 + }, + { + "epoch": 0.05449527276998062, + "grad_norm": 0.0, + "learning_rate": 1.996856602682625e-05, + "loss": 1.4033, + "step": 1856 + }, + { + "epoch": 0.05452463444711962, + "grad_norm": 0.0, + "learning_rate": 1.9968490640038623e-05, + "loss": 1.5029, + "step": 1857 + }, + { + "epoch": 0.05455399612425862, + "grad_norm": 0.0, + "learning_rate": 1.9968415163103388e-05, + "loss": 1.5791, + "step": 1858 + }, + { + "epoch": 0.054583357801397614, + "grad_norm": 0.0, + "learning_rate": 1.996833959602122e-05, + "loss": 1.5293, + "step": 1859 + }, + { + "epoch": 0.05461271947853662, + "grad_norm": 0.0, + "learning_rate": 1.9968263938792804e-05, + "loss": 1.4014, + "step": 1860 + }, + { + "epoch": 0.05464208115567561, + "grad_norm": 0.0, + "learning_rate": 1.996818819141882e-05, + "loss": 1.4424, + "step": 1861 + }, + { + "epoch": 0.05467144283281461, + "grad_norm": 0.0, + "learning_rate": 1.9968112353899963e-05, + "loss": 1.6318, + "step": 1862 + }, + { + "epoch": 0.054700804509953606, + "grad_norm": 0.0, + "learning_rate": 1.996803642623691e-05, + "loss": 1.4902, + "step": 1863 + }, + { + "epoch": 0.05473016618709261, + "grad_norm": 0.0, + "learning_rate": 1.9967960408430356e-05, + "loss": 1.5293, + "step": 1864 + }, + { + "epoch": 0.054759527864231605, + "grad_norm": 0.0, + "learning_rate": 1.9967884300480976e-05, + "loss": 1.5625, + "step": 1865 + }, + { + "epoch": 0.0547888895413706, + "grad_norm": 0.0, + "learning_rate": 1.996780810238947e-05, + "loss": 1.5029, + "step": 1866 + }, + { + "epoch": 0.054818251218509605, + "grad_norm": 0.0, + "learning_rate": 1.9967731814156522e-05, + "loss": 1.4805, + "step": 1867 + }, + { + "epoch": 0.0548476128956486, + "grad_norm": 0.0, + "learning_rate": 1.996765543578282e-05, + "loss": 1.5039, + "step": 1868 + }, + { + "epoch": 0.0548769745727876, + "grad_norm": 0.0, + "learning_rate": 1.996757896726906e-05, + "loss": 1.4893, + "step": 1869 + }, + { + "epoch": 0.05490633624992659, + "grad_norm": 0.0, + "learning_rate": 1.996750240861593e-05, + "loss": 1.5469, + "step": 1870 + }, + { + "epoch": 0.054935697927065597, + "grad_norm": 0.0, + "learning_rate": 1.9967425759824122e-05, + "loss": 1.4883, + "step": 1871 + }, + { + "epoch": 0.05496505960420459, + "grad_norm": 0.0, + "learning_rate": 1.9967349020894333e-05, + "loss": 1.5312, + "step": 1872 + }, + { + "epoch": 0.05499442128134359, + "grad_norm": 0.0, + "learning_rate": 1.9967272191827248e-05, + "loss": 1.3633, + "step": 1873 + }, + { + "epoch": 0.055023782958482585, + "grad_norm": 0.0, + "learning_rate": 1.9967195272623572e-05, + "loss": 1.5752, + "step": 1874 + }, + { + "epoch": 0.05505314463562159, + "grad_norm": 0.0, + "learning_rate": 1.9967118263283996e-05, + "loss": 1.502, + "step": 1875 + }, + { + "epoch": 0.055082506312760585, + "grad_norm": 0.0, + "learning_rate": 1.996704116380922e-05, + "loss": 1.4736, + "step": 1876 + }, + { + "epoch": 0.05511186798989958, + "grad_norm": 0.0, + "learning_rate": 1.9966963974199934e-05, + "loss": 1.5244, + "step": 1877 + }, + { + "epoch": 0.055141229667038584, + "grad_norm": 0.0, + "learning_rate": 1.9966886694456843e-05, + "loss": 1.5342, + "step": 1878 + }, + { + "epoch": 0.05517059134417758, + "grad_norm": 0.0, + "learning_rate": 1.9966809324580638e-05, + "loss": 1.501, + "step": 1879 + }, + { + "epoch": 0.055199953021316576, + "grad_norm": 0.0, + "learning_rate": 1.996673186457203e-05, + "loss": 1.4365, + "step": 1880 + }, + { + "epoch": 0.05522931469845557, + "grad_norm": 0.0, + "learning_rate": 1.996665431443171e-05, + "loss": 1.6924, + "step": 1881 + }, + { + "epoch": 0.055258676375594576, + "grad_norm": 0.0, + "learning_rate": 1.996657667416038e-05, + "loss": 1.5645, + "step": 1882 + }, + { + "epoch": 0.05528803805273357, + "grad_norm": 0.0, + "learning_rate": 1.996649894375875e-05, + "loss": 1.543, + "step": 1883 + }, + { + "epoch": 0.05531739972987257, + "grad_norm": 0.0, + "learning_rate": 1.9966421123227512e-05, + "loss": 1.3555, + "step": 1884 + }, + { + "epoch": 0.05534676140701157, + "grad_norm": 0.0, + "learning_rate": 1.996634321256738e-05, + "loss": 1.5928, + "step": 1885 + }, + { + "epoch": 0.05537612308415057, + "grad_norm": 0.0, + "learning_rate": 1.9966265211779053e-05, + "loss": 1.4854, + "step": 1886 + }, + { + "epoch": 0.055405484761289564, + "grad_norm": 0.0, + "learning_rate": 1.9966187120863236e-05, + "loss": 1.5068, + "step": 1887 + }, + { + "epoch": 0.05543484643842856, + "grad_norm": 0.0, + "learning_rate": 1.996610893982064e-05, + "loss": 1.4355, + "step": 1888 + }, + { + "epoch": 0.05546420811556756, + "grad_norm": 0.0, + "learning_rate": 1.9966030668651966e-05, + "loss": 1.4551, + "step": 1889 + }, + { + "epoch": 0.05549356979270656, + "grad_norm": 0.0, + "learning_rate": 1.996595230735792e-05, + "loss": 1.4678, + "step": 1890 + }, + { + "epoch": 0.055522931469845556, + "grad_norm": 0.0, + "learning_rate": 1.9965873855939222e-05, + "loss": 1.4805, + "step": 1891 + }, + { + "epoch": 0.05555229314698456, + "grad_norm": 0.0, + "learning_rate": 1.996579531439657e-05, + "loss": 1.4229, + "step": 1892 + }, + { + "epoch": 0.055581654824123555, + "grad_norm": 0.0, + "learning_rate": 1.996571668273068e-05, + "loss": 1.5264, + "step": 1893 + }, + { + "epoch": 0.05561101650126255, + "grad_norm": 0.0, + "learning_rate": 1.9965637960942262e-05, + "loss": 1.459, + "step": 1894 + }, + { + "epoch": 0.05564037817840155, + "grad_norm": 0.0, + "learning_rate": 1.996555914903203e-05, + "loss": 1.5029, + "step": 1895 + }, + { + "epoch": 0.05566973985554055, + "grad_norm": 0.0, + "learning_rate": 1.9965480247000692e-05, + "loss": 1.5449, + "step": 1896 + }, + { + "epoch": 0.05569910153267955, + "grad_norm": 0.0, + "learning_rate": 1.9965401254848964e-05, + "loss": 1.5176, + "step": 1897 + }, + { + "epoch": 0.05572846320981854, + "grad_norm": 0.0, + "learning_rate": 1.996532217257756e-05, + "loss": 1.4453, + "step": 1898 + }, + { + "epoch": 0.05575782488695754, + "grad_norm": 0.0, + "learning_rate": 1.9965243000187192e-05, + "loss": 1.5771, + "step": 1899 + }, + { + "epoch": 0.05578718656409654, + "grad_norm": 0.0, + "learning_rate": 1.9965163737678587e-05, + "loss": 1.542, + "step": 1900 + }, + { + "epoch": 0.05581654824123554, + "grad_norm": 0.0, + "learning_rate": 1.996508438505245e-05, + "loss": 1.4912, + "step": 1901 + }, + { + "epoch": 0.055845909918374535, + "grad_norm": 0.0, + "learning_rate": 1.9965004942309504e-05, + "loss": 1.5098, + "step": 1902 + }, + { + "epoch": 0.05587527159551354, + "grad_norm": 0.0, + "learning_rate": 1.9964925409450463e-05, + "loss": 1.5127, + "step": 1903 + }, + { + "epoch": 0.055904633272652535, + "grad_norm": 0.0, + "learning_rate": 1.9964845786476053e-05, + "loss": 1.5244, + "step": 1904 + }, + { + "epoch": 0.05593399494979153, + "grad_norm": 0.0, + "learning_rate": 1.9964766073386988e-05, + "loss": 1.5557, + "step": 1905 + }, + { + "epoch": 0.05596335662693053, + "grad_norm": 0.0, + "learning_rate": 1.996468627018399e-05, + "loss": 1.4844, + "step": 1906 + }, + { + "epoch": 0.05599271830406953, + "grad_norm": 0.0, + "learning_rate": 1.996460637686779e-05, + "loss": 1.4951, + "step": 1907 + }, + { + "epoch": 0.05602207998120853, + "grad_norm": 0.0, + "learning_rate": 1.9964526393439095e-05, + "loss": 1.543, + "step": 1908 + }, + { + "epoch": 0.05605144165834752, + "grad_norm": 0.0, + "learning_rate": 1.996444631989864e-05, + "loss": 1.4736, + "step": 1909 + }, + { + "epoch": 0.056080803335486526, + "grad_norm": 0.0, + "learning_rate": 1.996436615624714e-05, + "loss": 1.5303, + "step": 1910 + }, + { + "epoch": 0.05611016501262552, + "grad_norm": 0.0, + "learning_rate": 1.9964285902485328e-05, + "loss": 1.3945, + "step": 1911 + }, + { + "epoch": 0.05613952668976452, + "grad_norm": 0.0, + "learning_rate": 1.9964205558613927e-05, + "loss": 1.4424, + "step": 1912 + }, + { + "epoch": 0.056168888366903515, + "grad_norm": 0.0, + "learning_rate": 1.9964125124633664e-05, + "loss": 1.5205, + "step": 1913 + }, + { + "epoch": 0.05619825004404252, + "grad_norm": 0.0, + "learning_rate": 1.9964044600545266e-05, + "loss": 1.5781, + "step": 1914 + }, + { + "epoch": 0.056227611721181514, + "grad_norm": 0.0, + "learning_rate": 1.996396398634946e-05, + "loss": 1.4834, + "step": 1915 + }, + { + "epoch": 0.05625697339832051, + "grad_norm": 0.0, + "learning_rate": 1.9963883282046973e-05, + "loss": 1.5273, + "step": 1916 + }, + { + "epoch": 0.056286335075459513, + "grad_norm": 0.0, + "learning_rate": 1.996380248763854e-05, + "loss": 1.4863, + "step": 1917 + }, + { + "epoch": 0.05631569675259851, + "grad_norm": 0.0, + "learning_rate": 1.996372160312489e-05, + "loss": 1.5361, + "step": 1918 + }, + { + "epoch": 0.056345058429737506, + "grad_norm": 0.0, + "learning_rate": 1.9963640628506754e-05, + "loss": 1.6025, + "step": 1919 + }, + { + "epoch": 0.0563744201068765, + "grad_norm": 0.0, + "learning_rate": 1.9963559563784863e-05, + "loss": 1.541, + "step": 1920 + }, + { + "epoch": 0.056403781784015505, + "grad_norm": 0.0, + "learning_rate": 1.9963478408959954e-05, + "loss": 1.582, + "step": 1921 + }, + { + "epoch": 0.0564331434611545, + "grad_norm": 0.0, + "learning_rate": 1.9963397164032753e-05, + "loss": 1.5205, + "step": 1922 + }, + { + "epoch": 0.0564625051382935, + "grad_norm": 0.0, + "learning_rate": 1.9963315829004004e-05, + "loss": 1.5527, + "step": 1923 + }, + { + "epoch": 0.056491866815432494, + "grad_norm": 0.0, + "learning_rate": 1.9963234403874435e-05, + "loss": 1.4639, + "step": 1924 + }, + { + "epoch": 0.0565212284925715, + "grad_norm": 0.0, + "learning_rate": 1.9963152888644787e-05, + "loss": 1.4502, + "step": 1925 + }, + { + "epoch": 0.05655059016971049, + "grad_norm": 0.0, + "learning_rate": 1.9963071283315796e-05, + "loss": 1.4795, + "step": 1926 + }, + { + "epoch": 0.05657995184684949, + "grad_norm": 0.0, + "learning_rate": 1.99629895878882e-05, + "loss": 1.5605, + "step": 1927 + }, + { + "epoch": 0.05660931352398849, + "grad_norm": 0.0, + "learning_rate": 1.9962907802362738e-05, + "loss": 1.5352, + "step": 1928 + }, + { + "epoch": 0.05663867520112749, + "grad_norm": 0.0, + "learning_rate": 1.9962825926740147e-05, + "loss": 1.5518, + "step": 1929 + }, + { + "epoch": 0.056668036878266485, + "grad_norm": 0.0, + "learning_rate": 1.9962743961021175e-05, + "loss": 1.6377, + "step": 1930 + }, + { + "epoch": 0.05669739855540548, + "grad_norm": 0.0, + "learning_rate": 1.9962661905206555e-05, + "loss": 1.4902, + "step": 1931 + }, + { + "epoch": 0.056726760232544485, + "grad_norm": 0.0, + "learning_rate": 1.996257975929703e-05, + "loss": 1.5898, + "step": 1932 + }, + { + "epoch": 0.05675612190968348, + "grad_norm": 0.0, + "learning_rate": 1.9962497523293346e-05, + "loss": 1.4502, + "step": 1933 + }, + { + "epoch": 0.05678548358682248, + "grad_norm": 0.0, + "learning_rate": 1.9962415197196244e-05, + "loss": 1.4971, + "step": 1934 + }, + { + "epoch": 0.05681484526396148, + "grad_norm": 0.0, + "learning_rate": 1.9962332781006473e-05, + "loss": 1.5615, + "step": 1935 + }, + { + "epoch": 0.05684420694110048, + "grad_norm": 0.0, + "learning_rate": 1.9962250274724772e-05, + "loss": 1.5586, + "step": 1936 + }, + { + "epoch": 0.05687356861823947, + "grad_norm": 0.0, + "learning_rate": 1.9962167678351892e-05, + "loss": 1.542, + "step": 1937 + }, + { + "epoch": 0.05690293029537847, + "grad_norm": 0.0, + "learning_rate": 1.9962084991888578e-05, + "loss": 1.5537, + "step": 1938 + }, + { + "epoch": 0.05693229197251747, + "grad_norm": 0.0, + "learning_rate": 1.9962002215335577e-05, + "loss": 1.5107, + "step": 1939 + }, + { + "epoch": 0.05696165364965647, + "grad_norm": 0.0, + "learning_rate": 1.9961919348693643e-05, + "loss": 1.5479, + "step": 1940 + }, + { + "epoch": 0.056991015326795465, + "grad_norm": 0.0, + "learning_rate": 1.9961836391963518e-05, + "loss": 1.626, + "step": 1941 + }, + { + "epoch": 0.05702037700393447, + "grad_norm": 0.0, + "learning_rate": 1.9961753345145954e-05, + "loss": 1.6035, + "step": 1942 + }, + { + "epoch": 0.057049738681073464, + "grad_norm": 0.0, + "learning_rate": 1.9961670208241705e-05, + "loss": 1.5293, + "step": 1943 + }, + { + "epoch": 0.05707910035821246, + "grad_norm": 0.0, + "learning_rate": 1.9961586981251522e-05, + "loss": 1.3389, + "step": 1944 + }, + { + "epoch": 0.05710846203535146, + "grad_norm": 0.0, + "learning_rate": 1.9961503664176156e-05, + "loss": 1.5137, + "step": 1945 + }, + { + "epoch": 0.05713782371249046, + "grad_norm": 0.0, + "learning_rate": 1.996142025701636e-05, + "loss": 1.5928, + "step": 1946 + }, + { + "epoch": 0.057167185389629456, + "grad_norm": 0.0, + "learning_rate": 1.996133675977289e-05, + "loss": 1.5752, + "step": 1947 + }, + { + "epoch": 0.05719654706676845, + "grad_norm": 0.0, + "learning_rate": 1.99612531724465e-05, + "loss": 1.5137, + "step": 1948 + }, + { + "epoch": 0.057225908743907455, + "grad_norm": 0.0, + "learning_rate": 1.9961169495037947e-05, + "loss": 1.4102, + "step": 1949 + }, + { + "epoch": 0.05725527042104645, + "grad_norm": 0.0, + "learning_rate": 1.996108572754799e-05, + "loss": 1.5039, + "step": 1950 + }, + { + "epoch": 0.05728463209818545, + "grad_norm": 0.0, + "learning_rate": 1.9961001869977376e-05, + "loss": 1.418, + "step": 1951 + }, + { + "epoch": 0.057313993775324444, + "grad_norm": 0.0, + "learning_rate": 1.996091792232688e-05, + "loss": 1.4561, + "step": 1952 + }, + { + "epoch": 0.05734335545246345, + "grad_norm": 0.0, + "learning_rate": 1.9960833884597246e-05, + "loss": 1.6094, + "step": 1953 + }, + { + "epoch": 0.057372717129602444, + "grad_norm": 0.0, + "learning_rate": 1.996074975678924e-05, + "loss": 1.5352, + "step": 1954 + }, + { + "epoch": 0.05740207880674144, + "grad_norm": 0.0, + "learning_rate": 1.996066553890363e-05, + "loss": 1.5176, + "step": 1955 + }, + { + "epoch": 0.057431440483880436, + "grad_norm": 0.0, + "learning_rate": 1.9960581230941166e-05, + "loss": 1.4854, + "step": 1956 + }, + { + "epoch": 0.05746080216101944, + "grad_norm": 0.0, + "learning_rate": 1.996049683290261e-05, + "loss": 1.4023, + "step": 1957 + }, + { + "epoch": 0.057490163838158435, + "grad_norm": 0.0, + "learning_rate": 1.9960412344788738e-05, + "loss": 1.583, + "step": 1958 + }, + { + "epoch": 0.05751952551529743, + "grad_norm": 0.0, + "learning_rate": 1.99603277666003e-05, + "loss": 1.6045, + "step": 1959 + }, + { + "epoch": 0.057548887192436435, + "grad_norm": 0.0, + "learning_rate": 1.9960243098338076e-05, + "loss": 1.4961, + "step": 1960 + }, + { + "epoch": 0.05757824886957543, + "grad_norm": 0.0, + "learning_rate": 1.9960158340002814e-05, + "loss": 1.4619, + "step": 1961 + }, + { + "epoch": 0.05760761054671443, + "grad_norm": 0.0, + "learning_rate": 1.996007349159529e-05, + "loss": 1.6953, + "step": 1962 + }, + { + "epoch": 0.057636972223853424, + "grad_norm": 0.0, + "learning_rate": 1.9959988553116278e-05, + "loss": 1.4902, + "step": 1963 + }, + { + "epoch": 0.05766633390099243, + "grad_norm": 0.0, + "learning_rate": 1.9959903524566532e-05, + "loss": 1.4521, + "step": 1964 + }, + { + "epoch": 0.05769569557813142, + "grad_norm": 0.0, + "learning_rate": 1.995981840594683e-05, + "loss": 1.4434, + "step": 1965 + }, + { + "epoch": 0.05772505725527042, + "grad_norm": 0.0, + "learning_rate": 1.995973319725794e-05, + "loss": 1.5273, + "step": 1966 + }, + { + "epoch": 0.05775441893240942, + "grad_norm": 0.0, + "learning_rate": 1.995964789850063e-05, + "loss": 1.4961, + "step": 1967 + }, + { + "epoch": 0.05778378060954842, + "grad_norm": 0.0, + "learning_rate": 1.9959562509675673e-05, + "loss": 1.4932, + "step": 1968 + }, + { + "epoch": 0.057813142286687415, + "grad_norm": 0.0, + "learning_rate": 1.9959477030783845e-05, + "loss": 1.5576, + "step": 1969 + }, + { + "epoch": 0.05784250396382641, + "grad_norm": 0.0, + "learning_rate": 1.9959391461825913e-05, + "loss": 1.5791, + "step": 1970 + }, + { + "epoch": 0.057871865640965414, + "grad_norm": 0.0, + "learning_rate": 1.9959305802802653e-05, + "loss": 1.5039, + "step": 1971 + }, + { + "epoch": 0.05790122731810441, + "grad_norm": 0.0, + "learning_rate": 1.995922005371484e-05, + "loss": 1.5137, + "step": 1972 + }, + { + "epoch": 0.05793058899524341, + "grad_norm": 0.0, + "learning_rate": 1.995913421456325e-05, + "loss": 1.4453, + "step": 1973 + }, + { + "epoch": 0.05795995067238241, + "grad_norm": 0.0, + "learning_rate": 1.995904828534866e-05, + "loss": 1.5381, + "step": 1974 + }, + { + "epoch": 0.057989312349521406, + "grad_norm": 0.0, + "learning_rate": 1.9958962266071845e-05, + "loss": 1.5098, + "step": 1975 + }, + { + "epoch": 0.0580186740266604, + "grad_norm": 0.0, + "learning_rate": 1.9958876156733585e-05, + "loss": 1.5225, + "step": 1976 + }, + { + "epoch": 0.0580480357037994, + "grad_norm": 0.0, + "learning_rate": 1.9958789957334654e-05, + "loss": 1.542, + "step": 1977 + }, + { + "epoch": 0.0580773973809384, + "grad_norm": 0.0, + "learning_rate": 1.9958703667875837e-05, + "loss": 1.416, + "step": 1978 + }, + { + "epoch": 0.0581067590580774, + "grad_norm": 0.0, + "learning_rate": 1.9958617288357912e-05, + "loss": 1.5781, + "step": 1979 + }, + { + "epoch": 0.058136120735216394, + "grad_norm": 0.0, + "learning_rate": 1.995853081878166e-05, + "loss": 1.5576, + "step": 1980 + }, + { + "epoch": 0.05816548241235539, + "grad_norm": 0.0, + "learning_rate": 1.9958444259147866e-05, + "loss": 1.5488, + "step": 1981 + }, + { + "epoch": 0.058194844089494394, + "grad_norm": 0.0, + "learning_rate": 1.9958357609457306e-05, + "loss": 1.6387, + "step": 1982 + }, + { + "epoch": 0.05822420576663339, + "grad_norm": 0.0, + "learning_rate": 1.995827086971077e-05, + "loss": 1.543, + "step": 1983 + }, + { + "epoch": 0.058253567443772386, + "grad_norm": 0.0, + "learning_rate": 1.9958184039909043e-05, + "loss": 1.4883, + "step": 1984 + }, + { + "epoch": 0.05828292912091139, + "grad_norm": 0.0, + "learning_rate": 1.9958097120052903e-05, + "loss": 1.4531, + "step": 1985 + }, + { + "epoch": 0.058312290798050385, + "grad_norm": 0.0, + "learning_rate": 1.995801011014314e-05, + "loss": 1.459, + "step": 1986 + }, + { + "epoch": 0.05834165247518938, + "grad_norm": 0.0, + "learning_rate": 1.9957923010180544e-05, + "loss": 1.4961, + "step": 1987 + }, + { + "epoch": 0.05837101415232838, + "grad_norm": 0.0, + "learning_rate": 1.9957835820165902e-05, + "loss": 1.4561, + "step": 1988 + }, + { + "epoch": 0.05840037582946738, + "grad_norm": 0.0, + "learning_rate": 1.9957748540099996e-05, + "loss": 1.375, + "step": 1989 + }, + { + "epoch": 0.05842973750660638, + "grad_norm": 0.0, + "learning_rate": 1.995766116998362e-05, + "loss": 1.5771, + "step": 1990 + }, + { + "epoch": 0.058459099183745374, + "grad_norm": 0.0, + "learning_rate": 1.9957573709817563e-05, + "loss": 1.54, + "step": 1991 + }, + { + "epoch": 0.05848846086088438, + "grad_norm": 0.0, + "learning_rate": 1.995748615960262e-05, + "loss": 1.582, + "step": 1992 + }, + { + "epoch": 0.05851782253802337, + "grad_norm": 0.0, + "learning_rate": 1.995739851933958e-05, + "loss": 1.4824, + "step": 1993 + }, + { + "epoch": 0.05854718421516237, + "grad_norm": 0.0, + "learning_rate": 1.9957310789029232e-05, + "loss": 1.5479, + "step": 1994 + }, + { + "epoch": 0.058576545892301365, + "grad_norm": 0.0, + "learning_rate": 1.9957222968672373e-05, + "loss": 1.3906, + "step": 1995 + }, + { + "epoch": 0.05860590756944037, + "grad_norm": 0.0, + "learning_rate": 1.99571350582698e-05, + "loss": 1.4785, + "step": 1996 + }, + { + "epoch": 0.058635269246579365, + "grad_norm": 0.0, + "learning_rate": 1.9957047057822302e-05, + "loss": 1.5996, + "step": 1997 + }, + { + "epoch": 0.05866463092371836, + "grad_norm": 0.0, + "learning_rate": 1.9956958967330674e-05, + "loss": 1.3613, + "step": 1998 + }, + { + "epoch": 0.058693992600857364, + "grad_norm": 0.0, + "learning_rate": 1.995687078679572e-05, + "loss": 1.5986, + "step": 1999 + }, + { + "epoch": 0.05872335427799636, + "grad_norm": 0.0, + "learning_rate": 1.9956782516218235e-05, + "loss": 1.4854, + "step": 2000 + }, + { + "epoch": 0.05875271595513536, + "grad_norm": 0.0, + "learning_rate": 1.9956694155599012e-05, + "loss": 1.4814, + "step": 2001 + }, + { + "epoch": 0.05878207763227435, + "grad_norm": 0.0, + "learning_rate": 1.9956605704938856e-05, + "loss": 1.542, + "step": 2002 + }, + { + "epoch": 0.058811439309413356, + "grad_norm": 0.0, + "learning_rate": 1.9956517164238565e-05, + "loss": 1.5078, + "step": 2003 + }, + { + "epoch": 0.05884080098655235, + "grad_norm": 0.0, + "learning_rate": 1.995642853349894e-05, + "loss": 1.46, + "step": 2004 + }, + { + "epoch": 0.05887016266369135, + "grad_norm": 0.0, + "learning_rate": 1.9956339812720778e-05, + "loss": 1.4736, + "step": 2005 + }, + { + "epoch": 0.058899524340830345, + "grad_norm": 0.0, + "learning_rate": 1.995625100190489e-05, + "loss": 1.4414, + "step": 2006 + }, + { + "epoch": 0.05892888601796935, + "grad_norm": 0.0, + "learning_rate": 1.9956162101052074e-05, + "loss": 1.5146, + "step": 2007 + }, + { + "epoch": 0.058958247695108344, + "grad_norm": 0.0, + "learning_rate": 1.995607311016313e-05, + "loss": 1.4688, + "step": 2008 + }, + { + "epoch": 0.05898760937224734, + "grad_norm": 0.0, + "learning_rate": 1.9955984029238872e-05, + "loss": 1.4414, + "step": 2009 + }, + { + "epoch": 0.059016971049386344, + "grad_norm": 0.0, + "learning_rate": 1.9955894858280095e-05, + "loss": 1.3916, + "step": 2010 + }, + { + "epoch": 0.05904633272652534, + "grad_norm": 0.0, + "learning_rate": 1.9955805597287614e-05, + "loss": 1.4609, + "step": 2011 + }, + { + "epoch": 0.059075694403664336, + "grad_norm": 0.0, + "learning_rate": 1.9955716246262232e-05, + "loss": 1.3213, + "step": 2012 + }, + { + "epoch": 0.05910505608080333, + "grad_norm": 0.0, + "learning_rate": 1.9955626805204763e-05, + "loss": 1.5703, + "step": 2013 + }, + { + "epoch": 0.059134417757942336, + "grad_norm": 0.0, + "learning_rate": 1.9955537274116006e-05, + "loss": 1.5674, + "step": 2014 + }, + { + "epoch": 0.05916377943508133, + "grad_norm": 0.0, + "learning_rate": 1.9955447652996778e-05, + "loss": 1.4785, + "step": 2015 + }, + { + "epoch": 0.05919314111222033, + "grad_norm": 0.0, + "learning_rate": 1.9955357941847885e-05, + "loss": 1.4355, + "step": 2016 + }, + { + "epoch": 0.05922250278935933, + "grad_norm": 0.0, + "learning_rate": 1.9955268140670144e-05, + "loss": 1.5566, + "step": 2017 + }, + { + "epoch": 0.05925186446649833, + "grad_norm": 0.0, + "learning_rate": 1.995517824946436e-05, + "loss": 1.5391, + "step": 2018 + }, + { + "epoch": 0.059281226143637324, + "grad_norm": 0.0, + "learning_rate": 1.9955088268231355e-05, + "loss": 1.5605, + "step": 2019 + }, + { + "epoch": 0.05931058782077632, + "grad_norm": 0.0, + "learning_rate": 1.9954998196971934e-05, + "loss": 1.5527, + "step": 2020 + }, + { + "epoch": 0.05933994949791532, + "grad_norm": 0.0, + "learning_rate": 1.9954908035686914e-05, + "loss": 1.6143, + "step": 2021 + }, + { + "epoch": 0.05936931117505432, + "grad_norm": 0.0, + "learning_rate": 1.995481778437711e-05, + "loss": 1.498, + "step": 2022 + }, + { + "epoch": 0.059398672852193315, + "grad_norm": 0.0, + "learning_rate": 1.9954727443043342e-05, + "loss": 1.5283, + "step": 2023 + }, + { + "epoch": 0.05942803452933232, + "grad_norm": 0.0, + "learning_rate": 1.9954637011686424e-05, + "loss": 1.5254, + "step": 2024 + }, + { + "epoch": 0.059457396206471315, + "grad_norm": 0.0, + "learning_rate": 1.9954546490307172e-05, + "loss": 1.624, + "step": 2025 + }, + { + "epoch": 0.05948675788361031, + "grad_norm": 0.0, + "learning_rate": 1.9954455878906408e-05, + "loss": 1.3955, + "step": 2026 + }, + { + "epoch": 0.05951611956074931, + "grad_norm": 0.0, + "learning_rate": 1.995436517748495e-05, + "loss": 1.4463, + "step": 2027 + }, + { + "epoch": 0.05954548123788831, + "grad_norm": 0.0, + "learning_rate": 1.995427438604362e-05, + "loss": 1.5244, + "step": 2028 + }, + { + "epoch": 0.05957484291502731, + "grad_norm": 0.0, + "learning_rate": 1.9954183504583235e-05, + "loss": 1.582, + "step": 2029 + }, + { + "epoch": 0.0596042045921663, + "grad_norm": 0.0, + "learning_rate": 1.995409253310462e-05, + "loss": 1.5361, + "step": 2030 + }, + { + "epoch": 0.059633566269305306, + "grad_norm": 0.0, + "learning_rate": 1.9954001471608597e-05, + "loss": 1.3994, + "step": 2031 + }, + { + "epoch": 0.0596629279464443, + "grad_norm": 0.0, + "learning_rate": 1.995391032009599e-05, + "loss": 1.4912, + "step": 2032 + }, + { + "epoch": 0.0596922896235833, + "grad_norm": 0.0, + "learning_rate": 1.9953819078567626e-05, + "loss": 1.4883, + "step": 2033 + }, + { + "epoch": 0.059721651300722295, + "grad_norm": 0.0, + "learning_rate": 1.9953727747024322e-05, + "loss": 1.6006, + "step": 2034 + }, + { + "epoch": 0.0597510129778613, + "grad_norm": 0.0, + "learning_rate": 1.995363632546691e-05, + "loss": 1.54, + "step": 2035 + }, + { + "epoch": 0.059780374655000294, + "grad_norm": 0.0, + "learning_rate": 1.9953544813896214e-05, + "loss": 1.5664, + "step": 2036 + }, + { + "epoch": 0.05980973633213929, + "grad_norm": 0.0, + "learning_rate": 1.9953453212313066e-05, + "loss": 1.4521, + "step": 2037 + }, + { + "epoch": 0.05983909800927829, + "grad_norm": 0.0, + "learning_rate": 1.995336152071829e-05, + "loss": 1.4805, + "step": 2038 + }, + { + "epoch": 0.05986845968641729, + "grad_norm": 0.0, + "learning_rate": 1.9953269739112716e-05, + "loss": 1.4453, + "step": 2039 + }, + { + "epoch": 0.059897821363556286, + "grad_norm": 0.0, + "learning_rate": 1.9953177867497176e-05, + "loss": 1.5508, + "step": 2040 + }, + { + "epoch": 0.05992718304069528, + "grad_norm": 0.0, + "learning_rate": 1.9953085905872497e-05, + "loss": 1.4316, + "step": 2041 + }, + { + "epoch": 0.059956544717834286, + "grad_norm": 0.0, + "learning_rate": 1.9952993854239518e-05, + "loss": 1.5049, + "step": 2042 + }, + { + "epoch": 0.05998590639497328, + "grad_norm": 0.0, + "learning_rate": 1.9952901712599063e-05, + "loss": 1.501, + "step": 2043 + }, + { + "epoch": 0.06001526807211228, + "grad_norm": 0.0, + "learning_rate": 1.995280948095197e-05, + "loss": 1.4619, + "step": 2044 + }, + { + "epoch": 0.060044629749251274, + "grad_norm": 0.0, + "learning_rate": 1.995271715929907e-05, + "loss": 1.5557, + "step": 2045 + }, + { + "epoch": 0.06007399142639028, + "grad_norm": 0.0, + "learning_rate": 1.99526247476412e-05, + "loss": 1.457, + "step": 2046 + }, + { + "epoch": 0.060103353103529274, + "grad_norm": 0.0, + "learning_rate": 1.9952532245979198e-05, + "loss": 1.4365, + "step": 2047 + }, + { + "epoch": 0.06013271478066827, + "grad_norm": 0.0, + "learning_rate": 1.9952439654313896e-05, + "loss": 1.4424, + "step": 2048 + }, + { + "epoch": 0.06016207645780727, + "grad_norm": 0.0, + "learning_rate": 1.9952346972646135e-05, + "loss": 1.582, + "step": 2049 + }, + { + "epoch": 0.06019143813494627, + "grad_norm": 0.0, + "learning_rate": 1.995225420097675e-05, + "loss": 1.3213, + "step": 2050 + }, + { + "epoch": 0.060220799812085266, + "grad_norm": 0.0, + "learning_rate": 1.9952161339306584e-05, + "loss": 1.6436, + "step": 2051 + }, + { + "epoch": 0.06025016148922426, + "grad_norm": 0.0, + "learning_rate": 1.995206838763647e-05, + "loss": 1.5254, + "step": 2052 + }, + { + "epoch": 0.060279523166363265, + "grad_norm": 0.0, + "learning_rate": 1.9951975345967256e-05, + "loss": 1.458, + "step": 2053 + }, + { + "epoch": 0.06030888484350226, + "grad_norm": 0.0, + "learning_rate": 1.9951882214299778e-05, + "loss": 1.5264, + "step": 2054 + }, + { + "epoch": 0.06033824652064126, + "grad_norm": 0.0, + "learning_rate": 1.9951788992634883e-05, + "loss": 1.4736, + "step": 2055 + }, + { + "epoch": 0.06036760819778026, + "grad_norm": 0.0, + "learning_rate": 1.9951695680973412e-05, + "loss": 1.5156, + "step": 2056 + }, + { + "epoch": 0.06039696987491926, + "grad_norm": 0.0, + "learning_rate": 1.9951602279316205e-05, + "loss": 1.6152, + "step": 2057 + }, + { + "epoch": 0.06042633155205825, + "grad_norm": 0.0, + "learning_rate": 1.9951508787664112e-05, + "loss": 1.4102, + "step": 2058 + }, + { + "epoch": 0.06045569322919725, + "grad_norm": 0.0, + "learning_rate": 1.9951415206017974e-05, + "loss": 1.457, + "step": 2059 + }, + { + "epoch": 0.06048505490633625, + "grad_norm": 0.0, + "learning_rate": 1.9951321534378643e-05, + "loss": 1.6045, + "step": 2060 + }, + { + "epoch": 0.06051441658347525, + "grad_norm": 0.0, + "learning_rate": 1.9951227772746962e-05, + "loss": 1.5322, + "step": 2061 + }, + { + "epoch": 0.060543778260614245, + "grad_norm": 0.0, + "learning_rate": 1.9951133921123776e-05, + "loss": 1.6357, + "step": 2062 + }, + { + "epoch": 0.06057313993775324, + "grad_norm": 0.0, + "learning_rate": 1.9951039979509943e-05, + "loss": 1.5273, + "step": 2063 + }, + { + "epoch": 0.060602501614892244, + "grad_norm": 0.0, + "learning_rate": 1.9950945947906303e-05, + "loss": 1.4268, + "step": 2064 + }, + { + "epoch": 0.06063186329203124, + "grad_norm": 0.0, + "learning_rate": 1.995085182631371e-05, + "loss": 1.5898, + "step": 2065 + }, + { + "epoch": 0.06066122496917024, + "grad_norm": 0.0, + "learning_rate": 1.9950757614733017e-05, + "loss": 1.5479, + "step": 2066 + }, + { + "epoch": 0.06069058664630924, + "grad_norm": 0.0, + "learning_rate": 1.9950663313165076e-05, + "loss": 1.4893, + "step": 2067 + }, + { + "epoch": 0.060719948323448236, + "grad_norm": 0.0, + "learning_rate": 1.9950568921610736e-05, + "loss": 1.5596, + "step": 2068 + }, + { + "epoch": 0.06074931000058723, + "grad_norm": 0.0, + "learning_rate": 1.9950474440070855e-05, + "loss": 1.5527, + "step": 2069 + }, + { + "epoch": 0.06077867167772623, + "grad_norm": 0.0, + "learning_rate": 1.9950379868546284e-05, + "loss": 1.5889, + "step": 2070 + }, + { + "epoch": 0.06080803335486523, + "grad_norm": 0.0, + "learning_rate": 1.995028520703788e-05, + "loss": 1.5977, + "step": 2071 + }, + { + "epoch": 0.06083739503200423, + "grad_norm": 0.0, + "learning_rate": 1.99501904555465e-05, + "loss": 1.5195, + "step": 2072 + }, + { + "epoch": 0.060866756709143224, + "grad_norm": 0.0, + "learning_rate": 1.9950095614072995e-05, + "loss": 1.5547, + "step": 2073 + }, + { + "epoch": 0.06089611838628223, + "grad_norm": 0.0, + "learning_rate": 1.9950000682618232e-05, + "loss": 1.5703, + "step": 2074 + }, + { + "epoch": 0.060925480063421224, + "grad_norm": 0.0, + "learning_rate": 1.9949905661183064e-05, + "loss": 1.3516, + "step": 2075 + }, + { + "epoch": 0.06095484174056022, + "grad_norm": 0.0, + "learning_rate": 1.9949810549768344e-05, + "loss": 1.5225, + "step": 2076 + }, + { + "epoch": 0.060984203417699216, + "grad_norm": 0.0, + "learning_rate": 1.9949715348374945e-05, + "loss": 1.4473, + "step": 2077 + }, + { + "epoch": 0.06101356509483822, + "grad_norm": 0.0, + "learning_rate": 1.9949620057003722e-05, + "loss": 1.4424, + "step": 2078 + }, + { + "epoch": 0.061042926771977216, + "grad_norm": 0.0, + "learning_rate": 1.9949524675655537e-05, + "loss": 1.4658, + "step": 2079 + }, + { + "epoch": 0.06107228844911621, + "grad_norm": 0.0, + "learning_rate": 1.994942920433125e-05, + "loss": 1.5156, + "step": 2080 + }, + { + "epoch": 0.061101650126255215, + "grad_norm": 0.0, + "learning_rate": 1.994933364303173e-05, + "loss": 1.5869, + "step": 2081 + }, + { + "epoch": 0.06113101180339421, + "grad_norm": 0.0, + "learning_rate": 1.9949237991757833e-05, + "loss": 1.6152, + "step": 2082 + }, + { + "epoch": 0.06116037348053321, + "grad_norm": 0.0, + "learning_rate": 1.994914225051043e-05, + "loss": 1.3252, + "step": 2083 + }, + { + "epoch": 0.061189735157672204, + "grad_norm": 0.0, + "learning_rate": 1.994904641929039e-05, + "loss": 1.4424, + "step": 2084 + }, + { + "epoch": 0.06121909683481121, + "grad_norm": 0.0, + "learning_rate": 1.994895049809857e-05, + "loss": 1.4551, + "step": 2085 + }, + { + "epoch": 0.0612484585119502, + "grad_norm": 0.0, + "learning_rate": 1.994885448693585e-05, + "loss": 1.3135, + "step": 2086 + }, + { + "epoch": 0.0612778201890892, + "grad_norm": 0.0, + "learning_rate": 1.9948758385803083e-05, + "loss": 1.4521, + "step": 2087 + }, + { + "epoch": 0.061307181866228196, + "grad_norm": 0.0, + "learning_rate": 1.994866219470115e-05, + "loss": 1.5059, + "step": 2088 + }, + { + "epoch": 0.0613365435433672, + "grad_norm": 0.0, + "learning_rate": 1.9948565913630914e-05, + "loss": 1.4863, + "step": 2089 + }, + { + "epoch": 0.061365905220506195, + "grad_norm": 0.0, + "learning_rate": 1.994846954259325e-05, + "loss": 1.4902, + "step": 2090 + }, + { + "epoch": 0.06139526689764519, + "grad_norm": 0.0, + "learning_rate": 1.994837308158903e-05, + "loss": 1.5303, + "step": 2091 + }, + { + "epoch": 0.061424628574784194, + "grad_norm": 0.0, + "learning_rate": 1.9948276530619122e-05, + "loss": 1.4922, + "step": 2092 + }, + { + "epoch": 0.06145399025192319, + "grad_norm": 0.0, + "learning_rate": 1.99481798896844e-05, + "loss": 1.5254, + "step": 2093 + }, + { + "epoch": 0.06148335192906219, + "grad_norm": 0.0, + "learning_rate": 1.994808315878574e-05, + "loss": 1.5098, + "step": 2094 + }, + { + "epoch": 0.06151271360620118, + "grad_norm": 0.0, + "learning_rate": 1.9947986337924022e-05, + "loss": 1.4639, + "step": 2095 + }, + { + "epoch": 0.061542075283340186, + "grad_norm": 0.0, + "learning_rate": 1.994788942710011e-05, + "loss": 1.6025, + "step": 2096 + }, + { + "epoch": 0.06157143696047918, + "grad_norm": 0.0, + "learning_rate": 1.9947792426314888e-05, + "loss": 1.5898, + "step": 2097 + }, + { + "epoch": 0.06160079863761818, + "grad_norm": 0.0, + "learning_rate": 1.994769533556923e-05, + "loss": 1.4883, + "step": 2098 + }, + { + "epoch": 0.06163016031475718, + "grad_norm": 0.0, + "learning_rate": 1.994759815486402e-05, + "loss": 1.3838, + "step": 2099 + }, + { + "epoch": 0.06165952199189618, + "grad_norm": 0.0, + "learning_rate": 1.9947500884200125e-05, + "loss": 1.5049, + "step": 2100 + }, + { + "epoch": 0.061688883669035174, + "grad_norm": 0.0, + "learning_rate": 1.994740352357844e-05, + "loss": 1.4365, + "step": 2101 + }, + { + "epoch": 0.06171824534617417, + "grad_norm": 0.0, + "learning_rate": 1.994730607299983e-05, + "loss": 1.4482, + "step": 2102 + }, + { + "epoch": 0.061747607023313174, + "grad_norm": 0.0, + "learning_rate": 1.9947208532465188e-05, + "loss": 1.6191, + "step": 2103 + }, + { + "epoch": 0.06177696870045217, + "grad_norm": 0.0, + "learning_rate": 1.9947110901975392e-05, + "loss": 1.5352, + "step": 2104 + }, + { + "epoch": 0.061806330377591166, + "grad_norm": 0.0, + "learning_rate": 1.9947013181531323e-05, + "loss": 1.5537, + "step": 2105 + }, + { + "epoch": 0.06183569205473017, + "grad_norm": 0.0, + "learning_rate": 1.9946915371133867e-05, + "loss": 1.3799, + "step": 2106 + }, + { + "epoch": 0.061865053731869166, + "grad_norm": 0.0, + "learning_rate": 1.9946817470783907e-05, + "loss": 1.4795, + "step": 2107 + }, + { + "epoch": 0.06189441540900816, + "grad_norm": 0.0, + "learning_rate": 1.994671948048233e-05, + "loss": 1.5488, + "step": 2108 + }, + { + "epoch": 0.06192377708614716, + "grad_norm": 0.0, + "learning_rate": 1.994662140023002e-05, + "loss": 1.5195, + "step": 2109 + }, + { + "epoch": 0.06195313876328616, + "grad_norm": 0.0, + "learning_rate": 1.994652323002787e-05, + "loss": 1.5801, + "step": 2110 + }, + { + "epoch": 0.06198250044042516, + "grad_norm": 0.0, + "learning_rate": 1.9946424969876758e-05, + "loss": 1.4824, + "step": 2111 + }, + { + "epoch": 0.062011862117564154, + "grad_norm": 0.0, + "learning_rate": 1.994632661977758e-05, + "loss": 1.4932, + "step": 2112 + }, + { + "epoch": 0.06204122379470315, + "grad_norm": 0.0, + "learning_rate": 1.9946228179731222e-05, + "loss": 1.46, + "step": 2113 + }, + { + "epoch": 0.06207058547184215, + "grad_norm": 0.0, + "learning_rate": 1.9946129649738575e-05, + "loss": 1.6055, + "step": 2114 + }, + { + "epoch": 0.06209994714898115, + "grad_norm": 0.0, + "learning_rate": 1.9946031029800533e-05, + "loss": 1.4199, + "step": 2115 + }, + { + "epoch": 0.062129308826120146, + "grad_norm": 0.0, + "learning_rate": 1.994593231991798e-05, + "loss": 1.5117, + "step": 2116 + }, + { + "epoch": 0.06215867050325915, + "grad_norm": 0.0, + "learning_rate": 1.994583352009182e-05, + "loss": 1.5928, + "step": 2117 + }, + { + "epoch": 0.062188032180398145, + "grad_norm": 0.0, + "learning_rate": 1.9945734630322936e-05, + "loss": 1.4551, + "step": 2118 + }, + { + "epoch": 0.06221739385753714, + "grad_norm": 0.0, + "learning_rate": 1.9945635650612227e-05, + "loss": 1.3994, + "step": 2119 + }, + { + "epoch": 0.06224675553467614, + "grad_norm": 0.0, + "learning_rate": 1.994553658096059e-05, + "loss": 1.5234, + "step": 2120 + }, + { + "epoch": 0.06227611721181514, + "grad_norm": 0.0, + "learning_rate": 1.9945437421368915e-05, + "loss": 1.5693, + "step": 2121 + }, + { + "epoch": 0.06230547888895414, + "grad_norm": 0.0, + "learning_rate": 1.9945338171838107e-05, + "loss": 1.6299, + "step": 2122 + }, + { + "epoch": 0.06233484056609313, + "grad_norm": 0.0, + "learning_rate": 1.9945238832369056e-05, + "loss": 1.4863, + "step": 2123 + }, + { + "epoch": 0.062364202243232136, + "grad_norm": 0.0, + "learning_rate": 1.9945139402962663e-05, + "loss": 1.376, + "step": 2124 + }, + { + "epoch": 0.06239356392037113, + "grad_norm": 0.0, + "learning_rate": 1.9945039883619827e-05, + "loss": 1.582, + "step": 2125 + }, + { + "epoch": 0.06242292559751013, + "grad_norm": 0.0, + "learning_rate": 1.9944940274341448e-05, + "loss": 1.5283, + "step": 2126 + }, + { + "epoch": 0.062452287274649125, + "grad_norm": 0.0, + "learning_rate": 1.994484057512843e-05, + "loss": 1.4824, + "step": 2127 + }, + { + "epoch": 0.06248164895178813, + "grad_norm": 0.0, + "learning_rate": 1.994474078598167e-05, + "loss": 1.4043, + "step": 2128 + }, + { + "epoch": 0.06251101062892712, + "grad_norm": 0.0, + "learning_rate": 1.9944640906902074e-05, + "loss": 1.5244, + "step": 2129 + }, + { + "epoch": 0.06254037230606613, + "grad_norm": 0.0, + "learning_rate": 1.994454093789054e-05, + "loss": 1.4912, + "step": 2130 + }, + { + "epoch": 0.06256973398320512, + "grad_norm": 0.0, + "learning_rate": 1.994444087894798e-05, + "loss": 1.6113, + "step": 2131 + }, + { + "epoch": 0.06259909566034412, + "grad_norm": 0.0, + "learning_rate": 1.9944340730075293e-05, + "loss": 1.5332, + "step": 2132 + }, + { + "epoch": 0.06262845733748312, + "grad_norm": 0.0, + "learning_rate": 1.9944240491273384e-05, + "loss": 1.4258, + "step": 2133 + }, + { + "epoch": 0.06265781901462211, + "grad_norm": 0.0, + "learning_rate": 1.994414016254316e-05, + "loss": 1.4346, + "step": 2134 + }, + { + "epoch": 0.06268718069176112, + "grad_norm": 0.0, + "learning_rate": 1.994403974388553e-05, + "loss": 1.4766, + "step": 2135 + }, + { + "epoch": 0.0627165423689001, + "grad_norm": 0.0, + "learning_rate": 1.9943939235301406e-05, + "loss": 1.3301, + "step": 2136 + }, + { + "epoch": 0.06274590404603911, + "grad_norm": 0.0, + "learning_rate": 1.994383863679169e-05, + "loss": 1.5059, + "step": 2137 + }, + { + "epoch": 0.06277526572317811, + "grad_norm": 0.0, + "learning_rate": 1.9943737948357296e-05, + "loss": 1.4922, + "step": 2138 + }, + { + "epoch": 0.0628046274003171, + "grad_norm": 0.0, + "learning_rate": 1.9943637169999134e-05, + "loss": 1.418, + "step": 2139 + }, + { + "epoch": 0.0628339890774561, + "grad_norm": 0.0, + "learning_rate": 1.994353630171811e-05, + "loss": 1.5215, + "step": 2140 + }, + { + "epoch": 0.06286335075459511, + "grad_norm": 0.0, + "learning_rate": 1.9943435343515145e-05, + "loss": 1.5898, + "step": 2141 + }, + { + "epoch": 0.0628927124317341, + "grad_norm": 0.0, + "learning_rate": 1.9943334295391145e-05, + "loss": 1.5283, + "step": 2142 + }, + { + "epoch": 0.0629220741088731, + "grad_norm": 0.0, + "learning_rate": 1.994323315734703e-05, + "loss": 1.4795, + "step": 2143 + }, + { + "epoch": 0.0629514357860121, + "grad_norm": 0.0, + "learning_rate": 1.9943131929383707e-05, + "loss": 1.4619, + "step": 2144 + }, + { + "epoch": 0.06298079746315109, + "grad_norm": 0.0, + "learning_rate": 1.9943030611502098e-05, + "loss": 1.3604, + "step": 2145 + }, + { + "epoch": 0.0630101591402901, + "grad_norm": 0.0, + "learning_rate": 1.9942929203703114e-05, + "loss": 1.4902, + "step": 2146 + }, + { + "epoch": 0.0630395208174291, + "grad_norm": 0.0, + "learning_rate": 1.9942827705987676e-05, + "loss": 1.5781, + "step": 2147 + }, + { + "epoch": 0.06306888249456809, + "grad_norm": 0.0, + "learning_rate": 1.9942726118356702e-05, + "loss": 1.4688, + "step": 2148 + }, + { + "epoch": 0.06309824417170709, + "grad_norm": 0.0, + "learning_rate": 1.9942624440811108e-05, + "loss": 1.4551, + "step": 2149 + }, + { + "epoch": 0.06312760584884608, + "grad_norm": 0.0, + "learning_rate": 1.9942522673351817e-05, + "loss": 1.416, + "step": 2150 + }, + { + "epoch": 0.06315696752598508, + "grad_norm": 0.0, + "learning_rate": 1.9942420815979746e-05, + "loss": 1.4229, + "step": 2151 + }, + { + "epoch": 0.06318632920312409, + "grad_norm": 0.0, + "learning_rate": 1.9942318868695815e-05, + "loss": 1.3584, + "step": 2152 + }, + { + "epoch": 0.06321569088026308, + "grad_norm": 0.0, + "learning_rate": 1.994221683150095e-05, + "loss": 1.5322, + "step": 2153 + }, + { + "epoch": 0.06324505255740208, + "grad_norm": 0.0, + "learning_rate": 1.9942114704396073e-05, + "loss": 1.4902, + "step": 2154 + }, + { + "epoch": 0.06327441423454108, + "grad_norm": 0.0, + "learning_rate": 1.9942012487382107e-05, + "loss": 1.4912, + "step": 2155 + }, + { + "epoch": 0.06330377591168007, + "grad_norm": 0.0, + "learning_rate": 1.994191018045997e-05, + "loss": 1.4932, + "step": 2156 + }, + { + "epoch": 0.06333313758881907, + "grad_norm": 0.0, + "learning_rate": 1.99418077836306e-05, + "loss": 1.5908, + "step": 2157 + }, + { + "epoch": 0.06336249926595808, + "grad_norm": 0.0, + "learning_rate": 1.9941705296894914e-05, + "loss": 1.4785, + "step": 2158 + }, + { + "epoch": 0.06339186094309707, + "grad_norm": 0.0, + "learning_rate": 1.994160272025384e-05, + "loss": 1.4785, + "step": 2159 + }, + { + "epoch": 0.06342122262023607, + "grad_norm": 0.0, + "learning_rate": 1.9941500053708308e-05, + "loss": 1.5674, + "step": 2160 + }, + { + "epoch": 0.06345058429737506, + "grad_norm": 0.0, + "learning_rate": 1.9941397297259244e-05, + "loss": 1.5557, + "step": 2161 + }, + { + "epoch": 0.06347994597451406, + "grad_norm": 0.0, + "learning_rate": 1.9941294450907576e-05, + "loss": 1.542, + "step": 2162 + }, + { + "epoch": 0.06350930765165307, + "grad_norm": 0.0, + "learning_rate": 1.9941191514654237e-05, + "loss": 1.415, + "step": 2163 + }, + { + "epoch": 0.06353866932879206, + "grad_norm": 0.0, + "learning_rate": 1.994108848850016e-05, + "loss": 1.5146, + "step": 2164 + }, + { + "epoch": 0.06356803100593106, + "grad_norm": 0.0, + "learning_rate": 1.9940985372446272e-05, + "loss": 1.4854, + "step": 2165 + }, + { + "epoch": 0.06359739268307006, + "grad_norm": 0.0, + "learning_rate": 1.9940882166493506e-05, + "loss": 1.6074, + "step": 2166 + }, + { + "epoch": 0.06362675436020905, + "grad_norm": 0.0, + "learning_rate": 1.9940778870642802e-05, + "loss": 1.4395, + "step": 2167 + }, + { + "epoch": 0.06365611603734805, + "grad_norm": 0.0, + "learning_rate": 1.9940675484895085e-05, + "loss": 1.4141, + "step": 2168 + }, + { + "epoch": 0.06368547771448706, + "grad_norm": 0.0, + "learning_rate": 1.9940572009251294e-05, + "loss": 1.4775, + "step": 2169 + }, + { + "epoch": 0.06371483939162605, + "grad_norm": 0.0, + "learning_rate": 1.9940468443712364e-05, + "loss": 1.6709, + "step": 2170 + }, + { + "epoch": 0.06374420106876505, + "grad_norm": 0.0, + "learning_rate": 1.9940364788279233e-05, + "loss": 1.6064, + "step": 2171 + }, + { + "epoch": 0.06377356274590405, + "grad_norm": 0.0, + "learning_rate": 1.9940261042952837e-05, + "loss": 1.4883, + "step": 2172 + }, + { + "epoch": 0.06380292442304304, + "grad_norm": 0.0, + "learning_rate": 1.9940157207734114e-05, + "loss": 1.498, + "step": 2173 + }, + { + "epoch": 0.06383228610018205, + "grad_norm": 0.0, + "learning_rate": 1.9940053282624005e-05, + "loss": 1.5098, + "step": 2174 + }, + { + "epoch": 0.06386164777732103, + "grad_norm": 0.0, + "learning_rate": 1.9939949267623447e-05, + "loss": 1.5254, + "step": 2175 + }, + { + "epoch": 0.06389100945446004, + "grad_norm": 0.0, + "learning_rate": 1.993984516273338e-05, + "loss": 1.4473, + "step": 2176 + }, + { + "epoch": 0.06392037113159904, + "grad_norm": 0.0, + "learning_rate": 1.9939740967954755e-05, + "loss": 1.5801, + "step": 2177 + }, + { + "epoch": 0.06394973280873803, + "grad_norm": 0.0, + "learning_rate": 1.9939636683288502e-05, + "loss": 1.5352, + "step": 2178 + }, + { + "epoch": 0.06397909448587703, + "grad_norm": 0.0, + "learning_rate": 1.9939532308735566e-05, + "loss": 1.3955, + "step": 2179 + }, + { + "epoch": 0.06400845616301604, + "grad_norm": 0.0, + "learning_rate": 1.99394278442969e-05, + "loss": 1.5498, + "step": 2180 + }, + { + "epoch": 0.06403781784015503, + "grad_norm": 0.0, + "learning_rate": 1.993932328997344e-05, + "loss": 1.5967, + "step": 2181 + }, + { + "epoch": 0.06406717951729403, + "grad_norm": 0.0, + "learning_rate": 1.993921864576613e-05, + "loss": 1.4238, + "step": 2182 + }, + { + "epoch": 0.06409654119443303, + "grad_norm": 0.0, + "learning_rate": 1.9939113911675926e-05, + "loss": 1.4502, + "step": 2183 + }, + { + "epoch": 0.06412590287157202, + "grad_norm": 0.0, + "learning_rate": 1.9939009087703768e-05, + "loss": 1.3672, + "step": 2184 + }, + { + "epoch": 0.06415526454871102, + "grad_norm": 0.0, + "learning_rate": 1.9938904173850603e-05, + "loss": 1.4619, + "step": 2185 + }, + { + "epoch": 0.06418462622585001, + "grad_norm": 0.0, + "learning_rate": 1.9938799170117386e-05, + "loss": 1.6084, + "step": 2186 + }, + { + "epoch": 0.06421398790298902, + "grad_norm": 0.0, + "learning_rate": 1.993869407650506e-05, + "loss": 1.3232, + "step": 2187 + }, + { + "epoch": 0.06424334958012802, + "grad_norm": 0.0, + "learning_rate": 1.9938588893014578e-05, + "loss": 1.6865, + "step": 2188 + }, + { + "epoch": 0.06427271125726701, + "grad_norm": 0.0, + "learning_rate": 1.993848361964689e-05, + "loss": 1.5352, + "step": 2189 + }, + { + "epoch": 0.06430207293440601, + "grad_norm": 0.0, + "learning_rate": 1.993837825640295e-05, + "loss": 1.4785, + "step": 2190 + }, + { + "epoch": 0.06433143461154502, + "grad_norm": 0.0, + "learning_rate": 1.9938272803283714e-05, + "loss": 1.5, + "step": 2191 + }, + { + "epoch": 0.064360796288684, + "grad_norm": 0.0, + "learning_rate": 1.993816726029013e-05, + "loss": 1.4863, + "step": 2192 + }, + { + "epoch": 0.06439015796582301, + "grad_norm": 0.0, + "learning_rate": 1.993806162742315e-05, + "loss": 1.5342, + "step": 2193 + }, + { + "epoch": 0.06441951964296201, + "grad_norm": 0.0, + "learning_rate": 1.993795590468374e-05, + "loss": 1.54, + "step": 2194 + }, + { + "epoch": 0.064448881320101, + "grad_norm": 0.0, + "learning_rate": 1.9937850092072845e-05, + "loss": 1.4834, + "step": 2195 + }, + { + "epoch": 0.06447824299724, + "grad_norm": 0.0, + "learning_rate": 1.993774418959143e-05, + "loss": 1.4375, + "step": 2196 + }, + { + "epoch": 0.06450760467437901, + "grad_norm": 0.0, + "learning_rate": 1.993763819724045e-05, + "loss": 1.5771, + "step": 2197 + }, + { + "epoch": 0.064536966351518, + "grad_norm": 0.0, + "learning_rate": 1.9937532115020858e-05, + "loss": 1.6016, + "step": 2198 + }, + { + "epoch": 0.064566328028657, + "grad_norm": 0.0, + "learning_rate": 1.993742594293362e-05, + "loss": 1.3984, + "step": 2199 + }, + { + "epoch": 0.06459568970579599, + "grad_norm": 0.0, + "learning_rate": 1.9937319680979695e-05, + "loss": 1.501, + "step": 2200 + }, + { + "epoch": 0.06462505138293499, + "grad_norm": 0.0, + "learning_rate": 1.993721332916004e-05, + "loss": 1.5342, + "step": 2201 + }, + { + "epoch": 0.064654413060074, + "grad_norm": 0.0, + "learning_rate": 1.9937106887475622e-05, + "loss": 1.4023, + "step": 2202 + }, + { + "epoch": 0.06468377473721298, + "grad_norm": 0.0, + "learning_rate": 1.9937000355927404e-05, + "loss": 1.5713, + "step": 2203 + }, + { + "epoch": 0.06471313641435199, + "grad_norm": 0.0, + "learning_rate": 1.9936893734516346e-05, + "loss": 1.4922, + "step": 2204 + }, + { + "epoch": 0.06474249809149099, + "grad_norm": 0.0, + "learning_rate": 1.993678702324341e-05, + "loss": 1.6201, + "step": 2205 + }, + { + "epoch": 0.06477185976862998, + "grad_norm": 0.0, + "learning_rate": 1.9936680222109563e-05, + "loss": 1.4414, + "step": 2206 + }, + { + "epoch": 0.06480122144576898, + "grad_norm": 0.0, + "learning_rate": 1.9936573331115778e-05, + "loss": 1.5205, + "step": 2207 + }, + { + "epoch": 0.06483058312290799, + "grad_norm": 0.0, + "learning_rate": 1.993646635026301e-05, + "loss": 1.5654, + "step": 2208 + }, + { + "epoch": 0.06485994480004698, + "grad_norm": 0.0, + "learning_rate": 1.9936359279552235e-05, + "loss": 1.5078, + "step": 2209 + }, + { + "epoch": 0.06488930647718598, + "grad_norm": 0.0, + "learning_rate": 1.993625211898442e-05, + "loss": 1.5049, + "step": 2210 + }, + { + "epoch": 0.06491866815432497, + "grad_norm": 0.0, + "learning_rate": 1.993614486856053e-05, + "loss": 1.707, + "step": 2211 + }, + { + "epoch": 0.06494802983146397, + "grad_norm": 0.0, + "learning_rate": 1.9936037528281535e-05, + "loss": 1.5986, + "step": 2212 + }, + { + "epoch": 0.06497739150860297, + "grad_norm": 0.0, + "learning_rate": 1.993593009814841e-05, + "loss": 1.5928, + "step": 2213 + }, + { + "epoch": 0.06500675318574196, + "grad_norm": 0.0, + "learning_rate": 1.9935822578162125e-05, + "loss": 1.6191, + "step": 2214 + }, + { + "epoch": 0.06503611486288097, + "grad_norm": 0.0, + "learning_rate": 1.9935714968323655e-05, + "loss": 1.4766, + "step": 2215 + }, + { + "epoch": 0.06506547654001997, + "grad_norm": 0.0, + "learning_rate": 1.9935607268633963e-05, + "loss": 1.541, + "step": 2216 + }, + { + "epoch": 0.06509483821715896, + "grad_norm": 0.0, + "learning_rate": 1.9935499479094035e-05, + "loss": 1.543, + "step": 2217 + }, + { + "epoch": 0.06512419989429796, + "grad_norm": 0.0, + "learning_rate": 1.993539159970484e-05, + "loss": 1.5342, + "step": 2218 + }, + { + "epoch": 0.06515356157143697, + "grad_norm": 0.0, + "learning_rate": 1.9935283630467353e-05, + "loss": 1.5605, + "step": 2219 + }, + { + "epoch": 0.06518292324857596, + "grad_norm": 0.0, + "learning_rate": 1.993517557138255e-05, + "loss": 1.5742, + "step": 2220 + }, + { + "epoch": 0.06521228492571496, + "grad_norm": 0.0, + "learning_rate": 1.9935067422451416e-05, + "loss": 1.3867, + "step": 2221 + }, + { + "epoch": 0.06524164660285396, + "grad_norm": 0.0, + "learning_rate": 1.9934959183674916e-05, + "loss": 1.4561, + "step": 2222 + }, + { + "epoch": 0.06527100827999295, + "grad_norm": 0.0, + "learning_rate": 1.993485085505404e-05, + "loss": 1.625, + "step": 2223 + }, + { + "epoch": 0.06530036995713195, + "grad_norm": 0.0, + "learning_rate": 1.9934742436589762e-05, + "loss": 1.4746, + "step": 2224 + }, + { + "epoch": 0.06532973163427094, + "grad_norm": 0.0, + "learning_rate": 1.9934633928283062e-05, + "loss": 1.6309, + "step": 2225 + }, + { + "epoch": 0.06535909331140995, + "grad_norm": 0.0, + "learning_rate": 1.9934525330134925e-05, + "loss": 1.5205, + "step": 2226 + }, + { + "epoch": 0.06538845498854895, + "grad_norm": 0.0, + "learning_rate": 1.993441664214633e-05, + "loss": 1.4082, + "step": 2227 + }, + { + "epoch": 0.06541781666568794, + "grad_norm": 0.0, + "learning_rate": 1.9934307864318266e-05, + "loss": 1.418, + "step": 2228 + }, + { + "epoch": 0.06544717834282694, + "grad_norm": 0.0, + "learning_rate": 1.9934198996651706e-05, + "loss": 1.4756, + "step": 2229 + }, + { + "epoch": 0.06547654001996595, + "grad_norm": 0.0, + "learning_rate": 1.9934090039147642e-05, + "loss": 1.5176, + "step": 2230 + }, + { + "epoch": 0.06550590169710493, + "grad_norm": 0.0, + "learning_rate": 1.9933980991807057e-05, + "loss": 1.46, + "step": 2231 + }, + { + "epoch": 0.06553526337424394, + "grad_norm": 0.0, + "learning_rate": 1.993387185463094e-05, + "loss": 1.6367, + "step": 2232 + }, + { + "epoch": 0.06556462505138294, + "grad_norm": 0.0, + "learning_rate": 1.9933762627620275e-05, + "loss": 1.6123, + "step": 2233 + }, + { + "epoch": 0.06559398672852193, + "grad_norm": 0.0, + "learning_rate": 1.993365331077605e-05, + "loss": 1.5049, + "step": 2234 + }, + { + "epoch": 0.06562334840566093, + "grad_norm": 0.0, + "learning_rate": 1.9933543904099253e-05, + "loss": 1.6094, + "step": 2235 + }, + { + "epoch": 0.06565271008279992, + "grad_norm": 0.0, + "learning_rate": 1.9933434407590876e-05, + "loss": 1.4189, + "step": 2236 + }, + { + "epoch": 0.06568207175993893, + "grad_norm": 0.0, + "learning_rate": 1.9933324821251907e-05, + "loss": 1.5898, + "step": 2237 + }, + { + "epoch": 0.06571143343707793, + "grad_norm": 0.0, + "learning_rate": 1.993321514508334e-05, + "loss": 1.4482, + "step": 2238 + }, + { + "epoch": 0.06574079511421692, + "grad_norm": 0.0, + "learning_rate": 1.9933105379086158e-05, + "loss": 1.3779, + "step": 2239 + }, + { + "epoch": 0.06577015679135592, + "grad_norm": 0.0, + "learning_rate": 1.9932995523261366e-05, + "loss": 1.4121, + "step": 2240 + }, + { + "epoch": 0.06579951846849492, + "grad_norm": 0.0, + "learning_rate": 1.993288557760995e-05, + "loss": 1.5986, + "step": 2241 + }, + { + "epoch": 0.06582888014563391, + "grad_norm": 0.0, + "learning_rate": 1.9932775542132906e-05, + "loss": 1.4404, + "step": 2242 + }, + { + "epoch": 0.06585824182277292, + "grad_norm": 0.0, + "learning_rate": 1.9932665416831226e-05, + "loss": 1.5791, + "step": 2243 + }, + { + "epoch": 0.06588760349991192, + "grad_norm": 0.0, + "learning_rate": 1.993255520170591e-05, + "loss": 1.5117, + "step": 2244 + }, + { + "epoch": 0.06591696517705091, + "grad_norm": 0.0, + "learning_rate": 1.9932444896757958e-05, + "loss": 1.4971, + "step": 2245 + }, + { + "epoch": 0.06594632685418991, + "grad_norm": 0.0, + "learning_rate": 1.9932334501988362e-05, + "loss": 1.4971, + "step": 2246 + }, + { + "epoch": 0.06597568853132892, + "grad_norm": 0.0, + "learning_rate": 1.9932224017398115e-05, + "loss": 1.4775, + "step": 2247 + }, + { + "epoch": 0.0660050502084679, + "grad_norm": 0.0, + "learning_rate": 1.993211344298823e-05, + "loss": 1.5547, + "step": 2248 + }, + { + "epoch": 0.06603441188560691, + "grad_norm": 0.0, + "learning_rate": 1.9932002778759697e-05, + "loss": 1.3896, + "step": 2249 + }, + { + "epoch": 0.0660637735627459, + "grad_norm": 0.0, + "learning_rate": 1.993189202471352e-05, + "loss": 1.4355, + "step": 2250 + }, + { + "epoch": 0.0660931352398849, + "grad_norm": 0.0, + "learning_rate": 1.9931781180850698e-05, + "loss": 1.5, + "step": 2251 + }, + { + "epoch": 0.0661224969170239, + "grad_norm": 0.0, + "learning_rate": 1.9931670247172236e-05, + "loss": 1.4385, + "step": 2252 + }, + { + "epoch": 0.0661518585941629, + "grad_norm": 0.0, + "learning_rate": 1.9931559223679136e-05, + "loss": 1.5537, + "step": 2253 + }, + { + "epoch": 0.0661812202713019, + "grad_norm": 0.0, + "learning_rate": 1.9931448110372403e-05, + "loss": 1.6475, + "step": 2254 + }, + { + "epoch": 0.0662105819484409, + "grad_norm": 0.0, + "learning_rate": 1.993133690725304e-05, + "loss": 1.6035, + "step": 2255 + }, + { + "epoch": 0.06623994362557989, + "grad_norm": 0.0, + "learning_rate": 1.9931225614322054e-05, + "loss": 1.5195, + "step": 2256 + }, + { + "epoch": 0.06626930530271889, + "grad_norm": 0.0, + "learning_rate": 1.9931114231580452e-05, + "loss": 1.707, + "step": 2257 + }, + { + "epoch": 0.0662986669798579, + "grad_norm": 0.0, + "learning_rate": 1.9931002759029244e-05, + "loss": 1.4922, + "step": 2258 + }, + { + "epoch": 0.06632802865699688, + "grad_norm": 0.0, + "learning_rate": 1.993089119666943e-05, + "loss": 1.5332, + "step": 2259 + }, + { + "epoch": 0.06635739033413589, + "grad_norm": 0.0, + "learning_rate": 1.9930779544502026e-05, + "loss": 1.5127, + "step": 2260 + }, + { + "epoch": 0.06638675201127488, + "grad_norm": 0.0, + "learning_rate": 1.9930667802528036e-05, + "loss": 1.4619, + "step": 2261 + }, + { + "epoch": 0.06641611368841388, + "grad_norm": 0.0, + "learning_rate": 1.9930555970748474e-05, + "loss": 1.6094, + "step": 2262 + }, + { + "epoch": 0.06644547536555288, + "grad_norm": 0.0, + "learning_rate": 1.9930444049164356e-05, + "loss": 1.5889, + "step": 2263 + }, + { + "epoch": 0.06647483704269187, + "grad_norm": 0.0, + "learning_rate": 1.9930332037776686e-05, + "loss": 1.4424, + "step": 2264 + }, + { + "epoch": 0.06650419871983088, + "grad_norm": 0.0, + "learning_rate": 1.993021993658648e-05, + "loss": 1.4795, + "step": 2265 + }, + { + "epoch": 0.06653356039696988, + "grad_norm": 0.0, + "learning_rate": 1.9930107745594755e-05, + "loss": 1.6328, + "step": 2266 + }, + { + "epoch": 0.06656292207410887, + "grad_norm": 0.0, + "learning_rate": 1.992999546480252e-05, + "loss": 1.4941, + "step": 2267 + }, + { + "epoch": 0.06659228375124787, + "grad_norm": 0.0, + "learning_rate": 1.9929883094210793e-05, + "loss": 1.5098, + "step": 2268 + }, + { + "epoch": 0.06662164542838687, + "grad_norm": 0.0, + "learning_rate": 1.992977063382059e-05, + "loss": 1.5762, + "step": 2269 + }, + { + "epoch": 0.06665100710552586, + "grad_norm": 0.0, + "learning_rate": 1.9929658083632928e-05, + "loss": 1.4014, + "step": 2270 + }, + { + "epoch": 0.06668036878266487, + "grad_norm": 0.0, + "learning_rate": 1.9929545443648823e-05, + "loss": 1.4082, + "step": 2271 + }, + { + "epoch": 0.06670973045980387, + "grad_norm": 0.0, + "learning_rate": 1.99294327138693e-05, + "loss": 1.7002, + "step": 2272 + }, + { + "epoch": 0.06673909213694286, + "grad_norm": 0.0, + "learning_rate": 1.992931989429537e-05, + "loss": 1.4785, + "step": 2273 + }, + { + "epoch": 0.06676845381408186, + "grad_norm": 0.0, + "learning_rate": 1.9929206984928056e-05, + "loss": 1.499, + "step": 2274 + }, + { + "epoch": 0.06679781549122085, + "grad_norm": 0.0, + "learning_rate": 1.9929093985768385e-05, + "loss": 1.5996, + "step": 2275 + }, + { + "epoch": 0.06682717716835986, + "grad_norm": 0.0, + "learning_rate": 1.9928980896817374e-05, + "loss": 1.502, + "step": 2276 + }, + { + "epoch": 0.06685653884549886, + "grad_norm": 0.0, + "learning_rate": 1.9928867718076045e-05, + "loss": 1.5996, + "step": 2277 + }, + { + "epoch": 0.06688590052263785, + "grad_norm": 0.0, + "learning_rate": 1.992875444954542e-05, + "loss": 1.5977, + "step": 2278 + }, + { + "epoch": 0.06691526219977685, + "grad_norm": 0.0, + "learning_rate": 1.992864109122653e-05, + "loss": 1.4492, + "step": 2279 + }, + { + "epoch": 0.06694462387691585, + "grad_norm": 0.0, + "learning_rate": 1.992852764312039e-05, + "loss": 1.5664, + "step": 2280 + }, + { + "epoch": 0.06697398555405484, + "grad_norm": 0.0, + "learning_rate": 1.9928414105228037e-05, + "loss": 1.4404, + "step": 2281 + }, + { + "epoch": 0.06700334723119385, + "grad_norm": 0.0, + "learning_rate": 1.992830047755049e-05, + "loss": 1.5381, + "step": 2282 + }, + { + "epoch": 0.06703270890833285, + "grad_norm": 0.0, + "learning_rate": 1.992818676008878e-05, + "loss": 1.6992, + "step": 2283 + }, + { + "epoch": 0.06706207058547184, + "grad_norm": 0.0, + "learning_rate": 1.9928072952843937e-05, + "loss": 1.3208, + "step": 2284 + }, + { + "epoch": 0.06709143226261084, + "grad_norm": 0.0, + "learning_rate": 1.9927959055816982e-05, + "loss": 1.4961, + "step": 2285 + }, + { + "epoch": 0.06712079393974985, + "grad_norm": 0.0, + "learning_rate": 1.9927845069008956e-05, + "loss": 1.5322, + "step": 2286 + }, + { + "epoch": 0.06715015561688883, + "grad_norm": 0.0, + "learning_rate": 1.9927730992420882e-05, + "loss": 1.4365, + "step": 2287 + }, + { + "epoch": 0.06717951729402784, + "grad_norm": 0.0, + "learning_rate": 1.9927616826053794e-05, + "loss": 1.5508, + "step": 2288 + }, + { + "epoch": 0.06720887897116683, + "grad_norm": 0.0, + "learning_rate": 1.9927502569908725e-05, + "loss": 1.6465, + "step": 2289 + }, + { + "epoch": 0.06723824064830583, + "grad_norm": 0.0, + "learning_rate": 1.9927388223986708e-05, + "loss": 1.4277, + "step": 2290 + }, + { + "epoch": 0.06726760232544483, + "grad_norm": 0.0, + "learning_rate": 1.9927273788288774e-05, + "loss": 1.4883, + "step": 2291 + }, + { + "epoch": 0.06729696400258382, + "grad_norm": 0.0, + "learning_rate": 1.9927159262815966e-05, + "loss": 1.5088, + "step": 2292 + }, + { + "epoch": 0.06732632567972283, + "grad_norm": 0.0, + "learning_rate": 1.992704464756931e-05, + "loss": 1.5811, + "step": 2293 + }, + { + "epoch": 0.06735568735686183, + "grad_norm": 0.0, + "learning_rate": 1.9926929942549847e-05, + "loss": 1.415, + "step": 2294 + }, + { + "epoch": 0.06738504903400082, + "grad_norm": 0.0, + "learning_rate": 1.9926815147758617e-05, + "loss": 1.5186, + "step": 2295 + }, + { + "epoch": 0.06741441071113982, + "grad_norm": 0.0, + "learning_rate": 1.992670026319665e-05, + "loss": 1.4678, + "step": 2296 + }, + { + "epoch": 0.06744377238827882, + "grad_norm": 0.0, + "learning_rate": 1.9926585288864997e-05, + "loss": 1.5566, + "step": 2297 + }, + { + "epoch": 0.06747313406541781, + "grad_norm": 0.0, + "learning_rate": 1.992647022476469e-05, + "loss": 1.4854, + "step": 2298 + }, + { + "epoch": 0.06750249574255682, + "grad_norm": 0.0, + "learning_rate": 1.9926355070896766e-05, + "loss": 1.5293, + "step": 2299 + }, + { + "epoch": 0.0675318574196958, + "grad_norm": 0.0, + "learning_rate": 1.9926239827262276e-05, + "loss": 1.4805, + "step": 2300 + }, + { + "epoch": 0.06756121909683481, + "grad_norm": 0.0, + "learning_rate": 1.9926124493862253e-05, + "loss": 1.417, + "step": 2301 + }, + { + "epoch": 0.06759058077397381, + "grad_norm": 0.0, + "learning_rate": 1.9926009070697744e-05, + "loss": 1.5156, + "step": 2302 + }, + { + "epoch": 0.0676199424511128, + "grad_norm": 0.0, + "learning_rate": 1.99258935577698e-05, + "loss": 1.6172, + "step": 2303 + }, + { + "epoch": 0.0676493041282518, + "grad_norm": 0.0, + "learning_rate": 1.9925777955079452e-05, + "loss": 1.4893, + "step": 2304 + }, + { + "epoch": 0.06767866580539081, + "grad_norm": 0.0, + "learning_rate": 1.9925662262627753e-05, + "loss": 1.4473, + "step": 2305 + }, + { + "epoch": 0.0677080274825298, + "grad_norm": 0.0, + "learning_rate": 1.9925546480415746e-05, + "loss": 1.3926, + "step": 2306 + }, + { + "epoch": 0.0677373891596688, + "grad_norm": 0.0, + "learning_rate": 1.9925430608444484e-05, + "loss": 1.4922, + "step": 2307 + }, + { + "epoch": 0.0677667508368078, + "grad_norm": 0.0, + "learning_rate": 1.992531464671501e-05, + "loss": 1.5186, + "step": 2308 + }, + { + "epoch": 0.0677961125139468, + "grad_norm": 0.0, + "learning_rate": 1.9925198595228373e-05, + "loss": 1.5361, + "step": 2309 + }, + { + "epoch": 0.0678254741910858, + "grad_norm": 0.0, + "learning_rate": 1.9925082453985622e-05, + "loss": 1.5391, + "step": 2310 + }, + { + "epoch": 0.0678548358682248, + "grad_norm": 0.0, + "learning_rate": 1.992496622298781e-05, + "loss": 1.3887, + "step": 2311 + }, + { + "epoch": 0.06788419754536379, + "grad_norm": 0.0, + "learning_rate": 1.992484990223599e-05, + "loss": 1.5771, + "step": 2312 + }, + { + "epoch": 0.06791355922250279, + "grad_norm": 0.0, + "learning_rate": 1.9924733491731204e-05, + "loss": 1.498, + "step": 2313 + }, + { + "epoch": 0.06794292089964178, + "grad_norm": 0.0, + "learning_rate": 1.9924616991474514e-05, + "loss": 1.5176, + "step": 2314 + }, + { + "epoch": 0.06797228257678078, + "grad_norm": 0.0, + "learning_rate": 1.992450040146697e-05, + "loss": 1.5527, + "step": 2315 + }, + { + "epoch": 0.06800164425391979, + "grad_norm": 0.0, + "learning_rate": 1.992438372170963e-05, + "loss": 1.6055, + "step": 2316 + }, + { + "epoch": 0.06803100593105878, + "grad_norm": 0.0, + "learning_rate": 1.9924266952203545e-05, + "loss": 1.5254, + "step": 2317 + }, + { + "epoch": 0.06806036760819778, + "grad_norm": 0.0, + "learning_rate": 1.992415009294977e-05, + "loss": 1.4482, + "step": 2318 + }, + { + "epoch": 0.06808972928533678, + "grad_norm": 0.0, + "learning_rate": 1.9924033143949368e-05, + "loss": 1.4854, + "step": 2319 + }, + { + "epoch": 0.06811909096247577, + "grad_norm": 0.0, + "learning_rate": 1.992391610520339e-05, + "loss": 1.542, + "step": 2320 + }, + { + "epoch": 0.06814845263961478, + "grad_norm": 0.0, + "learning_rate": 1.99237989767129e-05, + "loss": 1.4312, + "step": 2321 + }, + { + "epoch": 0.06817781431675378, + "grad_norm": 0.0, + "learning_rate": 1.992368175847895e-05, + "loss": 1.4355, + "step": 2322 + }, + { + "epoch": 0.06820717599389277, + "grad_norm": 0.0, + "learning_rate": 1.992356445050261e-05, + "loss": 1.5703, + "step": 2323 + }, + { + "epoch": 0.06823653767103177, + "grad_norm": 0.0, + "learning_rate": 1.9923447052784933e-05, + "loss": 1.5537, + "step": 2324 + }, + { + "epoch": 0.06826589934817076, + "grad_norm": 0.0, + "learning_rate": 1.992332956532698e-05, + "loss": 1.4746, + "step": 2325 + }, + { + "epoch": 0.06829526102530976, + "grad_norm": 0.0, + "learning_rate": 1.9923211988129818e-05, + "loss": 1.5127, + "step": 2326 + }, + { + "epoch": 0.06832462270244877, + "grad_norm": 0.0, + "learning_rate": 1.9923094321194507e-05, + "loss": 1.4199, + "step": 2327 + }, + { + "epoch": 0.06835398437958776, + "grad_norm": 0.0, + "learning_rate": 1.9922976564522116e-05, + "loss": 1.4111, + "step": 2328 + }, + { + "epoch": 0.06838334605672676, + "grad_norm": 0.0, + "learning_rate": 1.9922858718113703e-05, + "loss": 1.542, + "step": 2329 + }, + { + "epoch": 0.06841270773386576, + "grad_norm": 0.0, + "learning_rate": 1.992274078197034e-05, + "loss": 1.3193, + "step": 2330 + }, + { + "epoch": 0.06844206941100475, + "grad_norm": 0.0, + "learning_rate": 1.992262275609309e-05, + "loss": 1.6123, + "step": 2331 + }, + { + "epoch": 0.06847143108814376, + "grad_norm": 0.0, + "learning_rate": 1.9922504640483023e-05, + "loss": 1.5146, + "step": 2332 + }, + { + "epoch": 0.06850079276528276, + "grad_norm": 0.0, + "learning_rate": 1.9922386435141204e-05, + "loss": 1.5469, + "step": 2333 + }, + { + "epoch": 0.06853015444242175, + "grad_norm": 0.0, + "learning_rate": 1.9922268140068702e-05, + "loss": 1.4883, + "step": 2334 + }, + { + "epoch": 0.06855951611956075, + "grad_norm": 0.0, + "learning_rate": 1.9922149755266588e-05, + "loss": 1.4912, + "step": 2335 + }, + { + "epoch": 0.06858887779669975, + "grad_norm": 0.0, + "learning_rate": 1.9922031280735936e-05, + "loss": 1.5127, + "step": 2336 + }, + { + "epoch": 0.06861823947383874, + "grad_norm": 0.0, + "learning_rate": 1.992191271647781e-05, + "loss": 1.5234, + "step": 2337 + }, + { + "epoch": 0.06864760115097775, + "grad_norm": 0.0, + "learning_rate": 1.9921794062493288e-05, + "loss": 1.4414, + "step": 2338 + }, + { + "epoch": 0.06867696282811674, + "grad_norm": 0.0, + "learning_rate": 1.9921675318783446e-05, + "loss": 1.4541, + "step": 2339 + }, + { + "epoch": 0.06870632450525574, + "grad_norm": 0.0, + "learning_rate": 1.9921556485349346e-05, + "loss": 1.4229, + "step": 2340 + }, + { + "epoch": 0.06873568618239474, + "grad_norm": 0.0, + "learning_rate": 1.992143756219207e-05, + "loss": 1.4854, + "step": 2341 + }, + { + "epoch": 0.06876504785953373, + "grad_norm": 0.0, + "learning_rate": 1.99213185493127e-05, + "loss": 1.5361, + "step": 2342 + }, + { + "epoch": 0.06879440953667273, + "grad_norm": 0.0, + "learning_rate": 1.9921199446712297e-05, + "loss": 1.6387, + "step": 2343 + }, + { + "epoch": 0.06882377121381174, + "grad_norm": 0.0, + "learning_rate": 1.9921080254391953e-05, + "loss": 1.4551, + "step": 2344 + }, + { + "epoch": 0.06885313289095073, + "grad_norm": 0.0, + "learning_rate": 1.9920960972352737e-05, + "loss": 1.5107, + "step": 2345 + }, + { + "epoch": 0.06888249456808973, + "grad_norm": 0.0, + "learning_rate": 1.992084160059573e-05, + "loss": 1.6582, + "step": 2346 + }, + { + "epoch": 0.06891185624522873, + "grad_norm": 0.0, + "learning_rate": 1.992072213912201e-05, + "loss": 1.5625, + "step": 2347 + }, + { + "epoch": 0.06894121792236772, + "grad_norm": 0.0, + "learning_rate": 1.992060258793266e-05, + "loss": 1.5322, + "step": 2348 + }, + { + "epoch": 0.06897057959950673, + "grad_norm": 0.0, + "learning_rate": 1.9920482947028762e-05, + "loss": 1.4092, + "step": 2349 + }, + { + "epoch": 0.06899994127664572, + "grad_norm": 0.0, + "learning_rate": 1.9920363216411394e-05, + "loss": 1.5811, + "step": 2350 + }, + { + "epoch": 0.06902930295378472, + "grad_norm": 0.0, + "learning_rate": 1.9920243396081643e-05, + "loss": 1.5342, + "step": 2351 + }, + { + "epoch": 0.06905866463092372, + "grad_norm": 0.0, + "learning_rate": 1.992012348604059e-05, + "loss": 1.4756, + "step": 2352 + }, + { + "epoch": 0.06908802630806271, + "grad_norm": 0.0, + "learning_rate": 1.9920003486289317e-05, + "loss": 1.5254, + "step": 2353 + }, + { + "epoch": 0.06911738798520171, + "grad_norm": 0.0, + "learning_rate": 1.9919883396828914e-05, + "loss": 1.3672, + "step": 2354 + }, + { + "epoch": 0.06914674966234072, + "grad_norm": 0.0, + "learning_rate": 1.9919763217660465e-05, + "loss": 1.5391, + "step": 2355 + }, + { + "epoch": 0.0691761113394797, + "grad_norm": 0.0, + "learning_rate": 1.9919642948785057e-05, + "loss": 1.4951, + "step": 2356 + }, + { + "epoch": 0.06920547301661871, + "grad_norm": 0.0, + "learning_rate": 1.9919522590203777e-05, + "loss": 1.4668, + "step": 2357 + }, + { + "epoch": 0.06923483469375771, + "grad_norm": 0.0, + "learning_rate": 1.991940214191771e-05, + "loss": 1.4639, + "step": 2358 + }, + { + "epoch": 0.0692641963708967, + "grad_norm": 0.0, + "learning_rate": 1.9919281603927955e-05, + "loss": 1.5859, + "step": 2359 + }, + { + "epoch": 0.0692935580480357, + "grad_norm": 0.0, + "learning_rate": 1.9919160976235593e-05, + "loss": 1.5986, + "step": 2360 + }, + { + "epoch": 0.06932291972517471, + "grad_norm": 0.0, + "learning_rate": 1.9919040258841718e-05, + "loss": 1.3906, + "step": 2361 + }, + { + "epoch": 0.0693522814023137, + "grad_norm": 0.0, + "learning_rate": 1.9918919451747423e-05, + "loss": 1.5322, + "step": 2362 + }, + { + "epoch": 0.0693816430794527, + "grad_norm": 0.0, + "learning_rate": 1.99187985549538e-05, + "loss": 1.5537, + "step": 2363 + }, + { + "epoch": 0.06941100475659169, + "grad_norm": 0.0, + "learning_rate": 1.9918677568461937e-05, + "loss": 1.3652, + "step": 2364 + }, + { + "epoch": 0.0694403664337307, + "grad_norm": 0.0, + "learning_rate": 1.9918556492272936e-05, + "loss": 1.6025, + "step": 2365 + }, + { + "epoch": 0.0694697281108697, + "grad_norm": 0.0, + "learning_rate": 1.9918435326387885e-05, + "loss": 1.6309, + "step": 2366 + }, + { + "epoch": 0.06949908978800869, + "grad_norm": 0.0, + "learning_rate": 1.9918314070807886e-05, + "loss": 1.54, + "step": 2367 + }, + { + "epoch": 0.06952845146514769, + "grad_norm": 0.0, + "learning_rate": 1.9918192725534036e-05, + "loss": 1.5664, + "step": 2368 + }, + { + "epoch": 0.06955781314228669, + "grad_norm": 0.0, + "learning_rate": 1.9918071290567425e-05, + "loss": 1.4375, + "step": 2369 + }, + { + "epoch": 0.06958717481942568, + "grad_norm": 0.0, + "learning_rate": 1.9917949765909155e-05, + "loss": 1.5293, + "step": 2370 + }, + { + "epoch": 0.06961653649656468, + "grad_norm": 0.0, + "learning_rate": 1.9917828151560328e-05, + "loss": 1.5381, + "step": 2371 + }, + { + "epoch": 0.06964589817370369, + "grad_norm": 0.0, + "learning_rate": 1.991770644752204e-05, + "loss": 1.5391, + "step": 2372 + }, + { + "epoch": 0.06967525985084268, + "grad_norm": 0.0, + "learning_rate": 1.991758465379539e-05, + "loss": 1.4111, + "step": 2373 + }, + { + "epoch": 0.06970462152798168, + "grad_norm": 0.0, + "learning_rate": 1.9917462770381483e-05, + "loss": 1.6787, + "step": 2374 + }, + { + "epoch": 0.06973398320512067, + "grad_norm": 0.0, + "learning_rate": 1.9917340797281423e-05, + "loss": 1.4854, + "step": 2375 + }, + { + "epoch": 0.06976334488225967, + "grad_norm": 0.0, + "learning_rate": 1.9917218734496308e-05, + "loss": 1.5869, + "step": 2376 + }, + { + "epoch": 0.06979270655939868, + "grad_norm": 0.0, + "learning_rate": 1.9917096582027243e-05, + "loss": 1.4941, + "step": 2377 + }, + { + "epoch": 0.06982206823653767, + "grad_norm": 0.0, + "learning_rate": 1.9916974339875336e-05, + "loss": 1.5918, + "step": 2378 + }, + { + "epoch": 0.06985142991367667, + "grad_norm": 0.0, + "learning_rate": 1.9916852008041688e-05, + "loss": 1.5, + "step": 2379 + }, + { + "epoch": 0.06988079159081567, + "grad_norm": 0.0, + "learning_rate": 1.9916729586527413e-05, + "loss": 1.458, + "step": 2380 + }, + { + "epoch": 0.06991015326795466, + "grad_norm": 0.0, + "learning_rate": 1.9916607075333607e-05, + "loss": 1.5723, + "step": 2381 + }, + { + "epoch": 0.06993951494509366, + "grad_norm": 0.0, + "learning_rate": 1.9916484474461386e-05, + "loss": 1.459, + "step": 2382 + }, + { + "epoch": 0.06996887662223267, + "grad_norm": 0.0, + "learning_rate": 1.9916361783911853e-05, + "loss": 1.46, + "step": 2383 + }, + { + "epoch": 0.06999823829937166, + "grad_norm": 0.0, + "learning_rate": 1.9916239003686126e-05, + "loss": 1.7295, + "step": 2384 + }, + { + "epoch": 0.07002759997651066, + "grad_norm": 0.0, + "learning_rate": 1.9916116133785304e-05, + "loss": 1.4775, + "step": 2385 + }, + { + "epoch": 0.07005696165364966, + "grad_norm": 0.0, + "learning_rate": 1.9915993174210514e-05, + "loss": 1.3472, + "step": 2386 + }, + { + "epoch": 0.07008632333078865, + "grad_norm": 0.0, + "learning_rate": 1.991587012496285e-05, + "loss": 1.6006, + "step": 2387 + }, + { + "epoch": 0.07011568500792766, + "grad_norm": 0.0, + "learning_rate": 1.9915746986043433e-05, + "loss": 1.6045, + "step": 2388 + }, + { + "epoch": 0.07014504668506664, + "grad_norm": 0.0, + "learning_rate": 1.9915623757453378e-05, + "loss": 1.4844, + "step": 2389 + }, + { + "epoch": 0.07017440836220565, + "grad_norm": 0.0, + "learning_rate": 1.99155004391938e-05, + "loss": 1.3662, + "step": 2390 + }, + { + "epoch": 0.07020377003934465, + "grad_norm": 0.0, + "learning_rate": 1.9915377031265812e-05, + "loss": 1.3691, + "step": 2391 + }, + { + "epoch": 0.07023313171648364, + "grad_norm": 0.0, + "learning_rate": 1.991525353367053e-05, + "loss": 1.5918, + "step": 2392 + }, + { + "epoch": 0.07026249339362264, + "grad_norm": 0.0, + "learning_rate": 1.991512994640907e-05, + "loss": 1.5312, + "step": 2393 + }, + { + "epoch": 0.07029185507076165, + "grad_norm": 0.0, + "learning_rate": 1.9915006269482548e-05, + "loss": 1.5205, + "step": 2394 + }, + { + "epoch": 0.07032121674790064, + "grad_norm": 0.0, + "learning_rate": 1.9914882502892086e-05, + "loss": 1.6289, + "step": 2395 + }, + { + "epoch": 0.07035057842503964, + "grad_norm": 0.0, + "learning_rate": 1.9914758646638806e-05, + "loss": 1.54, + "step": 2396 + }, + { + "epoch": 0.07037994010217864, + "grad_norm": 0.0, + "learning_rate": 1.9914634700723822e-05, + "loss": 1.4072, + "step": 2397 + }, + { + "epoch": 0.07040930177931763, + "grad_norm": 0.0, + "learning_rate": 1.9914510665148256e-05, + "loss": 1.4326, + "step": 2398 + }, + { + "epoch": 0.07043866345645663, + "grad_norm": 0.0, + "learning_rate": 1.991438653991323e-05, + "loss": 1.3672, + "step": 2399 + }, + { + "epoch": 0.07046802513359562, + "grad_norm": 0.0, + "learning_rate": 1.991426232501987e-05, + "loss": 1.4951, + "step": 2400 + }, + { + "epoch": 0.07049738681073463, + "grad_norm": 0.0, + "learning_rate": 1.9914138020469297e-05, + "loss": 1.5693, + "step": 2401 + }, + { + "epoch": 0.07052674848787363, + "grad_norm": 0.0, + "learning_rate": 1.9914013626262634e-05, + "loss": 1.5225, + "step": 2402 + }, + { + "epoch": 0.07055611016501262, + "grad_norm": 0.0, + "learning_rate": 1.991388914240101e-05, + "loss": 1.4502, + "step": 2403 + }, + { + "epoch": 0.07058547184215162, + "grad_norm": 0.0, + "learning_rate": 1.991376456888554e-05, + "loss": 1.4863, + "step": 2404 + }, + { + "epoch": 0.07061483351929063, + "grad_norm": 0.0, + "learning_rate": 1.9913639905717364e-05, + "loss": 1.3838, + "step": 2405 + }, + { + "epoch": 0.07064419519642962, + "grad_norm": 0.0, + "learning_rate": 1.99135151528976e-05, + "loss": 1.4092, + "step": 2406 + }, + { + "epoch": 0.07067355687356862, + "grad_norm": 0.0, + "learning_rate": 1.9913390310427384e-05, + "loss": 1.5518, + "step": 2407 + }, + { + "epoch": 0.07070291855070762, + "grad_norm": 0.0, + "learning_rate": 1.9913265378307837e-05, + "loss": 1.4707, + "step": 2408 + }, + { + "epoch": 0.07073228022784661, + "grad_norm": 0.0, + "learning_rate": 1.991314035654009e-05, + "loss": 1.5498, + "step": 2409 + }, + { + "epoch": 0.07076164190498561, + "grad_norm": 0.0, + "learning_rate": 1.9913015245125282e-05, + "loss": 1.4912, + "step": 2410 + }, + { + "epoch": 0.07079100358212462, + "grad_norm": 0.0, + "learning_rate": 1.9912890044064537e-05, + "loss": 1.2705, + "step": 2411 + }, + { + "epoch": 0.0708203652592636, + "grad_norm": 0.0, + "learning_rate": 1.9912764753358986e-05, + "loss": 1.5605, + "step": 2412 + }, + { + "epoch": 0.07084972693640261, + "grad_norm": 0.0, + "learning_rate": 1.9912639373009766e-05, + "loss": 1.5293, + "step": 2413 + }, + { + "epoch": 0.0708790886135416, + "grad_norm": 0.0, + "learning_rate": 1.991251390301801e-05, + "loss": 1.3096, + "step": 2414 + }, + { + "epoch": 0.0709084502906806, + "grad_norm": 0.0, + "learning_rate": 1.9912388343384853e-05, + "loss": 1.3223, + "step": 2415 + }, + { + "epoch": 0.0709378119678196, + "grad_norm": 0.0, + "learning_rate": 1.9912262694111427e-05, + "loss": 1.4473, + "step": 2416 + }, + { + "epoch": 0.0709671736449586, + "grad_norm": 0.0, + "learning_rate": 1.9912136955198872e-05, + "loss": 1.6006, + "step": 2417 + }, + { + "epoch": 0.0709965353220976, + "grad_norm": 0.0, + "learning_rate": 1.9912011126648324e-05, + "loss": 1.4111, + "step": 2418 + }, + { + "epoch": 0.0710258969992366, + "grad_norm": 0.0, + "learning_rate": 1.9911885208460918e-05, + "loss": 1.5674, + "step": 2419 + }, + { + "epoch": 0.07105525867637559, + "grad_norm": 0.0, + "learning_rate": 1.9911759200637798e-05, + "loss": 1.5576, + "step": 2420 + }, + { + "epoch": 0.0710846203535146, + "grad_norm": 0.0, + "learning_rate": 1.9911633103180103e-05, + "loss": 1.4238, + "step": 2421 + }, + { + "epoch": 0.0711139820306536, + "grad_norm": 0.0, + "learning_rate": 1.9911506916088972e-05, + "loss": 1.4375, + "step": 2422 + }, + { + "epoch": 0.07114334370779259, + "grad_norm": 0.0, + "learning_rate": 1.9911380639365543e-05, + "loss": 1.5488, + "step": 2423 + }, + { + "epoch": 0.07117270538493159, + "grad_norm": 0.0, + "learning_rate": 1.9911254273010958e-05, + "loss": 1.5195, + "step": 2424 + }, + { + "epoch": 0.07120206706207058, + "grad_norm": 0.0, + "learning_rate": 1.991112781702637e-05, + "loss": 1.4668, + "step": 2425 + }, + { + "epoch": 0.07123142873920958, + "grad_norm": 0.0, + "learning_rate": 1.991100127141291e-05, + "loss": 1.4775, + "step": 2426 + }, + { + "epoch": 0.07126079041634859, + "grad_norm": 0.0, + "learning_rate": 1.9910874636171727e-05, + "loss": 1.5488, + "step": 2427 + }, + { + "epoch": 0.07129015209348757, + "grad_norm": 0.0, + "learning_rate": 1.9910747911303965e-05, + "loss": 1.4346, + "step": 2428 + }, + { + "epoch": 0.07131951377062658, + "grad_norm": 0.0, + "learning_rate": 1.9910621096810773e-05, + "loss": 1.3779, + "step": 2429 + }, + { + "epoch": 0.07134887544776558, + "grad_norm": 0.0, + "learning_rate": 1.9910494192693296e-05, + "loss": 1.5498, + "step": 2430 + }, + { + "epoch": 0.07137823712490457, + "grad_norm": 0.0, + "learning_rate": 1.9910367198952682e-05, + "loss": 1.4492, + "step": 2431 + }, + { + "epoch": 0.07140759880204357, + "grad_norm": 0.0, + "learning_rate": 1.991024011559008e-05, + "loss": 1.5293, + "step": 2432 + }, + { + "epoch": 0.07143696047918258, + "grad_norm": 0.0, + "learning_rate": 1.991011294260664e-05, + "loss": 1.4336, + "step": 2433 + }, + { + "epoch": 0.07146632215632157, + "grad_norm": 0.0, + "learning_rate": 1.9909985680003508e-05, + "loss": 1.5, + "step": 2434 + }, + { + "epoch": 0.07149568383346057, + "grad_norm": 0.0, + "learning_rate": 1.9909858327781838e-05, + "loss": 1.5264, + "step": 2435 + }, + { + "epoch": 0.07152504551059957, + "grad_norm": 0.0, + "learning_rate": 1.9909730885942784e-05, + "loss": 1.2959, + "step": 2436 + }, + { + "epoch": 0.07155440718773856, + "grad_norm": 0.0, + "learning_rate": 1.990960335448749e-05, + "loss": 1.4844, + "step": 2437 + }, + { + "epoch": 0.07158376886487756, + "grad_norm": 0.0, + "learning_rate": 1.9909475733417123e-05, + "loss": 1.626, + "step": 2438 + }, + { + "epoch": 0.07161313054201655, + "grad_norm": 0.0, + "learning_rate": 1.9909348022732823e-05, + "loss": 1.5156, + "step": 2439 + }, + { + "epoch": 0.07164249221915556, + "grad_norm": 0.0, + "learning_rate": 1.990922022243575e-05, + "loss": 1.417, + "step": 2440 + }, + { + "epoch": 0.07167185389629456, + "grad_norm": 0.0, + "learning_rate": 1.9909092332527063e-05, + "loss": 1.5654, + "step": 2441 + }, + { + "epoch": 0.07170121557343355, + "grad_norm": 0.0, + "learning_rate": 1.990896435300792e-05, + "loss": 1.4307, + "step": 2442 + }, + { + "epoch": 0.07173057725057255, + "grad_norm": 0.0, + "learning_rate": 1.990883628387947e-05, + "loss": 1.5879, + "step": 2443 + }, + { + "epoch": 0.07175993892771156, + "grad_norm": 0.0, + "learning_rate": 1.9908708125142877e-05, + "loss": 1.4932, + "step": 2444 + }, + { + "epoch": 0.07178930060485054, + "grad_norm": 0.0, + "learning_rate": 1.99085798767993e-05, + "loss": 1.4746, + "step": 2445 + }, + { + "epoch": 0.07181866228198955, + "grad_norm": 0.0, + "learning_rate": 1.9908451538849896e-05, + "loss": 1.5254, + "step": 2446 + }, + { + "epoch": 0.07184802395912855, + "grad_norm": 0.0, + "learning_rate": 1.990832311129583e-05, + "loss": 1.3916, + "step": 2447 + }, + { + "epoch": 0.07187738563626754, + "grad_norm": 0.0, + "learning_rate": 1.9908194594138254e-05, + "loss": 1.4004, + "step": 2448 + }, + { + "epoch": 0.07190674731340654, + "grad_norm": 0.0, + "learning_rate": 1.9908065987378342e-05, + "loss": 1.4102, + "step": 2449 + }, + { + "epoch": 0.07193610899054553, + "grad_norm": 0.0, + "learning_rate": 1.990793729101725e-05, + "loss": 1.6299, + "step": 2450 + }, + { + "epoch": 0.07196547066768454, + "grad_norm": 0.0, + "learning_rate": 1.9907808505056146e-05, + "loss": 1.4814, + "step": 2451 + }, + { + "epoch": 0.07199483234482354, + "grad_norm": 0.0, + "learning_rate": 1.990767962949619e-05, + "loss": 1.4834, + "step": 2452 + }, + { + "epoch": 0.07202419402196253, + "grad_norm": 0.0, + "learning_rate": 1.990755066433855e-05, + "loss": 1.4912, + "step": 2453 + }, + { + "epoch": 0.07205355569910153, + "grad_norm": 0.0, + "learning_rate": 1.9907421609584394e-05, + "loss": 1.4795, + "step": 2454 + }, + { + "epoch": 0.07208291737624054, + "grad_norm": 0.0, + "learning_rate": 1.9907292465234886e-05, + "loss": 1.5127, + "step": 2455 + }, + { + "epoch": 0.07211227905337952, + "grad_norm": 0.0, + "learning_rate": 1.990716323129119e-05, + "loss": 1.4316, + "step": 2456 + }, + { + "epoch": 0.07214164073051853, + "grad_norm": 0.0, + "learning_rate": 1.9907033907754484e-05, + "loss": 1.3701, + "step": 2457 + }, + { + "epoch": 0.07217100240765753, + "grad_norm": 0.0, + "learning_rate": 1.9906904494625935e-05, + "loss": 1.4707, + "step": 2458 + }, + { + "epoch": 0.07220036408479652, + "grad_norm": 0.0, + "learning_rate": 1.990677499190671e-05, + "loss": 1.4375, + "step": 2459 + }, + { + "epoch": 0.07222972576193552, + "grad_norm": 0.0, + "learning_rate": 1.9906645399597977e-05, + "loss": 1.5986, + "step": 2460 + }, + { + "epoch": 0.07225908743907453, + "grad_norm": 0.0, + "learning_rate": 1.990651571770092e-05, + "loss": 1.5283, + "step": 2461 + }, + { + "epoch": 0.07228844911621352, + "grad_norm": 0.0, + "learning_rate": 1.99063859462167e-05, + "loss": 1.459, + "step": 2462 + }, + { + "epoch": 0.07231781079335252, + "grad_norm": 0.0, + "learning_rate": 1.9906256085146495e-05, + "loss": 1.4736, + "step": 2463 + }, + { + "epoch": 0.07234717247049151, + "grad_norm": 0.0, + "learning_rate": 1.990612613449148e-05, + "loss": 1.4121, + "step": 2464 + }, + { + "epoch": 0.07237653414763051, + "grad_norm": 0.0, + "learning_rate": 1.990599609425283e-05, + "loss": 1.5811, + "step": 2465 + }, + { + "epoch": 0.07240589582476951, + "grad_norm": 0.0, + "learning_rate": 1.990586596443172e-05, + "loss": 1.498, + "step": 2466 + }, + { + "epoch": 0.0724352575019085, + "grad_norm": 0.0, + "learning_rate": 1.9905735745029324e-05, + "loss": 1.4023, + "step": 2467 + }, + { + "epoch": 0.0724646191790475, + "grad_norm": 0.0, + "learning_rate": 1.9905605436046827e-05, + "loss": 1.3403, + "step": 2468 + }, + { + "epoch": 0.07249398085618651, + "grad_norm": 0.0, + "learning_rate": 1.9905475037485402e-05, + "loss": 1.4541, + "step": 2469 + }, + { + "epoch": 0.0725233425333255, + "grad_norm": 0.0, + "learning_rate": 1.9905344549346228e-05, + "loss": 1.5693, + "step": 2470 + }, + { + "epoch": 0.0725527042104645, + "grad_norm": 0.0, + "learning_rate": 1.9905213971630487e-05, + "loss": 1.4844, + "step": 2471 + }, + { + "epoch": 0.0725820658876035, + "grad_norm": 0.0, + "learning_rate": 1.9905083304339362e-05, + "loss": 1.3779, + "step": 2472 + }, + { + "epoch": 0.0726114275647425, + "grad_norm": 0.0, + "learning_rate": 1.990495254747403e-05, + "loss": 1.4805, + "step": 2473 + }, + { + "epoch": 0.0726407892418815, + "grad_norm": 0.0, + "learning_rate": 1.9904821701035676e-05, + "loss": 1.5234, + "step": 2474 + }, + { + "epoch": 0.07267015091902049, + "grad_norm": 0.0, + "learning_rate": 1.990469076502548e-05, + "loss": 1.4346, + "step": 2475 + }, + { + "epoch": 0.07269951259615949, + "grad_norm": 0.0, + "learning_rate": 1.990455973944463e-05, + "loss": 1.5684, + "step": 2476 + }, + { + "epoch": 0.0727288742732985, + "grad_norm": 0.0, + "learning_rate": 1.990442862429431e-05, + "loss": 1.5127, + "step": 2477 + }, + { + "epoch": 0.07275823595043748, + "grad_norm": 0.0, + "learning_rate": 1.9904297419575704e-05, + "loss": 1.4531, + "step": 2478 + }, + { + "epoch": 0.07278759762757649, + "grad_norm": 0.0, + "learning_rate": 1.9904166125290002e-05, + "loss": 1.6455, + "step": 2479 + }, + { + "epoch": 0.07281695930471549, + "grad_norm": 0.0, + "learning_rate": 1.9904034741438392e-05, + "loss": 1.5264, + "step": 2480 + }, + { + "epoch": 0.07284632098185448, + "grad_norm": 0.0, + "learning_rate": 1.9903903268022057e-05, + "loss": 1.5469, + "step": 2481 + }, + { + "epoch": 0.07287568265899348, + "grad_norm": 0.0, + "learning_rate": 1.9903771705042187e-05, + "loss": 1.4053, + "step": 2482 + }, + { + "epoch": 0.07290504433613249, + "grad_norm": 0.0, + "learning_rate": 1.990364005249997e-05, + "loss": 1.4736, + "step": 2483 + }, + { + "epoch": 0.07293440601327147, + "grad_norm": 0.0, + "learning_rate": 1.9903508310396605e-05, + "loss": 1.4658, + "step": 2484 + }, + { + "epoch": 0.07296376769041048, + "grad_norm": 0.0, + "learning_rate": 1.9903376478733275e-05, + "loss": 1.4795, + "step": 2485 + }, + { + "epoch": 0.07299312936754948, + "grad_norm": 0.0, + "learning_rate": 1.9903244557511177e-05, + "loss": 1.5439, + "step": 2486 + }, + { + "epoch": 0.07302249104468847, + "grad_norm": 0.0, + "learning_rate": 1.9903112546731502e-05, + "loss": 1.4131, + "step": 2487 + }, + { + "epoch": 0.07305185272182747, + "grad_norm": 0.0, + "learning_rate": 1.9902980446395444e-05, + "loss": 1.4932, + "step": 2488 + }, + { + "epoch": 0.07308121439896646, + "grad_norm": 0.0, + "learning_rate": 1.9902848256504195e-05, + "loss": 1.4062, + "step": 2489 + }, + { + "epoch": 0.07311057607610547, + "grad_norm": 0.0, + "learning_rate": 1.990271597705896e-05, + "loss": 1.5303, + "step": 2490 + }, + { + "epoch": 0.07313993775324447, + "grad_norm": 0.0, + "learning_rate": 1.9902583608060922e-05, + "loss": 1.4668, + "step": 2491 + }, + { + "epoch": 0.07316929943038346, + "grad_norm": 0.0, + "learning_rate": 1.9902451149511287e-05, + "loss": 1.5508, + "step": 2492 + }, + { + "epoch": 0.07319866110752246, + "grad_norm": 0.0, + "learning_rate": 1.9902318601411248e-05, + "loss": 1.5635, + "step": 2493 + }, + { + "epoch": 0.07322802278466146, + "grad_norm": 0.0, + "learning_rate": 1.9902185963762005e-05, + "loss": 1.4443, + "step": 2494 + }, + { + "epoch": 0.07325738446180045, + "grad_norm": 0.0, + "learning_rate": 1.990205323656476e-05, + "loss": 1.5068, + "step": 2495 + }, + { + "epoch": 0.07328674613893946, + "grad_norm": 0.0, + "learning_rate": 1.9901920419820714e-05, + "loss": 1.4893, + "step": 2496 + }, + { + "epoch": 0.07331610781607846, + "grad_norm": 0.0, + "learning_rate": 1.9901787513531062e-05, + "loss": 1.4883, + "step": 2497 + }, + { + "epoch": 0.07334546949321745, + "grad_norm": 0.0, + "learning_rate": 1.990165451769701e-05, + "loss": 1.4072, + "step": 2498 + }, + { + "epoch": 0.07337483117035645, + "grad_norm": 0.0, + "learning_rate": 1.9901521432319765e-05, + "loss": 1.4766, + "step": 2499 + }, + { + "epoch": 0.07340419284749546, + "grad_norm": 0.0, + "learning_rate": 1.990138825740052e-05, + "loss": 1.5947, + "step": 2500 + }, + { + "epoch": 0.07343355452463445, + "grad_norm": 0.0, + "learning_rate": 1.9901254992940488e-05, + "loss": 1.498, + "step": 2501 + }, + { + "epoch": 0.07346291620177345, + "grad_norm": 0.0, + "learning_rate": 1.990112163894087e-05, + "loss": 1.5078, + "step": 2502 + }, + { + "epoch": 0.07349227787891244, + "grad_norm": 0.0, + "learning_rate": 1.9900988195402876e-05, + "loss": 1.6875, + "step": 2503 + }, + { + "epoch": 0.07352163955605144, + "grad_norm": 0.0, + "learning_rate": 1.9900854662327708e-05, + "loss": 1.4375, + "step": 2504 + }, + { + "epoch": 0.07355100123319044, + "grad_norm": 0.0, + "learning_rate": 1.9900721039716577e-05, + "loss": 1.499, + "step": 2505 + }, + { + "epoch": 0.07358036291032943, + "grad_norm": 0.0, + "learning_rate": 1.990058732757069e-05, + "loss": 1.4912, + "step": 2506 + }, + { + "epoch": 0.07360972458746844, + "grad_norm": 0.0, + "learning_rate": 1.9900453525891256e-05, + "loss": 1.6826, + "step": 2507 + }, + { + "epoch": 0.07363908626460744, + "grad_norm": 0.0, + "learning_rate": 1.9900319634679482e-05, + "loss": 1.458, + "step": 2508 + }, + { + "epoch": 0.07366844794174643, + "grad_norm": 0.0, + "learning_rate": 1.9900185653936586e-05, + "loss": 1.457, + "step": 2509 + }, + { + "epoch": 0.07369780961888543, + "grad_norm": 0.0, + "learning_rate": 1.9900051583663775e-05, + "loss": 1.5703, + "step": 2510 + }, + { + "epoch": 0.07372717129602444, + "grad_norm": 0.0, + "learning_rate": 1.9899917423862262e-05, + "loss": 1.4365, + "step": 2511 + }, + { + "epoch": 0.07375653297316342, + "grad_norm": 0.0, + "learning_rate": 1.989978317453326e-05, + "loss": 1.584, + "step": 2512 + }, + { + "epoch": 0.07378589465030243, + "grad_norm": 0.0, + "learning_rate": 1.9899648835677982e-05, + "loss": 1.5986, + "step": 2513 + }, + { + "epoch": 0.07381525632744142, + "grad_norm": 0.0, + "learning_rate": 1.9899514407297646e-05, + "loss": 1.5654, + "step": 2514 + }, + { + "epoch": 0.07384461800458042, + "grad_norm": 0.0, + "learning_rate": 1.9899379889393465e-05, + "loss": 1.5342, + "step": 2515 + }, + { + "epoch": 0.07387397968171942, + "grad_norm": 0.0, + "learning_rate": 1.9899245281966657e-05, + "loss": 1.5322, + "step": 2516 + }, + { + "epoch": 0.07390334135885841, + "grad_norm": 0.0, + "learning_rate": 1.9899110585018438e-05, + "loss": 1.4736, + "step": 2517 + }, + { + "epoch": 0.07393270303599742, + "grad_norm": 0.0, + "learning_rate": 1.9898975798550027e-05, + "loss": 1.4941, + "step": 2518 + }, + { + "epoch": 0.07396206471313642, + "grad_norm": 0.0, + "learning_rate": 1.9898840922562644e-05, + "loss": 1.4873, + "step": 2519 + }, + { + "epoch": 0.07399142639027541, + "grad_norm": 0.0, + "learning_rate": 1.9898705957057506e-05, + "loss": 1.4326, + "step": 2520 + }, + { + "epoch": 0.07402078806741441, + "grad_norm": 0.0, + "learning_rate": 1.9898570902035835e-05, + "loss": 1.4746, + "step": 2521 + }, + { + "epoch": 0.07405014974455341, + "grad_norm": 0.0, + "learning_rate": 1.9898435757498852e-05, + "loss": 1.5967, + "step": 2522 + }, + { + "epoch": 0.0740795114216924, + "grad_norm": 0.0, + "learning_rate": 1.989830052344778e-05, + "loss": 1.4082, + "step": 2523 + }, + { + "epoch": 0.07410887309883141, + "grad_norm": 0.0, + "learning_rate": 1.989816519988384e-05, + "loss": 1.3789, + "step": 2524 + }, + { + "epoch": 0.07413823477597041, + "grad_norm": 0.0, + "learning_rate": 1.989802978680826e-05, + "loss": 1.4541, + "step": 2525 + }, + { + "epoch": 0.0741675964531094, + "grad_norm": 0.0, + "learning_rate": 1.989789428422226e-05, + "loss": 1.5303, + "step": 2526 + }, + { + "epoch": 0.0741969581302484, + "grad_norm": 0.0, + "learning_rate": 1.9897758692127067e-05, + "loss": 1.5557, + "step": 2527 + }, + { + "epoch": 0.07422631980738739, + "grad_norm": 0.0, + "learning_rate": 1.9897623010523907e-05, + "loss": 1.5186, + "step": 2528 + }, + { + "epoch": 0.0742556814845264, + "grad_norm": 0.0, + "learning_rate": 1.989748723941401e-05, + "loss": 1.4893, + "step": 2529 + }, + { + "epoch": 0.0742850431616654, + "grad_norm": 0.0, + "learning_rate": 1.98973513787986e-05, + "loss": 1.4717, + "step": 2530 + }, + { + "epoch": 0.07431440483880439, + "grad_norm": 0.0, + "learning_rate": 1.9897215428678903e-05, + "loss": 1.5488, + "step": 2531 + }, + { + "epoch": 0.07434376651594339, + "grad_norm": 0.0, + "learning_rate": 1.9897079389056153e-05, + "loss": 1.7393, + "step": 2532 + }, + { + "epoch": 0.0743731281930824, + "grad_norm": 0.0, + "learning_rate": 1.989694325993158e-05, + "loss": 1.5537, + "step": 2533 + }, + { + "epoch": 0.07440248987022138, + "grad_norm": 0.0, + "learning_rate": 1.9896807041306417e-05, + "loss": 1.584, + "step": 2534 + }, + { + "epoch": 0.07443185154736039, + "grad_norm": 0.0, + "learning_rate": 1.9896670733181894e-05, + "loss": 1.5566, + "step": 2535 + }, + { + "epoch": 0.07446121322449939, + "grad_norm": 0.0, + "learning_rate": 1.989653433555924e-05, + "loss": 1.5801, + "step": 2536 + }, + { + "epoch": 0.07449057490163838, + "grad_norm": 0.0, + "learning_rate": 1.989639784843969e-05, + "loss": 1.415, + "step": 2537 + }, + { + "epoch": 0.07451993657877738, + "grad_norm": 0.0, + "learning_rate": 1.9896261271824485e-05, + "loss": 1.5059, + "step": 2538 + }, + { + "epoch": 0.07454929825591637, + "grad_norm": 0.0, + "learning_rate": 1.989612460571485e-05, + "loss": 1.543, + "step": 2539 + }, + { + "epoch": 0.07457865993305537, + "grad_norm": 0.0, + "learning_rate": 1.989598785011203e-05, + "loss": 1.3994, + "step": 2540 + }, + { + "epoch": 0.07460802161019438, + "grad_norm": 0.0, + "learning_rate": 1.9895851005017254e-05, + "loss": 1.4014, + "step": 2541 + }, + { + "epoch": 0.07463738328733337, + "grad_norm": 0.0, + "learning_rate": 1.9895714070431764e-05, + "loss": 1.6094, + "step": 2542 + }, + { + "epoch": 0.07466674496447237, + "grad_norm": 0.0, + "learning_rate": 1.98955770463568e-05, + "loss": 1.5195, + "step": 2543 + }, + { + "epoch": 0.07469610664161137, + "grad_norm": 0.0, + "learning_rate": 1.9895439932793593e-05, + "loss": 1.5303, + "step": 2544 + }, + { + "epoch": 0.07472546831875036, + "grad_norm": 0.0, + "learning_rate": 1.989530272974339e-05, + "loss": 1.4277, + "step": 2545 + }, + { + "epoch": 0.07475482999588937, + "grad_norm": 0.0, + "learning_rate": 1.9895165437207433e-05, + "loss": 1.4111, + "step": 2546 + }, + { + "epoch": 0.07478419167302837, + "grad_norm": 0.0, + "learning_rate": 1.989502805518696e-05, + "loss": 1.4492, + "step": 2547 + }, + { + "epoch": 0.07481355335016736, + "grad_norm": 0.0, + "learning_rate": 1.9894890583683212e-05, + "loss": 1.5312, + "step": 2548 + }, + { + "epoch": 0.07484291502730636, + "grad_norm": 0.0, + "learning_rate": 1.9894753022697436e-05, + "loss": 1.5684, + "step": 2549 + }, + { + "epoch": 0.07487227670444536, + "grad_norm": 0.0, + "learning_rate": 1.9894615372230874e-05, + "loss": 1.5146, + "step": 2550 + }, + { + "epoch": 0.07490163838158435, + "grad_norm": 0.0, + "learning_rate": 1.9894477632284773e-05, + "loss": 1.4043, + "step": 2551 + }, + { + "epoch": 0.07493100005872336, + "grad_norm": 0.0, + "learning_rate": 1.9894339802860372e-05, + "loss": 1.5459, + "step": 2552 + }, + { + "epoch": 0.07496036173586235, + "grad_norm": 0.0, + "learning_rate": 1.9894201883958928e-05, + "loss": 1.4512, + "step": 2553 + }, + { + "epoch": 0.07498972341300135, + "grad_norm": 0.0, + "learning_rate": 1.989406387558168e-05, + "loss": 1.6055, + "step": 2554 + }, + { + "epoch": 0.07501908509014035, + "grad_norm": 0.0, + "learning_rate": 1.9893925777729878e-05, + "loss": 1.5566, + "step": 2555 + }, + { + "epoch": 0.07504844676727934, + "grad_norm": 0.0, + "learning_rate": 1.989378759040477e-05, + "loss": 1.502, + "step": 2556 + }, + { + "epoch": 0.07507780844441835, + "grad_norm": 0.0, + "learning_rate": 1.989364931360761e-05, + "loss": 1.5293, + "step": 2557 + }, + { + "epoch": 0.07510717012155735, + "grad_norm": 0.0, + "learning_rate": 1.9893510947339645e-05, + "loss": 1.5107, + "step": 2558 + }, + { + "epoch": 0.07513653179869634, + "grad_norm": 0.0, + "learning_rate": 1.9893372491602125e-05, + "loss": 1.5537, + "step": 2559 + }, + { + "epoch": 0.07516589347583534, + "grad_norm": 0.0, + "learning_rate": 1.9893233946396303e-05, + "loss": 1.5, + "step": 2560 + }, + { + "epoch": 0.07519525515297434, + "grad_norm": 0.0, + "learning_rate": 1.9893095311723436e-05, + "loss": 1.5215, + "step": 2561 + }, + { + "epoch": 0.07522461683011333, + "grad_norm": 0.0, + "learning_rate": 1.989295658758477e-05, + "loss": 1.4131, + "step": 2562 + }, + { + "epoch": 0.07525397850725234, + "grad_norm": 0.0, + "learning_rate": 1.9892817773981567e-05, + "loss": 1.2666, + "step": 2563 + }, + { + "epoch": 0.07528334018439133, + "grad_norm": 0.0, + "learning_rate": 1.989267887091508e-05, + "loss": 1.5195, + "step": 2564 + }, + { + "epoch": 0.07531270186153033, + "grad_norm": 0.0, + "learning_rate": 1.9892539878386563e-05, + "loss": 1.5186, + "step": 2565 + }, + { + "epoch": 0.07534206353866933, + "grad_norm": 0.0, + "learning_rate": 1.9892400796397276e-05, + "loss": 1.4531, + "step": 2566 + }, + { + "epoch": 0.07537142521580832, + "grad_norm": 0.0, + "learning_rate": 1.9892261624948473e-05, + "loss": 1.5596, + "step": 2567 + }, + { + "epoch": 0.07540078689294732, + "grad_norm": 0.0, + "learning_rate": 1.9892122364041414e-05, + "loss": 1.582, + "step": 2568 + }, + { + "epoch": 0.07543014857008633, + "grad_norm": 0.0, + "learning_rate": 1.989198301367736e-05, + "loss": 1.6699, + "step": 2569 + }, + { + "epoch": 0.07545951024722532, + "grad_norm": 0.0, + "learning_rate": 1.9891843573857574e-05, + "loss": 1.4785, + "step": 2570 + }, + { + "epoch": 0.07548887192436432, + "grad_norm": 0.0, + "learning_rate": 1.9891704044583307e-05, + "loss": 1.4775, + "step": 2571 + }, + { + "epoch": 0.07551823360150332, + "grad_norm": 0.0, + "learning_rate": 1.9891564425855832e-05, + "loss": 1.3721, + "step": 2572 + }, + { + "epoch": 0.07554759527864231, + "grad_norm": 0.0, + "learning_rate": 1.9891424717676403e-05, + "loss": 1.5752, + "step": 2573 + }, + { + "epoch": 0.07557695695578132, + "grad_norm": 0.0, + "learning_rate": 1.989128492004629e-05, + "loss": 1.5273, + "step": 2574 + }, + { + "epoch": 0.07560631863292032, + "grad_norm": 0.0, + "learning_rate": 1.9891145032966752e-05, + "loss": 1.4736, + "step": 2575 + }, + { + "epoch": 0.07563568031005931, + "grad_norm": 0.0, + "learning_rate": 1.9891005056439056e-05, + "loss": 1.5498, + "step": 2576 + }, + { + "epoch": 0.07566504198719831, + "grad_norm": 0.0, + "learning_rate": 1.9890864990464473e-05, + "loss": 1.4229, + "step": 2577 + }, + { + "epoch": 0.0756944036643373, + "grad_norm": 0.0, + "learning_rate": 1.9890724835044258e-05, + "loss": 1.4648, + "step": 2578 + }, + { + "epoch": 0.0757237653414763, + "grad_norm": 0.0, + "learning_rate": 1.9890584590179688e-05, + "loss": 1.4902, + "step": 2579 + }, + { + "epoch": 0.07575312701861531, + "grad_norm": 0.0, + "learning_rate": 1.989044425587203e-05, + "loss": 1.5059, + "step": 2580 + }, + { + "epoch": 0.0757824886957543, + "grad_norm": 0.0, + "learning_rate": 1.989030383212255e-05, + "loss": 1.5127, + "step": 2581 + }, + { + "epoch": 0.0758118503728933, + "grad_norm": 0.0, + "learning_rate": 1.989016331893252e-05, + "loss": 1.5469, + "step": 2582 + }, + { + "epoch": 0.0758412120500323, + "grad_norm": 0.0, + "learning_rate": 1.989002271630321e-05, + "loss": 1.5391, + "step": 2583 + }, + { + "epoch": 0.07587057372717129, + "grad_norm": 0.0, + "learning_rate": 1.9889882024235894e-05, + "loss": 1.6055, + "step": 2584 + }, + { + "epoch": 0.0758999354043103, + "grad_norm": 0.0, + "learning_rate": 1.988974124273184e-05, + "loss": 1.4131, + "step": 2585 + }, + { + "epoch": 0.0759292970814493, + "grad_norm": 0.0, + "learning_rate": 1.9889600371792322e-05, + "loss": 1.5596, + "step": 2586 + }, + { + "epoch": 0.07595865875858829, + "grad_norm": 0.0, + "learning_rate": 1.9889459411418618e-05, + "loss": 1.5625, + "step": 2587 + }, + { + "epoch": 0.07598802043572729, + "grad_norm": 0.0, + "learning_rate": 1.9889318361611994e-05, + "loss": 1.4971, + "step": 2588 + }, + { + "epoch": 0.07601738211286628, + "grad_norm": 0.0, + "learning_rate": 1.9889177222373738e-05, + "loss": 1.4238, + "step": 2589 + }, + { + "epoch": 0.07604674379000528, + "grad_norm": 0.0, + "learning_rate": 1.9889035993705116e-05, + "loss": 1.6328, + "step": 2590 + }, + { + "epoch": 0.07607610546714429, + "grad_norm": 0.0, + "learning_rate": 1.9888894675607413e-05, + "loss": 1.6084, + "step": 2591 + }, + { + "epoch": 0.07610546714428328, + "grad_norm": 0.0, + "learning_rate": 1.9888753268081896e-05, + "loss": 1.5195, + "step": 2592 + }, + { + "epoch": 0.07613482882142228, + "grad_norm": 0.0, + "learning_rate": 1.988861177112986e-05, + "loss": 1.5293, + "step": 2593 + }, + { + "epoch": 0.07616419049856128, + "grad_norm": 0.0, + "learning_rate": 1.9888470184752566e-05, + "loss": 1.4492, + "step": 2594 + }, + { + "epoch": 0.07619355217570027, + "grad_norm": 0.0, + "learning_rate": 1.988832850895131e-05, + "loss": 1.5234, + "step": 2595 + }, + { + "epoch": 0.07622291385283927, + "grad_norm": 0.0, + "learning_rate": 1.988818674372736e-05, + "loss": 1.543, + "step": 2596 + }, + { + "epoch": 0.07625227552997828, + "grad_norm": 0.0, + "learning_rate": 1.988804488908201e-05, + "loss": 1.5107, + "step": 2597 + }, + { + "epoch": 0.07628163720711727, + "grad_norm": 0.0, + "learning_rate": 1.9887902945016538e-05, + "loss": 1.5898, + "step": 2598 + }, + { + "epoch": 0.07631099888425627, + "grad_norm": 0.0, + "learning_rate": 1.9887760911532226e-05, + "loss": 1.4883, + "step": 2599 + }, + { + "epoch": 0.07634036056139527, + "grad_norm": 0.0, + "learning_rate": 1.988761878863036e-05, + "loss": 1.5439, + "step": 2600 + }, + { + "epoch": 0.07636972223853426, + "grad_norm": 0.0, + "learning_rate": 1.9887476576312225e-05, + "loss": 1.5547, + "step": 2601 + }, + { + "epoch": 0.07639908391567327, + "grad_norm": 0.0, + "learning_rate": 1.9887334274579108e-05, + "loss": 1.5967, + "step": 2602 + }, + { + "epoch": 0.07642844559281226, + "grad_norm": 0.0, + "learning_rate": 1.9887191883432295e-05, + "loss": 1.5918, + "step": 2603 + }, + { + "epoch": 0.07645780726995126, + "grad_norm": 0.0, + "learning_rate": 1.988704940287307e-05, + "loss": 1.3242, + "step": 2604 + }, + { + "epoch": 0.07648716894709026, + "grad_norm": 0.0, + "learning_rate": 1.988690683290273e-05, + "loss": 1.5068, + "step": 2605 + }, + { + "epoch": 0.07651653062422925, + "grad_norm": 0.0, + "learning_rate": 1.988676417352256e-05, + "loss": 1.5, + "step": 2606 + }, + { + "epoch": 0.07654589230136825, + "grad_norm": 0.0, + "learning_rate": 1.9886621424733844e-05, + "loss": 1.4805, + "step": 2607 + }, + { + "epoch": 0.07657525397850726, + "grad_norm": 0.0, + "learning_rate": 1.988647858653788e-05, + "loss": 1.4902, + "step": 2608 + }, + { + "epoch": 0.07660461565564625, + "grad_norm": 0.0, + "learning_rate": 1.988633565893596e-05, + "loss": 1.5342, + "step": 2609 + }, + { + "epoch": 0.07663397733278525, + "grad_norm": 0.0, + "learning_rate": 1.9886192641929375e-05, + "loss": 1.418, + "step": 2610 + }, + { + "epoch": 0.07666333900992425, + "grad_norm": 0.0, + "learning_rate": 1.9886049535519416e-05, + "loss": 1.4473, + "step": 2611 + }, + { + "epoch": 0.07669270068706324, + "grad_norm": 0.0, + "learning_rate": 1.988590633970738e-05, + "loss": 1.4736, + "step": 2612 + }, + { + "epoch": 0.07672206236420225, + "grad_norm": 0.0, + "learning_rate": 1.9885763054494563e-05, + "loss": 1.4062, + "step": 2613 + }, + { + "epoch": 0.07675142404134123, + "grad_norm": 0.0, + "learning_rate": 1.9885619679882254e-05, + "loss": 1.3711, + "step": 2614 + }, + { + "epoch": 0.07678078571848024, + "grad_norm": 0.0, + "learning_rate": 1.9885476215871763e-05, + "loss": 1.4297, + "step": 2615 + }, + { + "epoch": 0.07681014739561924, + "grad_norm": 0.0, + "learning_rate": 1.988533266246437e-05, + "loss": 1.4229, + "step": 2616 + }, + { + "epoch": 0.07683950907275823, + "grad_norm": 0.0, + "learning_rate": 1.9885189019661387e-05, + "loss": 1.4814, + "step": 2617 + }, + { + "epoch": 0.07686887074989723, + "grad_norm": 0.0, + "learning_rate": 1.9885045287464107e-05, + "loss": 1.4014, + "step": 2618 + }, + { + "epoch": 0.07689823242703624, + "grad_norm": 0.0, + "learning_rate": 1.9884901465873833e-05, + "loss": 1.5977, + "step": 2619 + }, + { + "epoch": 0.07692759410417523, + "grad_norm": 0.0, + "learning_rate": 1.988475755489186e-05, + "loss": 1.5908, + "step": 2620 + }, + { + "epoch": 0.07695695578131423, + "grad_norm": 0.0, + "learning_rate": 1.9884613554519495e-05, + "loss": 1.4795, + "step": 2621 + }, + { + "epoch": 0.07698631745845323, + "grad_norm": 0.0, + "learning_rate": 1.9884469464758034e-05, + "loss": 1.4688, + "step": 2622 + }, + { + "epoch": 0.07701567913559222, + "grad_norm": 0.0, + "learning_rate": 1.988432528560879e-05, + "loss": 1.4619, + "step": 2623 + }, + { + "epoch": 0.07704504081273122, + "grad_norm": 0.0, + "learning_rate": 1.988418101707306e-05, + "loss": 1.501, + "step": 2624 + }, + { + "epoch": 0.07707440248987023, + "grad_norm": 0.0, + "learning_rate": 1.9884036659152146e-05, + "loss": 1.5518, + "step": 2625 + }, + { + "epoch": 0.07710376416700922, + "grad_norm": 0.0, + "learning_rate": 1.988389221184736e-05, + "loss": 1.4351, + "step": 2626 + }, + { + "epoch": 0.07713312584414822, + "grad_norm": 0.0, + "learning_rate": 1.9883747675160007e-05, + "loss": 1.5244, + "step": 2627 + }, + { + "epoch": 0.07716248752128721, + "grad_norm": 0.0, + "learning_rate": 1.9883603049091386e-05, + "loss": 1.5469, + "step": 2628 + }, + { + "epoch": 0.07719184919842621, + "grad_norm": 0.0, + "learning_rate": 1.9883458333642817e-05, + "loss": 1.5674, + "step": 2629 + }, + { + "epoch": 0.07722121087556522, + "grad_norm": 0.0, + "learning_rate": 1.9883313528815604e-05, + "loss": 1.5469, + "step": 2630 + }, + { + "epoch": 0.0772505725527042, + "grad_norm": 0.0, + "learning_rate": 1.9883168634611052e-05, + "loss": 1.4258, + "step": 2631 + }, + { + "epoch": 0.07727993422984321, + "grad_norm": 0.0, + "learning_rate": 1.9883023651030474e-05, + "loss": 1.5088, + "step": 2632 + }, + { + "epoch": 0.07730929590698221, + "grad_norm": 0.0, + "learning_rate": 1.9882878578075188e-05, + "loss": 1.6318, + "step": 2633 + }, + { + "epoch": 0.0773386575841212, + "grad_norm": 0.0, + "learning_rate": 1.9882733415746497e-05, + "loss": 1.585, + "step": 2634 + }, + { + "epoch": 0.0773680192612602, + "grad_norm": 0.0, + "learning_rate": 1.9882588164045713e-05, + "loss": 1.4014, + "step": 2635 + }, + { + "epoch": 0.07739738093839921, + "grad_norm": 0.0, + "learning_rate": 1.988244282297416e-05, + "loss": 1.5293, + "step": 2636 + }, + { + "epoch": 0.0774267426155382, + "grad_norm": 0.0, + "learning_rate": 1.988229739253314e-05, + "loss": 1.418, + "step": 2637 + }, + { + "epoch": 0.0774561042926772, + "grad_norm": 0.0, + "learning_rate": 1.9882151872723975e-05, + "loss": 1.416, + "step": 2638 + }, + { + "epoch": 0.07748546596981619, + "grad_norm": 0.0, + "learning_rate": 1.9882006263547983e-05, + "loss": 1.4287, + "step": 2639 + }, + { + "epoch": 0.07751482764695519, + "grad_norm": 0.0, + "learning_rate": 1.988186056500647e-05, + "loss": 1.5811, + "step": 2640 + }, + { + "epoch": 0.0775441893240942, + "grad_norm": 0.0, + "learning_rate": 1.988171477710077e-05, + "loss": 1.5908, + "step": 2641 + }, + { + "epoch": 0.07757355100123318, + "grad_norm": 0.0, + "learning_rate": 1.988156889983219e-05, + "loss": 1.4902, + "step": 2642 + }, + { + "epoch": 0.07760291267837219, + "grad_norm": 0.0, + "learning_rate": 1.988142293320205e-05, + "loss": 1.5576, + "step": 2643 + }, + { + "epoch": 0.07763227435551119, + "grad_norm": 0.0, + "learning_rate": 1.988127687721167e-05, + "loss": 1.4668, + "step": 2644 + }, + { + "epoch": 0.07766163603265018, + "grad_norm": 0.0, + "learning_rate": 1.9881130731862376e-05, + "loss": 1.4971, + "step": 2645 + }, + { + "epoch": 0.07769099770978918, + "grad_norm": 0.0, + "learning_rate": 1.9880984497155483e-05, + "loss": 1.4844, + "step": 2646 + }, + { + "epoch": 0.07772035938692819, + "grad_norm": 0.0, + "learning_rate": 1.988083817309232e-05, + "loss": 1.5107, + "step": 2647 + }, + { + "epoch": 0.07774972106406718, + "grad_norm": 0.0, + "learning_rate": 1.9880691759674207e-05, + "loss": 1.4512, + "step": 2648 + }, + { + "epoch": 0.07777908274120618, + "grad_norm": 0.0, + "learning_rate": 1.9880545256902467e-05, + "loss": 1.5654, + "step": 2649 + }, + { + "epoch": 0.07780844441834518, + "grad_norm": 0.0, + "learning_rate": 1.988039866477842e-05, + "loss": 1.5312, + "step": 2650 + }, + { + "epoch": 0.07783780609548417, + "grad_norm": 0.0, + "learning_rate": 1.9880251983303407e-05, + "loss": 1.376, + "step": 2651 + }, + { + "epoch": 0.07786716777262317, + "grad_norm": 0.0, + "learning_rate": 1.988010521247874e-05, + "loss": 1.4375, + "step": 2652 + }, + { + "epoch": 0.07789652944976216, + "grad_norm": 0.0, + "learning_rate": 1.9879958352305754e-05, + "loss": 1.5068, + "step": 2653 + }, + { + "epoch": 0.07792589112690117, + "grad_norm": 0.0, + "learning_rate": 1.9879811402785768e-05, + "loss": 1.6533, + "step": 2654 + }, + { + "epoch": 0.07795525280404017, + "grad_norm": 0.0, + "learning_rate": 1.987966436392012e-05, + "loss": 1.6504, + "step": 2655 + }, + { + "epoch": 0.07798461448117916, + "grad_norm": 0.0, + "learning_rate": 1.987951723571014e-05, + "loss": 1.4824, + "step": 2656 + }, + { + "epoch": 0.07801397615831816, + "grad_norm": 0.0, + "learning_rate": 1.9879370018157153e-05, + "loss": 1.4307, + "step": 2657 + }, + { + "epoch": 0.07804333783545717, + "grad_norm": 0.0, + "learning_rate": 1.9879222711262495e-05, + "loss": 1.5586, + "step": 2658 + }, + { + "epoch": 0.07807269951259616, + "grad_norm": 0.0, + "learning_rate": 1.9879075315027493e-05, + "loss": 1.5293, + "step": 2659 + }, + { + "epoch": 0.07810206118973516, + "grad_norm": 0.0, + "learning_rate": 1.9878927829453487e-05, + "loss": 1.4238, + "step": 2660 + }, + { + "epoch": 0.07813142286687416, + "grad_norm": 0.0, + "learning_rate": 1.9878780254541802e-05, + "loss": 1.4316, + "step": 2661 + }, + { + "epoch": 0.07816078454401315, + "grad_norm": 0.0, + "learning_rate": 1.9878632590293782e-05, + "loss": 1.4863, + "step": 2662 + }, + { + "epoch": 0.07819014622115215, + "grad_norm": 0.0, + "learning_rate": 1.9878484836710755e-05, + "loss": 1.5684, + "step": 2663 + }, + { + "epoch": 0.07821950789829114, + "grad_norm": 0.0, + "learning_rate": 1.9878336993794058e-05, + "loss": 1.5088, + "step": 2664 + }, + { + "epoch": 0.07824886957543015, + "grad_norm": 0.0, + "learning_rate": 1.987818906154503e-05, + "loss": 1.4131, + "step": 2665 + }, + { + "epoch": 0.07827823125256915, + "grad_norm": 0.0, + "learning_rate": 1.987804103996501e-05, + "loss": 1.5215, + "step": 2666 + }, + { + "epoch": 0.07830759292970814, + "grad_norm": 0.0, + "learning_rate": 1.9877892929055335e-05, + "loss": 1.5137, + "step": 2667 + }, + { + "epoch": 0.07833695460684714, + "grad_norm": 0.0, + "learning_rate": 1.9877744728817344e-05, + "loss": 1.3193, + "step": 2668 + }, + { + "epoch": 0.07836631628398615, + "grad_norm": 0.0, + "learning_rate": 1.9877596439252377e-05, + "loss": 1.4639, + "step": 2669 + }, + { + "epoch": 0.07839567796112513, + "grad_norm": 0.0, + "learning_rate": 1.987744806036178e-05, + "loss": 1.4727, + "step": 2670 + }, + { + "epoch": 0.07842503963826414, + "grad_norm": 0.0, + "learning_rate": 1.9877299592146884e-05, + "loss": 1.4775, + "step": 2671 + }, + { + "epoch": 0.07845440131540314, + "grad_norm": 0.0, + "learning_rate": 1.987715103460904e-05, + "loss": 1.3799, + "step": 2672 + }, + { + "epoch": 0.07848376299254213, + "grad_norm": 0.0, + "learning_rate": 1.9877002387749594e-05, + "loss": 1.4736, + "step": 2673 + }, + { + "epoch": 0.07851312466968113, + "grad_norm": 0.0, + "learning_rate": 1.987685365156988e-05, + "loss": 1.4258, + "step": 2674 + }, + { + "epoch": 0.07854248634682014, + "grad_norm": 0.0, + "learning_rate": 1.987670482607125e-05, + "loss": 1.4531, + "step": 2675 + }, + { + "epoch": 0.07857184802395913, + "grad_norm": 0.0, + "learning_rate": 1.9876555911255052e-05, + "loss": 1.4307, + "step": 2676 + }, + { + "epoch": 0.07860120970109813, + "grad_norm": 0.0, + "learning_rate": 1.9876406907122628e-05, + "loss": 1.5146, + "step": 2677 + }, + { + "epoch": 0.07863057137823712, + "grad_norm": 0.0, + "learning_rate": 1.9876257813675327e-05, + "loss": 1.5156, + "step": 2678 + }, + { + "epoch": 0.07865993305537612, + "grad_norm": 0.0, + "learning_rate": 1.9876108630914496e-05, + "loss": 1.5078, + "step": 2679 + }, + { + "epoch": 0.07868929473251512, + "grad_norm": 0.0, + "learning_rate": 1.987595935884149e-05, + "loss": 1.502, + "step": 2680 + }, + { + "epoch": 0.07871865640965411, + "grad_norm": 0.0, + "learning_rate": 1.987580999745765e-05, + "loss": 1.4883, + "step": 2681 + }, + { + "epoch": 0.07874801808679312, + "grad_norm": 0.0, + "learning_rate": 1.9875660546764332e-05, + "loss": 1.5479, + "step": 2682 + }, + { + "epoch": 0.07877737976393212, + "grad_norm": 0.0, + "learning_rate": 1.9875511006762884e-05, + "loss": 1.4463, + "step": 2683 + }, + { + "epoch": 0.07880674144107111, + "grad_norm": 0.0, + "learning_rate": 1.9875361377454662e-05, + "loss": 1.4375, + "step": 2684 + }, + { + "epoch": 0.07883610311821011, + "grad_norm": 0.0, + "learning_rate": 1.987521165884102e-05, + "loss": 1.5605, + "step": 2685 + }, + { + "epoch": 0.07886546479534912, + "grad_norm": 0.0, + "learning_rate": 1.9875061850923307e-05, + "loss": 1.333, + "step": 2686 + }, + { + "epoch": 0.0788948264724881, + "grad_norm": 0.0, + "learning_rate": 1.9874911953702885e-05, + "loss": 1.5703, + "step": 2687 + }, + { + "epoch": 0.07892418814962711, + "grad_norm": 0.0, + "learning_rate": 1.98747619671811e-05, + "loss": 1.4541, + "step": 2688 + }, + { + "epoch": 0.07895354982676611, + "grad_norm": 0.0, + "learning_rate": 1.9874611891359318e-05, + "loss": 1.5762, + "step": 2689 + }, + { + "epoch": 0.0789829115039051, + "grad_norm": 0.0, + "learning_rate": 1.987446172623889e-05, + "loss": 1.4756, + "step": 2690 + }, + { + "epoch": 0.0790122731810441, + "grad_norm": 0.0, + "learning_rate": 1.9874311471821174e-05, + "loss": 1.4941, + "step": 2691 + }, + { + "epoch": 0.0790416348581831, + "grad_norm": 0.0, + "learning_rate": 1.9874161128107533e-05, + "loss": 1.4658, + "step": 2692 + }, + { + "epoch": 0.0790709965353221, + "grad_norm": 0.0, + "learning_rate": 1.9874010695099323e-05, + "loss": 1.4453, + "step": 2693 + }, + { + "epoch": 0.0791003582124611, + "grad_norm": 0.0, + "learning_rate": 1.9873860172797906e-05, + "loss": 1.5283, + "step": 2694 + }, + { + "epoch": 0.07912971988960009, + "grad_norm": 0.0, + "learning_rate": 1.987370956120464e-05, + "loss": 1.4404, + "step": 2695 + }, + { + "epoch": 0.07915908156673909, + "grad_norm": 0.0, + "learning_rate": 1.9873558860320893e-05, + "loss": 1.4678, + "step": 2696 + }, + { + "epoch": 0.0791884432438781, + "grad_norm": 0.0, + "learning_rate": 1.9873408070148023e-05, + "loss": 1.5068, + "step": 2697 + }, + { + "epoch": 0.07921780492101708, + "grad_norm": 0.0, + "learning_rate": 1.9873257190687395e-05, + "loss": 1.3955, + "step": 2698 + }, + { + "epoch": 0.07924716659815609, + "grad_norm": 0.0, + "learning_rate": 1.9873106221940377e-05, + "loss": 1.4111, + "step": 2699 + }, + { + "epoch": 0.07927652827529509, + "grad_norm": 0.0, + "learning_rate": 1.987295516390833e-05, + "loss": 1.4316, + "step": 2700 + }, + { + "epoch": 0.07930588995243408, + "grad_norm": 0.0, + "learning_rate": 1.9872804016592618e-05, + "loss": 1.6064, + "step": 2701 + }, + { + "epoch": 0.07933525162957308, + "grad_norm": 0.0, + "learning_rate": 1.9872652779994612e-05, + "loss": 1.4551, + "step": 2702 + }, + { + "epoch": 0.07936461330671207, + "grad_norm": 0.0, + "learning_rate": 1.9872501454115677e-05, + "loss": 1.5361, + "step": 2703 + }, + { + "epoch": 0.07939397498385108, + "grad_norm": 0.0, + "learning_rate": 1.9872350038957186e-05, + "loss": 1.4639, + "step": 2704 + }, + { + "epoch": 0.07942333666099008, + "grad_norm": 0.0, + "learning_rate": 1.9872198534520504e-05, + "loss": 1.5068, + "step": 2705 + }, + { + "epoch": 0.07945269833812907, + "grad_norm": 0.0, + "learning_rate": 1.9872046940807004e-05, + "loss": 1.541, + "step": 2706 + }, + { + "epoch": 0.07948206001526807, + "grad_norm": 0.0, + "learning_rate": 1.9871895257818054e-05, + "loss": 1.5615, + "step": 2707 + }, + { + "epoch": 0.07951142169240707, + "grad_norm": 0.0, + "learning_rate": 1.9871743485555022e-05, + "loss": 1.4033, + "step": 2708 + }, + { + "epoch": 0.07954078336954606, + "grad_norm": 0.0, + "learning_rate": 1.9871591624019293e-05, + "loss": 1.4941, + "step": 2709 + }, + { + "epoch": 0.07957014504668507, + "grad_norm": 0.0, + "learning_rate": 1.987143967321223e-05, + "loss": 1.4258, + "step": 2710 + }, + { + "epoch": 0.07959950672382407, + "grad_norm": 0.0, + "learning_rate": 1.987128763313521e-05, + "loss": 1.4424, + "step": 2711 + }, + { + "epoch": 0.07962886840096306, + "grad_norm": 0.0, + "learning_rate": 1.9871135503789606e-05, + "loss": 1.3447, + "step": 2712 + }, + { + "epoch": 0.07965823007810206, + "grad_norm": 0.0, + "learning_rate": 1.98709832851768e-05, + "loss": 1.5029, + "step": 2713 + }, + { + "epoch": 0.07968759175524107, + "grad_norm": 0.0, + "learning_rate": 1.9870830977298163e-05, + "loss": 1.4385, + "step": 2714 + }, + { + "epoch": 0.07971695343238006, + "grad_norm": 0.0, + "learning_rate": 1.987067858015507e-05, + "loss": 1.4131, + "step": 2715 + }, + { + "epoch": 0.07974631510951906, + "grad_norm": 0.0, + "learning_rate": 1.9870526093748902e-05, + "loss": 1.4531, + "step": 2716 + }, + { + "epoch": 0.07977567678665805, + "grad_norm": 0.0, + "learning_rate": 1.987037351808104e-05, + "loss": 1.5342, + "step": 2717 + }, + { + "epoch": 0.07980503846379705, + "grad_norm": 0.0, + "learning_rate": 1.9870220853152865e-05, + "loss": 1.6182, + "step": 2718 + }, + { + "epoch": 0.07983440014093605, + "grad_norm": 0.0, + "learning_rate": 1.9870068098965755e-05, + "loss": 1.4971, + "step": 2719 + }, + { + "epoch": 0.07986376181807504, + "grad_norm": 0.0, + "learning_rate": 1.9869915255521087e-05, + "loss": 1.5039, + "step": 2720 + }, + { + "epoch": 0.07989312349521405, + "grad_norm": 0.0, + "learning_rate": 1.986976232282025e-05, + "loss": 1.5312, + "step": 2721 + }, + { + "epoch": 0.07992248517235305, + "grad_norm": 0.0, + "learning_rate": 1.986960930086463e-05, + "loss": 1.5957, + "step": 2722 + }, + { + "epoch": 0.07995184684949204, + "grad_norm": 0.0, + "learning_rate": 1.98694561896556e-05, + "loss": 1.4902, + "step": 2723 + }, + { + "epoch": 0.07998120852663104, + "grad_norm": 0.0, + "learning_rate": 1.986930298919455e-05, + "loss": 1.5596, + "step": 2724 + }, + { + "epoch": 0.08001057020377005, + "grad_norm": 0.0, + "learning_rate": 1.9869149699482867e-05, + "loss": 1.5166, + "step": 2725 + }, + { + "epoch": 0.08003993188090903, + "grad_norm": 0.0, + "learning_rate": 1.9868996320521934e-05, + "loss": 1.4482, + "step": 2726 + }, + { + "epoch": 0.08006929355804804, + "grad_norm": 0.0, + "learning_rate": 1.986884285231314e-05, + "loss": 1.4639, + "step": 2727 + }, + { + "epoch": 0.08009865523518703, + "grad_norm": 0.0, + "learning_rate": 1.9868689294857876e-05, + "loss": 1.5615, + "step": 2728 + }, + { + "epoch": 0.08012801691232603, + "grad_norm": 0.0, + "learning_rate": 1.9868535648157523e-05, + "loss": 1.4346, + "step": 2729 + }, + { + "epoch": 0.08015737858946503, + "grad_norm": 0.0, + "learning_rate": 1.9868381912213477e-05, + "loss": 1.4512, + "step": 2730 + }, + { + "epoch": 0.08018674026660402, + "grad_norm": 0.0, + "learning_rate": 1.9868228087027127e-05, + "loss": 1.5752, + "step": 2731 + }, + { + "epoch": 0.08021610194374303, + "grad_norm": 0.0, + "learning_rate": 1.986807417259986e-05, + "loss": 1.4551, + "step": 2732 + }, + { + "epoch": 0.08024546362088203, + "grad_norm": 0.0, + "learning_rate": 1.9867920168933072e-05, + "loss": 1.4316, + "step": 2733 + }, + { + "epoch": 0.08027482529802102, + "grad_norm": 0.0, + "learning_rate": 1.9867766076028156e-05, + "loss": 1.4043, + "step": 2734 + }, + { + "epoch": 0.08030418697516002, + "grad_norm": 0.0, + "learning_rate": 1.9867611893886503e-05, + "loss": 1.5352, + "step": 2735 + }, + { + "epoch": 0.08033354865229902, + "grad_norm": 0.0, + "learning_rate": 1.986745762250951e-05, + "loss": 1.4863, + "step": 2736 + }, + { + "epoch": 0.08036291032943801, + "grad_norm": 0.0, + "learning_rate": 1.986730326189857e-05, + "loss": 1.5625, + "step": 2737 + }, + { + "epoch": 0.08039227200657702, + "grad_norm": 0.0, + "learning_rate": 1.986714881205508e-05, + "loss": 1.457, + "step": 2738 + }, + { + "epoch": 0.08042163368371602, + "grad_norm": 0.0, + "learning_rate": 1.9866994272980435e-05, + "loss": 1.3574, + "step": 2739 + }, + { + "epoch": 0.08045099536085501, + "grad_norm": 0.0, + "learning_rate": 1.9866839644676035e-05, + "loss": 1.5469, + "step": 2740 + }, + { + "epoch": 0.08048035703799401, + "grad_norm": 0.0, + "learning_rate": 1.9866684927143277e-05, + "loss": 1.4629, + "step": 2741 + }, + { + "epoch": 0.080509718715133, + "grad_norm": 0.0, + "learning_rate": 1.9866530120383557e-05, + "loss": 1.4697, + "step": 2742 + }, + { + "epoch": 0.080539080392272, + "grad_norm": 0.0, + "learning_rate": 1.986637522439828e-05, + "loss": 1.4336, + "step": 2743 + }, + { + "epoch": 0.08056844206941101, + "grad_norm": 0.0, + "learning_rate": 1.9866220239188848e-05, + "loss": 1.5293, + "step": 2744 + }, + { + "epoch": 0.08059780374655, + "grad_norm": 0.0, + "learning_rate": 1.9866065164756657e-05, + "loss": 1.4453, + "step": 2745 + }, + { + "epoch": 0.080627165423689, + "grad_norm": 0.0, + "learning_rate": 1.986591000110311e-05, + "loss": 1.4844, + "step": 2746 + }, + { + "epoch": 0.080656527100828, + "grad_norm": 0.0, + "learning_rate": 1.9865754748229616e-05, + "loss": 1.4951, + "step": 2747 + }, + { + "epoch": 0.080685888777967, + "grad_norm": 0.0, + "learning_rate": 1.9865599406137576e-05, + "loss": 1.2178, + "step": 2748 + }, + { + "epoch": 0.080715250455106, + "grad_norm": 0.0, + "learning_rate": 1.986544397482839e-05, + "loss": 1.5107, + "step": 2749 + }, + { + "epoch": 0.080744612132245, + "grad_norm": 0.0, + "learning_rate": 1.986528845430347e-05, + "loss": 1.624, + "step": 2750 + }, + { + "epoch": 0.08077397380938399, + "grad_norm": 0.0, + "learning_rate": 1.986513284456422e-05, + "loss": 1.4912, + "step": 2751 + }, + { + "epoch": 0.08080333548652299, + "grad_norm": 0.0, + "learning_rate": 1.986497714561205e-05, + "loss": 1.4707, + "step": 2752 + }, + { + "epoch": 0.08083269716366198, + "grad_norm": 0.0, + "learning_rate": 1.9864821357448362e-05, + "loss": 1.5938, + "step": 2753 + }, + { + "epoch": 0.08086205884080098, + "grad_norm": 0.0, + "learning_rate": 1.9864665480074565e-05, + "loss": 1.4795, + "step": 2754 + }, + { + "epoch": 0.08089142051793999, + "grad_norm": 0.0, + "learning_rate": 1.9864509513492075e-05, + "loss": 1.4609, + "step": 2755 + }, + { + "epoch": 0.08092078219507898, + "grad_norm": 0.0, + "learning_rate": 1.98643534577023e-05, + "loss": 1.4648, + "step": 2756 + }, + { + "epoch": 0.08095014387221798, + "grad_norm": 0.0, + "learning_rate": 1.9864197312706648e-05, + "loss": 1.5166, + "step": 2757 + }, + { + "epoch": 0.08097950554935698, + "grad_norm": 0.0, + "learning_rate": 1.9864041078506534e-05, + "loss": 1.3594, + "step": 2758 + }, + { + "epoch": 0.08100886722649597, + "grad_norm": 0.0, + "learning_rate": 1.986388475510337e-05, + "loss": 1.7471, + "step": 2759 + }, + { + "epoch": 0.08103822890363498, + "grad_norm": 0.0, + "learning_rate": 1.9863728342498573e-05, + "loss": 1.583, + "step": 2760 + }, + { + "epoch": 0.08106759058077398, + "grad_norm": 0.0, + "learning_rate": 1.986357184069355e-05, + "loss": 1.4951, + "step": 2761 + }, + { + "epoch": 0.08109695225791297, + "grad_norm": 0.0, + "learning_rate": 1.9863415249689722e-05, + "loss": 1.4219, + "step": 2762 + }, + { + "epoch": 0.08112631393505197, + "grad_norm": 0.0, + "learning_rate": 1.986325856948851e-05, + "loss": 1.4355, + "step": 2763 + }, + { + "epoch": 0.08115567561219098, + "grad_norm": 0.0, + "learning_rate": 1.986310180009132e-05, + "loss": 1.5918, + "step": 2764 + }, + { + "epoch": 0.08118503728932996, + "grad_norm": 0.0, + "learning_rate": 1.986294494149957e-05, + "loss": 1.4922, + "step": 2765 + }, + { + "epoch": 0.08121439896646897, + "grad_norm": 0.0, + "learning_rate": 1.986278799371469e-05, + "loss": 1.6162, + "step": 2766 + }, + { + "epoch": 0.08124376064360796, + "grad_norm": 0.0, + "learning_rate": 1.9862630956738087e-05, + "loss": 1.3506, + "step": 2767 + }, + { + "epoch": 0.08127312232074696, + "grad_norm": 0.0, + "learning_rate": 1.986247383057119e-05, + "loss": 1.502, + "step": 2768 + }, + { + "epoch": 0.08130248399788596, + "grad_norm": 0.0, + "learning_rate": 1.9862316615215413e-05, + "loss": 1.4912, + "step": 2769 + }, + { + "epoch": 0.08133184567502495, + "grad_norm": 0.0, + "learning_rate": 1.9862159310672185e-05, + "loss": 1.4834, + "step": 2770 + }, + { + "epoch": 0.08136120735216396, + "grad_norm": 0.0, + "learning_rate": 1.9862001916942923e-05, + "loss": 1.4648, + "step": 2771 + }, + { + "epoch": 0.08139056902930296, + "grad_norm": 0.0, + "learning_rate": 1.986184443402905e-05, + "loss": 1.6318, + "step": 2772 + }, + { + "epoch": 0.08141993070644195, + "grad_norm": 0.0, + "learning_rate": 1.9861686861931994e-05, + "loss": 1.5684, + "step": 2773 + }, + { + "epoch": 0.08144929238358095, + "grad_norm": 0.0, + "learning_rate": 1.986152920065318e-05, + "loss": 1.5127, + "step": 2774 + }, + { + "epoch": 0.08147865406071995, + "grad_norm": 0.0, + "learning_rate": 1.986137145019403e-05, + "loss": 1.4902, + "step": 2775 + }, + { + "epoch": 0.08150801573785894, + "grad_norm": 0.0, + "learning_rate": 1.9861213610555973e-05, + "loss": 1.4004, + "step": 2776 + }, + { + "epoch": 0.08153737741499795, + "grad_norm": 0.0, + "learning_rate": 1.9861055681740433e-05, + "loss": 1.5166, + "step": 2777 + }, + { + "epoch": 0.08156673909213694, + "grad_norm": 0.0, + "learning_rate": 1.9860897663748844e-05, + "loss": 1.3984, + "step": 2778 + }, + { + "epoch": 0.08159610076927594, + "grad_norm": 0.0, + "learning_rate": 1.986073955658263e-05, + "loss": 1.5449, + "step": 2779 + }, + { + "epoch": 0.08162546244641494, + "grad_norm": 0.0, + "learning_rate": 1.9860581360243227e-05, + "loss": 1.54, + "step": 2780 + }, + { + "epoch": 0.08165482412355393, + "grad_norm": 0.0, + "learning_rate": 1.9860423074732057e-05, + "loss": 1.4619, + "step": 2781 + }, + { + "epoch": 0.08168418580069293, + "grad_norm": 0.0, + "learning_rate": 1.9860264700050557e-05, + "loss": 1.502, + "step": 2782 + }, + { + "epoch": 0.08171354747783194, + "grad_norm": 0.0, + "learning_rate": 1.9860106236200155e-05, + "loss": 1.583, + "step": 2783 + }, + { + "epoch": 0.08174290915497093, + "grad_norm": 0.0, + "learning_rate": 1.9859947683182292e-05, + "loss": 1.4502, + "step": 2784 + }, + { + "epoch": 0.08177227083210993, + "grad_norm": 0.0, + "learning_rate": 1.9859789040998396e-05, + "loss": 1.4912, + "step": 2785 + }, + { + "epoch": 0.08180163250924893, + "grad_norm": 0.0, + "learning_rate": 1.9859630309649903e-05, + "loss": 1.4736, + "step": 2786 + }, + { + "epoch": 0.08183099418638792, + "grad_norm": 0.0, + "learning_rate": 1.9859471489138243e-05, + "loss": 1.4873, + "step": 2787 + }, + { + "epoch": 0.08186035586352693, + "grad_norm": 0.0, + "learning_rate": 1.9859312579464866e-05, + "loss": 1.5195, + "step": 2788 + }, + { + "epoch": 0.08188971754066593, + "grad_norm": 0.0, + "learning_rate": 1.985915358063119e-05, + "loss": 1.6133, + "step": 2789 + }, + { + "epoch": 0.08191907921780492, + "grad_norm": 0.0, + "learning_rate": 1.9858994492638673e-05, + "loss": 1.501, + "step": 2790 + }, + { + "epoch": 0.08194844089494392, + "grad_norm": 0.0, + "learning_rate": 1.9858835315488737e-05, + "loss": 1.3691, + "step": 2791 + }, + { + "epoch": 0.08197780257208291, + "grad_norm": 0.0, + "learning_rate": 1.9858676049182834e-05, + "loss": 1.3428, + "step": 2792 + }, + { + "epoch": 0.08200716424922191, + "grad_norm": 0.0, + "learning_rate": 1.9858516693722393e-05, + "loss": 1.5869, + "step": 2793 + }, + { + "epoch": 0.08203652592636092, + "grad_norm": 0.0, + "learning_rate": 1.9858357249108865e-05, + "loss": 1.582, + "step": 2794 + }, + { + "epoch": 0.0820658876034999, + "grad_norm": 0.0, + "learning_rate": 1.9858197715343685e-05, + "loss": 1.5225, + "step": 2795 + }, + { + "epoch": 0.08209524928063891, + "grad_norm": 0.0, + "learning_rate": 1.98580380924283e-05, + "loss": 1.5244, + "step": 2796 + }, + { + "epoch": 0.08212461095777791, + "grad_norm": 0.0, + "learning_rate": 1.985787838036415e-05, + "loss": 1.5244, + "step": 2797 + }, + { + "epoch": 0.0821539726349169, + "grad_norm": 0.0, + "learning_rate": 1.9857718579152683e-05, + "loss": 1.4473, + "step": 2798 + }, + { + "epoch": 0.0821833343120559, + "grad_norm": 0.0, + "learning_rate": 1.985755868879534e-05, + "loss": 1.3555, + "step": 2799 + }, + { + "epoch": 0.08221269598919491, + "grad_norm": 0.0, + "learning_rate": 1.9857398709293574e-05, + "loss": 1.3623, + "step": 2800 + }, + { + "epoch": 0.0822420576663339, + "grad_norm": 0.0, + "learning_rate": 1.985723864064882e-05, + "loss": 1.4521, + "step": 2801 + }, + { + "epoch": 0.0822714193434729, + "grad_norm": 0.0, + "learning_rate": 1.985707848286254e-05, + "loss": 1.4912, + "step": 2802 + }, + { + "epoch": 0.08230078102061189, + "grad_norm": 0.0, + "learning_rate": 1.9856918235936168e-05, + "loss": 1.4072, + "step": 2803 + }, + { + "epoch": 0.0823301426977509, + "grad_norm": 0.0, + "learning_rate": 1.9856757899871162e-05, + "loss": 1.5312, + "step": 2804 + }, + { + "epoch": 0.0823595043748899, + "grad_norm": 0.0, + "learning_rate": 1.9856597474668974e-05, + "loss": 1.5771, + "step": 2805 + }, + { + "epoch": 0.08238886605202889, + "grad_norm": 0.0, + "learning_rate": 1.9856436960331048e-05, + "loss": 1.5498, + "step": 2806 + }, + { + "epoch": 0.08241822772916789, + "grad_norm": 0.0, + "learning_rate": 1.9856276356858836e-05, + "loss": 1.5352, + "step": 2807 + }, + { + "epoch": 0.08244758940630689, + "grad_norm": 0.0, + "learning_rate": 1.9856115664253797e-05, + "loss": 1.3867, + "step": 2808 + }, + { + "epoch": 0.08247695108344588, + "grad_norm": 0.0, + "learning_rate": 1.9855954882517375e-05, + "loss": 1.5762, + "step": 2809 + }, + { + "epoch": 0.08250631276058489, + "grad_norm": 0.0, + "learning_rate": 1.9855794011651034e-05, + "loss": 1.4775, + "step": 2810 + }, + { + "epoch": 0.08253567443772389, + "grad_norm": 0.0, + "learning_rate": 1.985563305165622e-05, + "loss": 1.4453, + "step": 2811 + }, + { + "epoch": 0.08256503611486288, + "grad_norm": 0.0, + "learning_rate": 1.9855472002534394e-05, + "loss": 1.4395, + "step": 2812 + }, + { + "epoch": 0.08259439779200188, + "grad_norm": 0.0, + "learning_rate": 1.985531086428701e-05, + "loss": 1.4893, + "step": 2813 + }, + { + "epoch": 0.08262375946914088, + "grad_norm": 0.0, + "learning_rate": 1.9855149636915525e-05, + "loss": 1.542, + "step": 2814 + }, + { + "epoch": 0.08265312114627987, + "grad_norm": 0.0, + "learning_rate": 1.98549883204214e-05, + "loss": 1.499, + "step": 2815 + }, + { + "epoch": 0.08268248282341888, + "grad_norm": 0.0, + "learning_rate": 1.985482691480609e-05, + "loss": 1.4326, + "step": 2816 + }, + { + "epoch": 0.08271184450055787, + "grad_norm": 0.0, + "learning_rate": 1.9854665420071058e-05, + "loss": 1.377, + "step": 2817 + }, + { + "epoch": 0.08274120617769687, + "grad_norm": 0.0, + "learning_rate": 1.985450383621776e-05, + "loss": 1.5127, + "step": 2818 + }, + { + "epoch": 0.08277056785483587, + "grad_norm": 0.0, + "learning_rate": 1.9854342163247663e-05, + "loss": 1.5117, + "step": 2819 + }, + { + "epoch": 0.08279992953197486, + "grad_norm": 0.0, + "learning_rate": 1.9854180401162223e-05, + "loss": 1.4912, + "step": 2820 + }, + { + "epoch": 0.08282929120911386, + "grad_norm": 0.0, + "learning_rate": 1.9854018549962906e-05, + "loss": 1.5215, + "step": 2821 + }, + { + "epoch": 0.08285865288625287, + "grad_norm": 0.0, + "learning_rate": 1.9853856609651177e-05, + "loss": 1.3525, + "step": 2822 + }, + { + "epoch": 0.08288801456339186, + "grad_norm": 0.0, + "learning_rate": 1.9853694580228502e-05, + "loss": 1.4268, + "step": 2823 + }, + { + "epoch": 0.08291737624053086, + "grad_norm": 0.0, + "learning_rate": 1.985353246169634e-05, + "loss": 1.5996, + "step": 2824 + }, + { + "epoch": 0.08294673791766986, + "grad_norm": 0.0, + "learning_rate": 1.9853370254056162e-05, + "loss": 1.5312, + "step": 2825 + }, + { + "epoch": 0.08297609959480885, + "grad_norm": 0.0, + "learning_rate": 1.985320795730943e-05, + "loss": 1.5244, + "step": 2826 + }, + { + "epoch": 0.08300546127194786, + "grad_norm": 0.0, + "learning_rate": 1.985304557145762e-05, + "loss": 1.5459, + "step": 2827 + }, + { + "epoch": 0.08303482294908685, + "grad_norm": 0.0, + "learning_rate": 1.985288309650219e-05, + "loss": 1.5967, + "step": 2828 + }, + { + "epoch": 0.08306418462622585, + "grad_norm": 0.0, + "learning_rate": 1.985272053244462e-05, + "loss": 1.6055, + "step": 2829 + }, + { + "epoch": 0.08309354630336485, + "grad_norm": 0.0, + "learning_rate": 1.9852557879286372e-05, + "loss": 1.5225, + "step": 2830 + }, + { + "epoch": 0.08312290798050384, + "grad_norm": 0.0, + "learning_rate": 1.985239513702892e-05, + "loss": 1.459, + "step": 2831 + }, + { + "epoch": 0.08315226965764284, + "grad_norm": 0.0, + "learning_rate": 1.9852232305673737e-05, + "loss": 1.4375, + "step": 2832 + }, + { + "epoch": 0.08318163133478185, + "grad_norm": 0.0, + "learning_rate": 1.985206938522229e-05, + "loss": 1.5645, + "step": 2833 + }, + { + "epoch": 0.08321099301192084, + "grad_norm": 0.0, + "learning_rate": 1.985190637567606e-05, + "loss": 1.5732, + "step": 2834 + }, + { + "epoch": 0.08324035468905984, + "grad_norm": 0.0, + "learning_rate": 1.985174327703652e-05, + "loss": 1.4272, + "step": 2835 + }, + { + "epoch": 0.08326971636619884, + "grad_norm": 0.0, + "learning_rate": 1.9851580089305137e-05, + "loss": 1.5635, + "step": 2836 + }, + { + "epoch": 0.08329907804333783, + "grad_norm": 0.0, + "learning_rate": 1.9851416812483396e-05, + "loss": 1.6465, + "step": 2837 + }, + { + "epoch": 0.08332843972047684, + "grad_norm": 0.0, + "learning_rate": 1.9851253446572767e-05, + "loss": 1.4922, + "step": 2838 + }, + { + "epoch": 0.08335780139761584, + "grad_norm": 0.0, + "learning_rate": 1.9851089991574732e-05, + "loss": 1.3975, + "step": 2839 + }, + { + "epoch": 0.08338716307475483, + "grad_norm": 0.0, + "learning_rate": 1.9850926447490767e-05, + "loss": 1.5107, + "step": 2840 + }, + { + "epoch": 0.08341652475189383, + "grad_norm": 0.0, + "learning_rate": 1.9850762814322353e-05, + "loss": 1.4854, + "step": 2841 + }, + { + "epoch": 0.08344588642903282, + "grad_norm": 0.0, + "learning_rate": 1.9850599092070967e-05, + "loss": 1.5127, + "step": 2842 + }, + { + "epoch": 0.08347524810617182, + "grad_norm": 0.0, + "learning_rate": 1.985043528073809e-05, + "loss": 1.4902, + "step": 2843 + }, + { + "epoch": 0.08350460978331083, + "grad_norm": 0.0, + "learning_rate": 1.9850271380325202e-05, + "loss": 1.5127, + "step": 2844 + }, + { + "epoch": 0.08353397146044982, + "grad_norm": 0.0, + "learning_rate": 1.985010739083379e-05, + "loss": 1.4775, + "step": 2845 + }, + { + "epoch": 0.08356333313758882, + "grad_norm": 0.0, + "learning_rate": 1.9849943312265332e-05, + "loss": 1.3687, + "step": 2846 + }, + { + "epoch": 0.08359269481472782, + "grad_norm": 0.0, + "learning_rate": 1.9849779144621318e-05, + "loss": 1.4375, + "step": 2847 + }, + { + "epoch": 0.08362205649186681, + "grad_norm": 0.0, + "learning_rate": 1.9849614887903224e-05, + "loss": 1.4922, + "step": 2848 + }, + { + "epoch": 0.08365141816900581, + "grad_norm": 0.0, + "learning_rate": 1.984945054211254e-05, + "loss": 1.4102, + "step": 2849 + }, + { + "epoch": 0.08368077984614482, + "grad_norm": 0.0, + "learning_rate": 1.9849286107250754e-05, + "loss": 1.4775, + "step": 2850 + }, + { + "epoch": 0.08371014152328381, + "grad_norm": 0.0, + "learning_rate": 1.9849121583319353e-05, + "loss": 1.4482, + "step": 2851 + }, + { + "epoch": 0.08373950320042281, + "grad_norm": 0.0, + "learning_rate": 1.984895697031982e-05, + "loss": 1.6162, + "step": 2852 + }, + { + "epoch": 0.0837688648775618, + "grad_norm": 0.0, + "learning_rate": 1.984879226825365e-05, + "loss": 1.5342, + "step": 2853 + }, + { + "epoch": 0.0837982265547008, + "grad_norm": 0.0, + "learning_rate": 1.9848627477122328e-05, + "loss": 1.4766, + "step": 2854 + }, + { + "epoch": 0.0838275882318398, + "grad_norm": 0.0, + "learning_rate": 1.9848462596927344e-05, + "loss": 1.6152, + "step": 2855 + }, + { + "epoch": 0.0838569499089788, + "grad_norm": 0.0, + "learning_rate": 1.9848297627670194e-05, + "loss": 1.4644, + "step": 2856 + }, + { + "epoch": 0.0838863115861178, + "grad_norm": 0.0, + "learning_rate": 1.9848132569352364e-05, + "loss": 1.5049, + "step": 2857 + }, + { + "epoch": 0.0839156732632568, + "grad_norm": 0.0, + "learning_rate": 1.9847967421975348e-05, + "loss": 1.4805, + "step": 2858 + }, + { + "epoch": 0.08394503494039579, + "grad_norm": 0.0, + "learning_rate": 1.984780218554064e-05, + "loss": 1.4043, + "step": 2859 + }, + { + "epoch": 0.0839743966175348, + "grad_norm": 0.0, + "learning_rate": 1.9847636860049738e-05, + "loss": 1.6309, + "step": 2860 + }, + { + "epoch": 0.0840037582946738, + "grad_norm": 0.0, + "learning_rate": 1.984747144550413e-05, + "loss": 1.54, + "step": 2861 + }, + { + "epoch": 0.08403311997181279, + "grad_norm": 0.0, + "learning_rate": 1.984730594190532e-05, + "loss": 1.4824, + "step": 2862 + }, + { + "epoch": 0.08406248164895179, + "grad_norm": 0.0, + "learning_rate": 1.98471403492548e-05, + "loss": 1.4609, + "step": 2863 + }, + { + "epoch": 0.08409184332609079, + "grad_norm": 0.0, + "learning_rate": 1.9846974667554066e-05, + "loss": 1.5176, + "step": 2864 + }, + { + "epoch": 0.08412120500322978, + "grad_norm": 0.0, + "learning_rate": 1.9846808896804616e-05, + "loss": 1.4834, + "step": 2865 + }, + { + "epoch": 0.08415056668036879, + "grad_norm": 0.0, + "learning_rate": 1.9846643037007952e-05, + "loss": 1.4873, + "step": 2866 + }, + { + "epoch": 0.08417992835750777, + "grad_norm": 0.0, + "learning_rate": 1.9846477088165577e-05, + "loss": 1.4902, + "step": 2867 + }, + { + "epoch": 0.08420929003464678, + "grad_norm": 0.0, + "learning_rate": 1.9846311050278985e-05, + "loss": 1.4326, + "step": 2868 + }, + { + "epoch": 0.08423865171178578, + "grad_norm": 0.0, + "learning_rate": 1.984614492334968e-05, + "loss": 1.6377, + "step": 2869 + }, + { + "epoch": 0.08426801338892477, + "grad_norm": 0.0, + "learning_rate": 1.984597870737917e-05, + "loss": 1.5762, + "step": 2870 + }, + { + "epoch": 0.08429737506606377, + "grad_norm": 0.0, + "learning_rate": 1.984581240236895e-05, + "loss": 1.4795, + "step": 2871 + }, + { + "epoch": 0.08432673674320278, + "grad_norm": 0.0, + "learning_rate": 1.9845646008320524e-05, + "loss": 1.4912, + "step": 2872 + }, + { + "epoch": 0.08435609842034177, + "grad_norm": 0.0, + "learning_rate": 1.9845479525235404e-05, + "loss": 1.5371, + "step": 2873 + }, + { + "epoch": 0.08438546009748077, + "grad_norm": 0.0, + "learning_rate": 1.9845312953115087e-05, + "loss": 1.5303, + "step": 2874 + }, + { + "epoch": 0.08441482177461977, + "grad_norm": 0.0, + "learning_rate": 1.9845146291961086e-05, + "loss": 1.5957, + "step": 2875 + }, + { + "epoch": 0.08444418345175876, + "grad_norm": 0.0, + "learning_rate": 1.9844979541774904e-05, + "loss": 1.4385, + "step": 2876 + }, + { + "epoch": 0.08447354512889776, + "grad_norm": 0.0, + "learning_rate": 1.984481270255805e-05, + "loss": 1.5752, + "step": 2877 + }, + { + "epoch": 0.08450290680603677, + "grad_norm": 0.0, + "learning_rate": 1.9844645774312038e-05, + "loss": 1.3545, + "step": 2878 + }, + { + "epoch": 0.08453226848317576, + "grad_norm": 0.0, + "learning_rate": 1.984447875703837e-05, + "loss": 1.4746, + "step": 2879 + }, + { + "epoch": 0.08456163016031476, + "grad_norm": 0.0, + "learning_rate": 1.984431165073856e-05, + "loss": 1.4883, + "step": 2880 + }, + { + "epoch": 0.08459099183745375, + "grad_norm": 0.0, + "learning_rate": 1.984414445541412e-05, + "loss": 1.4551, + "step": 2881 + }, + { + "epoch": 0.08462035351459275, + "grad_norm": 0.0, + "learning_rate": 1.9843977171066556e-05, + "loss": 1.4492, + "step": 2882 + }, + { + "epoch": 0.08464971519173176, + "grad_norm": 0.0, + "learning_rate": 1.984380979769739e-05, + "loss": 1.5537, + "step": 2883 + }, + { + "epoch": 0.08467907686887075, + "grad_norm": 0.0, + "learning_rate": 1.984364233530813e-05, + "loss": 1.6299, + "step": 2884 + }, + { + "epoch": 0.08470843854600975, + "grad_norm": 0.0, + "learning_rate": 1.9843474783900288e-05, + "loss": 1.4648, + "step": 2885 + }, + { + "epoch": 0.08473780022314875, + "grad_norm": 0.0, + "learning_rate": 1.9843307143475387e-05, + "loss": 1.5215, + "step": 2886 + }, + { + "epoch": 0.08476716190028774, + "grad_norm": 0.0, + "learning_rate": 1.984313941403494e-05, + "loss": 1.4355, + "step": 2887 + }, + { + "epoch": 0.08479652357742674, + "grad_norm": 0.0, + "learning_rate": 1.9842971595580456e-05, + "loss": 1.4717, + "step": 2888 + }, + { + "epoch": 0.08482588525456575, + "grad_norm": 0.0, + "learning_rate": 1.984280368811346e-05, + "loss": 1.5576, + "step": 2889 + }, + { + "epoch": 0.08485524693170474, + "grad_norm": 0.0, + "learning_rate": 1.9842635691635474e-05, + "loss": 1.4199, + "step": 2890 + }, + { + "epoch": 0.08488460860884374, + "grad_norm": 0.0, + "learning_rate": 1.984246760614801e-05, + "loss": 1.3818, + "step": 2891 + }, + { + "epoch": 0.08491397028598273, + "grad_norm": 0.0, + "learning_rate": 1.9842299431652593e-05, + "loss": 1.5586, + "step": 2892 + }, + { + "epoch": 0.08494333196312173, + "grad_norm": 0.0, + "learning_rate": 1.9842131168150737e-05, + "loss": 1.3369, + "step": 2893 + }, + { + "epoch": 0.08497269364026074, + "grad_norm": 0.0, + "learning_rate": 1.984196281564397e-05, + "loss": 1.46, + "step": 2894 + }, + { + "epoch": 0.08500205531739972, + "grad_norm": 0.0, + "learning_rate": 1.9841794374133816e-05, + "loss": 1.4795, + "step": 2895 + }, + { + "epoch": 0.08503141699453873, + "grad_norm": 0.0, + "learning_rate": 1.984162584362179e-05, + "loss": 1.5186, + "step": 2896 + }, + { + "epoch": 0.08506077867167773, + "grad_norm": 0.0, + "learning_rate": 1.9841457224109426e-05, + "loss": 1.5195, + "step": 2897 + }, + { + "epoch": 0.08509014034881672, + "grad_norm": 0.0, + "learning_rate": 1.984128851559824e-05, + "loss": 1.5342, + "step": 2898 + }, + { + "epoch": 0.08511950202595572, + "grad_norm": 0.0, + "learning_rate": 1.9841119718089766e-05, + "loss": 1.3682, + "step": 2899 + }, + { + "epoch": 0.08514886370309473, + "grad_norm": 0.0, + "learning_rate": 1.984095083158552e-05, + "loss": 1.4014, + "step": 2900 + }, + { + "epoch": 0.08517822538023372, + "grad_norm": 0.0, + "learning_rate": 1.9840781856087038e-05, + "loss": 1.501, + "step": 2901 + }, + { + "epoch": 0.08520758705737272, + "grad_norm": 0.0, + "learning_rate": 1.9840612791595848e-05, + "loss": 1.5703, + "step": 2902 + }, + { + "epoch": 0.08523694873451172, + "grad_norm": 0.0, + "learning_rate": 1.9840443638113472e-05, + "loss": 1.4463, + "step": 2903 + }, + { + "epoch": 0.08526631041165071, + "grad_norm": 0.0, + "learning_rate": 1.984027439564145e-05, + "loss": 1.5127, + "step": 2904 + }, + { + "epoch": 0.08529567208878971, + "grad_norm": 0.0, + "learning_rate": 1.98401050641813e-05, + "loss": 1.5049, + "step": 2905 + }, + { + "epoch": 0.0853250337659287, + "grad_norm": 0.0, + "learning_rate": 1.9839935643734566e-05, + "loss": 1.4912, + "step": 2906 + }, + { + "epoch": 0.08535439544306771, + "grad_norm": 0.0, + "learning_rate": 1.9839766134302766e-05, + "loss": 1.4785, + "step": 2907 + }, + { + "epoch": 0.08538375712020671, + "grad_norm": 0.0, + "learning_rate": 1.9839596535887447e-05, + "loss": 1.7354, + "step": 2908 + }, + { + "epoch": 0.0854131187973457, + "grad_norm": 0.0, + "learning_rate": 1.9839426848490135e-05, + "loss": 1.5391, + "step": 2909 + }, + { + "epoch": 0.0854424804744847, + "grad_norm": 0.0, + "learning_rate": 1.9839257072112364e-05, + "loss": 1.4473, + "step": 2910 + }, + { + "epoch": 0.0854718421516237, + "grad_norm": 0.0, + "learning_rate": 1.9839087206755673e-05, + "loss": 1.4707, + "step": 2911 + }, + { + "epoch": 0.0855012038287627, + "grad_norm": 0.0, + "learning_rate": 1.98389172524216e-05, + "loss": 1.5947, + "step": 2912 + }, + { + "epoch": 0.0855305655059017, + "grad_norm": 0.0, + "learning_rate": 1.9838747209111674e-05, + "loss": 1.5781, + "step": 2913 + }, + { + "epoch": 0.0855599271830407, + "grad_norm": 0.0, + "learning_rate": 1.983857707682744e-05, + "loss": 1.4277, + "step": 2914 + }, + { + "epoch": 0.08558928886017969, + "grad_norm": 0.0, + "learning_rate": 1.9838406855570432e-05, + "loss": 1.4424, + "step": 2915 + }, + { + "epoch": 0.0856186505373187, + "grad_norm": 0.0, + "learning_rate": 1.983823654534219e-05, + "loss": 1.4443, + "step": 2916 + }, + { + "epoch": 0.08564801221445768, + "grad_norm": 0.0, + "learning_rate": 1.9838066146144255e-05, + "loss": 1.4912, + "step": 2917 + }, + { + "epoch": 0.08567737389159669, + "grad_norm": 0.0, + "learning_rate": 1.9837895657978173e-05, + "loss": 1.4287, + "step": 2918 + }, + { + "epoch": 0.08570673556873569, + "grad_norm": 0.0, + "learning_rate": 1.983772508084548e-05, + "loss": 1.4683, + "step": 2919 + }, + { + "epoch": 0.08573609724587468, + "grad_norm": 0.0, + "learning_rate": 1.9837554414747716e-05, + "loss": 1.5576, + "step": 2920 + }, + { + "epoch": 0.08576545892301368, + "grad_norm": 0.0, + "learning_rate": 1.983738365968643e-05, + "loss": 1.3652, + "step": 2921 + }, + { + "epoch": 0.08579482060015269, + "grad_norm": 0.0, + "learning_rate": 1.9837212815663163e-05, + "loss": 1.4932, + "step": 2922 + }, + { + "epoch": 0.08582418227729167, + "grad_norm": 0.0, + "learning_rate": 1.9837041882679464e-05, + "loss": 1.5527, + "step": 2923 + }, + { + "epoch": 0.08585354395443068, + "grad_norm": 0.0, + "learning_rate": 1.9836870860736874e-05, + "loss": 1.5596, + "step": 2924 + }, + { + "epoch": 0.08588290563156968, + "grad_norm": 0.0, + "learning_rate": 1.9836699749836943e-05, + "loss": 1.397, + "step": 2925 + }, + { + "epoch": 0.08591226730870867, + "grad_norm": 0.0, + "learning_rate": 1.9836528549981216e-05, + "loss": 1.7295, + "step": 2926 + }, + { + "epoch": 0.08594162898584767, + "grad_norm": 0.0, + "learning_rate": 1.983635726117124e-05, + "loss": 1.5225, + "step": 2927 + }, + { + "epoch": 0.08597099066298668, + "grad_norm": 0.0, + "learning_rate": 1.9836185883408568e-05, + "loss": 1.4209, + "step": 2928 + }, + { + "epoch": 0.08600035234012567, + "grad_norm": 0.0, + "learning_rate": 1.9836014416694748e-05, + "loss": 1.6035, + "step": 2929 + }, + { + "epoch": 0.08602971401726467, + "grad_norm": 0.0, + "learning_rate": 1.9835842861031328e-05, + "loss": 1.5107, + "step": 2930 + }, + { + "epoch": 0.08605907569440366, + "grad_norm": 0.0, + "learning_rate": 1.9835671216419867e-05, + "loss": 1.5225, + "step": 2931 + }, + { + "epoch": 0.08608843737154266, + "grad_norm": 0.0, + "learning_rate": 1.983549948286191e-05, + "loss": 1.5586, + "step": 2932 + }, + { + "epoch": 0.08611779904868166, + "grad_norm": 0.0, + "learning_rate": 1.9835327660359012e-05, + "loss": 1.3652, + "step": 2933 + }, + { + "epoch": 0.08614716072582065, + "grad_norm": 0.0, + "learning_rate": 1.9835155748912728e-05, + "loss": 1.3818, + "step": 2934 + }, + { + "epoch": 0.08617652240295966, + "grad_norm": 0.0, + "learning_rate": 1.983498374852461e-05, + "loss": 1.4082, + "step": 2935 + }, + { + "epoch": 0.08620588408009866, + "grad_norm": 0.0, + "learning_rate": 1.9834811659196217e-05, + "loss": 1.3438, + "step": 2936 + }, + { + "epoch": 0.08623524575723765, + "grad_norm": 0.0, + "learning_rate": 1.98346394809291e-05, + "loss": 1.4883, + "step": 2937 + }, + { + "epoch": 0.08626460743437665, + "grad_norm": 0.0, + "learning_rate": 1.983446721372482e-05, + "loss": 1.5088, + "step": 2938 + }, + { + "epoch": 0.08629396911151566, + "grad_norm": 0.0, + "learning_rate": 1.983429485758494e-05, + "loss": 1.5264, + "step": 2939 + }, + { + "epoch": 0.08632333078865465, + "grad_norm": 0.0, + "learning_rate": 1.983412241251101e-05, + "loss": 1.4756, + "step": 2940 + }, + { + "epoch": 0.08635269246579365, + "grad_norm": 0.0, + "learning_rate": 1.983394987850459e-05, + "loss": 1.3125, + "step": 2941 + }, + { + "epoch": 0.08638205414293264, + "grad_norm": 0.0, + "learning_rate": 1.9833777255567245e-05, + "loss": 1.5156, + "step": 2942 + }, + { + "epoch": 0.08641141582007164, + "grad_norm": 0.0, + "learning_rate": 1.9833604543700532e-05, + "loss": 1.4326, + "step": 2943 + }, + { + "epoch": 0.08644077749721064, + "grad_norm": 0.0, + "learning_rate": 1.983343174290602e-05, + "loss": 1.4629, + "step": 2944 + }, + { + "epoch": 0.08647013917434963, + "grad_norm": 0.0, + "learning_rate": 1.983325885318526e-05, + "loss": 1.6631, + "step": 2945 + }, + { + "epoch": 0.08649950085148864, + "grad_norm": 0.0, + "learning_rate": 1.9833085874539824e-05, + "loss": 1.5791, + "step": 2946 + }, + { + "epoch": 0.08652886252862764, + "grad_norm": 0.0, + "learning_rate": 1.9832912806971274e-05, + "loss": 1.5, + "step": 2947 + }, + { + "epoch": 0.08655822420576663, + "grad_norm": 0.0, + "learning_rate": 1.9832739650481175e-05, + "loss": 1.4492, + "step": 2948 + }, + { + "epoch": 0.08658758588290563, + "grad_norm": 0.0, + "learning_rate": 1.9832566405071095e-05, + "loss": 1.4619, + "step": 2949 + }, + { + "epoch": 0.08661694756004464, + "grad_norm": 0.0, + "learning_rate": 1.9832393070742597e-05, + "loss": 1.4951, + "step": 2950 + }, + { + "epoch": 0.08664630923718362, + "grad_norm": 0.0, + "learning_rate": 1.9832219647497252e-05, + "loss": 1.5039, + "step": 2951 + }, + { + "epoch": 0.08667567091432263, + "grad_norm": 0.0, + "learning_rate": 1.9832046135336625e-05, + "loss": 1.5098, + "step": 2952 + }, + { + "epoch": 0.08670503259146163, + "grad_norm": 0.0, + "learning_rate": 1.9831872534262287e-05, + "loss": 1.4844, + "step": 2953 + }, + { + "epoch": 0.08673439426860062, + "grad_norm": 0.0, + "learning_rate": 1.9831698844275807e-05, + "loss": 1.3408, + "step": 2954 + }, + { + "epoch": 0.08676375594573962, + "grad_norm": 0.0, + "learning_rate": 1.983152506537876e-05, + "loss": 1.3691, + "step": 2955 + }, + { + "epoch": 0.08679311762287861, + "grad_norm": 0.0, + "learning_rate": 1.983135119757271e-05, + "loss": 1.3467, + "step": 2956 + }, + { + "epoch": 0.08682247930001762, + "grad_norm": 0.0, + "learning_rate": 1.9831177240859234e-05, + "loss": 1.5605, + "step": 2957 + }, + { + "epoch": 0.08685184097715662, + "grad_norm": 0.0, + "learning_rate": 1.9831003195239904e-05, + "loss": 1.4512, + "step": 2958 + }, + { + "epoch": 0.08688120265429561, + "grad_norm": 0.0, + "learning_rate": 1.9830829060716297e-05, + "loss": 1.5381, + "step": 2959 + }, + { + "epoch": 0.08691056433143461, + "grad_norm": 0.0, + "learning_rate": 1.9830654837289982e-05, + "loss": 1.4502, + "step": 2960 + }, + { + "epoch": 0.08693992600857361, + "grad_norm": 0.0, + "learning_rate": 1.983048052496254e-05, + "loss": 1.2188, + "step": 2961 + }, + { + "epoch": 0.0869692876857126, + "grad_norm": 0.0, + "learning_rate": 1.9830306123735544e-05, + "loss": 1.501, + "step": 2962 + }, + { + "epoch": 0.08699864936285161, + "grad_norm": 0.0, + "learning_rate": 1.9830131633610575e-05, + "loss": 1.498, + "step": 2963 + }, + { + "epoch": 0.08702801103999061, + "grad_norm": 0.0, + "learning_rate": 1.9829957054589204e-05, + "loss": 1.5078, + "step": 2964 + }, + { + "epoch": 0.0870573727171296, + "grad_norm": 0.0, + "learning_rate": 1.9829782386673012e-05, + "loss": 1.5156, + "step": 2965 + }, + { + "epoch": 0.0870867343942686, + "grad_norm": 0.0, + "learning_rate": 1.9829607629863586e-05, + "loss": 1.502, + "step": 2966 + }, + { + "epoch": 0.08711609607140759, + "grad_norm": 0.0, + "learning_rate": 1.98294327841625e-05, + "loss": 1.5088, + "step": 2967 + }, + { + "epoch": 0.0871454577485466, + "grad_norm": 0.0, + "learning_rate": 1.9829257849571335e-05, + "loss": 1.3682, + "step": 2968 + }, + { + "epoch": 0.0871748194256856, + "grad_norm": 0.0, + "learning_rate": 1.982908282609167e-05, + "loss": 1.5156, + "step": 2969 + }, + { + "epoch": 0.08720418110282459, + "grad_norm": 0.0, + "learning_rate": 1.9828907713725098e-05, + "loss": 1.502, + "step": 2970 + }, + { + "epoch": 0.08723354277996359, + "grad_norm": 0.0, + "learning_rate": 1.982873251247319e-05, + "loss": 1.5068, + "step": 2971 + }, + { + "epoch": 0.0872629044571026, + "grad_norm": 0.0, + "learning_rate": 1.9828557222337544e-05, + "loss": 1.583, + "step": 2972 + }, + { + "epoch": 0.08729226613424158, + "grad_norm": 0.0, + "learning_rate": 1.9828381843319734e-05, + "loss": 1.4395, + "step": 2973 + }, + { + "epoch": 0.08732162781138059, + "grad_norm": 0.0, + "learning_rate": 1.982820637542135e-05, + "loss": 1.5195, + "step": 2974 + }, + { + "epoch": 0.08735098948851959, + "grad_norm": 0.0, + "learning_rate": 1.9828030818643977e-05, + "loss": 1.5293, + "step": 2975 + }, + { + "epoch": 0.08738035116565858, + "grad_norm": 0.0, + "learning_rate": 1.9827855172989204e-05, + "loss": 1.501, + "step": 2976 + }, + { + "epoch": 0.08740971284279758, + "grad_norm": 0.0, + "learning_rate": 1.9827679438458625e-05, + "loss": 1.5127, + "step": 2977 + }, + { + "epoch": 0.08743907451993659, + "grad_norm": 0.0, + "learning_rate": 1.982750361505382e-05, + "loss": 1.3955, + "step": 2978 + }, + { + "epoch": 0.08746843619707557, + "grad_norm": 0.0, + "learning_rate": 1.9827327702776382e-05, + "loss": 1.502, + "step": 2979 + }, + { + "epoch": 0.08749779787421458, + "grad_norm": 0.0, + "learning_rate": 1.9827151701627905e-05, + "loss": 1.498, + "step": 2980 + }, + { + "epoch": 0.08752715955135357, + "grad_norm": 0.0, + "learning_rate": 1.982697561160998e-05, + "loss": 1.5156, + "step": 2981 + }, + { + "epoch": 0.08755652122849257, + "grad_norm": 0.0, + "learning_rate": 1.9826799432724195e-05, + "loss": 1.4912, + "step": 2982 + }, + { + "epoch": 0.08758588290563157, + "grad_norm": 0.0, + "learning_rate": 1.982662316497215e-05, + "loss": 1.4165, + "step": 2983 + }, + { + "epoch": 0.08761524458277056, + "grad_norm": 0.0, + "learning_rate": 1.9826446808355427e-05, + "loss": 1.6084, + "step": 2984 + }, + { + "epoch": 0.08764460625990957, + "grad_norm": 0.0, + "learning_rate": 1.9826270362875635e-05, + "loss": 1.5986, + "step": 2985 + }, + { + "epoch": 0.08767396793704857, + "grad_norm": 0.0, + "learning_rate": 1.982609382853436e-05, + "loss": 1.5186, + "step": 2986 + }, + { + "epoch": 0.08770332961418756, + "grad_norm": 0.0, + "learning_rate": 1.9825917205333206e-05, + "loss": 1.5566, + "step": 2987 + }, + { + "epoch": 0.08773269129132656, + "grad_norm": 0.0, + "learning_rate": 1.9825740493273765e-05, + "loss": 1.417, + "step": 2988 + }, + { + "epoch": 0.08776205296846556, + "grad_norm": 0.0, + "learning_rate": 1.9825563692357637e-05, + "loss": 1.3916, + "step": 2989 + }, + { + "epoch": 0.08779141464560455, + "grad_norm": 0.0, + "learning_rate": 1.9825386802586418e-05, + "loss": 1.458, + "step": 2990 + }, + { + "epoch": 0.08782077632274356, + "grad_norm": 0.0, + "learning_rate": 1.982520982396171e-05, + "loss": 1.5791, + "step": 2991 + }, + { + "epoch": 0.08785013799988255, + "grad_norm": 0.0, + "learning_rate": 1.9825032756485113e-05, + "loss": 1.6348, + "step": 2992 + }, + { + "epoch": 0.08787949967702155, + "grad_norm": 0.0, + "learning_rate": 1.9824855600158233e-05, + "loss": 1.501, + "step": 2993 + }, + { + "epoch": 0.08790886135416055, + "grad_norm": 0.0, + "learning_rate": 1.9824678354982662e-05, + "loss": 1.5049, + "step": 2994 + }, + { + "epoch": 0.08793822303129954, + "grad_norm": 0.0, + "learning_rate": 1.9824501020960012e-05, + "loss": 1.5078, + "step": 2995 + }, + { + "epoch": 0.08796758470843855, + "grad_norm": 0.0, + "learning_rate": 1.982432359809188e-05, + "loss": 1.4971, + "step": 2996 + }, + { + "epoch": 0.08799694638557755, + "grad_norm": 0.0, + "learning_rate": 1.9824146086379876e-05, + "loss": 1.5586, + "step": 2997 + }, + { + "epoch": 0.08802630806271654, + "grad_norm": 0.0, + "learning_rate": 1.9823968485825604e-05, + "loss": 1.4932, + "step": 2998 + }, + { + "epoch": 0.08805566973985554, + "grad_norm": 0.0, + "learning_rate": 1.9823790796430668e-05, + "loss": 1.3721, + "step": 2999 + }, + { + "epoch": 0.08808503141699454, + "grad_norm": 0.0, + "learning_rate": 1.9823613018196673e-05, + "loss": 1.4023, + "step": 3000 + }, + { + "epoch": 0.08811439309413353, + "grad_norm": 0.0, + "learning_rate": 1.9823435151125233e-05, + "loss": 1.4434, + "step": 3001 + }, + { + "epoch": 0.08814375477127254, + "grad_norm": 0.0, + "learning_rate": 1.982325719521795e-05, + "loss": 1.4668, + "step": 3002 + }, + { + "epoch": 0.08817311644841154, + "grad_norm": 0.0, + "learning_rate": 1.9823079150476442e-05, + "loss": 1.3164, + "step": 3003 + }, + { + "epoch": 0.08820247812555053, + "grad_norm": 0.0, + "learning_rate": 1.982290101690231e-05, + "loss": 1.5811, + "step": 3004 + }, + { + "epoch": 0.08823183980268953, + "grad_norm": 0.0, + "learning_rate": 1.9822722794497168e-05, + "loss": 1.4072, + "step": 3005 + }, + { + "epoch": 0.08826120147982852, + "grad_norm": 0.0, + "learning_rate": 1.9822544483262626e-05, + "loss": 1.4521, + "step": 3006 + }, + { + "epoch": 0.08829056315696752, + "grad_norm": 0.0, + "learning_rate": 1.9822366083200305e-05, + "loss": 1.5518, + "step": 3007 + }, + { + "epoch": 0.08831992483410653, + "grad_norm": 0.0, + "learning_rate": 1.9822187594311807e-05, + "loss": 1.4629, + "step": 3008 + }, + { + "epoch": 0.08834928651124552, + "grad_norm": 0.0, + "learning_rate": 1.982200901659875e-05, + "loss": 1.4551, + "step": 3009 + }, + { + "epoch": 0.08837864818838452, + "grad_norm": 0.0, + "learning_rate": 1.9821830350062755e-05, + "loss": 1.5303, + "step": 3010 + }, + { + "epoch": 0.08840800986552352, + "grad_norm": 0.0, + "learning_rate": 1.982165159470543e-05, + "loss": 1.6436, + "step": 3011 + }, + { + "epoch": 0.08843737154266251, + "grad_norm": 0.0, + "learning_rate": 1.9821472750528393e-05, + "loss": 1.5459, + "step": 3012 + }, + { + "epoch": 0.08846673321980152, + "grad_norm": 0.0, + "learning_rate": 1.9821293817533263e-05, + "loss": 1.4551, + "step": 3013 + }, + { + "epoch": 0.08849609489694052, + "grad_norm": 0.0, + "learning_rate": 1.982111479572166e-05, + "loss": 1.5439, + "step": 3014 + }, + { + "epoch": 0.08852545657407951, + "grad_norm": 0.0, + "learning_rate": 1.98209356850952e-05, + "loss": 1.4072, + "step": 3015 + }, + { + "epoch": 0.08855481825121851, + "grad_norm": 0.0, + "learning_rate": 1.9820756485655503e-05, + "loss": 1.5107, + "step": 3016 + }, + { + "epoch": 0.0885841799283575, + "grad_norm": 0.0, + "learning_rate": 1.9820577197404188e-05, + "loss": 1.4619, + "step": 3017 + }, + { + "epoch": 0.0886135416054965, + "grad_norm": 0.0, + "learning_rate": 1.982039782034288e-05, + "loss": 1.5381, + "step": 3018 + }, + { + "epoch": 0.08864290328263551, + "grad_norm": 0.0, + "learning_rate": 1.98202183544732e-05, + "loss": 1.542, + "step": 3019 + }, + { + "epoch": 0.0886722649597745, + "grad_norm": 0.0, + "learning_rate": 1.982003879979677e-05, + "loss": 1.5264, + "step": 3020 + }, + { + "epoch": 0.0887016266369135, + "grad_norm": 0.0, + "learning_rate": 1.9819859156315215e-05, + "loss": 1.5098, + "step": 3021 + }, + { + "epoch": 0.0887309883140525, + "grad_norm": 0.0, + "learning_rate": 1.9819679424030157e-05, + "loss": 1.4111, + "step": 3022 + }, + { + "epoch": 0.08876034999119149, + "grad_norm": 0.0, + "learning_rate": 1.9819499602943225e-05, + "loss": 1.457, + "step": 3023 + }, + { + "epoch": 0.0887897116683305, + "grad_norm": 0.0, + "learning_rate": 1.9819319693056043e-05, + "loss": 1.5488, + "step": 3024 + }, + { + "epoch": 0.0888190733454695, + "grad_norm": 0.0, + "learning_rate": 1.981913969437024e-05, + "loss": 1.4775, + "step": 3025 + }, + { + "epoch": 0.08884843502260849, + "grad_norm": 0.0, + "learning_rate": 1.9818959606887437e-05, + "loss": 1.4414, + "step": 3026 + }, + { + "epoch": 0.08887779669974749, + "grad_norm": 0.0, + "learning_rate": 1.9818779430609273e-05, + "loss": 1.5098, + "step": 3027 + }, + { + "epoch": 0.0889071583768865, + "grad_norm": 0.0, + "learning_rate": 1.9818599165537368e-05, + "loss": 1.6299, + "step": 3028 + }, + { + "epoch": 0.08893652005402548, + "grad_norm": 0.0, + "learning_rate": 1.9818418811673356e-05, + "loss": 1.5273, + "step": 3029 + }, + { + "epoch": 0.08896588173116449, + "grad_norm": 0.0, + "learning_rate": 1.981823836901887e-05, + "loss": 1.4102, + "step": 3030 + }, + { + "epoch": 0.08899524340830348, + "grad_norm": 0.0, + "learning_rate": 1.9818057837575538e-05, + "loss": 1.5098, + "step": 3031 + }, + { + "epoch": 0.08902460508544248, + "grad_norm": 0.0, + "learning_rate": 1.9817877217344998e-05, + "loss": 1.4062, + "step": 3032 + }, + { + "epoch": 0.08905396676258148, + "grad_norm": 0.0, + "learning_rate": 1.9817696508328877e-05, + "loss": 1.5703, + "step": 3033 + }, + { + "epoch": 0.08908332843972047, + "grad_norm": 0.0, + "learning_rate": 1.9817515710528813e-05, + "loss": 1.5264, + "step": 3034 + }, + { + "epoch": 0.08911269011685947, + "grad_norm": 0.0, + "learning_rate": 1.9817334823946438e-05, + "loss": 1.5156, + "step": 3035 + }, + { + "epoch": 0.08914205179399848, + "grad_norm": 0.0, + "learning_rate": 1.9817153848583396e-05, + "loss": 1.4854, + "step": 3036 + }, + { + "epoch": 0.08917141347113747, + "grad_norm": 0.0, + "learning_rate": 1.9816972784441312e-05, + "loss": 1.5576, + "step": 3037 + }, + { + "epoch": 0.08920077514827647, + "grad_norm": 0.0, + "learning_rate": 1.981679163152183e-05, + "loss": 1.4424, + "step": 3038 + }, + { + "epoch": 0.08923013682541547, + "grad_norm": 0.0, + "learning_rate": 1.981661038982659e-05, + "loss": 1.3828, + "step": 3039 + }, + { + "epoch": 0.08925949850255446, + "grad_norm": 0.0, + "learning_rate": 1.9816429059357224e-05, + "loss": 1.4453, + "step": 3040 + }, + { + "epoch": 0.08928886017969347, + "grad_norm": 0.0, + "learning_rate": 1.981624764011538e-05, + "loss": 1.5146, + "step": 3041 + }, + { + "epoch": 0.08931822185683246, + "grad_norm": 0.0, + "learning_rate": 1.9816066132102688e-05, + "loss": 1.4287, + "step": 3042 + }, + { + "epoch": 0.08934758353397146, + "grad_norm": 0.0, + "learning_rate": 1.98158845353208e-05, + "loss": 1.3828, + "step": 3043 + }, + { + "epoch": 0.08937694521111046, + "grad_norm": 0.0, + "learning_rate": 1.9815702849771357e-05, + "loss": 1.4756, + "step": 3044 + }, + { + "epoch": 0.08940630688824945, + "grad_norm": 0.0, + "learning_rate": 1.9815521075455997e-05, + "loss": 1.2881, + "step": 3045 + }, + { + "epoch": 0.08943566856538845, + "grad_norm": 0.0, + "learning_rate": 1.9815339212376367e-05, + "loss": 1.4854, + "step": 3046 + }, + { + "epoch": 0.08946503024252746, + "grad_norm": 0.0, + "learning_rate": 1.981515726053411e-05, + "loss": 1.4688, + "step": 3047 + }, + { + "epoch": 0.08949439191966645, + "grad_norm": 0.0, + "learning_rate": 1.9814975219930872e-05, + "loss": 1.4058, + "step": 3048 + }, + { + "epoch": 0.08952375359680545, + "grad_norm": 0.0, + "learning_rate": 1.98147930905683e-05, + "loss": 1.4033, + "step": 3049 + }, + { + "epoch": 0.08955311527394445, + "grad_norm": 0.0, + "learning_rate": 1.9814610872448038e-05, + "loss": 1.3682, + "step": 3050 + }, + { + "epoch": 0.08958247695108344, + "grad_norm": 0.0, + "learning_rate": 1.981442856557174e-05, + "loss": 1.3994, + "step": 3051 + }, + { + "epoch": 0.08961183862822245, + "grad_norm": 0.0, + "learning_rate": 1.981424616994105e-05, + "loss": 1.5508, + "step": 3052 + }, + { + "epoch": 0.08964120030536145, + "grad_norm": 0.0, + "learning_rate": 1.9814063685557618e-05, + "loss": 1.584, + "step": 3053 + }, + { + "epoch": 0.08967056198250044, + "grad_norm": 0.0, + "learning_rate": 1.9813881112423093e-05, + "loss": 1.5312, + "step": 3054 + }, + { + "epoch": 0.08969992365963944, + "grad_norm": 0.0, + "learning_rate": 1.981369845053913e-05, + "loss": 1.4082, + "step": 3055 + }, + { + "epoch": 0.08972928533677843, + "grad_norm": 0.0, + "learning_rate": 1.9813515699907377e-05, + "loss": 1.5166, + "step": 3056 + }, + { + "epoch": 0.08975864701391743, + "grad_norm": 0.0, + "learning_rate": 1.981333286052949e-05, + "loss": 1.5771, + "step": 3057 + }, + { + "epoch": 0.08978800869105644, + "grad_norm": 0.0, + "learning_rate": 1.9813149932407118e-05, + "loss": 1.4531, + "step": 3058 + }, + { + "epoch": 0.08981737036819543, + "grad_norm": 0.0, + "learning_rate": 1.981296691554192e-05, + "loss": 1.6328, + "step": 3059 + }, + { + "epoch": 0.08984673204533443, + "grad_norm": 0.0, + "learning_rate": 1.9812783809935548e-05, + "loss": 1.4756, + "step": 3060 + }, + { + "epoch": 0.08987609372247343, + "grad_norm": 0.0, + "learning_rate": 1.981260061558966e-05, + "loss": 1.5566, + "step": 3061 + }, + { + "epoch": 0.08990545539961242, + "grad_norm": 0.0, + "learning_rate": 1.9812417332505914e-05, + "loss": 1.4746, + "step": 3062 + }, + { + "epoch": 0.08993481707675142, + "grad_norm": 0.0, + "learning_rate": 1.981223396068596e-05, + "loss": 1.4219, + "step": 3063 + }, + { + "epoch": 0.08996417875389043, + "grad_norm": 0.0, + "learning_rate": 1.9812050500131464e-05, + "loss": 1.5332, + "step": 3064 + }, + { + "epoch": 0.08999354043102942, + "grad_norm": 0.0, + "learning_rate": 1.981186695084408e-05, + "loss": 1.5088, + "step": 3065 + }, + { + "epoch": 0.09002290210816842, + "grad_norm": 0.0, + "learning_rate": 1.9811683312825475e-05, + "loss": 1.4316, + "step": 3066 + }, + { + "epoch": 0.09005226378530741, + "grad_norm": 0.0, + "learning_rate": 1.98114995860773e-05, + "loss": 1.5195, + "step": 3067 + }, + { + "epoch": 0.09008162546244641, + "grad_norm": 0.0, + "learning_rate": 1.9811315770601226e-05, + "loss": 1.4346, + "step": 3068 + }, + { + "epoch": 0.09011098713958542, + "grad_norm": 0.0, + "learning_rate": 1.9811131866398908e-05, + "loss": 1.6084, + "step": 3069 + }, + { + "epoch": 0.0901403488167244, + "grad_norm": 0.0, + "learning_rate": 1.9810947873472012e-05, + "loss": 1.5107, + "step": 3070 + }, + { + "epoch": 0.09016971049386341, + "grad_norm": 0.0, + "learning_rate": 1.9810763791822204e-05, + "loss": 1.5264, + "step": 3071 + }, + { + "epoch": 0.09019907217100241, + "grad_norm": 0.0, + "learning_rate": 1.9810579621451142e-05, + "loss": 1.4414, + "step": 3072 + }, + { + "epoch": 0.0902284338481414, + "grad_norm": 0.0, + "learning_rate": 1.98103953623605e-05, + "loss": 1.5713, + "step": 3073 + }, + { + "epoch": 0.0902577955252804, + "grad_norm": 0.0, + "learning_rate": 1.981021101455194e-05, + "loss": 1.6318, + "step": 3074 + }, + { + "epoch": 0.09028715720241941, + "grad_norm": 0.0, + "learning_rate": 1.9810026578027128e-05, + "loss": 1.5479, + "step": 3075 + }, + { + "epoch": 0.0903165188795584, + "grad_norm": 0.0, + "learning_rate": 1.980984205278773e-05, + "loss": 1.5107, + "step": 3076 + }, + { + "epoch": 0.0903458805566974, + "grad_norm": 0.0, + "learning_rate": 1.9809657438835424e-05, + "loss": 1.4424, + "step": 3077 + }, + { + "epoch": 0.0903752422338364, + "grad_norm": 0.0, + "learning_rate": 1.980947273617187e-05, + "loss": 1.3916, + "step": 3078 + }, + { + "epoch": 0.09040460391097539, + "grad_norm": 0.0, + "learning_rate": 1.9809287944798745e-05, + "loss": 1.4824, + "step": 3079 + }, + { + "epoch": 0.0904339655881144, + "grad_norm": 0.0, + "learning_rate": 1.9809103064717713e-05, + "loss": 1.457, + "step": 3080 + }, + { + "epoch": 0.09046332726525338, + "grad_norm": 0.0, + "learning_rate": 1.980891809593045e-05, + "loss": 1.4248, + "step": 3081 + }, + { + "epoch": 0.09049268894239239, + "grad_norm": 0.0, + "learning_rate": 1.9808733038438632e-05, + "loss": 1.4727, + "step": 3082 + }, + { + "epoch": 0.09052205061953139, + "grad_norm": 0.0, + "learning_rate": 1.9808547892243928e-05, + "loss": 1.5176, + "step": 3083 + }, + { + "epoch": 0.09055141229667038, + "grad_norm": 0.0, + "learning_rate": 1.980836265734801e-05, + "loss": 1.4473, + "step": 3084 + }, + { + "epoch": 0.09058077397380938, + "grad_norm": 0.0, + "learning_rate": 1.9808177333752562e-05, + "loss": 1.5586, + "step": 3085 + }, + { + "epoch": 0.09061013565094839, + "grad_norm": 0.0, + "learning_rate": 1.980799192145925e-05, + "loss": 1.5352, + "step": 3086 + }, + { + "epoch": 0.09063949732808738, + "grad_norm": 0.0, + "learning_rate": 1.9807806420469757e-05, + "loss": 1.6006, + "step": 3087 + }, + { + "epoch": 0.09066885900522638, + "grad_norm": 0.0, + "learning_rate": 1.980762083078576e-05, + "loss": 1.6084, + "step": 3088 + }, + { + "epoch": 0.09069822068236538, + "grad_norm": 0.0, + "learning_rate": 1.9807435152408934e-05, + "loss": 1.6123, + "step": 3089 + }, + { + "epoch": 0.09072758235950437, + "grad_norm": 0.0, + "learning_rate": 1.980724938534096e-05, + "loss": 1.543, + "step": 3090 + }, + { + "epoch": 0.09075694403664337, + "grad_norm": 0.0, + "learning_rate": 1.980706352958352e-05, + "loss": 1.4736, + "step": 3091 + }, + { + "epoch": 0.09078630571378238, + "grad_norm": 0.0, + "learning_rate": 1.980687758513829e-05, + "loss": 1.416, + "step": 3092 + }, + { + "epoch": 0.09081566739092137, + "grad_norm": 0.0, + "learning_rate": 1.9806691552006957e-05, + "loss": 1.3721, + "step": 3093 + }, + { + "epoch": 0.09084502906806037, + "grad_norm": 0.0, + "learning_rate": 1.98065054301912e-05, + "loss": 1.5361, + "step": 3094 + }, + { + "epoch": 0.09087439074519936, + "grad_norm": 0.0, + "learning_rate": 1.9806319219692702e-05, + "loss": 1.5527, + "step": 3095 + }, + { + "epoch": 0.09090375242233836, + "grad_norm": 0.0, + "learning_rate": 1.9806132920513148e-05, + "loss": 1.4658, + "step": 3096 + }, + { + "epoch": 0.09093311409947737, + "grad_norm": 0.0, + "learning_rate": 1.9805946532654225e-05, + "loss": 1.4922, + "step": 3097 + }, + { + "epoch": 0.09096247577661636, + "grad_norm": 0.0, + "learning_rate": 1.9805760056117617e-05, + "loss": 1.5205, + "step": 3098 + }, + { + "epoch": 0.09099183745375536, + "grad_norm": 0.0, + "learning_rate": 1.9805573490905004e-05, + "loss": 1.4922, + "step": 3099 + }, + { + "epoch": 0.09102119913089436, + "grad_norm": 0.0, + "learning_rate": 1.9805386837018084e-05, + "loss": 1.5381, + "step": 3100 + }, + { + "epoch": 0.09105056080803335, + "grad_norm": 0.0, + "learning_rate": 1.9805200094458534e-05, + "loss": 1.3574, + "step": 3101 + }, + { + "epoch": 0.09107992248517235, + "grad_norm": 0.0, + "learning_rate": 1.980501326322805e-05, + "loss": 1.4512, + "step": 3102 + }, + { + "epoch": 0.09110928416231136, + "grad_norm": 0.0, + "learning_rate": 1.9804826343328324e-05, + "loss": 1.373, + "step": 3103 + }, + { + "epoch": 0.09113864583945035, + "grad_norm": 0.0, + "learning_rate": 1.980463933476104e-05, + "loss": 1.4072, + "step": 3104 + }, + { + "epoch": 0.09116800751658935, + "grad_norm": 0.0, + "learning_rate": 1.9804452237527895e-05, + "loss": 1.6338, + "step": 3105 + }, + { + "epoch": 0.09119736919372834, + "grad_norm": 0.0, + "learning_rate": 1.9804265051630572e-05, + "loss": 1.5684, + "step": 3106 + }, + { + "epoch": 0.09122673087086734, + "grad_norm": 0.0, + "learning_rate": 1.9804077777070773e-05, + "loss": 1.4502, + "step": 3107 + }, + { + "epoch": 0.09125609254800635, + "grad_norm": 0.0, + "learning_rate": 1.9803890413850186e-05, + "loss": 1.4424, + "step": 3108 + }, + { + "epoch": 0.09128545422514533, + "grad_norm": 0.0, + "learning_rate": 1.9803702961970504e-05, + "loss": 1.4131, + "step": 3109 + }, + { + "epoch": 0.09131481590228434, + "grad_norm": 0.0, + "learning_rate": 1.980351542143343e-05, + "loss": 1.4258, + "step": 3110 + }, + { + "epoch": 0.09134417757942334, + "grad_norm": 0.0, + "learning_rate": 1.980332779224065e-05, + "loss": 1.4873, + "step": 3111 + }, + { + "epoch": 0.09137353925656233, + "grad_norm": 0.0, + "learning_rate": 1.9803140074393873e-05, + "loss": 1.4844, + "step": 3112 + }, + { + "epoch": 0.09140290093370133, + "grad_norm": 0.0, + "learning_rate": 1.9802952267894785e-05, + "loss": 1.4629, + "step": 3113 + }, + { + "epoch": 0.09143226261084034, + "grad_norm": 0.0, + "learning_rate": 1.9802764372745093e-05, + "loss": 1.5205, + "step": 3114 + }, + { + "epoch": 0.09146162428797933, + "grad_norm": 0.0, + "learning_rate": 1.9802576388946488e-05, + "loss": 1.5039, + "step": 3115 + }, + { + "epoch": 0.09149098596511833, + "grad_norm": 0.0, + "learning_rate": 1.9802388316500677e-05, + "loss": 1.3906, + "step": 3116 + }, + { + "epoch": 0.09152034764225733, + "grad_norm": 0.0, + "learning_rate": 1.9802200155409358e-05, + "loss": 1.5361, + "step": 3117 + }, + { + "epoch": 0.09154970931939632, + "grad_norm": 0.0, + "learning_rate": 1.980201190567423e-05, + "loss": 1.3535, + "step": 3118 + }, + { + "epoch": 0.09157907099653533, + "grad_norm": 0.0, + "learning_rate": 1.9801823567297e-05, + "loss": 1.373, + "step": 3119 + }, + { + "epoch": 0.09160843267367431, + "grad_norm": 0.0, + "learning_rate": 1.980163514027937e-05, + "loss": 1.4629, + "step": 3120 + }, + { + "epoch": 0.09163779435081332, + "grad_norm": 0.0, + "learning_rate": 1.9801446624623043e-05, + "loss": 1.5166, + "step": 3121 + }, + { + "epoch": 0.09166715602795232, + "grad_norm": 0.0, + "learning_rate": 1.9801258020329724e-05, + "loss": 1.5625, + "step": 3122 + }, + { + "epoch": 0.09169651770509131, + "grad_norm": 0.0, + "learning_rate": 1.9801069327401115e-05, + "loss": 1.4482, + "step": 3123 + }, + { + "epoch": 0.09172587938223031, + "grad_norm": 0.0, + "learning_rate": 1.9800880545838928e-05, + "loss": 1.4043, + "step": 3124 + }, + { + "epoch": 0.09175524105936932, + "grad_norm": 0.0, + "learning_rate": 1.980069167564487e-05, + "loss": 1.5117, + "step": 3125 + }, + { + "epoch": 0.0917846027365083, + "grad_norm": 0.0, + "learning_rate": 1.9800502716820646e-05, + "loss": 1.4756, + "step": 3126 + }, + { + "epoch": 0.09181396441364731, + "grad_norm": 0.0, + "learning_rate": 1.9800313669367966e-05, + "loss": 1.5967, + "step": 3127 + }, + { + "epoch": 0.09184332609078631, + "grad_norm": 0.0, + "learning_rate": 1.980012453328854e-05, + "loss": 1.4609, + "step": 3128 + }, + { + "epoch": 0.0918726877679253, + "grad_norm": 0.0, + "learning_rate": 1.979993530858408e-05, + "loss": 1.6533, + "step": 3129 + }, + { + "epoch": 0.0919020494450643, + "grad_norm": 0.0, + "learning_rate": 1.9799745995256292e-05, + "loss": 1.4819, + "step": 3130 + }, + { + "epoch": 0.0919314111222033, + "grad_norm": 0.0, + "learning_rate": 1.9799556593306893e-05, + "loss": 1.501, + "step": 3131 + }, + { + "epoch": 0.0919607727993423, + "grad_norm": 0.0, + "learning_rate": 1.979936710273759e-05, + "loss": 1.584, + "step": 3132 + }, + { + "epoch": 0.0919901344764813, + "grad_norm": 0.0, + "learning_rate": 1.9799177523550105e-05, + "loss": 1.3262, + "step": 3133 + }, + { + "epoch": 0.09201949615362029, + "grad_norm": 0.0, + "learning_rate": 1.9798987855746147e-05, + "loss": 1.5732, + "step": 3134 + }, + { + "epoch": 0.09204885783075929, + "grad_norm": 0.0, + "learning_rate": 1.9798798099327433e-05, + "loss": 1.4229, + "step": 3135 + }, + { + "epoch": 0.0920782195078983, + "grad_norm": 0.0, + "learning_rate": 1.9798608254295675e-05, + "loss": 1.3125, + "step": 3136 + }, + { + "epoch": 0.09210758118503728, + "grad_norm": 0.0, + "learning_rate": 1.97984183206526e-05, + "loss": 1.4727, + "step": 3137 + }, + { + "epoch": 0.09213694286217629, + "grad_norm": 0.0, + "learning_rate": 1.9798228298399912e-05, + "loss": 1.3584, + "step": 3138 + }, + { + "epoch": 0.09216630453931529, + "grad_norm": 0.0, + "learning_rate": 1.9798038187539335e-05, + "loss": 1.5527, + "step": 3139 + }, + { + "epoch": 0.09219566621645428, + "grad_norm": 0.0, + "learning_rate": 1.9797847988072597e-05, + "loss": 1.4648, + "step": 3140 + }, + { + "epoch": 0.09222502789359328, + "grad_norm": 0.0, + "learning_rate": 1.97976577000014e-05, + "loss": 1.6094, + "step": 3141 + }, + { + "epoch": 0.09225438957073229, + "grad_norm": 0.0, + "learning_rate": 1.9797467323327483e-05, + "loss": 1.5342, + "step": 3142 + }, + { + "epoch": 0.09228375124787128, + "grad_norm": 0.0, + "learning_rate": 1.979727685805256e-05, + "loss": 1.5859, + "step": 3143 + }, + { + "epoch": 0.09231311292501028, + "grad_norm": 0.0, + "learning_rate": 1.979708630417835e-05, + "loss": 1.4424, + "step": 3144 + }, + { + "epoch": 0.09234247460214927, + "grad_norm": 0.0, + "learning_rate": 1.979689566170658e-05, + "loss": 1.4834, + "step": 3145 + }, + { + "epoch": 0.09237183627928827, + "grad_norm": 0.0, + "learning_rate": 1.9796704930638976e-05, + "loss": 1.4199, + "step": 3146 + }, + { + "epoch": 0.09240119795642728, + "grad_norm": 0.0, + "learning_rate": 1.9796514110977258e-05, + "loss": 1.4072, + "step": 3147 + }, + { + "epoch": 0.09243055963356626, + "grad_norm": 0.0, + "learning_rate": 1.9796323202723155e-05, + "loss": 1.4678, + "step": 3148 + }, + { + "epoch": 0.09245992131070527, + "grad_norm": 0.0, + "learning_rate": 1.9796132205878394e-05, + "loss": 1.4277, + "step": 3149 + }, + { + "epoch": 0.09248928298784427, + "grad_norm": 0.0, + "learning_rate": 1.9795941120444697e-05, + "loss": 1.5518, + "step": 3150 + }, + { + "epoch": 0.09251864466498326, + "grad_norm": 0.0, + "learning_rate": 1.9795749946423798e-05, + "loss": 1.3584, + "step": 3151 + }, + { + "epoch": 0.09254800634212226, + "grad_norm": 0.0, + "learning_rate": 1.9795558683817422e-05, + "loss": 1.4482, + "step": 3152 + }, + { + "epoch": 0.09257736801926127, + "grad_norm": 0.0, + "learning_rate": 1.97953673326273e-05, + "loss": 1.4072, + "step": 3153 + }, + { + "epoch": 0.09260672969640026, + "grad_norm": 0.0, + "learning_rate": 1.9795175892855164e-05, + "loss": 1.5898, + "step": 3154 + }, + { + "epoch": 0.09263609137353926, + "grad_norm": 0.0, + "learning_rate": 1.9794984364502742e-05, + "loss": 1.5146, + "step": 3155 + }, + { + "epoch": 0.09266545305067825, + "grad_norm": 0.0, + "learning_rate": 1.979479274757177e-05, + "loss": 1.4307, + "step": 3156 + }, + { + "epoch": 0.09269481472781725, + "grad_norm": 0.0, + "learning_rate": 1.979460104206398e-05, + "loss": 1.5703, + "step": 3157 + }, + { + "epoch": 0.09272417640495625, + "grad_norm": 0.0, + "learning_rate": 1.9794409247981102e-05, + "loss": 1.4629, + "step": 3158 + }, + { + "epoch": 0.09275353808209524, + "grad_norm": 0.0, + "learning_rate": 1.9794217365324874e-05, + "loss": 1.3828, + "step": 3159 + }, + { + "epoch": 0.09278289975923425, + "grad_norm": 0.0, + "learning_rate": 1.9794025394097026e-05, + "loss": 1.4756, + "step": 3160 + }, + { + "epoch": 0.09281226143637325, + "grad_norm": 0.0, + "learning_rate": 1.9793833334299303e-05, + "loss": 1.415, + "step": 3161 + }, + { + "epoch": 0.09284162311351224, + "grad_norm": 0.0, + "learning_rate": 1.9793641185933434e-05, + "loss": 1.4141, + "step": 3162 + }, + { + "epoch": 0.09287098479065124, + "grad_norm": 0.0, + "learning_rate": 1.9793448949001163e-05, + "loss": 1.5146, + "step": 3163 + }, + { + "epoch": 0.09290034646779025, + "grad_norm": 0.0, + "learning_rate": 1.979325662350422e-05, + "loss": 1.4756, + "step": 3164 + }, + { + "epoch": 0.09292970814492924, + "grad_norm": 0.0, + "learning_rate": 1.9793064209444353e-05, + "loss": 1.5088, + "step": 3165 + }, + { + "epoch": 0.09295906982206824, + "grad_norm": 0.0, + "learning_rate": 1.9792871706823294e-05, + "loss": 1.4541, + "step": 3166 + }, + { + "epoch": 0.09298843149920724, + "grad_norm": 0.0, + "learning_rate": 1.9792679115642793e-05, + "loss": 1.46, + "step": 3167 + }, + { + "epoch": 0.09301779317634623, + "grad_norm": 0.0, + "learning_rate": 1.9792486435904587e-05, + "loss": 1.5303, + "step": 3168 + }, + { + "epoch": 0.09304715485348523, + "grad_norm": 0.0, + "learning_rate": 1.9792293667610415e-05, + "loss": 1.4834, + "step": 3169 + }, + { + "epoch": 0.09307651653062422, + "grad_norm": 0.0, + "learning_rate": 1.9792100810762024e-05, + "loss": 1.3867, + "step": 3170 + }, + { + "epoch": 0.09310587820776323, + "grad_norm": 0.0, + "learning_rate": 1.979190786536116e-05, + "loss": 1.4688, + "step": 3171 + }, + { + "epoch": 0.09313523988490223, + "grad_norm": 0.0, + "learning_rate": 1.9791714831409564e-05, + "loss": 1.3926, + "step": 3172 + }, + { + "epoch": 0.09316460156204122, + "grad_norm": 0.0, + "learning_rate": 1.9791521708908985e-05, + "loss": 1.5156, + "step": 3173 + }, + { + "epoch": 0.09319396323918022, + "grad_norm": 0.0, + "learning_rate": 1.9791328497861163e-05, + "loss": 1.4912, + "step": 3174 + }, + { + "epoch": 0.09322332491631923, + "grad_norm": 0.0, + "learning_rate": 1.979113519826785e-05, + "loss": 1.3613, + "step": 3175 + }, + { + "epoch": 0.09325268659345821, + "grad_norm": 0.0, + "learning_rate": 1.97909418101308e-05, + "loss": 1.5479, + "step": 3176 + }, + { + "epoch": 0.09328204827059722, + "grad_norm": 0.0, + "learning_rate": 1.979074833345175e-05, + "loss": 1.4707, + "step": 3177 + }, + { + "epoch": 0.09331140994773622, + "grad_norm": 0.0, + "learning_rate": 1.979055476823246e-05, + "loss": 1.6025, + "step": 3178 + }, + { + "epoch": 0.09334077162487521, + "grad_norm": 0.0, + "learning_rate": 1.9790361114474673e-05, + "loss": 1.4824, + "step": 3179 + }, + { + "epoch": 0.09337013330201421, + "grad_norm": 0.0, + "learning_rate": 1.9790167372180143e-05, + "loss": 1.5293, + "step": 3180 + }, + { + "epoch": 0.0933994949791532, + "grad_norm": 0.0, + "learning_rate": 1.9789973541350623e-05, + "loss": 1.4785, + "step": 3181 + }, + { + "epoch": 0.0934288566562922, + "grad_norm": 0.0, + "learning_rate": 1.9789779621987864e-05, + "loss": 1.4414, + "step": 3182 + }, + { + "epoch": 0.09345821833343121, + "grad_norm": 0.0, + "learning_rate": 1.978958561409362e-05, + "loss": 1.415, + "step": 3183 + }, + { + "epoch": 0.0934875800105702, + "grad_norm": 0.0, + "learning_rate": 1.9789391517669647e-05, + "loss": 1.4395, + "step": 3184 + }, + { + "epoch": 0.0935169416877092, + "grad_norm": 0.0, + "learning_rate": 1.97891973327177e-05, + "loss": 1.5371, + "step": 3185 + }, + { + "epoch": 0.0935463033648482, + "grad_norm": 0.0, + "learning_rate": 1.9789003059239534e-05, + "loss": 1.4756, + "step": 3186 + }, + { + "epoch": 0.0935756650419872, + "grad_norm": 0.0, + "learning_rate": 1.9788808697236906e-05, + "loss": 1.4678, + "step": 3187 + }, + { + "epoch": 0.0936050267191262, + "grad_norm": 0.0, + "learning_rate": 1.9788614246711574e-05, + "loss": 1.5039, + "step": 3188 + }, + { + "epoch": 0.0936343883962652, + "grad_norm": 0.0, + "learning_rate": 1.9788419707665298e-05, + "loss": 1.5195, + "step": 3189 + }, + { + "epoch": 0.09366375007340419, + "grad_norm": 0.0, + "learning_rate": 1.9788225080099836e-05, + "loss": 1.5488, + "step": 3190 + }, + { + "epoch": 0.09369311175054319, + "grad_norm": 0.0, + "learning_rate": 1.9788030364016945e-05, + "loss": 1.3936, + "step": 3191 + }, + { + "epoch": 0.0937224734276822, + "grad_norm": 0.0, + "learning_rate": 1.978783555941839e-05, + "loss": 1.4268, + "step": 3192 + }, + { + "epoch": 0.09375183510482119, + "grad_norm": 0.0, + "learning_rate": 1.978764066630593e-05, + "loss": 1.5869, + "step": 3193 + }, + { + "epoch": 0.09378119678196019, + "grad_norm": 0.0, + "learning_rate": 1.9787445684681332e-05, + "loss": 1.5088, + "step": 3194 + }, + { + "epoch": 0.09381055845909918, + "grad_norm": 0.0, + "learning_rate": 1.9787250614546357e-05, + "loss": 1.6123, + "step": 3195 + }, + { + "epoch": 0.09383992013623818, + "grad_norm": 0.0, + "learning_rate": 1.9787055455902764e-05, + "loss": 1.417, + "step": 3196 + }, + { + "epoch": 0.09386928181337718, + "grad_norm": 0.0, + "learning_rate": 1.978686020875232e-05, + "loss": 1.499, + "step": 3197 + }, + { + "epoch": 0.09389864349051617, + "grad_norm": 0.0, + "learning_rate": 1.9786664873096798e-05, + "loss": 1.4746, + "step": 3198 + }, + { + "epoch": 0.09392800516765518, + "grad_norm": 0.0, + "learning_rate": 1.9786469448937958e-05, + "loss": 1.3848, + "step": 3199 + }, + { + "epoch": 0.09395736684479418, + "grad_norm": 0.0, + "learning_rate": 1.978627393627757e-05, + "loss": 1.5479, + "step": 3200 + }, + { + "epoch": 0.09398672852193317, + "grad_norm": 0.0, + "learning_rate": 1.9786078335117395e-05, + "loss": 1.5361, + "step": 3201 + }, + { + "epoch": 0.09401609019907217, + "grad_norm": 0.0, + "learning_rate": 1.978588264545921e-05, + "loss": 1.4951, + "step": 3202 + }, + { + "epoch": 0.09404545187621118, + "grad_norm": 0.0, + "learning_rate": 1.9785686867304785e-05, + "loss": 1.5576, + "step": 3203 + }, + { + "epoch": 0.09407481355335016, + "grad_norm": 0.0, + "learning_rate": 1.9785491000655886e-05, + "loss": 1.5215, + "step": 3204 + }, + { + "epoch": 0.09410417523048917, + "grad_norm": 0.0, + "learning_rate": 1.9785295045514284e-05, + "loss": 1.4531, + "step": 3205 + }, + { + "epoch": 0.09413353690762816, + "grad_norm": 0.0, + "learning_rate": 1.978509900188175e-05, + "loss": 1.4443, + "step": 3206 + }, + { + "epoch": 0.09416289858476716, + "grad_norm": 0.0, + "learning_rate": 1.9784902869760064e-05, + "loss": 1.3125, + "step": 3207 + }, + { + "epoch": 0.09419226026190616, + "grad_norm": 0.0, + "learning_rate": 1.9784706649150998e-05, + "loss": 1.5967, + "step": 3208 + }, + { + "epoch": 0.09422162193904515, + "grad_norm": 0.0, + "learning_rate": 1.978451034005632e-05, + "loss": 1.3984, + "step": 3209 + }, + { + "epoch": 0.09425098361618416, + "grad_norm": 0.0, + "learning_rate": 1.9784313942477807e-05, + "loss": 1.4932, + "step": 3210 + }, + { + "epoch": 0.09428034529332316, + "grad_norm": 0.0, + "learning_rate": 1.978411745641724e-05, + "loss": 1.4492, + "step": 3211 + }, + { + "epoch": 0.09430970697046215, + "grad_norm": 0.0, + "learning_rate": 1.9783920881876396e-05, + "loss": 1.5186, + "step": 3212 + }, + { + "epoch": 0.09433906864760115, + "grad_norm": 0.0, + "learning_rate": 1.9783724218857044e-05, + "loss": 1.4512, + "step": 3213 + }, + { + "epoch": 0.09436843032474015, + "grad_norm": 0.0, + "learning_rate": 1.9783527467360974e-05, + "loss": 1.3594, + "step": 3214 + }, + { + "epoch": 0.09439779200187914, + "grad_norm": 0.0, + "learning_rate": 1.9783330627389955e-05, + "loss": 1.3936, + "step": 3215 + }, + { + "epoch": 0.09442715367901815, + "grad_norm": 0.0, + "learning_rate": 1.9783133698945775e-05, + "loss": 1.4121, + "step": 3216 + }, + { + "epoch": 0.09445651535615715, + "grad_norm": 0.0, + "learning_rate": 1.978293668203021e-05, + "loss": 1.5303, + "step": 3217 + }, + { + "epoch": 0.09448587703329614, + "grad_norm": 0.0, + "learning_rate": 1.978273957664504e-05, + "loss": 1.501, + "step": 3218 + }, + { + "epoch": 0.09451523871043514, + "grad_norm": 0.0, + "learning_rate": 1.9782542382792055e-05, + "loss": 1.6406, + "step": 3219 + }, + { + "epoch": 0.09454460038757413, + "grad_norm": 0.0, + "learning_rate": 1.9782345100473034e-05, + "loss": 1.5713, + "step": 3220 + }, + { + "epoch": 0.09457396206471314, + "grad_norm": 0.0, + "learning_rate": 1.978214772968976e-05, + "loss": 1.4814, + "step": 3221 + }, + { + "epoch": 0.09460332374185214, + "grad_norm": 0.0, + "learning_rate": 1.9781950270444018e-05, + "loss": 1.3623, + "step": 3222 + }, + { + "epoch": 0.09463268541899113, + "grad_norm": 0.0, + "learning_rate": 1.9781752722737594e-05, + "loss": 1.5293, + "step": 3223 + }, + { + "epoch": 0.09466204709613013, + "grad_norm": 0.0, + "learning_rate": 1.9781555086572277e-05, + "loss": 1.4697, + "step": 3224 + }, + { + "epoch": 0.09469140877326913, + "grad_norm": 0.0, + "learning_rate": 1.9781357361949852e-05, + "loss": 1.4668, + "step": 3225 + }, + { + "epoch": 0.09472077045040812, + "grad_norm": 0.0, + "learning_rate": 1.9781159548872106e-05, + "loss": 1.5547, + "step": 3226 + }, + { + "epoch": 0.09475013212754713, + "grad_norm": 0.0, + "learning_rate": 1.978096164734083e-05, + "loss": 1.5029, + "step": 3227 + }, + { + "epoch": 0.09477949380468613, + "grad_norm": 0.0, + "learning_rate": 1.9780763657357812e-05, + "loss": 1.3428, + "step": 3228 + }, + { + "epoch": 0.09480885548182512, + "grad_norm": 0.0, + "learning_rate": 1.9780565578924843e-05, + "loss": 1.376, + "step": 3229 + }, + { + "epoch": 0.09483821715896412, + "grad_norm": 0.0, + "learning_rate": 1.978036741204371e-05, + "loss": 1.4795, + "step": 3230 + }, + { + "epoch": 0.09486757883610311, + "grad_norm": 0.0, + "learning_rate": 1.978016915671622e-05, + "loss": 1.5107, + "step": 3231 + }, + { + "epoch": 0.09489694051324211, + "grad_norm": 0.0, + "learning_rate": 1.977997081294415e-05, + "loss": 1.4795, + "step": 3232 + }, + { + "epoch": 0.09492630219038112, + "grad_norm": 0.0, + "learning_rate": 1.9779772380729302e-05, + "loss": 1.5781, + "step": 3233 + }, + { + "epoch": 0.09495566386752011, + "grad_norm": 0.0, + "learning_rate": 1.9779573860073462e-05, + "loss": 1.4492, + "step": 3234 + }, + { + "epoch": 0.09498502554465911, + "grad_norm": 0.0, + "learning_rate": 1.9779375250978435e-05, + "loss": 1.5527, + "step": 3235 + }, + { + "epoch": 0.09501438722179811, + "grad_norm": 0.0, + "learning_rate": 1.9779176553446015e-05, + "loss": 1.5166, + "step": 3236 + }, + { + "epoch": 0.0950437488989371, + "grad_norm": 0.0, + "learning_rate": 1.9778977767477992e-05, + "loss": 1.5869, + "step": 3237 + }, + { + "epoch": 0.0950731105760761, + "grad_norm": 0.0, + "learning_rate": 1.9778778893076172e-05, + "loss": 1.4707, + "step": 3238 + }, + { + "epoch": 0.09510247225321511, + "grad_norm": 0.0, + "learning_rate": 1.977857993024235e-05, + "loss": 1.4404, + "step": 3239 + }, + { + "epoch": 0.0951318339303541, + "grad_norm": 0.0, + "learning_rate": 1.9778380878978326e-05, + "loss": 1.4609, + "step": 3240 + }, + { + "epoch": 0.0951611956074931, + "grad_norm": 0.0, + "learning_rate": 1.9778181739285895e-05, + "loss": 1.4111, + "step": 3241 + }, + { + "epoch": 0.0951905572846321, + "grad_norm": 0.0, + "learning_rate": 1.9777982511166867e-05, + "loss": 1.373, + "step": 3242 + }, + { + "epoch": 0.0952199189617711, + "grad_norm": 0.0, + "learning_rate": 1.9777783194623037e-05, + "loss": 1.5137, + "step": 3243 + }, + { + "epoch": 0.0952492806389101, + "grad_norm": 0.0, + "learning_rate": 1.9777583789656208e-05, + "loss": 1.4531, + "step": 3244 + }, + { + "epoch": 0.09527864231604909, + "grad_norm": 0.0, + "learning_rate": 1.977738429626819e-05, + "loss": 1.5273, + "step": 3245 + }, + { + "epoch": 0.09530800399318809, + "grad_norm": 0.0, + "learning_rate": 1.977718471446078e-05, + "loss": 1.5234, + "step": 3246 + }, + { + "epoch": 0.09533736567032709, + "grad_norm": 0.0, + "learning_rate": 1.977698504423578e-05, + "loss": 1.5244, + "step": 3247 + }, + { + "epoch": 0.09536672734746608, + "grad_norm": 0.0, + "learning_rate": 1.9776785285595003e-05, + "loss": 1.3467, + "step": 3248 + }, + { + "epoch": 0.09539608902460509, + "grad_norm": 0.0, + "learning_rate": 1.9776585438540255e-05, + "loss": 1.5723, + "step": 3249 + }, + { + "epoch": 0.09542545070174409, + "grad_norm": 0.0, + "learning_rate": 1.9776385503073337e-05, + "loss": 1.5508, + "step": 3250 + }, + { + "epoch": 0.09545481237888308, + "grad_norm": 0.0, + "learning_rate": 1.9776185479196064e-05, + "loss": 1.5254, + "step": 3251 + }, + { + "epoch": 0.09548417405602208, + "grad_norm": 0.0, + "learning_rate": 1.977598536691024e-05, + "loss": 1.457, + "step": 3252 + }, + { + "epoch": 0.09551353573316108, + "grad_norm": 0.0, + "learning_rate": 1.9775785166217677e-05, + "loss": 1.5459, + "step": 3253 + }, + { + "epoch": 0.09554289741030007, + "grad_norm": 0.0, + "learning_rate": 1.9775584877120185e-05, + "loss": 1.5127, + "step": 3254 + }, + { + "epoch": 0.09557225908743908, + "grad_norm": 0.0, + "learning_rate": 1.977538449961958e-05, + "loss": 1.3477, + "step": 3255 + }, + { + "epoch": 0.09560162076457807, + "grad_norm": 0.0, + "learning_rate": 1.9775184033717664e-05, + "loss": 1.541, + "step": 3256 + }, + { + "epoch": 0.09563098244171707, + "grad_norm": 0.0, + "learning_rate": 1.9774983479416257e-05, + "loss": 1.4111, + "step": 3257 + }, + { + "epoch": 0.09566034411885607, + "grad_norm": 0.0, + "learning_rate": 1.9774782836717173e-05, + "loss": 1.5303, + "step": 3258 + }, + { + "epoch": 0.09568970579599506, + "grad_norm": 0.0, + "learning_rate": 1.9774582105622223e-05, + "loss": 1.3994, + "step": 3259 + }, + { + "epoch": 0.09571906747313406, + "grad_norm": 0.0, + "learning_rate": 1.977438128613322e-05, + "loss": 1.5762, + "step": 3260 + }, + { + "epoch": 0.09574842915027307, + "grad_norm": 0.0, + "learning_rate": 1.9774180378251988e-05, + "loss": 1.4902, + "step": 3261 + }, + { + "epoch": 0.09577779082741206, + "grad_norm": 0.0, + "learning_rate": 1.977397938198034e-05, + "loss": 1.4355, + "step": 3262 + }, + { + "epoch": 0.09580715250455106, + "grad_norm": 0.0, + "learning_rate": 1.977377829732009e-05, + "loss": 1.4551, + "step": 3263 + }, + { + "epoch": 0.09583651418169006, + "grad_norm": 0.0, + "learning_rate": 1.9773577124273063e-05, + "loss": 1.5166, + "step": 3264 + }, + { + "epoch": 0.09586587585882905, + "grad_norm": 0.0, + "learning_rate": 1.9773375862841077e-05, + "loss": 1.5195, + "step": 3265 + }, + { + "epoch": 0.09589523753596806, + "grad_norm": 0.0, + "learning_rate": 1.9773174513025943e-05, + "loss": 1.4365, + "step": 3266 + }, + { + "epoch": 0.09592459921310706, + "grad_norm": 0.0, + "learning_rate": 1.9772973074829494e-05, + "loss": 1.4062, + "step": 3267 + }, + { + "epoch": 0.09595396089024605, + "grad_norm": 0.0, + "learning_rate": 1.9772771548253546e-05, + "loss": 1.5762, + "step": 3268 + }, + { + "epoch": 0.09598332256738505, + "grad_norm": 0.0, + "learning_rate": 1.9772569933299922e-05, + "loss": 1.4219, + "step": 3269 + }, + { + "epoch": 0.09601268424452404, + "grad_norm": 0.0, + "learning_rate": 1.9772368229970444e-05, + "loss": 1.376, + "step": 3270 + }, + { + "epoch": 0.09604204592166304, + "grad_norm": 0.0, + "learning_rate": 1.9772166438266936e-05, + "loss": 1.5449, + "step": 3271 + }, + { + "epoch": 0.09607140759880205, + "grad_norm": 0.0, + "learning_rate": 1.9771964558191227e-05, + "loss": 1.3457, + "step": 3272 + }, + { + "epoch": 0.09610076927594104, + "grad_norm": 0.0, + "learning_rate": 1.977176258974514e-05, + "loss": 1.4707, + "step": 3273 + }, + { + "epoch": 0.09613013095308004, + "grad_norm": 0.0, + "learning_rate": 1.97715605329305e-05, + "loss": 1.5566, + "step": 3274 + }, + { + "epoch": 0.09615949263021904, + "grad_norm": 0.0, + "learning_rate": 1.977135838774914e-05, + "loss": 1.4434, + "step": 3275 + }, + { + "epoch": 0.09618885430735803, + "grad_norm": 0.0, + "learning_rate": 1.9771156154202878e-05, + "loss": 1.4902, + "step": 3276 + }, + { + "epoch": 0.09621821598449704, + "grad_norm": 0.0, + "learning_rate": 1.977095383229355e-05, + "loss": 1.4336, + "step": 3277 + }, + { + "epoch": 0.09624757766163604, + "grad_norm": 0.0, + "learning_rate": 1.977075142202298e-05, + "loss": 1.5449, + "step": 3278 + }, + { + "epoch": 0.09627693933877503, + "grad_norm": 0.0, + "learning_rate": 1.9770548923393013e-05, + "loss": 1.5137, + "step": 3279 + }, + { + "epoch": 0.09630630101591403, + "grad_norm": 0.0, + "learning_rate": 1.9770346336405463e-05, + "loss": 1.4229, + "step": 3280 + }, + { + "epoch": 0.09633566269305303, + "grad_norm": 0.0, + "learning_rate": 1.9770143661062168e-05, + "loss": 1.4199, + "step": 3281 + }, + { + "epoch": 0.09636502437019202, + "grad_norm": 0.0, + "learning_rate": 1.9769940897364964e-05, + "loss": 1.4336, + "step": 3282 + }, + { + "epoch": 0.09639438604733103, + "grad_norm": 0.0, + "learning_rate": 1.9769738045315682e-05, + "loss": 1.459, + "step": 3283 + }, + { + "epoch": 0.09642374772447002, + "grad_norm": 0.0, + "learning_rate": 1.9769535104916154e-05, + "loss": 1.4629, + "step": 3284 + }, + { + "epoch": 0.09645310940160902, + "grad_norm": 0.0, + "learning_rate": 1.976933207616822e-05, + "loss": 1.3369, + "step": 3285 + }, + { + "epoch": 0.09648247107874802, + "grad_norm": 0.0, + "learning_rate": 1.9769128959073715e-05, + "loss": 1.542, + "step": 3286 + }, + { + "epoch": 0.09651183275588701, + "grad_norm": 0.0, + "learning_rate": 1.9768925753634475e-05, + "loss": 1.4414, + "step": 3287 + }, + { + "epoch": 0.09654119443302601, + "grad_norm": 0.0, + "learning_rate": 1.9768722459852337e-05, + "loss": 1.4111, + "step": 3288 + }, + { + "epoch": 0.09657055611016502, + "grad_norm": 0.0, + "learning_rate": 1.9768519077729138e-05, + "loss": 1.5176, + "step": 3289 + }, + { + "epoch": 0.09659991778730401, + "grad_norm": 0.0, + "learning_rate": 1.9768315607266722e-05, + "loss": 1.4004, + "step": 3290 + }, + { + "epoch": 0.09662927946444301, + "grad_norm": 0.0, + "learning_rate": 1.9768112048466928e-05, + "loss": 1.4775, + "step": 3291 + }, + { + "epoch": 0.09665864114158201, + "grad_norm": 0.0, + "learning_rate": 1.976790840133159e-05, + "loss": 1.46, + "step": 3292 + }, + { + "epoch": 0.096688002818721, + "grad_norm": 0.0, + "learning_rate": 1.9767704665862556e-05, + "loss": 1.5371, + "step": 3293 + }, + { + "epoch": 0.09671736449586, + "grad_norm": 0.0, + "learning_rate": 1.976750084206167e-05, + "loss": 1.4316, + "step": 3294 + }, + { + "epoch": 0.096746726172999, + "grad_norm": 0.0, + "learning_rate": 1.976729692993077e-05, + "loss": 1.5576, + "step": 3295 + }, + { + "epoch": 0.096776087850138, + "grad_norm": 0.0, + "learning_rate": 1.9767092929471703e-05, + "loss": 1.3975, + "step": 3296 + }, + { + "epoch": 0.096805449527277, + "grad_norm": 0.0, + "learning_rate": 1.9766888840686315e-05, + "loss": 1.459, + "step": 3297 + }, + { + "epoch": 0.09683481120441599, + "grad_norm": 0.0, + "learning_rate": 1.9766684663576447e-05, + "loss": 1.5098, + "step": 3298 + }, + { + "epoch": 0.096864172881555, + "grad_norm": 0.0, + "learning_rate": 1.976648039814395e-05, + "loss": 1.4697, + "step": 3299 + }, + { + "epoch": 0.096893534558694, + "grad_norm": 0.0, + "learning_rate": 1.976627604439067e-05, + "loss": 1.5186, + "step": 3300 + }, + { + "epoch": 0.09692289623583299, + "grad_norm": 0.0, + "learning_rate": 1.9766071602318452e-05, + "loss": 1.4482, + "step": 3301 + }, + { + "epoch": 0.09695225791297199, + "grad_norm": 0.0, + "learning_rate": 1.976586707192915e-05, + "loss": 1.3105, + "step": 3302 + }, + { + "epoch": 0.09698161959011099, + "grad_norm": 0.0, + "learning_rate": 1.976566245322461e-05, + "loss": 1.582, + "step": 3303 + }, + { + "epoch": 0.09701098126724998, + "grad_norm": 0.0, + "learning_rate": 1.9765457746206683e-05, + "loss": 1.5566, + "step": 3304 + }, + { + "epoch": 0.09704034294438899, + "grad_norm": 0.0, + "learning_rate": 1.9765252950877222e-05, + "loss": 1.6309, + "step": 3305 + }, + { + "epoch": 0.09706970462152799, + "grad_norm": 0.0, + "learning_rate": 1.9765048067238078e-05, + "loss": 1.4346, + "step": 3306 + }, + { + "epoch": 0.09709906629866698, + "grad_norm": 0.0, + "learning_rate": 1.9764843095291103e-05, + "loss": 1.4326, + "step": 3307 + }, + { + "epoch": 0.09712842797580598, + "grad_norm": 0.0, + "learning_rate": 1.976463803503815e-05, + "loss": 1.4082, + "step": 3308 + }, + { + "epoch": 0.09715778965294497, + "grad_norm": 0.0, + "learning_rate": 1.976443288648107e-05, + "loss": 1.625, + "step": 3309 + }, + { + "epoch": 0.09718715133008397, + "grad_norm": 0.0, + "learning_rate": 1.976422764962173e-05, + "loss": 1.5518, + "step": 3310 + }, + { + "epoch": 0.09721651300722298, + "grad_norm": 0.0, + "learning_rate": 1.9764022324461977e-05, + "loss": 1.5049, + "step": 3311 + }, + { + "epoch": 0.09724587468436197, + "grad_norm": 0.0, + "learning_rate": 1.976381691100367e-05, + "loss": 1.5137, + "step": 3312 + }, + { + "epoch": 0.09727523636150097, + "grad_norm": 0.0, + "learning_rate": 1.9763611409248663e-05, + "loss": 1.4668, + "step": 3313 + }, + { + "epoch": 0.09730459803863997, + "grad_norm": 0.0, + "learning_rate": 1.9763405819198816e-05, + "loss": 1.5752, + "step": 3314 + }, + { + "epoch": 0.09733395971577896, + "grad_norm": 0.0, + "learning_rate": 1.9763200140855996e-05, + "loss": 1.4199, + "step": 3315 + }, + { + "epoch": 0.09736332139291796, + "grad_norm": 0.0, + "learning_rate": 1.9762994374222054e-05, + "loss": 1.4062, + "step": 3316 + }, + { + "epoch": 0.09739268307005697, + "grad_norm": 0.0, + "learning_rate": 1.976278851929885e-05, + "loss": 1.5732, + "step": 3317 + }, + { + "epoch": 0.09742204474719596, + "grad_norm": 0.0, + "learning_rate": 1.9762582576088256e-05, + "loss": 1.5928, + "step": 3318 + }, + { + "epoch": 0.09745140642433496, + "grad_norm": 0.0, + "learning_rate": 1.9762376544592122e-05, + "loss": 1.4424, + "step": 3319 + }, + { + "epoch": 0.09748076810147395, + "grad_norm": 0.0, + "learning_rate": 1.9762170424812322e-05, + "loss": 1.5117, + "step": 3320 + }, + { + "epoch": 0.09751012977861295, + "grad_norm": 0.0, + "learning_rate": 1.976196421675071e-05, + "loss": 1.5674, + "step": 3321 + }, + { + "epoch": 0.09753949145575196, + "grad_norm": 0.0, + "learning_rate": 1.976175792040916e-05, + "loss": 1.4297, + "step": 3322 + }, + { + "epoch": 0.09756885313289095, + "grad_norm": 0.0, + "learning_rate": 1.976155153578953e-05, + "loss": 1.4756, + "step": 3323 + }, + { + "epoch": 0.09759821481002995, + "grad_norm": 0.0, + "learning_rate": 1.9761345062893687e-05, + "loss": 1.5029, + "step": 3324 + }, + { + "epoch": 0.09762757648716895, + "grad_norm": 0.0, + "learning_rate": 1.9761138501723504e-05, + "loss": 1.4727, + "step": 3325 + }, + { + "epoch": 0.09765693816430794, + "grad_norm": 0.0, + "learning_rate": 1.9760931852280842e-05, + "loss": 1.4521, + "step": 3326 + }, + { + "epoch": 0.09768629984144694, + "grad_norm": 0.0, + "learning_rate": 1.976072511456758e-05, + "loss": 1.4414, + "step": 3327 + }, + { + "epoch": 0.09771566151858595, + "grad_norm": 0.0, + "learning_rate": 1.9760518288585576e-05, + "loss": 1.4014, + "step": 3328 + }, + { + "epoch": 0.09774502319572494, + "grad_norm": 0.0, + "learning_rate": 1.976031137433671e-05, + "loss": 1.4365, + "step": 3329 + }, + { + "epoch": 0.09777438487286394, + "grad_norm": 0.0, + "learning_rate": 1.976010437182284e-05, + "loss": 1.4512, + "step": 3330 + }, + { + "epoch": 0.09780374655000294, + "grad_norm": 0.0, + "learning_rate": 1.9759897281045856e-05, + "loss": 1.3984, + "step": 3331 + }, + { + "epoch": 0.09783310822714193, + "grad_norm": 0.0, + "learning_rate": 1.9759690102007615e-05, + "loss": 1.4863, + "step": 3332 + }, + { + "epoch": 0.09786246990428094, + "grad_norm": 0.0, + "learning_rate": 1.975948283471e-05, + "loss": 1.5205, + "step": 3333 + }, + { + "epoch": 0.09789183158141992, + "grad_norm": 0.0, + "learning_rate": 1.975927547915488e-05, + "loss": 1.4678, + "step": 3334 + }, + { + "epoch": 0.09792119325855893, + "grad_norm": 0.0, + "learning_rate": 1.9759068035344135e-05, + "loss": 1.3633, + "step": 3335 + }, + { + "epoch": 0.09795055493569793, + "grad_norm": 0.0, + "learning_rate": 1.9758860503279637e-05, + "loss": 1.3555, + "step": 3336 + }, + { + "epoch": 0.09797991661283692, + "grad_norm": 0.0, + "learning_rate": 1.975865288296326e-05, + "loss": 1.4688, + "step": 3337 + }, + { + "epoch": 0.09800927828997592, + "grad_norm": 0.0, + "learning_rate": 1.9758445174396894e-05, + "loss": 1.457, + "step": 3338 + }, + { + "epoch": 0.09803863996711493, + "grad_norm": 0.0, + "learning_rate": 1.9758237377582407e-05, + "loss": 1.5459, + "step": 3339 + }, + { + "epoch": 0.09806800164425392, + "grad_norm": 0.0, + "learning_rate": 1.9758029492521675e-05, + "loss": 1.3643, + "step": 3340 + }, + { + "epoch": 0.09809736332139292, + "grad_norm": 0.0, + "learning_rate": 1.975782151921659e-05, + "loss": 1.5264, + "step": 3341 + }, + { + "epoch": 0.09812672499853192, + "grad_norm": 0.0, + "learning_rate": 1.9757613457669023e-05, + "loss": 1.4434, + "step": 3342 + }, + { + "epoch": 0.09815608667567091, + "grad_norm": 0.0, + "learning_rate": 1.9757405307880856e-05, + "loss": 1.6562, + "step": 3343 + }, + { + "epoch": 0.09818544835280991, + "grad_norm": 0.0, + "learning_rate": 1.9757197069853977e-05, + "loss": 1.5117, + "step": 3344 + }, + { + "epoch": 0.0982148100299489, + "grad_norm": 0.0, + "learning_rate": 1.9756988743590266e-05, + "loss": 1.4971, + "step": 3345 + }, + { + "epoch": 0.09824417170708791, + "grad_norm": 0.0, + "learning_rate": 1.9756780329091608e-05, + "loss": 1.5254, + "step": 3346 + }, + { + "epoch": 0.09827353338422691, + "grad_norm": 0.0, + "learning_rate": 1.9756571826359882e-05, + "loss": 1.542, + "step": 3347 + }, + { + "epoch": 0.0983028950613659, + "grad_norm": 0.0, + "learning_rate": 1.9756363235396983e-05, + "loss": 1.5566, + "step": 3348 + }, + { + "epoch": 0.0983322567385049, + "grad_norm": 0.0, + "learning_rate": 1.975615455620479e-05, + "loss": 1.4766, + "step": 3349 + }, + { + "epoch": 0.0983616184156439, + "grad_norm": 0.0, + "learning_rate": 1.9755945788785197e-05, + "loss": 1.4463, + "step": 3350 + }, + { + "epoch": 0.0983909800927829, + "grad_norm": 0.0, + "learning_rate": 1.975573693314008e-05, + "loss": 1.375, + "step": 3351 + }, + { + "epoch": 0.0984203417699219, + "grad_norm": 0.0, + "learning_rate": 1.9755527989271343e-05, + "loss": 1.3975, + "step": 3352 + }, + { + "epoch": 0.0984497034470609, + "grad_norm": 0.0, + "learning_rate": 1.9755318957180864e-05, + "loss": 1.4863, + "step": 3353 + }, + { + "epoch": 0.09847906512419989, + "grad_norm": 0.0, + "learning_rate": 1.9755109836870535e-05, + "loss": 1.4473, + "step": 3354 + }, + { + "epoch": 0.0985084268013389, + "grad_norm": 0.0, + "learning_rate": 1.9754900628342254e-05, + "loss": 1.4668, + "step": 3355 + }, + { + "epoch": 0.0985377884784779, + "grad_norm": 0.0, + "learning_rate": 1.975469133159791e-05, + "loss": 1.5059, + "step": 3356 + }, + { + "epoch": 0.09856715015561689, + "grad_norm": 0.0, + "learning_rate": 1.975448194663939e-05, + "loss": 1.4023, + "step": 3357 + }, + { + "epoch": 0.09859651183275589, + "grad_norm": 0.0, + "learning_rate": 1.975427247346859e-05, + "loss": 1.6416, + "step": 3358 + }, + { + "epoch": 0.09862587350989488, + "grad_norm": 0.0, + "learning_rate": 1.975406291208741e-05, + "loss": 1.5498, + "step": 3359 + }, + { + "epoch": 0.09865523518703388, + "grad_norm": 0.0, + "learning_rate": 1.975385326249774e-05, + "loss": 1.5908, + "step": 3360 + }, + { + "epoch": 0.09868459686417289, + "grad_norm": 0.0, + "learning_rate": 1.9753643524701475e-05, + "loss": 1.5361, + "step": 3361 + }, + { + "epoch": 0.09871395854131187, + "grad_norm": 0.0, + "learning_rate": 1.9753433698700517e-05, + "loss": 1.5322, + "step": 3362 + }, + { + "epoch": 0.09874332021845088, + "grad_norm": 0.0, + "learning_rate": 1.9753223784496754e-05, + "loss": 1.4424, + "step": 3363 + }, + { + "epoch": 0.09877268189558988, + "grad_norm": 0.0, + "learning_rate": 1.9753013782092098e-05, + "loss": 1.5996, + "step": 3364 + }, + { + "epoch": 0.09880204357272887, + "grad_norm": 0.0, + "learning_rate": 1.9752803691488438e-05, + "loss": 1.373, + "step": 3365 + }, + { + "epoch": 0.09883140524986787, + "grad_norm": 0.0, + "learning_rate": 1.9752593512687673e-05, + "loss": 1.5479, + "step": 3366 + }, + { + "epoch": 0.09886076692700688, + "grad_norm": 0.0, + "learning_rate": 1.9752383245691713e-05, + "loss": 1.334, + "step": 3367 + }, + { + "epoch": 0.09889012860414587, + "grad_norm": 0.0, + "learning_rate": 1.975217289050245e-05, + "loss": 1.4131, + "step": 3368 + }, + { + "epoch": 0.09891949028128487, + "grad_norm": 0.0, + "learning_rate": 1.975196244712179e-05, + "loss": 1.4609, + "step": 3369 + }, + { + "epoch": 0.09894885195842386, + "grad_norm": 0.0, + "learning_rate": 1.9751751915551637e-05, + "loss": 1.4424, + "step": 3370 + }, + { + "epoch": 0.09897821363556286, + "grad_norm": 0.0, + "learning_rate": 1.9751541295793894e-05, + "loss": 1.5879, + "step": 3371 + }, + { + "epoch": 0.09900757531270186, + "grad_norm": 0.0, + "learning_rate": 1.9751330587850464e-05, + "loss": 1.4336, + "step": 3372 + }, + { + "epoch": 0.09903693698984085, + "grad_norm": 0.0, + "learning_rate": 1.9751119791723258e-05, + "loss": 1.4014, + "step": 3373 + }, + { + "epoch": 0.09906629866697986, + "grad_norm": 0.0, + "learning_rate": 1.9750908907414175e-05, + "loss": 1.3965, + "step": 3374 + }, + { + "epoch": 0.09909566034411886, + "grad_norm": 0.0, + "learning_rate": 1.9750697934925127e-05, + "loss": 1.375, + "step": 3375 + }, + { + "epoch": 0.09912502202125785, + "grad_norm": 0.0, + "learning_rate": 1.9750486874258018e-05, + "loss": 1.4248, + "step": 3376 + }, + { + "epoch": 0.09915438369839685, + "grad_norm": 0.0, + "learning_rate": 1.9750275725414762e-05, + "loss": 1.3721, + "step": 3377 + }, + { + "epoch": 0.09918374537553586, + "grad_norm": 0.0, + "learning_rate": 1.9750064488397263e-05, + "loss": 1.5703, + "step": 3378 + }, + { + "epoch": 0.09921310705267485, + "grad_norm": 0.0, + "learning_rate": 1.9749853163207437e-05, + "loss": 1.4307, + "step": 3379 + }, + { + "epoch": 0.09924246872981385, + "grad_norm": 0.0, + "learning_rate": 1.974964174984719e-05, + "loss": 1.3018, + "step": 3380 + }, + { + "epoch": 0.09927183040695285, + "grad_norm": 0.0, + "learning_rate": 1.9749430248318435e-05, + "loss": 1.6172, + "step": 3381 + }, + { + "epoch": 0.09930119208409184, + "grad_norm": 0.0, + "learning_rate": 1.974921865862309e-05, + "loss": 1.4717, + "step": 3382 + }, + { + "epoch": 0.09933055376123084, + "grad_norm": 0.0, + "learning_rate": 1.9749006980763058e-05, + "loss": 1.5898, + "step": 3383 + }, + { + "epoch": 0.09935991543836983, + "grad_norm": 0.0, + "learning_rate": 1.974879521474026e-05, + "loss": 1.4785, + "step": 3384 + }, + { + "epoch": 0.09938927711550884, + "grad_norm": 0.0, + "learning_rate": 1.974858336055661e-05, + "loss": 1.6494, + "step": 3385 + }, + { + "epoch": 0.09941863879264784, + "grad_norm": 0.0, + "learning_rate": 1.9748371418214027e-05, + "loss": 1.4961, + "step": 3386 + }, + { + "epoch": 0.09944800046978683, + "grad_norm": 0.0, + "learning_rate": 1.9748159387714423e-05, + "loss": 1.4648, + "step": 3387 + }, + { + "epoch": 0.09947736214692583, + "grad_norm": 0.0, + "learning_rate": 1.9747947269059717e-05, + "loss": 1.5049, + "step": 3388 + }, + { + "epoch": 0.09950672382406484, + "grad_norm": 0.0, + "learning_rate": 1.9747735062251826e-05, + "loss": 1.4697, + "step": 3389 + }, + { + "epoch": 0.09953608550120382, + "grad_norm": 0.0, + "learning_rate": 1.9747522767292673e-05, + "loss": 1.5391, + "step": 3390 + }, + { + "epoch": 0.09956544717834283, + "grad_norm": 0.0, + "learning_rate": 1.9747310384184174e-05, + "loss": 1.5254, + "step": 3391 + }, + { + "epoch": 0.09959480885548183, + "grad_norm": 0.0, + "learning_rate": 1.9747097912928253e-05, + "loss": 1.4521, + "step": 3392 + }, + { + "epoch": 0.09962417053262082, + "grad_norm": 0.0, + "learning_rate": 1.9746885353526827e-05, + "loss": 1.4229, + "step": 3393 + }, + { + "epoch": 0.09965353220975982, + "grad_norm": 0.0, + "learning_rate": 1.974667270598182e-05, + "loss": 1.5264, + "step": 3394 + }, + { + "epoch": 0.09968289388689881, + "grad_norm": 0.0, + "learning_rate": 1.9746459970295156e-05, + "loss": 1.4688, + "step": 3395 + }, + { + "epoch": 0.09971225556403782, + "grad_norm": 0.0, + "learning_rate": 1.974624714646876e-05, + "loss": 1.4922, + "step": 3396 + }, + { + "epoch": 0.09974161724117682, + "grad_norm": 0.0, + "learning_rate": 1.9746034234504554e-05, + "loss": 1.4551, + "step": 3397 + }, + { + "epoch": 0.09977097891831581, + "grad_norm": 0.0, + "learning_rate": 1.9745821234404463e-05, + "loss": 1.5322, + "step": 3398 + }, + { + "epoch": 0.09980034059545481, + "grad_norm": 0.0, + "learning_rate": 1.9745608146170417e-05, + "loss": 1.5225, + "step": 3399 + }, + { + "epoch": 0.09982970227259381, + "grad_norm": 0.0, + "learning_rate": 1.9745394969804338e-05, + "loss": 1.417, + "step": 3400 + }, + { + "epoch": 0.0998590639497328, + "grad_norm": 0.0, + "learning_rate": 1.974518170530816e-05, + "loss": 1.3945, + "step": 3401 + }, + { + "epoch": 0.09988842562687181, + "grad_norm": 0.0, + "learning_rate": 1.9744968352683805e-05, + "loss": 1.5713, + "step": 3402 + }, + { + "epoch": 0.09991778730401081, + "grad_norm": 0.0, + "learning_rate": 1.974475491193321e-05, + "loss": 1.4141, + "step": 3403 + }, + { + "epoch": 0.0999471489811498, + "grad_norm": 0.0, + "learning_rate": 1.9744541383058293e-05, + "loss": 1.4453, + "step": 3404 + }, + { + "epoch": 0.0999765106582888, + "grad_norm": 0.0, + "learning_rate": 1.9744327766061e-05, + "loss": 1.5605, + "step": 3405 + }, + { + "epoch": 0.1000058723354278, + "grad_norm": 0.0, + "learning_rate": 1.974411406094325e-05, + "loss": 1.4922, + "step": 3406 + }, + { + "epoch": 0.1000352340125668, + "grad_norm": 0.0, + "learning_rate": 1.9743900267706985e-05, + "loss": 1.3877, + "step": 3407 + }, + { + "epoch": 0.1000645956897058, + "grad_norm": 0.0, + "learning_rate": 1.974368638635413e-05, + "loss": 1.5391, + "step": 3408 + }, + { + "epoch": 0.10009395736684479, + "grad_norm": 0.0, + "learning_rate": 1.974347241688663e-05, + "loss": 1.5547, + "step": 3409 + }, + { + "epoch": 0.10012331904398379, + "grad_norm": 0.0, + "learning_rate": 1.974325835930641e-05, + "loss": 1.4766, + "step": 3410 + }, + { + "epoch": 0.1001526807211228, + "grad_norm": 0.0, + "learning_rate": 1.9743044213615405e-05, + "loss": 1.4629, + "step": 3411 + }, + { + "epoch": 0.10018204239826178, + "grad_norm": 0.0, + "learning_rate": 1.9742829979815563e-05, + "loss": 1.3135, + "step": 3412 + }, + { + "epoch": 0.10021140407540079, + "grad_norm": 0.0, + "learning_rate": 1.974261565790881e-05, + "loss": 1.4229, + "step": 3413 + }, + { + "epoch": 0.10024076575253979, + "grad_norm": 0.0, + "learning_rate": 1.974240124789709e-05, + "loss": 1.3691, + "step": 3414 + }, + { + "epoch": 0.10027012742967878, + "grad_norm": 0.0, + "learning_rate": 1.974218674978234e-05, + "loss": 1.5225, + "step": 3415 + }, + { + "epoch": 0.10029948910681778, + "grad_norm": 0.0, + "learning_rate": 1.9741972163566502e-05, + "loss": 1.3926, + "step": 3416 + }, + { + "epoch": 0.10032885078395679, + "grad_norm": 0.0, + "learning_rate": 1.9741757489251513e-05, + "loss": 1.4668, + "step": 3417 + }, + { + "epoch": 0.10035821246109577, + "grad_norm": 0.0, + "learning_rate": 1.9741542726839316e-05, + "loss": 1.4785, + "step": 3418 + }, + { + "epoch": 0.10038757413823478, + "grad_norm": 0.0, + "learning_rate": 1.9741327876331855e-05, + "loss": 1.3867, + "step": 3419 + }, + { + "epoch": 0.10041693581537377, + "grad_norm": 0.0, + "learning_rate": 1.974111293773107e-05, + "loss": 1.6201, + "step": 3420 + }, + { + "epoch": 0.10044629749251277, + "grad_norm": 0.0, + "learning_rate": 1.974089791103891e-05, + "loss": 1.4521, + "step": 3421 + }, + { + "epoch": 0.10047565916965177, + "grad_norm": 0.0, + "learning_rate": 1.9740682796257306e-05, + "loss": 1.4219, + "step": 3422 + }, + { + "epoch": 0.10050502084679076, + "grad_norm": 0.0, + "learning_rate": 1.974046759338822e-05, + "loss": 1.585, + "step": 3423 + }, + { + "epoch": 0.10053438252392977, + "grad_norm": 0.0, + "learning_rate": 1.9740252302433588e-05, + "loss": 1.5059, + "step": 3424 + }, + { + "epoch": 0.10056374420106877, + "grad_norm": 0.0, + "learning_rate": 1.9740036923395362e-05, + "loss": 1.6016, + "step": 3425 + }, + { + "epoch": 0.10059310587820776, + "grad_norm": 0.0, + "learning_rate": 1.9739821456275488e-05, + "loss": 1.4756, + "step": 3426 + }, + { + "epoch": 0.10062246755534676, + "grad_norm": 0.0, + "learning_rate": 1.9739605901075913e-05, + "loss": 1.5498, + "step": 3427 + }, + { + "epoch": 0.10065182923248576, + "grad_norm": 0.0, + "learning_rate": 1.9739390257798586e-05, + "loss": 1.5664, + "step": 3428 + }, + { + "epoch": 0.10068119090962475, + "grad_norm": 0.0, + "learning_rate": 1.9739174526445462e-05, + "loss": 1.5342, + "step": 3429 + }, + { + "epoch": 0.10071055258676376, + "grad_norm": 0.0, + "learning_rate": 1.9738958707018486e-05, + "loss": 1.4121, + "step": 3430 + }, + { + "epoch": 0.10073991426390276, + "grad_norm": 0.0, + "learning_rate": 1.9738742799519613e-05, + "loss": 1.5166, + "step": 3431 + }, + { + "epoch": 0.10076927594104175, + "grad_norm": 0.0, + "learning_rate": 1.9738526803950794e-05, + "loss": 1.4951, + "step": 3432 + }, + { + "epoch": 0.10079863761818075, + "grad_norm": 0.0, + "learning_rate": 1.9738310720313984e-05, + "loss": 1.4238, + "step": 3433 + }, + { + "epoch": 0.10082799929531974, + "grad_norm": 0.0, + "learning_rate": 1.9738094548611138e-05, + "loss": 1.417, + "step": 3434 + }, + { + "epoch": 0.10085736097245875, + "grad_norm": 0.0, + "learning_rate": 1.9737878288844204e-05, + "loss": 1.3945, + "step": 3435 + }, + { + "epoch": 0.10088672264959775, + "grad_norm": 0.0, + "learning_rate": 1.9737661941015148e-05, + "loss": 1.3887, + "step": 3436 + }, + { + "epoch": 0.10091608432673674, + "grad_norm": 0.0, + "learning_rate": 1.9737445505125918e-05, + "loss": 1.4395, + "step": 3437 + }, + { + "epoch": 0.10094544600387574, + "grad_norm": 0.0, + "learning_rate": 1.9737228981178475e-05, + "loss": 1.5205, + "step": 3438 + }, + { + "epoch": 0.10097480768101474, + "grad_norm": 0.0, + "learning_rate": 1.973701236917478e-05, + "loss": 1.4258, + "step": 3439 + }, + { + "epoch": 0.10100416935815373, + "grad_norm": 0.0, + "learning_rate": 1.9736795669116784e-05, + "loss": 1.416, + "step": 3440 + }, + { + "epoch": 0.10103353103529274, + "grad_norm": 0.0, + "learning_rate": 1.9736578881006453e-05, + "loss": 1.5986, + "step": 3441 + }, + { + "epoch": 0.10106289271243174, + "grad_norm": 0.0, + "learning_rate": 1.9736362004845742e-05, + "loss": 1.4404, + "step": 3442 + }, + { + "epoch": 0.10109225438957073, + "grad_norm": 0.0, + "learning_rate": 1.973614504063662e-05, + "loss": 1.3799, + "step": 3443 + }, + { + "epoch": 0.10112161606670973, + "grad_norm": 0.0, + "learning_rate": 1.9735927988381044e-05, + "loss": 1.4834, + "step": 3444 + }, + { + "epoch": 0.10115097774384872, + "grad_norm": 0.0, + "learning_rate": 1.9735710848080977e-05, + "loss": 1.5596, + "step": 3445 + }, + { + "epoch": 0.10118033942098772, + "grad_norm": 0.0, + "learning_rate": 1.9735493619738387e-05, + "loss": 1.4961, + "step": 3446 + }, + { + "epoch": 0.10120970109812673, + "grad_norm": 0.0, + "learning_rate": 1.973527630335523e-05, + "loss": 1.3838, + "step": 3447 + }, + { + "epoch": 0.10123906277526572, + "grad_norm": 0.0, + "learning_rate": 1.973505889893348e-05, + "loss": 1.3994, + "step": 3448 + }, + { + "epoch": 0.10126842445240472, + "grad_norm": 0.0, + "learning_rate": 1.9734841406475097e-05, + "loss": 1.5117, + "step": 3449 + }, + { + "epoch": 0.10129778612954372, + "grad_norm": 0.0, + "learning_rate": 1.973462382598205e-05, + "loss": 1.5547, + "step": 3450 + }, + { + "epoch": 0.10132714780668271, + "grad_norm": 0.0, + "learning_rate": 1.9734406157456308e-05, + "loss": 1.5996, + "step": 3451 + }, + { + "epoch": 0.10135650948382172, + "grad_norm": 0.0, + "learning_rate": 1.9734188400899835e-05, + "loss": 1.5488, + "step": 3452 + }, + { + "epoch": 0.10138587116096072, + "grad_norm": 0.0, + "learning_rate": 1.9733970556314606e-05, + "loss": 1.5215, + "step": 3453 + }, + { + "epoch": 0.10141523283809971, + "grad_norm": 0.0, + "learning_rate": 1.973375262370259e-05, + "loss": 1.4463, + "step": 3454 + }, + { + "epoch": 0.10144459451523871, + "grad_norm": 0.0, + "learning_rate": 1.973353460306575e-05, + "loss": 1.4043, + "step": 3455 + }, + { + "epoch": 0.10147395619237772, + "grad_norm": 0.0, + "learning_rate": 1.973331649440607e-05, + "loss": 1.5327, + "step": 3456 + }, + { + "epoch": 0.1015033178695167, + "grad_norm": 0.0, + "learning_rate": 1.9733098297725515e-05, + "loss": 1.4111, + "step": 3457 + }, + { + "epoch": 0.10153267954665571, + "grad_norm": 0.0, + "learning_rate": 1.973288001302606e-05, + "loss": 1.4873, + "step": 3458 + }, + { + "epoch": 0.1015620412237947, + "grad_norm": 0.0, + "learning_rate": 1.9732661640309672e-05, + "loss": 1.3994, + "step": 3459 + }, + { + "epoch": 0.1015914029009337, + "grad_norm": 0.0, + "learning_rate": 1.9732443179578342e-05, + "loss": 1.5664, + "step": 3460 + }, + { + "epoch": 0.1016207645780727, + "grad_norm": 0.0, + "learning_rate": 1.9732224630834028e-05, + "loss": 1.5059, + "step": 3461 + }, + { + "epoch": 0.10165012625521169, + "grad_norm": 0.0, + "learning_rate": 1.9732005994078718e-05, + "loss": 1.4922, + "step": 3462 + }, + { + "epoch": 0.1016794879323507, + "grad_norm": 0.0, + "learning_rate": 1.9731787269314387e-05, + "loss": 1.5498, + "step": 3463 + }, + { + "epoch": 0.1017088496094897, + "grad_norm": 0.0, + "learning_rate": 1.9731568456543005e-05, + "loss": 1.4277, + "step": 3464 + }, + { + "epoch": 0.10173821128662869, + "grad_norm": 0.0, + "learning_rate": 1.9731349555766563e-05, + "loss": 1.3765, + "step": 3465 + }, + { + "epoch": 0.10176757296376769, + "grad_norm": 0.0, + "learning_rate": 1.9731130566987033e-05, + "loss": 1.5322, + "step": 3466 + }, + { + "epoch": 0.1017969346409067, + "grad_norm": 0.0, + "learning_rate": 1.9730911490206398e-05, + "loss": 1.5215, + "step": 3467 + }, + { + "epoch": 0.10182629631804568, + "grad_norm": 0.0, + "learning_rate": 1.973069232542664e-05, + "loss": 1.4531, + "step": 3468 + }, + { + "epoch": 0.10185565799518469, + "grad_norm": 0.0, + "learning_rate": 1.973047307264973e-05, + "loss": 1.4668, + "step": 3469 + }, + { + "epoch": 0.10188501967232369, + "grad_norm": 0.0, + "learning_rate": 1.9730253731877667e-05, + "loss": 1.5361, + "step": 3470 + }, + { + "epoch": 0.10191438134946268, + "grad_norm": 0.0, + "learning_rate": 1.973003430311243e-05, + "loss": 1.5264, + "step": 3471 + }, + { + "epoch": 0.10194374302660168, + "grad_norm": 0.0, + "learning_rate": 1.9729814786355997e-05, + "loss": 1.4678, + "step": 3472 + }, + { + "epoch": 0.10197310470374067, + "grad_norm": 0.0, + "learning_rate": 1.9729595181610357e-05, + "loss": 1.5068, + "step": 3473 + }, + { + "epoch": 0.10200246638087967, + "grad_norm": 0.0, + "learning_rate": 1.9729375488877496e-05, + "loss": 1.583, + "step": 3474 + }, + { + "epoch": 0.10203182805801868, + "grad_norm": 0.0, + "learning_rate": 1.9729155708159403e-05, + "loss": 1.5244, + "step": 3475 + }, + { + "epoch": 0.10206118973515767, + "grad_norm": 0.0, + "learning_rate": 1.972893583945806e-05, + "loss": 1.5498, + "step": 3476 + }, + { + "epoch": 0.10209055141229667, + "grad_norm": 0.0, + "learning_rate": 1.972871588277546e-05, + "loss": 1.4932, + "step": 3477 + }, + { + "epoch": 0.10211991308943567, + "grad_norm": 0.0, + "learning_rate": 1.9728495838113594e-05, + "loss": 1.498, + "step": 3478 + }, + { + "epoch": 0.10214927476657466, + "grad_norm": 0.0, + "learning_rate": 1.9728275705474443e-05, + "loss": 1.585, + "step": 3479 + }, + { + "epoch": 0.10217863644371367, + "grad_norm": 0.0, + "learning_rate": 1.9728055484860008e-05, + "loss": 1.4512, + "step": 3480 + }, + { + "epoch": 0.10220799812085267, + "grad_norm": 0.0, + "learning_rate": 1.9727835176272273e-05, + "loss": 1.6787, + "step": 3481 + }, + { + "epoch": 0.10223735979799166, + "grad_norm": 0.0, + "learning_rate": 1.9727614779713234e-05, + "loss": 1.5469, + "step": 3482 + }, + { + "epoch": 0.10226672147513066, + "grad_norm": 0.0, + "learning_rate": 1.9727394295184884e-05, + "loss": 1.5898, + "step": 3483 + }, + { + "epoch": 0.10229608315226965, + "grad_norm": 0.0, + "learning_rate": 1.9727173722689215e-05, + "loss": 1.4521, + "step": 3484 + }, + { + "epoch": 0.10232544482940865, + "grad_norm": 0.0, + "learning_rate": 1.9726953062228225e-05, + "loss": 1.4053, + "step": 3485 + }, + { + "epoch": 0.10235480650654766, + "grad_norm": 0.0, + "learning_rate": 1.9726732313803904e-05, + "loss": 1.5664, + "step": 3486 + }, + { + "epoch": 0.10238416818368665, + "grad_norm": 0.0, + "learning_rate": 1.9726511477418254e-05, + "loss": 1.5469, + "step": 3487 + }, + { + "epoch": 0.10241352986082565, + "grad_norm": 0.0, + "learning_rate": 1.9726290553073267e-05, + "loss": 1.4121, + "step": 3488 + }, + { + "epoch": 0.10244289153796465, + "grad_norm": 0.0, + "learning_rate": 1.9726069540770946e-05, + "loss": 1.4912, + "step": 3489 + }, + { + "epoch": 0.10247225321510364, + "grad_norm": 0.0, + "learning_rate": 1.972584844051329e-05, + "loss": 1.5322, + "step": 3490 + }, + { + "epoch": 0.10250161489224265, + "grad_norm": 0.0, + "learning_rate": 1.9725627252302292e-05, + "loss": 1.4609, + "step": 3491 + }, + { + "epoch": 0.10253097656938165, + "grad_norm": 0.0, + "learning_rate": 1.972540597613996e-05, + "loss": 1.5146, + "step": 3492 + }, + { + "epoch": 0.10256033824652064, + "grad_norm": 0.0, + "learning_rate": 1.9725184612028286e-05, + "loss": 1.3867, + "step": 3493 + }, + { + "epoch": 0.10258969992365964, + "grad_norm": 0.0, + "learning_rate": 1.9724963159969277e-05, + "loss": 1.4258, + "step": 3494 + }, + { + "epoch": 0.10261906160079864, + "grad_norm": 0.0, + "learning_rate": 1.972474161996494e-05, + "loss": 1.4863, + "step": 3495 + }, + { + "epoch": 0.10264842327793763, + "grad_norm": 0.0, + "learning_rate": 1.972451999201727e-05, + "loss": 1.6631, + "step": 3496 + }, + { + "epoch": 0.10267778495507664, + "grad_norm": 0.0, + "learning_rate": 1.9724298276128273e-05, + "loss": 1.4648, + "step": 3497 + }, + { + "epoch": 0.10270714663221563, + "grad_norm": 0.0, + "learning_rate": 1.972407647229996e-05, + "loss": 1.4922, + "step": 3498 + }, + { + "epoch": 0.10273650830935463, + "grad_norm": 0.0, + "learning_rate": 1.9723854580534336e-05, + "loss": 1.4443, + "step": 3499 + }, + { + "epoch": 0.10276586998649363, + "grad_norm": 0.0, + "learning_rate": 1.97236326008334e-05, + "loss": 1.4404, + "step": 3500 + }, + { + "epoch": 0.10279523166363262, + "grad_norm": 0.0, + "learning_rate": 1.9723410533199165e-05, + "loss": 1.4902, + "step": 3501 + }, + { + "epoch": 0.10282459334077163, + "grad_norm": 0.0, + "learning_rate": 1.972318837763364e-05, + "loss": 1.4951, + "step": 3502 + }, + { + "epoch": 0.10285395501791063, + "grad_norm": 0.0, + "learning_rate": 1.972296613413883e-05, + "loss": 1.3623, + "step": 3503 + }, + { + "epoch": 0.10288331669504962, + "grad_norm": 0.0, + "learning_rate": 1.9722743802716747e-05, + "loss": 1.4004, + "step": 3504 + }, + { + "epoch": 0.10291267837218862, + "grad_norm": 0.0, + "learning_rate": 1.9722521383369403e-05, + "loss": 1.6367, + "step": 3505 + }, + { + "epoch": 0.10294204004932762, + "grad_norm": 0.0, + "learning_rate": 1.972229887609881e-05, + "loss": 1.4648, + "step": 3506 + }, + { + "epoch": 0.10297140172646661, + "grad_norm": 0.0, + "learning_rate": 1.9722076280906977e-05, + "loss": 1.5469, + "step": 3507 + }, + { + "epoch": 0.10300076340360562, + "grad_norm": 0.0, + "learning_rate": 1.9721853597795918e-05, + "loss": 1.3945, + "step": 3508 + }, + { + "epoch": 0.1030301250807446, + "grad_norm": 0.0, + "learning_rate": 1.9721630826767646e-05, + "loss": 1.6094, + "step": 3509 + }, + { + "epoch": 0.10305948675788361, + "grad_norm": 0.0, + "learning_rate": 1.972140796782418e-05, + "loss": 1.4883, + "step": 3510 + }, + { + "epoch": 0.10308884843502261, + "grad_norm": 0.0, + "learning_rate": 1.9721185020967527e-05, + "loss": 1.4805, + "step": 3511 + }, + { + "epoch": 0.1031182101121616, + "grad_norm": 0.0, + "learning_rate": 1.9720961986199715e-05, + "loss": 1.5576, + "step": 3512 + }, + { + "epoch": 0.1031475717893006, + "grad_norm": 0.0, + "learning_rate": 1.9720738863522747e-05, + "loss": 1.5244, + "step": 3513 + }, + { + "epoch": 0.10317693346643961, + "grad_norm": 0.0, + "learning_rate": 1.972051565293865e-05, + "loss": 1.4443, + "step": 3514 + }, + { + "epoch": 0.1032062951435786, + "grad_norm": 0.0, + "learning_rate": 1.9720292354449444e-05, + "loss": 1.5615, + "step": 3515 + }, + { + "epoch": 0.1032356568207176, + "grad_norm": 0.0, + "learning_rate": 1.9720068968057144e-05, + "loss": 1.4062, + "step": 3516 + }, + { + "epoch": 0.1032650184978566, + "grad_norm": 0.0, + "learning_rate": 1.971984549376377e-05, + "loss": 1.3848, + "step": 3517 + }, + { + "epoch": 0.10329438017499559, + "grad_norm": 0.0, + "learning_rate": 1.971962193157134e-05, + "loss": 1.5381, + "step": 3518 + }, + { + "epoch": 0.1033237418521346, + "grad_norm": 0.0, + "learning_rate": 1.9719398281481883e-05, + "loss": 1.5586, + "step": 3519 + }, + { + "epoch": 0.1033531035292736, + "grad_norm": 0.0, + "learning_rate": 1.9719174543497418e-05, + "loss": 1.4053, + "step": 3520 + }, + { + "epoch": 0.10338246520641259, + "grad_norm": 0.0, + "learning_rate": 1.971895071761997e-05, + "loss": 1.3926, + "step": 3521 + }, + { + "epoch": 0.10341182688355159, + "grad_norm": 0.0, + "learning_rate": 1.9718726803851563e-05, + "loss": 1.5527, + "step": 3522 + }, + { + "epoch": 0.10344118856069058, + "grad_norm": 0.0, + "learning_rate": 1.9718502802194214e-05, + "loss": 1.4268, + "step": 3523 + }, + { + "epoch": 0.10347055023782958, + "grad_norm": 0.0, + "learning_rate": 1.971827871264996e-05, + "loss": 1.4258, + "step": 3524 + }, + { + "epoch": 0.10349991191496859, + "grad_norm": 0.0, + "learning_rate": 1.971805453522082e-05, + "loss": 1.4048, + "step": 3525 + }, + { + "epoch": 0.10352927359210758, + "grad_norm": 0.0, + "learning_rate": 1.971783026990883e-05, + "loss": 1.667, + "step": 3526 + }, + { + "epoch": 0.10355863526924658, + "grad_norm": 0.0, + "learning_rate": 1.9717605916716006e-05, + "loss": 1.5449, + "step": 3527 + }, + { + "epoch": 0.10358799694638558, + "grad_norm": 0.0, + "learning_rate": 1.9717381475644382e-05, + "loss": 1.5547, + "step": 3528 + }, + { + "epoch": 0.10361735862352457, + "grad_norm": 0.0, + "learning_rate": 1.9717156946695994e-05, + "loss": 1.4639, + "step": 3529 + }, + { + "epoch": 0.10364672030066358, + "grad_norm": 0.0, + "learning_rate": 1.9716932329872863e-05, + "loss": 1.498, + "step": 3530 + }, + { + "epoch": 0.10367608197780258, + "grad_norm": 0.0, + "learning_rate": 1.9716707625177027e-05, + "loss": 1.5947, + "step": 3531 + }, + { + "epoch": 0.10370544365494157, + "grad_norm": 0.0, + "learning_rate": 1.9716482832610515e-05, + "loss": 1.5039, + "step": 3532 + }, + { + "epoch": 0.10373480533208057, + "grad_norm": 0.0, + "learning_rate": 1.971625795217536e-05, + "loss": 1.5029, + "step": 3533 + }, + { + "epoch": 0.10376416700921956, + "grad_norm": 0.0, + "learning_rate": 1.9716032983873593e-05, + "loss": 1.3887, + "step": 3534 + }, + { + "epoch": 0.10379352868635856, + "grad_norm": 0.0, + "learning_rate": 1.9715807927707256e-05, + "loss": 1.4805, + "step": 3535 + }, + { + "epoch": 0.10382289036349757, + "grad_norm": 0.0, + "learning_rate": 1.9715582783678378e-05, + "loss": 1.2861, + "step": 3536 + }, + { + "epoch": 0.10385225204063656, + "grad_norm": 0.0, + "learning_rate": 1.9715357551789e-05, + "loss": 1.5928, + "step": 3537 + }, + { + "epoch": 0.10388161371777556, + "grad_norm": 0.0, + "learning_rate": 1.9715132232041147e-05, + "loss": 1.4404, + "step": 3538 + }, + { + "epoch": 0.10391097539491456, + "grad_norm": 0.0, + "learning_rate": 1.9714906824436873e-05, + "loss": 1.4629, + "step": 3539 + }, + { + "epoch": 0.10394033707205355, + "grad_norm": 0.0, + "learning_rate": 1.9714681328978206e-05, + "loss": 1.4111, + "step": 3540 + }, + { + "epoch": 0.10396969874919255, + "grad_norm": 0.0, + "learning_rate": 1.9714455745667186e-05, + "loss": 1.5391, + "step": 3541 + }, + { + "epoch": 0.10399906042633156, + "grad_norm": 0.0, + "learning_rate": 1.971423007450586e-05, + "loss": 1.5186, + "step": 3542 + }, + { + "epoch": 0.10402842210347055, + "grad_norm": 0.0, + "learning_rate": 1.9714004315496255e-05, + "loss": 1.5684, + "step": 3543 + }, + { + "epoch": 0.10405778378060955, + "grad_norm": 0.0, + "learning_rate": 1.9713778468640427e-05, + "loss": 1.4785, + "step": 3544 + }, + { + "epoch": 0.10408714545774855, + "grad_norm": 0.0, + "learning_rate": 1.971355253394041e-05, + "loss": 1.4062, + "step": 3545 + }, + { + "epoch": 0.10411650713488754, + "grad_norm": 0.0, + "learning_rate": 1.9713326511398254e-05, + "loss": 1.4238, + "step": 3546 + }, + { + "epoch": 0.10414586881202655, + "grad_norm": 0.0, + "learning_rate": 1.9713100401015995e-05, + "loss": 1.6094, + "step": 3547 + }, + { + "epoch": 0.10417523048916554, + "grad_norm": 0.0, + "learning_rate": 1.9712874202795684e-05, + "loss": 1.5098, + "step": 3548 + }, + { + "epoch": 0.10420459216630454, + "grad_norm": 0.0, + "learning_rate": 1.971264791673936e-05, + "loss": 1.4873, + "step": 3549 + }, + { + "epoch": 0.10423395384344354, + "grad_norm": 0.0, + "learning_rate": 1.971242154284908e-05, + "loss": 1.5391, + "step": 3550 + }, + { + "epoch": 0.10426331552058253, + "grad_norm": 0.0, + "learning_rate": 1.9712195081126878e-05, + "loss": 1.582, + "step": 3551 + }, + { + "epoch": 0.10429267719772153, + "grad_norm": 0.0, + "learning_rate": 1.971196853157481e-05, + "loss": 1.5303, + "step": 3552 + }, + { + "epoch": 0.10432203887486054, + "grad_norm": 0.0, + "learning_rate": 1.9711741894194927e-05, + "loss": 1.5752, + "step": 3553 + }, + { + "epoch": 0.10435140055199953, + "grad_norm": 0.0, + "learning_rate": 1.9711515168989274e-05, + "loss": 1.5176, + "step": 3554 + }, + { + "epoch": 0.10438076222913853, + "grad_norm": 0.0, + "learning_rate": 1.97112883559599e-05, + "loss": 1.4863, + "step": 3555 + }, + { + "epoch": 0.10441012390627753, + "grad_norm": 0.0, + "learning_rate": 1.971106145510886e-05, + "loss": 1.4014, + "step": 3556 + }, + { + "epoch": 0.10443948558341652, + "grad_norm": 0.0, + "learning_rate": 1.97108344664382e-05, + "loss": 1.4844, + "step": 3557 + }, + { + "epoch": 0.10446884726055553, + "grad_norm": 0.0, + "learning_rate": 1.9710607389949984e-05, + "loss": 1.6113, + "step": 3558 + }, + { + "epoch": 0.10449820893769451, + "grad_norm": 0.0, + "learning_rate": 1.9710380225646252e-05, + "loss": 1.4951, + "step": 3559 + }, + { + "epoch": 0.10452757061483352, + "grad_norm": 0.0, + "learning_rate": 1.971015297352907e-05, + "loss": 1.4492, + "step": 3560 + }, + { + "epoch": 0.10455693229197252, + "grad_norm": 0.0, + "learning_rate": 1.9709925633600483e-05, + "loss": 1.416, + "step": 3561 + }, + { + "epoch": 0.10458629396911151, + "grad_norm": 0.0, + "learning_rate": 1.9709698205862552e-05, + "loss": 1.5449, + "step": 3562 + }, + { + "epoch": 0.10461565564625051, + "grad_norm": 0.0, + "learning_rate": 1.9709470690317337e-05, + "loss": 1.5537, + "step": 3563 + }, + { + "epoch": 0.10464501732338952, + "grad_norm": 0.0, + "learning_rate": 1.970924308696689e-05, + "loss": 1.4648, + "step": 3564 + }, + { + "epoch": 0.1046743790005285, + "grad_norm": 0.0, + "learning_rate": 1.970901539581327e-05, + "loss": 1.4492, + "step": 3565 + }, + { + "epoch": 0.10470374067766751, + "grad_norm": 0.0, + "learning_rate": 1.970878761685854e-05, + "loss": 1.4678, + "step": 3566 + }, + { + "epoch": 0.10473310235480651, + "grad_norm": 0.0, + "learning_rate": 1.9708559750104755e-05, + "loss": 1.417, + "step": 3567 + }, + { + "epoch": 0.1047624640319455, + "grad_norm": 0.0, + "learning_rate": 1.9708331795553978e-05, + "loss": 1.54, + "step": 3568 + }, + { + "epoch": 0.1047918257090845, + "grad_norm": 0.0, + "learning_rate": 1.9708103753208273e-05, + "loss": 1.4326, + "step": 3569 + }, + { + "epoch": 0.10482118738622351, + "grad_norm": 0.0, + "learning_rate": 1.9707875623069697e-05, + "loss": 1.5703, + "step": 3570 + }, + { + "epoch": 0.1048505490633625, + "grad_norm": 0.0, + "learning_rate": 1.9707647405140317e-05, + "loss": 1.3945, + "step": 3571 + }, + { + "epoch": 0.1048799107405015, + "grad_norm": 0.0, + "learning_rate": 1.9707419099422192e-05, + "loss": 1.5596, + "step": 3572 + }, + { + "epoch": 0.10490927241764049, + "grad_norm": 0.0, + "learning_rate": 1.9707190705917393e-05, + "loss": 1.4189, + "step": 3573 + }, + { + "epoch": 0.10493863409477949, + "grad_norm": 0.0, + "learning_rate": 1.970696222462798e-05, + "loss": 1.502, + "step": 3574 + }, + { + "epoch": 0.1049679957719185, + "grad_norm": 0.0, + "learning_rate": 1.9706733655556026e-05, + "loss": 1.3945, + "step": 3575 + }, + { + "epoch": 0.10499735744905749, + "grad_norm": 0.0, + "learning_rate": 1.9706504998703588e-05, + "loss": 1.4512, + "step": 3576 + }, + { + "epoch": 0.10502671912619649, + "grad_norm": 0.0, + "learning_rate": 1.9706276254072743e-05, + "loss": 1.458, + "step": 3577 + }, + { + "epoch": 0.10505608080333549, + "grad_norm": 0.0, + "learning_rate": 1.9706047421665553e-05, + "loss": 1.3662, + "step": 3578 + }, + { + "epoch": 0.10508544248047448, + "grad_norm": 0.0, + "learning_rate": 1.9705818501484094e-05, + "loss": 1.4053, + "step": 3579 + }, + { + "epoch": 0.10511480415761348, + "grad_norm": 0.0, + "learning_rate": 1.970558949353043e-05, + "loss": 1.4912, + "step": 3580 + }, + { + "epoch": 0.10514416583475249, + "grad_norm": 0.0, + "learning_rate": 1.970536039780664e-05, + "loss": 1.5303, + "step": 3581 + }, + { + "epoch": 0.10517352751189148, + "grad_norm": 0.0, + "learning_rate": 1.9705131214314782e-05, + "loss": 1.4492, + "step": 3582 + }, + { + "epoch": 0.10520288918903048, + "grad_norm": 0.0, + "learning_rate": 1.9704901943056943e-05, + "loss": 1.5186, + "step": 3583 + }, + { + "epoch": 0.10523225086616947, + "grad_norm": 0.0, + "learning_rate": 1.970467258403519e-05, + "loss": 1.4512, + "step": 3584 + }, + { + "epoch": 0.10526161254330847, + "grad_norm": 0.0, + "learning_rate": 1.9704443137251594e-05, + "loss": 1.4277, + "step": 3585 + }, + { + "epoch": 0.10529097422044748, + "grad_norm": 0.0, + "learning_rate": 1.9704213602708234e-05, + "loss": 1.4004, + "step": 3586 + }, + { + "epoch": 0.10532033589758646, + "grad_norm": 0.0, + "learning_rate": 1.9703983980407185e-05, + "loss": 1.541, + "step": 3587 + }, + { + "epoch": 0.10534969757472547, + "grad_norm": 0.0, + "learning_rate": 1.9703754270350525e-05, + "loss": 1.542, + "step": 3588 + }, + { + "epoch": 0.10537905925186447, + "grad_norm": 0.0, + "learning_rate": 1.9703524472540333e-05, + "loss": 1.5635, + "step": 3589 + }, + { + "epoch": 0.10540842092900346, + "grad_norm": 0.0, + "learning_rate": 1.970329458697868e-05, + "loss": 1.3721, + "step": 3590 + }, + { + "epoch": 0.10543778260614246, + "grad_norm": 0.0, + "learning_rate": 1.9703064613667648e-05, + "loss": 1.3462, + "step": 3591 + }, + { + "epoch": 0.10546714428328147, + "grad_norm": 0.0, + "learning_rate": 1.970283455260932e-05, + "loss": 1.499, + "step": 3592 + }, + { + "epoch": 0.10549650596042046, + "grad_norm": 0.0, + "learning_rate": 1.9702604403805774e-05, + "loss": 1.5312, + "step": 3593 + }, + { + "epoch": 0.10552586763755946, + "grad_norm": 0.0, + "learning_rate": 1.9702374167259092e-05, + "loss": 1.5488, + "step": 3594 + }, + { + "epoch": 0.10555522931469846, + "grad_norm": 0.0, + "learning_rate": 1.9702143842971356e-05, + "loss": 1.4834, + "step": 3595 + }, + { + "epoch": 0.10558459099183745, + "grad_norm": 0.0, + "learning_rate": 1.9701913430944645e-05, + "loss": 1.4678, + "step": 3596 + }, + { + "epoch": 0.10561395266897645, + "grad_norm": 0.0, + "learning_rate": 1.970168293118105e-05, + "loss": 1.5547, + "step": 3597 + }, + { + "epoch": 0.10564331434611544, + "grad_norm": 0.0, + "learning_rate": 1.970145234368265e-05, + "loss": 1.6885, + "step": 3598 + }, + { + "epoch": 0.10567267602325445, + "grad_norm": 0.0, + "learning_rate": 1.9701221668451532e-05, + "loss": 1.3975, + "step": 3599 + }, + { + "epoch": 0.10570203770039345, + "grad_norm": 0.0, + "learning_rate": 1.970099090548978e-05, + "loss": 1.3877, + "step": 3600 + }, + { + "epoch": 0.10573139937753244, + "grad_norm": 0.0, + "learning_rate": 1.9700760054799484e-05, + "loss": 1.4854, + "step": 3601 + }, + { + "epoch": 0.10576076105467144, + "grad_norm": 0.0, + "learning_rate": 1.9700529116382735e-05, + "loss": 1.5215, + "step": 3602 + }, + { + "epoch": 0.10579012273181045, + "grad_norm": 0.0, + "learning_rate": 1.970029809024161e-05, + "loss": 1.4844, + "step": 3603 + }, + { + "epoch": 0.10581948440894944, + "grad_norm": 0.0, + "learning_rate": 1.970006697637821e-05, + "loss": 1.6592, + "step": 3604 + }, + { + "epoch": 0.10584884608608844, + "grad_norm": 0.0, + "learning_rate": 1.9699835774794616e-05, + "loss": 1.5156, + "step": 3605 + }, + { + "epoch": 0.10587820776322744, + "grad_norm": 0.0, + "learning_rate": 1.9699604485492927e-05, + "loss": 1.4424, + "step": 3606 + }, + { + "epoch": 0.10590756944036643, + "grad_norm": 0.0, + "learning_rate": 1.969937310847523e-05, + "loss": 1.4082, + "step": 3607 + }, + { + "epoch": 0.10593693111750543, + "grad_norm": 0.0, + "learning_rate": 1.9699141643743615e-05, + "loss": 1.4307, + "step": 3608 + }, + { + "epoch": 0.10596629279464442, + "grad_norm": 0.0, + "learning_rate": 1.969891009130018e-05, + "loss": 1.5596, + "step": 3609 + }, + { + "epoch": 0.10599565447178343, + "grad_norm": 0.0, + "learning_rate": 1.969867845114702e-05, + "loss": 1.4238, + "step": 3610 + }, + { + "epoch": 0.10602501614892243, + "grad_norm": 0.0, + "learning_rate": 1.969844672328622e-05, + "loss": 1.3721, + "step": 3611 + }, + { + "epoch": 0.10605437782606142, + "grad_norm": 0.0, + "learning_rate": 1.9698214907719885e-05, + "loss": 1.4209, + "step": 3612 + }, + { + "epoch": 0.10608373950320042, + "grad_norm": 0.0, + "learning_rate": 1.969798300445011e-05, + "loss": 1.5781, + "step": 3613 + }, + { + "epoch": 0.10611310118033943, + "grad_norm": 0.0, + "learning_rate": 1.969775101347899e-05, + "loss": 1.3848, + "step": 3614 + }, + { + "epoch": 0.10614246285747841, + "grad_norm": 0.0, + "learning_rate": 1.969751893480863e-05, + "loss": 1.4229, + "step": 3615 + }, + { + "epoch": 0.10617182453461742, + "grad_norm": 0.0, + "learning_rate": 1.9697286768441114e-05, + "loss": 1.3232, + "step": 3616 + }, + { + "epoch": 0.10620118621175642, + "grad_norm": 0.0, + "learning_rate": 1.9697054514378554e-05, + "loss": 1.3799, + "step": 3617 + }, + { + "epoch": 0.10623054788889541, + "grad_norm": 0.0, + "learning_rate": 1.9696822172623046e-05, + "loss": 1.4033, + "step": 3618 + }, + { + "epoch": 0.10625990956603441, + "grad_norm": 0.0, + "learning_rate": 1.9696589743176693e-05, + "loss": 1.4355, + "step": 3619 + }, + { + "epoch": 0.10628927124317342, + "grad_norm": 0.0, + "learning_rate": 1.9696357226041597e-05, + "loss": 1.5068, + "step": 3620 + }, + { + "epoch": 0.1063186329203124, + "grad_norm": 0.0, + "learning_rate": 1.9696124621219854e-05, + "loss": 1.4258, + "step": 3621 + }, + { + "epoch": 0.10634799459745141, + "grad_norm": 0.0, + "learning_rate": 1.969589192871358e-05, + "loss": 1.5908, + "step": 3622 + }, + { + "epoch": 0.1063773562745904, + "grad_norm": 0.0, + "learning_rate": 1.9695659148524865e-05, + "loss": 1.4961, + "step": 3623 + }, + { + "epoch": 0.1064067179517294, + "grad_norm": 0.0, + "learning_rate": 1.9695426280655825e-05, + "loss": 1.4502, + "step": 3624 + }, + { + "epoch": 0.1064360796288684, + "grad_norm": 0.0, + "learning_rate": 1.969519332510856e-05, + "loss": 1.4736, + "step": 3625 + }, + { + "epoch": 0.1064654413060074, + "grad_norm": 0.0, + "learning_rate": 1.969496028188518e-05, + "loss": 1.2969, + "step": 3626 + }, + { + "epoch": 0.1064948029831464, + "grad_norm": 0.0, + "learning_rate": 1.969472715098779e-05, + "loss": 1.5117, + "step": 3627 + }, + { + "epoch": 0.1065241646602854, + "grad_norm": 0.0, + "learning_rate": 1.9694493932418503e-05, + "loss": 1.6348, + "step": 3628 + }, + { + "epoch": 0.10655352633742439, + "grad_norm": 0.0, + "learning_rate": 1.9694260626179426e-05, + "loss": 1.5029, + "step": 3629 + }, + { + "epoch": 0.10658288801456339, + "grad_norm": 0.0, + "learning_rate": 1.9694027232272662e-05, + "loss": 1.5205, + "step": 3630 + }, + { + "epoch": 0.1066122496917024, + "grad_norm": 0.0, + "learning_rate": 1.9693793750700326e-05, + "loss": 1.4419, + "step": 3631 + }, + { + "epoch": 0.10664161136884139, + "grad_norm": 0.0, + "learning_rate": 1.9693560181464535e-05, + "loss": 1.5029, + "step": 3632 + }, + { + "epoch": 0.10667097304598039, + "grad_norm": 0.0, + "learning_rate": 1.9693326524567397e-05, + "loss": 1.5234, + "step": 3633 + }, + { + "epoch": 0.10670033472311938, + "grad_norm": 0.0, + "learning_rate": 1.9693092780011022e-05, + "loss": 1.4336, + "step": 3634 + }, + { + "epoch": 0.10672969640025838, + "grad_norm": 0.0, + "learning_rate": 1.969285894779753e-05, + "loss": 1.501, + "step": 3635 + }, + { + "epoch": 0.10675905807739738, + "grad_norm": 0.0, + "learning_rate": 1.9692625027929028e-05, + "loss": 1.5195, + "step": 3636 + }, + { + "epoch": 0.10678841975453637, + "grad_norm": 0.0, + "learning_rate": 1.9692391020407636e-05, + "loss": 1.4092, + "step": 3637 + }, + { + "epoch": 0.10681778143167538, + "grad_norm": 0.0, + "learning_rate": 1.969215692523547e-05, + "loss": 1.4404, + "step": 3638 + }, + { + "epoch": 0.10684714310881438, + "grad_norm": 0.0, + "learning_rate": 1.9691922742414648e-05, + "loss": 1.4395, + "step": 3639 + }, + { + "epoch": 0.10687650478595337, + "grad_norm": 0.0, + "learning_rate": 1.9691688471947287e-05, + "loss": 1.5195, + "step": 3640 + }, + { + "epoch": 0.10690586646309237, + "grad_norm": 0.0, + "learning_rate": 1.9691454113835506e-05, + "loss": 1.4961, + "step": 3641 + }, + { + "epoch": 0.10693522814023138, + "grad_norm": 0.0, + "learning_rate": 1.9691219668081422e-05, + "loss": 1.5684, + "step": 3642 + }, + { + "epoch": 0.10696458981737036, + "grad_norm": 0.0, + "learning_rate": 1.9690985134687153e-05, + "loss": 1.4209, + "step": 3643 + }, + { + "epoch": 0.10699395149450937, + "grad_norm": 0.0, + "learning_rate": 1.9690750513654828e-05, + "loss": 1.5107, + "step": 3644 + }, + { + "epoch": 0.10702331317164837, + "grad_norm": 0.0, + "learning_rate": 1.9690515804986562e-05, + "loss": 1.4873, + "step": 3645 + }, + { + "epoch": 0.10705267484878736, + "grad_norm": 0.0, + "learning_rate": 1.969028100868448e-05, + "loss": 1.5957, + "step": 3646 + }, + { + "epoch": 0.10708203652592636, + "grad_norm": 0.0, + "learning_rate": 1.969004612475071e-05, + "loss": 1.5273, + "step": 3647 + }, + { + "epoch": 0.10711139820306535, + "grad_norm": 0.0, + "learning_rate": 1.9689811153187365e-05, + "loss": 1.542, + "step": 3648 + }, + { + "epoch": 0.10714075988020436, + "grad_norm": 0.0, + "learning_rate": 1.9689576093996577e-05, + "loss": 1.6279, + "step": 3649 + }, + { + "epoch": 0.10717012155734336, + "grad_norm": 0.0, + "learning_rate": 1.9689340947180472e-05, + "loss": 1.2871, + "step": 3650 + }, + { + "epoch": 0.10719948323448235, + "grad_norm": 0.0, + "learning_rate": 1.9689105712741177e-05, + "loss": 1.4463, + "step": 3651 + }, + { + "epoch": 0.10722884491162135, + "grad_norm": 0.0, + "learning_rate": 1.9688870390680816e-05, + "loss": 1.4922, + "step": 3652 + }, + { + "epoch": 0.10725820658876035, + "grad_norm": 0.0, + "learning_rate": 1.968863498100152e-05, + "loss": 1.4473, + "step": 3653 + }, + { + "epoch": 0.10728756826589934, + "grad_norm": 0.0, + "learning_rate": 1.9688399483705415e-05, + "loss": 1.2939, + "step": 3654 + }, + { + "epoch": 0.10731692994303835, + "grad_norm": 0.0, + "learning_rate": 1.9688163898794634e-05, + "loss": 1.499, + "step": 3655 + }, + { + "epoch": 0.10734629162017735, + "grad_norm": 0.0, + "learning_rate": 1.96879282262713e-05, + "loss": 1.582, + "step": 3656 + }, + { + "epoch": 0.10737565329731634, + "grad_norm": 0.0, + "learning_rate": 1.9687692466137558e-05, + "loss": 1.3916, + "step": 3657 + }, + { + "epoch": 0.10740501497445534, + "grad_norm": 0.0, + "learning_rate": 1.968745661839553e-05, + "loss": 1.5898, + "step": 3658 + }, + { + "epoch": 0.10743437665159433, + "grad_norm": 0.0, + "learning_rate": 1.9687220683047347e-05, + "loss": 1.54, + "step": 3659 + }, + { + "epoch": 0.10746373832873334, + "grad_norm": 0.0, + "learning_rate": 1.968698466009515e-05, + "loss": 1.6582, + "step": 3660 + }, + { + "epoch": 0.10749310000587234, + "grad_norm": 0.0, + "learning_rate": 1.9686748549541068e-05, + "loss": 1.4844, + "step": 3661 + }, + { + "epoch": 0.10752246168301133, + "grad_norm": 0.0, + "learning_rate": 1.9686512351387235e-05, + "loss": 1.4482, + "step": 3662 + }, + { + "epoch": 0.10755182336015033, + "grad_norm": 0.0, + "learning_rate": 1.9686276065635796e-05, + "loss": 1.5801, + "step": 3663 + }, + { + "epoch": 0.10758118503728933, + "grad_norm": 0.0, + "learning_rate": 1.968603969228888e-05, + "loss": 1.418, + "step": 3664 + }, + { + "epoch": 0.10761054671442832, + "grad_norm": 0.0, + "learning_rate": 1.9685803231348624e-05, + "loss": 1.5635, + "step": 3665 + }, + { + "epoch": 0.10763990839156733, + "grad_norm": 0.0, + "learning_rate": 1.9685566682817173e-05, + "loss": 1.5977, + "step": 3666 + }, + { + "epoch": 0.10766927006870633, + "grad_norm": 0.0, + "learning_rate": 1.9685330046696657e-05, + "loss": 1.5742, + "step": 3667 + }, + { + "epoch": 0.10769863174584532, + "grad_norm": 0.0, + "learning_rate": 1.9685093322989224e-05, + "loss": 1.4253, + "step": 3668 + }, + { + "epoch": 0.10772799342298432, + "grad_norm": 0.0, + "learning_rate": 1.9684856511697013e-05, + "loss": 1.4971, + "step": 3669 + }, + { + "epoch": 0.10775735510012333, + "grad_norm": 0.0, + "learning_rate": 1.968461961282216e-05, + "loss": 1.4854, + "step": 3670 + }, + { + "epoch": 0.10778671677726231, + "grad_norm": 0.0, + "learning_rate": 1.9684382626366812e-05, + "loss": 1.4863, + "step": 3671 + }, + { + "epoch": 0.10781607845440132, + "grad_norm": 0.0, + "learning_rate": 1.9684145552333115e-05, + "loss": 1.6094, + "step": 3672 + }, + { + "epoch": 0.10784544013154031, + "grad_norm": 0.0, + "learning_rate": 1.968390839072321e-05, + "loss": 1.5752, + "step": 3673 + }, + { + "epoch": 0.10787480180867931, + "grad_norm": 0.0, + "learning_rate": 1.968367114153924e-05, + "loss": 1.3613, + "step": 3674 + }, + { + "epoch": 0.10790416348581831, + "grad_norm": 0.0, + "learning_rate": 1.9683433804783353e-05, + "loss": 1.5195, + "step": 3675 + }, + { + "epoch": 0.1079335251629573, + "grad_norm": 0.0, + "learning_rate": 1.968319638045769e-05, + "loss": 1.6045, + "step": 3676 + }, + { + "epoch": 0.1079628868400963, + "grad_norm": 0.0, + "learning_rate": 1.9682958868564406e-05, + "loss": 1.4619, + "step": 3677 + }, + { + "epoch": 0.10799224851723531, + "grad_norm": 0.0, + "learning_rate": 1.968272126910564e-05, + "loss": 1.5791, + "step": 3678 + }, + { + "epoch": 0.1080216101943743, + "grad_norm": 0.0, + "learning_rate": 1.9682483582083552e-05, + "loss": 1.5049, + "step": 3679 + }, + { + "epoch": 0.1080509718715133, + "grad_norm": 0.0, + "learning_rate": 1.9682245807500286e-05, + "loss": 1.5361, + "step": 3680 + }, + { + "epoch": 0.1080803335486523, + "grad_norm": 0.0, + "learning_rate": 1.9682007945357987e-05, + "loss": 1.4121, + "step": 3681 + }, + { + "epoch": 0.1081096952257913, + "grad_norm": 0.0, + "learning_rate": 1.968176999565881e-05, + "loss": 1.5107, + "step": 3682 + }, + { + "epoch": 0.1081390569029303, + "grad_norm": 0.0, + "learning_rate": 1.968153195840491e-05, + "loss": 1.4277, + "step": 3683 + }, + { + "epoch": 0.1081684185800693, + "grad_norm": 0.0, + "learning_rate": 1.9681293833598437e-05, + "loss": 1.5107, + "step": 3684 + }, + { + "epoch": 0.10819778025720829, + "grad_norm": 0.0, + "learning_rate": 1.9681055621241543e-05, + "loss": 1.4326, + "step": 3685 + }, + { + "epoch": 0.10822714193434729, + "grad_norm": 0.0, + "learning_rate": 1.9680817321336385e-05, + "loss": 1.4951, + "step": 3686 + }, + { + "epoch": 0.10825650361148628, + "grad_norm": 0.0, + "learning_rate": 1.9680578933885116e-05, + "loss": 1.5166, + "step": 3687 + }, + { + "epoch": 0.10828586528862529, + "grad_norm": 0.0, + "learning_rate": 1.9680340458889896e-05, + "loss": 1.5137, + "step": 3688 + }, + { + "epoch": 0.10831522696576429, + "grad_norm": 0.0, + "learning_rate": 1.968010189635287e-05, + "loss": 1.5723, + "step": 3689 + }, + { + "epoch": 0.10834458864290328, + "grad_norm": 0.0, + "learning_rate": 1.967986324627621e-05, + "loss": 1.4233, + "step": 3690 + }, + { + "epoch": 0.10837395032004228, + "grad_norm": 0.0, + "learning_rate": 1.9679624508662068e-05, + "loss": 1.5381, + "step": 3691 + }, + { + "epoch": 0.10840331199718128, + "grad_norm": 0.0, + "learning_rate": 1.96793856835126e-05, + "loss": 1.4971, + "step": 3692 + }, + { + "epoch": 0.10843267367432027, + "grad_norm": 0.0, + "learning_rate": 1.967914677082997e-05, + "loss": 1.582, + "step": 3693 + }, + { + "epoch": 0.10846203535145928, + "grad_norm": 0.0, + "learning_rate": 1.9678907770616333e-05, + "loss": 1.4639, + "step": 3694 + }, + { + "epoch": 0.10849139702859828, + "grad_norm": 0.0, + "learning_rate": 1.9678668682873857e-05, + "loss": 1.6162, + "step": 3695 + }, + { + "epoch": 0.10852075870573727, + "grad_norm": 0.0, + "learning_rate": 1.96784295076047e-05, + "loss": 1.4365, + "step": 3696 + }, + { + "epoch": 0.10855012038287627, + "grad_norm": 0.0, + "learning_rate": 1.9678190244811028e-05, + "loss": 1.3945, + "step": 3697 + }, + { + "epoch": 0.10857948206001526, + "grad_norm": 0.0, + "learning_rate": 1.9677950894495002e-05, + "loss": 1.457, + "step": 3698 + }, + { + "epoch": 0.10860884373715426, + "grad_norm": 0.0, + "learning_rate": 1.967771145665879e-05, + "loss": 1.4775, + "step": 3699 + }, + { + "epoch": 0.10863820541429327, + "grad_norm": 0.0, + "learning_rate": 1.9677471931304553e-05, + "loss": 1.4385, + "step": 3700 + }, + { + "epoch": 0.10866756709143226, + "grad_norm": 0.0, + "learning_rate": 1.9677232318434457e-05, + "loss": 1.5137, + "step": 3701 + }, + { + "epoch": 0.10869692876857126, + "grad_norm": 0.0, + "learning_rate": 1.967699261805067e-05, + "loss": 1.2983, + "step": 3702 + }, + { + "epoch": 0.10872629044571026, + "grad_norm": 0.0, + "learning_rate": 1.9676752830155365e-05, + "loss": 1.5137, + "step": 3703 + }, + { + "epoch": 0.10875565212284925, + "grad_norm": 0.0, + "learning_rate": 1.9676512954750703e-05, + "loss": 1.5078, + "step": 3704 + }, + { + "epoch": 0.10878501379998826, + "grad_norm": 0.0, + "learning_rate": 1.9676272991838857e-05, + "loss": 1.3301, + "step": 3705 + }, + { + "epoch": 0.10881437547712726, + "grad_norm": 0.0, + "learning_rate": 1.9676032941421996e-05, + "loss": 1.4473, + "step": 3706 + }, + { + "epoch": 0.10884373715426625, + "grad_norm": 0.0, + "learning_rate": 1.967579280350229e-05, + "loss": 1.4121, + "step": 3707 + }, + { + "epoch": 0.10887309883140525, + "grad_norm": 0.0, + "learning_rate": 1.9675552578081915e-05, + "loss": 1.4453, + "step": 3708 + }, + { + "epoch": 0.10890246050854425, + "grad_norm": 0.0, + "learning_rate": 1.9675312265163038e-05, + "loss": 1.4824, + "step": 3709 + }, + { + "epoch": 0.10893182218568324, + "grad_norm": 0.0, + "learning_rate": 1.9675071864747833e-05, + "loss": 1.5039, + "step": 3710 + }, + { + "epoch": 0.10896118386282225, + "grad_norm": 0.0, + "learning_rate": 1.9674831376838478e-05, + "loss": 1.4795, + "step": 3711 + }, + { + "epoch": 0.10899054553996124, + "grad_norm": 0.0, + "learning_rate": 1.9674590801437142e-05, + "loss": 1.5, + "step": 3712 + }, + { + "epoch": 0.10901990721710024, + "grad_norm": 0.0, + "learning_rate": 1.9674350138546007e-05, + "loss": 1.3491, + "step": 3713 + }, + { + "epoch": 0.10904926889423924, + "grad_norm": 0.0, + "learning_rate": 1.9674109388167247e-05, + "loss": 1.4453, + "step": 3714 + }, + { + "epoch": 0.10907863057137823, + "grad_norm": 0.0, + "learning_rate": 1.9673868550303036e-05, + "loss": 1.5918, + "step": 3715 + }, + { + "epoch": 0.10910799224851724, + "grad_norm": 0.0, + "learning_rate": 1.967362762495555e-05, + "loss": 1.5928, + "step": 3716 + }, + { + "epoch": 0.10913735392565624, + "grad_norm": 0.0, + "learning_rate": 1.9673386612126976e-05, + "loss": 1.4844, + "step": 3717 + }, + { + "epoch": 0.10916671560279523, + "grad_norm": 0.0, + "learning_rate": 1.9673145511819492e-05, + "loss": 1.5029, + "step": 3718 + }, + { + "epoch": 0.10919607727993423, + "grad_norm": 0.0, + "learning_rate": 1.967290432403527e-05, + "loss": 1.543, + "step": 3719 + }, + { + "epoch": 0.10922543895707323, + "grad_norm": 0.0, + "learning_rate": 1.96726630487765e-05, + "loss": 1.5791, + "step": 3720 + }, + { + "epoch": 0.10925480063421222, + "grad_norm": 0.0, + "learning_rate": 1.9672421686045363e-05, + "loss": 1.458, + "step": 3721 + }, + { + "epoch": 0.10928416231135123, + "grad_norm": 0.0, + "learning_rate": 1.9672180235844035e-05, + "loss": 1.5156, + "step": 3722 + }, + { + "epoch": 0.10931352398849022, + "grad_norm": 0.0, + "learning_rate": 1.9671938698174707e-05, + "loss": 1.4688, + "step": 3723 + }, + { + "epoch": 0.10934288566562922, + "grad_norm": 0.0, + "learning_rate": 1.9671697073039558e-05, + "loss": 1.4658, + "step": 3724 + }, + { + "epoch": 0.10937224734276822, + "grad_norm": 0.0, + "learning_rate": 1.967145536044078e-05, + "loss": 1.3955, + "step": 3725 + }, + { + "epoch": 0.10940160901990721, + "grad_norm": 0.0, + "learning_rate": 1.967121356038055e-05, + "loss": 1.4307, + "step": 3726 + }, + { + "epoch": 0.10943097069704621, + "grad_norm": 0.0, + "learning_rate": 1.967097167286106e-05, + "loss": 1.6025, + "step": 3727 + }, + { + "epoch": 0.10946033237418522, + "grad_norm": 0.0, + "learning_rate": 1.9670729697884496e-05, + "loss": 1.5049, + "step": 3728 + }, + { + "epoch": 0.10948969405132421, + "grad_norm": 0.0, + "learning_rate": 1.9670487635453047e-05, + "loss": 1.5098, + "step": 3729 + }, + { + "epoch": 0.10951905572846321, + "grad_norm": 0.0, + "learning_rate": 1.9670245485568904e-05, + "loss": 1.4683, + "step": 3730 + }, + { + "epoch": 0.10954841740560221, + "grad_norm": 0.0, + "learning_rate": 1.967000324823425e-05, + "loss": 1.4385, + "step": 3731 + }, + { + "epoch": 0.1095777790827412, + "grad_norm": 0.0, + "learning_rate": 1.9669760923451284e-05, + "loss": 1.4414, + "step": 3732 + }, + { + "epoch": 0.1096071407598802, + "grad_norm": 0.0, + "learning_rate": 1.9669518511222193e-05, + "loss": 1.4951, + "step": 3733 + }, + { + "epoch": 0.10963650243701921, + "grad_norm": 0.0, + "learning_rate": 1.9669276011549168e-05, + "loss": 1.4541, + "step": 3734 + }, + { + "epoch": 0.1096658641141582, + "grad_norm": 0.0, + "learning_rate": 1.9669033424434404e-05, + "loss": 1.4248, + "step": 3735 + }, + { + "epoch": 0.1096952257912972, + "grad_norm": 0.0, + "learning_rate": 1.9668790749880098e-05, + "loss": 1.4346, + "step": 3736 + }, + { + "epoch": 0.10972458746843619, + "grad_norm": 0.0, + "learning_rate": 1.9668547987888436e-05, + "loss": 1.5283, + "step": 3737 + }, + { + "epoch": 0.1097539491455752, + "grad_norm": 0.0, + "learning_rate": 1.966830513846162e-05, + "loss": 1.6172, + "step": 3738 + }, + { + "epoch": 0.1097833108227142, + "grad_norm": 0.0, + "learning_rate": 1.9668062201601846e-05, + "loss": 1.5498, + "step": 3739 + }, + { + "epoch": 0.10981267249985319, + "grad_norm": 0.0, + "learning_rate": 1.9667819177311308e-05, + "loss": 1.417, + "step": 3740 + }, + { + "epoch": 0.10984203417699219, + "grad_norm": 0.0, + "learning_rate": 1.9667576065592208e-05, + "loss": 1.4648, + "step": 3741 + }, + { + "epoch": 0.10987139585413119, + "grad_norm": 0.0, + "learning_rate": 1.9667332866446737e-05, + "loss": 1.4365, + "step": 3742 + }, + { + "epoch": 0.10990075753127018, + "grad_norm": 0.0, + "learning_rate": 1.9667089579877102e-05, + "loss": 1.5537, + "step": 3743 + }, + { + "epoch": 0.10993011920840919, + "grad_norm": 0.0, + "learning_rate": 1.96668462058855e-05, + "loss": 1.4971, + "step": 3744 + }, + { + "epoch": 0.10995948088554819, + "grad_norm": 0.0, + "learning_rate": 1.966660274447413e-05, + "loss": 1.4297, + "step": 3745 + }, + { + "epoch": 0.10998884256268718, + "grad_norm": 0.0, + "learning_rate": 1.96663591956452e-05, + "loss": 1.3525, + "step": 3746 + }, + { + "epoch": 0.11001820423982618, + "grad_norm": 0.0, + "learning_rate": 1.9666115559400903e-05, + "loss": 1.4609, + "step": 3747 + }, + { + "epoch": 0.11004756591696517, + "grad_norm": 0.0, + "learning_rate": 1.966587183574345e-05, + "loss": 1.4707, + "step": 3748 + }, + { + "epoch": 0.11007692759410417, + "grad_norm": 0.0, + "learning_rate": 1.9665628024675042e-05, + "loss": 1.6133, + "step": 3749 + }, + { + "epoch": 0.11010628927124318, + "grad_norm": 0.0, + "learning_rate": 1.9665384126197886e-05, + "loss": 1.6357, + "step": 3750 + }, + { + "epoch": 0.11013565094838217, + "grad_norm": 0.0, + "learning_rate": 1.9665140140314184e-05, + "loss": 1.6094, + "step": 3751 + }, + { + "epoch": 0.11016501262552117, + "grad_norm": 0.0, + "learning_rate": 1.966489606702615e-05, + "loss": 1.4814, + "step": 3752 + }, + { + "epoch": 0.11019437430266017, + "grad_norm": 0.0, + "learning_rate": 1.9664651906335978e-05, + "loss": 1.4258, + "step": 3753 + }, + { + "epoch": 0.11022373597979916, + "grad_norm": 0.0, + "learning_rate": 1.9664407658245885e-05, + "loss": 1.4707, + "step": 3754 + }, + { + "epoch": 0.11025309765693816, + "grad_norm": 0.0, + "learning_rate": 1.9664163322758082e-05, + "loss": 1.4219, + "step": 3755 + }, + { + "epoch": 0.11028245933407717, + "grad_norm": 0.0, + "learning_rate": 1.966391889987477e-05, + "loss": 1.4453, + "step": 3756 + }, + { + "epoch": 0.11031182101121616, + "grad_norm": 0.0, + "learning_rate": 1.966367438959817e-05, + "loss": 1.3672, + "step": 3757 + }, + { + "epoch": 0.11034118268835516, + "grad_norm": 0.0, + "learning_rate": 1.9663429791930484e-05, + "loss": 1.4023, + "step": 3758 + }, + { + "epoch": 0.11037054436549416, + "grad_norm": 0.0, + "learning_rate": 1.966318510687393e-05, + "loss": 1.4873, + "step": 3759 + }, + { + "epoch": 0.11039990604263315, + "grad_norm": 0.0, + "learning_rate": 1.9662940334430716e-05, + "loss": 1.5488, + "step": 3760 + }, + { + "epoch": 0.11042926771977216, + "grad_norm": 0.0, + "learning_rate": 1.9662695474603057e-05, + "loss": 1.665, + "step": 3761 + }, + { + "epoch": 0.11045862939691115, + "grad_norm": 0.0, + "learning_rate": 1.9662450527393172e-05, + "loss": 1.5938, + "step": 3762 + }, + { + "epoch": 0.11048799107405015, + "grad_norm": 0.0, + "learning_rate": 1.966220549280327e-05, + "loss": 1.3594, + "step": 3763 + }, + { + "epoch": 0.11051735275118915, + "grad_norm": 0.0, + "learning_rate": 1.9661960370835567e-05, + "loss": 1.4346, + "step": 3764 + }, + { + "epoch": 0.11054671442832814, + "grad_norm": 0.0, + "learning_rate": 1.9661715161492286e-05, + "loss": 1.4395, + "step": 3765 + }, + { + "epoch": 0.11057607610546714, + "grad_norm": 0.0, + "learning_rate": 1.966146986477564e-05, + "loss": 1.5322, + "step": 3766 + }, + { + "epoch": 0.11060543778260615, + "grad_norm": 0.0, + "learning_rate": 1.9661224480687846e-05, + "loss": 1.5576, + "step": 3767 + }, + { + "epoch": 0.11063479945974514, + "grad_norm": 0.0, + "learning_rate": 1.9660979009231127e-05, + "loss": 1.4492, + "step": 3768 + }, + { + "epoch": 0.11066416113688414, + "grad_norm": 0.0, + "learning_rate": 1.9660733450407696e-05, + "loss": 1.5371, + "step": 3769 + }, + { + "epoch": 0.11069352281402314, + "grad_norm": 0.0, + "learning_rate": 1.9660487804219782e-05, + "loss": 1.5566, + "step": 3770 + }, + { + "epoch": 0.11072288449116213, + "grad_norm": 0.0, + "learning_rate": 1.9660242070669602e-05, + "loss": 1.4717, + "step": 3771 + }, + { + "epoch": 0.11075224616830114, + "grad_norm": 0.0, + "learning_rate": 1.9659996249759378e-05, + "loss": 1.54, + "step": 3772 + }, + { + "epoch": 0.11078160784544012, + "grad_norm": 0.0, + "learning_rate": 1.9659750341491335e-05, + "loss": 1.5029, + "step": 3773 + }, + { + "epoch": 0.11081096952257913, + "grad_norm": 0.0, + "learning_rate": 1.9659504345867698e-05, + "loss": 1.4951, + "step": 3774 + }, + { + "epoch": 0.11084033119971813, + "grad_norm": 0.0, + "learning_rate": 1.9659258262890683e-05, + "loss": 1.5254, + "step": 3775 + }, + { + "epoch": 0.11086969287685712, + "grad_norm": 0.0, + "learning_rate": 1.9659012092562526e-05, + "loss": 1.4355, + "step": 3776 + }, + { + "epoch": 0.11089905455399612, + "grad_norm": 0.0, + "learning_rate": 1.9658765834885448e-05, + "loss": 1.332, + "step": 3777 + }, + { + "epoch": 0.11092841623113513, + "grad_norm": 0.0, + "learning_rate": 1.9658519489861678e-05, + "loss": 1.46, + "step": 3778 + }, + { + "epoch": 0.11095777790827412, + "grad_norm": 0.0, + "learning_rate": 1.9658273057493443e-05, + "loss": 1.3408, + "step": 3779 + }, + { + "epoch": 0.11098713958541312, + "grad_norm": 0.0, + "learning_rate": 1.965802653778297e-05, + "loss": 1.5273, + "step": 3780 + }, + { + "epoch": 0.11101650126255212, + "grad_norm": 0.0, + "learning_rate": 1.965777993073249e-05, + "loss": 1.4795, + "step": 3781 + }, + { + "epoch": 0.11104586293969111, + "grad_norm": 0.0, + "learning_rate": 1.9657533236344232e-05, + "loss": 1.5225, + "step": 3782 + }, + { + "epoch": 0.11107522461683011, + "grad_norm": 0.0, + "learning_rate": 1.965728645462043e-05, + "loss": 1.4336, + "step": 3783 + }, + { + "epoch": 0.11110458629396912, + "grad_norm": 0.0, + "learning_rate": 1.9657039585563307e-05, + "loss": 1.5684, + "step": 3784 + }, + { + "epoch": 0.11113394797110811, + "grad_norm": 0.0, + "learning_rate": 1.965679262917511e-05, + "loss": 1.5889, + "step": 3785 + }, + { + "epoch": 0.11116330964824711, + "grad_norm": 0.0, + "learning_rate": 1.965654558545806e-05, + "loss": 1.5527, + "step": 3786 + }, + { + "epoch": 0.1111926713253861, + "grad_norm": 0.0, + "learning_rate": 1.9656298454414394e-05, + "loss": 1.459, + "step": 3787 + }, + { + "epoch": 0.1112220330025251, + "grad_norm": 0.0, + "learning_rate": 1.9656051236046353e-05, + "loss": 1.4629, + "step": 3788 + }, + { + "epoch": 0.1112513946796641, + "grad_norm": 0.0, + "learning_rate": 1.965580393035616e-05, + "loss": 1.3867, + "step": 3789 + }, + { + "epoch": 0.1112807563568031, + "grad_norm": 0.0, + "learning_rate": 1.9655556537346065e-05, + "loss": 1.4102, + "step": 3790 + }, + { + "epoch": 0.1113101180339421, + "grad_norm": 0.0, + "learning_rate": 1.96553090570183e-05, + "loss": 1.6201, + "step": 3791 + }, + { + "epoch": 0.1113394797110811, + "grad_norm": 0.0, + "learning_rate": 1.96550614893751e-05, + "loss": 1.4014, + "step": 3792 + }, + { + "epoch": 0.11136884138822009, + "grad_norm": 0.0, + "learning_rate": 1.9654813834418705e-05, + "loss": 1.4102, + "step": 3793 + }, + { + "epoch": 0.1113982030653591, + "grad_norm": 0.0, + "learning_rate": 1.965456609215136e-05, + "loss": 1.501, + "step": 3794 + }, + { + "epoch": 0.1114275647424981, + "grad_norm": 0.0, + "learning_rate": 1.96543182625753e-05, + "loss": 1.5684, + "step": 3795 + }, + { + "epoch": 0.11145692641963709, + "grad_norm": 0.0, + "learning_rate": 1.9654070345692764e-05, + "loss": 1.2783, + "step": 3796 + }, + { + "epoch": 0.11148628809677609, + "grad_norm": 0.0, + "learning_rate": 1.9653822341506e-05, + "loss": 1.5723, + "step": 3797 + }, + { + "epoch": 0.11151564977391508, + "grad_norm": 0.0, + "learning_rate": 1.9653574250017248e-05, + "loss": 1.4004, + "step": 3798 + }, + { + "epoch": 0.11154501145105408, + "grad_norm": 0.0, + "learning_rate": 1.965332607122875e-05, + "loss": 1.4541, + "step": 3799 + }, + { + "epoch": 0.11157437312819309, + "grad_norm": 0.0, + "learning_rate": 1.9653077805142754e-05, + "loss": 1.4609, + "step": 3800 + }, + { + "epoch": 0.11160373480533207, + "grad_norm": 0.0, + "learning_rate": 1.96528294517615e-05, + "loss": 1.4912, + "step": 3801 + }, + { + "epoch": 0.11163309648247108, + "grad_norm": 0.0, + "learning_rate": 1.965258101108724e-05, + "loss": 1.4092, + "step": 3802 + }, + { + "epoch": 0.11166245815961008, + "grad_norm": 0.0, + "learning_rate": 1.9652332483122222e-05, + "loss": 1.4951, + "step": 3803 + }, + { + "epoch": 0.11169181983674907, + "grad_norm": 0.0, + "learning_rate": 1.9652083867868682e-05, + "loss": 1.6064, + "step": 3804 + }, + { + "epoch": 0.11172118151388807, + "grad_norm": 0.0, + "learning_rate": 1.9651835165328877e-05, + "loss": 1.4951, + "step": 3805 + }, + { + "epoch": 0.11175054319102708, + "grad_norm": 0.0, + "learning_rate": 1.9651586375505062e-05, + "loss": 1.4434, + "step": 3806 + }, + { + "epoch": 0.11177990486816607, + "grad_norm": 0.0, + "learning_rate": 1.9651337498399472e-05, + "loss": 1.4492, + "step": 3807 + }, + { + "epoch": 0.11180926654530507, + "grad_norm": 0.0, + "learning_rate": 1.9651088534014367e-05, + "loss": 1.5, + "step": 3808 + }, + { + "epoch": 0.11183862822244407, + "grad_norm": 0.0, + "learning_rate": 1.9650839482351997e-05, + "loss": 1.5176, + "step": 3809 + }, + { + "epoch": 0.11186798989958306, + "grad_norm": 0.0, + "learning_rate": 1.9650590343414615e-05, + "loss": 1.499, + "step": 3810 + }, + { + "epoch": 0.11189735157672207, + "grad_norm": 0.0, + "learning_rate": 1.965034111720447e-05, + "loss": 1.4287, + "step": 3811 + }, + { + "epoch": 0.11192671325386105, + "grad_norm": 0.0, + "learning_rate": 1.9650091803723823e-05, + "loss": 1.5176, + "step": 3812 + }, + { + "epoch": 0.11195607493100006, + "grad_norm": 0.0, + "learning_rate": 1.9649842402974923e-05, + "loss": 1.4404, + "step": 3813 + }, + { + "epoch": 0.11198543660813906, + "grad_norm": 0.0, + "learning_rate": 1.9649592914960027e-05, + "loss": 1.4453, + "step": 3814 + }, + { + "epoch": 0.11201479828527805, + "grad_norm": 0.0, + "learning_rate": 1.9649343339681387e-05, + "loss": 1.583, + "step": 3815 + }, + { + "epoch": 0.11204415996241705, + "grad_norm": 0.0, + "learning_rate": 1.9649093677141268e-05, + "loss": 1.5127, + "step": 3816 + }, + { + "epoch": 0.11207352163955606, + "grad_norm": 0.0, + "learning_rate": 1.964884392734192e-05, + "loss": 1.6006, + "step": 3817 + }, + { + "epoch": 0.11210288331669505, + "grad_norm": 0.0, + "learning_rate": 1.964859409028561e-05, + "loss": 1.4355, + "step": 3818 + }, + { + "epoch": 0.11213224499383405, + "grad_norm": 0.0, + "learning_rate": 1.964834416597459e-05, + "loss": 1.5742, + "step": 3819 + }, + { + "epoch": 0.11216160667097305, + "grad_norm": 0.0, + "learning_rate": 1.9648094154411124e-05, + "loss": 1.5537, + "step": 3820 + }, + { + "epoch": 0.11219096834811204, + "grad_norm": 0.0, + "learning_rate": 1.9647844055597473e-05, + "loss": 1.3721, + "step": 3821 + }, + { + "epoch": 0.11222033002525104, + "grad_norm": 0.0, + "learning_rate": 1.964759386953589e-05, + "loss": 1.4375, + "step": 3822 + }, + { + "epoch": 0.11224969170239003, + "grad_norm": 0.0, + "learning_rate": 1.9647343596228655e-05, + "loss": 1.3359, + "step": 3823 + }, + { + "epoch": 0.11227905337952904, + "grad_norm": 0.0, + "learning_rate": 1.9647093235678015e-05, + "loss": 1.501, + "step": 3824 + }, + { + "epoch": 0.11230841505666804, + "grad_norm": 0.0, + "learning_rate": 1.964684278788624e-05, + "loss": 1.4189, + "step": 3825 + }, + { + "epoch": 0.11233777673380703, + "grad_norm": 0.0, + "learning_rate": 1.9646592252855595e-05, + "loss": 1.4707, + "step": 3826 + }, + { + "epoch": 0.11236713841094603, + "grad_norm": 0.0, + "learning_rate": 1.964634163058835e-05, + "loss": 1.4658, + "step": 3827 + }, + { + "epoch": 0.11239650008808504, + "grad_norm": 0.0, + "learning_rate": 1.9646090921086762e-05, + "loss": 1.5986, + "step": 3828 + }, + { + "epoch": 0.11242586176522402, + "grad_norm": 0.0, + "learning_rate": 1.9645840124353107e-05, + "loss": 1.4795, + "step": 3829 + }, + { + "epoch": 0.11245522344236303, + "grad_norm": 0.0, + "learning_rate": 1.9645589240389648e-05, + "loss": 1.4844, + "step": 3830 + }, + { + "epoch": 0.11248458511950203, + "grad_norm": 0.0, + "learning_rate": 1.9645338269198655e-05, + "loss": 1.5439, + "step": 3831 + }, + { + "epoch": 0.11251394679664102, + "grad_norm": 0.0, + "learning_rate": 1.9645087210782398e-05, + "loss": 1.3818, + "step": 3832 + }, + { + "epoch": 0.11254330847378002, + "grad_norm": 0.0, + "learning_rate": 1.964483606514315e-05, + "loss": 1.4482, + "step": 3833 + }, + { + "epoch": 0.11257267015091903, + "grad_norm": 0.0, + "learning_rate": 1.9644584832283174e-05, + "loss": 1.4795, + "step": 3834 + }, + { + "epoch": 0.11260203182805802, + "grad_norm": 0.0, + "learning_rate": 1.964433351220475e-05, + "loss": 1.4062, + "step": 3835 + }, + { + "epoch": 0.11263139350519702, + "grad_norm": 0.0, + "learning_rate": 1.9644082104910148e-05, + "loss": 1.4961, + "step": 3836 + }, + { + "epoch": 0.11266075518233601, + "grad_norm": 0.0, + "learning_rate": 1.9643830610401644e-05, + "loss": 1.5537, + "step": 3837 + }, + { + "epoch": 0.11269011685947501, + "grad_norm": 0.0, + "learning_rate": 1.9643579028681507e-05, + "loss": 1.4326, + "step": 3838 + }, + { + "epoch": 0.11271947853661402, + "grad_norm": 0.0, + "learning_rate": 1.9643327359752016e-05, + "loss": 1.498, + "step": 3839 + }, + { + "epoch": 0.112748840213753, + "grad_norm": 0.0, + "learning_rate": 1.9643075603615448e-05, + "loss": 1.583, + "step": 3840 + }, + { + "epoch": 0.11277820189089201, + "grad_norm": 0.0, + "learning_rate": 1.9642823760274074e-05, + "loss": 1.5312, + "step": 3841 + }, + { + "epoch": 0.11280756356803101, + "grad_norm": 0.0, + "learning_rate": 1.9642571829730176e-05, + "loss": 1.4268, + "step": 3842 + }, + { + "epoch": 0.11283692524517, + "grad_norm": 0.0, + "learning_rate": 1.9642319811986033e-05, + "loss": 1.5039, + "step": 3843 + }, + { + "epoch": 0.112866286922309, + "grad_norm": 0.0, + "learning_rate": 1.9642067707043923e-05, + "loss": 1.5635, + "step": 3844 + }, + { + "epoch": 0.112895648599448, + "grad_norm": 0.0, + "learning_rate": 1.9641815514906124e-05, + "loss": 1.4824, + "step": 3845 + }, + { + "epoch": 0.112925010276587, + "grad_norm": 0.0, + "learning_rate": 1.9641563235574917e-05, + "loss": 1.4072, + "step": 3846 + }, + { + "epoch": 0.112954371953726, + "grad_norm": 0.0, + "learning_rate": 1.9641310869052587e-05, + "loss": 1.5352, + "step": 3847 + }, + { + "epoch": 0.11298373363086499, + "grad_norm": 0.0, + "learning_rate": 1.9641058415341412e-05, + "loss": 1.5488, + "step": 3848 + }, + { + "epoch": 0.11301309530800399, + "grad_norm": 0.0, + "learning_rate": 1.9640805874443676e-05, + "loss": 1.4355, + "step": 3849 + }, + { + "epoch": 0.113042456985143, + "grad_norm": 0.0, + "learning_rate": 1.9640553246361665e-05, + "loss": 1.4043, + "step": 3850 + }, + { + "epoch": 0.11307181866228198, + "grad_norm": 0.0, + "learning_rate": 1.964030053109766e-05, + "loss": 1.5439, + "step": 3851 + }, + { + "epoch": 0.11310118033942099, + "grad_norm": 0.0, + "learning_rate": 1.964004772865395e-05, + "loss": 1.3564, + "step": 3852 + }, + { + "epoch": 0.11313054201655999, + "grad_norm": 0.0, + "learning_rate": 1.963979483903282e-05, + "loss": 1.458, + "step": 3853 + }, + { + "epoch": 0.11315990369369898, + "grad_norm": 0.0, + "learning_rate": 1.9639541862236554e-05, + "loss": 1.5, + "step": 3854 + }, + { + "epoch": 0.11318926537083798, + "grad_norm": 0.0, + "learning_rate": 1.9639288798267445e-05, + "loss": 1.4971, + "step": 3855 + }, + { + "epoch": 0.11321862704797699, + "grad_norm": 0.0, + "learning_rate": 1.9639035647127776e-05, + "loss": 1.5254, + "step": 3856 + }, + { + "epoch": 0.11324798872511598, + "grad_norm": 0.0, + "learning_rate": 1.9638782408819842e-05, + "loss": 1.5049, + "step": 3857 + }, + { + "epoch": 0.11327735040225498, + "grad_norm": 0.0, + "learning_rate": 1.963852908334593e-05, + "loss": 1.3389, + "step": 3858 + }, + { + "epoch": 0.11330671207939398, + "grad_norm": 0.0, + "learning_rate": 1.9638275670708328e-05, + "loss": 1.4736, + "step": 3859 + }, + { + "epoch": 0.11333607375653297, + "grad_norm": 0.0, + "learning_rate": 1.9638022170909332e-05, + "loss": 1.6211, + "step": 3860 + }, + { + "epoch": 0.11336543543367197, + "grad_norm": 0.0, + "learning_rate": 1.9637768583951235e-05, + "loss": 1.4082, + "step": 3861 + }, + { + "epoch": 0.11339479711081096, + "grad_norm": 0.0, + "learning_rate": 1.963751490983633e-05, + "loss": 1.4492, + "step": 3862 + }, + { + "epoch": 0.11342415878794997, + "grad_norm": 0.0, + "learning_rate": 1.9637261148566905e-05, + "loss": 1.5176, + "step": 3863 + }, + { + "epoch": 0.11345352046508897, + "grad_norm": 0.0, + "learning_rate": 1.9637007300145262e-05, + "loss": 1.5508, + "step": 3864 + }, + { + "epoch": 0.11348288214222796, + "grad_norm": 0.0, + "learning_rate": 1.963675336457369e-05, + "loss": 1.4922, + "step": 3865 + }, + { + "epoch": 0.11351224381936696, + "grad_norm": 0.0, + "learning_rate": 1.9636499341854493e-05, + "loss": 1.4727, + "step": 3866 + }, + { + "epoch": 0.11354160549650597, + "grad_norm": 0.0, + "learning_rate": 1.9636245231989964e-05, + "loss": 1.541, + "step": 3867 + }, + { + "epoch": 0.11357096717364495, + "grad_norm": 0.0, + "learning_rate": 1.96359910349824e-05, + "loss": 1.6143, + "step": 3868 + }, + { + "epoch": 0.11360032885078396, + "grad_norm": 0.0, + "learning_rate": 1.9635736750834102e-05, + "loss": 1.5664, + "step": 3869 + }, + { + "epoch": 0.11362969052792296, + "grad_norm": 0.0, + "learning_rate": 1.963548237954737e-05, + "loss": 1.3784, + "step": 3870 + }, + { + "epoch": 0.11365905220506195, + "grad_norm": 0.0, + "learning_rate": 1.9635227921124503e-05, + "loss": 1.3994, + "step": 3871 + }, + { + "epoch": 0.11368841388220095, + "grad_norm": 0.0, + "learning_rate": 1.9634973375567802e-05, + "loss": 1.376, + "step": 3872 + }, + { + "epoch": 0.11371777555933996, + "grad_norm": 0.0, + "learning_rate": 1.963471874287957e-05, + "loss": 1.54, + "step": 3873 + }, + { + "epoch": 0.11374713723647895, + "grad_norm": 0.0, + "learning_rate": 1.9634464023062106e-05, + "loss": 1.625, + "step": 3874 + }, + { + "epoch": 0.11377649891361795, + "grad_norm": 0.0, + "learning_rate": 1.9634209216117716e-05, + "loss": 1.3975, + "step": 3875 + }, + { + "epoch": 0.11380586059075694, + "grad_norm": 0.0, + "learning_rate": 1.963395432204871e-05, + "loss": 1.5605, + "step": 3876 + }, + { + "epoch": 0.11383522226789594, + "grad_norm": 0.0, + "learning_rate": 1.9633699340857383e-05, + "loss": 1.3955, + "step": 3877 + }, + { + "epoch": 0.11386458394503494, + "grad_norm": 0.0, + "learning_rate": 1.963344427254605e-05, + "loss": 1.3643, + "step": 3878 + }, + { + "epoch": 0.11389394562217393, + "grad_norm": 0.0, + "learning_rate": 1.963318911711701e-05, + "loss": 1.585, + "step": 3879 + }, + { + "epoch": 0.11392330729931294, + "grad_norm": 0.0, + "learning_rate": 1.9632933874572573e-05, + "loss": 1.2607, + "step": 3880 + }, + { + "epoch": 0.11395266897645194, + "grad_norm": 0.0, + "learning_rate": 1.963267854491505e-05, + "loss": 1.3789, + "step": 3881 + }, + { + "epoch": 0.11398203065359093, + "grad_norm": 0.0, + "learning_rate": 1.9632423128146745e-05, + "loss": 1.4365, + "step": 3882 + }, + { + "epoch": 0.11401139233072993, + "grad_norm": 0.0, + "learning_rate": 1.963216762426997e-05, + "loss": 1.4844, + "step": 3883 + }, + { + "epoch": 0.11404075400786894, + "grad_norm": 0.0, + "learning_rate": 1.9631912033287043e-05, + "loss": 1.4932, + "step": 3884 + }, + { + "epoch": 0.11407011568500793, + "grad_norm": 0.0, + "learning_rate": 1.9631656355200265e-05, + "loss": 1.4268, + "step": 3885 + }, + { + "epoch": 0.11409947736214693, + "grad_norm": 0.0, + "learning_rate": 1.9631400590011953e-05, + "loss": 1.3535, + "step": 3886 + }, + { + "epoch": 0.11412883903928592, + "grad_norm": 0.0, + "learning_rate": 1.9631144737724416e-05, + "loss": 1.5557, + "step": 3887 + }, + { + "epoch": 0.11415820071642492, + "grad_norm": 0.0, + "learning_rate": 1.9630888798339973e-05, + "loss": 1.3848, + "step": 3888 + }, + { + "epoch": 0.11418756239356392, + "grad_norm": 0.0, + "learning_rate": 1.9630632771860937e-05, + "loss": 1.3809, + "step": 3889 + }, + { + "epoch": 0.11421692407070291, + "grad_norm": 0.0, + "learning_rate": 1.9630376658289617e-05, + "loss": 1.4678, + "step": 3890 + }, + { + "epoch": 0.11424628574784192, + "grad_norm": 0.0, + "learning_rate": 1.9630120457628342e-05, + "loss": 1.4199, + "step": 3891 + }, + { + "epoch": 0.11427564742498092, + "grad_norm": 0.0, + "learning_rate": 1.9629864169879415e-05, + "loss": 1.3877, + "step": 3892 + }, + { + "epoch": 0.11430500910211991, + "grad_norm": 0.0, + "learning_rate": 1.9629607795045163e-05, + "loss": 1.5918, + "step": 3893 + }, + { + "epoch": 0.11433437077925891, + "grad_norm": 0.0, + "learning_rate": 1.9629351333127902e-05, + "loss": 1.4951, + "step": 3894 + }, + { + "epoch": 0.11436373245639792, + "grad_norm": 0.0, + "learning_rate": 1.9629094784129948e-05, + "loss": 1.5225, + "step": 3895 + }, + { + "epoch": 0.1143930941335369, + "grad_norm": 0.0, + "learning_rate": 1.962883814805363e-05, + "loss": 1.5381, + "step": 3896 + }, + { + "epoch": 0.11442245581067591, + "grad_norm": 0.0, + "learning_rate": 1.9628581424901256e-05, + "loss": 1.3965, + "step": 3897 + }, + { + "epoch": 0.11445181748781491, + "grad_norm": 0.0, + "learning_rate": 1.9628324614675156e-05, + "loss": 1.6182, + "step": 3898 + }, + { + "epoch": 0.1144811791649539, + "grad_norm": 0.0, + "learning_rate": 1.9628067717377654e-05, + "loss": 1.4131, + "step": 3899 + }, + { + "epoch": 0.1145105408420929, + "grad_norm": 0.0, + "learning_rate": 1.9627810733011067e-05, + "loss": 1.4668, + "step": 3900 + }, + { + "epoch": 0.11453990251923189, + "grad_norm": 0.0, + "learning_rate": 1.962755366157772e-05, + "loss": 1.4092, + "step": 3901 + }, + { + "epoch": 0.1145692641963709, + "grad_norm": 0.0, + "learning_rate": 1.9627296503079942e-05, + "loss": 1.4414, + "step": 3902 + }, + { + "epoch": 0.1145986258735099, + "grad_norm": 0.0, + "learning_rate": 1.9627039257520056e-05, + "loss": 1.4209, + "step": 3903 + }, + { + "epoch": 0.11462798755064889, + "grad_norm": 0.0, + "learning_rate": 1.962678192490039e-05, + "loss": 1.3828, + "step": 3904 + }, + { + "epoch": 0.11465734922778789, + "grad_norm": 0.0, + "learning_rate": 1.9626524505223265e-05, + "loss": 1.4531, + "step": 3905 + }, + { + "epoch": 0.1146867109049269, + "grad_norm": 0.0, + "learning_rate": 1.9626266998491016e-05, + "loss": 1.5088, + "step": 3906 + }, + { + "epoch": 0.11471607258206588, + "grad_norm": 0.0, + "learning_rate": 1.9626009404705972e-05, + "loss": 1.3799, + "step": 3907 + }, + { + "epoch": 0.11474543425920489, + "grad_norm": 0.0, + "learning_rate": 1.9625751723870457e-05, + "loss": 1.4766, + "step": 3908 + }, + { + "epoch": 0.11477479593634389, + "grad_norm": 0.0, + "learning_rate": 1.9625493955986803e-05, + "loss": 1.3818, + "step": 3909 + }, + { + "epoch": 0.11480415761348288, + "grad_norm": 0.0, + "learning_rate": 1.9625236101057344e-05, + "loss": 1.4932, + "step": 3910 + }, + { + "epoch": 0.11483351929062188, + "grad_norm": 0.0, + "learning_rate": 1.9624978159084408e-05, + "loss": 1.5312, + "step": 3911 + }, + { + "epoch": 0.11486288096776087, + "grad_norm": 0.0, + "learning_rate": 1.9624720130070332e-05, + "loss": 1.252, + "step": 3912 + }, + { + "epoch": 0.11489224264489988, + "grad_norm": 0.0, + "learning_rate": 1.9624462014017448e-05, + "loss": 1.5791, + "step": 3913 + }, + { + "epoch": 0.11492160432203888, + "grad_norm": 0.0, + "learning_rate": 1.9624203810928086e-05, + "loss": 1.4189, + "step": 3914 + }, + { + "epoch": 0.11495096599917787, + "grad_norm": 0.0, + "learning_rate": 1.9623945520804584e-05, + "loss": 1.4072, + "step": 3915 + }, + { + "epoch": 0.11498032767631687, + "grad_norm": 0.0, + "learning_rate": 1.962368714364928e-05, + "loss": 1.4648, + "step": 3916 + }, + { + "epoch": 0.11500968935345587, + "grad_norm": 0.0, + "learning_rate": 1.9623428679464503e-05, + "loss": 1.4854, + "step": 3917 + }, + { + "epoch": 0.11503905103059486, + "grad_norm": 0.0, + "learning_rate": 1.9623170128252603e-05, + "loss": 1.3389, + "step": 3918 + }, + { + "epoch": 0.11506841270773387, + "grad_norm": 0.0, + "learning_rate": 1.9622911490015907e-05, + "loss": 1.5605, + "step": 3919 + }, + { + "epoch": 0.11509777438487287, + "grad_norm": 0.0, + "learning_rate": 1.962265276475676e-05, + "loss": 1.6572, + "step": 3920 + }, + { + "epoch": 0.11512713606201186, + "grad_norm": 0.0, + "learning_rate": 1.96223939524775e-05, + "loss": 1.4854, + "step": 3921 + }, + { + "epoch": 0.11515649773915086, + "grad_norm": 0.0, + "learning_rate": 1.9622135053180464e-05, + "loss": 1.3779, + "step": 3922 + }, + { + "epoch": 0.11518585941628987, + "grad_norm": 0.0, + "learning_rate": 1.9621876066867997e-05, + "loss": 1.6055, + "step": 3923 + }, + { + "epoch": 0.11521522109342885, + "grad_norm": 0.0, + "learning_rate": 1.9621616993542446e-05, + "loss": 1.4229, + "step": 3924 + }, + { + "epoch": 0.11524458277056786, + "grad_norm": 0.0, + "learning_rate": 1.962135783320614e-05, + "loss": 1.4424, + "step": 3925 + }, + { + "epoch": 0.11527394444770685, + "grad_norm": 0.0, + "learning_rate": 1.9621098585861436e-05, + "loss": 1.3721, + "step": 3926 + }, + { + "epoch": 0.11530330612484585, + "grad_norm": 0.0, + "learning_rate": 1.9620839251510672e-05, + "loss": 1.5391, + "step": 3927 + }, + { + "epoch": 0.11533266780198485, + "grad_norm": 0.0, + "learning_rate": 1.9620579830156198e-05, + "loss": 1.4912, + "step": 3928 + }, + { + "epoch": 0.11536202947912384, + "grad_norm": 0.0, + "learning_rate": 1.9620320321800353e-05, + "loss": 1.5176, + "step": 3929 + }, + { + "epoch": 0.11539139115626285, + "grad_norm": 0.0, + "learning_rate": 1.962006072644549e-05, + "loss": 1.4131, + "step": 3930 + }, + { + "epoch": 0.11542075283340185, + "grad_norm": 0.0, + "learning_rate": 1.961980104409395e-05, + "loss": 1.5674, + "step": 3931 + }, + { + "epoch": 0.11545011451054084, + "grad_norm": 0.0, + "learning_rate": 1.9619541274748092e-05, + "loss": 1.4014, + "step": 3932 + }, + { + "epoch": 0.11547947618767984, + "grad_norm": 0.0, + "learning_rate": 1.9619281418410256e-05, + "loss": 1.458, + "step": 3933 + }, + { + "epoch": 0.11550883786481884, + "grad_norm": 0.0, + "learning_rate": 1.961902147508279e-05, + "loss": 1.4775, + "step": 3934 + }, + { + "epoch": 0.11553819954195783, + "grad_norm": 0.0, + "learning_rate": 1.9618761444768055e-05, + "loss": 1.5322, + "step": 3935 + }, + { + "epoch": 0.11556756121909684, + "grad_norm": 0.0, + "learning_rate": 1.9618501327468397e-05, + "loss": 1.459, + "step": 3936 + }, + { + "epoch": 0.11559692289623583, + "grad_norm": 0.0, + "learning_rate": 1.9618241123186163e-05, + "loss": 1.4375, + "step": 3937 + }, + { + "epoch": 0.11562628457337483, + "grad_norm": 0.0, + "learning_rate": 1.9617980831923715e-05, + "loss": 1.3647, + "step": 3938 + }, + { + "epoch": 0.11565564625051383, + "grad_norm": 0.0, + "learning_rate": 1.96177204536834e-05, + "loss": 1.5762, + "step": 3939 + }, + { + "epoch": 0.11568500792765282, + "grad_norm": 0.0, + "learning_rate": 1.961745998846758e-05, + "loss": 1.415, + "step": 3940 + }, + { + "epoch": 0.11571436960479183, + "grad_norm": 0.0, + "learning_rate": 1.9617199436278604e-05, + "loss": 1.4971, + "step": 3941 + }, + { + "epoch": 0.11574373128193083, + "grad_norm": 0.0, + "learning_rate": 1.9616938797118833e-05, + "loss": 1.4346, + "step": 3942 + }, + { + "epoch": 0.11577309295906982, + "grad_norm": 0.0, + "learning_rate": 1.961667807099062e-05, + "loss": 1.5215, + "step": 3943 + }, + { + "epoch": 0.11580245463620882, + "grad_norm": 0.0, + "learning_rate": 1.9616417257896322e-05, + "loss": 1.4717, + "step": 3944 + }, + { + "epoch": 0.11583181631334782, + "grad_norm": 0.0, + "learning_rate": 1.9616156357838305e-05, + "loss": 1.4756, + "step": 3945 + }, + { + "epoch": 0.11586117799048681, + "grad_norm": 0.0, + "learning_rate": 1.9615895370818914e-05, + "loss": 1.4756, + "step": 3946 + }, + { + "epoch": 0.11589053966762582, + "grad_norm": 0.0, + "learning_rate": 1.961563429684053e-05, + "loss": 1.5566, + "step": 3947 + }, + { + "epoch": 0.11591990134476482, + "grad_norm": 0.0, + "learning_rate": 1.9615373135905496e-05, + "loss": 1.3926, + "step": 3948 + }, + { + "epoch": 0.11594926302190381, + "grad_norm": 0.0, + "learning_rate": 1.9615111888016177e-05, + "loss": 1.5068, + "step": 3949 + }, + { + "epoch": 0.11597862469904281, + "grad_norm": 0.0, + "learning_rate": 1.9614850553174945e-05, + "loss": 1.3809, + "step": 3950 + }, + { + "epoch": 0.1160079863761818, + "grad_norm": 0.0, + "learning_rate": 1.9614589131384155e-05, + "loss": 1.4531, + "step": 3951 + }, + { + "epoch": 0.1160373480533208, + "grad_norm": 0.0, + "learning_rate": 1.961432762264617e-05, + "loss": 1.4639, + "step": 3952 + }, + { + "epoch": 0.11606670973045981, + "grad_norm": 0.0, + "learning_rate": 1.9614066026963362e-05, + "loss": 1.4131, + "step": 3953 + }, + { + "epoch": 0.1160960714075988, + "grad_norm": 0.0, + "learning_rate": 1.9613804344338093e-05, + "loss": 1.4502, + "step": 3954 + }, + { + "epoch": 0.1161254330847378, + "grad_norm": 0.0, + "learning_rate": 1.9613542574772732e-05, + "loss": 1.4131, + "step": 3955 + }, + { + "epoch": 0.1161547947618768, + "grad_norm": 0.0, + "learning_rate": 1.9613280718269637e-05, + "loss": 1.7207, + "step": 3956 + }, + { + "epoch": 0.11618415643901579, + "grad_norm": 0.0, + "learning_rate": 1.9613018774831184e-05, + "loss": 1.4717, + "step": 3957 + }, + { + "epoch": 0.1162135181161548, + "grad_norm": 0.0, + "learning_rate": 1.9612756744459744e-05, + "loss": 1.5381, + "step": 3958 + }, + { + "epoch": 0.1162428797932938, + "grad_norm": 0.0, + "learning_rate": 1.9612494627157678e-05, + "loss": 1.5723, + "step": 3959 + }, + { + "epoch": 0.11627224147043279, + "grad_norm": 0.0, + "learning_rate": 1.9612232422927365e-05, + "loss": 1.5322, + "step": 3960 + }, + { + "epoch": 0.11630160314757179, + "grad_norm": 0.0, + "learning_rate": 1.961197013177117e-05, + "loss": 1.3467, + "step": 3961 + }, + { + "epoch": 0.11633096482471078, + "grad_norm": 0.0, + "learning_rate": 1.9611707753691468e-05, + "loss": 1.5137, + "step": 3962 + }, + { + "epoch": 0.11636032650184978, + "grad_norm": 0.0, + "learning_rate": 1.961144528869063e-05, + "loss": 1.5332, + "step": 3963 + }, + { + "epoch": 0.11638968817898879, + "grad_norm": 0.0, + "learning_rate": 1.9611182736771033e-05, + "loss": 1.5293, + "step": 3964 + }, + { + "epoch": 0.11641904985612778, + "grad_norm": 0.0, + "learning_rate": 1.9610920097935047e-05, + "loss": 1.543, + "step": 3965 + }, + { + "epoch": 0.11644841153326678, + "grad_norm": 0.0, + "learning_rate": 1.961065737218505e-05, + "loss": 1.5098, + "step": 3966 + }, + { + "epoch": 0.11647777321040578, + "grad_norm": 0.0, + "learning_rate": 1.961039455952342e-05, + "loss": 1.3828, + "step": 3967 + }, + { + "epoch": 0.11650713488754477, + "grad_norm": 0.0, + "learning_rate": 1.9610131659952526e-05, + "loss": 1.5137, + "step": 3968 + }, + { + "epoch": 0.11653649656468378, + "grad_norm": 0.0, + "learning_rate": 1.9609868673474754e-05, + "loss": 1.582, + "step": 3969 + }, + { + "epoch": 0.11656585824182278, + "grad_norm": 0.0, + "learning_rate": 1.9609605600092475e-05, + "loss": 1.5537, + "step": 3970 + }, + { + "epoch": 0.11659521991896177, + "grad_norm": 0.0, + "learning_rate": 1.9609342439808075e-05, + "loss": 1.5459, + "step": 3971 + }, + { + "epoch": 0.11662458159610077, + "grad_norm": 0.0, + "learning_rate": 1.960907919262393e-05, + "loss": 1.4932, + "step": 3972 + }, + { + "epoch": 0.11665394327323977, + "grad_norm": 0.0, + "learning_rate": 1.960881585854242e-05, + "loss": 1.46, + "step": 3973 + }, + { + "epoch": 0.11668330495037876, + "grad_norm": 0.0, + "learning_rate": 1.9608552437565927e-05, + "loss": 1.6006, + "step": 3974 + }, + { + "epoch": 0.11671266662751777, + "grad_norm": 0.0, + "learning_rate": 1.960828892969683e-05, + "loss": 1.543, + "step": 3975 + }, + { + "epoch": 0.11674202830465676, + "grad_norm": 0.0, + "learning_rate": 1.9608025334937523e-05, + "loss": 1.3955, + "step": 3976 + }, + { + "epoch": 0.11677138998179576, + "grad_norm": 0.0, + "learning_rate": 1.960776165329038e-05, + "loss": 1.458, + "step": 3977 + }, + { + "epoch": 0.11680075165893476, + "grad_norm": 0.0, + "learning_rate": 1.9607497884757786e-05, + "loss": 1.5166, + "step": 3978 + }, + { + "epoch": 0.11683011333607375, + "grad_norm": 0.0, + "learning_rate": 1.9607234029342127e-05, + "loss": 1.335, + "step": 3979 + }, + { + "epoch": 0.11685947501321275, + "grad_norm": 0.0, + "learning_rate": 1.960697008704579e-05, + "loss": 1.5596, + "step": 3980 + }, + { + "epoch": 0.11688883669035176, + "grad_norm": 0.0, + "learning_rate": 1.9606706057871164e-05, + "loss": 1.542, + "step": 3981 + }, + { + "epoch": 0.11691819836749075, + "grad_norm": 0.0, + "learning_rate": 1.9606441941820635e-05, + "loss": 1.5303, + "step": 3982 + }, + { + "epoch": 0.11694756004462975, + "grad_norm": 0.0, + "learning_rate": 1.9606177738896587e-05, + "loss": 1.4102, + "step": 3983 + }, + { + "epoch": 0.11697692172176875, + "grad_norm": 0.0, + "learning_rate": 1.960591344910142e-05, + "loss": 1.4629, + "step": 3984 + }, + { + "epoch": 0.11700628339890774, + "grad_norm": 0.0, + "learning_rate": 1.9605649072437517e-05, + "loss": 1.541, + "step": 3985 + }, + { + "epoch": 0.11703564507604675, + "grad_norm": 0.0, + "learning_rate": 1.9605384608907263e-05, + "loss": 1.3379, + "step": 3986 + }, + { + "epoch": 0.11706500675318574, + "grad_norm": 0.0, + "learning_rate": 1.9605120058513064e-05, + "loss": 1.5, + "step": 3987 + }, + { + "epoch": 0.11709436843032474, + "grad_norm": 0.0, + "learning_rate": 1.9604855421257297e-05, + "loss": 1.4014, + "step": 3988 + }, + { + "epoch": 0.11712373010746374, + "grad_norm": 0.0, + "learning_rate": 1.9604590697142368e-05, + "loss": 1.502, + "step": 3989 + }, + { + "epoch": 0.11715309178460273, + "grad_norm": 0.0, + "learning_rate": 1.9604325886170663e-05, + "loss": 1.3506, + "step": 3990 + }, + { + "epoch": 0.11718245346174173, + "grad_norm": 0.0, + "learning_rate": 1.960406098834458e-05, + "loss": 1.4854, + "step": 3991 + }, + { + "epoch": 0.11721181513888074, + "grad_norm": 0.0, + "learning_rate": 1.9603796003666514e-05, + "loss": 1.4277, + "step": 3992 + }, + { + "epoch": 0.11724117681601973, + "grad_norm": 0.0, + "learning_rate": 1.9603530932138862e-05, + "loss": 1.5693, + "step": 3993 + }, + { + "epoch": 0.11727053849315873, + "grad_norm": 0.0, + "learning_rate": 1.9603265773764018e-05, + "loss": 1.502, + "step": 3994 + }, + { + "epoch": 0.11729990017029773, + "grad_norm": 0.0, + "learning_rate": 1.9603000528544383e-05, + "loss": 1.5674, + "step": 3995 + }, + { + "epoch": 0.11732926184743672, + "grad_norm": 0.0, + "learning_rate": 1.9602735196482357e-05, + "loss": 1.499, + "step": 3996 + }, + { + "epoch": 0.11735862352457573, + "grad_norm": 0.0, + "learning_rate": 1.9602469777580334e-05, + "loss": 1.4121, + "step": 3997 + }, + { + "epoch": 0.11738798520171473, + "grad_norm": 0.0, + "learning_rate": 1.9602204271840718e-05, + "loss": 1.4668, + "step": 3998 + }, + { + "epoch": 0.11741734687885372, + "grad_norm": 0.0, + "learning_rate": 1.9601938679265912e-05, + "loss": 1.5947, + "step": 3999 + }, + { + "epoch": 0.11744670855599272, + "grad_norm": 0.0, + "learning_rate": 1.9601672999858314e-05, + "loss": 1.4668, + "step": 4000 + }, + { + "epoch": 0.11747607023313171, + "grad_norm": 0.0, + "learning_rate": 1.9601407233620327e-05, + "loss": 1.4746, + "step": 4001 + }, + { + "epoch": 0.11750543191027071, + "grad_norm": 0.0, + "learning_rate": 1.960114138055436e-05, + "loss": 1.3926, + "step": 4002 + }, + { + "epoch": 0.11753479358740972, + "grad_norm": 0.0, + "learning_rate": 1.9600875440662808e-05, + "loss": 1.4111, + "step": 4003 + }, + { + "epoch": 0.1175641552645487, + "grad_norm": 0.0, + "learning_rate": 1.9600609413948082e-05, + "loss": 1.4326, + "step": 4004 + }, + { + "epoch": 0.11759351694168771, + "grad_norm": 0.0, + "learning_rate": 1.9600343300412585e-05, + "loss": 1.5283, + "step": 4005 + }, + { + "epoch": 0.11762287861882671, + "grad_norm": 0.0, + "learning_rate": 1.9600077100058724e-05, + "loss": 1.3994, + "step": 4006 + }, + { + "epoch": 0.1176522402959657, + "grad_norm": 0.0, + "learning_rate": 1.9599810812888913e-05, + "loss": 1.4326, + "step": 4007 + }, + { + "epoch": 0.1176816019731047, + "grad_norm": 0.0, + "learning_rate": 1.959954443890555e-05, + "loss": 1.46, + "step": 4008 + }, + { + "epoch": 0.11771096365024371, + "grad_norm": 0.0, + "learning_rate": 1.959927797811105e-05, + "loss": 1.5293, + "step": 4009 + }, + { + "epoch": 0.1177403253273827, + "grad_norm": 0.0, + "learning_rate": 1.9599011430507816e-05, + "loss": 1.4062, + "step": 4010 + }, + { + "epoch": 0.1177696870045217, + "grad_norm": 0.0, + "learning_rate": 1.9598744796098267e-05, + "loss": 1.5273, + "step": 4011 + }, + { + "epoch": 0.11779904868166069, + "grad_norm": 0.0, + "learning_rate": 1.959847807488481e-05, + "loss": 1.4912, + "step": 4012 + }, + { + "epoch": 0.11782841035879969, + "grad_norm": 0.0, + "learning_rate": 1.9598211266869858e-05, + "loss": 1.3535, + "step": 4013 + }, + { + "epoch": 0.1178577720359387, + "grad_norm": 0.0, + "learning_rate": 1.9597944372055825e-05, + "loss": 1.4492, + "step": 4014 + }, + { + "epoch": 0.11788713371307769, + "grad_norm": 0.0, + "learning_rate": 1.959767739044512e-05, + "loss": 1.3506, + "step": 4015 + }, + { + "epoch": 0.11791649539021669, + "grad_norm": 0.0, + "learning_rate": 1.9597410322040162e-05, + "loss": 1.5811, + "step": 4016 + }, + { + "epoch": 0.11794585706735569, + "grad_norm": 0.0, + "learning_rate": 1.9597143166843363e-05, + "loss": 1.4678, + "step": 4017 + }, + { + "epoch": 0.11797521874449468, + "grad_norm": 0.0, + "learning_rate": 1.9596875924857142e-05, + "loss": 1.4619, + "step": 4018 + }, + { + "epoch": 0.11800458042163368, + "grad_norm": 0.0, + "learning_rate": 1.9596608596083912e-05, + "loss": 1.5254, + "step": 4019 + }, + { + "epoch": 0.11803394209877269, + "grad_norm": 0.0, + "learning_rate": 1.9596341180526095e-05, + "loss": 1.5127, + "step": 4020 + }, + { + "epoch": 0.11806330377591168, + "grad_norm": 0.0, + "learning_rate": 1.959607367818611e-05, + "loss": 1.4854, + "step": 4021 + }, + { + "epoch": 0.11809266545305068, + "grad_norm": 0.0, + "learning_rate": 1.9595806089066368e-05, + "loss": 1.3428, + "step": 4022 + }, + { + "epoch": 0.11812202713018968, + "grad_norm": 0.0, + "learning_rate": 1.9595538413169295e-05, + "loss": 1.5332, + "step": 4023 + }, + { + "epoch": 0.11815138880732867, + "grad_norm": 0.0, + "learning_rate": 1.9595270650497312e-05, + "loss": 1.4941, + "step": 4024 + }, + { + "epoch": 0.11818075048446768, + "grad_norm": 0.0, + "learning_rate": 1.9595002801052838e-05, + "loss": 1.5508, + "step": 4025 + }, + { + "epoch": 0.11821011216160666, + "grad_norm": 0.0, + "learning_rate": 1.95947348648383e-05, + "loss": 1.417, + "step": 4026 + }, + { + "epoch": 0.11823947383874567, + "grad_norm": 0.0, + "learning_rate": 1.9594466841856113e-05, + "loss": 1.4512, + "step": 4027 + }, + { + "epoch": 0.11826883551588467, + "grad_norm": 0.0, + "learning_rate": 1.9594198732108704e-05, + "loss": 1.6348, + "step": 4028 + }, + { + "epoch": 0.11829819719302366, + "grad_norm": 0.0, + "learning_rate": 1.9593930535598503e-05, + "loss": 1.415, + "step": 4029 + }, + { + "epoch": 0.11832755887016266, + "grad_norm": 0.0, + "learning_rate": 1.9593662252327928e-05, + "loss": 1.5713, + "step": 4030 + }, + { + "epoch": 0.11835692054730167, + "grad_norm": 0.0, + "learning_rate": 1.959339388229941e-05, + "loss": 1.5508, + "step": 4031 + }, + { + "epoch": 0.11838628222444066, + "grad_norm": 0.0, + "learning_rate": 1.9593125425515372e-05, + "loss": 1.457, + "step": 4032 + }, + { + "epoch": 0.11841564390157966, + "grad_norm": 0.0, + "learning_rate": 1.9592856881978247e-05, + "loss": 1.3691, + "step": 4033 + }, + { + "epoch": 0.11844500557871866, + "grad_norm": 0.0, + "learning_rate": 1.959258825169046e-05, + "loss": 1.4912, + "step": 4034 + }, + { + "epoch": 0.11847436725585765, + "grad_norm": 0.0, + "learning_rate": 1.959231953465444e-05, + "loss": 1.4795, + "step": 4035 + }, + { + "epoch": 0.11850372893299665, + "grad_norm": 0.0, + "learning_rate": 1.9592050730872616e-05, + "loss": 1.5068, + "step": 4036 + }, + { + "epoch": 0.11853309061013564, + "grad_norm": 0.0, + "learning_rate": 1.9591781840347424e-05, + "loss": 1.5703, + "step": 4037 + }, + { + "epoch": 0.11856245228727465, + "grad_norm": 0.0, + "learning_rate": 1.959151286308129e-05, + "loss": 1.4199, + "step": 4038 + }, + { + "epoch": 0.11859181396441365, + "grad_norm": 0.0, + "learning_rate": 1.959124379907665e-05, + "loss": 1.5186, + "step": 4039 + }, + { + "epoch": 0.11862117564155264, + "grad_norm": 0.0, + "learning_rate": 1.9590974648335933e-05, + "loss": 1.5771, + "step": 4040 + }, + { + "epoch": 0.11865053731869164, + "grad_norm": 0.0, + "learning_rate": 1.959070541086158e-05, + "loss": 1.4199, + "step": 4041 + }, + { + "epoch": 0.11867989899583065, + "grad_norm": 0.0, + "learning_rate": 1.959043608665602e-05, + "loss": 1.4297, + "step": 4042 + }, + { + "epoch": 0.11870926067296964, + "grad_norm": 0.0, + "learning_rate": 1.959016667572169e-05, + "loss": 1.4756, + "step": 4043 + }, + { + "epoch": 0.11873862235010864, + "grad_norm": 0.0, + "learning_rate": 1.958989717806103e-05, + "loss": 1.5527, + "step": 4044 + }, + { + "epoch": 0.11876798402724764, + "grad_norm": 0.0, + "learning_rate": 1.958962759367647e-05, + "loss": 1.4629, + "step": 4045 + }, + { + "epoch": 0.11879734570438663, + "grad_norm": 0.0, + "learning_rate": 1.9589357922570453e-05, + "loss": 1.4834, + "step": 4046 + }, + { + "epoch": 0.11882670738152563, + "grad_norm": 0.0, + "learning_rate": 1.9589088164745417e-05, + "loss": 1.5625, + "step": 4047 + }, + { + "epoch": 0.11885606905866464, + "grad_norm": 0.0, + "learning_rate": 1.9588818320203798e-05, + "loss": 1.5049, + "step": 4048 + }, + { + "epoch": 0.11888543073580363, + "grad_norm": 0.0, + "learning_rate": 1.958854838894804e-05, + "loss": 1.54, + "step": 4049 + }, + { + "epoch": 0.11891479241294263, + "grad_norm": 0.0, + "learning_rate": 1.9588278370980583e-05, + "loss": 1.5156, + "step": 4050 + }, + { + "epoch": 0.11894415409008162, + "grad_norm": 0.0, + "learning_rate": 1.9588008266303874e-05, + "loss": 1.5176, + "step": 4051 + }, + { + "epoch": 0.11897351576722062, + "grad_norm": 0.0, + "learning_rate": 1.9587738074920345e-05, + "loss": 1.5342, + "step": 4052 + }, + { + "epoch": 0.11900287744435963, + "grad_norm": 0.0, + "learning_rate": 1.958746779683245e-05, + "loss": 1.4795, + "step": 4053 + }, + { + "epoch": 0.11903223912149861, + "grad_norm": 0.0, + "learning_rate": 1.9587197432042626e-05, + "loss": 1.4775, + "step": 4054 + }, + { + "epoch": 0.11906160079863762, + "grad_norm": 0.0, + "learning_rate": 1.9586926980553317e-05, + "loss": 1.46, + "step": 4055 + }, + { + "epoch": 0.11909096247577662, + "grad_norm": 0.0, + "learning_rate": 1.9586656442366975e-05, + "loss": 1.4717, + "step": 4056 + }, + { + "epoch": 0.11912032415291561, + "grad_norm": 0.0, + "learning_rate": 1.9586385817486044e-05, + "loss": 1.5098, + "step": 4057 + }, + { + "epoch": 0.11914968583005461, + "grad_norm": 0.0, + "learning_rate": 1.9586115105912972e-05, + "loss": 1.5273, + "step": 4058 + }, + { + "epoch": 0.11917904750719362, + "grad_norm": 0.0, + "learning_rate": 1.9585844307650205e-05, + "loss": 1.4658, + "step": 4059 + }, + { + "epoch": 0.1192084091843326, + "grad_norm": 0.0, + "learning_rate": 1.9585573422700192e-05, + "loss": 1.4893, + "step": 4060 + }, + { + "epoch": 0.11923777086147161, + "grad_norm": 0.0, + "learning_rate": 1.9585302451065386e-05, + "loss": 1.54, + "step": 4061 + }, + { + "epoch": 0.11926713253861061, + "grad_norm": 0.0, + "learning_rate": 1.9585031392748233e-05, + "loss": 1.4756, + "step": 4062 + }, + { + "epoch": 0.1192964942157496, + "grad_norm": 0.0, + "learning_rate": 1.9584760247751187e-05, + "loss": 1.4014, + "step": 4063 + }, + { + "epoch": 0.1193258558928886, + "grad_norm": 0.0, + "learning_rate": 1.9584489016076697e-05, + "loss": 1.4863, + "step": 4064 + }, + { + "epoch": 0.1193552175700276, + "grad_norm": 0.0, + "learning_rate": 1.958421769772722e-05, + "loss": 1.4668, + "step": 4065 + }, + { + "epoch": 0.1193845792471666, + "grad_norm": 0.0, + "learning_rate": 1.9583946292705208e-05, + "loss": 1.4697, + "step": 4066 + }, + { + "epoch": 0.1194139409243056, + "grad_norm": 0.0, + "learning_rate": 1.9583674801013118e-05, + "loss": 1.4326, + "step": 4067 + }, + { + "epoch": 0.11944330260144459, + "grad_norm": 0.0, + "learning_rate": 1.95834032226534e-05, + "loss": 1.5195, + "step": 4068 + }, + { + "epoch": 0.11947266427858359, + "grad_norm": 0.0, + "learning_rate": 1.9583131557628512e-05, + "loss": 1.5879, + "step": 4069 + }, + { + "epoch": 0.1195020259557226, + "grad_norm": 0.0, + "learning_rate": 1.958285980594091e-05, + "loss": 1.5625, + "step": 4070 + }, + { + "epoch": 0.11953138763286159, + "grad_norm": 0.0, + "learning_rate": 1.9582587967593053e-05, + "loss": 1.5381, + "step": 4071 + }, + { + "epoch": 0.11956074931000059, + "grad_norm": 0.0, + "learning_rate": 1.9582316042587398e-05, + "loss": 1.3252, + "step": 4072 + }, + { + "epoch": 0.11959011098713959, + "grad_norm": 0.0, + "learning_rate": 1.958204403092641e-05, + "loss": 1.5889, + "step": 4073 + }, + { + "epoch": 0.11961947266427858, + "grad_norm": 0.0, + "learning_rate": 1.958177193261254e-05, + "loss": 1.5244, + "step": 4074 + }, + { + "epoch": 0.11964883434141758, + "grad_norm": 0.0, + "learning_rate": 1.958149974764825e-05, + "loss": 1.4678, + "step": 4075 + }, + { + "epoch": 0.11967819601855657, + "grad_norm": 0.0, + "learning_rate": 1.9581227476036008e-05, + "loss": 1.5107, + "step": 4076 + }, + { + "epoch": 0.11970755769569558, + "grad_norm": 0.0, + "learning_rate": 1.9580955117778272e-05, + "loss": 1.4492, + "step": 4077 + }, + { + "epoch": 0.11973691937283458, + "grad_norm": 0.0, + "learning_rate": 1.9580682672877503e-05, + "loss": 1.3408, + "step": 4078 + }, + { + "epoch": 0.11976628104997357, + "grad_norm": 0.0, + "learning_rate": 1.958041014133617e-05, + "loss": 1.3555, + "step": 4079 + }, + { + "epoch": 0.11979564272711257, + "grad_norm": 0.0, + "learning_rate": 1.958013752315673e-05, + "loss": 1.4658, + "step": 4080 + }, + { + "epoch": 0.11982500440425158, + "grad_norm": 0.0, + "learning_rate": 1.9579864818341655e-05, + "loss": 1.3604, + "step": 4081 + }, + { + "epoch": 0.11985436608139056, + "grad_norm": 0.0, + "learning_rate": 1.957959202689341e-05, + "loss": 1.5225, + "step": 4082 + }, + { + "epoch": 0.11988372775852957, + "grad_norm": 0.0, + "learning_rate": 1.9579319148814462e-05, + "loss": 1.4287, + "step": 4083 + }, + { + "epoch": 0.11991308943566857, + "grad_norm": 0.0, + "learning_rate": 1.9579046184107272e-05, + "loss": 1.498, + "step": 4084 + }, + { + "epoch": 0.11994245111280756, + "grad_norm": 0.0, + "learning_rate": 1.957877313277432e-05, + "loss": 1.6162, + "step": 4085 + }, + { + "epoch": 0.11997181278994656, + "grad_norm": 0.0, + "learning_rate": 1.9578499994818068e-05, + "loss": 1.4111, + "step": 4086 + }, + { + "epoch": 0.12000117446708557, + "grad_norm": 0.0, + "learning_rate": 1.9578226770240987e-05, + "loss": 1.4258, + "step": 4087 + }, + { + "epoch": 0.12003053614422456, + "grad_norm": 0.0, + "learning_rate": 1.9577953459045548e-05, + "loss": 1.6006, + "step": 4088 + }, + { + "epoch": 0.12005989782136356, + "grad_norm": 0.0, + "learning_rate": 1.9577680061234223e-05, + "loss": 1.4062, + "step": 4089 + }, + { + "epoch": 0.12008925949850255, + "grad_norm": 0.0, + "learning_rate": 1.9577406576809484e-05, + "loss": 1.5635, + "step": 4090 + }, + { + "epoch": 0.12011862117564155, + "grad_norm": 0.0, + "learning_rate": 1.9577133005773806e-05, + "loss": 1.4092, + "step": 4091 + }, + { + "epoch": 0.12014798285278055, + "grad_norm": 0.0, + "learning_rate": 1.957685934812966e-05, + "loss": 1.46, + "step": 4092 + }, + { + "epoch": 0.12017734452991954, + "grad_norm": 0.0, + "learning_rate": 1.957658560387952e-05, + "loss": 1.3213, + "step": 4093 + }, + { + "epoch": 0.12020670620705855, + "grad_norm": 0.0, + "learning_rate": 1.957631177302587e-05, + "loss": 1.54, + "step": 4094 + }, + { + "epoch": 0.12023606788419755, + "grad_norm": 0.0, + "learning_rate": 1.9576037855571176e-05, + "loss": 1.5596, + "step": 4095 + }, + { + "epoch": 0.12026542956133654, + "grad_norm": 0.0, + "learning_rate": 1.957576385151792e-05, + "loss": 1.5352, + "step": 4096 + }, + { + "epoch": 0.12029479123847554, + "grad_norm": 0.0, + "learning_rate": 1.9575489760868577e-05, + "loss": 1.4111, + "step": 4097 + }, + { + "epoch": 0.12032415291561455, + "grad_norm": 0.0, + "learning_rate": 1.957521558362563e-05, + "loss": 1.4629, + "step": 4098 + }, + { + "epoch": 0.12035351459275354, + "grad_norm": 0.0, + "learning_rate": 1.9574941319791557e-05, + "loss": 1.5078, + "step": 4099 + }, + { + "epoch": 0.12038287626989254, + "grad_norm": 0.0, + "learning_rate": 1.9574666969368836e-05, + "loss": 1.3975, + "step": 4100 + }, + { + "epoch": 0.12041223794703153, + "grad_norm": 0.0, + "learning_rate": 1.9574392532359948e-05, + "loss": 1.5605, + "step": 4101 + }, + { + "epoch": 0.12044159962417053, + "grad_norm": 0.0, + "learning_rate": 1.957411800876738e-05, + "loss": 1.5078, + "step": 4102 + }, + { + "epoch": 0.12047096130130953, + "grad_norm": 0.0, + "learning_rate": 1.957384339859361e-05, + "loss": 1.3652, + "step": 4103 + }, + { + "epoch": 0.12050032297844852, + "grad_norm": 0.0, + "learning_rate": 1.957356870184112e-05, + "loss": 1.3994, + "step": 4104 + }, + { + "epoch": 0.12052968465558753, + "grad_norm": 0.0, + "learning_rate": 1.9573293918512396e-05, + "loss": 1.5127, + "step": 4105 + }, + { + "epoch": 0.12055904633272653, + "grad_norm": 0.0, + "learning_rate": 1.9573019048609926e-05, + "loss": 1.5459, + "step": 4106 + }, + { + "epoch": 0.12058840800986552, + "grad_norm": 0.0, + "learning_rate": 1.9572744092136192e-05, + "loss": 1.5371, + "step": 4107 + }, + { + "epoch": 0.12061776968700452, + "grad_norm": 0.0, + "learning_rate": 1.9572469049093682e-05, + "loss": 1.5156, + "step": 4108 + }, + { + "epoch": 0.12064713136414353, + "grad_norm": 0.0, + "learning_rate": 1.9572193919484882e-05, + "loss": 1.4287, + "step": 4109 + }, + { + "epoch": 0.12067649304128251, + "grad_norm": 0.0, + "learning_rate": 1.9571918703312284e-05, + "loss": 1.4707, + "step": 4110 + }, + { + "epoch": 0.12070585471842152, + "grad_norm": 0.0, + "learning_rate": 1.957164340057837e-05, + "loss": 1.4648, + "step": 4111 + }, + { + "epoch": 0.12073521639556052, + "grad_norm": 0.0, + "learning_rate": 1.9571368011285633e-05, + "loss": 1.4736, + "step": 4112 + }, + { + "epoch": 0.12076457807269951, + "grad_norm": 0.0, + "learning_rate": 1.9571092535436568e-05, + "loss": 1.459, + "step": 4113 + }, + { + "epoch": 0.12079393974983851, + "grad_norm": 0.0, + "learning_rate": 1.9570816973033658e-05, + "loss": 1.4082, + "step": 4114 + }, + { + "epoch": 0.1208233014269775, + "grad_norm": 0.0, + "learning_rate": 1.95705413240794e-05, + "loss": 1.3721, + "step": 4115 + }, + { + "epoch": 0.1208526631041165, + "grad_norm": 0.0, + "learning_rate": 1.957026558857629e-05, + "loss": 1.3125, + "step": 4116 + }, + { + "epoch": 0.12088202478125551, + "grad_norm": 0.0, + "learning_rate": 1.956998976652681e-05, + "loss": 1.5117, + "step": 4117 + }, + { + "epoch": 0.1209113864583945, + "grad_norm": 0.0, + "learning_rate": 1.9569713857933464e-05, + "loss": 1.4922, + "step": 4118 + }, + { + "epoch": 0.1209407481355335, + "grad_norm": 0.0, + "learning_rate": 1.9569437862798745e-05, + "loss": 1.4941, + "step": 4119 + }, + { + "epoch": 0.1209701098126725, + "grad_norm": 0.0, + "learning_rate": 1.956916178112515e-05, + "loss": 1.4824, + "step": 4120 + }, + { + "epoch": 0.1209994714898115, + "grad_norm": 0.0, + "learning_rate": 1.9568885612915172e-05, + "loss": 1.5684, + "step": 4121 + }, + { + "epoch": 0.1210288331669505, + "grad_norm": 0.0, + "learning_rate": 1.9568609358171313e-05, + "loss": 1.5732, + "step": 4122 + }, + { + "epoch": 0.1210581948440895, + "grad_norm": 0.0, + "learning_rate": 1.9568333016896072e-05, + "loss": 1.4746, + "step": 4123 + }, + { + "epoch": 0.12108755652122849, + "grad_norm": 0.0, + "learning_rate": 1.956805658909194e-05, + "loss": 1.5547, + "step": 4124 + }, + { + "epoch": 0.1211169181983675, + "grad_norm": 0.0, + "learning_rate": 1.9567780074761423e-05, + "loss": 1.5586, + "step": 4125 + }, + { + "epoch": 0.12114627987550648, + "grad_norm": 0.0, + "learning_rate": 1.9567503473907023e-05, + "loss": 1.4268, + "step": 4126 + }, + { + "epoch": 0.12117564155264549, + "grad_norm": 0.0, + "learning_rate": 1.9567226786531236e-05, + "loss": 1.4639, + "step": 4127 + }, + { + "epoch": 0.12120500322978449, + "grad_norm": 0.0, + "learning_rate": 1.9566950012636567e-05, + "loss": 1.3369, + "step": 4128 + }, + { + "epoch": 0.12123436490692348, + "grad_norm": 0.0, + "learning_rate": 1.956667315222552e-05, + "loss": 1.4727, + "step": 4129 + }, + { + "epoch": 0.12126372658406248, + "grad_norm": 0.0, + "learning_rate": 1.9566396205300595e-05, + "loss": 1.4111, + "step": 4130 + }, + { + "epoch": 0.12129308826120148, + "grad_norm": 0.0, + "learning_rate": 1.95661191718643e-05, + "loss": 1.6133, + "step": 4131 + }, + { + "epoch": 0.12132244993834047, + "grad_norm": 0.0, + "learning_rate": 1.9565842051919142e-05, + "loss": 1.5059, + "step": 4132 + }, + { + "epoch": 0.12135181161547948, + "grad_norm": 0.0, + "learning_rate": 1.9565564845467623e-05, + "loss": 1.5127, + "step": 4133 + }, + { + "epoch": 0.12138117329261848, + "grad_norm": 0.0, + "learning_rate": 1.9565287552512254e-05, + "loss": 1.5254, + "step": 4134 + }, + { + "epoch": 0.12141053496975747, + "grad_norm": 0.0, + "learning_rate": 1.9565010173055533e-05, + "loss": 1.4346, + "step": 4135 + }, + { + "epoch": 0.12143989664689647, + "grad_norm": 0.0, + "learning_rate": 1.9564732707099984e-05, + "loss": 1.4385, + "step": 4136 + }, + { + "epoch": 0.12146925832403548, + "grad_norm": 0.0, + "learning_rate": 1.95644551546481e-05, + "loss": 1.4658, + "step": 4137 + }, + { + "epoch": 0.12149862000117446, + "grad_norm": 0.0, + "learning_rate": 1.9564177515702405e-05, + "loss": 1.4126, + "step": 4138 + }, + { + "epoch": 0.12152798167831347, + "grad_norm": 0.0, + "learning_rate": 1.95638997902654e-05, + "loss": 1.4248, + "step": 4139 + }, + { + "epoch": 0.12155734335545246, + "grad_norm": 0.0, + "learning_rate": 1.95636219783396e-05, + "loss": 1.6309, + "step": 4140 + }, + { + "epoch": 0.12158670503259146, + "grad_norm": 0.0, + "learning_rate": 1.9563344079927516e-05, + "loss": 1.3389, + "step": 4141 + }, + { + "epoch": 0.12161606670973046, + "grad_norm": 0.0, + "learning_rate": 1.9563066095031665e-05, + "loss": 1.4922, + "step": 4142 + }, + { + "epoch": 0.12164542838686945, + "grad_norm": 0.0, + "learning_rate": 1.9562788023654556e-05, + "loss": 1.5625, + "step": 4143 + }, + { + "epoch": 0.12167479006400846, + "grad_norm": 0.0, + "learning_rate": 1.9562509865798707e-05, + "loss": 1.5703, + "step": 4144 + }, + { + "epoch": 0.12170415174114746, + "grad_norm": 0.0, + "learning_rate": 1.956223162146663e-05, + "loss": 1.5234, + "step": 4145 + }, + { + "epoch": 0.12173351341828645, + "grad_norm": 0.0, + "learning_rate": 1.9561953290660845e-05, + "loss": 1.627, + "step": 4146 + }, + { + "epoch": 0.12176287509542545, + "grad_norm": 0.0, + "learning_rate": 1.956167487338387e-05, + "loss": 1.3887, + "step": 4147 + }, + { + "epoch": 0.12179223677256446, + "grad_norm": 0.0, + "learning_rate": 1.9561396369638217e-05, + "loss": 1.4688, + "step": 4148 + }, + { + "epoch": 0.12182159844970344, + "grad_norm": 0.0, + "learning_rate": 1.9561117779426407e-05, + "loss": 1.4199, + "step": 4149 + }, + { + "epoch": 0.12185096012684245, + "grad_norm": 0.0, + "learning_rate": 1.9560839102750962e-05, + "loss": 1.4775, + "step": 4150 + }, + { + "epoch": 0.12188032180398144, + "grad_norm": 0.0, + "learning_rate": 1.9560560339614402e-05, + "loss": 1.415, + "step": 4151 + }, + { + "epoch": 0.12190968348112044, + "grad_norm": 0.0, + "learning_rate": 1.9560281490019247e-05, + "loss": 1.5186, + "step": 4152 + }, + { + "epoch": 0.12193904515825944, + "grad_norm": 0.0, + "learning_rate": 1.9560002553968016e-05, + "loss": 1.3564, + "step": 4153 + }, + { + "epoch": 0.12196840683539843, + "grad_norm": 0.0, + "learning_rate": 1.955972353146323e-05, + "loss": 1.4561, + "step": 4154 + }, + { + "epoch": 0.12199776851253744, + "grad_norm": 0.0, + "learning_rate": 1.9559444422507422e-05, + "loss": 1.3379, + "step": 4155 + }, + { + "epoch": 0.12202713018967644, + "grad_norm": 0.0, + "learning_rate": 1.955916522710311e-05, + "loss": 1.5762, + "step": 4156 + }, + { + "epoch": 0.12205649186681543, + "grad_norm": 0.0, + "learning_rate": 1.9558885945252813e-05, + "loss": 1.3467, + "step": 4157 + }, + { + "epoch": 0.12208585354395443, + "grad_norm": 0.0, + "learning_rate": 1.9558606576959066e-05, + "loss": 1.459, + "step": 4158 + }, + { + "epoch": 0.12211521522109343, + "grad_norm": 0.0, + "learning_rate": 1.955832712222439e-05, + "loss": 1.5059, + "step": 4159 + }, + { + "epoch": 0.12214457689823242, + "grad_norm": 0.0, + "learning_rate": 1.9558047581051317e-05, + "loss": 1.5, + "step": 4160 + }, + { + "epoch": 0.12217393857537143, + "grad_norm": 0.0, + "learning_rate": 1.955776795344237e-05, + "loss": 1.4805, + "step": 4161 + }, + { + "epoch": 0.12220330025251043, + "grad_norm": 0.0, + "learning_rate": 1.955748823940008e-05, + "loss": 1.5195, + "step": 4162 + }, + { + "epoch": 0.12223266192964942, + "grad_norm": 0.0, + "learning_rate": 1.9557208438926976e-05, + "loss": 1.5098, + "step": 4163 + }, + { + "epoch": 0.12226202360678842, + "grad_norm": 0.0, + "learning_rate": 1.9556928552025587e-05, + "loss": 1.4727, + "step": 4164 + }, + { + "epoch": 0.12229138528392741, + "grad_norm": 0.0, + "learning_rate": 1.955664857869845e-05, + "loss": 1.459, + "step": 4165 + }, + { + "epoch": 0.12232074696106641, + "grad_norm": 0.0, + "learning_rate": 1.9556368518948088e-05, + "loss": 1.5518, + "step": 4166 + }, + { + "epoch": 0.12235010863820542, + "grad_norm": 0.0, + "learning_rate": 1.9556088372777036e-05, + "loss": 1.3945, + "step": 4167 + }, + { + "epoch": 0.12237947031534441, + "grad_norm": 0.0, + "learning_rate": 1.9555808140187834e-05, + "loss": 1.4414, + "step": 4168 + }, + { + "epoch": 0.12240883199248341, + "grad_norm": 0.0, + "learning_rate": 1.9555527821183013e-05, + "loss": 1.4292, + "step": 4169 + }, + { + "epoch": 0.12243819366962241, + "grad_norm": 0.0, + "learning_rate": 1.9555247415765103e-05, + "loss": 1.625, + "step": 4170 + }, + { + "epoch": 0.1224675553467614, + "grad_norm": 0.0, + "learning_rate": 1.9554966923936646e-05, + "loss": 1.5205, + "step": 4171 + }, + { + "epoch": 0.1224969170239004, + "grad_norm": 0.0, + "learning_rate": 1.9554686345700173e-05, + "loss": 1.627, + "step": 4172 + }, + { + "epoch": 0.12252627870103941, + "grad_norm": 0.0, + "learning_rate": 1.9554405681058224e-05, + "loss": 1.3916, + "step": 4173 + }, + { + "epoch": 0.1225556403781784, + "grad_norm": 0.0, + "learning_rate": 1.9554124930013342e-05, + "loss": 1.3965, + "step": 4174 + }, + { + "epoch": 0.1225850020553174, + "grad_norm": 0.0, + "learning_rate": 1.955384409256806e-05, + "loss": 1.376, + "step": 4175 + }, + { + "epoch": 0.12261436373245639, + "grad_norm": 0.0, + "learning_rate": 1.9553563168724915e-05, + "loss": 1.4824, + "step": 4176 + }, + { + "epoch": 0.1226437254095954, + "grad_norm": 0.0, + "learning_rate": 1.9553282158486456e-05, + "loss": 1.4727, + "step": 4177 + }, + { + "epoch": 0.1226730870867344, + "grad_norm": 0.0, + "learning_rate": 1.9553001061855218e-05, + "loss": 1.4551, + "step": 4178 + }, + { + "epoch": 0.12270244876387339, + "grad_norm": 0.0, + "learning_rate": 1.9552719878833744e-05, + "loss": 1.374, + "step": 4179 + }, + { + "epoch": 0.12273181044101239, + "grad_norm": 0.0, + "learning_rate": 1.955243860942458e-05, + "loss": 1.4883, + "step": 4180 + }, + { + "epoch": 0.1227611721181514, + "grad_norm": 0.0, + "learning_rate": 1.9552157253630265e-05, + "loss": 1.5068, + "step": 4181 + }, + { + "epoch": 0.12279053379529038, + "grad_norm": 0.0, + "learning_rate": 1.955187581145335e-05, + "loss": 1.5801, + "step": 4182 + }, + { + "epoch": 0.12281989547242939, + "grad_norm": 0.0, + "learning_rate": 1.955159428289637e-05, + "loss": 1.5176, + "step": 4183 + }, + { + "epoch": 0.12284925714956839, + "grad_norm": 0.0, + "learning_rate": 1.955131266796188e-05, + "loss": 1.3916, + "step": 4184 + }, + { + "epoch": 0.12287861882670738, + "grad_norm": 0.0, + "learning_rate": 1.9551030966652422e-05, + "loss": 1.4736, + "step": 4185 + }, + { + "epoch": 0.12290798050384638, + "grad_norm": 0.0, + "learning_rate": 1.9550749178970547e-05, + "loss": 1.3828, + "step": 4186 + }, + { + "epoch": 0.12293734218098538, + "grad_norm": 0.0, + "learning_rate": 1.9550467304918798e-05, + "loss": 1.4629, + "step": 4187 + }, + { + "epoch": 0.12296670385812437, + "grad_norm": 0.0, + "learning_rate": 1.955018534449973e-05, + "loss": 1.5068, + "step": 4188 + }, + { + "epoch": 0.12299606553526338, + "grad_norm": 0.0, + "learning_rate": 1.9549903297715888e-05, + "loss": 1.4609, + "step": 4189 + }, + { + "epoch": 0.12302542721240237, + "grad_norm": 0.0, + "learning_rate": 1.9549621164569825e-05, + "loss": 1.5518, + "step": 4190 + }, + { + "epoch": 0.12305478888954137, + "grad_norm": 0.0, + "learning_rate": 1.9549338945064095e-05, + "loss": 1.5371, + "step": 4191 + }, + { + "epoch": 0.12308415056668037, + "grad_norm": 0.0, + "learning_rate": 1.9549056639201245e-05, + "loss": 1.3867, + "step": 4192 + }, + { + "epoch": 0.12311351224381936, + "grad_norm": 0.0, + "learning_rate": 1.954877424698383e-05, + "loss": 1.4805, + "step": 4193 + }, + { + "epoch": 0.12314287392095837, + "grad_norm": 0.0, + "learning_rate": 1.9548491768414405e-05, + "loss": 1.4014, + "step": 4194 + }, + { + "epoch": 0.12317223559809737, + "grad_norm": 0.0, + "learning_rate": 1.9548209203495524e-05, + "loss": 1.4688, + "step": 4195 + }, + { + "epoch": 0.12320159727523636, + "grad_norm": 0.0, + "learning_rate": 1.954792655222974e-05, + "loss": 1.333, + "step": 4196 + }, + { + "epoch": 0.12323095895237536, + "grad_norm": 0.0, + "learning_rate": 1.9547643814619612e-05, + "loss": 1.3857, + "step": 4197 + }, + { + "epoch": 0.12326032062951436, + "grad_norm": 0.0, + "learning_rate": 1.9547360990667698e-05, + "loss": 1.4531, + "step": 4198 + }, + { + "epoch": 0.12328968230665335, + "grad_norm": 0.0, + "learning_rate": 1.9547078080376546e-05, + "loss": 1.3633, + "step": 4199 + }, + { + "epoch": 0.12331904398379236, + "grad_norm": 0.0, + "learning_rate": 1.954679508374873e-05, + "loss": 1.3604, + "step": 4200 + }, + { + "epoch": 0.12334840566093135, + "grad_norm": 0.0, + "learning_rate": 1.9546512000786798e-05, + "loss": 1.4746, + "step": 4201 + }, + { + "epoch": 0.12337776733807035, + "grad_norm": 0.0, + "learning_rate": 1.9546228831493314e-05, + "loss": 1.4131, + "step": 4202 + }, + { + "epoch": 0.12340712901520935, + "grad_norm": 0.0, + "learning_rate": 1.9545945575870835e-05, + "loss": 1.5732, + "step": 4203 + }, + { + "epoch": 0.12343649069234834, + "grad_norm": 0.0, + "learning_rate": 1.9545662233921926e-05, + "loss": 1.2988, + "step": 4204 + }, + { + "epoch": 0.12346585236948734, + "grad_norm": 0.0, + "learning_rate": 1.954537880564915e-05, + "loss": 1.4629, + "step": 4205 + }, + { + "epoch": 0.12349521404662635, + "grad_norm": 0.0, + "learning_rate": 1.954509529105507e-05, + "loss": 1.501, + "step": 4206 + }, + { + "epoch": 0.12352457572376534, + "grad_norm": 0.0, + "learning_rate": 1.954481169014225e-05, + "loss": 1.5078, + "step": 4207 + }, + { + "epoch": 0.12355393740090434, + "grad_norm": 0.0, + "learning_rate": 1.9544528002913247e-05, + "loss": 1.5879, + "step": 4208 + }, + { + "epoch": 0.12358329907804334, + "grad_norm": 0.0, + "learning_rate": 1.954424422937064e-05, + "loss": 1.8379, + "step": 4209 + }, + { + "epoch": 0.12361266075518233, + "grad_norm": 0.0, + "learning_rate": 1.9543960369516983e-05, + "loss": 1.5137, + "step": 4210 + }, + { + "epoch": 0.12364202243232134, + "grad_norm": 0.0, + "learning_rate": 1.954367642335485e-05, + "loss": 1.4824, + "step": 4211 + }, + { + "epoch": 0.12367138410946034, + "grad_norm": 0.0, + "learning_rate": 1.9543392390886808e-05, + "loss": 1.4199, + "step": 4212 + }, + { + "epoch": 0.12370074578659933, + "grad_norm": 0.0, + "learning_rate": 1.9543108272115424e-05, + "loss": 1.4268, + "step": 4213 + }, + { + "epoch": 0.12373010746373833, + "grad_norm": 0.0, + "learning_rate": 1.954282406704327e-05, + "loss": 1.4883, + "step": 4214 + }, + { + "epoch": 0.12375946914087732, + "grad_norm": 0.0, + "learning_rate": 1.954253977567291e-05, + "loss": 1.3975, + "step": 4215 + }, + { + "epoch": 0.12378883081801632, + "grad_norm": 0.0, + "learning_rate": 1.954225539800692e-05, + "loss": 1.5898, + "step": 4216 + }, + { + "epoch": 0.12381819249515533, + "grad_norm": 0.0, + "learning_rate": 1.9541970934047868e-05, + "loss": 1.5576, + "step": 4217 + }, + { + "epoch": 0.12384755417229432, + "grad_norm": 0.0, + "learning_rate": 1.9541686383798334e-05, + "loss": 1.4795, + "step": 4218 + }, + { + "epoch": 0.12387691584943332, + "grad_norm": 0.0, + "learning_rate": 1.9541401747260887e-05, + "loss": 1.5342, + "step": 4219 + }, + { + "epoch": 0.12390627752657232, + "grad_norm": 0.0, + "learning_rate": 1.9541117024438098e-05, + "loss": 1.4316, + "step": 4220 + }, + { + "epoch": 0.12393563920371131, + "grad_norm": 0.0, + "learning_rate": 1.9540832215332545e-05, + "loss": 1.5381, + "step": 4221 + }, + { + "epoch": 0.12396500088085032, + "grad_norm": 0.0, + "learning_rate": 1.95405473199468e-05, + "loss": 1.4639, + "step": 4222 + }, + { + "epoch": 0.12399436255798932, + "grad_norm": 0.0, + "learning_rate": 1.9540262338283444e-05, + "loss": 1.5137, + "step": 4223 + }, + { + "epoch": 0.12402372423512831, + "grad_norm": 0.0, + "learning_rate": 1.9539977270345052e-05, + "loss": 1.5439, + "step": 4224 + }, + { + "epoch": 0.12405308591226731, + "grad_norm": 0.0, + "learning_rate": 1.9539692116134204e-05, + "loss": 1.6123, + "step": 4225 + }, + { + "epoch": 0.1240824475894063, + "grad_norm": 0.0, + "learning_rate": 1.9539406875653473e-05, + "loss": 1.583, + "step": 4226 + }, + { + "epoch": 0.1241118092665453, + "grad_norm": 0.0, + "learning_rate": 1.953912154890545e-05, + "loss": 1.5723, + "step": 4227 + }, + { + "epoch": 0.1241411709436843, + "grad_norm": 0.0, + "learning_rate": 1.95388361358927e-05, + "loss": 1.3965, + "step": 4228 + }, + { + "epoch": 0.1241705326208233, + "grad_norm": 0.0, + "learning_rate": 1.9538550636617816e-05, + "loss": 1.5332, + "step": 4229 + }, + { + "epoch": 0.1241998942979623, + "grad_norm": 0.0, + "learning_rate": 1.9538265051083374e-05, + "loss": 1.4854, + "step": 4230 + }, + { + "epoch": 0.1242292559751013, + "grad_norm": 0.0, + "learning_rate": 1.953797937929196e-05, + "loss": 1.5264, + "step": 4231 + }, + { + "epoch": 0.12425861765224029, + "grad_norm": 0.0, + "learning_rate": 1.9537693621246156e-05, + "loss": 1.5205, + "step": 4232 + }, + { + "epoch": 0.1242879793293793, + "grad_norm": 0.0, + "learning_rate": 1.9537407776948546e-05, + "loss": 1.4824, + "step": 4233 + }, + { + "epoch": 0.1243173410065183, + "grad_norm": 0.0, + "learning_rate": 1.953712184640171e-05, + "loss": 1.4541, + "step": 4234 + }, + { + "epoch": 0.12434670268365729, + "grad_norm": 0.0, + "learning_rate": 1.9536835829608244e-05, + "loss": 1.5273, + "step": 4235 + }, + { + "epoch": 0.12437606436079629, + "grad_norm": 0.0, + "learning_rate": 1.953654972657073e-05, + "loss": 1.5967, + "step": 4236 + }, + { + "epoch": 0.1244054260379353, + "grad_norm": 0.0, + "learning_rate": 1.9536263537291752e-05, + "loss": 1.5049, + "step": 4237 + }, + { + "epoch": 0.12443478771507428, + "grad_norm": 0.0, + "learning_rate": 1.95359772617739e-05, + "loss": 1.4619, + "step": 4238 + }, + { + "epoch": 0.12446414939221329, + "grad_norm": 0.0, + "learning_rate": 1.9535690900019767e-05, + "loss": 1.5303, + "step": 4239 + }, + { + "epoch": 0.12449351106935228, + "grad_norm": 0.0, + "learning_rate": 1.9535404452031933e-05, + "loss": 1.3711, + "step": 4240 + }, + { + "epoch": 0.12452287274649128, + "grad_norm": 0.0, + "learning_rate": 1.9535117917812998e-05, + "loss": 1.5225, + "step": 4241 + }, + { + "epoch": 0.12455223442363028, + "grad_norm": 0.0, + "learning_rate": 1.953483129736555e-05, + "loss": 1.4688, + "step": 4242 + }, + { + "epoch": 0.12458159610076927, + "grad_norm": 0.0, + "learning_rate": 1.9534544590692183e-05, + "loss": 1.5625, + "step": 4243 + }, + { + "epoch": 0.12461095777790827, + "grad_norm": 0.0, + "learning_rate": 1.953425779779548e-05, + "loss": 1.5332, + "step": 4244 + }, + { + "epoch": 0.12464031945504728, + "grad_norm": 0.0, + "learning_rate": 1.9533970918678048e-05, + "loss": 1.4307, + "step": 4245 + }, + { + "epoch": 0.12466968113218627, + "grad_norm": 0.0, + "learning_rate": 1.9533683953342476e-05, + "loss": 1.4297, + "step": 4246 + }, + { + "epoch": 0.12469904280932527, + "grad_norm": 0.0, + "learning_rate": 1.9533396901791355e-05, + "loss": 1.5381, + "step": 4247 + }, + { + "epoch": 0.12472840448646427, + "grad_norm": 0.0, + "learning_rate": 1.9533109764027285e-05, + "loss": 1.3242, + "step": 4248 + }, + { + "epoch": 0.12475776616360326, + "grad_norm": 0.0, + "learning_rate": 1.953282254005286e-05, + "loss": 1.5205, + "step": 4249 + }, + { + "epoch": 0.12478712784074227, + "grad_norm": 0.0, + "learning_rate": 1.9532535229870683e-05, + "loss": 1.3691, + "step": 4250 + }, + { + "epoch": 0.12481648951788125, + "grad_norm": 0.0, + "learning_rate": 1.953224783348335e-05, + "loss": 1.4609, + "step": 4251 + }, + { + "epoch": 0.12484585119502026, + "grad_norm": 0.0, + "learning_rate": 1.9531960350893454e-05, + "loss": 1.4756, + "step": 4252 + }, + { + "epoch": 0.12487521287215926, + "grad_norm": 0.0, + "learning_rate": 1.95316727821036e-05, + "loss": 1.502, + "step": 4253 + }, + { + "epoch": 0.12490457454929825, + "grad_norm": 0.0, + "learning_rate": 1.9531385127116392e-05, + "loss": 1.4746, + "step": 4254 + }, + { + "epoch": 0.12493393622643725, + "grad_norm": 0.0, + "learning_rate": 1.9531097385934423e-05, + "loss": 1.3838, + "step": 4255 + }, + { + "epoch": 0.12496329790357626, + "grad_norm": 0.0, + "learning_rate": 1.9530809558560303e-05, + "loss": 1.5986, + "step": 4256 + }, + { + "epoch": 0.12499265958071525, + "grad_norm": 0.0, + "learning_rate": 1.953052164499663e-05, + "loss": 1.5215, + "step": 4257 + }, + { + "epoch": 0.12502202125785425, + "grad_norm": 0.0, + "learning_rate": 1.9530233645246008e-05, + "loss": 1.418, + "step": 4258 + }, + { + "epoch": 0.12505138293499324, + "grad_norm": 0.0, + "learning_rate": 1.952994555931104e-05, + "loss": 1.5127, + "step": 4259 + }, + { + "epoch": 0.12508074461213226, + "grad_norm": 0.0, + "learning_rate": 1.9529657387194335e-05, + "loss": 1.4717, + "step": 4260 + }, + { + "epoch": 0.12511010628927124, + "grad_norm": 0.0, + "learning_rate": 1.95293691288985e-05, + "loss": 1.501, + "step": 4261 + }, + { + "epoch": 0.12513946796641023, + "grad_norm": 0.0, + "learning_rate": 1.952908078442614e-05, + "loss": 1.4775, + "step": 4262 + }, + { + "epoch": 0.12516882964354925, + "grad_norm": 0.0, + "learning_rate": 1.9528792353779858e-05, + "loss": 1.3857, + "step": 4263 + }, + { + "epoch": 0.12519819132068824, + "grad_norm": 0.0, + "learning_rate": 1.952850383696227e-05, + "loss": 1.5674, + "step": 4264 + }, + { + "epoch": 0.12522755299782723, + "grad_norm": 0.0, + "learning_rate": 1.952821523397598e-05, + "loss": 1.46, + "step": 4265 + }, + { + "epoch": 0.12525691467496625, + "grad_norm": 0.0, + "learning_rate": 1.95279265448236e-05, + "loss": 1.3057, + "step": 4266 + }, + { + "epoch": 0.12528627635210524, + "grad_norm": 0.0, + "learning_rate": 1.9527637769507742e-05, + "loss": 1.4736, + "step": 4267 + }, + { + "epoch": 0.12531563802924423, + "grad_norm": 0.0, + "learning_rate": 1.9527348908031012e-05, + "loss": 1.6504, + "step": 4268 + }, + { + "epoch": 0.12534499970638324, + "grad_norm": 0.0, + "learning_rate": 1.9527059960396028e-05, + "loss": 1.459, + "step": 4269 + }, + { + "epoch": 0.12537436138352223, + "grad_norm": 0.0, + "learning_rate": 1.95267709266054e-05, + "loss": 1.3857, + "step": 4270 + }, + { + "epoch": 0.12540372306066122, + "grad_norm": 0.0, + "learning_rate": 1.9526481806661742e-05, + "loss": 1.5439, + "step": 4271 + }, + { + "epoch": 0.1254330847378002, + "grad_norm": 0.0, + "learning_rate": 1.9526192600567672e-05, + "loss": 1.3545, + "step": 4272 + }, + { + "epoch": 0.12546244641493923, + "grad_norm": 0.0, + "learning_rate": 1.95259033083258e-05, + "loss": 1.3936, + "step": 4273 + }, + { + "epoch": 0.12549180809207822, + "grad_norm": 0.0, + "learning_rate": 1.9525613929938744e-05, + "loss": 1.4448, + "step": 4274 + }, + { + "epoch": 0.1255211697692172, + "grad_norm": 0.0, + "learning_rate": 1.9525324465409127e-05, + "loss": 1.3369, + "step": 4275 + }, + { + "epoch": 0.12555053144635622, + "grad_norm": 0.0, + "learning_rate": 1.9525034914739557e-05, + "loss": 1.4424, + "step": 4276 + }, + { + "epoch": 0.1255798931234952, + "grad_norm": 0.0, + "learning_rate": 1.9524745277932655e-05, + "loss": 1.3984, + "step": 4277 + }, + { + "epoch": 0.1256092548006342, + "grad_norm": 0.0, + "learning_rate": 1.9524455554991043e-05, + "loss": 1.4141, + "step": 4278 + }, + { + "epoch": 0.12563861647777322, + "grad_norm": 0.0, + "learning_rate": 1.952416574591734e-05, + "loss": 1.4277, + "step": 4279 + }, + { + "epoch": 0.1256679781549122, + "grad_norm": 0.0, + "learning_rate": 1.9523875850714172e-05, + "loss": 1.4082, + "step": 4280 + }, + { + "epoch": 0.1256973398320512, + "grad_norm": 0.0, + "learning_rate": 1.952358586938415e-05, + "loss": 1.4775, + "step": 4281 + }, + { + "epoch": 0.12572670150919021, + "grad_norm": 0.0, + "learning_rate": 1.9523295801929906e-05, + "loss": 1.4385, + "step": 4282 + }, + { + "epoch": 0.1257560631863292, + "grad_norm": 0.0, + "learning_rate": 1.9523005648354057e-05, + "loss": 1.3926, + "step": 4283 + }, + { + "epoch": 0.1257854248634682, + "grad_norm": 0.0, + "learning_rate": 1.952271540865923e-05, + "loss": 1.4287, + "step": 4284 + }, + { + "epoch": 0.1258147865406072, + "grad_norm": 0.0, + "learning_rate": 1.952242508284805e-05, + "loss": 1.4658, + "step": 4285 + }, + { + "epoch": 0.1258441482177462, + "grad_norm": 0.0, + "learning_rate": 1.952213467092314e-05, + "loss": 1.4424, + "step": 4286 + }, + { + "epoch": 0.1258735098948852, + "grad_norm": 0.0, + "learning_rate": 1.9521844172887126e-05, + "loss": 1.4541, + "step": 4287 + }, + { + "epoch": 0.1259028715720242, + "grad_norm": 0.0, + "learning_rate": 1.9521553588742638e-05, + "loss": 1.458, + "step": 4288 + }, + { + "epoch": 0.1259322332491632, + "grad_norm": 0.0, + "learning_rate": 1.95212629184923e-05, + "loss": 1.4395, + "step": 4289 + }, + { + "epoch": 0.12596159492630218, + "grad_norm": 0.0, + "learning_rate": 1.9520972162138744e-05, + "loss": 1.4521, + "step": 4290 + }, + { + "epoch": 0.1259909566034412, + "grad_norm": 0.0, + "learning_rate": 1.95206813196846e-05, + "loss": 1.3501, + "step": 4291 + }, + { + "epoch": 0.1260203182805802, + "grad_norm": 0.0, + "learning_rate": 1.9520390391132496e-05, + "loss": 1.5225, + "step": 4292 + }, + { + "epoch": 0.12604967995771918, + "grad_norm": 0.0, + "learning_rate": 1.9520099376485064e-05, + "loss": 1.5225, + "step": 4293 + }, + { + "epoch": 0.1260790416348582, + "grad_norm": 0.0, + "learning_rate": 1.9519808275744933e-05, + "loss": 1.3804, + "step": 4294 + }, + { + "epoch": 0.12610840331199719, + "grad_norm": 0.0, + "learning_rate": 1.9519517088914737e-05, + "loss": 1.5078, + "step": 4295 + }, + { + "epoch": 0.12613776498913618, + "grad_norm": 0.0, + "learning_rate": 1.9519225815997114e-05, + "loss": 1.4023, + "step": 4296 + }, + { + "epoch": 0.12616712666627516, + "grad_norm": 0.0, + "learning_rate": 1.9518934456994692e-05, + "loss": 1.4043, + "step": 4297 + }, + { + "epoch": 0.12619648834341418, + "grad_norm": 0.0, + "learning_rate": 1.951864301191011e-05, + "loss": 1.4736, + "step": 4298 + }, + { + "epoch": 0.12622585002055317, + "grad_norm": 0.0, + "learning_rate": 1.9518351480746e-05, + "loss": 1.501, + "step": 4299 + }, + { + "epoch": 0.12625521169769216, + "grad_norm": 0.0, + "learning_rate": 1.9518059863505e-05, + "loss": 1.6221, + "step": 4300 + }, + { + "epoch": 0.12628457337483118, + "grad_norm": 0.0, + "learning_rate": 1.9517768160189747e-05, + "loss": 1.3672, + "step": 4301 + }, + { + "epoch": 0.12631393505197017, + "grad_norm": 0.0, + "learning_rate": 1.9517476370802874e-05, + "loss": 1.4922, + "step": 4302 + }, + { + "epoch": 0.12634329672910916, + "grad_norm": 0.0, + "learning_rate": 1.951718449534703e-05, + "loss": 1.5742, + "step": 4303 + }, + { + "epoch": 0.12637265840624817, + "grad_norm": 0.0, + "learning_rate": 1.951689253382485e-05, + "loss": 1.4678, + "step": 4304 + }, + { + "epoch": 0.12640202008338716, + "grad_norm": 0.0, + "learning_rate": 1.951660048623897e-05, + "loss": 1.4961, + "step": 4305 + }, + { + "epoch": 0.12643138176052615, + "grad_norm": 0.0, + "learning_rate": 1.951630835259204e-05, + "loss": 1.5039, + "step": 4306 + }, + { + "epoch": 0.12646074343766517, + "grad_norm": 0.0, + "learning_rate": 1.951601613288669e-05, + "loss": 1.3936, + "step": 4307 + }, + { + "epoch": 0.12649010511480416, + "grad_norm": 0.0, + "learning_rate": 1.9515723827125572e-05, + "loss": 1.5088, + "step": 4308 + }, + { + "epoch": 0.12651946679194315, + "grad_norm": 0.0, + "learning_rate": 1.9515431435311327e-05, + "loss": 1.3691, + "step": 4309 + }, + { + "epoch": 0.12654882846908216, + "grad_norm": 0.0, + "learning_rate": 1.95151389574466e-05, + "loss": 1.5098, + "step": 4310 + }, + { + "epoch": 0.12657819014622115, + "grad_norm": 0.0, + "learning_rate": 1.951484639353403e-05, + "loss": 1.4219, + "step": 4311 + }, + { + "epoch": 0.12660755182336014, + "grad_norm": 0.0, + "learning_rate": 1.951455374357627e-05, + "loss": 1.4824, + "step": 4312 + }, + { + "epoch": 0.12663691350049916, + "grad_norm": 0.0, + "learning_rate": 1.951426100757596e-05, + "loss": 1.4229, + "step": 4313 + }, + { + "epoch": 0.12666627517763815, + "grad_norm": 0.0, + "learning_rate": 1.9513968185535756e-05, + "loss": 1.5068, + "step": 4314 + }, + { + "epoch": 0.12669563685477714, + "grad_norm": 0.0, + "learning_rate": 1.95136752774583e-05, + "loss": 1.3262, + "step": 4315 + }, + { + "epoch": 0.12672499853191616, + "grad_norm": 0.0, + "learning_rate": 1.9513382283346242e-05, + "loss": 1.4551, + "step": 4316 + }, + { + "epoch": 0.12675436020905514, + "grad_norm": 0.0, + "learning_rate": 1.9513089203202233e-05, + "loss": 1.3623, + "step": 4317 + }, + { + "epoch": 0.12678372188619413, + "grad_norm": 0.0, + "learning_rate": 1.951279603702892e-05, + "loss": 1.4736, + "step": 4318 + }, + { + "epoch": 0.12681308356333315, + "grad_norm": 0.0, + "learning_rate": 1.9512502784828954e-05, + "loss": 1.5107, + "step": 4319 + }, + { + "epoch": 0.12684244524047214, + "grad_norm": 0.0, + "learning_rate": 1.9512209446604997e-05, + "loss": 1.4902, + "step": 4320 + }, + { + "epoch": 0.12687180691761113, + "grad_norm": 0.0, + "learning_rate": 1.9511916022359683e-05, + "loss": 1.4951, + "step": 4321 + }, + { + "epoch": 0.12690116859475012, + "grad_norm": 0.0, + "learning_rate": 1.9511622512095684e-05, + "loss": 1.5723, + "step": 4322 + }, + { + "epoch": 0.12693053027188914, + "grad_norm": 0.0, + "learning_rate": 1.9511328915815645e-05, + "loss": 1.4453, + "step": 4323 + }, + { + "epoch": 0.12695989194902813, + "grad_norm": 0.0, + "learning_rate": 1.9511035233522222e-05, + "loss": 1.5039, + "step": 4324 + }, + { + "epoch": 0.12698925362616711, + "grad_norm": 0.0, + "learning_rate": 1.951074146521807e-05, + "loss": 1.4502, + "step": 4325 + }, + { + "epoch": 0.12701861530330613, + "grad_norm": 0.0, + "learning_rate": 1.951044761090585e-05, + "loss": 1.4092, + "step": 4326 + }, + { + "epoch": 0.12704797698044512, + "grad_norm": 0.0, + "learning_rate": 1.9510153670588215e-05, + "loss": 1.3447, + "step": 4327 + }, + { + "epoch": 0.1270773386575841, + "grad_norm": 0.0, + "learning_rate": 1.9509859644267827e-05, + "loss": 1.5498, + "step": 4328 + }, + { + "epoch": 0.12710670033472313, + "grad_norm": 0.0, + "learning_rate": 1.950956553194734e-05, + "loss": 1.5293, + "step": 4329 + }, + { + "epoch": 0.12713606201186212, + "grad_norm": 0.0, + "learning_rate": 1.950927133362942e-05, + "loss": 1.4639, + "step": 4330 + }, + { + "epoch": 0.1271654236890011, + "grad_norm": 0.0, + "learning_rate": 1.950897704931672e-05, + "loss": 1.4023, + "step": 4331 + }, + { + "epoch": 0.12719478536614012, + "grad_norm": 0.0, + "learning_rate": 1.9508682679011906e-05, + "loss": 1.4648, + "step": 4332 + }, + { + "epoch": 0.1272241470432791, + "grad_norm": 0.0, + "learning_rate": 1.950838822271764e-05, + "loss": 1.3672, + "step": 4333 + }, + { + "epoch": 0.1272535087204181, + "grad_norm": 0.0, + "learning_rate": 1.9508093680436584e-05, + "loss": 1.3892, + "step": 4334 + }, + { + "epoch": 0.12728287039755712, + "grad_norm": 0.0, + "learning_rate": 1.9507799052171402e-05, + "loss": 1.3682, + "step": 4335 + }, + { + "epoch": 0.1273122320746961, + "grad_norm": 0.0, + "learning_rate": 1.950750433792476e-05, + "loss": 1.3643, + "step": 4336 + }, + { + "epoch": 0.1273415937518351, + "grad_norm": 0.0, + "learning_rate": 1.9507209537699317e-05, + "loss": 1.5723, + "step": 4337 + }, + { + "epoch": 0.12737095542897411, + "grad_norm": 0.0, + "learning_rate": 1.950691465149775e-05, + "loss": 1.3057, + "step": 4338 + }, + { + "epoch": 0.1274003171061131, + "grad_norm": 0.0, + "learning_rate": 1.950661967932271e-05, + "loss": 1.5264, + "step": 4339 + }, + { + "epoch": 0.1274296787832521, + "grad_norm": 0.0, + "learning_rate": 1.950632462117688e-05, + "loss": 1.4346, + "step": 4340 + }, + { + "epoch": 0.1274590404603911, + "grad_norm": 0.0, + "learning_rate": 1.9506029477062924e-05, + "loss": 1.5898, + "step": 4341 + }, + { + "epoch": 0.1274884021375301, + "grad_norm": 0.0, + "learning_rate": 1.9505734246983503e-05, + "loss": 1.5586, + "step": 4342 + }, + { + "epoch": 0.1275177638146691, + "grad_norm": 0.0, + "learning_rate": 1.9505438930941293e-05, + "loss": 1.4414, + "step": 4343 + }, + { + "epoch": 0.1275471254918081, + "grad_norm": 0.0, + "learning_rate": 1.9505143528938965e-05, + "loss": 1.4883, + "step": 4344 + }, + { + "epoch": 0.1275764871689471, + "grad_norm": 0.0, + "learning_rate": 1.9504848040979194e-05, + "loss": 1.5059, + "step": 4345 + }, + { + "epoch": 0.12760584884608608, + "grad_norm": 0.0, + "learning_rate": 1.9504552467064645e-05, + "loss": 1.5479, + "step": 4346 + }, + { + "epoch": 0.12763521052322507, + "grad_norm": 0.0, + "learning_rate": 1.9504256807197996e-05, + "loss": 1.4736, + "step": 4347 + }, + { + "epoch": 0.1276645722003641, + "grad_norm": 0.0, + "learning_rate": 1.9503961061381914e-05, + "loss": 1.5078, + "step": 4348 + }, + { + "epoch": 0.12769393387750308, + "grad_norm": 0.0, + "learning_rate": 1.950366522961908e-05, + "loss": 1.3721, + "step": 4349 + }, + { + "epoch": 0.12772329555464207, + "grad_norm": 0.0, + "learning_rate": 1.9503369311912167e-05, + "loss": 1.4395, + "step": 4350 + }, + { + "epoch": 0.12775265723178109, + "grad_norm": 0.0, + "learning_rate": 1.9503073308263857e-05, + "loss": 1.5186, + "step": 4351 + }, + { + "epoch": 0.12778201890892008, + "grad_norm": 0.0, + "learning_rate": 1.9502777218676815e-05, + "loss": 1.5225, + "step": 4352 + }, + { + "epoch": 0.12781138058605906, + "grad_norm": 0.0, + "learning_rate": 1.9502481043153726e-05, + "loss": 1.3242, + "step": 4353 + }, + { + "epoch": 0.12784074226319808, + "grad_norm": 0.0, + "learning_rate": 1.9502184781697265e-05, + "loss": 1.373, + "step": 4354 + }, + { + "epoch": 0.12787010394033707, + "grad_norm": 0.0, + "learning_rate": 1.950188843431012e-05, + "loss": 1.5293, + "step": 4355 + }, + { + "epoch": 0.12789946561747606, + "grad_norm": 0.0, + "learning_rate": 1.950159200099496e-05, + "loss": 1.5859, + "step": 4356 + }, + { + "epoch": 0.12792882729461508, + "grad_norm": 0.0, + "learning_rate": 1.950129548175447e-05, + "loss": 1.6562, + "step": 4357 + }, + { + "epoch": 0.12795818897175407, + "grad_norm": 0.0, + "learning_rate": 1.9500998876591334e-05, + "loss": 1.4355, + "step": 4358 + }, + { + "epoch": 0.12798755064889306, + "grad_norm": 0.0, + "learning_rate": 1.9500702185508232e-05, + "loss": 1.5195, + "step": 4359 + }, + { + "epoch": 0.12801691232603207, + "grad_norm": 0.0, + "learning_rate": 1.9500405408507843e-05, + "loss": 1.6465, + "step": 4360 + }, + { + "epoch": 0.12804627400317106, + "grad_norm": 0.0, + "learning_rate": 1.950010854559286e-05, + "loss": 1.3862, + "step": 4361 + }, + { + "epoch": 0.12807563568031005, + "grad_norm": 0.0, + "learning_rate": 1.949981159676596e-05, + "loss": 1.4019, + "step": 4362 + }, + { + "epoch": 0.12810499735744907, + "grad_norm": 0.0, + "learning_rate": 1.949951456202983e-05, + "loss": 1.5, + "step": 4363 + }, + { + "epoch": 0.12813435903458806, + "grad_norm": 0.0, + "learning_rate": 1.9499217441387156e-05, + "loss": 1.4443, + "step": 4364 + }, + { + "epoch": 0.12816372071172705, + "grad_norm": 0.0, + "learning_rate": 1.949892023484063e-05, + "loss": 1.4248, + "step": 4365 + }, + { + "epoch": 0.12819308238886606, + "grad_norm": 0.0, + "learning_rate": 1.949862294239293e-05, + "loss": 1.3955, + "step": 4366 + }, + { + "epoch": 0.12822244406600505, + "grad_norm": 0.0, + "learning_rate": 1.9498325564046756e-05, + "loss": 1.4043, + "step": 4367 + }, + { + "epoch": 0.12825180574314404, + "grad_norm": 0.0, + "learning_rate": 1.9498028099804786e-05, + "loss": 1.54, + "step": 4368 + }, + { + "epoch": 0.12828116742028306, + "grad_norm": 0.0, + "learning_rate": 1.949773054966972e-05, + "loss": 1.4229, + "step": 4369 + }, + { + "epoch": 0.12831052909742205, + "grad_norm": 0.0, + "learning_rate": 1.949743291364424e-05, + "loss": 1.6074, + "step": 4370 + }, + { + "epoch": 0.12833989077456104, + "grad_norm": 0.0, + "learning_rate": 1.9497135191731043e-05, + "loss": 1.667, + "step": 4371 + }, + { + "epoch": 0.12836925245170003, + "grad_norm": 0.0, + "learning_rate": 1.9496837383932822e-05, + "loss": 1.4072, + "step": 4372 + }, + { + "epoch": 0.12839861412883904, + "grad_norm": 0.0, + "learning_rate": 1.9496539490252267e-05, + "loss": 1.54, + "step": 4373 + }, + { + "epoch": 0.12842797580597803, + "grad_norm": 0.0, + "learning_rate": 1.9496241510692075e-05, + "loss": 1.5156, + "step": 4374 + }, + { + "epoch": 0.12845733748311702, + "grad_norm": 0.0, + "learning_rate": 1.949594344525494e-05, + "loss": 1.4258, + "step": 4375 + }, + { + "epoch": 0.12848669916025604, + "grad_norm": 0.0, + "learning_rate": 1.9495645293943553e-05, + "loss": 1.5215, + "step": 4376 + }, + { + "epoch": 0.12851606083739503, + "grad_norm": 0.0, + "learning_rate": 1.9495347056760618e-05, + "loss": 1.4116, + "step": 4377 + }, + { + "epoch": 0.12854542251453402, + "grad_norm": 0.0, + "learning_rate": 1.9495048733708823e-05, + "loss": 1.5439, + "step": 4378 + }, + { + "epoch": 0.12857478419167304, + "grad_norm": 0.0, + "learning_rate": 1.9494750324790872e-05, + "loss": 1.4463, + "step": 4379 + }, + { + "epoch": 0.12860414586881203, + "grad_norm": 0.0, + "learning_rate": 1.9494451830009462e-05, + "loss": 1.4785, + "step": 4380 + }, + { + "epoch": 0.12863350754595101, + "grad_norm": 0.0, + "learning_rate": 1.9494153249367294e-05, + "loss": 1.4971, + "step": 4381 + }, + { + "epoch": 0.12866286922309003, + "grad_norm": 0.0, + "learning_rate": 1.9493854582867065e-05, + "loss": 1.4893, + "step": 4382 + }, + { + "epoch": 0.12869223090022902, + "grad_norm": 0.0, + "learning_rate": 1.9493555830511478e-05, + "loss": 1.4688, + "step": 4383 + }, + { + "epoch": 0.128721592577368, + "grad_norm": 0.0, + "learning_rate": 1.9493256992303236e-05, + "loss": 1.6621, + "step": 4384 + }, + { + "epoch": 0.12875095425450703, + "grad_norm": 0.0, + "learning_rate": 1.9492958068245034e-05, + "loss": 1.5176, + "step": 4385 + }, + { + "epoch": 0.12878031593164602, + "grad_norm": 0.0, + "learning_rate": 1.949265905833959e-05, + "loss": 1.4785, + "step": 4386 + }, + { + "epoch": 0.128809677608785, + "grad_norm": 0.0, + "learning_rate": 1.9492359962589594e-05, + "loss": 1.4229, + "step": 4387 + }, + { + "epoch": 0.12883903928592402, + "grad_norm": 0.0, + "learning_rate": 1.9492060780997754e-05, + "loss": 1.4658, + "step": 4388 + }, + { + "epoch": 0.128868400963063, + "grad_norm": 0.0, + "learning_rate": 1.949176151356678e-05, + "loss": 1.3799, + "step": 4389 + }, + { + "epoch": 0.128897762640202, + "grad_norm": 0.0, + "learning_rate": 1.9491462160299377e-05, + "loss": 1.5205, + "step": 4390 + }, + { + "epoch": 0.12892712431734102, + "grad_norm": 0.0, + "learning_rate": 1.9491162721198248e-05, + "loss": 1.4932, + "step": 4391 + }, + { + "epoch": 0.12895648599448, + "grad_norm": 0.0, + "learning_rate": 1.9490863196266105e-05, + "loss": 1.5957, + "step": 4392 + }, + { + "epoch": 0.128985847671619, + "grad_norm": 0.0, + "learning_rate": 1.9490563585505658e-05, + "loss": 1.3789, + "step": 4393 + }, + { + "epoch": 0.12901520934875801, + "grad_norm": 0.0, + "learning_rate": 1.9490263888919608e-05, + "loss": 1.5068, + "step": 4394 + }, + { + "epoch": 0.129044571025897, + "grad_norm": 0.0, + "learning_rate": 1.9489964106510676e-05, + "loss": 1.3848, + "step": 4395 + }, + { + "epoch": 0.129073932703036, + "grad_norm": 0.0, + "learning_rate": 1.9489664238281568e-05, + "loss": 1.4766, + "step": 4396 + }, + { + "epoch": 0.12910329438017498, + "grad_norm": 0.0, + "learning_rate": 1.9489364284234995e-05, + "loss": 1.5088, + "step": 4397 + }, + { + "epoch": 0.129132656057314, + "grad_norm": 0.0, + "learning_rate": 1.948906424437367e-05, + "loss": 1.4863, + "step": 4398 + }, + { + "epoch": 0.129162017734453, + "grad_norm": 0.0, + "learning_rate": 1.9488764118700308e-05, + "loss": 1.502, + "step": 4399 + }, + { + "epoch": 0.12919137941159198, + "grad_norm": 0.0, + "learning_rate": 1.948846390721762e-05, + "loss": 1.4863, + "step": 4400 + }, + { + "epoch": 0.129220741088731, + "grad_norm": 0.0, + "learning_rate": 1.9488163609928327e-05, + "loss": 1.5537, + "step": 4401 + }, + { + "epoch": 0.12925010276586998, + "grad_norm": 0.0, + "learning_rate": 1.9487863226835134e-05, + "loss": 1.3877, + "step": 4402 + }, + { + "epoch": 0.12927946444300897, + "grad_norm": 0.0, + "learning_rate": 1.948756275794077e-05, + "loss": 1.6406, + "step": 4403 + }, + { + "epoch": 0.129308826120148, + "grad_norm": 0.0, + "learning_rate": 1.9487262203247943e-05, + "loss": 1.4199, + "step": 4404 + }, + { + "epoch": 0.12933818779728698, + "grad_norm": 0.0, + "learning_rate": 1.9486961562759374e-05, + "loss": 1.3154, + "step": 4405 + }, + { + "epoch": 0.12936754947442597, + "grad_norm": 0.0, + "learning_rate": 1.9486660836477783e-05, + "loss": 1.4414, + "step": 4406 + }, + { + "epoch": 0.129396911151565, + "grad_norm": 0.0, + "learning_rate": 1.9486360024405886e-05, + "loss": 1.4717, + "step": 4407 + }, + { + "epoch": 0.12942627282870398, + "grad_norm": 0.0, + "learning_rate": 1.948605912654641e-05, + "loss": 1.5186, + "step": 4408 + }, + { + "epoch": 0.12945563450584296, + "grad_norm": 0.0, + "learning_rate": 1.948575814290207e-05, + "loss": 1.5332, + "step": 4409 + }, + { + "epoch": 0.12948499618298198, + "grad_norm": 0.0, + "learning_rate": 1.9485457073475593e-05, + "loss": 1.3496, + "step": 4410 + }, + { + "epoch": 0.12951435786012097, + "grad_norm": 0.0, + "learning_rate": 1.9485155918269693e-05, + "loss": 1.5322, + "step": 4411 + }, + { + "epoch": 0.12954371953725996, + "grad_norm": 0.0, + "learning_rate": 1.94848546772871e-05, + "loss": 1.3945, + "step": 4412 + }, + { + "epoch": 0.12957308121439898, + "grad_norm": 0.0, + "learning_rate": 1.948455335053054e-05, + "loss": 1.5049, + "step": 4413 + }, + { + "epoch": 0.12960244289153797, + "grad_norm": 0.0, + "learning_rate": 1.9484251938002732e-05, + "loss": 1.4902, + "step": 4414 + }, + { + "epoch": 0.12963180456867696, + "grad_norm": 0.0, + "learning_rate": 1.9483950439706406e-05, + "loss": 1.4316, + "step": 4415 + }, + { + "epoch": 0.12966116624581597, + "grad_norm": 0.0, + "learning_rate": 1.948364885564429e-05, + "loss": 1.5, + "step": 4416 + }, + { + "epoch": 0.12969052792295496, + "grad_norm": 0.0, + "learning_rate": 1.9483347185819102e-05, + "loss": 1.4424, + "step": 4417 + }, + { + "epoch": 0.12971988960009395, + "grad_norm": 0.0, + "learning_rate": 1.9483045430233583e-05, + "loss": 1.4189, + "step": 4418 + }, + { + "epoch": 0.12974925127723297, + "grad_norm": 0.0, + "learning_rate": 1.9482743588890448e-05, + "loss": 1.4248, + "step": 4419 + }, + { + "epoch": 0.12977861295437196, + "grad_norm": 0.0, + "learning_rate": 1.9482441661792443e-05, + "loss": 1.3613, + "step": 4420 + }, + { + "epoch": 0.12980797463151095, + "grad_norm": 0.0, + "learning_rate": 1.9482139648942283e-05, + "loss": 1.3574, + "step": 4421 + }, + { + "epoch": 0.12983733630864994, + "grad_norm": 0.0, + "learning_rate": 1.948183755034271e-05, + "loss": 1.4512, + "step": 4422 + }, + { + "epoch": 0.12986669798578895, + "grad_norm": 0.0, + "learning_rate": 1.948153536599645e-05, + "loss": 1.3682, + "step": 4423 + }, + { + "epoch": 0.12989605966292794, + "grad_norm": 0.0, + "learning_rate": 1.9481233095906237e-05, + "loss": 1.4893, + "step": 4424 + }, + { + "epoch": 0.12992542134006693, + "grad_norm": 0.0, + "learning_rate": 1.9480930740074808e-05, + "loss": 1.5186, + "step": 4425 + }, + { + "epoch": 0.12995478301720595, + "grad_norm": 0.0, + "learning_rate": 1.948062829850489e-05, + "loss": 1.459, + "step": 4426 + }, + { + "epoch": 0.12998414469434494, + "grad_norm": 0.0, + "learning_rate": 1.9480325771199225e-05, + "loss": 1.5449, + "step": 4427 + }, + { + "epoch": 0.13001350637148393, + "grad_norm": 0.0, + "learning_rate": 1.9480023158160543e-05, + "loss": 1.4951, + "step": 4428 + }, + { + "epoch": 0.13004286804862294, + "grad_norm": 0.0, + "learning_rate": 1.9479720459391586e-05, + "loss": 1.4824, + "step": 4429 + }, + { + "epoch": 0.13007222972576193, + "grad_norm": 0.0, + "learning_rate": 1.9479417674895088e-05, + "loss": 1.4844, + "step": 4430 + }, + { + "epoch": 0.13010159140290092, + "grad_norm": 0.0, + "learning_rate": 1.947911480467379e-05, + "loss": 1.5312, + "step": 4431 + }, + { + "epoch": 0.13013095308003994, + "grad_norm": 0.0, + "learning_rate": 1.947881184873043e-05, + "loss": 1.5879, + "step": 4432 + }, + { + "epoch": 0.13016031475717893, + "grad_norm": 0.0, + "learning_rate": 1.9478508807067743e-05, + "loss": 1.4473, + "step": 4433 + }, + { + "epoch": 0.13018967643431792, + "grad_norm": 0.0, + "learning_rate": 1.9478205679688476e-05, + "loss": 1.5576, + "step": 4434 + }, + { + "epoch": 0.13021903811145694, + "grad_norm": 0.0, + "learning_rate": 1.9477902466595367e-05, + "loss": 1.291, + "step": 4435 + }, + { + "epoch": 0.13024839978859593, + "grad_norm": 0.0, + "learning_rate": 1.947759916779116e-05, + "loss": 1.6182, + "step": 4436 + }, + { + "epoch": 0.13027776146573491, + "grad_norm": 0.0, + "learning_rate": 1.9477295783278597e-05, + "loss": 1.54, + "step": 4437 + }, + { + "epoch": 0.13030712314287393, + "grad_norm": 0.0, + "learning_rate": 1.9476992313060418e-05, + "loss": 1.4355, + "step": 4438 + }, + { + "epoch": 0.13033648482001292, + "grad_norm": 0.0, + "learning_rate": 1.9476688757139373e-05, + "loss": 1.4111, + "step": 4439 + }, + { + "epoch": 0.1303658464971519, + "grad_norm": 0.0, + "learning_rate": 1.9476385115518202e-05, + "loss": 1.3242, + "step": 4440 + }, + { + "epoch": 0.13039520817429093, + "grad_norm": 0.0, + "learning_rate": 1.9476081388199655e-05, + "loss": 1.5, + "step": 4441 + }, + { + "epoch": 0.13042456985142992, + "grad_norm": 0.0, + "learning_rate": 1.9475777575186476e-05, + "loss": 1.5684, + "step": 4442 + }, + { + "epoch": 0.1304539315285689, + "grad_norm": 0.0, + "learning_rate": 1.9475473676481416e-05, + "loss": 1.585, + "step": 4443 + }, + { + "epoch": 0.13048329320570792, + "grad_norm": 0.0, + "learning_rate": 1.947516969208722e-05, + "loss": 1.4902, + "step": 4444 + }, + { + "epoch": 0.1305126548828469, + "grad_norm": 0.0, + "learning_rate": 1.9474865622006637e-05, + "loss": 1.4521, + "step": 4445 + }, + { + "epoch": 0.1305420165599859, + "grad_norm": 0.0, + "learning_rate": 1.9474561466242416e-05, + "loss": 1.4434, + "step": 4446 + }, + { + "epoch": 0.1305713782371249, + "grad_norm": 0.0, + "learning_rate": 1.9474257224797313e-05, + "loss": 1.4531, + "step": 4447 + }, + { + "epoch": 0.1306007399142639, + "grad_norm": 0.0, + "learning_rate": 1.9473952897674075e-05, + "loss": 1.4971, + "step": 4448 + }, + { + "epoch": 0.1306301015914029, + "grad_norm": 0.0, + "learning_rate": 1.9473648484875453e-05, + "loss": 1.5469, + "step": 4449 + }, + { + "epoch": 0.1306594632685419, + "grad_norm": 0.0, + "learning_rate": 1.9473343986404202e-05, + "loss": 1.4834, + "step": 4450 + }, + { + "epoch": 0.1306888249456809, + "grad_norm": 0.0, + "learning_rate": 1.9473039402263073e-05, + "loss": 1.5469, + "step": 4451 + }, + { + "epoch": 0.1307181866228199, + "grad_norm": 0.0, + "learning_rate": 1.9472734732454825e-05, + "loss": 1.4727, + "step": 4452 + }, + { + "epoch": 0.13074754829995888, + "grad_norm": 0.0, + "learning_rate": 1.947242997698221e-05, + "loss": 1.4961, + "step": 4453 + }, + { + "epoch": 0.1307769099770979, + "grad_norm": 0.0, + "learning_rate": 1.9472125135847982e-05, + "loss": 1.46, + "step": 4454 + }, + { + "epoch": 0.1308062716542369, + "grad_norm": 0.0, + "learning_rate": 1.9471820209054904e-05, + "loss": 1.5098, + "step": 4455 + }, + { + "epoch": 0.13083563333137588, + "grad_norm": 0.0, + "learning_rate": 1.947151519660573e-05, + "loss": 1.3193, + "step": 4456 + }, + { + "epoch": 0.1308649950085149, + "grad_norm": 0.0, + "learning_rate": 1.947121009850322e-05, + "loss": 1.3418, + "step": 4457 + }, + { + "epoch": 0.13089435668565388, + "grad_norm": 0.0, + "learning_rate": 1.947090491475013e-05, + "loss": 1.5811, + "step": 4458 + }, + { + "epoch": 0.13092371836279287, + "grad_norm": 0.0, + "learning_rate": 1.9470599645349222e-05, + "loss": 1.4512, + "step": 4459 + }, + { + "epoch": 0.1309530800399319, + "grad_norm": 0.0, + "learning_rate": 1.9470294290303252e-05, + "loss": 1.5459, + "step": 4460 + }, + { + "epoch": 0.13098244171707088, + "grad_norm": 0.0, + "learning_rate": 1.946998884961499e-05, + "loss": 1.4775, + "step": 4461 + }, + { + "epoch": 0.13101180339420987, + "grad_norm": 0.0, + "learning_rate": 1.946968332328719e-05, + "loss": 1.3994, + "step": 4462 + }, + { + "epoch": 0.1310411650713489, + "grad_norm": 0.0, + "learning_rate": 1.9469377711322623e-05, + "loss": 1.541, + "step": 4463 + }, + { + "epoch": 0.13107052674848788, + "grad_norm": 0.0, + "learning_rate": 1.9469072013724048e-05, + "loss": 1.5186, + "step": 4464 + }, + { + "epoch": 0.13109988842562686, + "grad_norm": 0.0, + "learning_rate": 1.9468766230494225e-05, + "loss": 1.4199, + "step": 4465 + }, + { + "epoch": 0.13112925010276588, + "grad_norm": 0.0, + "learning_rate": 1.9468460361635925e-05, + "loss": 1.4258, + "step": 4466 + }, + { + "epoch": 0.13115861177990487, + "grad_norm": 0.0, + "learning_rate": 1.9468154407151914e-05, + "loss": 1.5664, + "step": 4467 + }, + { + "epoch": 0.13118797345704386, + "grad_norm": 0.0, + "learning_rate": 1.9467848367044958e-05, + "loss": 1.4502, + "step": 4468 + }, + { + "epoch": 0.13121733513418288, + "grad_norm": 0.0, + "learning_rate": 1.9467542241317824e-05, + "loss": 1.4326, + "step": 4469 + }, + { + "epoch": 0.13124669681132187, + "grad_norm": 0.0, + "learning_rate": 1.946723602997328e-05, + "loss": 1.5957, + "step": 4470 + }, + { + "epoch": 0.13127605848846086, + "grad_norm": 0.0, + "learning_rate": 1.94669297330141e-05, + "loss": 1.4795, + "step": 4471 + }, + { + "epoch": 0.13130542016559985, + "grad_norm": 0.0, + "learning_rate": 1.9466623350443047e-05, + "loss": 1.5029, + "step": 4472 + }, + { + "epoch": 0.13133478184273886, + "grad_norm": 0.0, + "learning_rate": 1.9466316882262895e-05, + "loss": 1.5283, + "step": 4473 + }, + { + "epoch": 0.13136414351987785, + "grad_norm": 0.0, + "learning_rate": 1.9466010328476416e-05, + "loss": 1.4385, + "step": 4474 + }, + { + "epoch": 0.13139350519701684, + "grad_norm": 0.0, + "learning_rate": 1.946570368908638e-05, + "loss": 1.3887, + "step": 4475 + }, + { + "epoch": 0.13142286687415586, + "grad_norm": 0.0, + "learning_rate": 1.946539696409556e-05, + "loss": 1.5342, + "step": 4476 + }, + { + "epoch": 0.13145222855129485, + "grad_norm": 0.0, + "learning_rate": 1.9465090153506734e-05, + "loss": 1.5303, + "step": 4477 + }, + { + "epoch": 0.13148159022843384, + "grad_norm": 0.0, + "learning_rate": 1.9464783257322674e-05, + "loss": 1.415, + "step": 4478 + }, + { + "epoch": 0.13151095190557285, + "grad_norm": 0.0, + "learning_rate": 1.946447627554615e-05, + "loss": 1.4951, + "step": 4479 + }, + { + "epoch": 0.13154031358271184, + "grad_norm": 0.0, + "learning_rate": 1.9464169208179947e-05, + "loss": 1.4619, + "step": 4480 + }, + { + "epoch": 0.13156967525985083, + "grad_norm": 0.0, + "learning_rate": 1.9463862055226837e-05, + "loss": 1.4316, + "step": 4481 + }, + { + "epoch": 0.13159903693698985, + "grad_norm": 0.0, + "learning_rate": 1.94635548166896e-05, + "loss": 1.5566, + "step": 4482 + }, + { + "epoch": 0.13162839861412884, + "grad_norm": 0.0, + "learning_rate": 1.9463247492571013e-05, + "loss": 1.5234, + "step": 4483 + }, + { + "epoch": 0.13165776029126783, + "grad_norm": 0.0, + "learning_rate": 1.9462940082873855e-05, + "loss": 1.2705, + "step": 4484 + }, + { + "epoch": 0.13168712196840685, + "grad_norm": 0.0, + "learning_rate": 1.9462632587600908e-05, + "loss": 1.5361, + "step": 4485 + }, + { + "epoch": 0.13171648364554583, + "grad_norm": 0.0, + "learning_rate": 1.946232500675495e-05, + "loss": 1.3857, + "step": 4486 + }, + { + "epoch": 0.13174584532268482, + "grad_norm": 0.0, + "learning_rate": 1.946201734033876e-05, + "loss": 1.4463, + "step": 4487 + }, + { + "epoch": 0.13177520699982384, + "grad_norm": 0.0, + "learning_rate": 1.9461709588355128e-05, + "loss": 1.3203, + "step": 4488 + }, + { + "epoch": 0.13180456867696283, + "grad_norm": 0.0, + "learning_rate": 1.9461401750806832e-05, + "loss": 1.5752, + "step": 4489 + }, + { + "epoch": 0.13183393035410182, + "grad_norm": 0.0, + "learning_rate": 1.946109382769666e-05, + "loss": 1.4902, + "step": 4490 + }, + { + "epoch": 0.13186329203124084, + "grad_norm": 0.0, + "learning_rate": 1.9460785819027386e-05, + "loss": 1.4131, + "step": 4491 + }, + { + "epoch": 0.13189265370837983, + "grad_norm": 0.0, + "learning_rate": 1.946047772480181e-05, + "loss": 1.3491, + "step": 4492 + }, + { + "epoch": 0.13192201538551881, + "grad_norm": 0.0, + "learning_rate": 1.9460169545022705e-05, + "loss": 1.4229, + "step": 4493 + }, + { + "epoch": 0.13195137706265783, + "grad_norm": 0.0, + "learning_rate": 1.9459861279692868e-05, + "loss": 1.4326, + "step": 4494 + }, + { + "epoch": 0.13198073873979682, + "grad_norm": 0.0, + "learning_rate": 1.945955292881508e-05, + "loss": 1.418, + "step": 4495 + }, + { + "epoch": 0.1320101004169358, + "grad_norm": 0.0, + "learning_rate": 1.9459244492392136e-05, + "loss": 1.4941, + "step": 4496 + }, + { + "epoch": 0.1320394620940748, + "grad_norm": 0.0, + "learning_rate": 1.945893597042682e-05, + "loss": 1.4443, + "step": 4497 + }, + { + "epoch": 0.13206882377121382, + "grad_norm": 0.0, + "learning_rate": 1.9458627362921923e-05, + "loss": 1.582, + "step": 4498 + }, + { + "epoch": 0.1320981854483528, + "grad_norm": 0.0, + "learning_rate": 1.9458318669880237e-05, + "loss": 1.5498, + "step": 4499 + }, + { + "epoch": 0.1321275471254918, + "grad_norm": 0.0, + "learning_rate": 1.945800989130455e-05, + "loss": 1.4551, + "step": 4500 + }, + { + "epoch": 0.1321569088026308, + "grad_norm": 0.0, + "learning_rate": 1.9457701027197663e-05, + "loss": 1.5459, + "step": 4501 + }, + { + "epoch": 0.1321862704797698, + "grad_norm": 0.0, + "learning_rate": 1.945739207756236e-05, + "loss": 1.6143, + "step": 4502 + }, + { + "epoch": 0.1322156321569088, + "grad_norm": 0.0, + "learning_rate": 1.945708304240144e-05, + "loss": 1.5098, + "step": 4503 + }, + { + "epoch": 0.1322449938340478, + "grad_norm": 0.0, + "learning_rate": 1.9456773921717695e-05, + "loss": 1.5283, + "step": 4504 + }, + { + "epoch": 0.1322743555111868, + "grad_norm": 0.0, + "learning_rate": 1.945646471551392e-05, + "loss": 1.4219, + "step": 4505 + }, + { + "epoch": 0.1323037171883258, + "grad_norm": 0.0, + "learning_rate": 1.9456155423792917e-05, + "loss": 1.5576, + "step": 4506 + }, + { + "epoch": 0.1323330788654648, + "grad_norm": 0.0, + "learning_rate": 1.945584604655747e-05, + "loss": 1.4912, + "step": 4507 + }, + { + "epoch": 0.1323624405426038, + "grad_norm": 0.0, + "learning_rate": 1.9455536583810393e-05, + "loss": 1.4346, + "step": 4508 + }, + { + "epoch": 0.13239180221974278, + "grad_norm": 0.0, + "learning_rate": 1.9455227035554476e-05, + "loss": 1.4219, + "step": 4509 + }, + { + "epoch": 0.1324211638968818, + "grad_norm": 0.0, + "learning_rate": 1.945491740179252e-05, + "loss": 1.4658, + "step": 4510 + }, + { + "epoch": 0.1324505255740208, + "grad_norm": 0.0, + "learning_rate": 1.9454607682527322e-05, + "loss": 1.5625, + "step": 4511 + }, + { + "epoch": 0.13247988725115978, + "grad_norm": 0.0, + "learning_rate": 1.9454297877761686e-05, + "loss": 1.5225, + "step": 4512 + }, + { + "epoch": 0.1325092489282988, + "grad_norm": 0.0, + "learning_rate": 1.9453987987498415e-05, + "loss": 1.5332, + "step": 4513 + }, + { + "epoch": 0.13253861060543778, + "grad_norm": 0.0, + "learning_rate": 1.945367801174031e-05, + "loss": 1.4668, + "step": 4514 + }, + { + "epoch": 0.13256797228257677, + "grad_norm": 0.0, + "learning_rate": 1.945336795049017e-05, + "loss": 1.4336, + "step": 4515 + }, + { + "epoch": 0.1325973339597158, + "grad_norm": 0.0, + "learning_rate": 1.9453057803750805e-05, + "loss": 1.4365, + "step": 4516 + }, + { + "epoch": 0.13262669563685478, + "grad_norm": 0.0, + "learning_rate": 1.9452747571525015e-05, + "loss": 1.4258, + "step": 4517 + }, + { + "epoch": 0.13265605731399377, + "grad_norm": 0.0, + "learning_rate": 1.945243725381561e-05, + "loss": 1.4053, + "step": 4518 + }, + { + "epoch": 0.1326854189911328, + "grad_norm": 0.0, + "learning_rate": 1.945212685062539e-05, + "loss": 1.4307, + "step": 4519 + }, + { + "epoch": 0.13271478066827178, + "grad_norm": 0.0, + "learning_rate": 1.945181636195717e-05, + "loss": 1.5352, + "step": 4520 + }, + { + "epoch": 0.13274414234541076, + "grad_norm": 0.0, + "learning_rate": 1.9451505787813756e-05, + "loss": 1.4854, + "step": 4521 + }, + { + "epoch": 0.13277350402254975, + "grad_norm": 0.0, + "learning_rate": 1.9451195128197953e-05, + "loss": 1.4463, + "step": 4522 + }, + { + "epoch": 0.13280286569968877, + "grad_norm": 0.0, + "learning_rate": 1.945088438311257e-05, + "loss": 1.4746, + "step": 4523 + }, + { + "epoch": 0.13283222737682776, + "grad_norm": 0.0, + "learning_rate": 1.945057355256042e-05, + "loss": 1.4424, + "step": 4524 + }, + { + "epoch": 0.13286158905396675, + "grad_norm": 0.0, + "learning_rate": 1.9450262636544313e-05, + "loss": 1.4746, + "step": 4525 + }, + { + "epoch": 0.13289095073110577, + "grad_norm": 0.0, + "learning_rate": 1.944995163506706e-05, + "loss": 1.4697, + "step": 4526 + }, + { + "epoch": 0.13292031240824476, + "grad_norm": 0.0, + "learning_rate": 1.9449640548131477e-05, + "loss": 1.4873, + "step": 4527 + }, + { + "epoch": 0.13294967408538375, + "grad_norm": 0.0, + "learning_rate": 1.9449329375740373e-05, + "loss": 1.4814, + "step": 4528 + }, + { + "epoch": 0.13297903576252276, + "grad_norm": 0.0, + "learning_rate": 1.9449018117896563e-05, + "loss": 1.4512, + "step": 4529 + }, + { + "epoch": 0.13300839743966175, + "grad_norm": 0.0, + "learning_rate": 1.9448706774602864e-05, + "loss": 1.3623, + "step": 4530 + }, + { + "epoch": 0.13303775911680074, + "grad_norm": 0.0, + "learning_rate": 1.9448395345862087e-05, + "loss": 1.6475, + "step": 4531 + }, + { + "epoch": 0.13306712079393976, + "grad_norm": 0.0, + "learning_rate": 1.9448083831677054e-05, + "loss": 1.501, + "step": 4532 + }, + { + "epoch": 0.13309648247107875, + "grad_norm": 0.0, + "learning_rate": 1.9447772232050575e-05, + "loss": 1.4443, + "step": 4533 + }, + { + "epoch": 0.13312584414821774, + "grad_norm": 0.0, + "learning_rate": 1.9447460546985475e-05, + "loss": 1.2998, + "step": 4534 + }, + { + "epoch": 0.13315520582535675, + "grad_norm": 0.0, + "learning_rate": 1.944714877648457e-05, + "loss": 1.4404, + "step": 4535 + }, + { + "epoch": 0.13318456750249574, + "grad_norm": 0.0, + "learning_rate": 1.944683692055068e-05, + "loss": 1.4951, + "step": 4536 + }, + { + "epoch": 0.13321392917963473, + "grad_norm": 0.0, + "learning_rate": 1.944652497918662e-05, + "loss": 1.4658, + "step": 4537 + }, + { + "epoch": 0.13324329085677375, + "grad_norm": 0.0, + "learning_rate": 1.944621295239522e-05, + "loss": 1.4287, + "step": 4538 + }, + { + "epoch": 0.13327265253391274, + "grad_norm": 0.0, + "learning_rate": 1.9445900840179295e-05, + "loss": 1.5693, + "step": 4539 + }, + { + "epoch": 0.13330201421105173, + "grad_norm": 0.0, + "learning_rate": 1.944558864254167e-05, + "loss": 1.5176, + "step": 4540 + }, + { + "epoch": 0.13333137588819075, + "grad_norm": 0.0, + "learning_rate": 1.9445276359485166e-05, + "loss": 1.5273, + "step": 4541 + }, + { + "epoch": 0.13336073756532973, + "grad_norm": 0.0, + "learning_rate": 1.944496399101261e-05, + "loss": 1.4043, + "step": 4542 + }, + { + "epoch": 0.13339009924246872, + "grad_norm": 0.0, + "learning_rate": 1.9444651537126827e-05, + "loss": 1.5137, + "step": 4543 + }, + { + "epoch": 0.13341946091960774, + "grad_norm": 0.0, + "learning_rate": 1.9444338997830643e-05, + "loss": 1.5938, + "step": 4544 + }, + { + "epoch": 0.13344882259674673, + "grad_norm": 0.0, + "learning_rate": 1.944402637312688e-05, + "loss": 1.5186, + "step": 4545 + }, + { + "epoch": 0.13347818427388572, + "grad_norm": 0.0, + "learning_rate": 1.9443713663018367e-05, + "loss": 1.5234, + "step": 4546 + }, + { + "epoch": 0.1335075459510247, + "grad_norm": 0.0, + "learning_rate": 1.9443400867507933e-05, + "loss": 1.3574, + "step": 4547 + }, + { + "epoch": 0.13353690762816373, + "grad_norm": 0.0, + "learning_rate": 1.944308798659841e-05, + "loss": 1.5479, + "step": 4548 + }, + { + "epoch": 0.13356626930530272, + "grad_norm": 0.0, + "learning_rate": 1.944277502029262e-05, + "loss": 1.3359, + "step": 4549 + }, + { + "epoch": 0.1335956309824417, + "grad_norm": 0.0, + "learning_rate": 1.94424619685934e-05, + "loss": 1.3496, + "step": 4550 + }, + { + "epoch": 0.13362499265958072, + "grad_norm": 0.0, + "learning_rate": 1.9442148831503577e-05, + "loss": 1.3652, + "step": 4551 + }, + { + "epoch": 0.1336543543367197, + "grad_norm": 0.0, + "learning_rate": 1.9441835609025983e-05, + "loss": 1.335, + "step": 4552 + }, + { + "epoch": 0.1336837160138587, + "grad_norm": 0.0, + "learning_rate": 1.9441522301163452e-05, + "loss": 1.3105, + "step": 4553 + }, + { + "epoch": 0.13371307769099772, + "grad_norm": 0.0, + "learning_rate": 1.9441208907918815e-05, + "loss": 1.4121, + "step": 4554 + }, + { + "epoch": 0.1337424393681367, + "grad_norm": 0.0, + "learning_rate": 1.9440895429294914e-05, + "loss": 1.4683, + "step": 4555 + }, + { + "epoch": 0.1337718010452757, + "grad_norm": 0.0, + "learning_rate": 1.944058186529457e-05, + "loss": 1.5068, + "step": 4556 + }, + { + "epoch": 0.1338011627224147, + "grad_norm": 0.0, + "learning_rate": 1.944026821592063e-05, + "loss": 1.3682, + "step": 4557 + }, + { + "epoch": 0.1338305243995537, + "grad_norm": 0.0, + "learning_rate": 1.943995448117593e-05, + "loss": 1.5908, + "step": 4558 + }, + { + "epoch": 0.1338598860766927, + "grad_norm": 0.0, + "learning_rate": 1.9439640661063298e-05, + "loss": 1.293, + "step": 4559 + }, + { + "epoch": 0.1338892477538317, + "grad_norm": 0.0, + "learning_rate": 1.9439326755585583e-05, + "loss": 1.4971, + "step": 4560 + }, + { + "epoch": 0.1339186094309707, + "grad_norm": 0.0, + "learning_rate": 1.9439012764745615e-05, + "loss": 1.5156, + "step": 4561 + }, + { + "epoch": 0.1339479711081097, + "grad_norm": 0.0, + "learning_rate": 1.943869868854624e-05, + "loss": 1.5283, + "step": 4562 + }, + { + "epoch": 0.1339773327852487, + "grad_norm": 0.0, + "learning_rate": 1.9438384526990296e-05, + "loss": 1.4277, + "step": 4563 + }, + { + "epoch": 0.1340066944623877, + "grad_norm": 0.0, + "learning_rate": 1.943807028008062e-05, + "loss": 1.4492, + "step": 4564 + }, + { + "epoch": 0.13403605613952668, + "grad_norm": 0.0, + "learning_rate": 1.9437755947820062e-05, + "loss": 1.4492, + "step": 4565 + }, + { + "epoch": 0.1340654178166657, + "grad_norm": 0.0, + "learning_rate": 1.9437441530211454e-05, + "loss": 1.4082, + "step": 4566 + }, + { + "epoch": 0.1340947794938047, + "grad_norm": 0.0, + "learning_rate": 1.943712702725765e-05, + "loss": 1.4482, + "step": 4567 + }, + { + "epoch": 0.13412414117094368, + "grad_norm": 0.0, + "learning_rate": 1.9436812438961485e-05, + "loss": 1.4883, + "step": 4568 + }, + { + "epoch": 0.1341535028480827, + "grad_norm": 0.0, + "learning_rate": 1.9436497765325812e-05, + "loss": 1.3594, + "step": 4569 + }, + { + "epoch": 0.13418286452522168, + "grad_norm": 0.0, + "learning_rate": 1.9436183006353474e-05, + "loss": 1.4629, + "step": 4570 + }, + { + "epoch": 0.13421222620236067, + "grad_norm": 0.0, + "learning_rate": 1.9435868162047314e-05, + "loss": 1.4106, + "step": 4571 + }, + { + "epoch": 0.1342415878794997, + "grad_norm": 0.0, + "learning_rate": 1.943555323241018e-05, + "loss": 1.4609, + "step": 4572 + }, + { + "epoch": 0.13427094955663868, + "grad_norm": 0.0, + "learning_rate": 1.9435238217444925e-05, + "loss": 1.5518, + "step": 4573 + }, + { + "epoch": 0.13430031123377767, + "grad_norm": 0.0, + "learning_rate": 1.9434923117154393e-05, + "loss": 1.5547, + "step": 4574 + }, + { + "epoch": 0.13432967291091666, + "grad_norm": 0.0, + "learning_rate": 1.9434607931541432e-05, + "loss": 1.377, + "step": 4575 + }, + { + "epoch": 0.13435903458805568, + "grad_norm": 0.0, + "learning_rate": 1.94342926606089e-05, + "loss": 1.5059, + "step": 4576 + }, + { + "epoch": 0.13438839626519467, + "grad_norm": 0.0, + "learning_rate": 1.9433977304359644e-05, + "loss": 1.5625, + "step": 4577 + }, + { + "epoch": 0.13441775794233365, + "grad_norm": 0.0, + "learning_rate": 1.943366186279651e-05, + "loss": 1.3457, + "step": 4578 + }, + { + "epoch": 0.13444711961947267, + "grad_norm": 0.0, + "learning_rate": 1.9433346335922362e-05, + "loss": 1.5615, + "step": 4579 + }, + { + "epoch": 0.13447648129661166, + "grad_norm": 0.0, + "learning_rate": 1.943303072374004e-05, + "loss": 1.5146, + "step": 4580 + }, + { + "epoch": 0.13450584297375065, + "grad_norm": 0.0, + "learning_rate": 1.9432715026252413e-05, + "loss": 1.4668, + "step": 4581 + }, + { + "epoch": 0.13453520465088967, + "grad_norm": 0.0, + "learning_rate": 1.9432399243462322e-05, + "loss": 1.4502, + "step": 4582 + }, + { + "epoch": 0.13456456632802866, + "grad_norm": 0.0, + "learning_rate": 1.9432083375372632e-05, + "loss": 1.4814, + "step": 4583 + }, + { + "epoch": 0.13459392800516765, + "grad_norm": 0.0, + "learning_rate": 1.9431767421986196e-05, + "loss": 1.4424, + "step": 4584 + }, + { + "epoch": 0.13462328968230666, + "grad_norm": 0.0, + "learning_rate": 1.943145138330587e-05, + "loss": 1.4883, + "step": 4585 + }, + { + "epoch": 0.13465265135944565, + "grad_norm": 0.0, + "learning_rate": 1.9431135259334516e-05, + "loss": 1.3818, + "step": 4586 + }, + { + "epoch": 0.13468201303658464, + "grad_norm": 0.0, + "learning_rate": 1.943081905007499e-05, + "loss": 1.4648, + "step": 4587 + }, + { + "epoch": 0.13471137471372366, + "grad_norm": 0.0, + "learning_rate": 1.943050275553015e-05, + "loss": 1.6543, + "step": 4588 + }, + { + "epoch": 0.13474073639086265, + "grad_norm": 0.0, + "learning_rate": 1.943018637570286e-05, + "loss": 1.5371, + "step": 4589 + }, + { + "epoch": 0.13477009806800164, + "grad_norm": 0.0, + "learning_rate": 1.942986991059598e-05, + "loss": 1.4277, + "step": 4590 + }, + { + "epoch": 0.13479945974514065, + "grad_norm": 0.0, + "learning_rate": 1.942955336021237e-05, + "loss": 1.5352, + "step": 4591 + }, + { + "epoch": 0.13482882142227964, + "grad_norm": 0.0, + "learning_rate": 1.9429236724554896e-05, + "loss": 1.46, + "step": 4592 + }, + { + "epoch": 0.13485818309941863, + "grad_norm": 0.0, + "learning_rate": 1.942892000362642e-05, + "loss": 1.4121, + "step": 4593 + }, + { + "epoch": 0.13488754477655765, + "grad_norm": 0.0, + "learning_rate": 1.9428603197429804e-05, + "loss": 1.542, + "step": 4594 + }, + { + "epoch": 0.13491690645369664, + "grad_norm": 0.0, + "learning_rate": 1.9428286305967914e-05, + "loss": 1.4189, + "step": 4595 + }, + { + "epoch": 0.13494626813083563, + "grad_norm": 0.0, + "learning_rate": 1.942796932924362e-05, + "loss": 1.5703, + "step": 4596 + }, + { + "epoch": 0.13497562980797465, + "grad_norm": 0.0, + "learning_rate": 1.9427652267259782e-05, + "loss": 1.6172, + "step": 4597 + }, + { + "epoch": 0.13500499148511363, + "grad_norm": 0.0, + "learning_rate": 1.9427335120019268e-05, + "loss": 1.4854, + "step": 4598 + }, + { + "epoch": 0.13503435316225262, + "grad_norm": 0.0, + "learning_rate": 1.942701788752495e-05, + "loss": 1.4092, + "step": 4599 + }, + { + "epoch": 0.1350637148393916, + "grad_norm": 0.0, + "learning_rate": 1.9426700569779692e-05, + "loss": 1.5244, + "step": 4600 + }, + { + "epoch": 0.13509307651653063, + "grad_norm": 0.0, + "learning_rate": 1.9426383166786372e-05, + "loss": 1.4111, + "step": 4601 + }, + { + "epoch": 0.13512243819366962, + "grad_norm": 0.0, + "learning_rate": 1.9426065678547853e-05, + "loss": 1.4814, + "step": 4602 + }, + { + "epoch": 0.1351517998708086, + "grad_norm": 0.0, + "learning_rate": 1.9425748105067008e-05, + "loss": 1.5498, + "step": 4603 + }, + { + "epoch": 0.13518116154794763, + "grad_norm": 0.0, + "learning_rate": 1.9425430446346706e-05, + "loss": 1.2773, + "step": 4604 + }, + { + "epoch": 0.13521052322508662, + "grad_norm": 0.0, + "learning_rate": 1.9425112702389824e-05, + "loss": 1.5479, + "step": 4605 + }, + { + "epoch": 0.1352398849022256, + "grad_norm": 0.0, + "learning_rate": 1.9424794873199236e-05, + "loss": 1.4668, + "step": 4606 + }, + { + "epoch": 0.13526924657936462, + "grad_norm": 0.0, + "learning_rate": 1.9424476958777814e-05, + "loss": 1.4092, + "step": 4607 + }, + { + "epoch": 0.1352986082565036, + "grad_norm": 0.0, + "learning_rate": 1.942415895912843e-05, + "loss": 1.4434, + "step": 4608 + }, + { + "epoch": 0.1353279699336426, + "grad_norm": 0.0, + "learning_rate": 1.942384087425397e-05, + "loss": 1.498, + "step": 4609 + }, + { + "epoch": 0.13535733161078162, + "grad_norm": 0.0, + "learning_rate": 1.9423522704157295e-05, + "loss": 1.5088, + "step": 4610 + }, + { + "epoch": 0.1353866932879206, + "grad_norm": 0.0, + "learning_rate": 1.9423204448841297e-05, + "loss": 1.4629, + "step": 4611 + }, + { + "epoch": 0.1354160549650596, + "grad_norm": 0.0, + "learning_rate": 1.9422886108308844e-05, + "loss": 1.4912, + "step": 4612 + }, + { + "epoch": 0.1354454166421986, + "grad_norm": 0.0, + "learning_rate": 1.942256768256282e-05, + "loss": 1.5195, + "step": 4613 + }, + { + "epoch": 0.1354747783193376, + "grad_norm": 0.0, + "learning_rate": 1.9422249171606106e-05, + "loss": 1.3564, + "step": 4614 + }, + { + "epoch": 0.1355041399964766, + "grad_norm": 0.0, + "learning_rate": 1.9421930575441577e-05, + "loss": 1.5098, + "step": 4615 + }, + { + "epoch": 0.1355335016736156, + "grad_norm": 0.0, + "learning_rate": 1.9421611894072113e-05, + "loss": 1.3574, + "step": 4616 + }, + { + "epoch": 0.1355628633507546, + "grad_norm": 0.0, + "learning_rate": 1.9421293127500608e-05, + "loss": 1.4688, + "step": 4617 + }, + { + "epoch": 0.1355922250278936, + "grad_norm": 0.0, + "learning_rate": 1.942097427572993e-05, + "loss": 1.4131, + "step": 4618 + }, + { + "epoch": 0.1356215867050326, + "grad_norm": 0.0, + "learning_rate": 1.942065533876297e-05, + "loss": 1.4775, + "step": 4619 + }, + { + "epoch": 0.1356509483821716, + "grad_norm": 0.0, + "learning_rate": 1.9420336316602616e-05, + "loss": 1.3877, + "step": 4620 + }, + { + "epoch": 0.13568031005931058, + "grad_norm": 0.0, + "learning_rate": 1.942001720925174e-05, + "loss": 1.4551, + "step": 4621 + }, + { + "epoch": 0.1357096717364496, + "grad_norm": 0.0, + "learning_rate": 1.941969801671324e-05, + "loss": 1.5117, + "step": 4622 + }, + { + "epoch": 0.1357390334135886, + "grad_norm": 0.0, + "learning_rate": 1.941937873899e-05, + "loss": 1.3936, + "step": 4623 + }, + { + "epoch": 0.13576839509072758, + "grad_norm": 0.0, + "learning_rate": 1.94190593760849e-05, + "loss": 1.5879, + "step": 4624 + }, + { + "epoch": 0.13579775676786657, + "grad_norm": 0.0, + "learning_rate": 1.941873992800084e-05, + "loss": 1.4434, + "step": 4625 + }, + { + "epoch": 0.13582711844500558, + "grad_norm": 0.0, + "learning_rate": 1.9418420394740703e-05, + "loss": 1.4053, + "step": 4626 + }, + { + "epoch": 0.13585648012214457, + "grad_norm": 0.0, + "learning_rate": 1.9418100776307376e-05, + "loss": 1.5137, + "step": 4627 + }, + { + "epoch": 0.13588584179928356, + "grad_norm": 0.0, + "learning_rate": 1.9417781072703752e-05, + "loss": 1.4834, + "step": 4628 + }, + { + "epoch": 0.13591520347642258, + "grad_norm": 0.0, + "learning_rate": 1.941746128393272e-05, + "loss": 1.3682, + "step": 4629 + }, + { + "epoch": 0.13594456515356157, + "grad_norm": 0.0, + "learning_rate": 1.9417141409997177e-05, + "loss": 1.4053, + "step": 4630 + }, + { + "epoch": 0.13597392683070056, + "grad_norm": 0.0, + "learning_rate": 1.941682145090001e-05, + "loss": 1.4863, + "step": 4631 + }, + { + "epoch": 0.13600328850783958, + "grad_norm": 0.0, + "learning_rate": 1.9416501406644115e-05, + "loss": 1.4629, + "step": 4632 + }, + { + "epoch": 0.13603265018497857, + "grad_norm": 0.0, + "learning_rate": 1.9416181277232386e-05, + "loss": 1.457, + "step": 4633 + }, + { + "epoch": 0.13606201186211755, + "grad_norm": 0.0, + "learning_rate": 1.941586106266772e-05, + "loss": 1.4668, + "step": 4634 + }, + { + "epoch": 0.13609137353925657, + "grad_norm": 0.0, + "learning_rate": 1.9415540762953008e-05, + "loss": 1.5449, + "step": 4635 + }, + { + "epoch": 0.13612073521639556, + "grad_norm": 0.0, + "learning_rate": 1.9415220378091152e-05, + "loss": 1.4424, + "step": 4636 + }, + { + "epoch": 0.13615009689353455, + "grad_norm": 0.0, + "learning_rate": 1.941489990808505e-05, + "loss": 1.5244, + "step": 4637 + }, + { + "epoch": 0.13617945857067357, + "grad_norm": 0.0, + "learning_rate": 1.941457935293759e-05, + "loss": 1.4785, + "step": 4638 + }, + { + "epoch": 0.13620882024781256, + "grad_norm": 0.0, + "learning_rate": 1.941425871265168e-05, + "loss": 1.3467, + "step": 4639 + }, + { + "epoch": 0.13623818192495155, + "grad_norm": 0.0, + "learning_rate": 1.941393798723022e-05, + "loss": 1.4287, + "step": 4640 + }, + { + "epoch": 0.13626754360209056, + "grad_norm": 0.0, + "learning_rate": 1.9413617176676104e-05, + "loss": 1.4209, + "step": 4641 + }, + { + "epoch": 0.13629690527922955, + "grad_norm": 0.0, + "learning_rate": 1.941329628099224e-05, + "loss": 1.4062, + "step": 4642 + }, + { + "epoch": 0.13632626695636854, + "grad_norm": 0.0, + "learning_rate": 1.9412975300181528e-05, + "loss": 1.5449, + "step": 4643 + }, + { + "epoch": 0.13635562863350756, + "grad_norm": 0.0, + "learning_rate": 1.941265423424687e-05, + "loss": 1.5059, + "step": 4644 + }, + { + "epoch": 0.13638499031064655, + "grad_norm": 0.0, + "learning_rate": 1.9412333083191165e-05, + "loss": 1.375, + "step": 4645 + }, + { + "epoch": 0.13641435198778554, + "grad_norm": 0.0, + "learning_rate": 1.941201184701732e-05, + "loss": 1.4639, + "step": 4646 + }, + { + "epoch": 0.13644371366492455, + "grad_norm": 0.0, + "learning_rate": 1.941169052572825e-05, + "loss": 1.4766, + "step": 4647 + }, + { + "epoch": 0.13647307534206354, + "grad_norm": 0.0, + "learning_rate": 1.9411369119326844e-05, + "loss": 1.5479, + "step": 4648 + }, + { + "epoch": 0.13650243701920253, + "grad_norm": 0.0, + "learning_rate": 1.9411047627816022e-05, + "loss": 1.5439, + "step": 4649 + }, + { + "epoch": 0.13653179869634152, + "grad_norm": 0.0, + "learning_rate": 1.9410726051198684e-05, + "loss": 1.5352, + "step": 4650 + }, + { + "epoch": 0.13656116037348054, + "grad_norm": 0.0, + "learning_rate": 1.941040438947774e-05, + "loss": 1.415, + "step": 4651 + }, + { + "epoch": 0.13659052205061953, + "grad_norm": 0.0, + "learning_rate": 1.9410082642656098e-05, + "loss": 1.4971, + "step": 4652 + }, + { + "epoch": 0.13661988372775852, + "grad_norm": 0.0, + "learning_rate": 1.940976081073667e-05, + "loss": 1.5293, + "step": 4653 + }, + { + "epoch": 0.13664924540489753, + "grad_norm": 0.0, + "learning_rate": 1.9409438893722367e-05, + "loss": 1.3926, + "step": 4654 + }, + { + "epoch": 0.13667860708203652, + "grad_norm": 0.0, + "learning_rate": 1.9409116891616097e-05, + "loss": 1.3945, + "step": 4655 + }, + { + "epoch": 0.1367079687591755, + "grad_norm": 0.0, + "learning_rate": 1.940879480442077e-05, + "loss": 1.4541, + "step": 4656 + }, + { + "epoch": 0.13673733043631453, + "grad_norm": 0.0, + "learning_rate": 1.9408472632139305e-05, + "loss": 1.4307, + "step": 4657 + }, + { + "epoch": 0.13676669211345352, + "grad_norm": 0.0, + "learning_rate": 1.940815037477461e-05, + "loss": 1.5518, + "step": 4658 + }, + { + "epoch": 0.1367960537905925, + "grad_norm": 0.0, + "learning_rate": 1.9407828032329605e-05, + "loss": 1.5107, + "step": 4659 + }, + { + "epoch": 0.13682541546773153, + "grad_norm": 0.0, + "learning_rate": 1.9407505604807203e-05, + "loss": 1.4814, + "step": 4660 + }, + { + "epoch": 0.13685477714487052, + "grad_norm": 0.0, + "learning_rate": 1.9407183092210314e-05, + "loss": 1.334, + "step": 4661 + }, + { + "epoch": 0.1368841388220095, + "grad_norm": 0.0, + "learning_rate": 1.9406860494541863e-05, + "loss": 1.3535, + "step": 4662 + }, + { + "epoch": 0.13691350049914852, + "grad_norm": 0.0, + "learning_rate": 1.9406537811804758e-05, + "loss": 1.3252, + "step": 4663 + }, + { + "epoch": 0.1369428621762875, + "grad_norm": 0.0, + "learning_rate": 1.9406215044001927e-05, + "loss": 1.4512, + "step": 4664 + }, + { + "epoch": 0.1369722238534265, + "grad_norm": 0.0, + "learning_rate": 1.9405892191136283e-05, + "loss": 1.4385, + "step": 4665 + }, + { + "epoch": 0.13700158553056552, + "grad_norm": 0.0, + "learning_rate": 1.9405569253210746e-05, + "loss": 1.5312, + "step": 4666 + }, + { + "epoch": 0.1370309472077045, + "grad_norm": 0.0, + "learning_rate": 1.9405246230228242e-05, + "loss": 1.2373, + "step": 4667 + }, + { + "epoch": 0.1370603088848435, + "grad_norm": 0.0, + "learning_rate": 1.9404923122191683e-05, + "loss": 1.5732, + "step": 4668 + }, + { + "epoch": 0.1370896705619825, + "grad_norm": 0.0, + "learning_rate": 1.9404599929103997e-05, + "loss": 1.3916, + "step": 4669 + }, + { + "epoch": 0.1371190322391215, + "grad_norm": 0.0, + "learning_rate": 1.9404276650968104e-05, + "loss": 1.4609, + "step": 4670 + }, + { + "epoch": 0.1371483939162605, + "grad_norm": 0.0, + "learning_rate": 1.940395328778693e-05, + "loss": 1.4502, + "step": 4671 + }, + { + "epoch": 0.1371777555933995, + "grad_norm": 0.0, + "learning_rate": 1.94036298395634e-05, + "loss": 1.4199, + "step": 4672 + }, + { + "epoch": 0.1372071172705385, + "grad_norm": 0.0, + "learning_rate": 1.9403306306300433e-05, + "loss": 1.5215, + "step": 4673 + }, + { + "epoch": 0.1372364789476775, + "grad_norm": 0.0, + "learning_rate": 1.940298268800096e-05, + "loss": 1.4307, + "step": 4674 + }, + { + "epoch": 0.13726584062481648, + "grad_norm": 0.0, + "learning_rate": 1.9402658984667912e-05, + "loss": 1.2959, + "step": 4675 + }, + { + "epoch": 0.1372952023019555, + "grad_norm": 0.0, + "learning_rate": 1.9402335196304203e-05, + "loss": 1.5137, + "step": 4676 + }, + { + "epoch": 0.13732456397909448, + "grad_norm": 0.0, + "learning_rate": 1.9402011322912773e-05, + "loss": 1.3994, + "step": 4677 + }, + { + "epoch": 0.13735392565623347, + "grad_norm": 0.0, + "learning_rate": 1.9401687364496545e-05, + "loss": 1.626, + "step": 4678 + }, + { + "epoch": 0.1373832873333725, + "grad_norm": 0.0, + "learning_rate": 1.940136332105845e-05, + "loss": 1.5586, + "step": 4679 + }, + { + "epoch": 0.13741264901051148, + "grad_norm": 0.0, + "learning_rate": 1.940103919260142e-05, + "loss": 1.5137, + "step": 4680 + }, + { + "epoch": 0.13744201068765047, + "grad_norm": 0.0, + "learning_rate": 1.9400714979128386e-05, + "loss": 1.3779, + "step": 4681 + }, + { + "epoch": 0.13747137236478948, + "grad_norm": 0.0, + "learning_rate": 1.9400390680642277e-05, + "loss": 1.6289, + "step": 4682 + }, + { + "epoch": 0.13750073404192847, + "grad_norm": 0.0, + "learning_rate": 1.9400066297146027e-05, + "loss": 1.4238, + "step": 4683 + }, + { + "epoch": 0.13753009571906746, + "grad_norm": 0.0, + "learning_rate": 1.939974182864257e-05, + "loss": 1.4121, + "step": 4684 + }, + { + "epoch": 0.13755945739620648, + "grad_norm": 0.0, + "learning_rate": 1.9399417275134844e-05, + "loss": 1.3184, + "step": 4685 + }, + { + "epoch": 0.13758881907334547, + "grad_norm": 0.0, + "learning_rate": 1.939909263662578e-05, + "loss": 1.3398, + "step": 4686 + }, + { + "epoch": 0.13761818075048446, + "grad_norm": 0.0, + "learning_rate": 1.939876791311831e-05, + "loss": 1.4893, + "step": 4687 + }, + { + "epoch": 0.13764754242762348, + "grad_norm": 0.0, + "learning_rate": 1.9398443104615377e-05, + "loss": 1.5723, + "step": 4688 + }, + { + "epoch": 0.13767690410476247, + "grad_norm": 0.0, + "learning_rate": 1.9398118211119915e-05, + "loss": 1.4688, + "step": 4689 + }, + { + "epoch": 0.13770626578190145, + "grad_norm": 0.0, + "learning_rate": 1.9397793232634865e-05, + "loss": 1.4067, + "step": 4690 + }, + { + "epoch": 0.13773562745904047, + "grad_norm": 0.0, + "learning_rate": 1.939746816916316e-05, + "loss": 1.5469, + "step": 4691 + }, + { + "epoch": 0.13776498913617946, + "grad_norm": 0.0, + "learning_rate": 1.939714302070775e-05, + "loss": 1.4766, + "step": 4692 + }, + { + "epoch": 0.13779435081331845, + "grad_norm": 0.0, + "learning_rate": 1.9396817787271564e-05, + "loss": 1.541, + "step": 4693 + }, + { + "epoch": 0.13782371249045747, + "grad_norm": 0.0, + "learning_rate": 1.939649246885755e-05, + "loss": 1.6719, + "step": 4694 + }, + { + "epoch": 0.13785307416759646, + "grad_norm": 0.0, + "learning_rate": 1.9396167065468646e-05, + "loss": 1.3916, + "step": 4695 + }, + { + "epoch": 0.13788243584473545, + "grad_norm": 0.0, + "learning_rate": 1.93958415771078e-05, + "loss": 1.4629, + "step": 4696 + }, + { + "epoch": 0.13791179752187446, + "grad_norm": 0.0, + "learning_rate": 1.939551600377795e-05, + "loss": 1.3213, + "step": 4697 + }, + { + "epoch": 0.13794115919901345, + "grad_norm": 0.0, + "learning_rate": 1.9395190345482045e-05, + "loss": 1.4961, + "step": 4698 + }, + { + "epoch": 0.13797052087615244, + "grad_norm": 0.0, + "learning_rate": 1.939486460222303e-05, + "loss": 1.4863, + "step": 4699 + }, + { + "epoch": 0.13799988255329143, + "grad_norm": 0.0, + "learning_rate": 1.939453877400384e-05, + "loss": 1.5791, + "step": 4700 + }, + { + "epoch": 0.13802924423043045, + "grad_norm": 0.0, + "learning_rate": 1.9394212860827435e-05, + "loss": 1.3936, + "step": 4701 + }, + { + "epoch": 0.13805860590756944, + "grad_norm": 0.0, + "learning_rate": 1.9393886862696757e-05, + "loss": 1.4512, + "step": 4702 + }, + { + "epoch": 0.13808796758470843, + "grad_norm": 0.0, + "learning_rate": 1.9393560779614753e-05, + "loss": 1.4141, + "step": 4703 + }, + { + "epoch": 0.13811732926184744, + "grad_norm": 0.0, + "learning_rate": 1.9393234611584374e-05, + "loss": 1.5264, + "step": 4704 + }, + { + "epoch": 0.13814669093898643, + "grad_norm": 0.0, + "learning_rate": 1.939290835860857e-05, + "loss": 1.4346, + "step": 4705 + }, + { + "epoch": 0.13817605261612542, + "grad_norm": 0.0, + "learning_rate": 1.939258202069029e-05, + "loss": 1.5176, + "step": 4706 + }, + { + "epoch": 0.13820541429326444, + "grad_norm": 0.0, + "learning_rate": 1.9392255597832484e-05, + "loss": 1.5273, + "step": 4707 + }, + { + "epoch": 0.13823477597040343, + "grad_norm": 0.0, + "learning_rate": 1.9391929090038104e-05, + "loss": 1.4746, + "step": 4708 + }, + { + "epoch": 0.13826413764754242, + "grad_norm": 0.0, + "learning_rate": 1.9391602497310104e-05, + "loss": 1.4248, + "step": 4709 + }, + { + "epoch": 0.13829349932468143, + "grad_norm": 0.0, + "learning_rate": 1.9391275819651442e-05, + "loss": 1.499, + "step": 4710 + }, + { + "epoch": 0.13832286100182042, + "grad_norm": 0.0, + "learning_rate": 1.9390949057065062e-05, + "loss": 1.3965, + "step": 4711 + }, + { + "epoch": 0.1383522226789594, + "grad_norm": 0.0, + "learning_rate": 1.9390622209553926e-05, + "loss": 1.5303, + "step": 4712 + }, + { + "epoch": 0.13838158435609843, + "grad_norm": 0.0, + "learning_rate": 1.939029527712099e-05, + "loss": 1.4443, + "step": 4713 + }, + { + "epoch": 0.13841094603323742, + "grad_norm": 0.0, + "learning_rate": 1.938996825976921e-05, + "loss": 1.3574, + "step": 4714 + }, + { + "epoch": 0.1384403077103764, + "grad_norm": 0.0, + "learning_rate": 1.938964115750154e-05, + "loss": 1.3779, + "step": 4715 + }, + { + "epoch": 0.13846966938751543, + "grad_norm": 0.0, + "learning_rate": 1.938931397032094e-05, + "loss": 1.4756, + "step": 4716 + }, + { + "epoch": 0.13849903106465442, + "grad_norm": 0.0, + "learning_rate": 1.9388986698230372e-05, + "loss": 1.5811, + "step": 4717 + }, + { + "epoch": 0.1385283927417934, + "grad_norm": 0.0, + "learning_rate": 1.938865934123279e-05, + "loss": 1.3271, + "step": 4718 + }, + { + "epoch": 0.13855775441893242, + "grad_norm": 0.0, + "learning_rate": 1.9388331899331157e-05, + "loss": 1.5098, + "step": 4719 + }, + { + "epoch": 0.1385871160960714, + "grad_norm": 0.0, + "learning_rate": 1.9388004372528437e-05, + "loss": 1.542, + "step": 4720 + }, + { + "epoch": 0.1386164777732104, + "grad_norm": 0.0, + "learning_rate": 1.9387676760827587e-05, + "loss": 1.4531, + "step": 4721 + }, + { + "epoch": 0.13864583945034942, + "grad_norm": 0.0, + "learning_rate": 1.938734906423157e-05, + "loss": 1.3877, + "step": 4722 + }, + { + "epoch": 0.1386752011274884, + "grad_norm": 0.0, + "learning_rate": 1.9387021282743355e-05, + "loss": 1.3643, + "step": 4723 + }, + { + "epoch": 0.1387045628046274, + "grad_norm": 0.0, + "learning_rate": 1.93866934163659e-05, + "loss": 1.5361, + "step": 4724 + }, + { + "epoch": 0.13873392448176639, + "grad_norm": 0.0, + "learning_rate": 1.9386365465102175e-05, + "loss": 1.5762, + "step": 4725 + }, + { + "epoch": 0.1387632861589054, + "grad_norm": 0.0, + "learning_rate": 1.938603742895514e-05, + "loss": 1.6367, + "step": 4726 + }, + { + "epoch": 0.1387926478360444, + "grad_norm": 0.0, + "learning_rate": 1.9385709307927767e-05, + "loss": 1.5361, + "step": 4727 + }, + { + "epoch": 0.13882200951318338, + "grad_norm": 0.0, + "learning_rate": 1.938538110202302e-05, + "loss": 1.3672, + "step": 4728 + }, + { + "epoch": 0.1388513711903224, + "grad_norm": 0.0, + "learning_rate": 1.9385052811243868e-05, + "loss": 1.5205, + "step": 4729 + }, + { + "epoch": 0.1388807328674614, + "grad_norm": 0.0, + "learning_rate": 1.938472443559328e-05, + "loss": 1.5127, + "step": 4730 + }, + { + "epoch": 0.13891009454460038, + "grad_norm": 0.0, + "learning_rate": 1.9384395975074225e-05, + "loss": 1.5166, + "step": 4731 + }, + { + "epoch": 0.1389394562217394, + "grad_norm": 0.0, + "learning_rate": 1.9384067429689676e-05, + "loss": 1.5117, + "step": 4732 + }, + { + "epoch": 0.13896881789887838, + "grad_norm": 0.0, + "learning_rate": 1.9383738799442597e-05, + "loss": 1.3652, + "step": 4733 + }, + { + "epoch": 0.13899817957601737, + "grad_norm": 0.0, + "learning_rate": 1.938341008433597e-05, + "loss": 1.4287, + "step": 4734 + }, + { + "epoch": 0.1390275412531564, + "grad_norm": 0.0, + "learning_rate": 1.938308128437276e-05, + "loss": 1.5439, + "step": 4735 + }, + { + "epoch": 0.13905690293029538, + "grad_norm": 0.0, + "learning_rate": 1.9382752399555943e-05, + "loss": 1.4727, + "step": 4736 + }, + { + "epoch": 0.13908626460743437, + "grad_norm": 0.0, + "learning_rate": 1.9382423429888495e-05, + "loss": 1.626, + "step": 4737 + }, + { + "epoch": 0.13911562628457338, + "grad_norm": 0.0, + "learning_rate": 1.9382094375373386e-05, + "loss": 1.4619, + "step": 4738 + }, + { + "epoch": 0.13914498796171237, + "grad_norm": 0.0, + "learning_rate": 1.93817652360136e-05, + "loss": 1.5166, + "step": 4739 + }, + { + "epoch": 0.13917434963885136, + "grad_norm": 0.0, + "learning_rate": 1.9381436011812104e-05, + "loss": 1.5078, + "step": 4740 + }, + { + "epoch": 0.13920371131599038, + "grad_norm": 0.0, + "learning_rate": 1.938110670277188e-05, + "loss": 1.4512, + "step": 4741 + }, + { + "epoch": 0.13923307299312937, + "grad_norm": 0.0, + "learning_rate": 1.9380777308895907e-05, + "loss": 1.5449, + "step": 4742 + }, + { + "epoch": 0.13926243467026836, + "grad_norm": 0.0, + "learning_rate": 1.938044783018716e-05, + "loss": 1.4658, + "step": 4743 + }, + { + "epoch": 0.13929179634740738, + "grad_norm": 0.0, + "learning_rate": 1.938011826664862e-05, + "loss": 1.4688, + "step": 4744 + }, + { + "epoch": 0.13932115802454637, + "grad_norm": 0.0, + "learning_rate": 1.9379788618283274e-05, + "loss": 1.5596, + "step": 4745 + }, + { + "epoch": 0.13935051970168535, + "grad_norm": 0.0, + "learning_rate": 1.937945888509409e-05, + "loss": 1.4521, + "step": 4746 + }, + { + "epoch": 0.13937988137882437, + "grad_norm": 0.0, + "learning_rate": 1.9379129067084062e-05, + "loss": 1.4014, + "step": 4747 + }, + { + "epoch": 0.13940924305596336, + "grad_norm": 0.0, + "learning_rate": 1.9378799164256167e-05, + "loss": 1.3984, + "step": 4748 + }, + { + "epoch": 0.13943860473310235, + "grad_norm": 0.0, + "learning_rate": 1.937846917661339e-05, + "loss": 1.4404, + "step": 4749 + }, + { + "epoch": 0.13946796641024134, + "grad_norm": 0.0, + "learning_rate": 1.937813910415871e-05, + "loss": 1.4961, + "step": 4750 + }, + { + "epoch": 0.13949732808738036, + "grad_norm": 0.0, + "learning_rate": 1.937780894689512e-05, + "loss": 1.4023, + "step": 4751 + }, + { + "epoch": 0.13952668976451935, + "grad_norm": 0.0, + "learning_rate": 1.9377478704825604e-05, + "loss": 1.4678, + "step": 4752 + }, + { + "epoch": 0.13955605144165834, + "grad_norm": 0.0, + "learning_rate": 1.9377148377953144e-05, + "loss": 1.5127, + "step": 4753 + }, + { + "epoch": 0.13958541311879735, + "grad_norm": 0.0, + "learning_rate": 1.937681796628073e-05, + "loss": 1.4385, + "step": 4754 + }, + { + "epoch": 0.13961477479593634, + "grad_norm": 0.0, + "learning_rate": 1.937648746981135e-05, + "loss": 1.4951, + "step": 4755 + }, + { + "epoch": 0.13964413647307533, + "grad_norm": 0.0, + "learning_rate": 1.937615688854799e-05, + "loss": 1.4346, + "step": 4756 + }, + { + "epoch": 0.13967349815021435, + "grad_norm": 0.0, + "learning_rate": 1.9375826222493647e-05, + "loss": 1.3701, + "step": 4757 + }, + { + "epoch": 0.13970285982735334, + "grad_norm": 0.0, + "learning_rate": 1.93754954716513e-05, + "loss": 1.4043, + "step": 4758 + }, + { + "epoch": 0.13973222150449233, + "grad_norm": 0.0, + "learning_rate": 1.937516463602395e-05, + "loss": 1.4043, + "step": 4759 + }, + { + "epoch": 0.13976158318163134, + "grad_norm": 0.0, + "learning_rate": 1.9374833715614586e-05, + "loss": 1.502, + "step": 4760 + }, + { + "epoch": 0.13979094485877033, + "grad_norm": 0.0, + "learning_rate": 1.93745027104262e-05, + "loss": 1.4814, + "step": 4761 + }, + { + "epoch": 0.13982030653590932, + "grad_norm": 0.0, + "learning_rate": 1.937417162046178e-05, + "loss": 1.4219, + "step": 4762 + }, + { + "epoch": 0.13984966821304834, + "grad_norm": 0.0, + "learning_rate": 1.937384044572433e-05, + "loss": 1.4189, + "step": 4763 + }, + { + "epoch": 0.13987902989018733, + "grad_norm": 0.0, + "learning_rate": 1.937350918621684e-05, + "loss": 1.4893, + "step": 4764 + }, + { + "epoch": 0.13990839156732632, + "grad_norm": 0.0, + "learning_rate": 1.9373177841942303e-05, + "loss": 1.4346, + "step": 4765 + }, + { + "epoch": 0.13993775324446533, + "grad_norm": 0.0, + "learning_rate": 1.937284641290372e-05, + "loss": 1.5703, + "step": 4766 + }, + { + "epoch": 0.13996711492160432, + "grad_norm": 0.0, + "learning_rate": 1.9372514899104087e-05, + "loss": 1.3369, + "step": 4767 + }, + { + "epoch": 0.1399964765987433, + "grad_norm": 0.0, + "learning_rate": 1.93721833005464e-05, + "loss": 1.4922, + "step": 4768 + }, + { + "epoch": 0.14002583827588233, + "grad_norm": 0.0, + "learning_rate": 1.937185161723366e-05, + "loss": 1.498, + "step": 4769 + }, + { + "epoch": 0.14005519995302132, + "grad_norm": 0.0, + "learning_rate": 1.9371519849168867e-05, + "loss": 1.4873, + "step": 4770 + }, + { + "epoch": 0.1400845616301603, + "grad_norm": 0.0, + "learning_rate": 1.937118799635502e-05, + "loss": 1.4805, + "step": 4771 + }, + { + "epoch": 0.14011392330729933, + "grad_norm": 0.0, + "learning_rate": 1.937085605879512e-05, + "loss": 1.2515, + "step": 4772 + }, + { + "epoch": 0.14014328498443832, + "grad_norm": 0.0, + "learning_rate": 1.9370524036492164e-05, + "loss": 1.4717, + "step": 4773 + }, + { + "epoch": 0.1401726466615773, + "grad_norm": 0.0, + "learning_rate": 1.9370191929449163e-05, + "loss": 1.5156, + "step": 4774 + }, + { + "epoch": 0.1402020083387163, + "grad_norm": 0.0, + "learning_rate": 1.9369859737669115e-05, + "loss": 1.5078, + "step": 4775 + }, + { + "epoch": 0.1402313700158553, + "grad_norm": 0.0, + "learning_rate": 1.936952746115503e-05, + "loss": 1.3701, + "step": 4776 + }, + { + "epoch": 0.1402607316929943, + "grad_norm": 0.0, + "learning_rate": 1.9369195099909905e-05, + "loss": 1.3867, + "step": 4777 + }, + { + "epoch": 0.1402900933701333, + "grad_norm": 0.0, + "learning_rate": 1.936886265393675e-05, + "loss": 1.4756, + "step": 4778 + }, + { + "epoch": 0.1403194550472723, + "grad_norm": 0.0, + "learning_rate": 1.936853012323857e-05, + "loss": 1.4492, + "step": 4779 + }, + { + "epoch": 0.1403488167244113, + "grad_norm": 0.0, + "learning_rate": 1.9368197507818373e-05, + "loss": 1.6582, + "step": 4780 + }, + { + "epoch": 0.14037817840155029, + "grad_norm": 0.0, + "learning_rate": 1.936786480767917e-05, + "loss": 1.5127, + "step": 4781 + }, + { + "epoch": 0.1404075400786893, + "grad_norm": 0.0, + "learning_rate": 1.936753202282396e-05, + "loss": 1.5186, + "step": 4782 + }, + { + "epoch": 0.1404369017558283, + "grad_norm": 0.0, + "learning_rate": 1.9367199153255765e-05, + "loss": 1.4619, + "step": 4783 + }, + { + "epoch": 0.14046626343296728, + "grad_norm": 0.0, + "learning_rate": 1.9366866198977587e-05, + "loss": 1.5459, + "step": 4784 + }, + { + "epoch": 0.1404956251101063, + "grad_norm": 0.0, + "learning_rate": 1.9366533159992437e-05, + "loss": 1.3438, + "step": 4785 + }, + { + "epoch": 0.1405249867872453, + "grad_norm": 0.0, + "learning_rate": 1.9366200036303332e-05, + "loss": 1.4775, + "step": 4786 + }, + { + "epoch": 0.14055434846438428, + "grad_norm": 0.0, + "learning_rate": 1.9365866827913276e-05, + "loss": 1.4766, + "step": 4787 + }, + { + "epoch": 0.1405837101415233, + "grad_norm": 0.0, + "learning_rate": 1.936553353482529e-05, + "loss": 1.4658, + "step": 4788 + }, + { + "epoch": 0.14061307181866228, + "grad_norm": 0.0, + "learning_rate": 1.936520015704239e-05, + "loss": 1.3408, + "step": 4789 + }, + { + "epoch": 0.14064243349580127, + "grad_norm": 0.0, + "learning_rate": 1.9364866694567584e-05, + "loss": 1.3291, + "step": 4790 + }, + { + "epoch": 0.1406717951729403, + "grad_norm": 0.0, + "learning_rate": 1.9364533147403886e-05, + "loss": 1.5752, + "step": 4791 + }, + { + "epoch": 0.14070115685007928, + "grad_norm": 0.0, + "learning_rate": 1.9364199515554322e-05, + "loss": 1.458, + "step": 4792 + }, + { + "epoch": 0.14073051852721827, + "grad_norm": 0.0, + "learning_rate": 1.93638657990219e-05, + "loss": 1.4941, + "step": 4793 + }, + { + "epoch": 0.14075988020435728, + "grad_norm": 0.0, + "learning_rate": 1.9363531997809647e-05, + "loss": 1.5098, + "step": 4794 + }, + { + "epoch": 0.14078924188149627, + "grad_norm": 0.0, + "learning_rate": 1.936319811192057e-05, + "loss": 1.4004, + "step": 4795 + }, + { + "epoch": 0.14081860355863526, + "grad_norm": 0.0, + "learning_rate": 1.93628641413577e-05, + "loss": 1.4541, + "step": 4796 + }, + { + "epoch": 0.14084796523577428, + "grad_norm": 0.0, + "learning_rate": 1.9362530086124045e-05, + "loss": 1.3174, + "step": 4797 + }, + { + "epoch": 0.14087732691291327, + "grad_norm": 0.0, + "learning_rate": 1.936219594622264e-05, + "loss": 1.5518, + "step": 4798 + }, + { + "epoch": 0.14090668859005226, + "grad_norm": 0.0, + "learning_rate": 1.936186172165649e-05, + "loss": 1.5371, + "step": 4799 + }, + { + "epoch": 0.14093605026719125, + "grad_norm": 0.0, + "learning_rate": 1.9361527412428634e-05, + "loss": 1.4805, + "step": 4800 + }, + { + "epoch": 0.14096541194433027, + "grad_norm": 0.0, + "learning_rate": 1.9361193018542088e-05, + "loss": 1.5068, + "step": 4801 + }, + { + "epoch": 0.14099477362146925, + "grad_norm": 0.0, + "learning_rate": 1.9360858539999873e-05, + "loss": 1.4883, + "step": 4802 + }, + { + "epoch": 0.14102413529860824, + "grad_norm": 0.0, + "learning_rate": 1.936052397680502e-05, + "loss": 1.3877, + "step": 4803 + }, + { + "epoch": 0.14105349697574726, + "grad_norm": 0.0, + "learning_rate": 1.936018932896055e-05, + "loss": 1.4316, + "step": 4804 + }, + { + "epoch": 0.14108285865288625, + "grad_norm": 0.0, + "learning_rate": 1.935985459646949e-05, + "loss": 1.4414, + "step": 4805 + }, + { + "epoch": 0.14111222033002524, + "grad_norm": 0.0, + "learning_rate": 1.9359519779334867e-05, + "loss": 1.4951, + "step": 4806 + }, + { + "epoch": 0.14114158200716426, + "grad_norm": 0.0, + "learning_rate": 1.9359184877559712e-05, + "loss": 1.5215, + "step": 4807 + }, + { + "epoch": 0.14117094368430325, + "grad_norm": 0.0, + "learning_rate": 1.9358849891147052e-05, + "loss": 1.4463, + "step": 4808 + }, + { + "epoch": 0.14120030536144224, + "grad_norm": 0.0, + "learning_rate": 1.935851482009991e-05, + "loss": 1.4287, + "step": 4809 + }, + { + "epoch": 0.14122966703858125, + "grad_norm": 0.0, + "learning_rate": 1.9358179664421327e-05, + "loss": 1.4805, + "step": 4810 + }, + { + "epoch": 0.14125902871572024, + "grad_norm": 0.0, + "learning_rate": 1.9357844424114326e-05, + "loss": 1.4199, + "step": 4811 + }, + { + "epoch": 0.14128839039285923, + "grad_norm": 0.0, + "learning_rate": 1.9357509099181944e-05, + "loss": 1.5225, + "step": 4812 + }, + { + "epoch": 0.14131775206999825, + "grad_norm": 0.0, + "learning_rate": 1.935717368962721e-05, + "loss": 1.4214, + "step": 4813 + }, + { + "epoch": 0.14134711374713724, + "grad_norm": 0.0, + "learning_rate": 1.9356838195453155e-05, + "loss": 1.4619, + "step": 4814 + }, + { + "epoch": 0.14137647542427623, + "grad_norm": 0.0, + "learning_rate": 1.935650261666282e-05, + "loss": 1.5586, + "step": 4815 + }, + { + "epoch": 0.14140583710141524, + "grad_norm": 0.0, + "learning_rate": 1.9356166953259234e-05, + "loss": 1.4385, + "step": 4816 + }, + { + "epoch": 0.14143519877855423, + "grad_norm": 0.0, + "learning_rate": 1.935583120524543e-05, + "loss": 1.5039, + "step": 4817 + }, + { + "epoch": 0.14146456045569322, + "grad_norm": 0.0, + "learning_rate": 1.9355495372624455e-05, + "loss": 1.4961, + "step": 4818 + }, + { + "epoch": 0.14149392213283224, + "grad_norm": 0.0, + "learning_rate": 1.9355159455399334e-05, + "loss": 1.4775, + "step": 4819 + }, + { + "epoch": 0.14152328380997123, + "grad_norm": 0.0, + "learning_rate": 1.9354823453573112e-05, + "loss": 1.4932, + "step": 4820 + }, + { + "epoch": 0.14155264548711022, + "grad_norm": 0.0, + "learning_rate": 1.9354487367148827e-05, + "loss": 1.3564, + "step": 4821 + }, + { + "epoch": 0.14158200716424924, + "grad_norm": 0.0, + "learning_rate": 1.9354151196129514e-05, + "loss": 1.4878, + "step": 4822 + }, + { + "epoch": 0.14161136884138822, + "grad_norm": 0.0, + "learning_rate": 1.935381494051822e-05, + "loss": 1.3955, + "step": 4823 + }, + { + "epoch": 0.1416407305185272, + "grad_norm": 0.0, + "learning_rate": 1.935347860031798e-05, + "loss": 1.3672, + "step": 4824 + }, + { + "epoch": 0.1416700921956662, + "grad_norm": 0.0, + "learning_rate": 1.9353142175531838e-05, + "loss": 1.4844, + "step": 4825 + }, + { + "epoch": 0.14169945387280522, + "grad_norm": 0.0, + "learning_rate": 1.9352805666162837e-05, + "loss": 1.3936, + "step": 4826 + }, + { + "epoch": 0.1417288155499442, + "grad_norm": 0.0, + "learning_rate": 1.9352469072214018e-05, + "loss": 1.3779, + "step": 4827 + }, + { + "epoch": 0.1417581772270832, + "grad_norm": 0.0, + "learning_rate": 1.9352132393688427e-05, + "loss": 1.3711, + "step": 4828 + }, + { + "epoch": 0.14178753890422222, + "grad_norm": 0.0, + "learning_rate": 1.9351795630589107e-05, + "loss": 1.4004, + "step": 4829 + }, + { + "epoch": 0.1418169005813612, + "grad_norm": 0.0, + "learning_rate": 1.93514587829191e-05, + "loss": 1.3994, + "step": 4830 + }, + { + "epoch": 0.1418462622585002, + "grad_norm": 0.0, + "learning_rate": 1.935112185068146e-05, + "loss": 1.4316, + "step": 4831 + }, + { + "epoch": 0.1418756239356392, + "grad_norm": 0.0, + "learning_rate": 1.935078483387923e-05, + "loss": 1.6768, + "step": 4832 + }, + { + "epoch": 0.1419049856127782, + "grad_norm": 0.0, + "learning_rate": 1.9350447732515457e-05, + "loss": 1.5029, + "step": 4833 + }, + { + "epoch": 0.1419343472899172, + "grad_norm": 0.0, + "learning_rate": 1.9350110546593194e-05, + "loss": 1.457, + "step": 4834 + }, + { + "epoch": 0.1419637089670562, + "grad_norm": 0.0, + "learning_rate": 1.9349773276115483e-05, + "loss": 1.5107, + "step": 4835 + }, + { + "epoch": 0.1419930706441952, + "grad_norm": 0.0, + "learning_rate": 1.934943592108538e-05, + "loss": 1.5586, + "step": 4836 + }, + { + "epoch": 0.14202243232133419, + "grad_norm": 0.0, + "learning_rate": 1.9349098481505933e-05, + "loss": 1.4697, + "step": 4837 + }, + { + "epoch": 0.1420517939984732, + "grad_norm": 0.0, + "learning_rate": 1.9348760957380192e-05, + "loss": 1.4199, + "step": 4838 + }, + { + "epoch": 0.1420811556756122, + "grad_norm": 0.0, + "learning_rate": 1.9348423348711215e-05, + "loss": 1.458, + "step": 4839 + }, + { + "epoch": 0.14211051735275118, + "grad_norm": 0.0, + "learning_rate": 1.9348085655502048e-05, + "loss": 1.415, + "step": 4840 + }, + { + "epoch": 0.1421398790298902, + "grad_norm": 0.0, + "learning_rate": 1.934774787775575e-05, + "loss": 1.5986, + "step": 4841 + }, + { + "epoch": 0.1421692407070292, + "grad_norm": 0.0, + "learning_rate": 1.934741001547537e-05, + "loss": 1.583, + "step": 4842 + }, + { + "epoch": 0.14219860238416818, + "grad_norm": 0.0, + "learning_rate": 1.9347072068663974e-05, + "loss": 1.4756, + "step": 4843 + }, + { + "epoch": 0.1422279640613072, + "grad_norm": 0.0, + "learning_rate": 1.934673403732461e-05, + "loss": 1.5293, + "step": 4844 + }, + { + "epoch": 0.14225732573844618, + "grad_norm": 0.0, + "learning_rate": 1.9346395921460334e-05, + "loss": 1.29, + "step": 4845 + }, + { + "epoch": 0.14228668741558517, + "grad_norm": 0.0, + "learning_rate": 1.9346057721074208e-05, + "loss": 1.4844, + "step": 4846 + }, + { + "epoch": 0.1423160490927242, + "grad_norm": 0.0, + "learning_rate": 1.9345719436169285e-05, + "loss": 1.4541, + "step": 4847 + }, + { + "epoch": 0.14234541076986318, + "grad_norm": 0.0, + "learning_rate": 1.9345381066748633e-05, + "loss": 1.4141, + "step": 4848 + }, + { + "epoch": 0.14237477244700217, + "grad_norm": 0.0, + "learning_rate": 1.93450426128153e-05, + "loss": 1.542, + "step": 4849 + }, + { + "epoch": 0.14240413412414116, + "grad_norm": 0.0, + "learning_rate": 1.9344704074372358e-05, + "loss": 1.3027, + "step": 4850 + }, + { + "epoch": 0.14243349580128017, + "grad_norm": 0.0, + "learning_rate": 1.9344365451422863e-05, + "loss": 1.4531, + "step": 4851 + }, + { + "epoch": 0.14246285747841916, + "grad_norm": 0.0, + "learning_rate": 1.9344026743969876e-05, + "loss": 1.4453, + "step": 4852 + }, + { + "epoch": 0.14249221915555815, + "grad_norm": 0.0, + "learning_rate": 1.9343687952016464e-05, + "loss": 1.5674, + "step": 4853 + }, + { + "epoch": 0.14252158083269717, + "grad_norm": 0.0, + "learning_rate": 1.9343349075565692e-05, + "loss": 1.5488, + "step": 4854 + }, + { + "epoch": 0.14255094250983616, + "grad_norm": 0.0, + "learning_rate": 1.934301011462062e-05, + "loss": 1.4863, + "step": 4855 + }, + { + "epoch": 0.14258030418697515, + "grad_norm": 0.0, + "learning_rate": 1.934267106918431e-05, + "loss": 1.4375, + "step": 4856 + }, + { + "epoch": 0.14260966586411417, + "grad_norm": 0.0, + "learning_rate": 1.9342331939259837e-05, + "loss": 1.5713, + "step": 4857 + }, + { + "epoch": 0.14263902754125315, + "grad_norm": 0.0, + "learning_rate": 1.934199272485026e-05, + "loss": 1.5439, + "step": 4858 + }, + { + "epoch": 0.14266838921839214, + "grad_norm": 0.0, + "learning_rate": 1.9341653425958656e-05, + "loss": 1.375, + "step": 4859 + }, + { + "epoch": 0.14269775089553116, + "grad_norm": 0.0, + "learning_rate": 1.9341314042588084e-05, + "loss": 1.4219, + "step": 4860 + }, + { + "epoch": 0.14272711257267015, + "grad_norm": 0.0, + "learning_rate": 1.9340974574741618e-05, + "loss": 1.5312, + "step": 4861 + }, + { + "epoch": 0.14275647424980914, + "grad_norm": 0.0, + "learning_rate": 1.9340635022422326e-05, + "loss": 1.3818, + "step": 4862 + }, + { + "epoch": 0.14278583592694816, + "grad_norm": 0.0, + "learning_rate": 1.934029538563328e-05, + "loss": 1.4092, + "step": 4863 + }, + { + "epoch": 0.14281519760408715, + "grad_norm": 0.0, + "learning_rate": 1.933995566437755e-05, + "loss": 1.4463, + "step": 4864 + }, + { + "epoch": 0.14284455928122614, + "grad_norm": 0.0, + "learning_rate": 1.9339615858658208e-05, + "loss": 1.4873, + "step": 4865 + }, + { + "epoch": 0.14287392095836515, + "grad_norm": 0.0, + "learning_rate": 1.9339275968478327e-05, + "loss": 1.4736, + "step": 4866 + }, + { + "epoch": 0.14290328263550414, + "grad_norm": 0.0, + "learning_rate": 1.9338935993840983e-05, + "loss": 1.377, + "step": 4867 + }, + { + "epoch": 0.14293264431264313, + "grad_norm": 0.0, + "learning_rate": 1.933859593474925e-05, + "loss": 1.2588, + "step": 4868 + }, + { + "epoch": 0.14296200598978215, + "grad_norm": 0.0, + "learning_rate": 1.9338255791206203e-05, + "loss": 1.457, + "step": 4869 + }, + { + "epoch": 0.14299136766692114, + "grad_norm": 0.0, + "learning_rate": 1.9337915563214918e-05, + "loss": 1.4824, + "step": 4870 + }, + { + "epoch": 0.14302072934406013, + "grad_norm": 0.0, + "learning_rate": 1.9337575250778467e-05, + "loss": 1.3887, + "step": 4871 + }, + { + "epoch": 0.14305009102119914, + "grad_norm": 0.0, + "learning_rate": 1.9337234853899933e-05, + "loss": 1.4082, + "step": 4872 + }, + { + "epoch": 0.14307945269833813, + "grad_norm": 0.0, + "learning_rate": 1.9336894372582395e-05, + "loss": 1.458, + "step": 4873 + }, + { + "epoch": 0.14310881437547712, + "grad_norm": 0.0, + "learning_rate": 1.9336553806828927e-05, + "loss": 1.415, + "step": 4874 + }, + { + "epoch": 0.1431381760526161, + "grad_norm": 0.0, + "learning_rate": 1.9336213156642615e-05, + "loss": 1.417, + "step": 4875 + }, + { + "epoch": 0.14316753772975513, + "grad_norm": 0.0, + "learning_rate": 1.9335872422026535e-05, + "loss": 1.3721, + "step": 4876 + }, + { + "epoch": 0.14319689940689412, + "grad_norm": 0.0, + "learning_rate": 1.933553160298377e-05, + "loss": 1.4414, + "step": 4877 + }, + { + "epoch": 0.1432262610840331, + "grad_norm": 0.0, + "learning_rate": 1.9335190699517403e-05, + "loss": 1.4492, + "step": 4878 + }, + { + "epoch": 0.14325562276117212, + "grad_norm": 0.0, + "learning_rate": 1.9334849711630513e-05, + "loss": 1.415, + "step": 4879 + }, + { + "epoch": 0.1432849844383111, + "grad_norm": 0.0, + "learning_rate": 1.933450863932619e-05, + "loss": 1.4355, + "step": 4880 + }, + { + "epoch": 0.1433143461154501, + "grad_norm": 0.0, + "learning_rate": 1.933416748260751e-05, + "loss": 1.499, + "step": 4881 + }, + { + "epoch": 0.14334370779258912, + "grad_norm": 0.0, + "learning_rate": 1.9333826241477566e-05, + "loss": 1.3057, + "step": 4882 + }, + { + "epoch": 0.1433730694697281, + "grad_norm": 0.0, + "learning_rate": 1.933348491593944e-05, + "loss": 1.6211, + "step": 4883 + }, + { + "epoch": 0.1434024311468671, + "grad_norm": 0.0, + "learning_rate": 1.933314350599622e-05, + "loss": 1.4248, + "step": 4884 + }, + { + "epoch": 0.14343179282400612, + "grad_norm": 0.0, + "learning_rate": 1.9332802011650994e-05, + "loss": 1.2397, + "step": 4885 + }, + { + "epoch": 0.1434611545011451, + "grad_norm": 0.0, + "learning_rate": 1.9332460432906846e-05, + "loss": 1.4258, + "step": 4886 + }, + { + "epoch": 0.1434905161782841, + "grad_norm": 0.0, + "learning_rate": 1.933211876976687e-05, + "loss": 1.3506, + "step": 4887 + }, + { + "epoch": 0.1435198778554231, + "grad_norm": 0.0, + "learning_rate": 1.9331777022234155e-05, + "loss": 1.4551, + "step": 4888 + }, + { + "epoch": 0.1435492395325621, + "grad_norm": 0.0, + "learning_rate": 1.9331435190311788e-05, + "loss": 1.4414, + "step": 4889 + }, + { + "epoch": 0.1435786012097011, + "grad_norm": 0.0, + "learning_rate": 1.9331093274002866e-05, + "loss": 1.459, + "step": 4890 + }, + { + "epoch": 0.1436079628868401, + "grad_norm": 0.0, + "learning_rate": 1.933075127331048e-05, + "loss": 1.4189, + "step": 4891 + }, + { + "epoch": 0.1436373245639791, + "grad_norm": 0.0, + "learning_rate": 1.9330409188237713e-05, + "loss": 1.4336, + "step": 4892 + }, + { + "epoch": 0.14366668624111809, + "grad_norm": 0.0, + "learning_rate": 1.933006701878767e-05, + "loss": 1.4941, + "step": 4893 + }, + { + "epoch": 0.1436960479182571, + "grad_norm": 0.0, + "learning_rate": 1.9329724764963442e-05, + "loss": 1.4395, + "step": 4894 + }, + { + "epoch": 0.1437254095953961, + "grad_norm": 0.0, + "learning_rate": 1.932938242676812e-05, + "loss": 1.4434, + "step": 4895 + }, + { + "epoch": 0.14375477127253508, + "grad_norm": 0.0, + "learning_rate": 1.9329040004204807e-05, + "loss": 1.459, + "step": 4896 + }, + { + "epoch": 0.1437841329496741, + "grad_norm": 0.0, + "learning_rate": 1.93286974972766e-05, + "loss": 1.4961, + "step": 4897 + }, + { + "epoch": 0.1438134946268131, + "grad_norm": 0.0, + "learning_rate": 1.9328354905986586e-05, + "loss": 1.5371, + "step": 4898 + }, + { + "epoch": 0.14384285630395208, + "grad_norm": 0.0, + "learning_rate": 1.9328012230337872e-05, + "loss": 1.3926, + "step": 4899 + }, + { + "epoch": 0.14387221798109107, + "grad_norm": 0.0, + "learning_rate": 1.9327669470333553e-05, + "loss": 1.4736, + "step": 4900 + }, + { + "epoch": 0.14390157965823008, + "grad_norm": 0.0, + "learning_rate": 1.932732662597673e-05, + "loss": 1.5049, + "step": 4901 + }, + { + "epoch": 0.14393094133536907, + "grad_norm": 0.0, + "learning_rate": 1.932698369727051e-05, + "loss": 1.4463, + "step": 4902 + }, + { + "epoch": 0.14396030301250806, + "grad_norm": 0.0, + "learning_rate": 1.9326640684217977e-05, + "loss": 1.5459, + "step": 4903 + }, + { + "epoch": 0.14398966468964708, + "grad_norm": 0.0, + "learning_rate": 1.932629758682225e-05, + "loss": 1.5771, + "step": 4904 + }, + { + "epoch": 0.14401902636678607, + "grad_norm": 0.0, + "learning_rate": 1.9325954405086424e-05, + "loss": 1.5674, + "step": 4905 + }, + { + "epoch": 0.14404838804392506, + "grad_norm": 0.0, + "learning_rate": 1.9325611139013604e-05, + "loss": 1.3394, + "step": 4906 + }, + { + "epoch": 0.14407774972106407, + "grad_norm": 0.0, + "learning_rate": 1.9325267788606897e-05, + "loss": 1.5303, + "step": 4907 + }, + { + "epoch": 0.14410711139820306, + "grad_norm": 0.0, + "learning_rate": 1.93249243538694e-05, + "loss": 1.4805, + "step": 4908 + }, + { + "epoch": 0.14413647307534205, + "grad_norm": 0.0, + "learning_rate": 1.9324580834804227e-05, + "loss": 1.5137, + "step": 4909 + }, + { + "epoch": 0.14416583475248107, + "grad_norm": 0.0, + "learning_rate": 1.932423723141448e-05, + "loss": 1.4092, + "step": 4910 + }, + { + "epoch": 0.14419519642962006, + "grad_norm": 0.0, + "learning_rate": 1.932389354370327e-05, + "loss": 1.499, + "step": 4911 + }, + { + "epoch": 0.14422455810675905, + "grad_norm": 0.0, + "learning_rate": 1.9323549771673703e-05, + "loss": 1.4404, + "step": 4912 + }, + { + "epoch": 0.14425391978389807, + "grad_norm": 0.0, + "learning_rate": 1.9323205915328884e-05, + "loss": 1.6123, + "step": 4913 + }, + { + "epoch": 0.14428328146103706, + "grad_norm": 0.0, + "learning_rate": 1.932286197467193e-05, + "loss": 1.5098, + "step": 4914 + }, + { + "epoch": 0.14431264313817604, + "grad_norm": 0.0, + "learning_rate": 1.9322517949705948e-05, + "loss": 1.4209, + "step": 4915 + }, + { + "epoch": 0.14434200481531506, + "grad_norm": 0.0, + "learning_rate": 1.9322173840434044e-05, + "loss": 1.5371, + "step": 4916 + }, + { + "epoch": 0.14437136649245405, + "grad_norm": 0.0, + "learning_rate": 1.9321829646859337e-05, + "loss": 1.4258, + "step": 4917 + }, + { + "epoch": 0.14440072816959304, + "grad_norm": 0.0, + "learning_rate": 1.9321485368984937e-05, + "loss": 1.6611, + "step": 4918 + }, + { + "epoch": 0.14443008984673206, + "grad_norm": 0.0, + "learning_rate": 1.932114100681396e-05, + "loss": 1.5137, + "step": 4919 + }, + { + "epoch": 0.14445945152387105, + "grad_norm": 0.0, + "learning_rate": 1.9320796560349515e-05, + "loss": 1.5732, + "step": 4920 + }, + { + "epoch": 0.14448881320101004, + "grad_norm": 0.0, + "learning_rate": 1.932045202959472e-05, + "loss": 1.3018, + "step": 4921 + }, + { + "epoch": 0.14451817487814905, + "grad_norm": 0.0, + "learning_rate": 1.932010741455269e-05, + "loss": 1.3447, + "step": 4922 + }, + { + "epoch": 0.14454753655528804, + "grad_norm": 0.0, + "learning_rate": 1.9319762715226544e-05, + "loss": 1.4033, + "step": 4923 + }, + { + "epoch": 0.14457689823242703, + "grad_norm": 0.0, + "learning_rate": 1.9319417931619395e-05, + "loss": 1.5518, + "step": 4924 + }, + { + "epoch": 0.14460625990956602, + "grad_norm": 0.0, + "learning_rate": 1.9319073063734363e-05, + "loss": 1.4883, + "step": 4925 + }, + { + "epoch": 0.14463562158670504, + "grad_norm": 0.0, + "learning_rate": 1.9318728111574566e-05, + "loss": 1.3975, + "step": 4926 + }, + { + "epoch": 0.14466498326384403, + "grad_norm": 0.0, + "learning_rate": 1.9318383075143123e-05, + "loss": 1.4834, + "step": 4927 + }, + { + "epoch": 0.14469434494098302, + "grad_norm": 0.0, + "learning_rate": 1.9318037954443157e-05, + "loss": 1.459, + "step": 4928 + }, + { + "epoch": 0.14472370661812203, + "grad_norm": 0.0, + "learning_rate": 1.931769274947779e-05, + "loss": 1.4404, + "step": 4929 + }, + { + "epoch": 0.14475306829526102, + "grad_norm": 0.0, + "learning_rate": 1.931734746025014e-05, + "loss": 1.4922, + "step": 4930 + }, + { + "epoch": 0.1447824299724, + "grad_norm": 0.0, + "learning_rate": 1.9317002086763327e-05, + "loss": 1.4277, + "step": 4931 + }, + { + "epoch": 0.14481179164953903, + "grad_norm": 0.0, + "learning_rate": 1.931665662902048e-05, + "loss": 1.4609, + "step": 4932 + }, + { + "epoch": 0.14484115332667802, + "grad_norm": 0.0, + "learning_rate": 1.931631108702472e-05, + "loss": 1.459, + "step": 4933 + }, + { + "epoch": 0.144870515003817, + "grad_norm": 0.0, + "learning_rate": 1.9315965460779174e-05, + "loss": 1.4707, + "step": 4934 + }, + { + "epoch": 0.14489987668095602, + "grad_norm": 0.0, + "learning_rate": 1.931561975028697e-05, + "loss": 1.4648, + "step": 4935 + }, + { + "epoch": 0.144929238358095, + "grad_norm": 0.0, + "learning_rate": 1.9315273955551225e-05, + "loss": 1.4521, + "step": 4936 + }, + { + "epoch": 0.144958600035234, + "grad_norm": 0.0, + "learning_rate": 1.9314928076575075e-05, + "loss": 1.4072, + "step": 4937 + }, + { + "epoch": 0.14498796171237302, + "grad_norm": 0.0, + "learning_rate": 1.9314582113361645e-05, + "loss": 1.3916, + "step": 4938 + }, + { + "epoch": 0.145017323389512, + "grad_norm": 0.0, + "learning_rate": 1.931423606591406e-05, + "loss": 1.4912, + "step": 4939 + }, + { + "epoch": 0.145046685066651, + "grad_norm": 0.0, + "learning_rate": 1.9313889934235458e-05, + "loss": 1.4014, + "step": 4940 + }, + { + "epoch": 0.14507604674379002, + "grad_norm": 0.0, + "learning_rate": 1.9313543718328966e-05, + "loss": 1.4453, + "step": 4941 + }, + { + "epoch": 0.145105408420929, + "grad_norm": 0.0, + "learning_rate": 1.9313197418197705e-05, + "loss": 1.4639, + "step": 4942 + }, + { + "epoch": 0.145134770098068, + "grad_norm": 0.0, + "learning_rate": 1.931285103384482e-05, + "loss": 1.5752, + "step": 4943 + }, + { + "epoch": 0.145164131775207, + "grad_norm": 0.0, + "learning_rate": 1.9312504565273437e-05, + "loss": 1.5625, + "step": 4944 + }, + { + "epoch": 0.145193493452346, + "grad_norm": 0.0, + "learning_rate": 1.9312158012486688e-05, + "loss": 1.501, + "step": 4945 + }, + { + "epoch": 0.145222855129485, + "grad_norm": 0.0, + "learning_rate": 1.9311811375487714e-05, + "loss": 1.4707, + "step": 4946 + }, + { + "epoch": 0.145252216806624, + "grad_norm": 0.0, + "learning_rate": 1.931146465427964e-05, + "loss": 1.3564, + "step": 4947 + }, + { + "epoch": 0.145281578483763, + "grad_norm": 0.0, + "learning_rate": 1.9311117848865607e-05, + "loss": 1.3066, + "step": 4948 + }, + { + "epoch": 0.14531094016090199, + "grad_norm": 0.0, + "learning_rate": 1.931077095924875e-05, + "loss": 1.4717, + "step": 4949 + }, + { + "epoch": 0.14534030183804098, + "grad_norm": 0.0, + "learning_rate": 1.931042398543221e-05, + "loss": 1.5508, + "step": 4950 + }, + { + "epoch": 0.14536966351518, + "grad_norm": 0.0, + "learning_rate": 1.9310076927419123e-05, + "loss": 1.3291, + "step": 4951 + }, + { + "epoch": 0.14539902519231898, + "grad_norm": 0.0, + "learning_rate": 1.9309729785212622e-05, + "loss": 1.416, + "step": 4952 + }, + { + "epoch": 0.14542838686945797, + "grad_norm": 0.0, + "learning_rate": 1.9309382558815848e-05, + "loss": 1.4746, + "step": 4953 + }, + { + "epoch": 0.145457748546597, + "grad_norm": 0.0, + "learning_rate": 1.9309035248231947e-05, + "loss": 1.501, + "step": 4954 + }, + { + "epoch": 0.14548711022373598, + "grad_norm": 0.0, + "learning_rate": 1.9308687853464057e-05, + "loss": 1.4375, + "step": 4955 + }, + { + "epoch": 0.14551647190087497, + "grad_norm": 0.0, + "learning_rate": 1.930834037451532e-05, + "loss": 1.3779, + "step": 4956 + }, + { + "epoch": 0.14554583357801398, + "grad_norm": 0.0, + "learning_rate": 1.9307992811388873e-05, + "loss": 1.4375, + "step": 4957 + }, + { + "epoch": 0.14557519525515297, + "grad_norm": 0.0, + "learning_rate": 1.9307645164087864e-05, + "loss": 1.3125, + "step": 4958 + }, + { + "epoch": 0.14560455693229196, + "grad_norm": 0.0, + "learning_rate": 1.9307297432615436e-05, + "loss": 1.5059, + "step": 4959 + }, + { + "epoch": 0.14563391860943098, + "grad_norm": 0.0, + "learning_rate": 1.9306949616974733e-05, + "loss": 1.457, + "step": 4960 + }, + { + "epoch": 0.14566328028656997, + "grad_norm": 0.0, + "learning_rate": 1.9306601717168907e-05, + "loss": 1.4912, + "step": 4961 + }, + { + "epoch": 0.14569264196370896, + "grad_norm": 0.0, + "learning_rate": 1.9306253733201094e-05, + "loss": 1.3633, + "step": 4962 + }, + { + "epoch": 0.14572200364084797, + "grad_norm": 0.0, + "learning_rate": 1.9305905665074442e-05, + "loss": 1.459, + "step": 4963 + }, + { + "epoch": 0.14575136531798696, + "grad_norm": 0.0, + "learning_rate": 1.9305557512792106e-05, + "loss": 1.5645, + "step": 4964 + }, + { + "epoch": 0.14578072699512595, + "grad_norm": 0.0, + "learning_rate": 1.930520927635723e-05, + "loss": 1.4243, + "step": 4965 + }, + { + "epoch": 0.14581008867226497, + "grad_norm": 0.0, + "learning_rate": 1.9304860955772963e-05, + "loss": 1.5586, + "step": 4966 + }, + { + "epoch": 0.14583945034940396, + "grad_norm": 0.0, + "learning_rate": 1.9304512551042455e-05, + "loss": 1.4561, + "step": 4967 + }, + { + "epoch": 0.14586881202654295, + "grad_norm": 0.0, + "learning_rate": 1.9304164062168856e-05, + "loss": 1.3525, + "step": 4968 + }, + { + "epoch": 0.14589817370368197, + "grad_norm": 0.0, + "learning_rate": 1.930381548915532e-05, + "loss": 1.4355, + "step": 4969 + }, + { + "epoch": 0.14592753538082096, + "grad_norm": 0.0, + "learning_rate": 1.9303466832004997e-05, + "loss": 1.3174, + "step": 4970 + }, + { + "epoch": 0.14595689705795994, + "grad_norm": 0.0, + "learning_rate": 1.930311809072104e-05, + "loss": 1.5771, + "step": 4971 + }, + { + "epoch": 0.14598625873509896, + "grad_norm": 0.0, + "learning_rate": 1.9302769265306605e-05, + "loss": 1.4648, + "step": 4972 + }, + { + "epoch": 0.14601562041223795, + "grad_norm": 0.0, + "learning_rate": 1.9302420355764844e-05, + "loss": 1.4971, + "step": 4973 + }, + { + "epoch": 0.14604498208937694, + "grad_norm": 0.0, + "learning_rate": 1.9302071362098914e-05, + "loss": 1.2676, + "step": 4974 + }, + { + "epoch": 0.14607434376651596, + "grad_norm": 0.0, + "learning_rate": 1.930172228431197e-05, + "loss": 1.4883, + "step": 4975 + }, + { + "epoch": 0.14610370544365495, + "grad_norm": 0.0, + "learning_rate": 1.930137312240717e-05, + "loss": 1.4775, + "step": 4976 + }, + { + "epoch": 0.14613306712079394, + "grad_norm": 0.0, + "learning_rate": 1.930102387638767e-05, + "loss": 1.2798, + "step": 4977 + }, + { + "epoch": 0.14616242879793293, + "grad_norm": 0.0, + "learning_rate": 1.930067454625663e-05, + "loss": 1.4668, + "step": 4978 + }, + { + "epoch": 0.14619179047507194, + "grad_norm": 0.0, + "learning_rate": 1.9300325132017207e-05, + "loss": 1.5439, + "step": 4979 + }, + { + "epoch": 0.14622115215221093, + "grad_norm": 0.0, + "learning_rate": 1.9299975633672564e-05, + "loss": 1.6143, + "step": 4980 + }, + { + "epoch": 0.14625051382934992, + "grad_norm": 0.0, + "learning_rate": 1.9299626051225858e-05, + "loss": 1.5479, + "step": 4981 + }, + { + "epoch": 0.14627987550648894, + "grad_norm": 0.0, + "learning_rate": 1.929927638468025e-05, + "loss": 1.2383, + "step": 4982 + }, + { + "epoch": 0.14630923718362793, + "grad_norm": 0.0, + "learning_rate": 1.9298926634038907e-05, + "loss": 1.5781, + "step": 4983 + }, + { + "epoch": 0.14633859886076692, + "grad_norm": 0.0, + "learning_rate": 1.9298576799304988e-05, + "loss": 1.3032, + "step": 4984 + }, + { + "epoch": 0.14636796053790593, + "grad_norm": 0.0, + "learning_rate": 1.9298226880481657e-05, + "loss": 1.4883, + "step": 4985 + }, + { + "epoch": 0.14639732221504492, + "grad_norm": 0.0, + "learning_rate": 1.9297876877572078e-05, + "loss": 1.4014, + "step": 4986 + }, + { + "epoch": 0.1464266838921839, + "grad_norm": 0.0, + "learning_rate": 1.9297526790579418e-05, + "loss": 1.5195, + "step": 4987 + }, + { + "epoch": 0.14645604556932293, + "grad_norm": 0.0, + "learning_rate": 1.9297176619506843e-05, + "loss": 1.4375, + "step": 4988 + }, + { + "epoch": 0.14648540724646192, + "grad_norm": 0.0, + "learning_rate": 1.929682636435752e-05, + "loss": 1.666, + "step": 4989 + }, + { + "epoch": 0.1465147689236009, + "grad_norm": 0.0, + "learning_rate": 1.9296476025134613e-05, + "loss": 1.4268, + "step": 4990 + }, + { + "epoch": 0.14654413060073992, + "grad_norm": 0.0, + "learning_rate": 1.9296125601841295e-05, + "loss": 1.3086, + "step": 4991 + }, + { + "epoch": 0.14657349227787891, + "grad_norm": 0.0, + "learning_rate": 1.929577509448073e-05, + "loss": 1.3506, + "step": 4992 + }, + { + "epoch": 0.1466028539550179, + "grad_norm": 0.0, + "learning_rate": 1.929542450305609e-05, + "loss": 1.5322, + "step": 4993 + }, + { + "epoch": 0.14663221563215692, + "grad_norm": 0.0, + "learning_rate": 1.9295073827570546e-05, + "loss": 1.3379, + "step": 4994 + }, + { + "epoch": 0.1466615773092959, + "grad_norm": 0.0, + "learning_rate": 1.929472306802727e-05, + "loss": 1.4844, + "step": 4995 + }, + { + "epoch": 0.1466909389864349, + "grad_norm": 0.0, + "learning_rate": 1.9294372224429432e-05, + "loss": 1.4385, + "step": 4996 + }, + { + "epoch": 0.14672030066357392, + "grad_norm": 0.0, + "learning_rate": 1.9294021296780205e-05, + "loss": 1.5635, + "step": 4997 + }, + { + "epoch": 0.1467496623407129, + "grad_norm": 0.0, + "learning_rate": 1.9293670285082765e-05, + "loss": 1.3906, + "step": 4998 + }, + { + "epoch": 0.1467790240178519, + "grad_norm": 0.0, + "learning_rate": 1.9293319189340286e-05, + "loss": 1.3389, + "step": 4999 + }, + { + "epoch": 0.1468083856949909, + "grad_norm": 0.0, + "learning_rate": 1.929296800955594e-05, + "loss": 1.4668, + "step": 5000 + }, + { + "epoch": 0.1468377473721299, + "grad_norm": 0.0, + "learning_rate": 1.9292616745732902e-05, + "loss": 1.5928, + "step": 5001 + }, + { + "epoch": 0.1468671090492689, + "grad_norm": 0.0, + "learning_rate": 1.9292265397874353e-05, + "loss": 1.5283, + "step": 5002 + }, + { + "epoch": 0.14689647072640788, + "grad_norm": 0.0, + "learning_rate": 1.9291913965983468e-05, + "loss": 1.5459, + "step": 5003 + }, + { + "epoch": 0.1469258324035469, + "grad_norm": 0.0, + "learning_rate": 1.9291562450063424e-05, + "loss": 1.4297, + "step": 5004 + }, + { + "epoch": 0.14695519408068589, + "grad_norm": 0.0, + "learning_rate": 1.9291210850117397e-05, + "loss": 1.6221, + "step": 5005 + }, + { + "epoch": 0.14698455575782488, + "grad_norm": 0.0, + "learning_rate": 1.929085916614858e-05, + "loss": 1.5527, + "step": 5006 + }, + { + "epoch": 0.1470139174349639, + "grad_norm": 0.0, + "learning_rate": 1.929050739816014e-05, + "loss": 1.4912, + "step": 5007 + }, + { + "epoch": 0.14704327911210288, + "grad_norm": 0.0, + "learning_rate": 1.929015554615526e-05, + "loss": 1.4082, + "step": 5008 + }, + { + "epoch": 0.14707264078924187, + "grad_norm": 0.0, + "learning_rate": 1.928980361013713e-05, + "loss": 1.3516, + "step": 5009 + }, + { + "epoch": 0.1471020024663809, + "grad_norm": 0.0, + "learning_rate": 1.928945159010892e-05, + "loss": 1.4385, + "step": 5010 + }, + { + "epoch": 0.14713136414351988, + "grad_norm": 0.0, + "learning_rate": 1.9289099486073823e-05, + "loss": 1.4814, + "step": 5011 + }, + { + "epoch": 0.14716072582065887, + "grad_norm": 0.0, + "learning_rate": 1.9288747298035023e-05, + "loss": 1.4111, + "step": 5012 + }, + { + "epoch": 0.14719008749779788, + "grad_norm": 0.0, + "learning_rate": 1.9288395025995698e-05, + "loss": 1.4316, + "step": 5013 + }, + { + "epoch": 0.14721944917493687, + "grad_norm": 0.0, + "learning_rate": 1.9288042669959043e-05, + "loss": 1.5537, + "step": 5014 + }, + { + "epoch": 0.14724881085207586, + "grad_norm": 0.0, + "learning_rate": 1.9287690229928233e-05, + "loss": 1.4053, + "step": 5015 + }, + { + "epoch": 0.14727817252921488, + "grad_norm": 0.0, + "learning_rate": 1.9287337705906466e-05, + "loss": 1.3867, + "step": 5016 + }, + { + "epoch": 0.14730753420635387, + "grad_norm": 0.0, + "learning_rate": 1.9286985097896923e-05, + "loss": 1.3799, + "step": 5017 + }, + { + "epoch": 0.14733689588349286, + "grad_norm": 0.0, + "learning_rate": 1.92866324059028e-05, + "loss": 1.541, + "step": 5018 + }, + { + "epoch": 0.14736625756063187, + "grad_norm": 0.0, + "learning_rate": 1.9286279629927277e-05, + "loss": 1.5303, + "step": 5019 + }, + { + "epoch": 0.14739561923777086, + "grad_norm": 0.0, + "learning_rate": 1.928592676997355e-05, + "loss": 1.4521, + "step": 5020 + }, + { + "epoch": 0.14742498091490985, + "grad_norm": 0.0, + "learning_rate": 1.9285573826044812e-05, + "loss": 1.3623, + "step": 5021 + }, + { + "epoch": 0.14745434259204887, + "grad_norm": 0.0, + "learning_rate": 1.928522079814425e-05, + "loss": 1.4316, + "step": 5022 + }, + { + "epoch": 0.14748370426918786, + "grad_norm": 0.0, + "learning_rate": 1.9284867686275058e-05, + "loss": 1.4785, + "step": 5023 + }, + { + "epoch": 0.14751306594632685, + "grad_norm": 0.0, + "learning_rate": 1.9284514490440428e-05, + "loss": 1.5615, + "step": 5024 + }, + { + "epoch": 0.14754242762346587, + "grad_norm": 0.0, + "learning_rate": 1.9284161210643556e-05, + "loss": 1.4902, + "step": 5025 + }, + { + "epoch": 0.14757178930060486, + "grad_norm": 0.0, + "learning_rate": 1.928380784688764e-05, + "loss": 1.4365, + "step": 5026 + }, + { + "epoch": 0.14760115097774384, + "grad_norm": 0.0, + "learning_rate": 1.928345439917587e-05, + "loss": 1.4336, + "step": 5027 + }, + { + "epoch": 0.14763051265488283, + "grad_norm": 0.0, + "learning_rate": 1.9283100867511444e-05, + "loss": 1.3994, + "step": 5028 + }, + { + "epoch": 0.14765987433202185, + "grad_norm": 0.0, + "learning_rate": 1.928274725189756e-05, + "loss": 1.5449, + "step": 5029 + }, + { + "epoch": 0.14768923600916084, + "grad_norm": 0.0, + "learning_rate": 1.9282393552337413e-05, + "loss": 1.5537, + "step": 5030 + }, + { + "epoch": 0.14771859768629983, + "grad_norm": 0.0, + "learning_rate": 1.9282039768834208e-05, + "loss": 1.5801, + "step": 5031 + }, + { + "epoch": 0.14774795936343885, + "grad_norm": 0.0, + "learning_rate": 1.9281685901391135e-05, + "loss": 1.3428, + "step": 5032 + }, + { + "epoch": 0.14777732104057784, + "grad_norm": 0.0, + "learning_rate": 1.9281331950011404e-05, + "loss": 1.5879, + "step": 5033 + }, + { + "epoch": 0.14780668271771683, + "grad_norm": 0.0, + "learning_rate": 1.928097791469821e-05, + "loss": 1.4951, + "step": 5034 + }, + { + "epoch": 0.14783604439485584, + "grad_norm": 0.0, + "learning_rate": 1.9280623795454752e-05, + "loss": 1.3994, + "step": 5035 + }, + { + "epoch": 0.14786540607199483, + "grad_norm": 0.0, + "learning_rate": 1.928026959228424e-05, + "loss": 1.4463, + "step": 5036 + }, + { + "epoch": 0.14789476774913382, + "grad_norm": 0.0, + "learning_rate": 1.9279915305189873e-05, + "loss": 1.3623, + "step": 5037 + }, + { + "epoch": 0.14792412942627284, + "grad_norm": 0.0, + "learning_rate": 1.9279560934174855e-05, + "loss": 1.5146, + "step": 5038 + }, + { + "epoch": 0.14795349110341183, + "grad_norm": 0.0, + "learning_rate": 1.927920647924239e-05, + "loss": 1.4971, + "step": 5039 + }, + { + "epoch": 0.14798285278055082, + "grad_norm": 0.0, + "learning_rate": 1.927885194039569e-05, + "loss": 1.6689, + "step": 5040 + }, + { + "epoch": 0.14801221445768983, + "grad_norm": 0.0, + "learning_rate": 1.927849731763795e-05, + "loss": 1.5654, + "step": 5041 + }, + { + "epoch": 0.14804157613482882, + "grad_norm": 0.0, + "learning_rate": 1.9278142610972382e-05, + "loss": 1.4639, + "step": 5042 + }, + { + "epoch": 0.1480709378119678, + "grad_norm": 0.0, + "learning_rate": 1.9277787820402196e-05, + "loss": 1.4854, + "step": 5043 + }, + { + "epoch": 0.14810029948910683, + "grad_norm": 0.0, + "learning_rate": 1.92774329459306e-05, + "loss": 1.5225, + "step": 5044 + }, + { + "epoch": 0.14812966116624582, + "grad_norm": 0.0, + "learning_rate": 1.9277077987560798e-05, + "loss": 1.3574, + "step": 5045 + }, + { + "epoch": 0.1481590228433848, + "grad_norm": 0.0, + "learning_rate": 1.9276722945296008e-05, + "loss": 1.5742, + "step": 5046 + }, + { + "epoch": 0.14818838452052382, + "grad_norm": 0.0, + "learning_rate": 1.927636781913943e-05, + "loss": 1.4961, + "step": 5047 + }, + { + "epoch": 0.14821774619766281, + "grad_norm": 0.0, + "learning_rate": 1.927601260909429e-05, + "loss": 1.501, + "step": 5048 + }, + { + "epoch": 0.1482471078748018, + "grad_norm": 0.0, + "learning_rate": 1.9275657315163787e-05, + "loss": 1.5254, + "step": 5049 + }, + { + "epoch": 0.14827646955194082, + "grad_norm": 0.0, + "learning_rate": 1.9275301937351143e-05, + "loss": 1.4463, + "step": 5050 + }, + { + "epoch": 0.1483058312290798, + "grad_norm": 0.0, + "learning_rate": 1.9274946475659568e-05, + "loss": 1.417, + "step": 5051 + }, + { + "epoch": 0.1483351929062188, + "grad_norm": 0.0, + "learning_rate": 1.9274590930092272e-05, + "loss": 1.5205, + "step": 5052 + }, + { + "epoch": 0.1483645545833578, + "grad_norm": 0.0, + "learning_rate": 1.927423530065248e-05, + "loss": 1.5342, + "step": 5053 + }, + { + "epoch": 0.1483939162604968, + "grad_norm": 0.0, + "learning_rate": 1.92738795873434e-05, + "loss": 1.2715, + "step": 5054 + }, + { + "epoch": 0.1484232779376358, + "grad_norm": 0.0, + "learning_rate": 1.9273523790168255e-05, + "loss": 1.5547, + "step": 5055 + }, + { + "epoch": 0.14845263961477478, + "grad_norm": 0.0, + "learning_rate": 1.9273167909130257e-05, + "loss": 1.3799, + "step": 5056 + }, + { + "epoch": 0.1484820012919138, + "grad_norm": 0.0, + "learning_rate": 1.9272811944232627e-05, + "loss": 1.5547, + "step": 5057 + }, + { + "epoch": 0.1485113629690528, + "grad_norm": 0.0, + "learning_rate": 1.9272455895478582e-05, + "loss": 1.4648, + "step": 5058 + }, + { + "epoch": 0.14854072464619178, + "grad_norm": 0.0, + "learning_rate": 1.927209976287135e-05, + "loss": 1.5264, + "step": 5059 + }, + { + "epoch": 0.1485700863233308, + "grad_norm": 0.0, + "learning_rate": 1.9271743546414135e-05, + "loss": 1.5518, + "step": 5060 + }, + { + "epoch": 0.14859944800046979, + "grad_norm": 0.0, + "learning_rate": 1.9271387246110177e-05, + "loss": 1.4531, + "step": 5061 + }, + { + "epoch": 0.14862880967760878, + "grad_norm": 0.0, + "learning_rate": 1.9271030861962687e-05, + "loss": 1.5908, + "step": 5062 + }, + { + "epoch": 0.1486581713547478, + "grad_norm": 0.0, + "learning_rate": 1.9270674393974892e-05, + "loss": 1.5244, + "step": 5063 + }, + { + "epoch": 0.14868753303188678, + "grad_norm": 0.0, + "learning_rate": 1.927031784215001e-05, + "loss": 1.4287, + "step": 5064 + }, + { + "epoch": 0.14871689470902577, + "grad_norm": 0.0, + "learning_rate": 1.9269961206491274e-05, + "loss": 1.5537, + "step": 5065 + }, + { + "epoch": 0.1487462563861648, + "grad_norm": 0.0, + "learning_rate": 1.92696044870019e-05, + "loss": 1.5586, + "step": 5066 + }, + { + "epoch": 0.14877561806330378, + "grad_norm": 0.0, + "learning_rate": 1.9269247683685127e-05, + "loss": 1.5576, + "step": 5067 + }, + { + "epoch": 0.14880497974044277, + "grad_norm": 0.0, + "learning_rate": 1.9268890796544167e-05, + "loss": 1.4004, + "step": 5068 + }, + { + "epoch": 0.14883434141758178, + "grad_norm": 0.0, + "learning_rate": 1.9268533825582255e-05, + "loss": 1.5029, + "step": 5069 + }, + { + "epoch": 0.14886370309472077, + "grad_norm": 0.0, + "learning_rate": 1.9268176770802615e-05, + "loss": 1.375, + "step": 5070 + }, + { + "epoch": 0.14889306477185976, + "grad_norm": 0.0, + "learning_rate": 1.9267819632208482e-05, + "loss": 1.4961, + "step": 5071 + }, + { + "epoch": 0.14892242644899878, + "grad_norm": 0.0, + "learning_rate": 1.9267462409803082e-05, + "loss": 1.4297, + "step": 5072 + }, + { + "epoch": 0.14895178812613777, + "grad_norm": 0.0, + "learning_rate": 1.9267105103589647e-05, + "loss": 1.5449, + "step": 5073 + }, + { + "epoch": 0.14898114980327676, + "grad_norm": 0.0, + "learning_rate": 1.926674771357141e-05, + "loss": 1.5811, + "step": 5074 + }, + { + "epoch": 0.14901051148041577, + "grad_norm": 0.0, + "learning_rate": 1.9266390239751593e-05, + "loss": 1.5557, + "step": 5075 + }, + { + "epoch": 0.14903987315755476, + "grad_norm": 0.0, + "learning_rate": 1.926603268213344e-05, + "loss": 1.4561, + "step": 5076 + }, + { + "epoch": 0.14906923483469375, + "grad_norm": 0.0, + "learning_rate": 1.926567504072018e-05, + "loss": 1.3135, + "step": 5077 + }, + { + "epoch": 0.14909859651183274, + "grad_norm": 0.0, + "learning_rate": 1.9265317315515046e-05, + "loss": 1.4414, + "step": 5078 + }, + { + "epoch": 0.14912795818897176, + "grad_norm": 0.0, + "learning_rate": 1.9264959506521278e-05, + "loss": 1.4609, + "step": 5079 + }, + { + "epoch": 0.14915731986611075, + "grad_norm": 0.0, + "learning_rate": 1.9264601613742108e-05, + "loss": 1.375, + "step": 5080 + }, + { + "epoch": 0.14918668154324974, + "grad_norm": 0.0, + "learning_rate": 1.9264243637180767e-05, + "loss": 1.4668, + "step": 5081 + }, + { + "epoch": 0.14921604322038876, + "grad_norm": 0.0, + "learning_rate": 1.9263885576840505e-05, + "loss": 1.5098, + "step": 5082 + }, + { + "epoch": 0.14924540489752774, + "grad_norm": 0.0, + "learning_rate": 1.926352743272455e-05, + "loss": 1.6104, + "step": 5083 + }, + { + "epoch": 0.14927476657466673, + "grad_norm": 0.0, + "learning_rate": 1.9263169204836142e-05, + "loss": 1.4707, + "step": 5084 + }, + { + "epoch": 0.14930412825180575, + "grad_norm": 0.0, + "learning_rate": 1.926281089317853e-05, + "loss": 1.583, + "step": 5085 + }, + { + "epoch": 0.14933348992894474, + "grad_norm": 0.0, + "learning_rate": 1.926245249775494e-05, + "loss": 1.5312, + "step": 5086 + }, + { + "epoch": 0.14936285160608373, + "grad_norm": 0.0, + "learning_rate": 1.9262094018568622e-05, + "loss": 1.5879, + "step": 5087 + }, + { + "epoch": 0.14939221328322275, + "grad_norm": 0.0, + "learning_rate": 1.9261735455622815e-05, + "loss": 1.3223, + "step": 5088 + }, + { + "epoch": 0.14942157496036174, + "grad_norm": 0.0, + "learning_rate": 1.9261376808920763e-05, + "loss": 1.4619, + "step": 5089 + }, + { + "epoch": 0.14945093663750073, + "grad_norm": 0.0, + "learning_rate": 1.926101807846571e-05, + "loss": 1.46, + "step": 5090 + }, + { + "epoch": 0.14948029831463974, + "grad_norm": 0.0, + "learning_rate": 1.9260659264260898e-05, + "loss": 1.502, + "step": 5091 + }, + { + "epoch": 0.14950965999177873, + "grad_norm": 0.0, + "learning_rate": 1.926030036630957e-05, + "loss": 1.4375, + "step": 5092 + }, + { + "epoch": 0.14953902166891772, + "grad_norm": 0.0, + "learning_rate": 1.925994138461498e-05, + "loss": 1.4941, + "step": 5093 + }, + { + "epoch": 0.14956838334605674, + "grad_norm": 0.0, + "learning_rate": 1.925958231918036e-05, + "loss": 1.377, + "step": 5094 + }, + { + "epoch": 0.14959774502319573, + "grad_norm": 0.0, + "learning_rate": 1.9259223170008972e-05, + "loss": 1.3994, + "step": 5095 + }, + { + "epoch": 0.14962710670033472, + "grad_norm": 0.0, + "learning_rate": 1.9258863937104054e-05, + "loss": 1.3887, + "step": 5096 + }, + { + "epoch": 0.14965646837747373, + "grad_norm": 0.0, + "learning_rate": 1.9258504620468862e-05, + "loss": 1.4863, + "step": 5097 + }, + { + "epoch": 0.14968583005461272, + "grad_norm": 0.0, + "learning_rate": 1.9258145220106642e-05, + "loss": 1.4072, + "step": 5098 + }, + { + "epoch": 0.1497151917317517, + "grad_norm": 0.0, + "learning_rate": 1.925778573602064e-05, + "loss": 1.4824, + "step": 5099 + }, + { + "epoch": 0.14974455340889073, + "grad_norm": 0.0, + "learning_rate": 1.925742616821411e-05, + "loss": 1.4365, + "step": 5100 + }, + { + "epoch": 0.14977391508602972, + "grad_norm": 0.0, + "learning_rate": 1.9257066516690307e-05, + "loss": 1.4268, + "step": 5101 + }, + { + "epoch": 0.1498032767631687, + "grad_norm": 0.0, + "learning_rate": 1.925670678145248e-05, + "loss": 1.4053, + "step": 5102 + }, + { + "epoch": 0.1498326384403077, + "grad_norm": 0.0, + "learning_rate": 1.9256346962503883e-05, + "loss": 1.6396, + "step": 5103 + }, + { + "epoch": 0.14986200011744671, + "grad_norm": 0.0, + "learning_rate": 1.925598705984777e-05, + "loss": 1.5371, + "step": 5104 + }, + { + "epoch": 0.1498913617945857, + "grad_norm": 0.0, + "learning_rate": 1.92556270734874e-05, + "loss": 1.4961, + "step": 5105 + }, + { + "epoch": 0.1499207234717247, + "grad_norm": 0.0, + "learning_rate": 1.9255267003426016e-05, + "loss": 1.4678, + "step": 5106 + }, + { + "epoch": 0.1499500851488637, + "grad_norm": 0.0, + "learning_rate": 1.9254906849666885e-05, + "loss": 1.4795, + "step": 5107 + }, + { + "epoch": 0.1499794468260027, + "grad_norm": 0.0, + "learning_rate": 1.9254546612213262e-05, + "loss": 1.4482, + "step": 5108 + }, + { + "epoch": 0.1500088085031417, + "grad_norm": 0.0, + "learning_rate": 1.9254186291068403e-05, + "loss": 1.4648, + "step": 5109 + }, + { + "epoch": 0.1500381701802807, + "grad_norm": 0.0, + "learning_rate": 1.925382588623557e-05, + "loss": 1.4355, + "step": 5110 + }, + { + "epoch": 0.1500675318574197, + "grad_norm": 0.0, + "learning_rate": 1.9253465397718017e-05, + "loss": 1.4053, + "step": 5111 + }, + { + "epoch": 0.15009689353455868, + "grad_norm": 0.0, + "learning_rate": 1.9253104825519006e-05, + "loss": 1.4658, + "step": 5112 + }, + { + "epoch": 0.1501262552116977, + "grad_norm": 0.0, + "learning_rate": 1.92527441696418e-05, + "loss": 1.4121, + "step": 5113 + }, + { + "epoch": 0.1501556168888367, + "grad_norm": 0.0, + "learning_rate": 1.925238343008966e-05, + "loss": 1.4717, + "step": 5114 + }, + { + "epoch": 0.15018497856597568, + "grad_norm": 0.0, + "learning_rate": 1.9252022606865844e-05, + "loss": 1.4434, + "step": 5115 + }, + { + "epoch": 0.1502143402431147, + "grad_norm": 0.0, + "learning_rate": 1.925166169997362e-05, + "loss": 1.4932, + "step": 5116 + }, + { + "epoch": 0.15024370192025369, + "grad_norm": 0.0, + "learning_rate": 1.9251300709416246e-05, + "loss": 1.46, + "step": 5117 + }, + { + "epoch": 0.15027306359739268, + "grad_norm": 0.0, + "learning_rate": 1.9250939635196996e-05, + "loss": 1.5156, + "step": 5118 + }, + { + "epoch": 0.1503024252745317, + "grad_norm": 0.0, + "learning_rate": 1.9250578477319127e-05, + "loss": 1.4268, + "step": 5119 + }, + { + "epoch": 0.15033178695167068, + "grad_norm": 0.0, + "learning_rate": 1.9250217235785906e-05, + "loss": 1.4668, + "step": 5120 + }, + { + "epoch": 0.15036114862880967, + "grad_norm": 0.0, + "learning_rate": 1.9249855910600605e-05, + "loss": 1.5, + "step": 5121 + }, + { + "epoch": 0.1503905103059487, + "grad_norm": 0.0, + "learning_rate": 1.9249494501766485e-05, + "loss": 1.4365, + "step": 5122 + }, + { + "epoch": 0.15041987198308768, + "grad_norm": 0.0, + "learning_rate": 1.924913300928682e-05, + "loss": 1.2852, + "step": 5123 + }, + { + "epoch": 0.15044923366022667, + "grad_norm": 0.0, + "learning_rate": 1.9248771433164875e-05, + "loss": 1.4531, + "step": 5124 + }, + { + "epoch": 0.15047859533736568, + "grad_norm": 0.0, + "learning_rate": 1.9248409773403922e-05, + "loss": 1.4512, + "step": 5125 + }, + { + "epoch": 0.15050795701450467, + "grad_norm": 0.0, + "learning_rate": 1.924804803000723e-05, + "loss": 1.4336, + "step": 5126 + }, + { + "epoch": 0.15053731869164366, + "grad_norm": 0.0, + "learning_rate": 1.9247686202978073e-05, + "loss": 1.458, + "step": 5127 + }, + { + "epoch": 0.15056668036878265, + "grad_norm": 0.0, + "learning_rate": 1.924732429231972e-05, + "loss": 1.543, + "step": 5128 + }, + { + "epoch": 0.15059604204592167, + "grad_norm": 0.0, + "learning_rate": 1.9246962298035444e-05, + "loss": 1.5244, + "step": 5129 + }, + { + "epoch": 0.15062540372306066, + "grad_norm": 0.0, + "learning_rate": 1.924660022012852e-05, + "loss": 1.4648, + "step": 5130 + }, + { + "epoch": 0.15065476540019965, + "grad_norm": 0.0, + "learning_rate": 1.9246238058602225e-05, + "loss": 1.3984, + "step": 5131 + }, + { + "epoch": 0.15068412707733866, + "grad_norm": 0.0, + "learning_rate": 1.924587581345983e-05, + "loss": 1.5439, + "step": 5132 + }, + { + "epoch": 0.15071348875447765, + "grad_norm": 0.0, + "learning_rate": 1.9245513484704612e-05, + "loss": 1.4365, + "step": 5133 + }, + { + "epoch": 0.15074285043161664, + "grad_norm": 0.0, + "learning_rate": 1.9245151072339845e-05, + "loss": 1.5664, + "step": 5134 + }, + { + "epoch": 0.15077221210875566, + "grad_norm": 0.0, + "learning_rate": 1.9244788576368813e-05, + "loss": 1.4658, + "step": 5135 + }, + { + "epoch": 0.15080157378589465, + "grad_norm": 0.0, + "learning_rate": 1.9244425996794786e-05, + "loss": 1.4199, + "step": 5136 + }, + { + "epoch": 0.15083093546303364, + "grad_norm": 0.0, + "learning_rate": 1.924406333362105e-05, + "loss": 1.4775, + "step": 5137 + }, + { + "epoch": 0.15086029714017266, + "grad_norm": 0.0, + "learning_rate": 1.924370058685088e-05, + "loss": 1.4326, + "step": 5138 + }, + { + "epoch": 0.15088965881731164, + "grad_norm": 0.0, + "learning_rate": 1.9243337756487565e-05, + "loss": 1.4443, + "step": 5139 + }, + { + "epoch": 0.15091902049445063, + "grad_norm": 0.0, + "learning_rate": 1.9242974842534373e-05, + "loss": 1.4023, + "step": 5140 + }, + { + "epoch": 0.15094838217158965, + "grad_norm": 0.0, + "learning_rate": 1.9242611844994594e-05, + "loss": 1.3809, + "step": 5141 + }, + { + "epoch": 0.15097774384872864, + "grad_norm": 0.0, + "learning_rate": 1.9242248763871508e-05, + "loss": 1.3809, + "step": 5142 + }, + { + "epoch": 0.15100710552586763, + "grad_norm": 0.0, + "learning_rate": 1.9241885599168398e-05, + "loss": 1.3828, + "step": 5143 + }, + { + "epoch": 0.15103646720300665, + "grad_norm": 0.0, + "learning_rate": 1.9241522350888554e-05, + "loss": 1.585, + "step": 5144 + }, + { + "epoch": 0.15106582888014564, + "grad_norm": 0.0, + "learning_rate": 1.9241159019035255e-05, + "loss": 1.6084, + "step": 5145 + }, + { + "epoch": 0.15109519055728463, + "grad_norm": 0.0, + "learning_rate": 1.9240795603611786e-05, + "loss": 1.3564, + "step": 5146 + }, + { + "epoch": 0.15112455223442364, + "grad_norm": 0.0, + "learning_rate": 1.9240432104621438e-05, + "loss": 1.665, + "step": 5147 + }, + { + "epoch": 0.15115391391156263, + "grad_norm": 0.0, + "learning_rate": 1.9240068522067495e-05, + "loss": 1.6484, + "step": 5148 + }, + { + "epoch": 0.15118327558870162, + "grad_norm": 0.0, + "learning_rate": 1.9239704855953247e-05, + "loss": 1.5947, + "step": 5149 + }, + { + "epoch": 0.15121263726584064, + "grad_norm": 0.0, + "learning_rate": 1.923934110628198e-05, + "loss": 1.4512, + "step": 5150 + }, + { + "epoch": 0.15124199894297963, + "grad_norm": 0.0, + "learning_rate": 1.923897727305699e-05, + "loss": 1.4902, + "step": 5151 + }, + { + "epoch": 0.15127136062011862, + "grad_norm": 0.0, + "learning_rate": 1.9238613356281554e-05, + "loss": 1.4395, + "step": 5152 + }, + { + "epoch": 0.1513007222972576, + "grad_norm": 0.0, + "learning_rate": 1.9238249355958975e-05, + "loss": 1.542, + "step": 5153 + }, + { + "epoch": 0.15133008397439662, + "grad_norm": 0.0, + "learning_rate": 1.9237885272092543e-05, + "loss": 1.5977, + "step": 5154 + }, + { + "epoch": 0.1513594456515356, + "grad_norm": 0.0, + "learning_rate": 1.9237521104685548e-05, + "loss": 1.5645, + "step": 5155 + }, + { + "epoch": 0.1513888073286746, + "grad_norm": 0.0, + "learning_rate": 1.9237156853741285e-05, + "loss": 1.4219, + "step": 5156 + }, + { + "epoch": 0.15141816900581362, + "grad_norm": 0.0, + "learning_rate": 1.9236792519263042e-05, + "loss": 1.6133, + "step": 5157 + }, + { + "epoch": 0.1514475306829526, + "grad_norm": 0.0, + "learning_rate": 1.9236428101254123e-05, + "loss": 1.4355, + "step": 5158 + }, + { + "epoch": 0.1514768923600916, + "grad_norm": 0.0, + "learning_rate": 1.9236063599717818e-05, + "loss": 1.5039, + "step": 5159 + }, + { + "epoch": 0.15150625403723061, + "grad_norm": 0.0, + "learning_rate": 1.9235699014657424e-05, + "loss": 1.5098, + "step": 5160 + }, + { + "epoch": 0.1515356157143696, + "grad_norm": 0.0, + "learning_rate": 1.923533434607624e-05, + "loss": 1.4268, + "step": 5161 + }, + { + "epoch": 0.1515649773915086, + "grad_norm": 0.0, + "learning_rate": 1.923496959397756e-05, + "loss": 1.3975, + "step": 5162 + }, + { + "epoch": 0.1515943390686476, + "grad_norm": 0.0, + "learning_rate": 1.923460475836469e-05, + "loss": 1.6313, + "step": 5163 + }, + { + "epoch": 0.1516237007457866, + "grad_norm": 0.0, + "learning_rate": 1.9234239839240918e-05, + "loss": 1.3213, + "step": 5164 + }, + { + "epoch": 0.1516530624229256, + "grad_norm": 0.0, + "learning_rate": 1.9233874836609554e-05, + "loss": 1.3662, + "step": 5165 + }, + { + "epoch": 0.1516824241000646, + "grad_norm": 0.0, + "learning_rate": 1.9233509750473894e-05, + "loss": 1.502, + "step": 5166 + }, + { + "epoch": 0.1517117857772036, + "grad_norm": 0.0, + "learning_rate": 1.923314458083724e-05, + "loss": 1.5273, + "step": 5167 + }, + { + "epoch": 0.15174114745434258, + "grad_norm": 0.0, + "learning_rate": 1.9232779327702894e-05, + "loss": 1.293, + "step": 5168 + }, + { + "epoch": 0.1517705091314816, + "grad_norm": 0.0, + "learning_rate": 1.923241399107416e-05, + "loss": 1.5, + "step": 5169 + }, + { + "epoch": 0.1517998708086206, + "grad_norm": 0.0, + "learning_rate": 1.9232048570954345e-05, + "loss": 1.5791, + "step": 5170 + }, + { + "epoch": 0.15182923248575958, + "grad_norm": 0.0, + "learning_rate": 1.923168306734675e-05, + "loss": 1.5029, + "step": 5171 + }, + { + "epoch": 0.1518585941628986, + "grad_norm": 0.0, + "learning_rate": 1.9231317480254677e-05, + "loss": 1.3984, + "step": 5172 + }, + { + "epoch": 0.1518879558400376, + "grad_norm": 0.0, + "learning_rate": 1.923095180968144e-05, + "loss": 1.4121, + "step": 5173 + }, + { + "epoch": 0.15191731751717658, + "grad_norm": 0.0, + "learning_rate": 1.923058605563034e-05, + "loss": 1.6143, + "step": 5174 + }, + { + "epoch": 0.1519466791943156, + "grad_norm": 0.0, + "learning_rate": 1.9230220218104684e-05, + "loss": 1.4619, + "step": 5175 + }, + { + "epoch": 0.15197604087145458, + "grad_norm": 0.0, + "learning_rate": 1.9229854297107785e-05, + "loss": 1.5166, + "step": 5176 + }, + { + "epoch": 0.15200540254859357, + "grad_norm": 0.0, + "learning_rate": 1.922948829264295e-05, + "loss": 1.5029, + "step": 5177 + }, + { + "epoch": 0.15203476422573256, + "grad_norm": 0.0, + "learning_rate": 1.922912220471349e-05, + "loss": 1.5498, + "step": 5178 + }, + { + "epoch": 0.15206412590287158, + "grad_norm": 0.0, + "learning_rate": 1.922875603332271e-05, + "loss": 1.3818, + "step": 5179 + }, + { + "epoch": 0.15209348758001057, + "grad_norm": 0.0, + "learning_rate": 1.922838977847393e-05, + "loss": 1.3848, + "step": 5180 + }, + { + "epoch": 0.15212284925714956, + "grad_norm": 0.0, + "learning_rate": 1.9228023440170455e-05, + "loss": 1.416, + "step": 5181 + }, + { + "epoch": 0.15215221093428857, + "grad_norm": 0.0, + "learning_rate": 1.9227657018415602e-05, + "loss": 1.3594, + "step": 5182 + }, + { + "epoch": 0.15218157261142756, + "grad_norm": 0.0, + "learning_rate": 1.9227290513212683e-05, + "loss": 1.3916, + "step": 5183 + }, + { + "epoch": 0.15221093428856655, + "grad_norm": 0.0, + "learning_rate": 1.9226923924565014e-05, + "loss": 1.5146, + "step": 5184 + }, + { + "epoch": 0.15224029596570557, + "grad_norm": 0.0, + "learning_rate": 1.922655725247591e-05, + "loss": 1.5117, + "step": 5185 + }, + { + "epoch": 0.15226965764284456, + "grad_norm": 0.0, + "learning_rate": 1.922619049694868e-05, + "loss": 1.4219, + "step": 5186 + }, + { + "epoch": 0.15229901931998355, + "grad_norm": 0.0, + "learning_rate": 1.922582365798665e-05, + "loss": 1.46, + "step": 5187 + }, + { + "epoch": 0.15232838099712256, + "grad_norm": 0.0, + "learning_rate": 1.9225456735593133e-05, + "loss": 1.4883, + "step": 5188 + }, + { + "epoch": 0.15235774267426155, + "grad_norm": 0.0, + "learning_rate": 1.9225089729771447e-05, + "loss": 1.3867, + "step": 5189 + }, + { + "epoch": 0.15238710435140054, + "grad_norm": 0.0, + "learning_rate": 1.9224722640524915e-05, + "loss": 1.4805, + "step": 5190 + }, + { + "epoch": 0.15241646602853956, + "grad_norm": 0.0, + "learning_rate": 1.9224355467856855e-05, + "loss": 1.3945, + "step": 5191 + }, + { + "epoch": 0.15244582770567855, + "grad_norm": 0.0, + "learning_rate": 1.9223988211770585e-05, + "loss": 1.4531, + "step": 5192 + }, + { + "epoch": 0.15247518938281754, + "grad_norm": 0.0, + "learning_rate": 1.9223620872269425e-05, + "loss": 1.374, + "step": 5193 + }, + { + "epoch": 0.15250455105995656, + "grad_norm": 0.0, + "learning_rate": 1.92232534493567e-05, + "loss": 1.5693, + "step": 5194 + }, + { + "epoch": 0.15253391273709554, + "grad_norm": 0.0, + "learning_rate": 1.922288594303573e-05, + "loss": 1.4736, + "step": 5195 + }, + { + "epoch": 0.15256327441423453, + "grad_norm": 0.0, + "learning_rate": 1.9222518353309843e-05, + "loss": 1.4756, + "step": 5196 + }, + { + "epoch": 0.15259263609137355, + "grad_norm": 0.0, + "learning_rate": 1.922215068018236e-05, + "loss": 1.3799, + "step": 5197 + }, + { + "epoch": 0.15262199776851254, + "grad_norm": 0.0, + "learning_rate": 1.922178292365661e-05, + "loss": 1.5439, + "step": 5198 + }, + { + "epoch": 0.15265135944565153, + "grad_norm": 0.0, + "learning_rate": 1.9221415083735908e-05, + "loss": 1.5166, + "step": 5199 + }, + { + "epoch": 0.15268072112279055, + "grad_norm": 0.0, + "learning_rate": 1.9221047160423595e-05, + "loss": 1.4277, + "step": 5200 + }, + { + "epoch": 0.15271008279992954, + "grad_norm": 0.0, + "learning_rate": 1.9220679153722988e-05, + "loss": 1.4072, + "step": 5201 + }, + { + "epoch": 0.15273944447706853, + "grad_norm": 0.0, + "learning_rate": 1.9220311063637416e-05, + "loss": 1.3857, + "step": 5202 + }, + { + "epoch": 0.15276880615420751, + "grad_norm": 0.0, + "learning_rate": 1.921994289017021e-05, + "loss": 1.4893, + "step": 5203 + }, + { + "epoch": 0.15279816783134653, + "grad_norm": 0.0, + "learning_rate": 1.9219574633324702e-05, + "loss": 1.4834, + "step": 5204 + }, + { + "epoch": 0.15282752950848552, + "grad_norm": 0.0, + "learning_rate": 1.921920629310422e-05, + "loss": 1.4521, + "step": 5205 + }, + { + "epoch": 0.1528568911856245, + "grad_norm": 0.0, + "learning_rate": 1.921883786951209e-05, + "loss": 1.4717, + "step": 5206 + }, + { + "epoch": 0.15288625286276353, + "grad_norm": 0.0, + "learning_rate": 1.921846936255165e-05, + "loss": 1.4834, + "step": 5207 + }, + { + "epoch": 0.15291561453990252, + "grad_norm": 0.0, + "learning_rate": 1.9218100772226235e-05, + "loss": 1.4707, + "step": 5208 + }, + { + "epoch": 0.1529449762170415, + "grad_norm": 0.0, + "learning_rate": 1.921773209853917e-05, + "loss": 1.4912, + "step": 5209 + }, + { + "epoch": 0.15297433789418052, + "grad_norm": 0.0, + "learning_rate": 1.921736334149379e-05, + "loss": 1.5615, + "step": 5210 + }, + { + "epoch": 0.1530036995713195, + "grad_norm": 0.0, + "learning_rate": 1.9216994501093436e-05, + "loss": 1.4541, + "step": 5211 + }, + { + "epoch": 0.1530330612484585, + "grad_norm": 0.0, + "learning_rate": 1.9216625577341438e-05, + "loss": 1.377, + "step": 5212 + }, + { + "epoch": 0.15306242292559752, + "grad_norm": 0.0, + "learning_rate": 1.9216256570241138e-05, + "loss": 1.3496, + "step": 5213 + }, + { + "epoch": 0.1530917846027365, + "grad_norm": 0.0, + "learning_rate": 1.9215887479795868e-05, + "loss": 1.5, + "step": 5214 + }, + { + "epoch": 0.1531211462798755, + "grad_norm": 0.0, + "learning_rate": 1.921551830600897e-05, + "loss": 1.4521, + "step": 5215 + }, + { + "epoch": 0.15315050795701451, + "grad_norm": 0.0, + "learning_rate": 1.9215149048883775e-05, + "loss": 1.4072, + "step": 5216 + }, + { + "epoch": 0.1531798696341535, + "grad_norm": 0.0, + "learning_rate": 1.921477970842363e-05, + "loss": 1.3945, + "step": 5217 + }, + { + "epoch": 0.1532092313112925, + "grad_norm": 0.0, + "learning_rate": 1.9214410284631874e-05, + "loss": 1.4941, + "step": 5218 + }, + { + "epoch": 0.1532385929884315, + "grad_norm": 0.0, + "learning_rate": 1.9214040777511843e-05, + "loss": 1.5508, + "step": 5219 + }, + { + "epoch": 0.1532679546655705, + "grad_norm": 0.0, + "learning_rate": 1.921367118706688e-05, + "loss": 1.418, + "step": 5220 + }, + { + "epoch": 0.1532973163427095, + "grad_norm": 0.0, + "learning_rate": 1.921330151330033e-05, + "loss": 1.4502, + "step": 5221 + }, + { + "epoch": 0.1533266780198485, + "grad_norm": 0.0, + "learning_rate": 1.9212931756215537e-05, + "loss": 1.3555, + "step": 5222 + }, + { + "epoch": 0.1533560396969875, + "grad_norm": 0.0, + "learning_rate": 1.921256191581584e-05, + "loss": 1.4727, + "step": 5223 + }, + { + "epoch": 0.15338540137412648, + "grad_norm": 0.0, + "learning_rate": 1.9212191992104586e-05, + "loss": 1.4287, + "step": 5224 + }, + { + "epoch": 0.1534147630512655, + "grad_norm": 0.0, + "learning_rate": 1.9211821985085123e-05, + "loss": 1.3467, + "step": 5225 + }, + { + "epoch": 0.1534441247284045, + "grad_norm": 0.0, + "learning_rate": 1.9211451894760794e-05, + "loss": 1.4385, + "step": 5226 + }, + { + "epoch": 0.15347348640554348, + "grad_norm": 0.0, + "learning_rate": 1.9211081721134945e-05, + "loss": 1.3799, + "step": 5227 + }, + { + "epoch": 0.15350284808268247, + "grad_norm": 0.0, + "learning_rate": 1.9210711464210927e-05, + "loss": 1.4072, + "step": 5228 + }, + { + "epoch": 0.1535322097598215, + "grad_norm": 0.0, + "learning_rate": 1.9210341123992088e-05, + "loss": 1.3115, + "step": 5229 + }, + { + "epoch": 0.15356157143696048, + "grad_norm": 0.0, + "learning_rate": 1.9209970700481772e-05, + "loss": 1.4268, + "step": 5230 + }, + { + "epoch": 0.15359093311409946, + "grad_norm": 0.0, + "learning_rate": 1.9209600193683333e-05, + "loss": 1.46, + "step": 5231 + }, + { + "epoch": 0.15362029479123848, + "grad_norm": 0.0, + "learning_rate": 1.9209229603600122e-05, + "loss": 1.4004, + "step": 5232 + }, + { + "epoch": 0.15364965646837747, + "grad_norm": 0.0, + "learning_rate": 1.9208858930235486e-05, + "loss": 1.374, + "step": 5233 + }, + { + "epoch": 0.15367901814551646, + "grad_norm": 0.0, + "learning_rate": 1.920848817359278e-05, + "loss": 1.4795, + "step": 5234 + }, + { + "epoch": 0.15370837982265548, + "grad_norm": 0.0, + "learning_rate": 1.920811733367536e-05, + "loss": 1.4658, + "step": 5235 + }, + { + "epoch": 0.15373774149979447, + "grad_norm": 0.0, + "learning_rate": 1.9207746410486578e-05, + "loss": 1.457, + "step": 5236 + }, + { + "epoch": 0.15376710317693346, + "grad_norm": 0.0, + "learning_rate": 1.9207375404029783e-05, + "loss": 1.4814, + "step": 5237 + }, + { + "epoch": 0.15379646485407247, + "grad_norm": 0.0, + "learning_rate": 1.9207004314308338e-05, + "loss": 1.4727, + "step": 5238 + }, + { + "epoch": 0.15382582653121146, + "grad_norm": 0.0, + "learning_rate": 1.9206633141325594e-05, + "loss": 1.2979, + "step": 5239 + }, + { + "epoch": 0.15385518820835045, + "grad_norm": 0.0, + "learning_rate": 1.9206261885084906e-05, + "loss": 1.5684, + "step": 5240 + }, + { + "epoch": 0.15388454988548947, + "grad_norm": 0.0, + "learning_rate": 1.9205890545589637e-05, + "loss": 1.3574, + "step": 5241 + }, + { + "epoch": 0.15391391156262846, + "grad_norm": 0.0, + "learning_rate": 1.920551912284314e-05, + "loss": 1.543, + "step": 5242 + }, + { + "epoch": 0.15394327323976745, + "grad_norm": 0.0, + "learning_rate": 1.9205147616848778e-05, + "loss": 1.4541, + "step": 5243 + }, + { + "epoch": 0.15397263491690646, + "grad_norm": 0.0, + "learning_rate": 1.920477602760991e-05, + "loss": 1.4824, + "step": 5244 + }, + { + "epoch": 0.15400199659404545, + "grad_norm": 0.0, + "learning_rate": 1.920440435512989e-05, + "loss": 1.4678, + "step": 5245 + }, + { + "epoch": 0.15403135827118444, + "grad_norm": 0.0, + "learning_rate": 1.920403259941209e-05, + "loss": 1.5244, + "step": 5246 + }, + { + "epoch": 0.15406071994832346, + "grad_norm": 0.0, + "learning_rate": 1.9203660760459862e-05, + "loss": 1.3984, + "step": 5247 + }, + { + "epoch": 0.15409008162546245, + "grad_norm": 0.0, + "learning_rate": 1.9203288838276575e-05, + "loss": 1.4941, + "step": 5248 + }, + { + "epoch": 0.15411944330260144, + "grad_norm": 0.0, + "learning_rate": 1.920291683286559e-05, + "loss": 1.5273, + "step": 5249 + }, + { + "epoch": 0.15414880497974046, + "grad_norm": 0.0, + "learning_rate": 1.9202544744230268e-05, + "loss": 1.4531, + "step": 5250 + }, + { + "epoch": 0.15417816665687945, + "grad_norm": 0.0, + "learning_rate": 1.9202172572373977e-05, + "loss": 1.3457, + "step": 5251 + }, + { + "epoch": 0.15420752833401843, + "grad_norm": 0.0, + "learning_rate": 1.9201800317300087e-05, + "loss": 1.4434, + "step": 5252 + }, + { + "epoch": 0.15423689001115742, + "grad_norm": 0.0, + "learning_rate": 1.920142797901196e-05, + "loss": 1.4473, + "step": 5253 + }, + { + "epoch": 0.15426625168829644, + "grad_norm": 0.0, + "learning_rate": 1.9201055557512958e-05, + "loss": 1.5488, + "step": 5254 + }, + { + "epoch": 0.15429561336543543, + "grad_norm": 0.0, + "learning_rate": 1.9200683052806457e-05, + "loss": 1.4512, + "step": 5255 + }, + { + "epoch": 0.15432497504257442, + "grad_norm": 0.0, + "learning_rate": 1.9200310464895823e-05, + "loss": 1.5039, + "step": 5256 + }, + { + "epoch": 0.15435433671971344, + "grad_norm": 0.0, + "learning_rate": 1.9199937793784424e-05, + "loss": 1.502, + "step": 5257 + }, + { + "epoch": 0.15438369839685243, + "grad_norm": 0.0, + "learning_rate": 1.9199565039475634e-05, + "loss": 1.458, + "step": 5258 + }, + { + "epoch": 0.15441306007399141, + "grad_norm": 0.0, + "learning_rate": 1.919919220197282e-05, + "loss": 1.4463, + "step": 5259 + }, + { + "epoch": 0.15444242175113043, + "grad_norm": 0.0, + "learning_rate": 1.9198819281279356e-05, + "loss": 1.4111, + "step": 5260 + }, + { + "epoch": 0.15447178342826942, + "grad_norm": 0.0, + "learning_rate": 1.919844627739861e-05, + "loss": 1.5049, + "step": 5261 + }, + { + "epoch": 0.1545011451054084, + "grad_norm": 0.0, + "learning_rate": 1.9198073190333963e-05, + "loss": 1.3828, + "step": 5262 + }, + { + "epoch": 0.15453050678254743, + "grad_norm": 0.0, + "learning_rate": 1.919770002008878e-05, + "loss": 1.5586, + "step": 5263 + }, + { + "epoch": 0.15455986845968642, + "grad_norm": 0.0, + "learning_rate": 1.9197326766666445e-05, + "loss": 1.5332, + "step": 5264 + }, + { + "epoch": 0.1545892301368254, + "grad_norm": 0.0, + "learning_rate": 1.9196953430070327e-05, + "loss": 1.6201, + "step": 5265 + }, + { + "epoch": 0.15461859181396442, + "grad_norm": 0.0, + "learning_rate": 1.91965800103038e-05, + "loss": 1.6494, + "step": 5266 + }, + { + "epoch": 0.1546479534911034, + "grad_norm": 0.0, + "learning_rate": 1.9196206507370246e-05, + "loss": 1.3994, + "step": 5267 + }, + { + "epoch": 0.1546773151682424, + "grad_norm": 0.0, + "learning_rate": 1.9195832921273045e-05, + "loss": 1.5586, + "step": 5268 + }, + { + "epoch": 0.15470667684538142, + "grad_norm": 0.0, + "learning_rate": 1.9195459252015572e-05, + "loss": 1.3643, + "step": 5269 + }, + { + "epoch": 0.1547360385225204, + "grad_norm": 0.0, + "learning_rate": 1.9195085499601202e-05, + "loss": 1.5596, + "step": 5270 + }, + { + "epoch": 0.1547654001996594, + "grad_norm": 0.0, + "learning_rate": 1.919471166403332e-05, + "loss": 1.4639, + "step": 5271 + }, + { + "epoch": 0.15479476187679841, + "grad_norm": 0.0, + "learning_rate": 1.9194337745315307e-05, + "loss": 1.5107, + "step": 5272 + }, + { + "epoch": 0.1548241235539374, + "grad_norm": 0.0, + "learning_rate": 1.919396374345054e-05, + "loss": 1.3984, + "step": 5273 + }, + { + "epoch": 0.1548534852310764, + "grad_norm": 0.0, + "learning_rate": 1.919358965844241e-05, + "loss": 1.4756, + "step": 5274 + }, + { + "epoch": 0.1548828469082154, + "grad_norm": 0.0, + "learning_rate": 1.9193215490294287e-05, + "loss": 1.458, + "step": 5275 + }, + { + "epoch": 0.1549122085853544, + "grad_norm": 0.0, + "learning_rate": 1.9192841239009566e-05, + "loss": 1.4238, + "step": 5276 + }, + { + "epoch": 0.1549415702624934, + "grad_norm": 0.0, + "learning_rate": 1.9192466904591628e-05, + "loss": 1.2734, + "step": 5277 + }, + { + "epoch": 0.15497093193963238, + "grad_norm": 0.0, + "learning_rate": 1.9192092487043857e-05, + "loss": 1.4385, + "step": 5278 + }, + { + "epoch": 0.1550002936167714, + "grad_norm": 0.0, + "learning_rate": 1.9191717986369638e-05, + "loss": 1.3843, + "step": 5279 + }, + { + "epoch": 0.15502965529391038, + "grad_norm": 0.0, + "learning_rate": 1.919134340257236e-05, + "loss": 1.4893, + "step": 5280 + }, + { + "epoch": 0.15505901697104937, + "grad_norm": 0.0, + "learning_rate": 1.919096873565541e-05, + "loss": 1.5088, + "step": 5281 + }, + { + "epoch": 0.1550883786481884, + "grad_norm": 0.0, + "learning_rate": 1.919059398562218e-05, + "loss": 1.3594, + "step": 5282 + }, + { + "epoch": 0.15511774032532738, + "grad_norm": 0.0, + "learning_rate": 1.9190219152476047e-05, + "loss": 1.2812, + "step": 5283 + }, + { + "epoch": 0.15514710200246637, + "grad_norm": 0.0, + "learning_rate": 1.918984423622041e-05, + "loss": 1.4355, + "step": 5284 + }, + { + "epoch": 0.1551764636796054, + "grad_norm": 0.0, + "learning_rate": 1.918946923685866e-05, + "loss": 1.4756, + "step": 5285 + }, + { + "epoch": 0.15520582535674438, + "grad_norm": 0.0, + "learning_rate": 1.918909415439419e-05, + "loss": 1.3516, + "step": 5286 + }, + { + "epoch": 0.15523518703388337, + "grad_norm": 0.0, + "learning_rate": 1.9188718988830387e-05, + "loss": 1.4395, + "step": 5287 + }, + { + "epoch": 0.15526454871102238, + "grad_norm": 0.0, + "learning_rate": 1.9188343740170642e-05, + "loss": 1.3164, + "step": 5288 + }, + { + "epoch": 0.15529391038816137, + "grad_norm": 0.0, + "learning_rate": 1.918796840841835e-05, + "loss": 1.3574, + "step": 5289 + }, + { + "epoch": 0.15532327206530036, + "grad_norm": 0.0, + "learning_rate": 1.918759299357691e-05, + "loss": 1.4521, + "step": 5290 + }, + { + "epoch": 0.15535263374243938, + "grad_norm": 0.0, + "learning_rate": 1.9187217495649713e-05, + "loss": 1.4932, + "step": 5291 + }, + { + "epoch": 0.15538199541957837, + "grad_norm": 0.0, + "learning_rate": 1.9186841914640152e-05, + "loss": 1.4229, + "step": 5292 + }, + { + "epoch": 0.15541135709671736, + "grad_norm": 0.0, + "learning_rate": 1.918646625055163e-05, + "loss": 1.4688, + "step": 5293 + }, + { + "epoch": 0.15544071877385637, + "grad_norm": 0.0, + "learning_rate": 1.918609050338754e-05, + "loss": 1.4941, + "step": 5294 + }, + { + "epoch": 0.15547008045099536, + "grad_norm": 0.0, + "learning_rate": 1.9185714673151283e-05, + "loss": 1.46, + "step": 5295 + }, + { + "epoch": 0.15549944212813435, + "grad_norm": 0.0, + "learning_rate": 1.918533875984625e-05, + "loss": 1.5498, + "step": 5296 + }, + { + "epoch": 0.15552880380527337, + "grad_norm": 0.0, + "learning_rate": 1.918496276347585e-05, + "loss": 1.502, + "step": 5297 + }, + { + "epoch": 0.15555816548241236, + "grad_norm": 0.0, + "learning_rate": 1.9184586684043484e-05, + "loss": 1.4834, + "step": 5298 + }, + { + "epoch": 0.15558752715955135, + "grad_norm": 0.0, + "learning_rate": 1.9184210521552544e-05, + "loss": 1.5039, + "step": 5299 + }, + { + "epoch": 0.15561688883669036, + "grad_norm": 0.0, + "learning_rate": 1.9183834276006435e-05, + "loss": 1.4707, + "step": 5300 + }, + { + "epoch": 0.15564625051382935, + "grad_norm": 0.0, + "learning_rate": 1.918345794740856e-05, + "loss": 1.5605, + "step": 5301 + }, + { + "epoch": 0.15567561219096834, + "grad_norm": 0.0, + "learning_rate": 1.9183081535762326e-05, + "loss": 1.4033, + "step": 5302 + }, + { + "epoch": 0.15570497386810733, + "grad_norm": 0.0, + "learning_rate": 1.9182705041071132e-05, + "loss": 1.5488, + "step": 5303 + }, + { + "epoch": 0.15573433554524635, + "grad_norm": 0.0, + "learning_rate": 1.9182328463338383e-05, + "loss": 1.4531, + "step": 5304 + }, + { + "epoch": 0.15576369722238534, + "grad_norm": 0.0, + "learning_rate": 1.918195180256749e-05, + "loss": 1.4648, + "step": 5305 + }, + { + "epoch": 0.15579305889952433, + "grad_norm": 0.0, + "learning_rate": 1.9181575058761855e-05, + "loss": 1.5693, + "step": 5306 + }, + { + "epoch": 0.15582242057666335, + "grad_norm": 0.0, + "learning_rate": 1.9181198231924885e-05, + "loss": 1.4219, + "step": 5307 + }, + { + "epoch": 0.15585178225380233, + "grad_norm": 0.0, + "learning_rate": 1.9180821322059984e-05, + "loss": 1.5215, + "step": 5308 + }, + { + "epoch": 0.15588114393094132, + "grad_norm": 0.0, + "learning_rate": 1.9180444329170567e-05, + "loss": 1.4053, + "step": 5309 + }, + { + "epoch": 0.15591050560808034, + "grad_norm": 0.0, + "learning_rate": 1.9180067253260042e-05, + "loss": 1.3926, + "step": 5310 + }, + { + "epoch": 0.15593986728521933, + "grad_norm": 0.0, + "learning_rate": 1.9179690094331818e-05, + "loss": 1.4922, + "step": 5311 + }, + { + "epoch": 0.15596922896235832, + "grad_norm": 0.0, + "learning_rate": 1.9179312852389303e-05, + "loss": 1.4697, + "step": 5312 + }, + { + "epoch": 0.15599859063949734, + "grad_norm": 0.0, + "learning_rate": 1.9178935527435913e-05, + "loss": 1.4619, + "step": 5313 + }, + { + "epoch": 0.15602795231663633, + "grad_norm": 0.0, + "learning_rate": 1.9178558119475056e-05, + "loss": 1.5078, + "step": 5314 + }, + { + "epoch": 0.15605731399377532, + "grad_norm": 0.0, + "learning_rate": 1.9178180628510154e-05, + "loss": 1.5195, + "step": 5315 + }, + { + "epoch": 0.15608667567091433, + "grad_norm": 0.0, + "learning_rate": 1.9177803054544605e-05, + "loss": 1.4062, + "step": 5316 + }, + { + "epoch": 0.15611603734805332, + "grad_norm": 0.0, + "learning_rate": 1.9177425397581838e-05, + "loss": 1.4414, + "step": 5317 + }, + { + "epoch": 0.1561453990251923, + "grad_norm": 0.0, + "learning_rate": 1.9177047657625262e-05, + "loss": 1.4766, + "step": 5318 + }, + { + "epoch": 0.15617476070233133, + "grad_norm": 0.0, + "learning_rate": 1.917666983467829e-05, + "loss": 1.5762, + "step": 5319 + }, + { + "epoch": 0.15620412237947032, + "grad_norm": 0.0, + "learning_rate": 1.9176291928744345e-05, + "loss": 1.5176, + "step": 5320 + }, + { + "epoch": 0.1562334840566093, + "grad_norm": 0.0, + "learning_rate": 1.9175913939826846e-05, + "loss": 1.4346, + "step": 5321 + }, + { + "epoch": 0.15626284573374832, + "grad_norm": 0.0, + "learning_rate": 1.91755358679292e-05, + "loss": 1.6631, + "step": 5322 + }, + { + "epoch": 0.1562922074108873, + "grad_norm": 0.0, + "learning_rate": 1.917515771305484e-05, + "loss": 1.4717, + "step": 5323 + }, + { + "epoch": 0.1563215690880263, + "grad_norm": 0.0, + "learning_rate": 1.9174779475207175e-05, + "loss": 1.4297, + "step": 5324 + }, + { + "epoch": 0.15635093076516532, + "grad_norm": 0.0, + "learning_rate": 1.917440115438963e-05, + "loss": 1.4297, + "step": 5325 + }, + { + "epoch": 0.1563802924423043, + "grad_norm": 0.0, + "learning_rate": 1.917402275060563e-05, + "loss": 1.3818, + "step": 5326 + }, + { + "epoch": 0.1564096541194433, + "grad_norm": 0.0, + "learning_rate": 1.9173644263858585e-05, + "loss": 1.5078, + "step": 5327 + }, + { + "epoch": 0.1564390157965823, + "grad_norm": 0.0, + "learning_rate": 1.917326569415193e-05, + "loss": 1.4697, + "step": 5328 + }, + { + "epoch": 0.1564683774737213, + "grad_norm": 0.0, + "learning_rate": 1.917288704148908e-05, + "loss": 1.3564, + "step": 5329 + }, + { + "epoch": 0.1564977391508603, + "grad_norm": 0.0, + "learning_rate": 1.9172508305873472e-05, + "loss": 1.5127, + "step": 5330 + }, + { + "epoch": 0.15652710082799928, + "grad_norm": 0.0, + "learning_rate": 1.9172129487308515e-05, + "loss": 1.4512, + "step": 5331 + }, + { + "epoch": 0.1565564625051383, + "grad_norm": 0.0, + "learning_rate": 1.9171750585797647e-05, + "loss": 1.5479, + "step": 5332 + }, + { + "epoch": 0.1565858241822773, + "grad_norm": 0.0, + "learning_rate": 1.9171371601344283e-05, + "loss": 1.6006, + "step": 5333 + }, + { + "epoch": 0.15661518585941628, + "grad_norm": 0.0, + "learning_rate": 1.917099253395186e-05, + "loss": 1.459, + "step": 5334 + }, + { + "epoch": 0.1566445475365553, + "grad_norm": 0.0, + "learning_rate": 1.9170613383623805e-05, + "loss": 1.4912, + "step": 5335 + }, + { + "epoch": 0.15667390921369428, + "grad_norm": 0.0, + "learning_rate": 1.9170234150363542e-05, + "loss": 1.4717, + "step": 5336 + }, + { + "epoch": 0.15670327089083327, + "grad_norm": 0.0, + "learning_rate": 1.9169854834174508e-05, + "loss": 1.3896, + "step": 5337 + }, + { + "epoch": 0.1567326325679723, + "grad_norm": 0.0, + "learning_rate": 1.916947543506012e-05, + "loss": 1.2197, + "step": 5338 + }, + { + "epoch": 0.15676199424511128, + "grad_norm": 0.0, + "learning_rate": 1.9169095953023825e-05, + "loss": 1.4238, + "step": 5339 + }, + { + "epoch": 0.15679135592225027, + "grad_norm": 0.0, + "learning_rate": 1.916871638806904e-05, + "loss": 1.4883, + "step": 5340 + }, + { + "epoch": 0.1568207175993893, + "grad_norm": 0.0, + "learning_rate": 1.916833674019921e-05, + "loss": 1.4717, + "step": 5341 + }, + { + "epoch": 0.15685007927652828, + "grad_norm": 0.0, + "learning_rate": 1.9167957009417762e-05, + "loss": 1.4658, + "step": 5342 + }, + { + "epoch": 0.15687944095366727, + "grad_norm": 0.0, + "learning_rate": 1.916757719572813e-05, + "loss": 1.332, + "step": 5343 + }, + { + "epoch": 0.15690880263080628, + "grad_norm": 0.0, + "learning_rate": 1.9167197299133748e-05, + "loss": 1.4932, + "step": 5344 + }, + { + "epoch": 0.15693816430794527, + "grad_norm": 0.0, + "learning_rate": 1.9166817319638055e-05, + "loss": 1.5127, + "step": 5345 + }, + { + "epoch": 0.15696752598508426, + "grad_norm": 0.0, + "learning_rate": 1.9166437257244483e-05, + "loss": 1.4453, + "step": 5346 + }, + { + "epoch": 0.15699688766222328, + "grad_norm": 0.0, + "learning_rate": 1.9166057111956477e-05, + "loss": 1.5283, + "step": 5347 + }, + { + "epoch": 0.15702624933936227, + "grad_norm": 0.0, + "learning_rate": 1.9165676883777463e-05, + "loss": 1.4561, + "step": 5348 + }, + { + "epoch": 0.15705561101650126, + "grad_norm": 0.0, + "learning_rate": 1.916529657271089e-05, + "loss": 1.3604, + "step": 5349 + }, + { + "epoch": 0.15708497269364027, + "grad_norm": 0.0, + "learning_rate": 1.9164916178760185e-05, + "loss": 1.542, + "step": 5350 + }, + { + "epoch": 0.15711433437077926, + "grad_norm": 0.0, + "learning_rate": 1.9164535701928802e-05, + "loss": 1.3154, + "step": 5351 + }, + { + "epoch": 0.15714369604791825, + "grad_norm": 0.0, + "learning_rate": 1.9164155142220178e-05, + "loss": 1.4736, + "step": 5352 + }, + { + "epoch": 0.15717305772505727, + "grad_norm": 0.0, + "learning_rate": 1.916377449963775e-05, + "loss": 1.3672, + "step": 5353 + }, + { + "epoch": 0.15720241940219626, + "grad_norm": 0.0, + "learning_rate": 1.916339377418496e-05, + "loss": 1.498, + "step": 5354 + }, + { + "epoch": 0.15723178107933525, + "grad_norm": 0.0, + "learning_rate": 1.9163012965865255e-05, + "loss": 1.4795, + "step": 5355 + }, + { + "epoch": 0.15726114275647424, + "grad_norm": 0.0, + "learning_rate": 1.9162632074682074e-05, + "loss": 1.3125, + "step": 5356 + }, + { + "epoch": 0.15729050443361325, + "grad_norm": 0.0, + "learning_rate": 1.9162251100638868e-05, + "loss": 1.5479, + "step": 5357 + }, + { + "epoch": 0.15731986611075224, + "grad_norm": 0.0, + "learning_rate": 1.916187004373908e-05, + "loss": 1.584, + "step": 5358 + }, + { + "epoch": 0.15734922778789123, + "grad_norm": 0.0, + "learning_rate": 1.916148890398615e-05, + "loss": 1.4941, + "step": 5359 + }, + { + "epoch": 0.15737858946503025, + "grad_norm": 0.0, + "learning_rate": 1.9161107681383533e-05, + "loss": 1.3779, + "step": 5360 + }, + { + "epoch": 0.15740795114216924, + "grad_norm": 0.0, + "learning_rate": 1.9160726375934674e-05, + "loss": 1.4453, + "step": 5361 + }, + { + "epoch": 0.15743731281930823, + "grad_norm": 0.0, + "learning_rate": 1.916034498764302e-05, + "loss": 1.4688, + "step": 5362 + }, + { + "epoch": 0.15746667449644725, + "grad_norm": 0.0, + "learning_rate": 1.915996351651202e-05, + "loss": 1.4873, + "step": 5363 + }, + { + "epoch": 0.15749603617358623, + "grad_norm": 0.0, + "learning_rate": 1.915958196254512e-05, + "loss": 1.4844, + "step": 5364 + }, + { + "epoch": 0.15752539785072522, + "grad_norm": 0.0, + "learning_rate": 1.9159200325745776e-05, + "loss": 1.6025, + "step": 5365 + }, + { + "epoch": 0.15755475952786424, + "grad_norm": 0.0, + "learning_rate": 1.915881860611744e-05, + "loss": 1.5176, + "step": 5366 + }, + { + "epoch": 0.15758412120500323, + "grad_norm": 0.0, + "learning_rate": 1.9158436803663564e-05, + "loss": 1.4873, + "step": 5367 + }, + { + "epoch": 0.15761348288214222, + "grad_norm": 0.0, + "learning_rate": 1.9158054918387595e-05, + "loss": 1.4688, + "step": 5368 + }, + { + "epoch": 0.15764284455928124, + "grad_norm": 0.0, + "learning_rate": 1.915767295029299e-05, + "loss": 1.4473, + "step": 5369 + }, + { + "epoch": 0.15767220623642023, + "grad_norm": 0.0, + "learning_rate": 1.91572908993832e-05, + "loss": 1.5186, + "step": 5370 + }, + { + "epoch": 0.15770156791355922, + "grad_norm": 0.0, + "learning_rate": 1.9156908765661692e-05, + "loss": 1.502, + "step": 5371 + }, + { + "epoch": 0.15773092959069823, + "grad_norm": 0.0, + "learning_rate": 1.9156526549131907e-05, + "loss": 1.3516, + "step": 5372 + }, + { + "epoch": 0.15776029126783722, + "grad_norm": 0.0, + "learning_rate": 1.9156144249797306e-05, + "loss": 1.5684, + "step": 5373 + }, + { + "epoch": 0.1577896529449762, + "grad_norm": 0.0, + "learning_rate": 1.9155761867661348e-05, + "loss": 1.3623, + "step": 5374 + }, + { + "epoch": 0.15781901462211523, + "grad_norm": 0.0, + "learning_rate": 1.9155379402727495e-05, + "loss": 1.4141, + "step": 5375 + }, + { + "epoch": 0.15784837629925422, + "grad_norm": 0.0, + "learning_rate": 1.91549968549992e-05, + "loss": 1.4971, + "step": 5376 + }, + { + "epoch": 0.1578777379763932, + "grad_norm": 0.0, + "learning_rate": 1.915461422447992e-05, + "loss": 1.3613, + "step": 5377 + }, + { + "epoch": 0.15790709965353222, + "grad_norm": 0.0, + "learning_rate": 1.9154231511173123e-05, + "loss": 1.6025, + "step": 5378 + }, + { + "epoch": 0.1579364613306712, + "grad_norm": 0.0, + "learning_rate": 1.9153848715082264e-05, + "loss": 1.4297, + "step": 5379 + }, + { + "epoch": 0.1579658230078102, + "grad_norm": 0.0, + "learning_rate": 1.9153465836210807e-05, + "loss": 1.4707, + "step": 5380 + }, + { + "epoch": 0.1579951846849492, + "grad_norm": 0.0, + "learning_rate": 1.9153082874562218e-05, + "loss": 1.4609, + "step": 5381 + }, + { + "epoch": 0.1580245463620882, + "grad_norm": 0.0, + "learning_rate": 1.915269983013995e-05, + "loss": 1.5098, + "step": 5382 + }, + { + "epoch": 0.1580539080392272, + "grad_norm": 0.0, + "learning_rate": 1.9152316702947476e-05, + "loss": 1.3799, + "step": 5383 + }, + { + "epoch": 0.1580832697163662, + "grad_norm": 0.0, + "learning_rate": 1.9151933492988258e-05, + "loss": 1.4424, + "step": 5384 + }, + { + "epoch": 0.1581126313935052, + "grad_norm": 0.0, + "learning_rate": 1.9151550200265763e-05, + "loss": 1.4336, + "step": 5385 + }, + { + "epoch": 0.1581419930706442, + "grad_norm": 0.0, + "learning_rate": 1.9151166824783453e-05, + "loss": 1.5254, + "step": 5386 + }, + { + "epoch": 0.15817135474778318, + "grad_norm": 0.0, + "learning_rate": 1.91507833665448e-05, + "loss": 1.3516, + "step": 5387 + }, + { + "epoch": 0.1582007164249222, + "grad_norm": 0.0, + "learning_rate": 1.915039982555327e-05, + "loss": 1.5205, + "step": 5388 + }, + { + "epoch": 0.1582300781020612, + "grad_norm": 0.0, + "learning_rate": 1.915001620181233e-05, + "loss": 1.4443, + "step": 5389 + }, + { + "epoch": 0.15825943977920018, + "grad_norm": 0.0, + "learning_rate": 1.9149632495325448e-05, + "loss": 1.4521, + "step": 5390 + }, + { + "epoch": 0.1582888014563392, + "grad_norm": 0.0, + "learning_rate": 1.9149248706096098e-05, + "loss": 1.6533, + "step": 5391 + }, + { + "epoch": 0.15831816313347818, + "grad_norm": 0.0, + "learning_rate": 1.914886483412775e-05, + "loss": 1.4141, + "step": 5392 + }, + { + "epoch": 0.15834752481061717, + "grad_norm": 0.0, + "learning_rate": 1.9148480879423873e-05, + "loss": 1.4961, + "step": 5393 + }, + { + "epoch": 0.1583768864877562, + "grad_norm": 0.0, + "learning_rate": 1.914809684198794e-05, + "loss": 1.417, + "step": 5394 + }, + { + "epoch": 0.15840624816489518, + "grad_norm": 0.0, + "learning_rate": 1.9147712721823425e-05, + "loss": 1.5352, + "step": 5395 + }, + { + "epoch": 0.15843560984203417, + "grad_norm": 0.0, + "learning_rate": 1.9147328518933802e-05, + "loss": 1.3188, + "step": 5396 + }, + { + "epoch": 0.1584649715191732, + "grad_norm": 0.0, + "learning_rate": 1.9146944233322545e-05, + "loss": 1.4785, + "step": 5397 + }, + { + "epoch": 0.15849433319631218, + "grad_norm": 0.0, + "learning_rate": 1.9146559864993127e-05, + "loss": 1.3604, + "step": 5398 + }, + { + "epoch": 0.15852369487345117, + "grad_norm": 0.0, + "learning_rate": 1.9146175413949028e-05, + "loss": 1.2998, + "step": 5399 + }, + { + "epoch": 0.15855305655059018, + "grad_norm": 0.0, + "learning_rate": 1.9145790880193716e-05, + "loss": 1.4365, + "step": 5400 + }, + { + "epoch": 0.15858241822772917, + "grad_norm": 0.0, + "learning_rate": 1.9145406263730682e-05, + "loss": 1.6318, + "step": 5401 + }, + { + "epoch": 0.15861177990486816, + "grad_norm": 0.0, + "learning_rate": 1.914502156456339e-05, + "loss": 1.498, + "step": 5402 + }, + { + "epoch": 0.15864114158200718, + "grad_norm": 0.0, + "learning_rate": 1.9144636782695335e-05, + "loss": 1.4424, + "step": 5403 + }, + { + "epoch": 0.15867050325914617, + "grad_norm": 0.0, + "learning_rate": 1.914425191812998e-05, + "loss": 1.5635, + "step": 5404 + }, + { + "epoch": 0.15869986493628516, + "grad_norm": 0.0, + "learning_rate": 1.914386697087082e-05, + "loss": 1.5762, + "step": 5405 + }, + { + "epoch": 0.15872922661342415, + "grad_norm": 0.0, + "learning_rate": 1.9143481940921324e-05, + "loss": 1.3975, + "step": 5406 + }, + { + "epoch": 0.15875858829056316, + "grad_norm": 0.0, + "learning_rate": 1.9143096828284976e-05, + "loss": 1.4717, + "step": 5407 + }, + { + "epoch": 0.15878794996770215, + "grad_norm": 0.0, + "learning_rate": 1.914271163296527e-05, + "loss": 1.5801, + "step": 5408 + }, + { + "epoch": 0.15881731164484114, + "grad_norm": 0.0, + "learning_rate": 1.9142326354965676e-05, + "loss": 1.4023, + "step": 5409 + }, + { + "epoch": 0.15884667332198016, + "grad_norm": 0.0, + "learning_rate": 1.9141940994289686e-05, + "loss": 1.4326, + "step": 5410 + }, + { + "epoch": 0.15887603499911915, + "grad_norm": 0.0, + "learning_rate": 1.914155555094078e-05, + "loss": 1.4521, + "step": 5411 + }, + { + "epoch": 0.15890539667625814, + "grad_norm": 0.0, + "learning_rate": 1.9141170024922444e-05, + "loss": 1.4873, + "step": 5412 + }, + { + "epoch": 0.15893475835339715, + "grad_norm": 0.0, + "learning_rate": 1.914078441623817e-05, + "loss": 1.5039, + "step": 5413 + }, + { + "epoch": 0.15896412003053614, + "grad_norm": 0.0, + "learning_rate": 1.9140398724891442e-05, + "loss": 1.5215, + "step": 5414 + }, + { + "epoch": 0.15899348170767513, + "grad_norm": 0.0, + "learning_rate": 1.9140012950885745e-05, + "loss": 1.3975, + "step": 5415 + }, + { + "epoch": 0.15902284338481415, + "grad_norm": 0.0, + "learning_rate": 1.913962709422457e-05, + "loss": 1.5283, + "step": 5416 + }, + { + "epoch": 0.15905220506195314, + "grad_norm": 0.0, + "learning_rate": 1.913924115491141e-05, + "loss": 1.4189, + "step": 5417 + }, + { + "epoch": 0.15908156673909213, + "grad_norm": 0.0, + "learning_rate": 1.9138855132949747e-05, + "loss": 1.4102, + "step": 5418 + }, + { + "epoch": 0.15911092841623115, + "grad_norm": 0.0, + "learning_rate": 1.913846902834308e-05, + "loss": 1.3589, + "step": 5419 + }, + { + "epoch": 0.15914029009337013, + "grad_norm": 0.0, + "learning_rate": 1.9138082841094897e-05, + "loss": 1.4922, + "step": 5420 + }, + { + "epoch": 0.15916965177050912, + "grad_norm": 0.0, + "learning_rate": 1.913769657120869e-05, + "loss": 1.4404, + "step": 5421 + }, + { + "epoch": 0.15919901344764814, + "grad_norm": 0.0, + "learning_rate": 1.9137310218687954e-05, + "loss": 1.4785, + "step": 5422 + }, + { + "epoch": 0.15922837512478713, + "grad_norm": 0.0, + "learning_rate": 1.9136923783536175e-05, + "loss": 1.3447, + "step": 5423 + }, + { + "epoch": 0.15925773680192612, + "grad_norm": 0.0, + "learning_rate": 1.913653726575686e-05, + "loss": 1.4404, + "step": 5424 + }, + { + "epoch": 0.15928709847906514, + "grad_norm": 0.0, + "learning_rate": 1.91361506653535e-05, + "loss": 1.46, + "step": 5425 + }, + { + "epoch": 0.15931646015620413, + "grad_norm": 0.0, + "learning_rate": 1.9135763982329587e-05, + "loss": 1.4541, + "step": 5426 + }, + { + "epoch": 0.15934582183334312, + "grad_norm": 0.0, + "learning_rate": 1.913537721668862e-05, + "loss": 1.4111, + "step": 5427 + }, + { + "epoch": 0.15937518351048213, + "grad_norm": 0.0, + "learning_rate": 1.91349903684341e-05, + "loss": 1.5176, + "step": 5428 + }, + { + "epoch": 0.15940454518762112, + "grad_norm": 0.0, + "learning_rate": 1.9134603437569522e-05, + "loss": 1.5332, + "step": 5429 + }, + { + "epoch": 0.1594339068647601, + "grad_norm": 0.0, + "learning_rate": 1.9134216424098383e-05, + "loss": 1.3955, + "step": 5430 + }, + { + "epoch": 0.1594632685418991, + "grad_norm": 0.0, + "learning_rate": 1.9133829328024187e-05, + "loss": 1.3574, + "step": 5431 + }, + { + "epoch": 0.15949263021903812, + "grad_norm": 0.0, + "learning_rate": 1.9133442149350432e-05, + "loss": 1.4678, + "step": 5432 + }, + { + "epoch": 0.1595219918961771, + "grad_norm": 0.0, + "learning_rate": 1.9133054888080624e-05, + "loss": 1.5781, + "step": 5433 + }, + { + "epoch": 0.1595513535733161, + "grad_norm": 0.0, + "learning_rate": 1.913266754421826e-05, + "loss": 1.4023, + "step": 5434 + }, + { + "epoch": 0.1595807152504551, + "grad_norm": 0.0, + "learning_rate": 1.9132280117766845e-05, + "loss": 1.542, + "step": 5435 + }, + { + "epoch": 0.1596100769275941, + "grad_norm": 0.0, + "learning_rate": 1.9131892608729877e-05, + "loss": 1.4473, + "step": 5436 + }, + { + "epoch": 0.1596394386047331, + "grad_norm": 0.0, + "learning_rate": 1.913150501711087e-05, + "loss": 1.3711, + "step": 5437 + }, + { + "epoch": 0.1596688002818721, + "grad_norm": 0.0, + "learning_rate": 1.9131117342913327e-05, + "loss": 1.5547, + "step": 5438 + }, + { + "epoch": 0.1596981619590111, + "grad_norm": 0.0, + "learning_rate": 1.9130729586140747e-05, + "loss": 1.4648, + "step": 5439 + }, + { + "epoch": 0.1597275236361501, + "grad_norm": 0.0, + "learning_rate": 1.9130341746796645e-05, + "loss": 1.4814, + "step": 5440 + }, + { + "epoch": 0.1597568853132891, + "grad_norm": 0.0, + "learning_rate": 1.9129953824884516e-05, + "loss": 1.6729, + "step": 5441 + }, + { + "epoch": 0.1597862469904281, + "grad_norm": 0.0, + "learning_rate": 1.9129565820407884e-05, + "loss": 1.5859, + "step": 5442 + }, + { + "epoch": 0.15981560866756708, + "grad_norm": 0.0, + "learning_rate": 1.912917773337025e-05, + "loss": 1.3721, + "step": 5443 + }, + { + "epoch": 0.1598449703447061, + "grad_norm": 0.0, + "learning_rate": 1.912878956377512e-05, + "loss": 1.4102, + "step": 5444 + }, + { + "epoch": 0.1598743320218451, + "grad_norm": 0.0, + "learning_rate": 1.912840131162601e-05, + "loss": 1.4307, + "step": 5445 + }, + { + "epoch": 0.15990369369898408, + "grad_norm": 0.0, + "learning_rate": 1.9128012976926432e-05, + "loss": 1.3857, + "step": 5446 + }, + { + "epoch": 0.1599330553761231, + "grad_norm": 0.0, + "learning_rate": 1.9127624559679888e-05, + "loss": 1.4785, + "step": 5447 + }, + { + "epoch": 0.15996241705326208, + "grad_norm": 0.0, + "learning_rate": 1.9127236059889906e-05, + "loss": 1.3477, + "step": 5448 + }, + { + "epoch": 0.15999177873040107, + "grad_norm": 0.0, + "learning_rate": 1.912684747755998e-05, + "loss": 1.3955, + "step": 5449 + }, + { + "epoch": 0.1600211404075401, + "grad_norm": 0.0, + "learning_rate": 1.9126458812693644e-05, + "loss": 1.3936, + "step": 5450 + }, + { + "epoch": 0.16005050208467908, + "grad_norm": 0.0, + "learning_rate": 1.9126070065294403e-05, + "loss": 1.4463, + "step": 5451 + }, + { + "epoch": 0.16007986376181807, + "grad_norm": 0.0, + "learning_rate": 1.912568123536577e-05, + "loss": 1.5449, + "step": 5452 + }, + { + "epoch": 0.1601092254389571, + "grad_norm": 0.0, + "learning_rate": 1.9125292322911263e-05, + "loss": 1.3574, + "step": 5453 + }, + { + "epoch": 0.16013858711609608, + "grad_norm": 0.0, + "learning_rate": 1.9124903327934405e-05, + "loss": 1.499, + "step": 5454 + }, + { + "epoch": 0.16016794879323507, + "grad_norm": 0.0, + "learning_rate": 1.9124514250438706e-05, + "loss": 1.4795, + "step": 5455 + }, + { + "epoch": 0.16019731047037405, + "grad_norm": 0.0, + "learning_rate": 1.9124125090427686e-05, + "loss": 1.5264, + "step": 5456 + }, + { + "epoch": 0.16022667214751307, + "grad_norm": 0.0, + "learning_rate": 1.9123735847904873e-05, + "loss": 1.4746, + "step": 5457 + }, + { + "epoch": 0.16025603382465206, + "grad_norm": 0.0, + "learning_rate": 1.9123346522873777e-05, + "loss": 1.4209, + "step": 5458 + }, + { + "epoch": 0.16028539550179105, + "grad_norm": 0.0, + "learning_rate": 1.912295711533792e-05, + "loss": 1.3574, + "step": 5459 + }, + { + "epoch": 0.16031475717893007, + "grad_norm": 0.0, + "learning_rate": 1.9122567625300825e-05, + "loss": 1.4453, + "step": 5460 + }, + { + "epoch": 0.16034411885606906, + "grad_norm": 0.0, + "learning_rate": 1.9122178052766017e-05, + "loss": 1.4902, + "step": 5461 + }, + { + "epoch": 0.16037348053320805, + "grad_norm": 0.0, + "learning_rate": 1.9121788397737017e-05, + "loss": 1.4043, + "step": 5462 + }, + { + "epoch": 0.16040284221034706, + "grad_norm": 0.0, + "learning_rate": 1.9121398660217345e-05, + "loss": 1.3906, + "step": 5463 + }, + { + "epoch": 0.16043220388748605, + "grad_norm": 0.0, + "learning_rate": 1.912100884021053e-05, + "loss": 1.4082, + "step": 5464 + }, + { + "epoch": 0.16046156556462504, + "grad_norm": 0.0, + "learning_rate": 1.91206189377201e-05, + "loss": 1.3613, + "step": 5465 + }, + { + "epoch": 0.16049092724176406, + "grad_norm": 0.0, + "learning_rate": 1.9120228952749575e-05, + "loss": 1.5273, + "step": 5466 + }, + { + "epoch": 0.16052028891890305, + "grad_norm": 0.0, + "learning_rate": 1.911983888530248e-05, + "loss": 1.4873, + "step": 5467 + }, + { + "epoch": 0.16054965059604204, + "grad_norm": 0.0, + "learning_rate": 1.911944873538235e-05, + "loss": 1.5859, + "step": 5468 + }, + { + "epoch": 0.16057901227318105, + "grad_norm": 0.0, + "learning_rate": 1.9119058502992707e-05, + "loss": 1.4336, + "step": 5469 + }, + { + "epoch": 0.16060837395032004, + "grad_norm": 0.0, + "learning_rate": 1.9118668188137086e-05, + "loss": 1.5283, + "step": 5470 + }, + { + "epoch": 0.16063773562745903, + "grad_norm": 0.0, + "learning_rate": 1.911827779081901e-05, + "loss": 1.5693, + "step": 5471 + }, + { + "epoch": 0.16066709730459805, + "grad_norm": 0.0, + "learning_rate": 1.9117887311042018e-05, + "loss": 1.4473, + "step": 5472 + }, + { + "epoch": 0.16069645898173704, + "grad_norm": 0.0, + "learning_rate": 1.911749674880963e-05, + "loss": 1.4092, + "step": 5473 + }, + { + "epoch": 0.16072582065887603, + "grad_norm": 0.0, + "learning_rate": 1.9117106104125388e-05, + "loss": 1.4795, + "step": 5474 + }, + { + "epoch": 0.16075518233601505, + "grad_norm": 0.0, + "learning_rate": 1.9116715376992817e-05, + "loss": 1.5342, + "step": 5475 + }, + { + "epoch": 0.16078454401315403, + "grad_norm": 0.0, + "learning_rate": 1.9116324567415456e-05, + "loss": 1.4775, + "step": 5476 + }, + { + "epoch": 0.16081390569029302, + "grad_norm": 0.0, + "learning_rate": 1.911593367539684e-05, + "loss": 1.4434, + "step": 5477 + }, + { + "epoch": 0.16084326736743204, + "grad_norm": 0.0, + "learning_rate": 1.9115542700940498e-05, + "loss": 1.4248, + "step": 5478 + }, + { + "epoch": 0.16087262904457103, + "grad_norm": 0.0, + "learning_rate": 1.9115151644049967e-05, + "loss": 1.5029, + "step": 5479 + }, + { + "epoch": 0.16090199072171002, + "grad_norm": 0.0, + "learning_rate": 1.911476050472879e-05, + "loss": 1.3779, + "step": 5480 + }, + { + "epoch": 0.160931352398849, + "grad_norm": 0.0, + "learning_rate": 1.9114369282980494e-05, + "loss": 1.5176, + "step": 5481 + }, + { + "epoch": 0.16096071407598803, + "grad_norm": 0.0, + "learning_rate": 1.9113977978808625e-05, + "loss": 1.5166, + "step": 5482 + }, + { + "epoch": 0.16099007575312702, + "grad_norm": 0.0, + "learning_rate": 1.911358659221672e-05, + "loss": 1.5234, + "step": 5483 + }, + { + "epoch": 0.161019437430266, + "grad_norm": 0.0, + "learning_rate": 1.9113195123208313e-05, + "loss": 1.5371, + "step": 5484 + }, + { + "epoch": 0.16104879910740502, + "grad_norm": 0.0, + "learning_rate": 1.9112803571786954e-05, + "loss": 1.4707, + "step": 5485 + }, + { + "epoch": 0.161078160784544, + "grad_norm": 0.0, + "learning_rate": 1.9112411937956178e-05, + "loss": 1.4434, + "step": 5486 + }, + { + "epoch": 0.161107522461683, + "grad_norm": 0.0, + "learning_rate": 1.9112020221719525e-05, + "loss": 1.3525, + "step": 5487 + }, + { + "epoch": 0.16113688413882202, + "grad_norm": 0.0, + "learning_rate": 1.911162842308054e-05, + "loss": 1.4717, + "step": 5488 + }, + { + "epoch": 0.161166245815961, + "grad_norm": 0.0, + "learning_rate": 1.9111236542042766e-05, + "loss": 1.4951, + "step": 5489 + }, + { + "epoch": 0.1611956074931, + "grad_norm": 0.0, + "learning_rate": 1.9110844578609745e-05, + "loss": 1.3721, + "step": 5490 + }, + { + "epoch": 0.161224969170239, + "grad_norm": 0.0, + "learning_rate": 1.9110452532785023e-05, + "loss": 1.5723, + "step": 5491 + }, + { + "epoch": 0.161254330847378, + "grad_norm": 0.0, + "learning_rate": 1.911006040457215e-05, + "loss": 1.4082, + "step": 5492 + }, + { + "epoch": 0.161283692524517, + "grad_norm": 0.0, + "learning_rate": 1.9109668193974665e-05, + "loss": 1.4004, + "step": 5493 + }, + { + "epoch": 0.161313054201656, + "grad_norm": 0.0, + "learning_rate": 1.9109275900996117e-05, + "loss": 1.4121, + "step": 5494 + }, + { + "epoch": 0.161342415878795, + "grad_norm": 0.0, + "learning_rate": 1.9108883525640054e-05, + "loss": 1.3691, + "step": 5495 + }, + { + "epoch": 0.161371777555934, + "grad_norm": 0.0, + "learning_rate": 1.9108491067910023e-05, + "loss": 1.4287, + "step": 5496 + }, + { + "epoch": 0.161401139233073, + "grad_norm": 0.0, + "learning_rate": 1.910809852780958e-05, + "loss": 1.3916, + "step": 5497 + }, + { + "epoch": 0.161430500910212, + "grad_norm": 0.0, + "learning_rate": 1.9107705905342265e-05, + "loss": 1.54, + "step": 5498 + }, + { + "epoch": 0.16145986258735098, + "grad_norm": 0.0, + "learning_rate": 1.9107313200511633e-05, + "loss": 1.4717, + "step": 5499 + }, + { + "epoch": 0.16148922426449, + "grad_norm": 0.0, + "learning_rate": 1.9106920413321236e-05, + "loss": 1.501, + "step": 5500 + }, + { + "epoch": 0.161518585941629, + "grad_norm": 0.0, + "learning_rate": 1.9106527543774625e-05, + "loss": 1.4619, + "step": 5501 + }, + { + "epoch": 0.16154794761876798, + "grad_norm": 0.0, + "learning_rate": 1.9106134591875354e-05, + "loss": 1.4336, + "step": 5502 + }, + { + "epoch": 0.161577309295907, + "grad_norm": 0.0, + "learning_rate": 1.9105741557626974e-05, + "loss": 1.3848, + "step": 5503 + }, + { + "epoch": 0.16160667097304598, + "grad_norm": 0.0, + "learning_rate": 1.9105348441033043e-05, + "loss": 1.3994, + "step": 5504 + }, + { + "epoch": 0.16163603265018497, + "grad_norm": 0.0, + "learning_rate": 1.9104955242097115e-05, + "loss": 1.4492, + "step": 5505 + }, + { + "epoch": 0.16166539432732396, + "grad_norm": 0.0, + "learning_rate": 1.910456196082274e-05, + "loss": 1.3691, + "step": 5506 + }, + { + "epoch": 0.16169475600446298, + "grad_norm": 0.0, + "learning_rate": 1.9104168597213486e-05, + "loss": 1.3887, + "step": 5507 + }, + { + "epoch": 0.16172411768160197, + "grad_norm": 0.0, + "learning_rate": 1.9103775151272898e-05, + "loss": 1.4141, + "step": 5508 + }, + { + "epoch": 0.16175347935874096, + "grad_norm": 0.0, + "learning_rate": 1.9103381623004546e-05, + "loss": 1.498, + "step": 5509 + }, + { + "epoch": 0.16178284103587998, + "grad_norm": 0.0, + "learning_rate": 1.9102988012411977e-05, + "loss": 1.4277, + "step": 5510 + }, + { + "epoch": 0.16181220271301897, + "grad_norm": 0.0, + "learning_rate": 1.910259431949876e-05, + "loss": 1.4863, + "step": 5511 + }, + { + "epoch": 0.16184156439015795, + "grad_norm": 0.0, + "learning_rate": 1.910220054426845e-05, + "loss": 1.3662, + "step": 5512 + }, + { + "epoch": 0.16187092606729697, + "grad_norm": 0.0, + "learning_rate": 1.9101806686724607e-05, + "loss": 1.4727, + "step": 5513 + }, + { + "epoch": 0.16190028774443596, + "grad_norm": 0.0, + "learning_rate": 1.91014127468708e-05, + "loss": 1.541, + "step": 5514 + }, + { + "epoch": 0.16192964942157495, + "grad_norm": 0.0, + "learning_rate": 1.910101872471058e-05, + "loss": 1.4365, + "step": 5515 + }, + { + "epoch": 0.16195901109871397, + "grad_norm": 0.0, + "learning_rate": 1.910062462024752e-05, + "loss": 1.3232, + "step": 5516 + }, + { + "epoch": 0.16198837277585296, + "grad_norm": 0.0, + "learning_rate": 1.9100230433485183e-05, + "loss": 1.4756, + "step": 5517 + }, + { + "epoch": 0.16201773445299195, + "grad_norm": 0.0, + "learning_rate": 1.909983616442713e-05, + "loss": 1.4316, + "step": 5518 + }, + { + "epoch": 0.16204709613013096, + "grad_norm": 0.0, + "learning_rate": 1.909944181307693e-05, + "loss": 1.3984, + "step": 5519 + }, + { + "epoch": 0.16207645780726995, + "grad_norm": 0.0, + "learning_rate": 1.9099047379438143e-05, + "loss": 1.4824, + "step": 5520 + }, + { + "epoch": 0.16210581948440894, + "grad_norm": 0.0, + "learning_rate": 1.909865286351434e-05, + "loss": 1.5264, + "step": 5521 + }, + { + "epoch": 0.16213518116154796, + "grad_norm": 0.0, + "learning_rate": 1.9098258265309094e-05, + "loss": 1.3672, + "step": 5522 + }, + { + "epoch": 0.16216454283868695, + "grad_norm": 0.0, + "learning_rate": 1.9097863584825968e-05, + "loss": 1.4062, + "step": 5523 + }, + { + "epoch": 0.16219390451582594, + "grad_norm": 0.0, + "learning_rate": 1.909746882206853e-05, + "loss": 1.3955, + "step": 5524 + }, + { + "epoch": 0.16222326619296495, + "grad_norm": 0.0, + "learning_rate": 1.9097073977040348e-05, + "loss": 1.502, + "step": 5525 + }, + { + "epoch": 0.16225262787010394, + "grad_norm": 0.0, + "learning_rate": 1.9096679049745e-05, + "loss": 1.5127, + "step": 5526 + }, + { + "epoch": 0.16228198954724293, + "grad_norm": 0.0, + "learning_rate": 1.9096284040186053e-05, + "loss": 1.5293, + "step": 5527 + }, + { + "epoch": 0.16231135122438195, + "grad_norm": 0.0, + "learning_rate": 1.909588894836708e-05, + "loss": 1.5049, + "step": 5528 + }, + { + "epoch": 0.16234071290152094, + "grad_norm": 0.0, + "learning_rate": 1.9095493774291653e-05, + "loss": 1.3496, + "step": 5529 + }, + { + "epoch": 0.16237007457865993, + "grad_norm": 0.0, + "learning_rate": 1.909509851796335e-05, + "loss": 1.5664, + "step": 5530 + }, + { + "epoch": 0.16239943625579892, + "grad_norm": 0.0, + "learning_rate": 1.9094703179385736e-05, + "loss": 1.4121, + "step": 5531 + }, + { + "epoch": 0.16242879793293794, + "grad_norm": 0.0, + "learning_rate": 1.9094307758562392e-05, + "loss": 1.3208, + "step": 5532 + }, + { + "epoch": 0.16245815961007692, + "grad_norm": 0.0, + "learning_rate": 1.90939122554969e-05, + "loss": 1.5283, + "step": 5533 + }, + { + "epoch": 0.1624875212872159, + "grad_norm": 0.0, + "learning_rate": 1.9093516670192827e-05, + "loss": 1.5449, + "step": 5534 + }, + { + "epoch": 0.16251688296435493, + "grad_norm": 0.0, + "learning_rate": 1.909312100265375e-05, + "loss": 1.4531, + "step": 5535 + }, + { + "epoch": 0.16254624464149392, + "grad_norm": 0.0, + "learning_rate": 1.9092725252883258e-05, + "loss": 1.3809, + "step": 5536 + }, + { + "epoch": 0.1625756063186329, + "grad_norm": 0.0, + "learning_rate": 1.909232942088492e-05, + "loss": 1.4092, + "step": 5537 + }, + { + "epoch": 0.16260496799577193, + "grad_norm": 0.0, + "learning_rate": 1.9091933506662318e-05, + "loss": 1.4248, + "step": 5538 + }, + { + "epoch": 0.16263432967291092, + "grad_norm": 0.0, + "learning_rate": 1.9091537510219033e-05, + "loss": 1.311, + "step": 5539 + }, + { + "epoch": 0.1626636913500499, + "grad_norm": 0.0, + "learning_rate": 1.9091141431558648e-05, + "loss": 1.3555, + "step": 5540 + }, + { + "epoch": 0.16269305302718892, + "grad_norm": 0.0, + "learning_rate": 1.909074527068474e-05, + "loss": 1.2451, + "step": 5541 + }, + { + "epoch": 0.1627224147043279, + "grad_norm": 0.0, + "learning_rate": 1.9090349027600894e-05, + "loss": 1.4521, + "step": 5542 + }, + { + "epoch": 0.1627517763814669, + "grad_norm": 0.0, + "learning_rate": 1.9089952702310694e-05, + "loss": 1.3408, + "step": 5543 + }, + { + "epoch": 0.16278113805860592, + "grad_norm": 0.0, + "learning_rate": 1.9089556294817722e-05, + "loss": 1.3672, + "step": 5544 + }, + { + "epoch": 0.1628104997357449, + "grad_norm": 0.0, + "learning_rate": 1.908915980512557e-05, + "loss": 1.4609, + "step": 5545 + }, + { + "epoch": 0.1628398614128839, + "grad_norm": 0.0, + "learning_rate": 1.9088763233237815e-05, + "loss": 1.4521, + "step": 5546 + }, + { + "epoch": 0.1628692230900229, + "grad_norm": 0.0, + "learning_rate": 1.9088366579158048e-05, + "loss": 1.5957, + "step": 5547 + }, + { + "epoch": 0.1628985847671619, + "grad_norm": 0.0, + "learning_rate": 1.9087969842889854e-05, + "loss": 1.4082, + "step": 5548 + }, + { + "epoch": 0.1629279464443009, + "grad_norm": 0.0, + "learning_rate": 1.908757302443682e-05, + "loss": 1.6025, + "step": 5549 + }, + { + "epoch": 0.1629573081214399, + "grad_norm": 0.0, + "learning_rate": 1.9087176123802536e-05, + "loss": 1.5771, + "step": 5550 + }, + { + "epoch": 0.1629866697985789, + "grad_norm": 0.0, + "learning_rate": 1.9086779140990586e-05, + "loss": 1.5029, + "step": 5551 + }, + { + "epoch": 0.1630160314757179, + "grad_norm": 0.0, + "learning_rate": 1.9086382076004573e-05, + "loss": 1.5498, + "step": 5552 + }, + { + "epoch": 0.1630453931528569, + "grad_norm": 0.0, + "learning_rate": 1.908598492884808e-05, + "loss": 1.4141, + "step": 5553 + }, + { + "epoch": 0.1630747548299959, + "grad_norm": 0.0, + "learning_rate": 1.9085587699524694e-05, + "loss": 1.2988, + "step": 5554 + }, + { + "epoch": 0.16310411650713488, + "grad_norm": 0.0, + "learning_rate": 1.908519038803801e-05, + "loss": 1.4004, + "step": 5555 + }, + { + "epoch": 0.16313347818427387, + "grad_norm": 0.0, + "learning_rate": 1.9084792994391627e-05, + "loss": 1.4844, + "step": 5556 + }, + { + "epoch": 0.1631628398614129, + "grad_norm": 0.0, + "learning_rate": 1.9084395518589135e-05, + "loss": 1.4209, + "step": 5557 + }, + { + "epoch": 0.16319220153855188, + "grad_norm": 0.0, + "learning_rate": 1.9083997960634124e-05, + "loss": 1.3467, + "step": 5558 + }, + { + "epoch": 0.16322156321569087, + "grad_norm": 0.0, + "learning_rate": 1.9083600320530194e-05, + "loss": 1.374, + "step": 5559 + }, + { + "epoch": 0.16325092489282989, + "grad_norm": 0.0, + "learning_rate": 1.9083202598280942e-05, + "loss": 1.4766, + "step": 5560 + }, + { + "epoch": 0.16328028656996887, + "grad_norm": 0.0, + "learning_rate": 1.908280479388996e-05, + "loss": 1.4033, + "step": 5561 + }, + { + "epoch": 0.16330964824710786, + "grad_norm": 0.0, + "learning_rate": 1.9082406907360848e-05, + "loss": 1.4395, + "step": 5562 + }, + { + "epoch": 0.16333900992424688, + "grad_norm": 0.0, + "learning_rate": 1.9082008938697206e-05, + "loss": 1.5596, + "step": 5563 + }, + { + "epoch": 0.16336837160138587, + "grad_norm": 0.0, + "learning_rate": 1.908161088790263e-05, + "loss": 1.4355, + "step": 5564 + }, + { + "epoch": 0.16339773327852486, + "grad_norm": 0.0, + "learning_rate": 1.9081212754980723e-05, + "loss": 1.4609, + "step": 5565 + }, + { + "epoch": 0.16342709495566388, + "grad_norm": 0.0, + "learning_rate": 1.908081453993508e-05, + "loss": 1.5264, + "step": 5566 + }, + { + "epoch": 0.16345645663280287, + "grad_norm": 0.0, + "learning_rate": 1.908041624276931e-05, + "loss": 1.5078, + "step": 5567 + }, + { + "epoch": 0.16348581830994185, + "grad_norm": 0.0, + "learning_rate": 1.908001786348701e-05, + "loss": 1.5303, + "step": 5568 + }, + { + "epoch": 0.16351517998708087, + "grad_norm": 0.0, + "learning_rate": 1.907961940209178e-05, + "loss": 1.4512, + "step": 5569 + }, + { + "epoch": 0.16354454166421986, + "grad_norm": 0.0, + "learning_rate": 1.9079220858587225e-05, + "loss": 1.2866, + "step": 5570 + }, + { + "epoch": 0.16357390334135885, + "grad_norm": 0.0, + "learning_rate": 1.9078822232976955e-05, + "loss": 1.4766, + "step": 5571 + }, + { + "epoch": 0.16360326501849787, + "grad_norm": 0.0, + "learning_rate": 1.9078423525264568e-05, + "loss": 1.4707, + "step": 5572 + }, + { + "epoch": 0.16363262669563686, + "grad_norm": 0.0, + "learning_rate": 1.907802473545367e-05, + "loss": 1.4111, + "step": 5573 + }, + { + "epoch": 0.16366198837277585, + "grad_norm": 0.0, + "learning_rate": 1.9077625863547874e-05, + "loss": 1.665, + "step": 5574 + }, + { + "epoch": 0.16369135004991486, + "grad_norm": 0.0, + "learning_rate": 1.9077226909550777e-05, + "loss": 1.3066, + "step": 5575 + }, + { + "epoch": 0.16372071172705385, + "grad_norm": 0.0, + "learning_rate": 1.9076827873466e-05, + "loss": 1.3457, + "step": 5576 + }, + { + "epoch": 0.16375007340419284, + "grad_norm": 0.0, + "learning_rate": 1.9076428755297137e-05, + "loss": 1.4453, + "step": 5577 + }, + { + "epoch": 0.16377943508133186, + "grad_norm": 0.0, + "learning_rate": 1.9076029555047806e-05, + "loss": 1.4746, + "step": 5578 + }, + { + "epoch": 0.16380879675847085, + "grad_norm": 0.0, + "learning_rate": 1.9075630272721617e-05, + "loss": 1.4785, + "step": 5579 + }, + { + "epoch": 0.16383815843560984, + "grad_norm": 0.0, + "learning_rate": 1.9075230908322176e-05, + "loss": 1.418, + "step": 5580 + }, + { + "epoch": 0.16386752011274883, + "grad_norm": 0.0, + "learning_rate": 1.9074831461853104e-05, + "loss": 1.5029, + "step": 5581 + }, + { + "epoch": 0.16389688178988784, + "grad_norm": 0.0, + "learning_rate": 1.9074431933318e-05, + "loss": 1.5215, + "step": 5582 + }, + { + "epoch": 0.16392624346702683, + "grad_norm": 0.0, + "learning_rate": 1.907403232272049e-05, + "loss": 1.5293, + "step": 5583 + }, + { + "epoch": 0.16395560514416582, + "grad_norm": 0.0, + "learning_rate": 1.9073632630064178e-05, + "loss": 1.3115, + "step": 5584 + }, + { + "epoch": 0.16398496682130484, + "grad_norm": 0.0, + "learning_rate": 1.9073232855352684e-05, + "loss": 1.4922, + "step": 5585 + }, + { + "epoch": 0.16401432849844383, + "grad_norm": 0.0, + "learning_rate": 1.9072832998589617e-05, + "loss": 1.4434, + "step": 5586 + }, + { + "epoch": 0.16404369017558282, + "grad_norm": 0.0, + "learning_rate": 1.9072433059778602e-05, + "loss": 1.3271, + "step": 5587 + }, + { + "epoch": 0.16407305185272184, + "grad_norm": 0.0, + "learning_rate": 1.907203303892325e-05, + "loss": 1.4102, + "step": 5588 + }, + { + "epoch": 0.16410241352986082, + "grad_norm": 0.0, + "learning_rate": 1.9071632936027185e-05, + "loss": 1.4062, + "step": 5589 + }, + { + "epoch": 0.1641317752069998, + "grad_norm": 0.0, + "learning_rate": 1.9071232751094013e-05, + "loss": 1.5205, + "step": 5590 + }, + { + "epoch": 0.16416113688413883, + "grad_norm": 0.0, + "learning_rate": 1.9070832484127364e-05, + "loss": 1.5078, + "step": 5591 + }, + { + "epoch": 0.16419049856127782, + "grad_norm": 0.0, + "learning_rate": 1.9070432135130853e-05, + "loss": 1.4189, + "step": 5592 + }, + { + "epoch": 0.1642198602384168, + "grad_norm": 0.0, + "learning_rate": 1.90700317041081e-05, + "loss": 1.5234, + "step": 5593 + }, + { + "epoch": 0.16424922191555583, + "grad_norm": 0.0, + "learning_rate": 1.9069631191062728e-05, + "loss": 1.5293, + "step": 5594 + }, + { + "epoch": 0.16427858359269482, + "grad_norm": 0.0, + "learning_rate": 1.9069230595998358e-05, + "loss": 1.4316, + "step": 5595 + }, + { + "epoch": 0.1643079452698338, + "grad_norm": 0.0, + "learning_rate": 1.9068829918918613e-05, + "loss": 1.4766, + "step": 5596 + }, + { + "epoch": 0.16433730694697282, + "grad_norm": 0.0, + "learning_rate": 1.906842915982712e-05, + "loss": 1.4785, + "step": 5597 + }, + { + "epoch": 0.1643666686241118, + "grad_norm": 0.0, + "learning_rate": 1.9068028318727494e-05, + "loss": 1.3574, + "step": 5598 + }, + { + "epoch": 0.1643960303012508, + "grad_norm": 0.0, + "learning_rate": 1.9067627395623368e-05, + "loss": 1.4053, + "step": 5599 + }, + { + "epoch": 0.16442539197838982, + "grad_norm": 0.0, + "learning_rate": 1.9067226390518368e-05, + "loss": 1.4473, + "step": 5600 + }, + { + "epoch": 0.1644547536555288, + "grad_norm": 0.0, + "learning_rate": 1.9066825303416113e-05, + "loss": 1.4727, + "step": 5601 + }, + { + "epoch": 0.1644841153326678, + "grad_norm": 0.0, + "learning_rate": 1.9066424134320235e-05, + "loss": 1.373, + "step": 5602 + }, + { + "epoch": 0.1645134770098068, + "grad_norm": 0.0, + "learning_rate": 1.9066022883234367e-05, + "loss": 1.4766, + "step": 5603 + }, + { + "epoch": 0.1645428386869458, + "grad_norm": 0.0, + "learning_rate": 1.9065621550162126e-05, + "loss": 1.4541, + "step": 5604 + }, + { + "epoch": 0.1645722003640848, + "grad_norm": 0.0, + "learning_rate": 1.9065220135107152e-05, + "loss": 1.4014, + "step": 5605 + }, + { + "epoch": 0.16460156204122378, + "grad_norm": 0.0, + "learning_rate": 1.9064818638073067e-05, + "loss": 1.5264, + "step": 5606 + }, + { + "epoch": 0.1646309237183628, + "grad_norm": 0.0, + "learning_rate": 1.906441705906351e-05, + "loss": 1.4473, + "step": 5607 + }, + { + "epoch": 0.1646602853955018, + "grad_norm": 0.0, + "learning_rate": 1.9064015398082104e-05, + "loss": 1.4775, + "step": 5608 + }, + { + "epoch": 0.16468964707264078, + "grad_norm": 0.0, + "learning_rate": 1.9063613655132487e-05, + "loss": 1.5117, + "step": 5609 + }, + { + "epoch": 0.1647190087497798, + "grad_norm": 0.0, + "learning_rate": 1.906321183021829e-05, + "loss": 1.4258, + "step": 5610 + }, + { + "epoch": 0.16474837042691878, + "grad_norm": 0.0, + "learning_rate": 1.9062809923343146e-05, + "loss": 1.5146, + "step": 5611 + }, + { + "epoch": 0.16477773210405777, + "grad_norm": 0.0, + "learning_rate": 1.9062407934510693e-05, + "loss": 1.4375, + "step": 5612 + }, + { + "epoch": 0.1648070937811968, + "grad_norm": 0.0, + "learning_rate": 1.9062005863724567e-05, + "loss": 1.457, + "step": 5613 + }, + { + "epoch": 0.16483645545833578, + "grad_norm": 0.0, + "learning_rate": 1.9061603710988396e-05, + "loss": 1.4004, + "step": 5614 + }, + { + "epoch": 0.16486581713547477, + "grad_norm": 0.0, + "learning_rate": 1.9061201476305823e-05, + "loss": 1.5508, + "step": 5615 + }, + { + "epoch": 0.16489517881261379, + "grad_norm": 0.0, + "learning_rate": 1.9060799159680486e-05, + "loss": 1.4512, + "step": 5616 + }, + { + "epoch": 0.16492454048975277, + "grad_norm": 0.0, + "learning_rate": 1.906039676111602e-05, + "loss": 1.4756, + "step": 5617 + }, + { + "epoch": 0.16495390216689176, + "grad_norm": 0.0, + "learning_rate": 1.9059994280616067e-05, + "loss": 1.4932, + "step": 5618 + }, + { + "epoch": 0.16498326384403078, + "grad_norm": 0.0, + "learning_rate": 1.9059591718184265e-05, + "loss": 1.5137, + "step": 5619 + }, + { + "epoch": 0.16501262552116977, + "grad_norm": 0.0, + "learning_rate": 1.9059189073824256e-05, + "loss": 1.6182, + "step": 5620 + }, + { + "epoch": 0.16504198719830876, + "grad_norm": 0.0, + "learning_rate": 1.9058786347539678e-05, + "loss": 1.5029, + "step": 5621 + }, + { + "epoch": 0.16507134887544778, + "grad_norm": 0.0, + "learning_rate": 1.9058383539334176e-05, + "loss": 1.6572, + "step": 5622 + }, + { + "epoch": 0.16510071055258677, + "grad_norm": 0.0, + "learning_rate": 1.9057980649211392e-05, + "loss": 1.5293, + "step": 5623 + }, + { + "epoch": 0.16513007222972576, + "grad_norm": 0.0, + "learning_rate": 1.9057577677174966e-05, + "loss": 1.5283, + "step": 5624 + }, + { + "epoch": 0.16515943390686477, + "grad_norm": 0.0, + "learning_rate": 1.905717462322855e-05, + "loss": 1.3838, + "step": 5625 + }, + { + "epoch": 0.16518879558400376, + "grad_norm": 0.0, + "learning_rate": 1.9056771487375783e-05, + "loss": 1.4258, + "step": 5626 + }, + { + "epoch": 0.16521815726114275, + "grad_norm": 0.0, + "learning_rate": 1.9056368269620313e-05, + "loss": 1.415, + "step": 5627 + }, + { + "epoch": 0.16524751893828177, + "grad_norm": 0.0, + "learning_rate": 1.9055964969965783e-05, + "loss": 1.3262, + "step": 5628 + }, + { + "epoch": 0.16527688061542076, + "grad_norm": 0.0, + "learning_rate": 1.9055561588415842e-05, + "loss": 1.4492, + "step": 5629 + }, + { + "epoch": 0.16530624229255975, + "grad_norm": 0.0, + "learning_rate": 1.9055158124974143e-05, + "loss": 1.582, + "step": 5630 + }, + { + "epoch": 0.16533560396969874, + "grad_norm": 0.0, + "learning_rate": 1.9054754579644324e-05, + "loss": 1.5098, + "step": 5631 + }, + { + "epoch": 0.16536496564683775, + "grad_norm": 0.0, + "learning_rate": 1.9054350952430045e-05, + "loss": 1.4492, + "step": 5632 + }, + { + "epoch": 0.16539432732397674, + "grad_norm": 0.0, + "learning_rate": 1.905394724333495e-05, + "loss": 1.4443, + "step": 5633 + }, + { + "epoch": 0.16542368900111573, + "grad_norm": 0.0, + "learning_rate": 1.905354345236269e-05, + "loss": 1.5381, + "step": 5634 + }, + { + "epoch": 0.16545305067825475, + "grad_norm": 0.0, + "learning_rate": 1.905313957951692e-05, + "loss": 1.4727, + "step": 5635 + }, + { + "epoch": 0.16548241235539374, + "grad_norm": 0.0, + "learning_rate": 1.905273562480129e-05, + "loss": 1.6025, + "step": 5636 + }, + { + "epoch": 0.16551177403253273, + "grad_norm": 0.0, + "learning_rate": 1.9052331588219454e-05, + "loss": 1.6006, + "step": 5637 + }, + { + "epoch": 0.16554113570967174, + "grad_norm": 0.0, + "learning_rate": 1.905192746977506e-05, + "loss": 1.4219, + "step": 5638 + }, + { + "epoch": 0.16557049738681073, + "grad_norm": 0.0, + "learning_rate": 1.9051523269471774e-05, + "loss": 1.4189, + "step": 5639 + }, + { + "epoch": 0.16559985906394972, + "grad_norm": 0.0, + "learning_rate": 1.9051118987313242e-05, + "loss": 1.415, + "step": 5640 + }, + { + "epoch": 0.16562922074108874, + "grad_norm": 0.0, + "learning_rate": 1.9050714623303125e-05, + "loss": 1.4707, + "step": 5641 + }, + { + "epoch": 0.16565858241822773, + "grad_norm": 0.0, + "learning_rate": 1.9050310177445075e-05, + "loss": 1.5039, + "step": 5642 + }, + { + "epoch": 0.16568794409536672, + "grad_norm": 0.0, + "learning_rate": 1.9049905649742754e-05, + "loss": 1.3818, + "step": 5643 + }, + { + "epoch": 0.16571730577250574, + "grad_norm": 0.0, + "learning_rate": 1.9049501040199815e-05, + "loss": 1.4785, + "step": 5644 + }, + { + "epoch": 0.16574666744964472, + "grad_norm": 0.0, + "learning_rate": 1.9049096348819924e-05, + "loss": 1.4785, + "step": 5645 + }, + { + "epoch": 0.1657760291267837, + "grad_norm": 0.0, + "learning_rate": 1.9048691575606736e-05, + "loss": 1.5078, + "step": 5646 + }, + { + "epoch": 0.16580539080392273, + "grad_norm": 0.0, + "learning_rate": 1.904828672056391e-05, + "loss": 1.5059, + "step": 5647 + }, + { + "epoch": 0.16583475248106172, + "grad_norm": 0.0, + "learning_rate": 1.9047881783695116e-05, + "loss": 1.2842, + "step": 5648 + }, + { + "epoch": 0.1658641141582007, + "grad_norm": 0.0, + "learning_rate": 1.9047476765004008e-05, + "loss": 1.4629, + "step": 5649 + }, + { + "epoch": 0.16589347583533973, + "grad_norm": 0.0, + "learning_rate": 1.904707166449425e-05, + "loss": 1.3535, + "step": 5650 + }, + { + "epoch": 0.16592283751247872, + "grad_norm": 0.0, + "learning_rate": 1.9046666482169506e-05, + "loss": 1.4756, + "step": 5651 + }, + { + "epoch": 0.1659521991896177, + "grad_norm": 0.0, + "learning_rate": 1.904626121803344e-05, + "loss": 1.4834, + "step": 5652 + }, + { + "epoch": 0.16598156086675672, + "grad_norm": 0.0, + "learning_rate": 1.9045855872089714e-05, + "loss": 1.3887, + "step": 5653 + }, + { + "epoch": 0.1660109225438957, + "grad_norm": 0.0, + "learning_rate": 1.9045450444342e-05, + "loss": 1.5059, + "step": 5654 + }, + { + "epoch": 0.1660402842210347, + "grad_norm": 0.0, + "learning_rate": 1.9045044934793958e-05, + "loss": 1.4443, + "step": 5655 + }, + { + "epoch": 0.1660696458981737, + "grad_norm": 0.0, + "learning_rate": 1.9044639343449262e-05, + "loss": 1.4141, + "step": 5656 + }, + { + "epoch": 0.1660990075753127, + "grad_norm": 0.0, + "learning_rate": 1.9044233670311574e-05, + "loss": 1.4844, + "step": 5657 + }, + { + "epoch": 0.1661283692524517, + "grad_norm": 0.0, + "learning_rate": 1.904382791538457e-05, + "loss": 1.4658, + "step": 5658 + }, + { + "epoch": 0.16615773092959069, + "grad_norm": 0.0, + "learning_rate": 1.9043422078671903e-05, + "loss": 1.5693, + "step": 5659 + }, + { + "epoch": 0.1661870926067297, + "grad_norm": 0.0, + "learning_rate": 1.9043016160177262e-05, + "loss": 1.4219, + "step": 5660 + }, + { + "epoch": 0.1662164542838687, + "grad_norm": 0.0, + "learning_rate": 1.904261015990431e-05, + "loss": 1.3994, + "step": 5661 + }, + { + "epoch": 0.16624581596100768, + "grad_norm": 0.0, + "learning_rate": 1.9042204077856717e-05, + "loss": 1.3213, + "step": 5662 + }, + { + "epoch": 0.1662751776381467, + "grad_norm": 0.0, + "learning_rate": 1.9041797914038156e-05, + "loss": 1.4248, + "step": 5663 + }, + { + "epoch": 0.1663045393152857, + "grad_norm": 0.0, + "learning_rate": 1.90413916684523e-05, + "loss": 1.4473, + "step": 5664 + }, + { + "epoch": 0.16633390099242468, + "grad_norm": 0.0, + "learning_rate": 1.9040985341102826e-05, + "loss": 1.6182, + "step": 5665 + }, + { + "epoch": 0.1663632626695637, + "grad_norm": 0.0, + "learning_rate": 1.9040578931993403e-05, + "loss": 1.4297, + "step": 5666 + }, + { + "epoch": 0.16639262434670268, + "grad_norm": 0.0, + "learning_rate": 1.9040172441127713e-05, + "loss": 1.3779, + "step": 5667 + }, + { + "epoch": 0.16642198602384167, + "grad_norm": 0.0, + "learning_rate": 1.9039765868509425e-05, + "loss": 1.5039, + "step": 5668 + }, + { + "epoch": 0.1664513477009807, + "grad_norm": 0.0, + "learning_rate": 1.9039359214142222e-05, + "loss": 1.6299, + "step": 5669 + }, + { + "epoch": 0.16648070937811968, + "grad_norm": 0.0, + "learning_rate": 1.9038952478029777e-05, + "loss": 1.3418, + "step": 5670 + }, + { + "epoch": 0.16651007105525867, + "grad_norm": 0.0, + "learning_rate": 1.903854566017577e-05, + "loss": 1.4326, + "step": 5671 + }, + { + "epoch": 0.16653943273239769, + "grad_norm": 0.0, + "learning_rate": 1.903813876058388e-05, + "loss": 1.543, + "step": 5672 + }, + { + "epoch": 0.16656879440953667, + "grad_norm": 0.0, + "learning_rate": 1.9037731779257786e-05, + "loss": 1.5889, + "step": 5673 + }, + { + "epoch": 0.16659815608667566, + "grad_norm": 0.0, + "learning_rate": 1.903732471620117e-05, + "loss": 1.4219, + "step": 5674 + }, + { + "epoch": 0.16662751776381468, + "grad_norm": 0.0, + "learning_rate": 1.903691757141771e-05, + "loss": 1.4248, + "step": 5675 + }, + { + "epoch": 0.16665687944095367, + "grad_norm": 0.0, + "learning_rate": 1.903651034491109e-05, + "loss": 1.4277, + "step": 5676 + }, + { + "epoch": 0.16668624111809266, + "grad_norm": 0.0, + "learning_rate": 1.9036103036684997e-05, + "loss": 1.4492, + "step": 5677 + }, + { + "epoch": 0.16671560279523168, + "grad_norm": 0.0, + "learning_rate": 1.9035695646743107e-05, + "loss": 1.4551, + "step": 5678 + }, + { + "epoch": 0.16674496447237067, + "grad_norm": 0.0, + "learning_rate": 1.9035288175089108e-05, + "loss": 1.4609, + "step": 5679 + }, + { + "epoch": 0.16677432614950966, + "grad_norm": 0.0, + "learning_rate": 1.9034880621726684e-05, + "loss": 1.3867, + "step": 5680 + }, + { + "epoch": 0.16680368782664864, + "grad_norm": 0.0, + "learning_rate": 1.903447298665952e-05, + "loss": 1.4932, + "step": 5681 + }, + { + "epoch": 0.16683304950378766, + "grad_norm": 0.0, + "learning_rate": 1.9034065269891303e-05, + "loss": 1.5469, + "step": 5682 + }, + { + "epoch": 0.16686241118092665, + "grad_norm": 0.0, + "learning_rate": 1.903365747142572e-05, + "loss": 1.4189, + "step": 5683 + }, + { + "epoch": 0.16689177285806564, + "grad_norm": 0.0, + "learning_rate": 1.903324959126646e-05, + "loss": 1.3838, + "step": 5684 + }, + { + "epoch": 0.16692113453520466, + "grad_norm": 0.0, + "learning_rate": 1.903284162941721e-05, + "loss": 1.3984, + "step": 5685 + }, + { + "epoch": 0.16695049621234365, + "grad_norm": 0.0, + "learning_rate": 1.903243358588166e-05, + "loss": 1.5049, + "step": 5686 + }, + { + "epoch": 0.16697985788948264, + "grad_norm": 0.0, + "learning_rate": 1.90320254606635e-05, + "loss": 1.4629, + "step": 5687 + }, + { + "epoch": 0.16700921956662165, + "grad_norm": 0.0, + "learning_rate": 1.9031617253766418e-05, + "loss": 1.5068, + "step": 5688 + }, + { + "epoch": 0.16703858124376064, + "grad_norm": 0.0, + "learning_rate": 1.903120896519411e-05, + "loss": 1.4414, + "step": 5689 + }, + { + "epoch": 0.16706794292089963, + "grad_norm": 0.0, + "learning_rate": 1.9030800594950268e-05, + "loss": 1.3301, + "step": 5690 + }, + { + "epoch": 0.16709730459803865, + "grad_norm": 0.0, + "learning_rate": 1.903039214303858e-05, + "loss": 1.4043, + "step": 5691 + }, + { + "epoch": 0.16712666627517764, + "grad_norm": 0.0, + "learning_rate": 1.9029983609462746e-05, + "loss": 1.3418, + "step": 5692 + }, + { + "epoch": 0.16715602795231663, + "grad_norm": 0.0, + "learning_rate": 1.9029574994226457e-05, + "loss": 1.7188, + "step": 5693 + }, + { + "epoch": 0.16718538962945564, + "grad_norm": 0.0, + "learning_rate": 1.902916629733341e-05, + "loss": 1.4268, + "step": 5694 + }, + { + "epoch": 0.16721475130659463, + "grad_norm": 0.0, + "learning_rate": 1.9028757518787296e-05, + "loss": 1.4717, + "step": 5695 + }, + { + "epoch": 0.16724411298373362, + "grad_norm": 0.0, + "learning_rate": 1.902834865859182e-05, + "loss": 1.6074, + "step": 5696 + }, + { + "epoch": 0.16727347466087264, + "grad_norm": 0.0, + "learning_rate": 1.9027939716750672e-05, + "loss": 1.5479, + "step": 5697 + }, + { + "epoch": 0.16730283633801163, + "grad_norm": 0.0, + "learning_rate": 1.9027530693267552e-05, + "loss": 1.3599, + "step": 5698 + }, + { + "epoch": 0.16733219801515062, + "grad_norm": 0.0, + "learning_rate": 1.9027121588146163e-05, + "loss": 1.5664, + "step": 5699 + }, + { + "epoch": 0.16736155969228964, + "grad_norm": 0.0, + "learning_rate": 1.9026712401390203e-05, + "loss": 1.457, + "step": 5700 + }, + { + "epoch": 0.16739092136942862, + "grad_norm": 0.0, + "learning_rate": 1.9026303133003366e-05, + "loss": 1.5732, + "step": 5701 + }, + { + "epoch": 0.16742028304656761, + "grad_norm": 0.0, + "learning_rate": 1.902589378298936e-05, + "loss": 1.4658, + "step": 5702 + }, + { + "epoch": 0.16744964472370663, + "grad_norm": 0.0, + "learning_rate": 1.9025484351351884e-05, + "loss": 1.333, + "step": 5703 + }, + { + "epoch": 0.16747900640084562, + "grad_norm": 0.0, + "learning_rate": 1.9025074838094646e-05, + "loss": 1.5205, + "step": 5704 + }, + { + "epoch": 0.1675083680779846, + "grad_norm": 0.0, + "learning_rate": 1.902466524322134e-05, + "loss": 1.4912, + "step": 5705 + }, + { + "epoch": 0.1675377297551236, + "grad_norm": 0.0, + "learning_rate": 1.9024255566735678e-05, + "loss": 1.373, + "step": 5706 + }, + { + "epoch": 0.16756709143226262, + "grad_norm": 0.0, + "learning_rate": 1.902384580864136e-05, + "loss": 1.5127, + "step": 5707 + }, + { + "epoch": 0.1675964531094016, + "grad_norm": 0.0, + "learning_rate": 1.9023435968942094e-05, + "loss": 1.4531, + "step": 5708 + }, + { + "epoch": 0.1676258147865406, + "grad_norm": 0.0, + "learning_rate": 1.9023026047641583e-05, + "loss": 1.4736, + "step": 5709 + }, + { + "epoch": 0.1676551764636796, + "grad_norm": 0.0, + "learning_rate": 1.902261604474354e-05, + "loss": 1.5664, + "step": 5710 + }, + { + "epoch": 0.1676845381408186, + "grad_norm": 0.0, + "learning_rate": 1.902220596025167e-05, + "loss": 1.5039, + "step": 5711 + }, + { + "epoch": 0.1677138998179576, + "grad_norm": 0.0, + "learning_rate": 1.9021795794169676e-05, + "loss": 1.3955, + "step": 5712 + }, + { + "epoch": 0.1677432614950966, + "grad_norm": 0.0, + "learning_rate": 1.9021385546501276e-05, + "loss": 1.5879, + "step": 5713 + }, + { + "epoch": 0.1677726231722356, + "grad_norm": 0.0, + "learning_rate": 1.9020975217250176e-05, + "loss": 1.5264, + "step": 5714 + }, + { + "epoch": 0.16780198484937459, + "grad_norm": 0.0, + "learning_rate": 1.9020564806420086e-05, + "loss": 1.5596, + "step": 5715 + }, + { + "epoch": 0.1678313465265136, + "grad_norm": 0.0, + "learning_rate": 1.902015431401472e-05, + "loss": 1.5098, + "step": 5716 + }, + { + "epoch": 0.1678607082036526, + "grad_norm": 0.0, + "learning_rate": 1.9019743740037786e-05, + "loss": 1.5361, + "step": 5717 + }, + { + "epoch": 0.16789006988079158, + "grad_norm": 0.0, + "learning_rate": 1.9019333084493e-05, + "loss": 1.2998, + "step": 5718 + }, + { + "epoch": 0.1679194315579306, + "grad_norm": 0.0, + "learning_rate": 1.9018922347384077e-05, + "loss": 1.2993, + "step": 5719 + }, + { + "epoch": 0.1679487932350696, + "grad_norm": 0.0, + "learning_rate": 1.9018511528714728e-05, + "loss": 1.3838, + "step": 5720 + }, + { + "epoch": 0.16797815491220858, + "grad_norm": 0.0, + "learning_rate": 1.901810062848867e-05, + "loss": 1.4512, + "step": 5721 + }, + { + "epoch": 0.1680075165893476, + "grad_norm": 0.0, + "learning_rate": 1.9017689646709617e-05, + "loss": 1.4727, + "step": 5722 + }, + { + "epoch": 0.16803687826648658, + "grad_norm": 0.0, + "learning_rate": 1.9017278583381286e-05, + "loss": 1.5244, + "step": 5723 + }, + { + "epoch": 0.16806623994362557, + "grad_norm": 0.0, + "learning_rate": 1.90168674385074e-05, + "loss": 1.4844, + "step": 5724 + }, + { + "epoch": 0.1680956016207646, + "grad_norm": 0.0, + "learning_rate": 1.9016456212091666e-05, + "loss": 1.5, + "step": 5725 + }, + { + "epoch": 0.16812496329790358, + "grad_norm": 0.0, + "learning_rate": 1.9016044904137817e-05, + "loss": 1.5059, + "step": 5726 + }, + { + "epoch": 0.16815432497504257, + "grad_norm": 0.0, + "learning_rate": 1.901563351464956e-05, + "loss": 1.4854, + "step": 5727 + }, + { + "epoch": 0.16818368665218159, + "grad_norm": 0.0, + "learning_rate": 1.901522204363062e-05, + "loss": 1.501, + "step": 5728 + }, + { + "epoch": 0.16821304832932057, + "grad_norm": 0.0, + "learning_rate": 1.901481049108472e-05, + "loss": 1.4736, + "step": 5729 + }, + { + "epoch": 0.16824241000645956, + "grad_norm": 0.0, + "learning_rate": 1.901439885701558e-05, + "loss": 1.4277, + "step": 5730 + }, + { + "epoch": 0.16827177168359855, + "grad_norm": 0.0, + "learning_rate": 1.9013987141426922e-05, + "loss": 1.4629, + "step": 5731 + }, + { + "epoch": 0.16830113336073757, + "grad_norm": 0.0, + "learning_rate": 1.901357534432247e-05, + "loss": 1.4297, + "step": 5732 + }, + { + "epoch": 0.16833049503787656, + "grad_norm": 0.0, + "learning_rate": 1.9013163465705946e-05, + "loss": 1.4551, + "step": 5733 + }, + { + "epoch": 0.16835985671501555, + "grad_norm": 0.0, + "learning_rate": 1.901275150558108e-05, + "loss": 1.3818, + "step": 5734 + }, + { + "epoch": 0.16838921839215457, + "grad_norm": 0.0, + "learning_rate": 1.9012339463951588e-05, + "loss": 1.5674, + "step": 5735 + }, + { + "epoch": 0.16841858006929356, + "grad_norm": 0.0, + "learning_rate": 1.9011927340821207e-05, + "loss": 1.4453, + "step": 5736 + }, + { + "epoch": 0.16844794174643254, + "grad_norm": 0.0, + "learning_rate": 1.901151513619366e-05, + "loss": 1.4551, + "step": 5737 + }, + { + "epoch": 0.16847730342357156, + "grad_norm": 0.0, + "learning_rate": 1.9011102850072668e-05, + "loss": 1.4077, + "step": 5738 + }, + { + "epoch": 0.16850666510071055, + "grad_norm": 0.0, + "learning_rate": 1.901069048246197e-05, + "loss": 1.4824, + "step": 5739 + }, + { + "epoch": 0.16853602677784954, + "grad_norm": 0.0, + "learning_rate": 1.901027803336529e-05, + "loss": 1.3525, + "step": 5740 + }, + { + "epoch": 0.16856538845498856, + "grad_norm": 0.0, + "learning_rate": 1.9009865502786357e-05, + "loss": 1.3506, + "step": 5741 + }, + { + "epoch": 0.16859475013212755, + "grad_norm": 0.0, + "learning_rate": 1.9009452890728904e-05, + "loss": 1.4346, + "step": 5742 + }, + { + "epoch": 0.16862411180926654, + "grad_norm": 0.0, + "learning_rate": 1.900904019719666e-05, + "loss": 1.4414, + "step": 5743 + }, + { + "epoch": 0.16865347348640555, + "grad_norm": 0.0, + "learning_rate": 1.900862742219336e-05, + "loss": 1.4482, + "step": 5744 + }, + { + "epoch": 0.16868283516354454, + "grad_norm": 0.0, + "learning_rate": 1.900821456572273e-05, + "loss": 1.5156, + "step": 5745 + }, + { + "epoch": 0.16871219684068353, + "grad_norm": 0.0, + "learning_rate": 1.9007801627788515e-05, + "loss": 1.4971, + "step": 5746 + }, + { + "epoch": 0.16874155851782255, + "grad_norm": 0.0, + "learning_rate": 1.900738860839444e-05, + "loss": 1.543, + "step": 5747 + }, + { + "epoch": 0.16877092019496154, + "grad_norm": 0.0, + "learning_rate": 1.9006975507544244e-05, + "loss": 1.4746, + "step": 5748 + }, + { + "epoch": 0.16880028187210053, + "grad_norm": 0.0, + "learning_rate": 1.900656232524166e-05, + "loss": 1.335, + "step": 5749 + }, + { + "epoch": 0.16882964354923954, + "grad_norm": 0.0, + "learning_rate": 1.9006149061490426e-05, + "loss": 1.4561, + "step": 5750 + }, + { + "epoch": 0.16885900522637853, + "grad_norm": 0.0, + "learning_rate": 1.900573571629428e-05, + "loss": 1.4482, + "step": 5751 + }, + { + "epoch": 0.16888836690351752, + "grad_norm": 0.0, + "learning_rate": 1.9005322289656963e-05, + "loss": 1.4277, + "step": 5752 + }, + { + "epoch": 0.16891772858065654, + "grad_norm": 0.0, + "learning_rate": 1.9004908781582207e-05, + "loss": 1.4736, + "step": 5753 + }, + { + "epoch": 0.16894709025779553, + "grad_norm": 0.0, + "learning_rate": 1.9004495192073754e-05, + "loss": 1.4189, + "step": 5754 + }, + { + "epoch": 0.16897645193493452, + "grad_norm": 0.0, + "learning_rate": 1.900408152113535e-05, + "loss": 1.4775, + "step": 5755 + }, + { + "epoch": 0.16900581361207354, + "grad_norm": 0.0, + "learning_rate": 1.9003667768770724e-05, + "loss": 1.5068, + "step": 5756 + }, + { + "epoch": 0.16903517528921252, + "grad_norm": 0.0, + "learning_rate": 1.9003253934983627e-05, + "loss": 1.6289, + "step": 5757 + }, + { + "epoch": 0.16906453696635151, + "grad_norm": 0.0, + "learning_rate": 1.90028400197778e-05, + "loss": 1.5361, + "step": 5758 + }, + { + "epoch": 0.1690938986434905, + "grad_norm": 0.0, + "learning_rate": 1.9002426023156984e-05, + "loss": 1.4551, + "step": 5759 + }, + { + "epoch": 0.16912326032062952, + "grad_norm": 0.0, + "learning_rate": 1.9002011945124925e-05, + "loss": 1.4629, + "step": 5760 + }, + { + "epoch": 0.1691526219977685, + "grad_norm": 0.0, + "learning_rate": 1.9001597785685366e-05, + "loss": 1.5186, + "step": 5761 + }, + { + "epoch": 0.1691819836749075, + "grad_norm": 0.0, + "learning_rate": 1.900118354484205e-05, + "loss": 1.3486, + "step": 5762 + }, + { + "epoch": 0.16921134535204652, + "grad_norm": 0.0, + "learning_rate": 1.900076922259873e-05, + "loss": 1.4561, + "step": 5763 + }, + { + "epoch": 0.1692407070291855, + "grad_norm": 0.0, + "learning_rate": 1.9000354818959148e-05, + "loss": 1.335, + "step": 5764 + }, + { + "epoch": 0.1692700687063245, + "grad_norm": 0.0, + "learning_rate": 1.8999940333927053e-05, + "loss": 1.4258, + "step": 5765 + }, + { + "epoch": 0.1692994303834635, + "grad_norm": 0.0, + "learning_rate": 1.899952576750619e-05, + "loss": 1.4346, + "step": 5766 + }, + { + "epoch": 0.1693287920606025, + "grad_norm": 0.0, + "learning_rate": 1.8999111119700313e-05, + "loss": 1.4268, + "step": 5767 + }, + { + "epoch": 0.1693581537377415, + "grad_norm": 0.0, + "learning_rate": 1.8998696390513167e-05, + "loss": 1.5234, + "step": 5768 + }, + { + "epoch": 0.1693875154148805, + "grad_norm": 0.0, + "learning_rate": 1.899828157994851e-05, + "loss": 1.3467, + "step": 5769 + }, + { + "epoch": 0.1694168770920195, + "grad_norm": 0.0, + "learning_rate": 1.8997866688010084e-05, + "loss": 1.4404, + "step": 5770 + }, + { + "epoch": 0.16944623876915849, + "grad_norm": 0.0, + "learning_rate": 1.8997451714701645e-05, + "loss": 1.4414, + "step": 5771 + }, + { + "epoch": 0.1694756004462975, + "grad_norm": 0.0, + "learning_rate": 1.8997036660026947e-05, + "loss": 1.5264, + "step": 5772 + }, + { + "epoch": 0.1695049621234365, + "grad_norm": 0.0, + "learning_rate": 1.899662152398974e-05, + "loss": 1.5342, + "step": 5773 + }, + { + "epoch": 0.16953432380057548, + "grad_norm": 0.0, + "learning_rate": 1.8996206306593783e-05, + "loss": 1.3301, + "step": 5774 + }, + { + "epoch": 0.1695636854777145, + "grad_norm": 0.0, + "learning_rate": 1.899579100784283e-05, + "loss": 1.4805, + "step": 5775 + }, + { + "epoch": 0.1695930471548535, + "grad_norm": 0.0, + "learning_rate": 1.899537562774063e-05, + "loss": 1.4531, + "step": 5776 + }, + { + "epoch": 0.16962240883199248, + "grad_norm": 0.0, + "learning_rate": 1.899496016629095e-05, + "loss": 1.4033, + "step": 5777 + }, + { + "epoch": 0.1696517705091315, + "grad_norm": 0.0, + "learning_rate": 1.8994544623497537e-05, + "loss": 1.4561, + "step": 5778 + }, + { + "epoch": 0.16968113218627048, + "grad_norm": 0.0, + "learning_rate": 1.8994128999364157e-05, + "loss": 1.4873, + "step": 5779 + }, + { + "epoch": 0.16971049386340947, + "grad_norm": 0.0, + "learning_rate": 1.8993713293894564e-05, + "loss": 1.4004, + "step": 5780 + }, + { + "epoch": 0.1697398555405485, + "grad_norm": 0.0, + "learning_rate": 1.8993297507092515e-05, + "loss": 1.3906, + "step": 5781 + }, + { + "epoch": 0.16976921721768748, + "grad_norm": 0.0, + "learning_rate": 1.8992881638961778e-05, + "loss": 1.4707, + "step": 5782 + }, + { + "epoch": 0.16979857889482647, + "grad_norm": 0.0, + "learning_rate": 1.899246568950611e-05, + "loss": 1.457, + "step": 5783 + }, + { + "epoch": 0.16982794057196546, + "grad_norm": 0.0, + "learning_rate": 1.8992049658729267e-05, + "loss": 1.54, + "step": 5784 + }, + { + "epoch": 0.16985730224910447, + "grad_norm": 0.0, + "learning_rate": 1.8991633546635016e-05, + "loss": 1.5166, + "step": 5785 + }, + { + "epoch": 0.16988666392624346, + "grad_norm": 0.0, + "learning_rate": 1.8991217353227124e-05, + "loss": 1.4229, + "step": 5786 + }, + { + "epoch": 0.16991602560338245, + "grad_norm": 0.0, + "learning_rate": 1.899080107850935e-05, + "loss": 1.5527, + "step": 5787 + }, + { + "epoch": 0.16994538728052147, + "grad_norm": 0.0, + "learning_rate": 1.8990384722485456e-05, + "loss": 1.2773, + "step": 5788 + }, + { + "epoch": 0.16997474895766046, + "grad_norm": 0.0, + "learning_rate": 1.8989968285159214e-05, + "loss": 1.4541, + "step": 5789 + }, + { + "epoch": 0.17000411063479945, + "grad_norm": 0.0, + "learning_rate": 1.8989551766534386e-05, + "loss": 1.3848, + "step": 5790 + }, + { + "epoch": 0.17003347231193847, + "grad_norm": 0.0, + "learning_rate": 1.8989135166614738e-05, + "loss": 1.4756, + "step": 5791 + }, + { + "epoch": 0.17006283398907746, + "grad_norm": 0.0, + "learning_rate": 1.8988718485404037e-05, + "loss": 1.4492, + "step": 5792 + }, + { + "epoch": 0.17009219566621644, + "grad_norm": 0.0, + "learning_rate": 1.8988301722906058e-05, + "loss": 1.4072, + "step": 5793 + }, + { + "epoch": 0.17012155734335546, + "grad_norm": 0.0, + "learning_rate": 1.898788487912456e-05, + "loss": 1.4268, + "step": 5794 + }, + { + "epoch": 0.17015091902049445, + "grad_norm": 0.0, + "learning_rate": 1.898746795406332e-05, + "loss": 1.457, + "step": 5795 + }, + { + "epoch": 0.17018028069763344, + "grad_norm": 0.0, + "learning_rate": 1.8987050947726103e-05, + "loss": 1.5273, + "step": 5796 + }, + { + "epoch": 0.17020964237477246, + "grad_norm": 0.0, + "learning_rate": 1.8986633860116685e-05, + "loss": 1.4941, + "step": 5797 + }, + { + "epoch": 0.17023900405191145, + "grad_norm": 0.0, + "learning_rate": 1.8986216691238833e-05, + "loss": 1.4316, + "step": 5798 + }, + { + "epoch": 0.17026836572905044, + "grad_norm": 0.0, + "learning_rate": 1.898579944109632e-05, + "loss": 1.4785, + "step": 5799 + }, + { + "epoch": 0.17029772740618945, + "grad_norm": 0.0, + "learning_rate": 1.898538210969293e-05, + "loss": 1.3828, + "step": 5800 + }, + { + "epoch": 0.17032708908332844, + "grad_norm": 0.0, + "learning_rate": 1.8984964697032418e-05, + "loss": 1.3506, + "step": 5801 + }, + { + "epoch": 0.17035645076046743, + "grad_norm": 0.0, + "learning_rate": 1.8984547203118575e-05, + "loss": 1.4346, + "step": 5802 + }, + { + "epoch": 0.17038581243760645, + "grad_norm": 0.0, + "learning_rate": 1.898412962795517e-05, + "loss": 1.4746, + "step": 5803 + }, + { + "epoch": 0.17041517411474544, + "grad_norm": 0.0, + "learning_rate": 1.898371197154598e-05, + "loss": 1.5537, + "step": 5804 + }, + { + "epoch": 0.17044453579188443, + "grad_norm": 0.0, + "learning_rate": 1.898329423389478e-05, + "loss": 1.3936, + "step": 5805 + }, + { + "epoch": 0.17047389746902344, + "grad_norm": 0.0, + "learning_rate": 1.8982876415005353e-05, + "loss": 1.3955, + "step": 5806 + }, + { + "epoch": 0.17050325914616243, + "grad_norm": 0.0, + "learning_rate": 1.898245851488147e-05, + "loss": 1.4824, + "step": 5807 + }, + { + "epoch": 0.17053262082330142, + "grad_norm": 0.0, + "learning_rate": 1.898204053352692e-05, + "loss": 1.3662, + "step": 5808 + }, + { + "epoch": 0.1705619825004404, + "grad_norm": 0.0, + "learning_rate": 1.8981622470945472e-05, + "loss": 1.418, + "step": 5809 + }, + { + "epoch": 0.17059134417757943, + "grad_norm": 0.0, + "learning_rate": 1.8981204327140914e-05, + "loss": 1.3936, + "step": 5810 + }, + { + "epoch": 0.17062070585471842, + "grad_norm": 0.0, + "learning_rate": 1.8980786102117023e-05, + "loss": 1.4297, + "step": 5811 + }, + { + "epoch": 0.1706500675318574, + "grad_norm": 0.0, + "learning_rate": 1.8980367795877587e-05, + "loss": 1.4404, + "step": 5812 + }, + { + "epoch": 0.17067942920899642, + "grad_norm": 0.0, + "learning_rate": 1.897994940842638e-05, + "loss": 1.3857, + "step": 5813 + }, + { + "epoch": 0.17070879088613541, + "grad_norm": 0.0, + "learning_rate": 1.8979530939767194e-05, + "loss": 1.5586, + "step": 5814 + }, + { + "epoch": 0.1707381525632744, + "grad_norm": 0.0, + "learning_rate": 1.897911238990381e-05, + "loss": 1.6133, + "step": 5815 + }, + { + "epoch": 0.17076751424041342, + "grad_norm": 0.0, + "learning_rate": 1.897869375884001e-05, + "loss": 1.5068, + "step": 5816 + }, + { + "epoch": 0.1707968759175524, + "grad_norm": 0.0, + "learning_rate": 1.8978275046579588e-05, + "loss": 1.627, + "step": 5817 + }, + { + "epoch": 0.1708262375946914, + "grad_norm": 0.0, + "learning_rate": 1.8977856253126323e-05, + "loss": 1.4795, + "step": 5818 + }, + { + "epoch": 0.17085559927183042, + "grad_norm": 0.0, + "learning_rate": 1.8977437378484004e-05, + "loss": 1.5703, + "step": 5819 + }, + { + "epoch": 0.1708849609489694, + "grad_norm": 0.0, + "learning_rate": 1.897701842265642e-05, + "loss": 1.2676, + "step": 5820 + }, + { + "epoch": 0.1709143226261084, + "grad_norm": 0.0, + "learning_rate": 1.897659938564736e-05, + "loss": 1.4707, + "step": 5821 + }, + { + "epoch": 0.1709436843032474, + "grad_norm": 0.0, + "learning_rate": 1.8976180267460612e-05, + "loss": 1.3828, + "step": 5822 + }, + { + "epoch": 0.1709730459803864, + "grad_norm": 0.0, + "learning_rate": 1.8975761068099967e-05, + "loss": 1.4941, + "step": 5823 + }, + { + "epoch": 0.1710024076575254, + "grad_norm": 0.0, + "learning_rate": 1.897534178756922e-05, + "loss": 1.5166, + "step": 5824 + }, + { + "epoch": 0.1710317693346644, + "grad_norm": 0.0, + "learning_rate": 1.897492242587215e-05, + "loss": 1.375, + "step": 5825 + }, + { + "epoch": 0.1710611310118034, + "grad_norm": 0.0, + "learning_rate": 1.8974502983012565e-05, + "loss": 1.373, + "step": 5826 + }, + { + "epoch": 0.17109049268894239, + "grad_norm": 0.0, + "learning_rate": 1.8974083458994247e-05, + "loss": 1.4238, + "step": 5827 + }, + { + "epoch": 0.1711198543660814, + "grad_norm": 0.0, + "learning_rate": 1.8973663853820994e-05, + "loss": 1.3418, + "step": 5828 + }, + { + "epoch": 0.1711492160432204, + "grad_norm": 0.0, + "learning_rate": 1.8973244167496605e-05, + "loss": 1.3398, + "step": 5829 + }, + { + "epoch": 0.17117857772035938, + "grad_norm": 0.0, + "learning_rate": 1.8972824400024867e-05, + "loss": 1.5098, + "step": 5830 + }, + { + "epoch": 0.1712079393974984, + "grad_norm": 0.0, + "learning_rate": 1.8972404551409584e-05, + "loss": 1.4512, + "step": 5831 + }, + { + "epoch": 0.1712373010746374, + "grad_norm": 0.0, + "learning_rate": 1.8971984621654543e-05, + "loss": 1.3369, + "step": 5832 + }, + { + "epoch": 0.17126666275177638, + "grad_norm": 0.0, + "learning_rate": 1.8971564610763554e-05, + "loss": 1.4756, + "step": 5833 + }, + { + "epoch": 0.17129602442891537, + "grad_norm": 0.0, + "learning_rate": 1.8971144518740404e-05, + "loss": 1.5869, + "step": 5834 + }, + { + "epoch": 0.17132538610605438, + "grad_norm": 0.0, + "learning_rate": 1.8970724345588897e-05, + "loss": 1.5332, + "step": 5835 + }, + { + "epoch": 0.17135474778319337, + "grad_norm": 0.0, + "learning_rate": 1.8970304091312833e-05, + "loss": 1.3857, + "step": 5836 + }, + { + "epoch": 0.17138410946033236, + "grad_norm": 0.0, + "learning_rate": 1.8969883755916014e-05, + "loss": 1.4961, + "step": 5837 + }, + { + "epoch": 0.17141347113747138, + "grad_norm": 0.0, + "learning_rate": 1.8969463339402233e-05, + "loss": 1.4736, + "step": 5838 + }, + { + "epoch": 0.17144283281461037, + "grad_norm": 0.0, + "learning_rate": 1.89690428417753e-05, + "loss": 1.6201, + "step": 5839 + }, + { + "epoch": 0.17147219449174936, + "grad_norm": 0.0, + "learning_rate": 1.896862226303902e-05, + "loss": 1.4014, + "step": 5840 + }, + { + "epoch": 0.17150155616888837, + "grad_norm": 0.0, + "learning_rate": 1.896820160319719e-05, + "loss": 1.4521, + "step": 5841 + }, + { + "epoch": 0.17153091784602736, + "grad_norm": 0.0, + "learning_rate": 1.8967780862253615e-05, + "loss": 1.4854, + "step": 5842 + }, + { + "epoch": 0.17156027952316635, + "grad_norm": 0.0, + "learning_rate": 1.89673600402121e-05, + "loss": 1.2988, + "step": 5843 + }, + { + "epoch": 0.17158964120030537, + "grad_norm": 0.0, + "learning_rate": 1.8966939137076453e-05, + "loss": 1.334, + "step": 5844 + }, + { + "epoch": 0.17161900287744436, + "grad_norm": 0.0, + "learning_rate": 1.8966518152850478e-05, + "loss": 1.5312, + "step": 5845 + }, + { + "epoch": 0.17164836455458335, + "grad_norm": 0.0, + "learning_rate": 1.8966097087537985e-05, + "loss": 1.6123, + "step": 5846 + }, + { + "epoch": 0.17167772623172237, + "grad_norm": 0.0, + "learning_rate": 1.896567594114278e-05, + "loss": 1.4326, + "step": 5847 + }, + { + "epoch": 0.17170708790886136, + "grad_norm": 0.0, + "learning_rate": 1.8965254713668667e-05, + "loss": 1.5225, + "step": 5848 + }, + { + "epoch": 0.17173644958600034, + "grad_norm": 0.0, + "learning_rate": 1.8964833405119462e-05, + "loss": 1.335, + "step": 5849 + }, + { + "epoch": 0.17176581126313936, + "grad_norm": 0.0, + "learning_rate": 1.8964412015498975e-05, + "loss": 1.293, + "step": 5850 + }, + { + "epoch": 0.17179517294027835, + "grad_norm": 0.0, + "learning_rate": 1.896399054481101e-05, + "loss": 1.4795, + "step": 5851 + }, + { + "epoch": 0.17182453461741734, + "grad_norm": 0.0, + "learning_rate": 1.8963568993059386e-05, + "loss": 1.4014, + "step": 5852 + }, + { + "epoch": 0.17185389629455636, + "grad_norm": 0.0, + "learning_rate": 1.8963147360247913e-05, + "loss": 1.4082, + "step": 5853 + }, + { + "epoch": 0.17188325797169535, + "grad_norm": 0.0, + "learning_rate": 1.89627256463804e-05, + "loss": 1.4609, + "step": 5854 + }, + { + "epoch": 0.17191261964883434, + "grad_norm": 0.0, + "learning_rate": 1.8962303851460664e-05, + "loss": 1.2598, + "step": 5855 + }, + { + "epoch": 0.17194198132597335, + "grad_norm": 0.0, + "learning_rate": 1.896188197549252e-05, + "loss": 1.3994, + "step": 5856 + }, + { + "epoch": 0.17197134300311234, + "grad_norm": 0.0, + "learning_rate": 1.8961460018479784e-05, + "loss": 1.4414, + "step": 5857 + }, + { + "epoch": 0.17200070468025133, + "grad_norm": 0.0, + "learning_rate": 1.8961037980426265e-05, + "loss": 1.458, + "step": 5858 + }, + { + "epoch": 0.17203006635739032, + "grad_norm": 0.0, + "learning_rate": 1.8960615861335787e-05, + "loss": 1.4209, + "step": 5859 + }, + { + "epoch": 0.17205942803452934, + "grad_norm": 0.0, + "learning_rate": 1.8960193661212166e-05, + "loss": 1.3887, + "step": 5860 + }, + { + "epoch": 0.17208878971166833, + "grad_norm": 0.0, + "learning_rate": 1.8959771380059216e-05, + "loss": 1.4707, + "step": 5861 + }, + { + "epoch": 0.17211815138880732, + "grad_norm": 0.0, + "learning_rate": 1.8959349017880764e-05, + "loss": 1.582, + "step": 5862 + }, + { + "epoch": 0.17214751306594633, + "grad_norm": 0.0, + "learning_rate": 1.895892657468062e-05, + "loss": 1.417, + "step": 5863 + }, + { + "epoch": 0.17217687474308532, + "grad_norm": 0.0, + "learning_rate": 1.895850405046261e-05, + "loss": 1.3223, + "step": 5864 + }, + { + "epoch": 0.1722062364202243, + "grad_norm": 0.0, + "learning_rate": 1.895808144523055e-05, + "loss": 1.5283, + "step": 5865 + }, + { + "epoch": 0.17223559809736333, + "grad_norm": 0.0, + "learning_rate": 1.8957658758988272e-05, + "loss": 1.2563, + "step": 5866 + }, + { + "epoch": 0.17226495977450232, + "grad_norm": 0.0, + "learning_rate": 1.895723599173959e-05, + "loss": 1.3398, + "step": 5867 + }, + { + "epoch": 0.1722943214516413, + "grad_norm": 0.0, + "learning_rate": 1.8956813143488327e-05, + "loss": 1.5889, + "step": 5868 + }, + { + "epoch": 0.17232368312878033, + "grad_norm": 0.0, + "learning_rate": 1.8956390214238308e-05, + "loss": 1.4805, + "step": 5869 + }, + { + "epoch": 0.17235304480591931, + "grad_norm": 0.0, + "learning_rate": 1.895596720399336e-05, + "loss": 1.4697, + "step": 5870 + }, + { + "epoch": 0.1723824064830583, + "grad_norm": 0.0, + "learning_rate": 1.8955544112757306e-05, + "loss": 1.4697, + "step": 5871 + }, + { + "epoch": 0.17241176816019732, + "grad_norm": 0.0, + "learning_rate": 1.8955120940533972e-05, + "loss": 1.4521, + "step": 5872 + }, + { + "epoch": 0.1724411298373363, + "grad_norm": 0.0, + "learning_rate": 1.8954697687327187e-05, + "loss": 1.5723, + "step": 5873 + }, + { + "epoch": 0.1724704915144753, + "grad_norm": 0.0, + "learning_rate": 1.895427435314078e-05, + "loss": 1.3496, + "step": 5874 + }, + { + "epoch": 0.17249985319161432, + "grad_norm": 0.0, + "learning_rate": 1.8953850937978575e-05, + "loss": 1.3994, + "step": 5875 + }, + { + "epoch": 0.1725292148687533, + "grad_norm": 0.0, + "learning_rate": 1.8953427441844404e-05, + "loss": 1.4814, + "step": 5876 + }, + { + "epoch": 0.1725585765458923, + "grad_norm": 0.0, + "learning_rate": 1.8953003864742093e-05, + "loss": 1.5586, + "step": 5877 + }, + { + "epoch": 0.1725879382230313, + "grad_norm": 0.0, + "learning_rate": 1.8952580206675477e-05, + "loss": 1.4072, + "step": 5878 + }, + { + "epoch": 0.1726172999001703, + "grad_norm": 0.0, + "learning_rate": 1.8952156467648385e-05, + "loss": 1.417, + "step": 5879 + }, + { + "epoch": 0.1726466615773093, + "grad_norm": 0.0, + "learning_rate": 1.895173264766465e-05, + "loss": 1.3369, + "step": 5880 + }, + { + "epoch": 0.1726760232544483, + "grad_norm": 0.0, + "learning_rate": 1.8951308746728103e-05, + "loss": 1.5762, + "step": 5881 + }, + { + "epoch": 0.1727053849315873, + "grad_norm": 0.0, + "learning_rate": 1.8950884764842577e-05, + "loss": 1.4268, + "step": 5882 + }, + { + "epoch": 0.1727347466087263, + "grad_norm": 0.0, + "learning_rate": 1.895046070201191e-05, + "loss": 1.4126, + "step": 5883 + }, + { + "epoch": 0.17276410828586528, + "grad_norm": 0.0, + "learning_rate": 1.8950036558239935e-05, + "loss": 1.3652, + "step": 5884 + }, + { + "epoch": 0.1727934699630043, + "grad_norm": 0.0, + "learning_rate": 1.8949612333530487e-05, + "loss": 1.2686, + "step": 5885 + }, + { + "epoch": 0.17282283164014328, + "grad_norm": 0.0, + "learning_rate": 1.8949188027887403e-05, + "loss": 1.5469, + "step": 5886 + }, + { + "epoch": 0.17285219331728227, + "grad_norm": 0.0, + "learning_rate": 1.894876364131452e-05, + "loss": 1.4736, + "step": 5887 + }, + { + "epoch": 0.1728815549944213, + "grad_norm": 0.0, + "learning_rate": 1.8948339173815673e-05, + "loss": 1.5273, + "step": 5888 + }, + { + "epoch": 0.17291091667156028, + "grad_norm": 0.0, + "learning_rate": 1.8947914625394705e-05, + "loss": 1.582, + "step": 5889 + }, + { + "epoch": 0.17294027834869927, + "grad_norm": 0.0, + "learning_rate": 1.8947489996055453e-05, + "loss": 1.4248, + "step": 5890 + }, + { + "epoch": 0.17296964002583828, + "grad_norm": 0.0, + "learning_rate": 1.8947065285801755e-05, + "loss": 1.3525, + "step": 5891 + }, + { + "epoch": 0.17299900170297727, + "grad_norm": 0.0, + "learning_rate": 1.8946640494637458e-05, + "loss": 1.4092, + "step": 5892 + }, + { + "epoch": 0.17302836338011626, + "grad_norm": 0.0, + "learning_rate": 1.89462156225664e-05, + "loss": 1.2832, + "step": 5893 + }, + { + "epoch": 0.17305772505725528, + "grad_norm": 0.0, + "learning_rate": 1.894579066959242e-05, + "loss": 1.1587, + "step": 5894 + }, + { + "epoch": 0.17308708673439427, + "grad_norm": 0.0, + "learning_rate": 1.894536563571937e-05, + "loss": 1.5771, + "step": 5895 + }, + { + "epoch": 0.17311644841153326, + "grad_norm": 0.0, + "learning_rate": 1.894494052095108e-05, + "loss": 1.5, + "step": 5896 + }, + { + "epoch": 0.17314581008867228, + "grad_norm": 0.0, + "learning_rate": 1.8944515325291405e-05, + "loss": 1.3936, + "step": 5897 + }, + { + "epoch": 0.17317517176581126, + "grad_norm": 0.0, + "learning_rate": 1.8944090048744188e-05, + "loss": 1.498, + "step": 5898 + }, + { + "epoch": 0.17320453344295025, + "grad_norm": 0.0, + "learning_rate": 1.8943664691313273e-05, + "loss": 1.4102, + "step": 5899 + }, + { + "epoch": 0.17323389512008927, + "grad_norm": 0.0, + "learning_rate": 1.894323925300251e-05, + "loss": 1.4297, + "step": 5900 + }, + { + "epoch": 0.17326325679722826, + "grad_norm": 0.0, + "learning_rate": 1.894281373381574e-05, + "loss": 1.5059, + "step": 5901 + }, + { + "epoch": 0.17329261847436725, + "grad_norm": 0.0, + "learning_rate": 1.8942388133756816e-05, + "loss": 1.3555, + "step": 5902 + }, + { + "epoch": 0.17332198015150627, + "grad_norm": 0.0, + "learning_rate": 1.894196245282959e-05, + "loss": 1.5029, + "step": 5903 + }, + { + "epoch": 0.17335134182864526, + "grad_norm": 0.0, + "learning_rate": 1.89415366910379e-05, + "loss": 1.4688, + "step": 5904 + }, + { + "epoch": 0.17338070350578424, + "grad_norm": 0.0, + "learning_rate": 1.894111084838561e-05, + "loss": 1.3203, + "step": 5905 + }, + { + "epoch": 0.17341006518292326, + "grad_norm": 0.0, + "learning_rate": 1.8940684924876565e-05, + "loss": 1.3848, + "step": 5906 + }, + { + "epoch": 0.17343942686006225, + "grad_norm": 0.0, + "learning_rate": 1.8940258920514615e-05, + "loss": 1.4336, + "step": 5907 + }, + { + "epoch": 0.17346878853720124, + "grad_norm": 0.0, + "learning_rate": 1.893983283530361e-05, + "loss": 1.4453, + "step": 5908 + }, + { + "epoch": 0.17349815021434023, + "grad_norm": 0.0, + "learning_rate": 1.8939406669247413e-05, + "loss": 1.4678, + "step": 5909 + }, + { + "epoch": 0.17352751189147925, + "grad_norm": 0.0, + "learning_rate": 1.893898042234987e-05, + "loss": 1.46, + "step": 5910 + }, + { + "epoch": 0.17355687356861824, + "grad_norm": 0.0, + "learning_rate": 1.8938554094614837e-05, + "loss": 1.4297, + "step": 5911 + }, + { + "epoch": 0.17358623524575723, + "grad_norm": 0.0, + "learning_rate": 1.893812768604617e-05, + "loss": 1.3887, + "step": 5912 + }, + { + "epoch": 0.17361559692289624, + "grad_norm": 0.0, + "learning_rate": 1.8937701196647727e-05, + "loss": 1.5264, + "step": 5913 + }, + { + "epoch": 0.17364495860003523, + "grad_norm": 0.0, + "learning_rate": 1.893727462642336e-05, + "loss": 1.4561, + "step": 5914 + }, + { + "epoch": 0.17367432027717422, + "grad_norm": 0.0, + "learning_rate": 1.8936847975376934e-05, + "loss": 1.5068, + "step": 5915 + }, + { + "epoch": 0.17370368195431324, + "grad_norm": 0.0, + "learning_rate": 1.89364212435123e-05, + "loss": 1.4717, + "step": 5916 + }, + { + "epoch": 0.17373304363145223, + "grad_norm": 0.0, + "learning_rate": 1.893599443083332e-05, + "loss": 1.4326, + "step": 5917 + }, + { + "epoch": 0.17376240530859122, + "grad_norm": 0.0, + "learning_rate": 1.8935567537343856e-05, + "loss": 1.3916, + "step": 5918 + }, + { + "epoch": 0.17379176698573023, + "grad_norm": 0.0, + "learning_rate": 1.893514056304776e-05, + "loss": 1.4141, + "step": 5919 + }, + { + "epoch": 0.17382112866286922, + "grad_norm": 0.0, + "learning_rate": 1.8934713507948907e-05, + "loss": 1.3174, + "step": 5920 + }, + { + "epoch": 0.1738504903400082, + "grad_norm": 0.0, + "learning_rate": 1.893428637205115e-05, + "loss": 1.4844, + "step": 5921 + }, + { + "epoch": 0.17387985201714723, + "grad_norm": 0.0, + "learning_rate": 1.893385915535835e-05, + "loss": 1.4082, + "step": 5922 + }, + { + "epoch": 0.17390921369428622, + "grad_norm": 0.0, + "learning_rate": 1.8933431857874378e-05, + "loss": 1.3428, + "step": 5923 + }, + { + "epoch": 0.1739385753714252, + "grad_norm": 0.0, + "learning_rate": 1.893300447960309e-05, + "loss": 1.4814, + "step": 5924 + }, + { + "epoch": 0.17396793704856423, + "grad_norm": 0.0, + "learning_rate": 1.8932577020548358e-05, + "loss": 1.334, + "step": 5925 + }, + { + "epoch": 0.17399729872570321, + "grad_norm": 0.0, + "learning_rate": 1.8932149480714042e-05, + "loss": 1.5381, + "step": 5926 + }, + { + "epoch": 0.1740266604028422, + "grad_norm": 0.0, + "learning_rate": 1.8931721860104012e-05, + "loss": 1.3564, + "step": 5927 + }, + { + "epoch": 0.17405602207998122, + "grad_norm": 0.0, + "learning_rate": 1.8931294158722133e-05, + "loss": 1.5947, + "step": 5928 + }, + { + "epoch": 0.1740853837571202, + "grad_norm": 0.0, + "learning_rate": 1.8930866376572274e-05, + "loss": 1.3955, + "step": 5929 + }, + { + "epoch": 0.1741147454342592, + "grad_norm": 0.0, + "learning_rate": 1.89304385136583e-05, + "loss": 1.335, + "step": 5930 + }, + { + "epoch": 0.17414410711139822, + "grad_norm": 0.0, + "learning_rate": 1.8930010569984087e-05, + "loss": 1.4639, + "step": 5931 + }, + { + "epoch": 0.1741734687885372, + "grad_norm": 0.0, + "learning_rate": 1.89295825455535e-05, + "loss": 1.3154, + "step": 5932 + }, + { + "epoch": 0.1742028304656762, + "grad_norm": 0.0, + "learning_rate": 1.8929154440370416e-05, + "loss": 1.3584, + "step": 5933 + }, + { + "epoch": 0.17423219214281518, + "grad_norm": 0.0, + "learning_rate": 1.8928726254438694e-05, + "loss": 1.5283, + "step": 5934 + }, + { + "epoch": 0.1742615538199542, + "grad_norm": 0.0, + "learning_rate": 1.8928297987762218e-05, + "loss": 1.3828, + "step": 5935 + }, + { + "epoch": 0.1742909154970932, + "grad_norm": 0.0, + "learning_rate": 1.8927869640344857e-05, + "loss": 1.7314, + "step": 5936 + }, + { + "epoch": 0.17432027717423218, + "grad_norm": 0.0, + "learning_rate": 1.892744121219048e-05, + "loss": 1.291, + "step": 5937 + }, + { + "epoch": 0.1743496388513712, + "grad_norm": 0.0, + "learning_rate": 1.8927012703302972e-05, + "loss": 1.4238, + "step": 5938 + }, + { + "epoch": 0.1743790005285102, + "grad_norm": 0.0, + "learning_rate": 1.89265841136862e-05, + "loss": 1.4385, + "step": 5939 + }, + { + "epoch": 0.17440836220564918, + "grad_norm": 0.0, + "learning_rate": 1.892615544334404e-05, + "loss": 1.4365, + "step": 5940 + }, + { + "epoch": 0.1744377238827882, + "grad_norm": 0.0, + "learning_rate": 1.8925726692280372e-05, + "loss": 1.4482, + "step": 5941 + }, + { + "epoch": 0.17446708555992718, + "grad_norm": 0.0, + "learning_rate": 1.892529786049907e-05, + "loss": 1.543, + "step": 5942 + }, + { + "epoch": 0.17449644723706617, + "grad_norm": 0.0, + "learning_rate": 1.8924868948004013e-05, + "loss": 1.4639, + "step": 5943 + }, + { + "epoch": 0.1745258089142052, + "grad_norm": 0.0, + "learning_rate": 1.892443995479908e-05, + "loss": 1.4873, + "step": 5944 + }, + { + "epoch": 0.17455517059134418, + "grad_norm": 0.0, + "learning_rate": 1.8924010880888154e-05, + "loss": 1.4736, + "step": 5945 + }, + { + "epoch": 0.17458453226848317, + "grad_norm": 0.0, + "learning_rate": 1.892358172627511e-05, + "loss": 1.3447, + "step": 5946 + }, + { + "epoch": 0.17461389394562218, + "grad_norm": 0.0, + "learning_rate": 1.892315249096383e-05, + "loss": 1.4717, + "step": 5947 + }, + { + "epoch": 0.17464325562276117, + "grad_norm": 0.0, + "learning_rate": 1.8922723174958203e-05, + "loss": 1.4736, + "step": 5948 + }, + { + "epoch": 0.17467261729990016, + "grad_norm": 0.0, + "learning_rate": 1.89222937782621e-05, + "loss": 1.4531, + "step": 5949 + }, + { + "epoch": 0.17470197897703918, + "grad_norm": 0.0, + "learning_rate": 1.892186430087941e-05, + "loss": 1.5488, + "step": 5950 + }, + { + "epoch": 0.17473134065417817, + "grad_norm": 0.0, + "learning_rate": 1.8921434742814015e-05, + "loss": 1.4404, + "step": 5951 + }, + { + "epoch": 0.17476070233131716, + "grad_norm": 0.0, + "learning_rate": 1.8921005104069803e-05, + "loss": 1.3867, + "step": 5952 + }, + { + "epoch": 0.17479006400845618, + "grad_norm": 0.0, + "learning_rate": 1.8920575384650656e-05, + "loss": 1.4424, + "step": 5953 + }, + { + "epoch": 0.17481942568559516, + "grad_norm": 0.0, + "learning_rate": 1.892014558456046e-05, + "loss": 1.5381, + "step": 5954 + }, + { + "epoch": 0.17484878736273415, + "grad_norm": 0.0, + "learning_rate": 1.8919715703803107e-05, + "loss": 1.3574, + "step": 5955 + }, + { + "epoch": 0.17487814903987317, + "grad_norm": 0.0, + "learning_rate": 1.8919285742382476e-05, + "loss": 1.415, + "step": 5956 + }, + { + "epoch": 0.17490751071701216, + "grad_norm": 0.0, + "learning_rate": 1.891885570030246e-05, + "loss": 1.2163, + "step": 5957 + }, + { + "epoch": 0.17493687239415115, + "grad_norm": 0.0, + "learning_rate": 1.8918425577566952e-05, + "loss": 1.4199, + "step": 5958 + }, + { + "epoch": 0.17496623407129014, + "grad_norm": 0.0, + "learning_rate": 1.8917995374179834e-05, + "loss": 1.3818, + "step": 5959 + }, + { + "epoch": 0.17499559574842916, + "grad_norm": 0.0, + "learning_rate": 1.8917565090145e-05, + "loss": 1.3516, + "step": 5960 + }, + { + "epoch": 0.17502495742556815, + "grad_norm": 0.0, + "learning_rate": 1.891713472546634e-05, + "loss": 1.5576, + "step": 5961 + }, + { + "epoch": 0.17505431910270713, + "grad_norm": 0.0, + "learning_rate": 1.8916704280147754e-05, + "loss": 1.4658, + "step": 5962 + }, + { + "epoch": 0.17508368077984615, + "grad_norm": 0.0, + "learning_rate": 1.891627375419312e-05, + "loss": 1.3428, + "step": 5963 + }, + { + "epoch": 0.17511304245698514, + "grad_norm": 0.0, + "learning_rate": 1.891584314760634e-05, + "loss": 1.3691, + "step": 5964 + }, + { + "epoch": 0.17514240413412413, + "grad_norm": 0.0, + "learning_rate": 1.8915412460391312e-05, + "loss": 1.4941, + "step": 5965 + }, + { + "epoch": 0.17517176581126315, + "grad_norm": 0.0, + "learning_rate": 1.891498169255192e-05, + "loss": 1.3398, + "step": 5966 + }, + { + "epoch": 0.17520112748840214, + "grad_norm": 0.0, + "learning_rate": 1.891455084409207e-05, + "loss": 1.3516, + "step": 5967 + }, + { + "epoch": 0.17523048916554113, + "grad_norm": 0.0, + "learning_rate": 1.891411991501565e-05, + "loss": 1.3789, + "step": 5968 + }, + { + "epoch": 0.17525985084268014, + "grad_norm": 0.0, + "learning_rate": 1.8913688905326563e-05, + "loss": 1.2559, + "step": 5969 + }, + { + "epoch": 0.17528921251981913, + "grad_norm": 0.0, + "learning_rate": 1.89132578150287e-05, + "loss": 1.4238, + "step": 5970 + }, + { + "epoch": 0.17531857419695812, + "grad_norm": 0.0, + "learning_rate": 1.891282664412597e-05, + "loss": 1.5186, + "step": 5971 + }, + { + "epoch": 0.17534793587409714, + "grad_norm": 0.0, + "learning_rate": 1.8912395392622263e-05, + "loss": 1.293, + "step": 5972 + }, + { + "epoch": 0.17537729755123613, + "grad_norm": 0.0, + "learning_rate": 1.8911964060521483e-05, + "loss": 1.5088, + "step": 5973 + }, + { + "epoch": 0.17540665922837512, + "grad_norm": 0.0, + "learning_rate": 1.891153264782753e-05, + "loss": 1.5693, + "step": 5974 + }, + { + "epoch": 0.17543602090551413, + "grad_norm": 0.0, + "learning_rate": 1.89111011545443e-05, + "loss": 1.5498, + "step": 5975 + }, + { + "epoch": 0.17546538258265312, + "grad_norm": 0.0, + "learning_rate": 1.8910669580675706e-05, + "loss": 1.4033, + "step": 5976 + }, + { + "epoch": 0.1754947442597921, + "grad_norm": 0.0, + "learning_rate": 1.8910237926225644e-05, + "loss": 1.4199, + "step": 5977 + }, + { + "epoch": 0.17552410593693113, + "grad_norm": 0.0, + "learning_rate": 1.8909806191198016e-05, + "loss": 1.5771, + "step": 5978 + }, + { + "epoch": 0.17555346761407012, + "grad_norm": 0.0, + "learning_rate": 1.890937437559673e-05, + "loss": 1.3848, + "step": 5979 + }, + { + "epoch": 0.1755828292912091, + "grad_norm": 0.0, + "learning_rate": 1.890894247942569e-05, + "loss": 1.4795, + "step": 5980 + }, + { + "epoch": 0.17561219096834813, + "grad_norm": 0.0, + "learning_rate": 1.89085105026888e-05, + "loss": 1.4951, + "step": 5981 + }, + { + "epoch": 0.17564155264548711, + "grad_norm": 0.0, + "learning_rate": 1.890807844538997e-05, + "loss": 1.4697, + "step": 5982 + }, + { + "epoch": 0.1756709143226261, + "grad_norm": 0.0, + "learning_rate": 1.8907646307533103e-05, + "loss": 1.3755, + "step": 5983 + }, + { + "epoch": 0.1757002759997651, + "grad_norm": 0.0, + "learning_rate": 1.8907214089122112e-05, + "loss": 1.4688, + "step": 5984 + }, + { + "epoch": 0.1757296376769041, + "grad_norm": 0.0, + "learning_rate": 1.8906781790160903e-05, + "loss": 1.417, + "step": 5985 + }, + { + "epoch": 0.1757589993540431, + "grad_norm": 0.0, + "learning_rate": 1.8906349410653383e-05, + "loss": 1.3447, + "step": 5986 + }, + { + "epoch": 0.1757883610311821, + "grad_norm": 0.0, + "learning_rate": 1.8905916950603466e-05, + "loss": 1.4072, + "step": 5987 + }, + { + "epoch": 0.1758177227083211, + "grad_norm": 0.0, + "learning_rate": 1.890548441001506e-05, + "loss": 1.4707, + "step": 5988 + }, + { + "epoch": 0.1758470843854601, + "grad_norm": 0.0, + "learning_rate": 1.8905051788892076e-05, + "loss": 1.3311, + "step": 5989 + }, + { + "epoch": 0.17587644606259908, + "grad_norm": 0.0, + "learning_rate": 1.890461908723843e-05, + "loss": 1.5332, + "step": 5990 + }, + { + "epoch": 0.1759058077397381, + "grad_norm": 0.0, + "learning_rate": 1.8904186305058038e-05, + "loss": 1.6338, + "step": 5991 + }, + { + "epoch": 0.1759351694168771, + "grad_norm": 0.0, + "learning_rate": 1.8903753442354803e-05, + "loss": 1.5693, + "step": 5992 + }, + { + "epoch": 0.17596453109401608, + "grad_norm": 0.0, + "learning_rate": 1.8903320499132647e-05, + "loss": 1.4336, + "step": 5993 + }, + { + "epoch": 0.1759938927711551, + "grad_norm": 0.0, + "learning_rate": 1.8902887475395483e-05, + "loss": 1.4375, + "step": 5994 + }, + { + "epoch": 0.1760232544482941, + "grad_norm": 0.0, + "learning_rate": 1.8902454371147227e-05, + "loss": 1.4814, + "step": 5995 + }, + { + "epoch": 0.17605261612543308, + "grad_norm": 0.0, + "learning_rate": 1.8902021186391796e-05, + "loss": 1.54, + "step": 5996 + }, + { + "epoch": 0.1760819778025721, + "grad_norm": 0.0, + "learning_rate": 1.8901587921133107e-05, + "loss": 1.3799, + "step": 5997 + }, + { + "epoch": 0.17611133947971108, + "grad_norm": 0.0, + "learning_rate": 1.8901154575375084e-05, + "loss": 1.5049, + "step": 5998 + }, + { + "epoch": 0.17614070115685007, + "grad_norm": 0.0, + "learning_rate": 1.8900721149121635e-05, + "loss": 1.5625, + "step": 5999 + }, + { + "epoch": 0.1761700628339891, + "grad_norm": 0.0, + "learning_rate": 1.8900287642376686e-05, + "loss": 1.4316, + "step": 6000 + }, + { + "epoch": 0.17619942451112808, + "grad_norm": 0.0, + "learning_rate": 1.8899854055144158e-05, + "loss": 1.415, + "step": 6001 + }, + { + "epoch": 0.17622878618826707, + "grad_norm": 0.0, + "learning_rate": 1.8899420387427966e-05, + "loss": 1.4668, + "step": 6002 + }, + { + "epoch": 0.17625814786540608, + "grad_norm": 0.0, + "learning_rate": 1.8898986639232043e-05, + "loss": 1.4268, + "step": 6003 + }, + { + "epoch": 0.17628750954254507, + "grad_norm": 0.0, + "learning_rate": 1.88985528105603e-05, + "loss": 1.4609, + "step": 6004 + }, + { + "epoch": 0.17631687121968406, + "grad_norm": 0.0, + "learning_rate": 1.8898118901416668e-05, + "loss": 1.4404, + "step": 6005 + }, + { + "epoch": 0.17634623289682308, + "grad_norm": 0.0, + "learning_rate": 1.8897684911805065e-05, + "loss": 1.499, + "step": 6006 + }, + { + "epoch": 0.17637559457396207, + "grad_norm": 0.0, + "learning_rate": 1.889725084172942e-05, + "loss": 1.5576, + "step": 6007 + }, + { + "epoch": 0.17640495625110106, + "grad_norm": 0.0, + "learning_rate": 1.889681669119366e-05, + "loss": 1.4502, + "step": 6008 + }, + { + "epoch": 0.17643431792824005, + "grad_norm": 0.0, + "learning_rate": 1.8896382460201703e-05, + "loss": 1.4355, + "step": 6009 + }, + { + "epoch": 0.17646367960537906, + "grad_norm": 0.0, + "learning_rate": 1.8895948148757482e-05, + "loss": 1.4785, + "step": 6010 + }, + { + "epoch": 0.17649304128251805, + "grad_norm": 0.0, + "learning_rate": 1.8895513756864926e-05, + "loss": 1.5186, + "step": 6011 + }, + { + "epoch": 0.17652240295965704, + "grad_norm": 0.0, + "learning_rate": 1.8895079284527963e-05, + "loss": 1.4258, + "step": 6012 + }, + { + "epoch": 0.17655176463679606, + "grad_norm": 0.0, + "learning_rate": 1.8894644731750517e-05, + "loss": 1.3721, + "step": 6013 + }, + { + "epoch": 0.17658112631393505, + "grad_norm": 0.0, + "learning_rate": 1.889421009853652e-05, + "loss": 1.4854, + "step": 6014 + }, + { + "epoch": 0.17661048799107404, + "grad_norm": 0.0, + "learning_rate": 1.88937753848899e-05, + "loss": 1.4824, + "step": 6015 + }, + { + "epoch": 0.17663984966821306, + "grad_norm": 0.0, + "learning_rate": 1.88933405908146e-05, + "loss": 1.4658, + "step": 6016 + }, + { + "epoch": 0.17666921134535205, + "grad_norm": 0.0, + "learning_rate": 1.8892905716314537e-05, + "loss": 1.4229, + "step": 6017 + }, + { + "epoch": 0.17669857302249103, + "grad_norm": 0.0, + "learning_rate": 1.8892470761393648e-05, + "loss": 1.5234, + "step": 6018 + }, + { + "epoch": 0.17672793469963005, + "grad_norm": 0.0, + "learning_rate": 1.8892035726055874e-05, + "loss": 1.4736, + "step": 6019 + }, + { + "epoch": 0.17675729637676904, + "grad_norm": 0.0, + "learning_rate": 1.8891600610305142e-05, + "loss": 1.3438, + "step": 6020 + }, + { + "epoch": 0.17678665805390803, + "grad_norm": 0.0, + "learning_rate": 1.889116541414539e-05, + "loss": 1.6006, + "step": 6021 + }, + { + "epoch": 0.17681601973104705, + "grad_norm": 0.0, + "learning_rate": 1.889073013758055e-05, + "loss": 1.3164, + "step": 6022 + }, + { + "epoch": 0.17684538140818604, + "grad_norm": 0.0, + "learning_rate": 1.889029478061456e-05, + "loss": 1.5234, + "step": 6023 + }, + { + "epoch": 0.17687474308532503, + "grad_norm": 0.0, + "learning_rate": 1.888985934325136e-05, + "loss": 1.4658, + "step": 6024 + }, + { + "epoch": 0.17690410476246404, + "grad_norm": 0.0, + "learning_rate": 1.8889423825494884e-05, + "loss": 1.333, + "step": 6025 + }, + { + "epoch": 0.17693346643960303, + "grad_norm": 0.0, + "learning_rate": 1.888898822734907e-05, + "loss": 1.4229, + "step": 6026 + }, + { + "epoch": 0.17696282811674202, + "grad_norm": 0.0, + "learning_rate": 1.888855254881786e-05, + "loss": 1.4648, + "step": 6027 + }, + { + "epoch": 0.17699218979388104, + "grad_norm": 0.0, + "learning_rate": 1.8888116789905193e-05, + "loss": 1.3965, + "step": 6028 + }, + { + "epoch": 0.17702155147102003, + "grad_norm": 0.0, + "learning_rate": 1.888768095061501e-05, + "loss": 1.5322, + "step": 6029 + }, + { + "epoch": 0.17705091314815902, + "grad_norm": 0.0, + "learning_rate": 1.8887245030951252e-05, + "loss": 1.3242, + "step": 6030 + }, + { + "epoch": 0.17708027482529803, + "grad_norm": 0.0, + "learning_rate": 1.888680903091786e-05, + "loss": 1.502, + "step": 6031 + }, + { + "epoch": 0.17710963650243702, + "grad_norm": 0.0, + "learning_rate": 1.888637295051878e-05, + "loss": 1.4619, + "step": 6032 + }, + { + "epoch": 0.177138998179576, + "grad_norm": 0.0, + "learning_rate": 1.888593678975795e-05, + "loss": 1.3818, + "step": 6033 + }, + { + "epoch": 0.177168359856715, + "grad_norm": 0.0, + "learning_rate": 1.888550054863932e-05, + "loss": 1.4824, + "step": 6034 + }, + { + "epoch": 0.17719772153385402, + "grad_norm": 0.0, + "learning_rate": 1.8885064227166835e-05, + "loss": 1.46, + "step": 6035 + }, + { + "epoch": 0.177227083210993, + "grad_norm": 0.0, + "learning_rate": 1.8884627825344438e-05, + "loss": 1.5703, + "step": 6036 + }, + { + "epoch": 0.177256444888132, + "grad_norm": 0.0, + "learning_rate": 1.8884191343176076e-05, + "loss": 1.5186, + "step": 6037 + }, + { + "epoch": 0.17728580656527101, + "grad_norm": 0.0, + "learning_rate": 1.8883754780665694e-05, + "loss": 1.4355, + "step": 6038 + }, + { + "epoch": 0.17731516824241, + "grad_norm": 0.0, + "learning_rate": 1.8883318137817243e-05, + "loss": 1.4463, + "step": 6039 + }, + { + "epoch": 0.177344529919549, + "grad_norm": 0.0, + "learning_rate": 1.8882881414634676e-05, + "loss": 1.4688, + "step": 6040 + }, + { + "epoch": 0.177373891596688, + "grad_norm": 0.0, + "learning_rate": 1.8882444611121933e-05, + "loss": 1.4258, + "step": 6041 + }, + { + "epoch": 0.177403253273827, + "grad_norm": 0.0, + "learning_rate": 1.888200772728297e-05, + "loss": 1.4004, + "step": 6042 + }, + { + "epoch": 0.177432614950966, + "grad_norm": 0.0, + "learning_rate": 1.8881570763121734e-05, + "loss": 1.4531, + "step": 6043 + }, + { + "epoch": 0.177461976628105, + "grad_norm": 0.0, + "learning_rate": 1.8881133718642178e-05, + "loss": 1.5527, + "step": 6044 + }, + { + "epoch": 0.177491338305244, + "grad_norm": 0.0, + "learning_rate": 1.8880696593848256e-05, + "loss": 1.5557, + "step": 6045 + }, + { + "epoch": 0.17752069998238298, + "grad_norm": 0.0, + "learning_rate": 1.8880259388743925e-05, + "loss": 1.3662, + "step": 6046 + }, + { + "epoch": 0.177550061659522, + "grad_norm": 0.0, + "learning_rate": 1.8879822103333132e-05, + "loss": 1.4473, + "step": 6047 + }, + { + "epoch": 0.177579423336661, + "grad_norm": 0.0, + "learning_rate": 1.887938473761983e-05, + "loss": 1.3721, + "step": 6048 + }, + { + "epoch": 0.17760878501379998, + "grad_norm": 0.0, + "learning_rate": 1.8878947291607984e-05, + "loss": 1.4395, + "step": 6049 + }, + { + "epoch": 0.177638146690939, + "grad_norm": 0.0, + "learning_rate": 1.887850976530154e-05, + "loss": 1.4805, + "step": 6050 + }, + { + "epoch": 0.177667508368078, + "grad_norm": 0.0, + "learning_rate": 1.887807215870446e-05, + "loss": 1.4131, + "step": 6051 + }, + { + "epoch": 0.17769687004521698, + "grad_norm": 0.0, + "learning_rate": 1.8877634471820692e-05, + "loss": 1.5088, + "step": 6052 + }, + { + "epoch": 0.177726231722356, + "grad_norm": 0.0, + "learning_rate": 1.8877196704654212e-05, + "loss": 1.4775, + "step": 6053 + }, + { + "epoch": 0.17775559339949498, + "grad_norm": 0.0, + "learning_rate": 1.8876758857208964e-05, + "loss": 1.4482, + "step": 6054 + }, + { + "epoch": 0.17778495507663397, + "grad_norm": 0.0, + "learning_rate": 1.8876320929488913e-05, + "loss": 1.4229, + "step": 6055 + }, + { + "epoch": 0.177814316753773, + "grad_norm": 0.0, + "learning_rate": 1.887588292149802e-05, + "loss": 1.458, + "step": 6056 + }, + { + "epoch": 0.17784367843091198, + "grad_norm": 0.0, + "learning_rate": 1.887544483324024e-05, + "loss": 1.5332, + "step": 6057 + }, + { + "epoch": 0.17787304010805097, + "grad_norm": 0.0, + "learning_rate": 1.8875006664719545e-05, + "loss": 1.5088, + "step": 6058 + }, + { + "epoch": 0.17790240178518996, + "grad_norm": 0.0, + "learning_rate": 1.887456841593989e-05, + "loss": 1.3623, + "step": 6059 + }, + { + "epoch": 0.17793176346232897, + "grad_norm": 0.0, + "learning_rate": 1.8874130086905236e-05, + "loss": 1.3604, + "step": 6060 + }, + { + "epoch": 0.17796112513946796, + "grad_norm": 0.0, + "learning_rate": 1.8873691677619555e-05, + "loss": 1.3809, + "step": 6061 + }, + { + "epoch": 0.17799048681660695, + "grad_norm": 0.0, + "learning_rate": 1.887325318808681e-05, + "loss": 1.3369, + "step": 6062 + }, + { + "epoch": 0.17801984849374597, + "grad_norm": 0.0, + "learning_rate": 1.887281461831096e-05, + "loss": 1.4746, + "step": 6063 + }, + { + "epoch": 0.17804921017088496, + "grad_norm": 0.0, + "learning_rate": 1.8872375968295974e-05, + "loss": 1.4697, + "step": 6064 + }, + { + "epoch": 0.17807857184802395, + "grad_norm": 0.0, + "learning_rate": 1.8871937238045822e-05, + "loss": 1.311, + "step": 6065 + }, + { + "epoch": 0.17810793352516296, + "grad_norm": 0.0, + "learning_rate": 1.887149842756447e-05, + "loss": 1.4355, + "step": 6066 + }, + { + "epoch": 0.17813729520230195, + "grad_norm": 0.0, + "learning_rate": 1.8871059536855883e-05, + "loss": 1.335, + "step": 6067 + }, + { + "epoch": 0.17816665687944094, + "grad_norm": 0.0, + "learning_rate": 1.8870620565924037e-05, + "loss": 1.5107, + "step": 6068 + }, + { + "epoch": 0.17819601855657996, + "grad_norm": 0.0, + "learning_rate": 1.8870181514772895e-05, + "loss": 1.5049, + "step": 6069 + }, + { + "epoch": 0.17822538023371895, + "grad_norm": 0.0, + "learning_rate": 1.886974238340643e-05, + "loss": 1.6172, + "step": 6070 + }, + { + "epoch": 0.17825474191085794, + "grad_norm": 0.0, + "learning_rate": 1.8869303171828613e-05, + "loss": 1.4902, + "step": 6071 + }, + { + "epoch": 0.17828410358799696, + "grad_norm": 0.0, + "learning_rate": 1.8868863880043414e-05, + "loss": 1.4961, + "step": 6072 + }, + { + "epoch": 0.17831346526513595, + "grad_norm": 0.0, + "learning_rate": 1.8868424508054813e-05, + "loss": 1.4258, + "step": 6073 + }, + { + "epoch": 0.17834282694227493, + "grad_norm": 0.0, + "learning_rate": 1.8867985055866775e-05, + "loss": 1.4053, + "step": 6074 + }, + { + "epoch": 0.17837218861941395, + "grad_norm": 0.0, + "learning_rate": 1.8867545523483277e-05, + "loss": 1.4355, + "step": 6075 + }, + { + "epoch": 0.17840155029655294, + "grad_norm": 0.0, + "learning_rate": 1.886710591090829e-05, + "loss": 1.5742, + "step": 6076 + }, + { + "epoch": 0.17843091197369193, + "grad_norm": 0.0, + "learning_rate": 1.8866666218145797e-05, + "loss": 1.3701, + "step": 6077 + }, + { + "epoch": 0.17846027365083095, + "grad_norm": 0.0, + "learning_rate": 1.8866226445199772e-05, + "loss": 1.4023, + "step": 6078 + }, + { + "epoch": 0.17848963532796994, + "grad_norm": 0.0, + "learning_rate": 1.8865786592074187e-05, + "loss": 1.4541, + "step": 6079 + }, + { + "epoch": 0.17851899700510893, + "grad_norm": 0.0, + "learning_rate": 1.8865346658773024e-05, + "loss": 1.4102, + "step": 6080 + }, + { + "epoch": 0.17854835868224794, + "grad_norm": 0.0, + "learning_rate": 1.8864906645300263e-05, + "loss": 1.3867, + "step": 6081 + }, + { + "epoch": 0.17857772035938693, + "grad_norm": 0.0, + "learning_rate": 1.886446655165988e-05, + "loss": 1.501, + "step": 6082 + }, + { + "epoch": 0.17860708203652592, + "grad_norm": 0.0, + "learning_rate": 1.8864026377855854e-05, + "loss": 1.3594, + "step": 6083 + }, + { + "epoch": 0.1786364437136649, + "grad_norm": 0.0, + "learning_rate": 1.8863586123892167e-05, + "loss": 1.2637, + "step": 6084 + }, + { + "epoch": 0.17866580539080393, + "grad_norm": 0.0, + "learning_rate": 1.8863145789772804e-05, + "loss": 1.3037, + "step": 6085 + }, + { + "epoch": 0.17869516706794292, + "grad_norm": 0.0, + "learning_rate": 1.8862705375501745e-05, + "loss": 1.5576, + "step": 6086 + }, + { + "epoch": 0.1787245287450819, + "grad_norm": 0.0, + "learning_rate": 1.886226488108297e-05, + "loss": 1.5957, + "step": 6087 + }, + { + "epoch": 0.17875389042222092, + "grad_norm": 0.0, + "learning_rate": 1.886182430652046e-05, + "loss": 1.4883, + "step": 6088 + }, + { + "epoch": 0.1787832520993599, + "grad_norm": 0.0, + "learning_rate": 1.8861383651818205e-05, + "loss": 1.5244, + "step": 6089 + }, + { + "epoch": 0.1788126137764989, + "grad_norm": 0.0, + "learning_rate": 1.886094291698019e-05, + "loss": 1.5176, + "step": 6090 + }, + { + "epoch": 0.17884197545363792, + "grad_norm": 0.0, + "learning_rate": 1.88605021020104e-05, + "loss": 1.4707, + "step": 6091 + }, + { + "epoch": 0.1788713371307769, + "grad_norm": 0.0, + "learning_rate": 1.8860061206912818e-05, + "loss": 1.4785, + "step": 6092 + }, + { + "epoch": 0.1789006988079159, + "grad_norm": 0.0, + "learning_rate": 1.8859620231691432e-05, + "loss": 1.3896, + "step": 6093 + }, + { + "epoch": 0.17893006048505491, + "grad_norm": 0.0, + "learning_rate": 1.8859179176350233e-05, + "loss": 1.5703, + "step": 6094 + }, + { + "epoch": 0.1789594221621939, + "grad_norm": 0.0, + "learning_rate": 1.8858738040893206e-05, + "loss": 1.4141, + "step": 6095 + }, + { + "epoch": 0.1789887838393329, + "grad_norm": 0.0, + "learning_rate": 1.8858296825324344e-05, + "loss": 1.5244, + "step": 6096 + }, + { + "epoch": 0.1790181455164719, + "grad_norm": 0.0, + "learning_rate": 1.8857855529647637e-05, + "loss": 1.3535, + "step": 6097 + }, + { + "epoch": 0.1790475071936109, + "grad_norm": 0.0, + "learning_rate": 1.8857414153867074e-05, + "loss": 1.458, + "step": 6098 + }, + { + "epoch": 0.1790768688707499, + "grad_norm": 0.0, + "learning_rate": 1.8856972697986644e-05, + "loss": 1.3047, + "step": 6099 + }, + { + "epoch": 0.1791062305478889, + "grad_norm": 0.0, + "learning_rate": 1.885653116201034e-05, + "loss": 1.3945, + "step": 6100 + }, + { + "epoch": 0.1791355922250279, + "grad_norm": 0.0, + "learning_rate": 1.8856089545942158e-05, + "loss": 1.4092, + "step": 6101 + }, + { + "epoch": 0.17916495390216688, + "grad_norm": 0.0, + "learning_rate": 1.885564784978609e-05, + "loss": 1.4561, + "step": 6102 + }, + { + "epoch": 0.1791943155793059, + "grad_norm": 0.0, + "learning_rate": 1.8855206073546134e-05, + "loss": 1.3369, + "step": 6103 + }, + { + "epoch": 0.1792236772564449, + "grad_norm": 0.0, + "learning_rate": 1.8854764217226278e-05, + "loss": 1.3965, + "step": 6104 + }, + { + "epoch": 0.17925303893358388, + "grad_norm": 0.0, + "learning_rate": 1.885432228083052e-05, + "loss": 1.3701, + "step": 6105 + }, + { + "epoch": 0.1792824006107229, + "grad_norm": 0.0, + "learning_rate": 1.8853880264362863e-05, + "loss": 1.4697, + "step": 6106 + }, + { + "epoch": 0.1793117622878619, + "grad_norm": 0.0, + "learning_rate": 1.8853438167827295e-05, + "loss": 1.3867, + "step": 6107 + }, + { + "epoch": 0.17934112396500088, + "grad_norm": 0.0, + "learning_rate": 1.885299599122782e-05, + "loss": 1.4727, + "step": 6108 + }, + { + "epoch": 0.17937048564213987, + "grad_norm": 0.0, + "learning_rate": 1.8852553734568433e-05, + "loss": 1.3677, + "step": 6109 + }, + { + "epoch": 0.17939984731927888, + "grad_norm": 0.0, + "learning_rate": 1.8852111397853143e-05, + "loss": 1.4932, + "step": 6110 + }, + { + "epoch": 0.17942920899641787, + "grad_norm": 0.0, + "learning_rate": 1.8851668981085933e-05, + "loss": 1.4453, + "step": 6111 + }, + { + "epoch": 0.17945857067355686, + "grad_norm": 0.0, + "learning_rate": 1.8851226484270818e-05, + "loss": 1.4541, + "step": 6112 + }, + { + "epoch": 0.17948793235069588, + "grad_norm": 0.0, + "learning_rate": 1.8850783907411793e-05, + "loss": 1.2798, + "step": 6113 + }, + { + "epoch": 0.17951729402783487, + "grad_norm": 0.0, + "learning_rate": 1.8850341250512864e-05, + "loss": 1.4072, + "step": 6114 + }, + { + "epoch": 0.17954665570497386, + "grad_norm": 0.0, + "learning_rate": 1.884989851357803e-05, + "loss": 1.3027, + "step": 6115 + }, + { + "epoch": 0.17957601738211287, + "grad_norm": 0.0, + "learning_rate": 1.88494556966113e-05, + "loss": 1.5576, + "step": 6116 + }, + { + "epoch": 0.17960537905925186, + "grad_norm": 0.0, + "learning_rate": 1.8849012799616678e-05, + "loss": 1.418, + "step": 6117 + }, + { + "epoch": 0.17963474073639085, + "grad_norm": 0.0, + "learning_rate": 1.8848569822598164e-05, + "loss": 1.376, + "step": 6118 + }, + { + "epoch": 0.17966410241352987, + "grad_norm": 0.0, + "learning_rate": 1.8848126765559766e-05, + "loss": 1.3486, + "step": 6119 + }, + { + "epoch": 0.17969346409066886, + "grad_norm": 0.0, + "learning_rate": 1.8847683628505493e-05, + "loss": 1.4375, + "step": 6120 + }, + { + "epoch": 0.17972282576780785, + "grad_norm": 0.0, + "learning_rate": 1.884724041143935e-05, + "loss": 1.5205, + "step": 6121 + }, + { + "epoch": 0.17975218744494686, + "grad_norm": 0.0, + "learning_rate": 1.8846797114365347e-05, + "loss": 1.4873, + "step": 6122 + }, + { + "epoch": 0.17978154912208585, + "grad_norm": 0.0, + "learning_rate": 1.8846353737287494e-05, + "loss": 1.3188, + "step": 6123 + }, + { + "epoch": 0.17981091079922484, + "grad_norm": 0.0, + "learning_rate": 1.8845910280209794e-05, + "loss": 1.249, + "step": 6124 + }, + { + "epoch": 0.17984027247636386, + "grad_norm": 0.0, + "learning_rate": 1.8845466743136263e-05, + "loss": 1.4346, + "step": 6125 + }, + { + "epoch": 0.17986963415350285, + "grad_norm": 0.0, + "learning_rate": 1.8845023126070913e-05, + "loss": 1.4971, + "step": 6126 + }, + { + "epoch": 0.17989899583064184, + "grad_norm": 0.0, + "learning_rate": 1.8844579429017754e-05, + "loss": 1.3652, + "step": 6127 + }, + { + "epoch": 0.17992835750778086, + "grad_norm": 0.0, + "learning_rate": 1.8844135651980796e-05, + "loss": 1.3633, + "step": 6128 + }, + { + "epoch": 0.17995771918491985, + "grad_norm": 0.0, + "learning_rate": 1.8843691794964056e-05, + "loss": 1.5205, + "step": 6129 + }, + { + "epoch": 0.17998708086205883, + "grad_norm": 0.0, + "learning_rate": 1.8843247857971546e-05, + "loss": 1.4023, + "step": 6130 + }, + { + "epoch": 0.18001644253919785, + "grad_norm": 0.0, + "learning_rate": 1.884280384100728e-05, + "loss": 1.3975, + "step": 6131 + }, + { + "epoch": 0.18004580421633684, + "grad_norm": 0.0, + "learning_rate": 1.8842359744075275e-05, + "loss": 1.4795, + "step": 6132 + }, + { + "epoch": 0.18007516589347583, + "grad_norm": 0.0, + "learning_rate": 1.8841915567179544e-05, + "loss": 1.3457, + "step": 6133 + }, + { + "epoch": 0.18010452757061482, + "grad_norm": 0.0, + "learning_rate": 1.884147131032411e-05, + "loss": 1.417, + "step": 6134 + }, + { + "epoch": 0.18013388924775384, + "grad_norm": 0.0, + "learning_rate": 1.884102697351298e-05, + "loss": 1.4883, + "step": 6135 + }, + { + "epoch": 0.18016325092489283, + "grad_norm": 0.0, + "learning_rate": 1.8840582556750187e-05, + "loss": 1.4521, + "step": 6136 + }, + { + "epoch": 0.18019261260203182, + "grad_norm": 0.0, + "learning_rate": 1.8840138060039735e-05, + "loss": 1.5947, + "step": 6137 + }, + { + "epoch": 0.18022197427917083, + "grad_norm": 0.0, + "learning_rate": 1.8839693483385656e-05, + "loss": 1.4219, + "step": 6138 + }, + { + "epoch": 0.18025133595630982, + "grad_norm": 0.0, + "learning_rate": 1.8839248826791962e-05, + "loss": 1.4473, + "step": 6139 + }, + { + "epoch": 0.1802806976334488, + "grad_norm": 0.0, + "learning_rate": 1.8838804090262678e-05, + "loss": 1.4375, + "step": 6140 + }, + { + "epoch": 0.18031005931058783, + "grad_norm": 0.0, + "learning_rate": 1.8838359273801825e-05, + "loss": 1.5664, + "step": 6141 + }, + { + "epoch": 0.18033942098772682, + "grad_norm": 0.0, + "learning_rate": 1.8837914377413425e-05, + "loss": 1.4072, + "step": 6142 + }, + { + "epoch": 0.1803687826648658, + "grad_norm": 0.0, + "learning_rate": 1.8837469401101504e-05, + "loss": 1.4863, + "step": 6143 + }, + { + "epoch": 0.18039814434200482, + "grad_norm": 0.0, + "learning_rate": 1.883702434487008e-05, + "loss": 1.416, + "step": 6144 + }, + { + "epoch": 0.1804275060191438, + "grad_norm": 0.0, + "learning_rate": 1.8836579208723183e-05, + "loss": 1.4248, + "step": 6145 + }, + { + "epoch": 0.1804568676962828, + "grad_norm": 0.0, + "learning_rate": 1.8836133992664834e-05, + "loss": 1.3877, + "step": 6146 + }, + { + "epoch": 0.18048622937342182, + "grad_norm": 0.0, + "learning_rate": 1.8835688696699067e-05, + "loss": 1.4805, + "step": 6147 + }, + { + "epoch": 0.1805155910505608, + "grad_norm": 0.0, + "learning_rate": 1.8835243320829907e-05, + "loss": 1.5537, + "step": 6148 + }, + { + "epoch": 0.1805449527276998, + "grad_norm": 0.0, + "learning_rate": 1.8834797865061372e-05, + "loss": 1.5645, + "step": 6149 + }, + { + "epoch": 0.18057431440483881, + "grad_norm": 0.0, + "learning_rate": 1.8834352329397502e-05, + "loss": 1.543, + "step": 6150 + }, + { + "epoch": 0.1806036760819778, + "grad_norm": 0.0, + "learning_rate": 1.883390671384232e-05, + "loss": 1.542, + "step": 6151 + }, + { + "epoch": 0.1806330377591168, + "grad_norm": 0.0, + "learning_rate": 1.8833461018399858e-05, + "loss": 1.3877, + "step": 6152 + }, + { + "epoch": 0.1806623994362558, + "grad_norm": 0.0, + "learning_rate": 1.8833015243074144e-05, + "loss": 1.5439, + "step": 6153 + }, + { + "epoch": 0.1806917611133948, + "grad_norm": 0.0, + "learning_rate": 1.883256938786921e-05, + "loss": 1.5947, + "step": 6154 + }, + { + "epoch": 0.1807211227905338, + "grad_norm": 0.0, + "learning_rate": 1.883212345278909e-05, + "loss": 1.416, + "step": 6155 + }, + { + "epoch": 0.1807504844676728, + "grad_norm": 0.0, + "learning_rate": 1.8831677437837817e-05, + "loss": 1.457, + "step": 6156 + }, + { + "epoch": 0.1807798461448118, + "grad_norm": 0.0, + "learning_rate": 1.8831231343019424e-05, + "loss": 1.4912, + "step": 6157 + }, + { + "epoch": 0.18080920782195078, + "grad_norm": 0.0, + "learning_rate": 1.8830785168337944e-05, + "loss": 1.4253, + "step": 6158 + }, + { + "epoch": 0.1808385694990898, + "grad_norm": 0.0, + "learning_rate": 1.883033891379741e-05, + "loss": 1.4268, + "step": 6159 + }, + { + "epoch": 0.1808679311762288, + "grad_norm": 0.0, + "learning_rate": 1.882989257940186e-05, + "loss": 1.5479, + "step": 6160 + }, + { + "epoch": 0.18089729285336778, + "grad_norm": 0.0, + "learning_rate": 1.8829446165155333e-05, + "loss": 1.3594, + "step": 6161 + }, + { + "epoch": 0.18092665453050677, + "grad_norm": 0.0, + "learning_rate": 1.882899967106186e-05, + "loss": 1.415, + "step": 6162 + }, + { + "epoch": 0.1809560162076458, + "grad_norm": 0.0, + "learning_rate": 1.882855309712548e-05, + "loss": 1.5039, + "step": 6163 + }, + { + "epoch": 0.18098537788478478, + "grad_norm": 0.0, + "learning_rate": 1.8828106443350238e-05, + "loss": 1.4268, + "step": 6164 + }, + { + "epoch": 0.18101473956192377, + "grad_norm": 0.0, + "learning_rate": 1.8827659709740166e-05, + "loss": 1.4424, + "step": 6165 + }, + { + "epoch": 0.18104410123906278, + "grad_norm": 0.0, + "learning_rate": 1.8827212896299305e-05, + "loss": 1.3955, + "step": 6166 + }, + { + "epoch": 0.18107346291620177, + "grad_norm": 0.0, + "learning_rate": 1.88267660030317e-05, + "loss": 1.4736, + "step": 6167 + }, + { + "epoch": 0.18110282459334076, + "grad_norm": 0.0, + "learning_rate": 1.8826319029941386e-05, + "loss": 1.5, + "step": 6168 + }, + { + "epoch": 0.18113218627047978, + "grad_norm": 0.0, + "learning_rate": 1.8825871977032412e-05, + "loss": 1.4619, + "step": 6169 + }, + { + "epoch": 0.18116154794761877, + "grad_norm": 0.0, + "learning_rate": 1.8825424844308818e-05, + "loss": 1.4326, + "step": 6170 + }, + { + "epoch": 0.18119090962475776, + "grad_norm": 0.0, + "learning_rate": 1.8824977631774642e-05, + "loss": 1.5107, + "step": 6171 + }, + { + "epoch": 0.18122027130189677, + "grad_norm": 0.0, + "learning_rate": 1.882453033943394e-05, + "loss": 1.582, + "step": 6172 + }, + { + "epoch": 0.18124963297903576, + "grad_norm": 0.0, + "learning_rate": 1.8824082967290747e-05, + "loss": 1.623, + "step": 6173 + }, + { + "epoch": 0.18127899465617475, + "grad_norm": 0.0, + "learning_rate": 1.882363551534911e-05, + "loss": 1.4199, + "step": 6174 + }, + { + "epoch": 0.18130835633331377, + "grad_norm": 0.0, + "learning_rate": 1.8823187983613077e-05, + "loss": 1.374, + "step": 6175 + }, + { + "epoch": 0.18133771801045276, + "grad_norm": 0.0, + "learning_rate": 1.8822740372086698e-05, + "loss": 1.3906, + "step": 6176 + }, + { + "epoch": 0.18136707968759175, + "grad_norm": 0.0, + "learning_rate": 1.8822292680774016e-05, + "loss": 1.3193, + "step": 6177 + }, + { + "epoch": 0.18139644136473076, + "grad_norm": 0.0, + "learning_rate": 1.8821844909679084e-05, + "loss": 1.4951, + "step": 6178 + }, + { + "epoch": 0.18142580304186975, + "grad_norm": 0.0, + "learning_rate": 1.882139705880595e-05, + "loss": 1.4316, + "step": 6179 + }, + { + "epoch": 0.18145516471900874, + "grad_norm": 0.0, + "learning_rate": 1.8820949128158662e-05, + "loss": 1.5186, + "step": 6180 + }, + { + "epoch": 0.18148452639614776, + "grad_norm": 0.0, + "learning_rate": 1.882050111774127e-05, + "loss": 1.5234, + "step": 6181 + }, + { + "epoch": 0.18151388807328675, + "grad_norm": 0.0, + "learning_rate": 1.882005302755783e-05, + "loss": 1.4775, + "step": 6182 + }, + { + "epoch": 0.18154324975042574, + "grad_norm": 0.0, + "learning_rate": 1.881960485761239e-05, + "loss": 1.4561, + "step": 6183 + }, + { + "epoch": 0.18157261142756476, + "grad_norm": 0.0, + "learning_rate": 1.8819156607909004e-05, + "loss": 1.5059, + "step": 6184 + }, + { + "epoch": 0.18160197310470375, + "grad_norm": 0.0, + "learning_rate": 1.8818708278451726e-05, + "loss": 1.4805, + "step": 6185 + }, + { + "epoch": 0.18163133478184273, + "grad_norm": 0.0, + "learning_rate": 1.8818259869244615e-05, + "loss": 1.3369, + "step": 6186 + }, + { + "epoch": 0.18166069645898172, + "grad_norm": 0.0, + "learning_rate": 1.8817811380291716e-05, + "loss": 1.5928, + "step": 6187 + }, + { + "epoch": 0.18169005813612074, + "grad_norm": 0.0, + "learning_rate": 1.8817362811597097e-05, + "loss": 1.3662, + "step": 6188 + }, + { + "epoch": 0.18171941981325973, + "grad_norm": 0.0, + "learning_rate": 1.88169141631648e-05, + "loss": 1.5137, + "step": 6189 + }, + { + "epoch": 0.18174878149039872, + "grad_norm": 0.0, + "learning_rate": 1.8816465434998896e-05, + "loss": 1.4619, + "step": 6190 + }, + { + "epoch": 0.18177814316753774, + "grad_norm": 0.0, + "learning_rate": 1.8816016627103436e-05, + "loss": 1.3613, + "step": 6191 + }, + { + "epoch": 0.18180750484467673, + "grad_norm": 0.0, + "learning_rate": 1.8815567739482477e-05, + "loss": 1.4004, + "step": 6192 + }, + { + "epoch": 0.18183686652181572, + "grad_norm": 0.0, + "learning_rate": 1.8815118772140084e-05, + "loss": 1.4961, + "step": 6193 + }, + { + "epoch": 0.18186622819895473, + "grad_norm": 0.0, + "learning_rate": 1.881466972508031e-05, + "loss": 1.3525, + "step": 6194 + }, + { + "epoch": 0.18189558987609372, + "grad_norm": 0.0, + "learning_rate": 1.8814220598307226e-05, + "loss": 1.3594, + "step": 6195 + }, + { + "epoch": 0.1819249515532327, + "grad_norm": 0.0, + "learning_rate": 1.8813771391824885e-05, + "loss": 1.5752, + "step": 6196 + }, + { + "epoch": 0.18195431323037173, + "grad_norm": 0.0, + "learning_rate": 1.8813322105637353e-05, + "loss": 1.4873, + "step": 6197 + }, + { + "epoch": 0.18198367490751072, + "grad_norm": 0.0, + "learning_rate": 1.881287273974869e-05, + "loss": 1.333, + "step": 6198 + }, + { + "epoch": 0.1820130365846497, + "grad_norm": 0.0, + "learning_rate": 1.8812423294162965e-05, + "loss": 1.3896, + "step": 6199 + }, + { + "epoch": 0.18204239826178872, + "grad_norm": 0.0, + "learning_rate": 1.881197376888424e-05, + "loss": 1.3975, + "step": 6200 + }, + { + "epoch": 0.1820717599389277, + "grad_norm": 0.0, + "learning_rate": 1.8811524163916576e-05, + "loss": 1.4902, + "step": 6201 + }, + { + "epoch": 0.1821011216160667, + "grad_norm": 0.0, + "learning_rate": 1.881107447926404e-05, + "loss": 1.5176, + "step": 6202 + }, + { + "epoch": 0.18213048329320572, + "grad_norm": 0.0, + "learning_rate": 1.8810624714930707e-05, + "loss": 1.3174, + "step": 6203 + }, + { + "epoch": 0.1821598449703447, + "grad_norm": 0.0, + "learning_rate": 1.8810174870920635e-05, + "loss": 1.5039, + "step": 6204 + }, + { + "epoch": 0.1821892066474837, + "grad_norm": 0.0, + "learning_rate": 1.8809724947237896e-05, + "loss": 1.5156, + "step": 6205 + }, + { + "epoch": 0.18221856832462272, + "grad_norm": 0.0, + "learning_rate": 1.880927494388656e-05, + "loss": 1.3809, + "step": 6206 + }, + { + "epoch": 0.1822479300017617, + "grad_norm": 0.0, + "learning_rate": 1.880882486087069e-05, + "loss": 1.4824, + "step": 6207 + }, + { + "epoch": 0.1822772916789007, + "grad_norm": 0.0, + "learning_rate": 1.8808374698194364e-05, + "loss": 1.4951, + "step": 6208 + }, + { + "epoch": 0.1823066533560397, + "grad_norm": 0.0, + "learning_rate": 1.880792445586165e-05, + "loss": 1.4473, + "step": 6209 + }, + { + "epoch": 0.1823360150331787, + "grad_norm": 0.0, + "learning_rate": 1.8807474133876618e-05, + "loss": 1.3047, + "step": 6210 + }, + { + "epoch": 0.1823653767103177, + "grad_norm": 0.0, + "learning_rate": 1.880702373224334e-05, + "loss": 1.4434, + "step": 6211 + }, + { + "epoch": 0.18239473838745668, + "grad_norm": 0.0, + "learning_rate": 1.8806573250965894e-05, + "loss": 1.5176, + "step": 6212 + }, + { + "epoch": 0.1824241000645957, + "grad_norm": 0.0, + "learning_rate": 1.8806122690048353e-05, + "loss": 1.3848, + "step": 6213 + }, + { + "epoch": 0.18245346174173468, + "grad_norm": 0.0, + "learning_rate": 1.8805672049494787e-05, + "loss": 1.4551, + "step": 6214 + }, + { + "epoch": 0.18248282341887367, + "grad_norm": 0.0, + "learning_rate": 1.8805221329309268e-05, + "loss": 1.4922, + "step": 6215 + }, + { + "epoch": 0.1825121850960127, + "grad_norm": 0.0, + "learning_rate": 1.8804770529495885e-05, + "loss": 1.3418, + "step": 6216 + }, + { + "epoch": 0.18254154677315168, + "grad_norm": 0.0, + "learning_rate": 1.8804319650058702e-05, + "loss": 1.4385, + "step": 6217 + }, + { + "epoch": 0.18257090845029067, + "grad_norm": 0.0, + "learning_rate": 1.8803868691001805e-05, + "loss": 1.5088, + "step": 6218 + }, + { + "epoch": 0.1826002701274297, + "grad_norm": 0.0, + "learning_rate": 1.8803417652329268e-05, + "loss": 1.4727, + "step": 6219 + }, + { + "epoch": 0.18262963180456868, + "grad_norm": 0.0, + "learning_rate": 1.880296653404517e-05, + "loss": 1.5039, + "step": 6220 + }, + { + "epoch": 0.18265899348170767, + "grad_norm": 0.0, + "learning_rate": 1.880251533615359e-05, + "loss": 1.4785, + "step": 6221 + }, + { + "epoch": 0.18268835515884668, + "grad_norm": 0.0, + "learning_rate": 1.8802064058658613e-05, + "loss": 1.3672, + "step": 6222 + }, + { + "epoch": 0.18271771683598567, + "grad_norm": 0.0, + "learning_rate": 1.8801612701564314e-05, + "loss": 1.3477, + "step": 6223 + }, + { + "epoch": 0.18274707851312466, + "grad_norm": 0.0, + "learning_rate": 1.8801161264874777e-05, + "loss": 1.4287, + "step": 6224 + }, + { + "epoch": 0.18277644019026368, + "grad_norm": 0.0, + "learning_rate": 1.8800709748594084e-05, + "loss": 1.4141, + "step": 6225 + }, + { + "epoch": 0.18280580186740267, + "grad_norm": 0.0, + "learning_rate": 1.880025815272632e-05, + "loss": 1.3008, + "step": 6226 + }, + { + "epoch": 0.18283516354454166, + "grad_norm": 0.0, + "learning_rate": 1.879980647727557e-05, + "loss": 1.394, + "step": 6227 + }, + { + "epoch": 0.18286452522168067, + "grad_norm": 0.0, + "learning_rate": 1.879935472224591e-05, + "loss": 1.5137, + "step": 6228 + }, + { + "epoch": 0.18289388689881966, + "grad_norm": 0.0, + "learning_rate": 1.879890288764144e-05, + "loss": 1.4277, + "step": 6229 + }, + { + "epoch": 0.18292324857595865, + "grad_norm": 0.0, + "learning_rate": 1.8798450973466234e-05, + "loss": 1.5186, + "step": 6230 + }, + { + "epoch": 0.18295261025309767, + "grad_norm": 0.0, + "learning_rate": 1.8797998979724382e-05, + "loss": 1.5117, + "step": 6231 + }, + { + "epoch": 0.18298197193023666, + "grad_norm": 0.0, + "learning_rate": 1.8797546906419972e-05, + "loss": 1.2881, + "step": 6232 + }, + { + "epoch": 0.18301133360737565, + "grad_norm": 0.0, + "learning_rate": 1.8797094753557092e-05, + "loss": 1.3418, + "step": 6233 + }, + { + "epoch": 0.18304069528451467, + "grad_norm": 0.0, + "learning_rate": 1.8796642521139833e-05, + "loss": 1.5176, + "step": 6234 + }, + { + "epoch": 0.18307005696165365, + "grad_norm": 0.0, + "learning_rate": 1.879619020917228e-05, + "loss": 1.3916, + "step": 6235 + }, + { + "epoch": 0.18309941863879264, + "grad_norm": 0.0, + "learning_rate": 1.8795737817658527e-05, + "loss": 1.4111, + "step": 6236 + }, + { + "epoch": 0.18312878031593163, + "grad_norm": 0.0, + "learning_rate": 1.8795285346602665e-05, + "loss": 1.5186, + "step": 6237 + }, + { + "epoch": 0.18315814199307065, + "grad_norm": 0.0, + "learning_rate": 1.8794832796008783e-05, + "loss": 1.4551, + "step": 6238 + }, + { + "epoch": 0.18318750367020964, + "grad_norm": 0.0, + "learning_rate": 1.879438016588098e-05, + "loss": 1.3496, + "step": 6239 + }, + { + "epoch": 0.18321686534734863, + "grad_norm": 0.0, + "learning_rate": 1.879392745622334e-05, + "loss": 1.4307, + "step": 6240 + }, + { + "epoch": 0.18324622702448765, + "grad_norm": 0.0, + "learning_rate": 1.8793474667039965e-05, + "loss": 1.4434, + "step": 6241 + }, + { + "epoch": 0.18327558870162663, + "grad_norm": 0.0, + "learning_rate": 1.8793021798334948e-05, + "loss": 1.5488, + "step": 6242 + }, + { + "epoch": 0.18330495037876562, + "grad_norm": 0.0, + "learning_rate": 1.879256885011238e-05, + "loss": 1.252, + "step": 6243 + }, + { + "epoch": 0.18333431205590464, + "grad_norm": 0.0, + "learning_rate": 1.879211582237636e-05, + "loss": 1.3848, + "step": 6244 + }, + { + "epoch": 0.18336367373304363, + "grad_norm": 0.0, + "learning_rate": 1.8791662715130986e-05, + "loss": 1.4639, + "step": 6245 + }, + { + "epoch": 0.18339303541018262, + "grad_norm": 0.0, + "learning_rate": 1.8791209528380354e-05, + "loss": 1.377, + "step": 6246 + }, + { + "epoch": 0.18342239708732164, + "grad_norm": 0.0, + "learning_rate": 1.879075626212856e-05, + "loss": 1.4004, + "step": 6247 + }, + { + "epoch": 0.18345175876446063, + "grad_norm": 0.0, + "learning_rate": 1.879030291637971e-05, + "loss": 1.5381, + "step": 6248 + }, + { + "epoch": 0.18348112044159962, + "grad_norm": 0.0, + "learning_rate": 1.87898494911379e-05, + "loss": 1.4531, + "step": 6249 + }, + { + "epoch": 0.18351048211873863, + "grad_norm": 0.0, + "learning_rate": 1.8789395986407228e-05, + "loss": 1.4941, + "step": 6250 + }, + { + "epoch": 0.18353984379587762, + "grad_norm": 0.0, + "learning_rate": 1.87889424021918e-05, + "loss": 1.333, + "step": 6251 + }, + { + "epoch": 0.1835692054730166, + "grad_norm": 0.0, + "learning_rate": 1.8788488738495713e-05, + "loss": 1.5244, + "step": 6252 + }, + { + "epoch": 0.18359856715015563, + "grad_norm": 0.0, + "learning_rate": 1.8788034995323066e-05, + "loss": 1.3916, + "step": 6253 + }, + { + "epoch": 0.18362792882729462, + "grad_norm": 0.0, + "learning_rate": 1.8787581172677974e-05, + "loss": 1.4277, + "step": 6254 + }, + { + "epoch": 0.1836572905044336, + "grad_norm": 0.0, + "learning_rate": 1.8787127270564535e-05, + "loss": 1.4404, + "step": 6255 + }, + { + "epoch": 0.18368665218157262, + "grad_norm": 0.0, + "learning_rate": 1.8786673288986856e-05, + "loss": 1.3896, + "step": 6256 + }, + { + "epoch": 0.1837160138587116, + "grad_norm": 0.0, + "learning_rate": 1.8786219227949033e-05, + "loss": 1.2646, + "step": 6257 + }, + { + "epoch": 0.1837453755358506, + "grad_norm": 0.0, + "learning_rate": 1.8785765087455183e-05, + "loss": 1.3945, + "step": 6258 + }, + { + "epoch": 0.18377473721298962, + "grad_norm": 0.0, + "learning_rate": 1.8785310867509408e-05, + "loss": 1.3701, + "step": 6259 + }, + { + "epoch": 0.1838040988901286, + "grad_norm": 0.0, + "learning_rate": 1.878485656811582e-05, + "loss": 1.3535, + "step": 6260 + }, + { + "epoch": 0.1838334605672676, + "grad_norm": 0.0, + "learning_rate": 1.8784402189278523e-05, + "loss": 1.3506, + "step": 6261 + }, + { + "epoch": 0.1838628222444066, + "grad_norm": 0.0, + "learning_rate": 1.8783947731001624e-05, + "loss": 1.5254, + "step": 6262 + }, + { + "epoch": 0.1838921839215456, + "grad_norm": 0.0, + "learning_rate": 1.878349319328924e-05, + "loss": 1.4619, + "step": 6263 + }, + { + "epoch": 0.1839215455986846, + "grad_norm": 0.0, + "learning_rate": 1.8783038576145475e-05, + "loss": 1.4121, + "step": 6264 + }, + { + "epoch": 0.18395090727582358, + "grad_norm": 0.0, + "learning_rate": 1.8782583879574444e-05, + "loss": 1.2925, + "step": 6265 + }, + { + "epoch": 0.1839802689529626, + "grad_norm": 0.0, + "learning_rate": 1.8782129103580258e-05, + "loss": 1.4336, + "step": 6266 + }, + { + "epoch": 0.1840096306301016, + "grad_norm": 0.0, + "learning_rate": 1.878167424816703e-05, + "loss": 1.4424, + "step": 6267 + }, + { + "epoch": 0.18403899230724058, + "grad_norm": 0.0, + "learning_rate": 1.878121931333887e-05, + "loss": 1.5664, + "step": 6268 + }, + { + "epoch": 0.1840683539843796, + "grad_norm": 0.0, + "learning_rate": 1.87807642990999e-05, + "loss": 1.4365, + "step": 6269 + }, + { + "epoch": 0.18409771566151859, + "grad_norm": 0.0, + "learning_rate": 1.8780309205454223e-05, + "loss": 1.292, + "step": 6270 + }, + { + "epoch": 0.18412707733865757, + "grad_norm": 0.0, + "learning_rate": 1.8779854032405963e-05, + "loss": 1.3389, + "step": 6271 + }, + { + "epoch": 0.1841564390157966, + "grad_norm": 0.0, + "learning_rate": 1.877939877995924e-05, + "loss": 1.4111, + "step": 6272 + }, + { + "epoch": 0.18418580069293558, + "grad_norm": 0.0, + "learning_rate": 1.877894344811816e-05, + "loss": 1.3535, + "step": 6273 + }, + { + "epoch": 0.18421516237007457, + "grad_norm": 0.0, + "learning_rate": 1.877848803688685e-05, + "loss": 1.4834, + "step": 6274 + }, + { + "epoch": 0.1842445240472136, + "grad_norm": 0.0, + "learning_rate": 1.877803254626942e-05, + "loss": 1.5645, + "step": 6275 + }, + { + "epoch": 0.18427388572435258, + "grad_norm": 0.0, + "learning_rate": 1.8777576976269994e-05, + "loss": 1.2891, + "step": 6276 + }, + { + "epoch": 0.18430324740149157, + "grad_norm": 0.0, + "learning_rate": 1.8777121326892692e-05, + "loss": 1.4609, + "step": 6277 + }, + { + "epoch": 0.18433260907863058, + "grad_norm": 0.0, + "learning_rate": 1.8776665598141637e-05, + "loss": 1.5361, + "step": 6278 + }, + { + "epoch": 0.18436197075576957, + "grad_norm": 0.0, + "learning_rate": 1.8776209790020945e-05, + "loss": 1.4111, + "step": 6279 + }, + { + "epoch": 0.18439133243290856, + "grad_norm": 0.0, + "learning_rate": 1.877575390253474e-05, + "loss": 1.3828, + "step": 6280 + }, + { + "epoch": 0.18442069411004758, + "grad_norm": 0.0, + "learning_rate": 1.8775297935687144e-05, + "loss": 1.3984, + "step": 6281 + }, + { + "epoch": 0.18445005578718657, + "grad_norm": 0.0, + "learning_rate": 1.8774841889482282e-05, + "loss": 1.3652, + "step": 6282 + }, + { + "epoch": 0.18447941746432556, + "grad_norm": 0.0, + "learning_rate": 1.877438576392428e-05, + "loss": 1.4297, + "step": 6283 + }, + { + "epoch": 0.18450877914146457, + "grad_norm": 0.0, + "learning_rate": 1.877392955901726e-05, + "loss": 1.4746, + "step": 6284 + }, + { + "epoch": 0.18453814081860356, + "grad_norm": 0.0, + "learning_rate": 1.8773473274765344e-05, + "loss": 1.3359, + "step": 6285 + }, + { + "epoch": 0.18456750249574255, + "grad_norm": 0.0, + "learning_rate": 1.8773016911172666e-05, + "loss": 1.4307, + "step": 6286 + }, + { + "epoch": 0.18459686417288154, + "grad_norm": 0.0, + "learning_rate": 1.8772560468243348e-05, + "loss": 1.4727, + "step": 6287 + }, + { + "epoch": 0.18462622585002056, + "grad_norm": 0.0, + "learning_rate": 1.8772103945981518e-05, + "loss": 1.3164, + "step": 6288 + }, + { + "epoch": 0.18465558752715955, + "grad_norm": 0.0, + "learning_rate": 1.8771647344391308e-05, + "loss": 1.5273, + "step": 6289 + }, + { + "epoch": 0.18468494920429854, + "grad_norm": 0.0, + "learning_rate": 1.8771190663476845e-05, + "loss": 1.3809, + "step": 6290 + }, + { + "epoch": 0.18471431088143755, + "grad_norm": 0.0, + "learning_rate": 1.8770733903242257e-05, + "loss": 1.3896, + "step": 6291 + }, + { + "epoch": 0.18474367255857654, + "grad_norm": 0.0, + "learning_rate": 1.8770277063691673e-05, + "loss": 1.4668, + "step": 6292 + }, + { + "epoch": 0.18477303423571553, + "grad_norm": 0.0, + "learning_rate": 1.876982014482923e-05, + "loss": 1.4873, + "step": 6293 + }, + { + "epoch": 0.18480239591285455, + "grad_norm": 0.0, + "learning_rate": 1.8769363146659058e-05, + "loss": 1.4873, + "step": 6294 + }, + { + "epoch": 0.18483175758999354, + "grad_norm": 0.0, + "learning_rate": 1.8768906069185287e-05, + "loss": 1.4014, + "step": 6295 + }, + { + "epoch": 0.18486111926713253, + "grad_norm": 0.0, + "learning_rate": 1.8768448912412055e-05, + "loss": 1.5166, + "step": 6296 + }, + { + "epoch": 0.18489048094427155, + "grad_norm": 0.0, + "learning_rate": 1.8767991676343495e-05, + "loss": 1.4697, + "step": 6297 + }, + { + "epoch": 0.18491984262141054, + "grad_norm": 0.0, + "learning_rate": 1.876753436098374e-05, + "loss": 1.5566, + "step": 6298 + }, + { + "epoch": 0.18494920429854952, + "grad_norm": 0.0, + "learning_rate": 1.8767076966336924e-05, + "loss": 1.373, + "step": 6299 + }, + { + "epoch": 0.18497856597568854, + "grad_norm": 0.0, + "learning_rate": 1.8766619492407187e-05, + "loss": 1.3926, + "step": 6300 + }, + { + "epoch": 0.18500792765282753, + "grad_norm": 0.0, + "learning_rate": 1.8766161939198667e-05, + "loss": 1.499, + "step": 6301 + }, + { + "epoch": 0.18503728932996652, + "grad_norm": 0.0, + "learning_rate": 1.87657043067155e-05, + "loss": 1.3838, + "step": 6302 + }, + { + "epoch": 0.18506665100710554, + "grad_norm": 0.0, + "learning_rate": 1.8765246594961818e-05, + "loss": 1.499, + "step": 6303 + }, + { + "epoch": 0.18509601268424453, + "grad_norm": 0.0, + "learning_rate": 1.876478880394177e-05, + "loss": 1.4189, + "step": 6304 + }, + { + "epoch": 0.18512537436138352, + "grad_norm": 0.0, + "learning_rate": 1.8764330933659493e-05, + "loss": 1.5273, + "step": 6305 + }, + { + "epoch": 0.18515473603852253, + "grad_norm": 0.0, + "learning_rate": 1.8763872984119126e-05, + "loss": 1.5498, + "step": 6306 + }, + { + "epoch": 0.18518409771566152, + "grad_norm": 0.0, + "learning_rate": 1.876341495532481e-05, + "loss": 1.417, + "step": 6307 + }, + { + "epoch": 0.1852134593928005, + "grad_norm": 0.0, + "learning_rate": 1.876295684728069e-05, + "loss": 1.4678, + "step": 6308 + }, + { + "epoch": 0.18524282106993953, + "grad_norm": 0.0, + "learning_rate": 1.8762498659990905e-05, + "loss": 1.3916, + "step": 6309 + }, + { + "epoch": 0.18527218274707852, + "grad_norm": 0.0, + "learning_rate": 1.8762040393459598e-05, + "loss": 1.3125, + "step": 6310 + }, + { + "epoch": 0.1853015444242175, + "grad_norm": 0.0, + "learning_rate": 1.8761582047690924e-05, + "loss": 1.4746, + "step": 6311 + }, + { + "epoch": 0.1853309061013565, + "grad_norm": 0.0, + "learning_rate": 1.8761123622689012e-05, + "loss": 1.5449, + "step": 6312 + }, + { + "epoch": 0.1853602677784955, + "grad_norm": 0.0, + "learning_rate": 1.8760665118458018e-05, + "loss": 1.5459, + "step": 6313 + }, + { + "epoch": 0.1853896294556345, + "grad_norm": 0.0, + "learning_rate": 1.8760206535002086e-05, + "loss": 1.373, + "step": 6314 + }, + { + "epoch": 0.1854189911327735, + "grad_norm": 0.0, + "learning_rate": 1.8759747872325366e-05, + "loss": 1.4658, + "step": 6315 + }, + { + "epoch": 0.1854483528099125, + "grad_norm": 0.0, + "learning_rate": 1.8759289130431996e-05, + "loss": 1.4111, + "step": 6316 + }, + { + "epoch": 0.1854777144870515, + "grad_norm": 0.0, + "learning_rate": 1.8758830309326135e-05, + "loss": 1.3594, + "step": 6317 + }, + { + "epoch": 0.1855070761641905, + "grad_norm": 0.0, + "learning_rate": 1.875837140901193e-05, + "loss": 1.4854, + "step": 6318 + }, + { + "epoch": 0.1855364378413295, + "grad_norm": 0.0, + "learning_rate": 1.8757912429493523e-05, + "loss": 1.3711, + "step": 6319 + }, + { + "epoch": 0.1855657995184685, + "grad_norm": 0.0, + "learning_rate": 1.8757453370775075e-05, + "loss": 1.3848, + "step": 6320 + }, + { + "epoch": 0.18559516119560748, + "grad_norm": 0.0, + "learning_rate": 1.8756994232860735e-05, + "loss": 1.4365, + "step": 6321 + }, + { + "epoch": 0.1856245228727465, + "grad_norm": 0.0, + "learning_rate": 1.8756535015754652e-05, + "loss": 1.5234, + "step": 6322 + }, + { + "epoch": 0.1856538845498855, + "grad_norm": 0.0, + "learning_rate": 1.875607571946098e-05, + "loss": 1.3916, + "step": 6323 + }, + { + "epoch": 0.18568324622702448, + "grad_norm": 0.0, + "learning_rate": 1.8755616343983872e-05, + "loss": 1.4189, + "step": 6324 + }, + { + "epoch": 0.1857126079041635, + "grad_norm": 0.0, + "learning_rate": 1.8755156889327483e-05, + "loss": 1.5059, + "step": 6325 + }, + { + "epoch": 0.18574196958130249, + "grad_norm": 0.0, + "learning_rate": 1.875469735549597e-05, + "loss": 1.4258, + "step": 6326 + }, + { + "epoch": 0.18577133125844147, + "grad_norm": 0.0, + "learning_rate": 1.8754237742493488e-05, + "loss": 1.4521, + "step": 6327 + }, + { + "epoch": 0.1858006929355805, + "grad_norm": 0.0, + "learning_rate": 1.875377805032419e-05, + "loss": 1.3789, + "step": 6328 + }, + { + "epoch": 0.18583005461271948, + "grad_norm": 0.0, + "learning_rate": 1.8753318278992234e-05, + "loss": 1.416, + "step": 6329 + }, + { + "epoch": 0.18585941628985847, + "grad_norm": 0.0, + "learning_rate": 1.8752858428501777e-05, + "loss": 1.4834, + "step": 6330 + }, + { + "epoch": 0.1858887779669975, + "grad_norm": 0.0, + "learning_rate": 1.8752398498856986e-05, + "loss": 1.4512, + "step": 6331 + }, + { + "epoch": 0.18591813964413648, + "grad_norm": 0.0, + "learning_rate": 1.875193849006201e-05, + "loss": 1.5352, + "step": 6332 + }, + { + "epoch": 0.18594750132127547, + "grad_norm": 0.0, + "learning_rate": 1.8751478402121012e-05, + "loss": 1.4541, + "step": 6333 + }, + { + "epoch": 0.18597686299841448, + "grad_norm": 0.0, + "learning_rate": 1.8751018235038154e-05, + "loss": 1.5879, + "step": 6334 + }, + { + "epoch": 0.18600622467555347, + "grad_norm": 0.0, + "learning_rate": 1.8750557988817598e-05, + "loss": 1.3828, + "step": 6335 + }, + { + "epoch": 0.18603558635269246, + "grad_norm": 0.0, + "learning_rate": 1.8750097663463507e-05, + "loss": 1.4482, + "step": 6336 + }, + { + "epoch": 0.18606494802983145, + "grad_norm": 0.0, + "learning_rate": 1.8749637258980038e-05, + "loss": 1.4375, + "step": 6337 + }, + { + "epoch": 0.18609430970697047, + "grad_norm": 0.0, + "learning_rate": 1.8749176775371362e-05, + "loss": 1.3447, + "step": 6338 + }, + { + "epoch": 0.18612367138410946, + "grad_norm": 0.0, + "learning_rate": 1.8748716212641637e-05, + "loss": 1.3926, + "step": 6339 + }, + { + "epoch": 0.18615303306124845, + "grad_norm": 0.0, + "learning_rate": 1.874825557079503e-05, + "loss": 1.3877, + "step": 6340 + }, + { + "epoch": 0.18618239473838746, + "grad_norm": 0.0, + "learning_rate": 1.874779484983571e-05, + "loss": 1.3223, + "step": 6341 + }, + { + "epoch": 0.18621175641552645, + "grad_norm": 0.0, + "learning_rate": 1.874733404976784e-05, + "loss": 1.4531, + "step": 6342 + }, + { + "epoch": 0.18624111809266544, + "grad_norm": 0.0, + "learning_rate": 1.874687317059559e-05, + "loss": 1.3232, + "step": 6343 + }, + { + "epoch": 0.18627047976980446, + "grad_norm": 0.0, + "learning_rate": 1.8746412212323123e-05, + "loss": 1.4336, + "step": 6344 + }, + { + "epoch": 0.18629984144694345, + "grad_norm": 0.0, + "learning_rate": 1.874595117495461e-05, + "loss": 1.4951, + "step": 6345 + }, + { + "epoch": 0.18632920312408244, + "grad_norm": 0.0, + "learning_rate": 1.8745490058494223e-05, + "loss": 1.4463, + "step": 6346 + }, + { + "epoch": 0.18635856480122145, + "grad_norm": 0.0, + "learning_rate": 1.8745028862946126e-05, + "loss": 1.3901, + "step": 6347 + }, + { + "epoch": 0.18638792647836044, + "grad_norm": 0.0, + "learning_rate": 1.87445675883145e-05, + "loss": 1.4668, + "step": 6348 + }, + { + "epoch": 0.18641728815549943, + "grad_norm": 0.0, + "learning_rate": 1.8744106234603504e-05, + "loss": 1.4727, + "step": 6349 + }, + { + "epoch": 0.18644664983263845, + "grad_norm": 0.0, + "learning_rate": 1.8743644801817317e-05, + "loss": 1.5684, + "step": 6350 + }, + { + "epoch": 0.18647601150977744, + "grad_norm": 0.0, + "learning_rate": 1.874318328996011e-05, + "loss": 1.418, + "step": 6351 + }, + { + "epoch": 0.18650537318691643, + "grad_norm": 0.0, + "learning_rate": 1.874272169903606e-05, + "loss": 1.4844, + "step": 6352 + }, + { + "epoch": 0.18653473486405545, + "grad_norm": 0.0, + "learning_rate": 1.8742260029049336e-05, + "loss": 1.3711, + "step": 6353 + }, + { + "epoch": 0.18656409654119444, + "grad_norm": 0.0, + "learning_rate": 1.8741798280004117e-05, + "loss": 1.3867, + "step": 6354 + }, + { + "epoch": 0.18659345821833342, + "grad_norm": 0.0, + "learning_rate": 1.8741336451904578e-05, + "loss": 1.4893, + "step": 6355 + }, + { + "epoch": 0.18662281989547244, + "grad_norm": 0.0, + "learning_rate": 1.8740874544754892e-05, + "loss": 1.4043, + "step": 6356 + }, + { + "epoch": 0.18665218157261143, + "grad_norm": 0.0, + "learning_rate": 1.8740412558559242e-05, + "loss": 1.3584, + "step": 6357 + }, + { + "epoch": 0.18668154324975042, + "grad_norm": 0.0, + "learning_rate": 1.87399504933218e-05, + "loss": 1.416, + "step": 6358 + }, + { + "epoch": 0.18671090492688944, + "grad_norm": 0.0, + "learning_rate": 1.873948834904675e-05, + "loss": 1.4199, + "step": 6359 + }, + { + "epoch": 0.18674026660402843, + "grad_norm": 0.0, + "learning_rate": 1.8739026125738266e-05, + "loss": 1.4297, + "step": 6360 + }, + { + "epoch": 0.18676962828116742, + "grad_norm": 0.0, + "learning_rate": 1.8738563823400534e-05, + "loss": 1.4121, + "step": 6361 + }, + { + "epoch": 0.1867989899583064, + "grad_norm": 0.0, + "learning_rate": 1.873810144203773e-05, + "loss": 1.5186, + "step": 6362 + }, + { + "epoch": 0.18682835163544542, + "grad_norm": 0.0, + "learning_rate": 1.8737638981654034e-05, + "loss": 1.2812, + "step": 6363 + }, + { + "epoch": 0.1868577133125844, + "grad_norm": 0.0, + "learning_rate": 1.8737176442253634e-05, + "loss": 1.2773, + "step": 6364 + }, + { + "epoch": 0.1868870749897234, + "grad_norm": 0.0, + "learning_rate": 1.873671382384071e-05, + "loss": 1.5527, + "step": 6365 + }, + { + "epoch": 0.18691643666686242, + "grad_norm": 0.0, + "learning_rate": 1.8736251126419445e-05, + "loss": 1.3926, + "step": 6366 + }, + { + "epoch": 0.1869457983440014, + "grad_norm": 0.0, + "learning_rate": 1.8735788349994022e-05, + "loss": 1.3682, + "step": 6367 + }, + { + "epoch": 0.1869751600211404, + "grad_norm": 0.0, + "learning_rate": 1.8735325494568628e-05, + "loss": 1.417, + "step": 6368 + }, + { + "epoch": 0.1870045216982794, + "grad_norm": 0.0, + "learning_rate": 1.873486256014745e-05, + "loss": 1.4131, + "step": 6369 + }, + { + "epoch": 0.1870338833754184, + "grad_norm": 0.0, + "learning_rate": 1.873439954673467e-05, + "loss": 1.5449, + "step": 6370 + }, + { + "epoch": 0.1870632450525574, + "grad_norm": 0.0, + "learning_rate": 1.8733936454334482e-05, + "loss": 1.5146, + "step": 6371 + }, + { + "epoch": 0.1870926067296964, + "grad_norm": 0.0, + "learning_rate": 1.8733473282951067e-05, + "loss": 1.3535, + "step": 6372 + }, + { + "epoch": 0.1871219684068354, + "grad_norm": 0.0, + "learning_rate": 1.873301003258862e-05, + "loss": 1.3096, + "step": 6373 + }, + { + "epoch": 0.1871513300839744, + "grad_norm": 0.0, + "learning_rate": 1.8732546703251323e-05, + "loss": 1.4209, + "step": 6374 + }, + { + "epoch": 0.1871806917611134, + "grad_norm": 0.0, + "learning_rate": 1.873208329494337e-05, + "loss": 1.4014, + "step": 6375 + }, + { + "epoch": 0.1872100534382524, + "grad_norm": 0.0, + "learning_rate": 1.8731619807668952e-05, + "loss": 1.3564, + "step": 6376 + }, + { + "epoch": 0.18723941511539138, + "grad_norm": 0.0, + "learning_rate": 1.873115624143226e-05, + "loss": 1.4248, + "step": 6377 + }, + { + "epoch": 0.1872687767925304, + "grad_norm": 0.0, + "learning_rate": 1.8730692596237484e-05, + "loss": 1.5234, + "step": 6378 + }, + { + "epoch": 0.1872981384696694, + "grad_norm": 0.0, + "learning_rate": 1.873022887208882e-05, + "loss": 1.4414, + "step": 6379 + }, + { + "epoch": 0.18732750014680838, + "grad_norm": 0.0, + "learning_rate": 1.8729765068990463e-05, + "loss": 1.291, + "step": 6380 + }, + { + "epoch": 0.1873568618239474, + "grad_norm": 0.0, + "learning_rate": 1.8729301186946605e-05, + "loss": 1.3711, + "step": 6381 + }, + { + "epoch": 0.18738622350108639, + "grad_norm": 0.0, + "learning_rate": 1.8728837225961438e-05, + "loss": 1.4814, + "step": 6382 + }, + { + "epoch": 0.18741558517822537, + "grad_norm": 0.0, + "learning_rate": 1.8728373186039163e-05, + "loss": 1.4541, + "step": 6383 + }, + { + "epoch": 0.1874449468553644, + "grad_norm": 0.0, + "learning_rate": 1.8727909067183972e-05, + "loss": 1.3721, + "step": 6384 + }, + { + "epoch": 0.18747430853250338, + "grad_norm": 0.0, + "learning_rate": 1.8727444869400065e-05, + "loss": 1.3857, + "step": 6385 + }, + { + "epoch": 0.18750367020964237, + "grad_norm": 0.0, + "learning_rate": 1.8726980592691637e-05, + "loss": 1.5352, + "step": 6386 + }, + { + "epoch": 0.18753303188678136, + "grad_norm": 0.0, + "learning_rate": 1.872651623706289e-05, + "loss": 1.3848, + "step": 6387 + }, + { + "epoch": 0.18756239356392038, + "grad_norm": 0.0, + "learning_rate": 1.872605180251802e-05, + "loss": 1.1963, + "step": 6388 + }, + { + "epoch": 0.18759175524105937, + "grad_norm": 0.0, + "learning_rate": 1.8725587289061232e-05, + "loss": 1.3486, + "step": 6389 + }, + { + "epoch": 0.18762111691819836, + "grad_norm": 0.0, + "learning_rate": 1.8725122696696723e-05, + "loss": 1.4023, + "step": 6390 + }, + { + "epoch": 0.18765047859533737, + "grad_norm": 0.0, + "learning_rate": 1.8724658025428694e-05, + "loss": 1.3643, + "step": 6391 + }, + { + "epoch": 0.18767984027247636, + "grad_norm": 0.0, + "learning_rate": 1.8724193275261347e-05, + "loss": 1.4697, + "step": 6392 + }, + { + "epoch": 0.18770920194961535, + "grad_norm": 0.0, + "learning_rate": 1.872372844619889e-05, + "loss": 1.4443, + "step": 6393 + }, + { + "epoch": 0.18773856362675437, + "grad_norm": 0.0, + "learning_rate": 1.8723263538245517e-05, + "loss": 1.374, + "step": 6394 + }, + { + "epoch": 0.18776792530389336, + "grad_norm": 0.0, + "learning_rate": 1.8722798551405443e-05, + "loss": 1.5029, + "step": 6395 + }, + { + "epoch": 0.18779728698103235, + "grad_norm": 0.0, + "learning_rate": 1.8722333485682863e-05, + "loss": 1.3682, + "step": 6396 + }, + { + "epoch": 0.18782664865817136, + "grad_norm": 0.0, + "learning_rate": 1.8721868341081987e-05, + "loss": 1.6084, + "step": 6397 + }, + { + "epoch": 0.18785601033531035, + "grad_norm": 0.0, + "learning_rate": 1.8721403117607023e-05, + "loss": 1.4219, + "step": 6398 + }, + { + "epoch": 0.18788537201244934, + "grad_norm": 0.0, + "learning_rate": 1.8720937815262178e-05, + "loss": 1.4922, + "step": 6399 + }, + { + "epoch": 0.18791473368958836, + "grad_norm": 0.0, + "learning_rate": 1.872047243405166e-05, + "loss": 1.4668, + "step": 6400 + }, + { + "epoch": 0.18794409536672735, + "grad_norm": 0.0, + "learning_rate": 1.8720006973979676e-05, + "loss": 1.5977, + "step": 6401 + }, + { + "epoch": 0.18797345704386634, + "grad_norm": 0.0, + "learning_rate": 1.8719541435050438e-05, + "loss": 1.375, + "step": 6402 + }, + { + "epoch": 0.18800281872100535, + "grad_norm": 0.0, + "learning_rate": 1.871907581726815e-05, + "loss": 1.5244, + "step": 6403 + }, + { + "epoch": 0.18803218039814434, + "grad_norm": 0.0, + "learning_rate": 1.8718610120637026e-05, + "loss": 1.3223, + "step": 6404 + }, + { + "epoch": 0.18806154207528333, + "grad_norm": 0.0, + "learning_rate": 1.8718144345161277e-05, + "loss": 1.4502, + "step": 6405 + }, + { + "epoch": 0.18809090375242235, + "grad_norm": 0.0, + "learning_rate": 1.871767849084512e-05, + "loss": 1.5195, + "step": 6406 + }, + { + "epoch": 0.18812026542956134, + "grad_norm": 0.0, + "learning_rate": 1.871721255769276e-05, + "loss": 1.4727, + "step": 6407 + }, + { + "epoch": 0.18814962710670033, + "grad_norm": 0.0, + "learning_rate": 1.871674654570842e-05, + "loss": 1.5537, + "step": 6408 + }, + { + "epoch": 0.18817898878383935, + "grad_norm": 0.0, + "learning_rate": 1.8716280454896304e-05, + "loss": 1.4014, + "step": 6409 + }, + { + "epoch": 0.18820835046097834, + "grad_norm": 0.0, + "learning_rate": 1.871581428526063e-05, + "loss": 1.4688, + "step": 6410 + }, + { + "epoch": 0.18823771213811732, + "grad_norm": 0.0, + "learning_rate": 1.871534803680562e-05, + "loss": 1.4336, + "step": 6411 + }, + { + "epoch": 0.18826707381525631, + "grad_norm": 0.0, + "learning_rate": 1.8714881709535482e-05, + "loss": 1.4229, + "step": 6412 + }, + { + "epoch": 0.18829643549239533, + "grad_norm": 0.0, + "learning_rate": 1.8714415303454435e-05, + "loss": 1.4072, + "step": 6413 + }, + { + "epoch": 0.18832579716953432, + "grad_norm": 0.0, + "learning_rate": 1.8713948818566706e-05, + "loss": 1.4941, + "step": 6414 + }, + { + "epoch": 0.1883551588466733, + "grad_norm": 0.0, + "learning_rate": 1.8713482254876497e-05, + "loss": 1.5195, + "step": 6415 + }, + { + "epoch": 0.18838452052381233, + "grad_norm": 0.0, + "learning_rate": 1.8713015612388042e-05, + "loss": 1.4541, + "step": 6416 + }, + { + "epoch": 0.18841388220095132, + "grad_norm": 0.0, + "learning_rate": 1.8712548891105554e-05, + "loss": 1.4736, + "step": 6417 + }, + { + "epoch": 0.1884432438780903, + "grad_norm": 0.0, + "learning_rate": 1.8712082091033257e-05, + "loss": 1.4932, + "step": 6418 + }, + { + "epoch": 0.18847260555522932, + "grad_norm": 0.0, + "learning_rate": 1.871161521217537e-05, + "loss": 1.3789, + "step": 6419 + }, + { + "epoch": 0.1885019672323683, + "grad_norm": 0.0, + "learning_rate": 1.8711148254536114e-05, + "loss": 1.4219, + "step": 6420 + }, + { + "epoch": 0.1885313289095073, + "grad_norm": 0.0, + "learning_rate": 1.8710681218119713e-05, + "loss": 1.373, + "step": 6421 + }, + { + "epoch": 0.18856069058664632, + "grad_norm": 0.0, + "learning_rate": 1.8710214102930394e-05, + "loss": 1.4482, + "step": 6422 + }, + { + "epoch": 0.1885900522637853, + "grad_norm": 0.0, + "learning_rate": 1.8709746908972374e-05, + "loss": 1.5322, + "step": 6423 + }, + { + "epoch": 0.1886194139409243, + "grad_norm": 0.0, + "learning_rate": 1.8709279636249886e-05, + "loss": 1.5527, + "step": 6424 + }, + { + "epoch": 0.1886487756180633, + "grad_norm": 0.0, + "learning_rate": 1.870881228476715e-05, + "loss": 1.3477, + "step": 6425 + }, + { + "epoch": 0.1886781372952023, + "grad_norm": 0.0, + "learning_rate": 1.870834485452839e-05, + "loss": 1.457, + "step": 6426 + }, + { + "epoch": 0.1887074989723413, + "grad_norm": 0.0, + "learning_rate": 1.8707877345537846e-05, + "loss": 1.4805, + "step": 6427 + }, + { + "epoch": 0.1887368606494803, + "grad_norm": 0.0, + "learning_rate": 1.870740975779973e-05, + "loss": 1.4492, + "step": 6428 + }, + { + "epoch": 0.1887662223266193, + "grad_norm": 0.0, + "learning_rate": 1.8706942091318284e-05, + "loss": 1.376, + "step": 6429 + }, + { + "epoch": 0.1887955840037583, + "grad_norm": 0.0, + "learning_rate": 1.8706474346097723e-05, + "loss": 1.4004, + "step": 6430 + }, + { + "epoch": 0.1888249456808973, + "grad_norm": 0.0, + "learning_rate": 1.870600652214229e-05, + "loss": 1.5518, + "step": 6431 + }, + { + "epoch": 0.1888543073580363, + "grad_norm": 0.0, + "learning_rate": 1.870553861945621e-05, + "loss": 1.3896, + "step": 6432 + }, + { + "epoch": 0.18888366903517528, + "grad_norm": 0.0, + "learning_rate": 1.870507063804371e-05, + "loss": 1.5244, + "step": 6433 + }, + { + "epoch": 0.1889130307123143, + "grad_norm": 0.0, + "learning_rate": 1.8704602577909034e-05, + "loss": 1.4658, + "step": 6434 + }, + { + "epoch": 0.1889423923894533, + "grad_norm": 0.0, + "learning_rate": 1.8704134439056403e-05, + "loss": 1.3809, + "step": 6435 + }, + { + "epoch": 0.18897175406659228, + "grad_norm": 0.0, + "learning_rate": 1.8703666221490055e-05, + "loss": 1.6006, + "step": 6436 + }, + { + "epoch": 0.18900111574373127, + "grad_norm": 0.0, + "learning_rate": 1.8703197925214226e-05, + "loss": 1.5127, + "step": 6437 + }, + { + "epoch": 0.18903047742087029, + "grad_norm": 0.0, + "learning_rate": 1.8702729550233147e-05, + "loss": 1.5566, + "step": 6438 + }, + { + "epoch": 0.18905983909800927, + "grad_norm": 0.0, + "learning_rate": 1.870226109655106e-05, + "loss": 1.3594, + "step": 6439 + }, + { + "epoch": 0.18908920077514826, + "grad_norm": 0.0, + "learning_rate": 1.8701792564172196e-05, + "loss": 1.373, + "step": 6440 + }, + { + "epoch": 0.18911856245228728, + "grad_norm": 0.0, + "learning_rate": 1.870132395310079e-05, + "loss": 1.4043, + "step": 6441 + }, + { + "epoch": 0.18914792412942627, + "grad_norm": 0.0, + "learning_rate": 1.8700855263341086e-05, + "loss": 1.4092, + "step": 6442 + }, + { + "epoch": 0.18917728580656526, + "grad_norm": 0.0, + "learning_rate": 1.8700386494897315e-05, + "loss": 1.3135, + "step": 6443 + }, + { + "epoch": 0.18920664748370428, + "grad_norm": 0.0, + "learning_rate": 1.8699917647773725e-05, + "loss": 1.5527, + "step": 6444 + }, + { + "epoch": 0.18923600916084327, + "grad_norm": 0.0, + "learning_rate": 1.869944872197455e-05, + "loss": 1.3965, + "step": 6445 + }, + { + "epoch": 0.18926537083798226, + "grad_norm": 0.0, + "learning_rate": 1.8698979717504036e-05, + "loss": 1.373, + "step": 6446 + }, + { + "epoch": 0.18929473251512127, + "grad_norm": 0.0, + "learning_rate": 1.8698510634366418e-05, + "loss": 1.3193, + "step": 6447 + }, + { + "epoch": 0.18932409419226026, + "grad_norm": 0.0, + "learning_rate": 1.869804147256594e-05, + "loss": 1.4316, + "step": 6448 + }, + { + "epoch": 0.18935345586939925, + "grad_norm": 0.0, + "learning_rate": 1.8697572232106847e-05, + "loss": 1.5059, + "step": 6449 + }, + { + "epoch": 0.18938281754653827, + "grad_norm": 0.0, + "learning_rate": 1.8697102912993378e-05, + "loss": 1.3965, + "step": 6450 + }, + { + "epoch": 0.18941217922367726, + "grad_norm": 0.0, + "learning_rate": 1.869663351522978e-05, + "loss": 1.4307, + "step": 6451 + }, + { + "epoch": 0.18944154090081625, + "grad_norm": 0.0, + "learning_rate": 1.86961640388203e-05, + "loss": 1.4541, + "step": 6452 + }, + { + "epoch": 0.18947090257795526, + "grad_norm": 0.0, + "learning_rate": 1.869569448376918e-05, + "loss": 1.3486, + "step": 6453 + }, + { + "epoch": 0.18950026425509425, + "grad_norm": 0.0, + "learning_rate": 1.8695224850080673e-05, + "loss": 1.4229, + "step": 6454 + }, + { + "epoch": 0.18952962593223324, + "grad_norm": 0.0, + "learning_rate": 1.8694755137759016e-05, + "loss": 1.4951, + "step": 6455 + }, + { + "epoch": 0.18955898760937226, + "grad_norm": 0.0, + "learning_rate": 1.869428534680846e-05, + "loss": 1.5449, + "step": 6456 + }, + { + "epoch": 0.18958834928651125, + "grad_norm": 0.0, + "learning_rate": 1.8693815477233258e-05, + "loss": 1.3711, + "step": 6457 + }, + { + "epoch": 0.18961771096365024, + "grad_norm": 0.0, + "learning_rate": 1.8693345529037655e-05, + "loss": 1.5254, + "step": 6458 + }, + { + "epoch": 0.18964707264078925, + "grad_norm": 0.0, + "learning_rate": 1.8692875502225902e-05, + "loss": 1.377, + "step": 6459 + }, + { + "epoch": 0.18967643431792824, + "grad_norm": 0.0, + "learning_rate": 1.8692405396802248e-05, + "loss": 1.2939, + "step": 6460 + }, + { + "epoch": 0.18970579599506723, + "grad_norm": 0.0, + "learning_rate": 1.8691935212770948e-05, + "loss": 1.5371, + "step": 6461 + }, + { + "epoch": 0.18973515767220622, + "grad_norm": 0.0, + "learning_rate": 1.869146495013625e-05, + "loss": 1.4668, + "step": 6462 + }, + { + "epoch": 0.18976451934934524, + "grad_norm": 0.0, + "learning_rate": 1.8690994608902412e-05, + "loss": 1.4277, + "step": 6463 + }, + { + "epoch": 0.18979388102648423, + "grad_norm": 0.0, + "learning_rate": 1.8690524189073684e-05, + "loss": 1.4395, + "step": 6464 + }, + { + "epoch": 0.18982324270362322, + "grad_norm": 0.0, + "learning_rate": 1.8690053690654315e-05, + "loss": 1.5342, + "step": 6465 + }, + { + "epoch": 0.18985260438076224, + "grad_norm": 0.0, + "learning_rate": 1.8689583113648567e-05, + "loss": 1.5801, + "step": 6466 + }, + { + "epoch": 0.18988196605790122, + "grad_norm": 0.0, + "learning_rate": 1.8689112458060696e-05, + "loss": 1.5078, + "step": 6467 + }, + { + "epoch": 0.18991132773504021, + "grad_norm": 0.0, + "learning_rate": 1.8688641723894955e-05, + "loss": 1.4434, + "step": 6468 + }, + { + "epoch": 0.18994068941217923, + "grad_norm": 0.0, + "learning_rate": 1.8688170911155598e-05, + "loss": 1.4473, + "step": 6469 + }, + { + "epoch": 0.18997005108931822, + "grad_norm": 0.0, + "learning_rate": 1.868770001984689e-05, + "loss": 1.4287, + "step": 6470 + }, + { + "epoch": 0.1899994127664572, + "grad_norm": 0.0, + "learning_rate": 1.8687229049973083e-05, + "loss": 1.4062, + "step": 6471 + }, + { + "epoch": 0.19002877444359623, + "grad_norm": 0.0, + "learning_rate": 1.8686758001538443e-05, + "loss": 1.4102, + "step": 6472 + }, + { + "epoch": 0.19005813612073522, + "grad_norm": 0.0, + "learning_rate": 1.8686286874547223e-05, + "loss": 1.4424, + "step": 6473 + }, + { + "epoch": 0.1900874977978742, + "grad_norm": 0.0, + "learning_rate": 1.868581566900369e-05, + "loss": 1.457, + "step": 6474 + }, + { + "epoch": 0.19011685947501322, + "grad_norm": 0.0, + "learning_rate": 1.8685344384912096e-05, + "loss": 1.3853, + "step": 6475 + }, + { + "epoch": 0.1901462211521522, + "grad_norm": 0.0, + "learning_rate": 1.868487302227671e-05, + "loss": 1.4697, + "step": 6476 + }, + { + "epoch": 0.1901755828292912, + "grad_norm": 0.0, + "learning_rate": 1.8684401581101793e-05, + "loss": 1.4863, + "step": 6477 + }, + { + "epoch": 0.19020494450643022, + "grad_norm": 0.0, + "learning_rate": 1.8683930061391608e-05, + "loss": 1.4736, + "step": 6478 + }, + { + "epoch": 0.1902343061835692, + "grad_norm": 0.0, + "learning_rate": 1.8683458463150423e-05, + "loss": 1.335, + "step": 6479 + }, + { + "epoch": 0.1902636678607082, + "grad_norm": 0.0, + "learning_rate": 1.8682986786382497e-05, + "loss": 1.4121, + "step": 6480 + }, + { + "epoch": 0.1902930295378472, + "grad_norm": 0.0, + "learning_rate": 1.8682515031092093e-05, + "loss": 1.4209, + "step": 6481 + }, + { + "epoch": 0.1903223912149862, + "grad_norm": 0.0, + "learning_rate": 1.8682043197283487e-05, + "loss": 1.4668, + "step": 6482 + }, + { + "epoch": 0.1903517528921252, + "grad_norm": 0.0, + "learning_rate": 1.868157128496094e-05, + "loss": 1.25, + "step": 6483 + }, + { + "epoch": 0.1903811145692642, + "grad_norm": 0.0, + "learning_rate": 1.8681099294128722e-05, + "loss": 1.4326, + "step": 6484 + }, + { + "epoch": 0.1904104762464032, + "grad_norm": 0.0, + "learning_rate": 1.8680627224791098e-05, + "loss": 1.4404, + "step": 6485 + }, + { + "epoch": 0.1904398379235422, + "grad_norm": 0.0, + "learning_rate": 1.8680155076952338e-05, + "loss": 1.3945, + "step": 6486 + }, + { + "epoch": 0.19046919960068118, + "grad_norm": 0.0, + "learning_rate": 1.8679682850616717e-05, + "loss": 1.5283, + "step": 6487 + }, + { + "epoch": 0.1904985612778202, + "grad_norm": 0.0, + "learning_rate": 1.8679210545788495e-05, + "loss": 1.5098, + "step": 6488 + }, + { + "epoch": 0.19052792295495918, + "grad_norm": 0.0, + "learning_rate": 1.8678738162471953e-05, + "loss": 1.3984, + "step": 6489 + }, + { + "epoch": 0.19055728463209817, + "grad_norm": 0.0, + "learning_rate": 1.8678265700671358e-05, + "loss": 1.4248, + "step": 6490 + }, + { + "epoch": 0.1905866463092372, + "grad_norm": 0.0, + "learning_rate": 1.8677793160390984e-05, + "loss": 1.5742, + "step": 6491 + }, + { + "epoch": 0.19061600798637618, + "grad_norm": 0.0, + "learning_rate": 1.8677320541635106e-05, + "loss": 1.5352, + "step": 6492 + }, + { + "epoch": 0.19064536966351517, + "grad_norm": 0.0, + "learning_rate": 1.867684784440799e-05, + "loss": 1.4297, + "step": 6493 + }, + { + "epoch": 0.19067473134065419, + "grad_norm": 0.0, + "learning_rate": 1.8676375068713923e-05, + "loss": 1.5986, + "step": 6494 + }, + { + "epoch": 0.19070409301779317, + "grad_norm": 0.0, + "learning_rate": 1.8675902214557173e-05, + "loss": 1.4482, + "step": 6495 + }, + { + "epoch": 0.19073345469493216, + "grad_norm": 0.0, + "learning_rate": 1.8675429281942018e-05, + "loss": 1.4229, + "step": 6496 + }, + { + "epoch": 0.19076281637207118, + "grad_norm": 0.0, + "learning_rate": 1.8674956270872734e-05, + "loss": 1.4697, + "step": 6497 + }, + { + "epoch": 0.19079217804921017, + "grad_norm": 0.0, + "learning_rate": 1.8674483181353595e-05, + "loss": 1.4248, + "step": 6498 + }, + { + "epoch": 0.19082153972634916, + "grad_norm": 0.0, + "learning_rate": 1.8674010013388886e-05, + "loss": 1.4814, + "step": 6499 + }, + { + "epoch": 0.19085090140348818, + "grad_norm": 0.0, + "learning_rate": 1.8673536766982884e-05, + "loss": 1.4072, + "step": 6500 + }, + { + "epoch": 0.19088026308062717, + "grad_norm": 0.0, + "learning_rate": 1.8673063442139866e-05, + "loss": 1.3447, + "step": 6501 + }, + { + "epoch": 0.19090962475776616, + "grad_norm": 0.0, + "learning_rate": 1.8672590038864112e-05, + "loss": 1.4502, + "step": 6502 + }, + { + "epoch": 0.19093898643490517, + "grad_norm": 0.0, + "learning_rate": 1.8672116557159906e-05, + "loss": 1.4785, + "step": 6503 + }, + { + "epoch": 0.19096834811204416, + "grad_norm": 0.0, + "learning_rate": 1.8671642997031533e-05, + "loss": 1.502, + "step": 6504 + }, + { + "epoch": 0.19099770978918315, + "grad_norm": 0.0, + "learning_rate": 1.8671169358483266e-05, + "loss": 1.4805, + "step": 6505 + }, + { + "epoch": 0.19102707146632217, + "grad_norm": 0.0, + "learning_rate": 1.86706956415194e-05, + "loss": 1.4268, + "step": 6506 + }, + { + "epoch": 0.19105643314346116, + "grad_norm": 0.0, + "learning_rate": 1.8670221846144207e-05, + "loss": 1.4346, + "step": 6507 + }, + { + "epoch": 0.19108579482060015, + "grad_norm": 0.0, + "learning_rate": 1.866974797236198e-05, + "loss": 1.4639, + "step": 6508 + }, + { + "epoch": 0.19111515649773916, + "grad_norm": 0.0, + "learning_rate": 1.8669274020177002e-05, + "loss": 1.4521, + "step": 6509 + }, + { + "epoch": 0.19114451817487815, + "grad_norm": 0.0, + "learning_rate": 1.8668799989593556e-05, + "loss": 1.3413, + "step": 6510 + }, + { + "epoch": 0.19117387985201714, + "grad_norm": 0.0, + "learning_rate": 1.8668325880615937e-05, + "loss": 1.4443, + "step": 6511 + }, + { + "epoch": 0.19120324152915613, + "grad_norm": 0.0, + "learning_rate": 1.8667851693248424e-05, + "loss": 1.3936, + "step": 6512 + }, + { + "epoch": 0.19123260320629515, + "grad_norm": 0.0, + "learning_rate": 1.866737742749531e-05, + "loss": 1.5029, + "step": 6513 + }, + { + "epoch": 0.19126196488343414, + "grad_norm": 0.0, + "learning_rate": 1.866690308336088e-05, + "loss": 1.4248, + "step": 6514 + }, + { + "epoch": 0.19129132656057313, + "grad_norm": 0.0, + "learning_rate": 1.8666428660849425e-05, + "loss": 1.4131, + "step": 6515 + }, + { + "epoch": 0.19132068823771214, + "grad_norm": 0.0, + "learning_rate": 1.866595415996524e-05, + "loss": 1.332, + "step": 6516 + }, + { + "epoch": 0.19135004991485113, + "grad_norm": 0.0, + "learning_rate": 1.8665479580712607e-05, + "loss": 1.5176, + "step": 6517 + }, + { + "epoch": 0.19137941159199012, + "grad_norm": 0.0, + "learning_rate": 1.8665004923095824e-05, + "loss": 1.4961, + "step": 6518 + }, + { + "epoch": 0.19140877326912914, + "grad_norm": 0.0, + "learning_rate": 1.8664530187119186e-05, + "loss": 1.4639, + "step": 6519 + }, + { + "epoch": 0.19143813494626813, + "grad_norm": 0.0, + "learning_rate": 1.866405537278698e-05, + "loss": 1.4785, + "step": 6520 + }, + { + "epoch": 0.19146749662340712, + "grad_norm": 0.0, + "learning_rate": 1.8663580480103503e-05, + "loss": 1.4287, + "step": 6521 + }, + { + "epoch": 0.19149685830054614, + "grad_norm": 0.0, + "learning_rate": 1.8663105509073047e-05, + "loss": 1.4355, + "step": 6522 + }, + { + "epoch": 0.19152621997768512, + "grad_norm": 0.0, + "learning_rate": 1.866263045969991e-05, + "loss": 1.4424, + "step": 6523 + }, + { + "epoch": 0.19155558165482411, + "grad_norm": 0.0, + "learning_rate": 1.866215533198839e-05, + "loss": 1.4199, + "step": 6524 + }, + { + "epoch": 0.19158494333196313, + "grad_norm": 0.0, + "learning_rate": 1.8661680125942778e-05, + "loss": 1.5596, + "step": 6525 + }, + { + "epoch": 0.19161430500910212, + "grad_norm": 0.0, + "learning_rate": 1.8661204841567377e-05, + "loss": 1.4541, + "step": 6526 + }, + { + "epoch": 0.1916436666862411, + "grad_norm": 0.0, + "learning_rate": 1.866072947886648e-05, + "loss": 1.4673, + "step": 6527 + }, + { + "epoch": 0.19167302836338013, + "grad_norm": 0.0, + "learning_rate": 1.866025403784439e-05, + "loss": 1.4277, + "step": 6528 + }, + { + "epoch": 0.19170239004051912, + "grad_norm": 0.0, + "learning_rate": 1.86597785185054e-05, + "loss": 1.5781, + "step": 6529 + }, + { + "epoch": 0.1917317517176581, + "grad_norm": 0.0, + "learning_rate": 1.8659302920853822e-05, + "loss": 1.5059, + "step": 6530 + }, + { + "epoch": 0.19176111339479712, + "grad_norm": 0.0, + "learning_rate": 1.8658827244893944e-05, + "loss": 1.5166, + "step": 6531 + }, + { + "epoch": 0.1917904750719361, + "grad_norm": 0.0, + "learning_rate": 1.865835149063008e-05, + "loss": 1.3418, + "step": 6532 + }, + { + "epoch": 0.1918198367490751, + "grad_norm": 0.0, + "learning_rate": 1.865787565806652e-05, + "loss": 1.4375, + "step": 6533 + }, + { + "epoch": 0.19184919842621412, + "grad_norm": 0.0, + "learning_rate": 1.8657399747207576e-05, + "loss": 1.3613, + "step": 6534 + }, + { + "epoch": 0.1918785601033531, + "grad_norm": 0.0, + "learning_rate": 1.8656923758057547e-05, + "loss": 1.4541, + "step": 6535 + }, + { + "epoch": 0.1919079217804921, + "grad_norm": 0.0, + "learning_rate": 1.8656447690620743e-05, + "loss": 1.3018, + "step": 6536 + }, + { + "epoch": 0.1919372834576311, + "grad_norm": 0.0, + "learning_rate": 1.8655971544901465e-05, + "loss": 1.396, + "step": 6537 + }, + { + "epoch": 0.1919666451347701, + "grad_norm": 0.0, + "learning_rate": 1.8655495320904017e-05, + "loss": 1.3994, + "step": 6538 + }, + { + "epoch": 0.1919960068119091, + "grad_norm": 0.0, + "learning_rate": 1.865501901863271e-05, + "loss": 1.4912, + "step": 6539 + }, + { + "epoch": 0.19202536848904808, + "grad_norm": 0.0, + "learning_rate": 1.8654542638091844e-05, + "loss": 1.4971, + "step": 6540 + }, + { + "epoch": 0.1920547301661871, + "grad_norm": 0.0, + "learning_rate": 1.865406617928574e-05, + "loss": 1.3018, + "step": 6541 + }, + { + "epoch": 0.1920840918433261, + "grad_norm": 0.0, + "learning_rate": 1.8653589642218697e-05, + "loss": 1.3511, + "step": 6542 + }, + { + "epoch": 0.19211345352046508, + "grad_norm": 0.0, + "learning_rate": 1.8653113026895026e-05, + "loss": 1.4941, + "step": 6543 + }, + { + "epoch": 0.1921428151976041, + "grad_norm": 0.0, + "learning_rate": 1.8652636333319035e-05, + "loss": 1.4209, + "step": 6544 + }, + { + "epoch": 0.19217217687474308, + "grad_norm": 0.0, + "learning_rate": 1.8652159561495043e-05, + "loss": 1.334, + "step": 6545 + }, + { + "epoch": 0.19220153855188207, + "grad_norm": 0.0, + "learning_rate": 1.8651682711427354e-05, + "loss": 1.5098, + "step": 6546 + }, + { + "epoch": 0.1922309002290211, + "grad_norm": 0.0, + "learning_rate": 1.8651205783120282e-05, + "loss": 1.4688, + "step": 6547 + }, + { + "epoch": 0.19226026190616008, + "grad_norm": 0.0, + "learning_rate": 1.865072877657814e-05, + "loss": 1.5068, + "step": 6548 + }, + { + "epoch": 0.19228962358329907, + "grad_norm": 0.0, + "learning_rate": 1.865025169180524e-05, + "loss": 1.4697, + "step": 6549 + }, + { + "epoch": 0.19231898526043809, + "grad_norm": 0.0, + "learning_rate": 1.8649774528805902e-05, + "loss": 1.4346, + "step": 6550 + }, + { + "epoch": 0.19234834693757707, + "grad_norm": 0.0, + "learning_rate": 1.864929728758444e-05, + "loss": 1.4512, + "step": 6551 + }, + { + "epoch": 0.19237770861471606, + "grad_norm": 0.0, + "learning_rate": 1.8648819968145166e-05, + "loss": 1.3633, + "step": 6552 + }, + { + "epoch": 0.19240707029185508, + "grad_norm": 0.0, + "learning_rate": 1.8648342570492395e-05, + "loss": 1.3237, + "step": 6553 + }, + { + "epoch": 0.19243643196899407, + "grad_norm": 0.0, + "learning_rate": 1.864786509463045e-05, + "loss": 1.4746, + "step": 6554 + }, + { + "epoch": 0.19246579364613306, + "grad_norm": 0.0, + "learning_rate": 1.864738754056364e-05, + "loss": 1.418, + "step": 6555 + }, + { + "epoch": 0.19249515532327208, + "grad_norm": 0.0, + "learning_rate": 1.8646909908296296e-05, + "loss": 1.4727, + "step": 6556 + }, + { + "epoch": 0.19252451700041107, + "grad_norm": 0.0, + "learning_rate": 1.864643219783273e-05, + "loss": 1.3931, + "step": 6557 + }, + { + "epoch": 0.19255387867755006, + "grad_norm": 0.0, + "learning_rate": 1.8645954409177264e-05, + "loss": 1.3574, + "step": 6558 + }, + { + "epoch": 0.19258324035468907, + "grad_norm": 0.0, + "learning_rate": 1.8645476542334217e-05, + "loss": 1.3672, + "step": 6559 + }, + { + "epoch": 0.19261260203182806, + "grad_norm": 0.0, + "learning_rate": 1.8644998597307912e-05, + "loss": 1.3662, + "step": 6560 + }, + { + "epoch": 0.19264196370896705, + "grad_norm": 0.0, + "learning_rate": 1.864452057410267e-05, + "loss": 1.668, + "step": 6561 + }, + { + "epoch": 0.19267132538610607, + "grad_norm": 0.0, + "learning_rate": 1.8644042472722818e-05, + "loss": 1.5361, + "step": 6562 + }, + { + "epoch": 0.19270068706324506, + "grad_norm": 0.0, + "learning_rate": 1.8643564293172672e-05, + "loss": 1.3828, + "step": 6563 + }, + { + "epoch": 0.19273004874038405, + "grad_norm": 0.0, + "learning_rate": 1.864308603545656e-05, + "loss": 1.5293, + "step": 6564 + }, + { + "epoch": 0.19275941041752304, + "grad_norm": 0.0, + "learning_rate": 1.8642607699578808e-05, + "loss": 1.4385, + "step": 6565 + }, + { + "epoch": 0.19278877209466205, + "grad_norm": 0.0, + "learning_rate": 1.8642129285543743e-05, + "loss": 1.4824, + "step": 6566 + }, + { + "epoch": 0.19281813377180104, + "grad_norm": 0.0, + "learning_rate": 1.864165079335569e-05, + "loss": 1.5195, + "step": 6567 + }, + { + "epoch": 0.19284749544894003, + "grad_norm": 0.0, + "learning_rate": 1.8641172223018974e-05, + "loss": 1.4932, + "step": 6568 + }, + { + "epoch": 0.19287685712607905, + "grad_norm": 0.0, + "learning_rate": 1.864069357453793e-05, + "loss": 1.459, + "step": 6569 + }, + { + "epoch": 0.19290621880321804, + "grad_norm": 0.0, + "learning_rate": 1.864021484791687e-05, + "loss": 1.6279, + "step": 6570 + }, + { + "epoch": 0.19293558048035703, + "grad_norm": 0.0, + "learning_rate": 1.8639736043160143e-05, + "loss": 1.3125, + "step": 6571 + }, + { + "epoch": 0.19296494215749604, + "grad_norm": 0.0, + "learning_rate": 1.8639257160272066e-05, + "loss": 1.458, + "step": 6572 + }, + { + "epoch": 0.19299430383463503, + "grad_norm": 0.0, + "learning_rate": 1.8638778199256977e-05, + "loss": 1.5029, + "step": 6573 + }, + { + "epoch": 0.19302366551177402, + "grad_norm": 0.0, + "learning_rate": 1.8638299160119203e-05, + "loss": 1.3047, + "step": 6574 + }, + { + "epoch": 0.19305302718891304, + "grad_norm": 0.0, + "learning_rate": 1.863782004286308e-05, + "loss": 1.4619, + "step": 6575 + }, + { + "epoch": 0.19308238886605203, + "grad_norm": 0.0, + "learning_rate": 1.8637340847492935e-05, + "loss": 1.3281, + "step": 6576 + }, + { + "epoch": 0.19311175054319102, + "grad_norm": 0.0, + "learning_rate": 1.8636861574013104e-05, + "loss": 1.2861, + "step": 6577 + }, + { + "epoch": 0.19314111222033004, + "grad_norm": 0.0, + "learning_rate": 1.8636382222427922e-05, + "loss": 1.2773, + "step": 6578 + }, + { + "epoch": 0.19317047389746902, + "grad_norm": 0.0, + "learning_rate": 1.8635902792741728e-05, + "loss": 1.4609, + "step": 6579 + }, + { + "epoch": 0.19319983557460801, + "grad_norm": 0.0, + "learning_rate": 1.8635423284958845e-05, + "loss": 1.5078, + "step": 6580 + }, + { + "epoch": 0.19322919725174703, + "grad_norm": 0.0, + "learning_rate": 1.8634943699083624e-05, + "loss": 1.4658, + "step": 6581 + }, + { + "epoch": 0.19325855892888602, + "grad_norm": 0.0, + "learning_rate": 1.8634464035120396e-05, + "loss": 1.4795, + "step": 6582 + }, + { + "epoch": 0.193287920606025, + "grad_norm": 0.0, + "learning_rate": 1.86339842930735e-05, + "loss": 1.3994, + "step": 6583 + }, + { + "epoch": 0.19331728228316403, + "grad_norm": 0.0, + "learning_rate": 1.8633504472947268e-05, + "loss": 1.5938, + "step": 6584 + }, + { + "epoch": 0.19334664396030302, + "grad_norm": 0.0, + "learning_rate": 1.8633024574746046e-05, + "loss": 1.5068, + "step": 6585 + }, + { + "epoch": 0.193376005637442, + "grad_norm": 0.0, + "learning_rate": 1.863254459847417e-05, + "loss": 1.4375, + "step": 6586 + }, + { + "epoch": 0.19340536731458102, + "grad_norm": 0.0, + "learning_rate": 1.8632064544135987e-05, + "loss": 1.3203, + "step": 6587 + }, + { + "epoch": 0.19343472899172, + "grad_norm": 0.0, + "learning_rate": 1.863158441173583e-05, + "loss": 1.4277, + "step": 6588 + }, + { + "epoch": 0.193464090668859, + "grad_norm": 0.0, + "learning_rate": 1.8631104201278047e-05, + "loss": 1.457, + "step": 6589 + }, + { + "epoch": 0.193493452345998, + "grad_norm": 0.0, + "learning_rate": 1.8630623912766973e-05, + "loss": 1.3799, + "step": 6590 + }, + { + "epoch": 0.193522814023137, + "grad_norm": 0.0, + "learning_rate": 1.8630143546206964e-05, + "loss": 1.502, + "step": 6591 + }, + { + "epoch": 0.193552175700276, + "grad_norm": 0.0, + "learning_rate": 1.8629663101602352e-05, + "loss": 1.4092, + "step": 6592 + }, + { + "epoch": 0.193581537377415, + "grad_norm": 0.0, + "learning_rate": 1.8629182578957484e-05, + "loss": 1.4326, + "step": 6593 + }, + { + "epoch": 0.193610899054554, + "grad_norm": 0.0, + "learning_rate": 1.8628701978276714e-05, + "loss": 1.3184, + "step": 6594 + }, + { + "epoch": 0.193640260731693, + "grad_norm": 0.0, + "learning_rate": 1.862822129956438e-05, + "loss": 1.6035, + "step": 6595 + }, + { + "epoch": 0.19366962240883198, + "grad_norm": 0.0, + "learning_rate": 1.862774054282483e-05, + "loss": 1.4746, + "step": 6596 + }, + { + "epoch": 0.193698984085971, + "grad_norm": 0.0, + "learning_rate": 1.862725970806241e-05, + "loss": 1.4521, + "step": 6597 + }, + { + "epoch": 0.19372834576311, + "grad_norm": 0.0, + "learning_rate": 1.8626778795281476e-05, + "loss": 1.4756, + "step": 6598 + }, + { + "epoch": 0.19375770744024898, + "grad_norm": 0.0, + "learning_rate": 1.862629780448637e-05, + "loss": 1.4922, + "step": 6599 + }, + { + "epoch": 0.193787069117388, + "grad_norm": 0.0, + "learning_rate": 1.8625816735681445e-05, + "loss": 1.4883, + "step": 6600 + }, + { + "epoch": 0.19381643079452698, + "grad_norm": 0.0, + "learning_rate": 1.862533558887105e-05, + "loss": 1.3623, + "step": 6601 + }, + { + "epoch": 0.19384579247166597, + "grad_norm": 0.0, + "learning_rate": 1.8624854364059534e-05, + "loss": 1.3994, + "step": 6602 + }, + { + "epoch": 0.193875154148805, + "grad_norm": 0.0, + "learning_rate": 1.8624373061251253e-05, + "loss": 1.4785, + "step": 6603 + }, + { + "epoch": 0.19390451582594398, + "grad_norm": 0.0, + "learning_rate": 1.8623891680450557e-05, + "loss": 1.3896, + "step": 6604 + }, + { + "epoch": 0.19393387750308297, + "grad_norm": 0.0, + "learning_rate": 1.86234102216618e-05, + "loss": 1.4199, + "step": 6605 + }, + { + "epoch": 0.19396323918022199, + "grad_norm": 0.0, + "learning_rate": 1.8622928684889334e-05, + "loss": 1.46, + "step": 6606 + }, + { + "epoch": 0.19399260085736098, + "grad_norm": 0.0, + "learning_rate": 1.8622447070137518e-05, + "loss": 1.2886, + "step": 6607 + }, + { + "epoch": 0.19402196253449996, + "grad_norm": 0.0, + "learning_rate": 1.8621965377410705e-05, + "loss": 1.334, + "step": 6608 + }, + { + "epoch": 0.19405132421163898, + "grad_norm": 0.0, + "learning_rate": 1.8621483606713252e-05, + "loss": 1.4102, + "step": 6609 + }, + { + "epoch": 0.19408068588877797, + "grad_norm": 0.0, + "learning_rate": 1.862100175804951e-05, + "loss": 1.4199, + "step": 6610 + }, + { + "epoch": 0.19411004756591696, + "grad_norm": 0.0, + "learning_rate": 1.8620519831423845e-05, + "loss": 1.3691, + "step": 6611 + }, + { + "epoch": 0.19413940924305598, + "grad_norm": 0.0, + "learning_rate": 1.8620037826840613e-05, + "loss": 1.3721, + "step": 6612 + }, + { + "epoch": 0.19416877092019497, + "grad_norm": 0.0, + "learning_rate": 1.861955574430417e-05, + "loss": 1.3164, + "step": 6613 + }, + { + "epoch": 0.19419813259733396, + "grad_norm": 0.0, + "learning_rate": 1.8619073583818874e-05, + "loss": 1.4678, + "step": 6614 + }, + { + "epoch": 0.19422749427447294, + "grad_norm": 0.0, + "learning_rate": 1.861859134538909e-05, + "loss": 1.4346, + "step": 6615 + }, + { + "epoch": 0.19425685595161196, + "grad_norm": 0.0, + "learning_rate": 1.8618109029019178e-05, + "loss": 1.354, + "step": 6616 + }, + { + "epoch": 0.19428621762875095, + "grad_norm": 0.0, + "learning_rate": 1.8617626634713497e-05, + "loss": 1.5645, + "step": 6617 + }, + { + "epoch": 0.19431557930588994, + "grad_norm": 0.0, + "learning_rate": 1.8617144162476415e-05, + "loss": 1.4014, + "step": 6618 + }, + { + "epoch": 0.19434494098302896, + "grad_norm": 0.0, + "learning_rate": 1.861666161231229e-05, + "loss": 1.3945, + "step": 6619 + }, + { + "epoch": 0.19437430266016795, + "grad_norm": 0.0, + "learning_rate": 1.8616178984225484e-05, + "loss": 1.5713, + "step": 6620 + }, + { + "epoch": 0.19440366433730694, + "grad_norm": 0.0, + "learning_rate": 1.8615696278220366e-05, + "loss": 1.4678, + "step": 6621 + }, + { + "epoch": 0.19443302601444595, + "grad_norm": 0.0, + "learning_rate": 1.8615213494301302e-05, + "loss": 1.4448, + "step": 6622 + }, + { + "epoch": 0.19446238769158494, + "grad_norm": 0.0, + "learning_rate": 1.8614730632472656e-05, + "loss": 1.4531, + "step": 6623 + }, + { + "epoch": 0.19449174936872393, + "grad_norm": 0.0, + "learning_rate": 1.8614247692738795e-05, + "loss": 1.3672, + "step": 6624 + }, + { + "epoch": 0.19452111104586295, + "grad_norm": 0.0, + "learning_rate": 1.8613764675104083e-05, + "loss": 1.502, + "step": 6625 + }, + { + "epoch": 0.19455047272300194, + "grad_norm": 0.0, + "learning_rate": 1.8613281579572894e-05, + "loss": 1.4258, + "step": 6626 + }, + { + "epoch": 0.19457983440014093, + "grad_norm": 0.0, + "learning_rate": 1.861279840614959e-05, + "loss": 1.2476, + "step": 6627 + }, + { + "epoch": 0.19460919607727994, + "grad_norm": 0.0, + "learning_rate": 1.861231515483855e-05, + "loss": 1.5645, + "step": 6628 + }, + { + "epoch": 0.19463855775441893, + "grad_norm": 0.0, + "learning_rate": 1.8611831825644134e-05, + "loss": 1.5107, + "step": 6629 + }, + { + "epoch": 0.19466791943155792, + "grad_norm": 0.0, + "learning_rate": 1.861134841857072e-05, + "loss": 1.416, + "step": 6630 + }, + { + "epoch": 0.19469728110869694, + "grad_norm": 0.0, + "learning_rate": 1.8610864933622676e-05, + "loss": 1.4629, + "step": 6631 + }, + { + "epoch": 0.19472664278583593, + "grad_norm": 0.0, + "learning_rate": 1.8610381370804372e-05, + "loss": 1.5781, + "step": 6632 + }, + { + "epoch": 0.19475600446297492, + "grad_norm": 0.0, + "learning_rate": 1.8609897730120186e-05, + "loss": 1.4355, + "step": 6633 + }, + { + "epoch": 0.19478536614011394, + "grad_norm": 0.0, + "learning_rate": 1.8609414011574492e-05, + "loss": 1.2959, + "step": 6634 + }, + { + "epoch": 0.19481472781725293, + "grad_norm": 0.0, + "learning_rate": 1.860893021517166e-05, + "loss": 1.5137, + "step": 6635 + }, + { + "epoch": 0.19484408949439191, + "grad_norm": 0.0, + "learning_rate": 1.860844634091607e-05, + "loss": 1.3477, + "step": 6636 + }, + { + "epoch": 0.19487345117153093, + "grad_norm": 0.0, + "learning_rate": 1.8607962388812092e-05, + "loss": 1.3223, + "step": 6637 + }, + { + "epoch": 0.19490281284866992, + "grad_norm": 0.0, + "learning_rate": 1.8607478358864106e-05, + "loss": 1.4639, + "step": 6638 + }, + { + "epoch": 0.1949321745258089, + "grad_norm": 0.0, + "learning_rate": 1.860699425107649e-05, + "loss": 1.4805, + "step": 6639 + }, + { + "epoch": 0.1949615362029479, + "grad_norm": 0.0, + "learning_rate": 1.860651006545362e-05, + "loss": 1.4795, + "step": 6640 + }, + { + "epoch": 0.19499089788008692, + "grad_norm": 0.0, + "learning_rate": 1.8606025801999874e-05, + "loss": 1.4355, + "step": 6641 + }, + { + "epoch": 0.1950202595572259, + "grad_norm": 0.0, + "learning_rate": 1.8605541460719637e-05, + "loss": 1.458, + "step": 6642 + }, + { + "epoch": 0.1950496212343649, + "grad_norm": 0.0, + "learning_rate": 1.860505704161728e-05, + "loss": 1.5068, + "step": 6643 + }, + { + "epoch": 0.1950789829115039, + "grad_norm": 0.0, + "learning_rate": 1.860457254469719e-05, + "loss": 1.4785, + "step": 6644 + }, + { + "epoch": 0.1951083445886429, + "grad_norm": 0.0, + "learning_rate": 1.8604087969963745e-05, + "loss": 1.3311, + "step": 6645 + }, + { + "epoch": 0.1951377062657819, + "grad_norm": 0.0, + "learning_rate": 1.8603603317421332e-05, + "loss": 1.4844, + "step": 6646 + }, + { + "epoch": 0.1951670679429209, + "grad_norm": 0.0, + "learning_rate": 1.8603118587074332e-05, + "loss": 1.4541, + "step": 6647 + }, + { + "epoch": 0.1951964296200599, + "grad_norm": 0.0, + "learning_rate": 1.860263377892712e-05, + "loss": 1.5225, + "step": 6648 + }, + { + "epoch": 0.1952257912971989, + "grad_norm": 0.0, + "learning_rate": 1.8602148892984096e-05, + "loss": 1.4004, + "step": 6649 + }, + { + "epoch": 0.1952551529743379, + "grad_norm": 0.0, + "learning_rate": 1.8601663929249635e-05, + "loss": 1.5039, + "step": 6650 + }, + { + "epoch": 0.1952845146514769, + "grad_norm": 0.0, + "learning_rate": 1.860117888772812e-05, + "loss": 1.4033, + "step": 6651 + }, + { + "epoch": 0.19531387632861588, + "grad_norm": 0.0, + "learning_rate": 1.8600693768423943e-05, + "loss": 1.3394, + "step": 6652 + }, + { + "epoch": 0.1953432380057549, + "grad_norm": 0.0, + "learning_rate": 1.860020857134149e-05, + "loss": 1.4141, + "step": 6653 + }, + { + "epoch": 0.1953725996828939, + "grad_norm": 0.0, + "learning_rate": 1.8599723296485147e-05, + "loss": 1.3379, + "step": 6654 + }, + { + "epoch": 0.19540196136003288, + "grad_norm": 0.0, + "learning_rate": 1.8599237943859307e-05, + "loss": 1.5186, + "step": 6655 + }, + { + "epoch": 0.1954313230371719, + "grad_norm": 0.0, + "learning_rate": 1.8598752513468355e-05, + "loss": 1.3887, + "step": 6656 + }, + { + "epoch": 0.19546068471431088, + "grad_norm": 0.0, + "learning_rate": 1.859826700531668e-05, + "loss": 1.3164, + "step": 6657 + }, + { + "epoch": 0.19549004639144987, + "grad_norm": 0.0, + "learning_rate": 1.8597781419408677e-05, + "loss": 1.3604, + "step": 6658 + }, + { + "epoch": 0.1955194080685889, + "grad_norm": 0.0, + "learning_rate": 1.8597295755748734e-05, + "loss": 1.4316, + "step": 6659 + }, + { + "epoch": 0.19554876974572788, + "grad_norm": 0.0, + "learning_rate": 1.8596810014341242e-05, + "loss": 1.5566, + "step": 6660 + }, + { + "epoch": 0.19557813142286687, + "grad_norm": 0.0, + "learning_rate": 1.8596324195190597e-05, + "loss": 1.3721, + "step": 6661 + }, + { + "epoch": 0.19560749310000589, + "grad_norm": 0.0, + "learning_rate": 1.859583829830119e-05, + "loss": 1.4893, + "step": 6662 + }, + { + "epoch": 0.19563685477714488, + "grad_norm": 0.0, + "learning_rate": 1.8595352323677415e-05, + "loss": 1.4609, + "step": 6663 + }, + { + "epoch": 0.19566621645428386, + "grad_norm": 0.0, + "learning_rate": 1.859486627132367e-05, + "loss": 1.5156, + "step": 6664 + }, + { + "epoch": 0.19569557813142285, + "grad_norm": 0.0, + "learning_rate": 1.8594380141244345e-05, + "loss": 1.4678, + "step": 6665 + }, + { + "epoch": 0.19572493980856187, + "grad_norm": 0.0, + "learning_rate": 1.8593893933443843e-05, + "loss": 1.3477, + "step": 6666 + }, + { + "epoch": 0.19575430148570086, + "grad_norm": 0.0, + "learning_rate": 1.8593407647926557e-05, + "loss": 1.2812, + "step": 6667 + }, + { + "epoch": 0.19578366316283985, + "grad_norm": 0.0, + "learning_rate": 1.859292128469688e-05, + "loss": 1.4697, + "step": 6668 + }, + { + "epoch": 0.19581302483997887, + "grad_norm": 0.0, + "learning_rate": 1.859243484375922e-05, + "loss": 1.4502, + "step": 6669 + }, + { + "epoch": 0.19584238651711786, + "grad_norm": 0.0, + "learning_rate": 1.859194832511797e-05, + "loss": 1.4248, + "step": 6670 + }, + { + "epoch": 0.19587174819425685, + "grad_norm": 0.0, + "learning_rate": 1.8591461728777532e-05, + "loss": 1.3945, + "step": 6671 + }, + { + "epoch": 0.19590110987139586, + "grad_norm": 0.0, + "learning_rate": 1.85909750547423e-05, + "loss": 1.3291, + "step": 6672 + }, + { + "epoch": 0.19593047154853485, + "grad_norm": 0.0, + "learning_rate": 1.8590488303016684e-05, + "loss": 1.5, + "step": 6673 + }, + { + "epoch": 0.19595983322567384, + "grad_norm": 0.0, + "learning_rate": 1.8590001473605085e-05, + "loss": 1.3711, + "step": 6674 + }, + { + "epoch": 0.19598919490281286, + "grad_norm": 0.0, + "learning_rate": 1.85895145665119e-05, + "loss": 1.4512, + "step": 6675 + }, + { + "epoch": 0.19601855657995185, + "grad_norm": 0.0, + "learning_rate": 1.8589027581741535e-05, + "loss": 1.4199, + "step": 6676 + }, + { + "epoch": 0.19604791825709084, + "grad_norm": 0.0, + "learning_rate": 1.8588540519298393e-05, + "loss": 1.4404, + "step": 6677 + }, + { + "epoch": 0.19607727993422985, + "grad_norm": 0.0, + "learning_rate": 1.858805337918688e-05, + "loss": 1.4297, + "step": 6678 + }, + { + "epoch": 0.19610664161136884, + "grad_norm": 0.0, + "learning_rate": 1.85875661614114e-05, + "loss": 1.4834, + "step": 6679 + }, + { + "epoch": 0.19613600328850783, + "grad_norm": 0.0, + "learning_rate": 1.858707886597636e-05, + "loss": 1.5205, + "step": 6680 + }, + { + "epoch": 0.19616536496564685, + "grad_norm": 0.0, + "learning_rate": 1.858659149288617e-05, + "loss": 1.4756, + "step": 6681 + }, + { + "epoch": 0.19619472664278584, + "grad_norm": 0.0, + "learning_rate": 1.858610404214523e-05, + "loss": 1.5234, + "step": 6682 + }, + { + "epoch": 0.19622408831992483, + "grad_norm": 0.0, + "learning_rate": 1.8585616513757955e-05, + "loss": 1.3984, + "step": 6683 + }, + { + "epoch": 0.19625344999706384, + "grad_norm": 0.0, + "learning_rate": 1.858512890772875e-05, + "loss": 1.4639, + "step": 6684 + }, + { + "epoch": 0.19628281167420283, + "grad_norm": 0.0, + "learning_rate": 1.8584641224062028e-05, + "loss": 1.4648, + "step": 6685 + }, + { + "epoch": 0.19631217335134182, + "grad_norm": 0.0, + "learning_rate": 1.8584153462762192e-05, + "loss": 1.5742, + "step": 6686 + }, + { + "epoch": 0.19634153502848084, + "grad_norm": 0.0, + "learning_rate": 1.858366562383366e-05, + "loss": 1.6113, + "step": 6687 + }, + { + "epoch": 0.19637089670561983, + "grad_norm": 0.0, + "learning_rate": 1.858317770728084e-05, + "loss": 1.3545, + "step": 6688 + }, + { + "epoch": 0.19640025838275882, + "grad_norm": 0.0, + "learning_rate": 1.8582689713108152e-05, + "loss": 1.377, + "step": 6689 + }, + { + "epoch": 0.1964296200598978, + "grad_norm": 0.0, + "learning_rate": 1.858220164132e-05, + "loss": 1.4971, + "step": 6690 + }, + { + "epoch": 0.19645898173703683, + "grad_norm": 0.0, + "learning_rate": 1.8581713491920795e-05, + "loss": 1.5303, + "step": 6691 + }, + { + "epoch": 0.19648834341417581, + "grad_norm": 0.0, + "learning_rate": 1.858122526491496e-05, + "loss": 1.499, + "step": 6692 + }, + { + "epoch": 0.1965177050913148, + "grad_norm": 0.0, + "learning_rate": 1.8580736960306912e-05, + "loss": 1.5098, + "step": 6693 + }, + { + "epoch": 0.19654706676845382, + "grad_norm": 0.0, + "learning_rate": 1.8580248578101057e-05, + "loss": 1.5938, + "step": 6694 + }, + { + "epoch": 0.1965764284455928, + "grad_norm": 0.0, + "learning_rate": 1.8579760118301817e-05, + "loss": 1.3242, + "step": 6695 + }, + { + "epoch": 0.1966057901227318, + "grad_norm": 0.0, + "learning_rate": 1.857927158091361e-05, + "loss": 1.333, + "step": 6696 + }, + { + "epoch": 0.19663515179987082, + "grad_norm": 0.0, + "learning_rate": 1.8578782965940855e-05, + "loss": 1.4287, + "step": 6697 + }, + { + "epoch": 0.1966645134770098, + "grad_norm": 0.0, + "learning_rate": 1.8578294273387966e-05, + "loss": 1.3457, + "step": 6698 + }, + { + "epoch": 0.1966938751541488, + "grad_norm": 0.0, + "learning_rate": 1.8577805503259364e-05, + "loss": 1.4492, + "step": 6699 + }, + { + "epoch": 0.1967232368312878, + "grad_norm": 0.0, + "learning_rate": 1.8577316655559475e-05, + "loss": 1.5234, + "step": 6700 + }, + { + "epoch": 0.1967525985084268, + "grad_norm": 0.0, + "learning_rate": 1.857682773029271e-05, + "loss": 1.3467, + "step": 6701 + }, + { + "epoch": 0.1967819601855658, + "grad_norm": 0.0, + "learning_rate": 1.8576338727463497e-05, + "loss": 1.5391, + "step": 6702 + }, + { + "epoch": 0.1968113218627048, + "grad_norm": 0.0, + "learning_rate": 1.8575849647076258e-05, + "loss": 1.5117, + "step": 6703 + }, + { + "epoch": 0.1968406835398438, + "grad_norm": 0.0, + "learning_rate": 1.857536048913541e-05, + "loss": 1.3555, + "step": 6704 + }, + { + "epoch": 0.1968700452169828, + "grad_norm": 0.0, + "learning_rate": 1.8574871253645383e-05, + "loss": 1.2725, + "step": 6705 + }, + { + "epoch": 0.1968994068941218, + "grad_norm": 0.0, + "learning_rate": 1.85743819406106e-05, + "loss": 1.3828, + "step": 6706 + }, + { + "epoch": 0.1969287685712608, + "grad_norm": 0.0, + "learning_rate": 1.8573892550035488e-05, + "loss": 1.4463, + "step": 6707 + }, + { + "epoch": 0.19695813024839978, + "grad_norm": 0.0, + "learning_rate": 1.8573403081924463e-05, + "loss": 1.3721, + "step": 6708 + }, + { + "epoch": 0.1969874919255388, + "grad_norm": 0.0, + "learning_rate": 1.857291353628196e-05, + "loss": 1.4707, + "step": 6709 + }, + { + "epoch": 0.1970168536026778, + "grad_norm": 0.0, + "learning_rate": 1.8572423913112408e-05, + "loss": 1.3984, + "step": 6710 + }, + { + "epoch": 0.19704621527981678, + "grad_norm": 0.0, + "learning_rate": 1.857193421242023e-05, + "loss": 1.4258, + "step": 6711 + }, + { + "epoch": 0.1970755769569558, + "grad_norm": 0.0, + "learning_rate": 1.8571444434209855e-05, + "loss": 1.5176, + "step": 6712 + }, + { + "epoch": 0.19710493863409478, + "grad_norm": 0.0, + "learning_rate": 1.8570954578485715e-05, + "loss": 1.4082, + "step": 6713 + }, + { + "epoch": 0.19713430031123377, + "grad_norm": 0.0, + "learning_rate": 1.8570464645252233e-05, + "loss": 1.3799, + "step": 6714 + }, + { + "epoch": 0.19716366198837276, + "grad_norm": 0.0, + "learning_rate": 1.8569974634513847e-05, + "loss": 1.3438, + "step": 6715 + }, + { + "epoch": 0.19719302366551178, + "grad_norm": 0.0, + "learning_rate": 1.8569484546274985e-05, + "loss": 1.4395, + "step": 6716 + }, + { + "epoch": 0.19722238534265077, + "grad_norm": 0.0, + "learning_rate": 1.8568994380540083e-05, + "loss": 1.4502, + "step": 6717 + }, + { + "epoch": 0.19725174701978976, + "grad_norm": 0.0, + "learning_rate": 1.8568504137313566e-05, + "loss": 1.4326, + "step": 6718 + }, + { + "epoch": 0.19728110869692878, + "grad_norm": 0.0, + "learning_rate": 1.856801381659987e-05, + "loss": 1.4766, + "step": 6719 + }, + { + "epoch": 0.19731047037406776, + "grad_norm": 0.0, + "learning_rate": 1.8567523418403435e-05, + "loss": 1.4551, + "step": 6720 + }, + { + "epoch": 0.19733983205120675, + "grad_norm": 0.0, + "learning_rate": 1.8567032942728688e-05, + "loss": 1.3594, + "step": 6721 + }, + { + "epoch": 0.19736919372834577, + "grad_norm": 0.0, + "learning_rate": 1.8566542389580072e-05, + "loss": 1.4492, + "step": 6722 + }, + { + "epoch": 0.19739855540548476, + "grad_norm": 0.0, + "learning_rate": 1.8566051758962017e-05, + "loss": 1.2725, + "step": 6723 + }, + { + "epoch": 0.19742791708262375, + "grad_norm": 0.0, + "learning_rate": 1.8565561050878962e-05, + "loss": 1.4326, + "step": 6724 + }, + { + "epoch": 0.19745727875976277, + "grad_norm": 0.0, + "learning_rate": 1.856507026533535e-05, + "loss": 1.5752, + "step": 6725 + }, + { + "epoch": 0.19748664043690176, + "grad_norm": 0.0, + "learning_rate": 1.8564579402335606e-05, + "loss": 1.418, + "step": 6726 + }, + { + "epoch": 0.19751600211404075, + "grad_norm": 0.0, + "learning_rate": 1.856408846188418e-05, + "loss": 1.4248, + "step": 6727 + }, + { + "epoch": 0.19754536379117976, + "grad_norm": 0.0, + "learning_rate": 1.8563597443985507e-05, + "loss": 1.3037, + "step": 6728 + }, + { + "epoch": 0.19757472546831875, + "grad_norm": 0.0, + "learning_rate": 1.8563106348644035e-05, + "loss": 1.4844, + "step": 6729 + }, + { + "epoch": 0.19760408714545774, + "grad_norm": 0.0, + "learning_rate": 1.8562615175864194e-05, + "loss": 1.4863, + "step": 6730 + }, + { + "epoch": 0.19763344882259676, + "grad_norm": 0.0, + "learning_rate": 1.856212392565043e-05, + "loss": 1.3682, + "step": 6731 + }, + { + "epoch": 0.19766281049973575, + "grad_norm": 0.0, + "learning_rate": 1.8561632598007195e-05, + "loss": 1.3311, + "step": 6732 + }, + { + "epoch": 0.19769217217687474, + "grad_norm": 0.0, + "learning_rate": 1.8561141192938916e-05, + "loss": 1.5166, + "step": 6733 + }, + { + "epoch": 0.19772153385401375, + "grad_norm": 0.0, + "learning_rate": 1.8560649710450044e-05, + "loss": 1.5049, + "step": 6734 + }, + { + "epoch": 0.19775089553115274, + "grad_norm": 0.0, + "learning_rate": 1.856015815054503e-05, + "loss": 1.5176, + "step": 6735 + }, + { + "epoch": 0.19778025720829173, + "grad_norm": 0.0, + "learning_rate": 1.855966651322831e-05, + "loss": 1.3691, + "step": 6736 + }, + { + "epoch": 0.19780961888543075, + "grad_norm": 0.0, + "learning_rate": 1.855917479850433e-05, + "loss": 1.5967, + "step": 6737 + }, + { + "epoch": 0.19783898056256974, + "grad_norm": 0.0, + "learning_rate": 1.8558683006377547e-05, + "loss": 1.4492, + "step": 6738 + }, + { + "epoch": 0.19786834223970873, + "grad_norm": 0.0, + "learning_rate": 1.85581911368524e-05, + "loss": 1.4316, + "step": 6739 + }, + { + "epoch": 0.19789770391684772, + "grad_norm": 0.0, + "learning_rate": 1.8557699189933336e-05, + "loss": 1.2979, + "step": 6740 + }, + { + "epoch": 0.19792706559398673, + "grad_norm": 0.0, + "learning_rate": 1.855720716562481e-05, + "loss": 1.3457, + "step": 6741 + }, + { + "epoch": 0.19795642727112572, + "grad_norm": 0.0, + "learning_rate": 1.8556715063931264e-05, + "loss": 1.4863, + "step": 6742 + }, + { + "epoch": 0.1979857889482647, + "grad_norm": 0.0, + "learning_rate": 1.8556222884857153e-05, + "loss": 1.4219, + "step": 6743 + }, + { + "epoch": 0.19801515062540373, + "grad_norm": 0.0, + "learning_rate": 1.8555730628406928e-05, + "loss": 1.4326, + "step": 6744 + }, + { + "epoch": 0.19804451230254272, + "grad_norm": 0.0, + "learning_rate": 1.8555238294585044e-05, + "loss": 1.4316, + "step": 6745 + }, + { + "epoch": 0.1980738739796817, + "grad_norm": 0.0, + "learning_rate": 1.8554745883395943e-05, + "loss": 1.4385, + "step": 6746 + }, + { + "epoch": 0.19810323565682073, + "grad_norm": 0.0, + "learning_rate": 1.8554253394844087e-05, + "loss": 1.4902, + "step": 6747 + }, + { + "epoch": 0.19813259733395971, + "grad_norm": 0.0, + "learning_rate": 1.8553760828933926e-05, + "loss": 1.4951, + "step": 6748 + }, + { + "epoch": 0.1981619590110987, + "grad_norm": 0.0, + "learning_rate": 1.8553268185669917e-05, + "loss": 1.4902, + "step": 6749 + }, + { + "epoch": 0.19819132068823772, + "grad_norm": 0.0, + "learning_rate": 1.8552775465056512e-05, + "loss": 1.3301, + "step": 6750 + }, + { + "epoch": 0.1982206823653767, + "grad_norm": 0.0, + "learning_rate": 1.8552282667098168e-05, + "loss": 1.3652, + "step": 6751 + }, + { + "epoch": 0.1982500440425157, + "grad_norm": 0.0, + "learning_rate": 1.8551789791799342e-05, + "loss": 1.4736, + "step": 6752 + }, + { + "epoch": 0.19827940571965472, + "grad_norm": 0.0, + "learning_rate": 1.8551296839164488e-05, + "loss": 1.3232, + "step": 6753 + }, + { + "epoch": 0.1983087673967937, + "grad_norm": 0.0, + "learning_rate": 1.855080380919807e-05, + "loss": 1.5234, + "step": 6754 + }, + { + "epoch": 0.1983381290739327, + "grad_norm": 0.0, + "learning_rate": 1.8550310701904543e-05, + "loss": 1.4375, + "step": 6755 + }, + { + "epoch": 0.1983674907510717, + "grad_norm": 0.0, + "learning_rate": 1.8549817517288364e-05, + "loss": 1.5049, + "step": 6756 + }, + { + "epoch": 0.1983968524282107, + "grad_norm": 0.0, + "learning_rate": 1.8549324255353997e-05, + "loss": 1.5566, + "step": 6757 + }, + { + "epoch": 0.1984262141053497, + "grad_norm": 0.0, + "learning_rate": 1.8548830916105902e-05, + "loss": 1.3945, + "step": 6758 + }, + { + "epoch": 0.1984555757824887, + "grad_norm": 0.0, + "learning_rate": 1.854833749954854e-05, + "loss": 1.4658, + "step": 6759 + }, + { + "epoch": 0.1984849374596277, + "grad_norm": 0.0, + "learning_rate": 1.8547844005686372e-05, + "loss": 1.5049, + "step": 6760 + }, + { + "epoch": 0.1985142991367667, + "grad_norm": 0.0, + "learning_rate": 1.854735043452386e-05, + "loss": 1.4785, + "step": 6761 + }, + { + "epoch": 0.1985436608139057, + "grad_norm": 0.0, + "learning_rate": 1.854685678606547e-05, + "loss": 1.418, + "step": 6762 + }, + { + "epoch": 0.1985730224910447, + "grad_norm": 0.0, + "learning_rate": 1.8546363060315666e-05, + "loss": 1.5645, + "step": 6763 + }, + { + "epoch": 0.19860238416818368, + "grad_norm": 0.0, + "learning_rate": 1.8545869257278912e-05, + "loss": 1.5684, + "step": 6764 + }, + { + "epoch": 0.19863174584532267, + "grad_norm": 0.0, + "learning_rate": 1.8545375376959672e-05, + "loss": 1.3682, + "step": 6765 + }, + { + "epoch": 0.1986611075224617, + "grad_norm": 0.0, + "learning_rate": 1.8544881419362415e-05, + "loss": 1.4844, + "step": 6766 + }, + { + "epoch": 0.19869046919960068, + "grad_norm": 0.0, + "learning_rate": 1.854438738449161e-05, + "loss": 1.4395, + "step": 6767 + }, + { + "epoch": 0.19871983087673967, + "grad_norm": 0.0, + "learning_rate": 1.854389327235172e-05, + "loss": 1.4736, + "step": 6768 + }, + { + "epoch": 0.19874919255387868, + "grad_norm": 0.0, + "learning_rate": 1.8543399082947212e-05, + "loss": 1.3369, + "step": 6769 + }, + { + "epoch": 0.19877855423101767, + "grad_norm": 0.0, + "learning_rate": 1.8542904816282557e-05, + "loss": 1.3682, + "step": 6770 + }, + { + "epoch": 0.19880791590815666, + "grad_norm": 0.0, + "learning_rate": 1.8542410472362232e-05, + "loss": 1.459, + "step": 6771 + }, + { + "epoch": 0.19883727758529568, + "grad_norm": 0.0, + "learning_rate": 1.8541916051190697e-05, + "loss": 1.3516, + "step": 6772 + }, + { + "epoch": 0.19886663926243467, + "grad_norm": 0.0, + "learning_rate": 1.854142155277243e-05, + "loss": 1.5635, + "step": 6773 + }, + { + "epoch": 0.19889600093957366, + "grad_norm": 0.0, + "learning_rate": 1.85409269771119e-05, + "loss": 1.3945, + "step": 6774 + }, + { + "epoch": 0.19892536261671268, + "grad_norm": 0.0, + "learning_rate": 1.8540432324213577e-05, + "loss": 1.3623, + "step": 6775 + }, + { + "epoch": 0.19895472429385166, + "grad_norm": 0.0, + "learning_rate": 1.853993759408194e-05, + "loss": 1.4023, + "step": 6776 + }, + { + "epoch": 0.19898408597099065, + "grad_norm": 0.0, + "learning_rate": 1.853944278672146e-05, + "loss": 1.5137, + "step": 6777 + }, + { + "epoch": 0.19901344764812967, + "grad_norm": 0.0, + "learning_rate": 1.853894790213661e-05, + "loss": 1.5273, + "step": 6778 + }, + { + "epoch": 0.19904280932526866, + "grad_norm": 0.0, + "learning_rate": 1.853845294033187e-05, + "loss": 1.4727, + "step": 6779 + }, + { + "epoch": 0.19907217100240765, + "grad_norm": 0.0, + "learning_rate": 1.8537957901311713e-05, + "loss": 1.3184, + "step": 6780 + }, + { + "epoch": 0.19910153267954667, + "grad_norm": 0.0, + "learning_rate": 1.8537462785080615e-05, + "loss": 1.3252, + "step": 6781 + }, + { + "epoch": 0.19913089435668566, + "grad_norm": 0.0, + "learning_rate": 1.8536967591643053e-05, + "loss": 1.4277, + "step": 6782 + }, + { + "epoch": 0.19916025603382465, + "grad_norm": 0.0, + "learning_rate": 1.853647232100351e-05, + "loss": 1.3643, + "step": 6783 + }, + { + "epoch": 0.19918961771096366, + "grad_norm": 0.0, + "learning_rate": 1.853597697316646e-05, + "loss": 1.4014, + "step": 6784 + }, + { + "epoch": 0.19921897938810265, + "grad_norm": 0.0, + "learning_rate": 1.8535481548136386e-05, + "loss": 1.4766, + "step": 6785 + }, + { + "epoch": 0.19924834106524164, + "grad_norm": 0.0, + "learning_rate": 1.853498604591776e-05, + "loss": 1.3896, + "step": 6786 + }, + { + "epoch": 0.19927770274238066, + "grad_norm": 0.0, + "learning_rate": 1.8534490466515077e-05, + "loss": 1.3887, + "step": 6787 + }, + { + "epoch": 0.19930706441951965, + "grad_norm": 0.0, + "learning_rate": 1.853399480993281e-05, + "loss": 1.4932, + "step": 6788 + }, + { + "epoch": 0.19933642609665864, + "grad_norm": 0.0, + "learning_rate": 1.8533499076175437e-05, + "loss": 1.4336, + "step": 6789 + }, + { + "epoch": 0.19936578777379763, + "grad_norm": 0.0, + "learning_rate": 1.853300326524745e-05, + "loss": 1.3311, + "step": 6790 + }, + { + "epoch": 0.19939514945093664, + "grad_norm": 0.0, + "learning_rate": 1.8532507377153327e-05, + "loss": 1.5225, + "step": 6791 + }, + { + "epoch": 0.19942451112807563, + "grad_norm": 0.0, + "learning_rate": 1.8532011411897558e-05, + "loss": 1.4287, + "step": 6792 + }, + { + "epoch": 0.19945387280521462, + "grad_norm": 0.0, + "learning_rate": 1.8531515369484623e-05, + "loss": 1.5693, + "step": 6793 + }, + { + "epoch": 0.19948323448235364, + "grad_norm": 0.0, + "learning_rate": 1.853101924991901e-05, + "loss": 1.458, + "step": 6794 + }, + { + "epoch": 0.19951259615949263, + "grad_norm": 0.0, + "learning_rate": 1.8530523053205203e-05, + "loss": 1.4189, + "step": 6795 + }, + { + "epoch": 0.19954195783663162, + "grad_norm": 0.0, + "learning_rate": 1.8530026779347695e-05, + "loss": 1.3711, + "step": 6796 + }, + { + "epoch": 0.19957131951377063, + "grad_norm": 0.0, + "learning_rate": 1.8529530428350966e-05, + "loss": 1.4736, + "step": 6797 + }, + { + "epoch": 0.19960068119090962, + "grad_norm": 0.0, + "learning_rate": 1.852903400021951e-05, + "loss": 1.3994, + "step": 6798 + }, + { + "epoch": 0.1996300428680486, + "grad_norm": 0.0, + "learning_rate": 1.852853749495782e-05, + "loss": 1.3379, + "step": 6799 + }, + { + "epoch": 0.19965940454518763, + "grad_norm": 0.0, + "learning_rate": 1.852804091257038e-05, + "loss": 1.4746, + "step": 6800 + }, + { + "epoch": 0.19968876622232662, + "grad_norm": 0.0, + "learning_rate": 1.8527544253061676e-05, + "loss": 1.502, + "step": 6801 + }, + { + "epoch": 0.1997181278994656, + "grad_norm": 0.0, + "learning_rate": 1.852704751643621e-05, + "loss": 1.5928, + "step": 6802 + }, + { + "epoch": 0.19974748957660463, + "grad_norm": 0.0, + "learning_rate": 1.852655070269847e-05, + "loss": 1.3057, + "step": 6803 + }, + { + "epoch": 0.19977685125374361, + "grad_norm": 0.0, + "learning_rate": 1.8526053811852945e-05, + "loss": 1.3916, + "step": 6804 + }, + { + "epoch": 0.1998062129308826, + "grad_norm": 0.0, + "learning_rate": 1.8525556843904134e-05, + "loss": 1.3818, + "step": 6805 + }, + { + "epoch": 0.19983557460802162, + "grad_norm": 0.0, + "learning_rate": 1.852505979885653e-05, + "loss": 1.583, + "step": 6806 + }, + { + "epoch": 0.1998649362851606, + "grad_norm": 0.0, + "learning_rate": 1.8524562676714623e-05, + "loss": 1.5352, + "step": 6807 + }, + { + "epoch": 0.1998942979622996, + "grad_norm": 0.0, + "learning_rate": 1.8524065477482917e-05, + "loss": 1.5098, + "step": 6808 + }, + { + "epoch": 0.19992365963943862, + "grad_norm": 0.0, + "learning_rate": 1.8523568201165903e-05, + "loss": 1.3252, + "step": 6809 + }, + { + "epoch": 0.1999530213165776, + "grad_norm": 0.0, + "learning_rate": 1.8523070847768077e-05, + "loss": 1.3936, + "step": 6810 + }, + { + "epoch": 0.1999823829937166, + "grad_norm": 0.0, + "learning_rate": 1.852257341729394e-05, + "loss": 1.415, + "step": 6811 + }, + { + "epoch": 0.2000117446708556, + "grad_norm": 0.0, + "learning_rate": 1.8522075909747987e-05, + "loss": 1.4727, + "step": 6812 + }, + { + "epoch": 0.2000411063479946, + "grad_norm": 0.0, + "learning_rate": 1.852157832513472e-05, + "loss": 1.4121, + "step": 6813 + }, + { + "epoch": 0.2000704680251336, + "grad_norm": 0.0, + "learning_rate": 1.852108066345864e-05, + "loss": 1.3936, + "step": 6814 + }, + { + "epoch": 0.20009982970227258, + "grad_norm": 0.0, + "learning_rate": 1.8520582924724243e-05, + "loss": 1.3232, + "step": 6815 + }, + { + "epoch": 0.2001291913794116, + "grad_norm": 0.0, + "learning_rate": 1.852008510893603e-05, + "loss": 1.5381, + "step": 6816 + }, + { + "epoch": 0.2001585530565506, + "grad_norm": 0.0, + "learning_rate": 1.851958721609851e-05, + "loss": 1.501, + "step": 6817 + }, + { + "epoch": 0.20018791473368958, + "grad_norm": 0.0, + "learning_rate": 1.851908924621618e-05, + "loss": 1.3945, + "step": 6818 + }, + { + "epoch": 0.2002172764108286, + "grad_norm": 0.0, + "learning_rate": 1.8518591199293542e-05, + "loss": 1.5049, + "step": 6819 + }, + { + "epoch": 0.20024663808796758, + "grad_norm": 0.0, + "learning_rate": 1.8518093075335103e-05, + "loss": 1.4092, + "step": 6820 + }, + { + "epoch": 0.20027599976510657, + "grad_norm": 0.0, + "learning_rate": 1.851759487434537e-05, + "loss": 1.4863, + "step": 6821 + }, + { + "epoch": 0.2003053614422456, + "grad_norm": 0.0, + "learning_rate": 1.8517096596328842e-05, + "loss": 1.4883, + "step": 6822 + }, + { + "epoch": 0.20033472311938458, + "grad_norm": 0.0, + "learning_rate": 1.8516598241290026e-05, + "loss": 1.4961, + "step": 6823 + }, + { + "epoch": 0.20036408479652357, + "grad_norm": 0.0, + "learning_rate": 1.8516099809233436e-05, + "loss": 1.457, + "step": 6824 + }, + { + "epoch": 0.20039344647366258, + "grad_norm": 0.0, + "learning_rate": 1.851560130016357e-05, + "loss": 1.3374, + "step": 6825 + }, + { + "epoch": 0.20042280815080157, + "grad_norm": 0.0, + "learning_rate": 1.8515102714084943e-05, + "loss": 1.3506, + "step": 6826 + }, + { + "epoch": 0.20045216982794056, + "grad_norm": 0.0, + "learning_rate": 1.851460405100206e-05, + "loss": 1.5537, + "step": 6827 + }, + { + "epoch": 0.20048153150507958, + "grad_norm": 0.0, + "learning_rate": 1.8514105310919435e-05, + "loss": 1.4131, + "step": 6828 + }, + { + "epoch": 0.20051089318221857, + "grad_norm": 0.0, + "learning_rate": 1.8513606493841572e-05, + "loss": 1.4707, + "step": 6829 + }, + { + "epoch": 0.20054025485935756, + "grad_norm": 0.0, + "learning_rate": 1.8513107599772984e-05, + "loss": 1.4014, + "step": 6830 + }, + { + "epoch": 0.20056961653649658, + "grad_norm": 0.0, + "learning_rate": 1.8512608628718186e-05, + "loss": 1.4521, + "step": 6831 + }, + { + "epoch": 0.20059897821363556, + "grad_norm": 0.0, + "learning_rate": 1.851210958068169e-05, + "loss": 1.4199, + "step": 6832 + }, + { + "epoch": 0.20062833989077455, + "grad_norm": 0.0, + "learning_rate": 1.8511610455668006e-05, + "loss": 1.4355, + "step": 6833 + }, + { + "epoch": 0.20065770156791357, + "grad_norm": 0.0, + "learning_rate": 1.8511111253681648e-05, + "loss": 1.4541, + "step": 6834 + }, + { + "epoch": 0.20068706324505256, + "grad_norm": 0.0, + "learning_rate": 1.851061197472713e-05, + "loss": 1.415, + "step": 6835 + }, + { + "epoch": 0.20071642492219155, + "grad_norm": 0.0, + "learning_rate": 1.851011261880897e-05, + "loss": 1.3291, + "step": 6836 + }, + { + "epoch": 0.20074578659933057, + "grad_norm": 0.0, + "learning_rate": 1.850961318593168e-05, + "loss": 1.3896, + "step": 6837 + }, + { + "epoch": 0.20077514827646956, + "grad_norm": 0.0, + "learning_rate": 1.8509113676099782e-05, + "loss": 1.4561, + "step": 6838 + }, + { + "epoch": 0.20080450995360855, + "grad_norm": 0.0, + "learning_rate": 1.8508614089317787e-05, + "loss": 1.4473, + "step": 6839 + }, + { + "epoch": 0.20083387163074753, + "grad_norm": 0.0, + "learning_rate": 1.8508114425590215e-05, + "loss": 1.5801, + "step": 6840 + }, + { + "epoch": 0.20086323330788655, + "grad_norm": 0.0, + "learning_rate": 1.850761468492159e-05, + "loss": 1.377, + "step": 6841 + }, + { + "epoch": 0.20089259498502554, + "grad_norm": 0.0, + "learning_rate": 1.850711486731642e-05, + "loss": 1.4023, + "step": 6842 + }, + { + "epoch": 0.20092195666216453, + "grad_norm": 0.0, + "learning_rate": 1.8506614972779235e-05, + "loss": 1.584, + "step": 6843 + }, + { + "epoch": 0.20095131833930355, + "grad_norm": 0.0, + "learning_rate": 1.8506115001314554e-05, + "loss": 1.5596, + "step": 6844 + }, + { + "epoch": 0.20098068001644254, + "grad_norm": 0.0, + "learning_rate": 1.8505614952926895e-05, + "loss": 1.5479, + "step": 6845 + }, + { + "epoch": 0.20101004169358153, + "grad_norm": 0.0, + "learning_rate": 1.850511482762078e-05, + "loss": 1.4482, + "step": 6846 + }, + { + "epoch": 0.20103940337072054, + "grad_norm": 0.0, + "learning_rate": 1.8504614625400736e-05, + "loss": 1.4551, + "step": 6847 + }, + { + "epoch": 0.20106876504785953, + "grad_norm": 0.0, + "learning_rate": 1.8504114346271283e-05, + "loss": 1.4971, + "step": 6848 + }, + { + "epoch": 0.20109812672499852, + "grad_norm": 0.0, + "learning_rate": 1.8503613990236948e-05, + "loss": 1.418, + "step": 6849 + }, + { + "epoch": 0.20112748840213754, + "grad_norm": 0.0, + "learning_rate": 1.850311355730225e-05, + "loss": 1.5, + "step": 6850 + }, + { + "epoch": 0.20115685007927653, + "grad_norm": 0.0, + "learning_rate": 1.8502613047471718e-05, + "loss": 1.3955, + "step": 6851 + }, + { + "epoch": 0.20118621175641552, + "grad_norm": 0.0, + "learning_rate": 1.850211246074988e-05, + "loss": 1.4482, + "step": 6852 + }, + { + "epoch": 0.20121557343355453, + "grad_norm": 0.0, + "learning_rate": 1.8501611797141263e-05, + "loss": 1.4395, + "step": 6853 + }, + { + "epoch": 0.20124493511069352, + "grad_norm": 0.0, + "learning_rate": 1.8501111056650395e-05, + "loss": 1.3096, + "step": 6854 + }, + { + "epoch": 0.2012742967878325, + "grad_norm": 0.0, + "learning_rate": 1.8500610239281798e-05, + "loss": 1.4707, + "step": 6855 + }, + { + "epoch": 0.20130365846497153, + "grad_norm": 0.0, + "learning_rate": 1.8500109345040007e-05, + "loss": 1.3789, + "step": 6856 + }, + { + "epoch": 0.20133302014211052, + "grad_norm": 0.0, + "learning_rate": 1.8499608373929556e-05, + "loss": 1.5215, + "step": 6857 + }, + { + "epoch": 0.2013623818192495, + "grad_norm": 0.0, + "learning_rate": 1.849910732595496e-05, + "loss": 1.3906, + "step": 6858 + }, + { + "epoch": 0.20139174349638853, + "grad_norm": 0.0, + "learning_rate": 1.8498606201120766e-05, + "loss": 1.3125, + "step": 6859 + }, + { + "epoch": 0.20142110517352751, + "grad_norm": 0.0, + "learning_rate": 1.8498104999431497e-05, + "loss": 1.4238, + "step": 6860 + }, + { + "epoch": 0.2014504668506665, + "grad_norm": 0.0, + "learning_rate": 1.849760372089169e-05, + "loss": 1.4072, + "step": 6861 + }, + { + "epoch": 0.20147982852780552, + "grad_norm": 0.0, + "learning_rate": 1.8497102365505876e-05, + "loss": 1.3838, + "step": 6862 + }, + { + "epoch": 0.2015091902049445, + "grad_norm": 0.0, + "learning_rate": 1.849660093327859e-05, + "loss": 1.3149, + "step": 6863 + }, + { + "epoch": 0.2015385518820835, + "grad_norm": 0.0, + "learning_rate": 1.8496099424214362e-05, + "loss": 1.4453, + "step": 6864 + }, + { + "epoch": 0.2015679135592225, + "grad_norm": 0.0, + "learning_rate": 1.8495597838317735e-05, + "loss": 1.3262, + "step": 6865 + }, + { + "epoch": 0.2015972752363615, + "grad_norm": 0.0, + "learning_rate": 1.8495096175593238e-05, + "loss": 1.4502, + "step": 6866 + }, + { + "epoch": 0.2016266369135005, + "grad_norm": 0.0, + "learning_rate": 1.8494594436045413e-05, + "loss": 1.4375, + "step": 6867 + }, + { + "epoch": 0.20165599859063948, + "grad_norm": 0.0, + "learning_rate": 1.8494092619678795e-05, + "loss": 1.543, + "step": 6868 + }, + { + "epoch": 0.2016853602677785, + "grad_norm": 0.0, + "learning_rate": 1.8493590726497917e-05, + "loss": 1.4795, + "step": 6869 + }, + { + "epoch": 0.2017147219449175, + "grad_norm": 0.0, + "learning_rate": 1.849308875650733e-05, + "loss": 1.4326, + "step": 6870 + }, + { + "epoch": 0.20174408362205648, + "grad_norm": 0.0, + "learning_rate": 1.8492586709711564e-05, + "loss": 1.2832, + "step": 6871 + }, + { + "epoch": 0.2017734452991955, + "grad_norm": 0.0, + "learning_rate": 1.849208458611516e-05, + "loss": 1.2676, + "step": 6872 + }, + { + "epoch": 0.2018028069763345, + "grad_norm": 0.0, + "learning_rate": 1.849158238572266e-05, + "loss": 1.3945, + "step": 6873 + }, + { + "epoch": 0.20183216865347348, + "grad_norm": 0.0, + "learning_rate": 1.8491080108538607e-05, + "loss": 1.4561, + "step": 6874 + }, + { + "epoch": 0.2018615303306125, + "grad_norm": 0.0, + "learning_rate": 1.8490577754567543e-05, + "loss": 1.374, + "step": 6875 + }, + { + "epoch": 0.20189089200775148, + "grad_norm": 0.0, + "learning_rate": 1.8490075323814007e-05, + "loss": 1.5713, + "step": 6876 + }, + { + "epoch": 0.20192025368489047, + "grad_norm": 0.0, + "learning_rate": 1.8489572816282544e-05, + "loss": 1.4844, + "step": 6877 + }, + { + "epoch": 0.2019496153620295, + "grad_norm": 0.0, + "learning_rate": 1.8489070231977706e-05, + "loss": 1.4717, + "step": 6878 + }, + { + "epoch": 0.20197897703916848, + "grad_norm": 0.0, + "learning_rate": 1.848856757090403e-05, + "loss": 1.4023, + "step": 6879 + }, + { + "epoch": 0.20200833871630747, + "grad_norm": 0.0, + "learning_rate": 1.8488064833066063e-05, + "loss": 1.4434, + "step": 6880 + }, + { + "epoch": 0.20203770039344648, + "grad_norm": 0.0, + "learning_rate": 1.848756201846835e-05, + "loss": 1.3936, + "step": 6881 + }, + { + "epoch": 0.20206706207058547, + "grad_norm": 0.0, + "learning_rate": 1.848705912711544e-05, + "loss": 1.5928, + "step": 6882 + }, + { + "epoch": 0.20209642374772446, + "grad_norm": 0.0, + "learning_rate": 1.8486556159011882e-05, + "loss": 1.4277, + "step": 6883 + }, + { + "epoch": 0.20212578542486348, + "grad_norm": 0.0, + "learning_rate": 1.8486053114162224e-05, + "loss": 1.3818, + "step": 6884 + }, + { + "epoch": 0.20215514710200247, + "grad_norm": 0.0, + "learning_rate": 1.8485549992571014e-05, + "loss": 1.4658, + "step": 6885 + }, + { + "epoch": 0.20218450877914146, + "grad_norm": 0.0, + "learning_rate": 1.8485046794242797e-05, + "loss": 1.4141, + "step": 6886 + }, + { + "epoch": 0.20221387045628048, + "grad_norm": 0.0, + "learning_rate": 1.8484543519182136e-05, + "loss": 1.3906, + "step": 6887 + }, + { + "epoch": 0.20224323213341946, + "grad_norm": 0.0, + "learning_rate": 1.8484040167393572e-05, + "loss": 1.4521, + "step": 6888 + }, + { + "epoch": 0.20227259381055845, + "grad_norm": 0.0, + "learning_rate": 1.848353673888166e-05, + "loss": 1.4854, + "step": 6889 + }, + { + "epoch": 0.20230195548769744, + "grad_norm": 0.0, + "learning_rate": 1.8483033233650955e-05, + "loss": 1.5107, + "step": 6890 + }, + { + "epoch": 0.20233131716483646, + "grad_norm": 0.0, + "learning_rate": 1.8482529651706005e-05, + "loss": 1.4775, + "step": 6891 + }, + { + "epoch": 0.20236067884197545, + "grad_norm": 0.0, + "learning_rate": 1.8482025993051367e-05, + "loss": 1.46, + "step": 6892 + }, + { + "epoch": 0.20239004051911444, + "grad_norm": 0.0, + "learning_rate": 1.8481522257691596e-05, + "loss": 1.4961, + "step": 6893 + }, + { + "epoch": 0.20241940219625346, + "grad_norm": 0.0, + "learning_rate": 1.8481018445631248e-05, + "loss": 1.54, + "step": 6894 + }, + { + "epoch": 0.20244876387339245, + "grad_norm": 0.0, + "learning_rate": 1.8480514556874878e-05, + "loss": 1.498, + "step": 6895 + }, + { + "epoch": 0.20247812555053143, + "grad_norm": 0.0, + "learning_rate": 1.8480010591427044e-05, + "loss": 1.4951, + "step": 6896 + }, + { + "epoch": 0.20250748722767045, + "grad_norm": 0.0, + "learning_rate": 1.8479506549292302e-05, + "loss": 1.4922, + "step": 6897 + }, + { + "epoch": 0.20253684890480944, + "grad_norm": 0.0, + "learning_rate": 1.8479002430475204e-05, + "loss": 1.5029, + "step": 6898 + }, + { + "epoch": 0.20256621058194843, + "grad_norm": 0.0, + "learning_rate": 1.8478498234980323e-05, + "loss": 1.501, + "step": 6899 + }, + { + "epoch": 0.20259557225908745, + "grad_norm": 0.0, + "learning_rate": 1.847799396281221e-05, + "loss": 1.4512, + "step": 6900 + }, + { + "epoch": 0.20262493393622644, + "grad_norm": 0.0, + "learning_rate": 1.8477489613975427e-05, + "loss": 1.4268, + "step": 6901 + }, + { + "epoch": 0.20265429561336543, + "grad_norm": 0.0, + "learning_rate": 1.8476985188474533e-05, + "loss": 1.4707, + "step": 6902 + }, + { + "epoch": 0.20268365729050444, + "grad_norm": 0.0, + "learning_rate": 1.847648068631409e-05, + "loss": 1.5098, + "step": 6903 + }, + { + "epoch": 0.20271301896764343, + "grad_norm": 0.0, + "learning_rate": 1.8475976107498664e-05, + "loss": 1.583, + "step": 6904 + }, + { + "epoch": 0.20274238064478242, + "grad_norm": 0.0, + "learning_rate": 1.847547145203281e-05, + "loss": 1.377, + "step": 6905 + }, + { + "epoch": 0.20277174232192144, + "grad_norm": 0.0, + "learning_rate": 1.8474966719921104e-05, + "loss": 1.4404, + "step": 6906 + }, + { + "epoch": 0.20280110399906043, + "grad_norm": 0.0, + "learning_rate": 1.84744619111681e-05, + "loss": 1.5879, + "step": 6907 + }, + { + "epoch": 0.20283046567619942, + "grad_norm": 0.0, + "learning_rate": 1.8473957025778366e-05, + "loss": 1.2871, + "step": 6908 + }, + { + "epoch": 0.20285982735333843, + "grad_norm": 0.0, + "learning_rate": 1.847345206375647e-05, + "loss": 1.4971, + "step": 6909 + }, + { + "epoch": 0.20288918903047742, + "grad_norm": 0.0, + "learning_rate": 1.8472947025106974e-05, + "loss": 1.3906, + "step": 6910 + }, + { + "epoch": 0.2029185507076164, + "grad_norm": 0.0, + "learning_rate": 1.8472441909834452e-05, + "loss": 1.4434, + "step": 6911 + }, + { + "epoch": 0.20294791238475543, + "grad_norm": 0.0, + "learning_rate": 1.8471936717943463e-05, + "loss": 1.5195, + "step": 6912 + }, + { + "epoch": 0.20297727406189442, + "grad_norm": 0.0, + "learning_rate": 1.8471431449438588e-05, + "loss": 1.4443, + "step": 6913 + }, + { + "epoch": 0.2030066357390334, + "grad_norm": 0.0, + "learning_rate": 1.8470926104324383e-05, + "loss": 1.4248, + "step": 6914 + }, + { + "epoch": 0.2030359974161724, + "grad_norm": 0.0, + "learning_rate": 1.8470420682605424e-05, + "loss": 1.4434, + "step": 6915 + }, + { + "epoch": 0.20306535909331141, + "grad_norm": 0.0, + "learning_rate": 1.8469915184286284e-05, + "loss": 1.377, + "step": 6916 + }, + { + "epoch": 0.2030947207704504, + "grad_norm": 0.0, + "learning_rate": 1.8469409609371533e-05, + "loss": 1.3955, + "step": 6917 + }, + { + "epoch": 0.2031240824475894, + "grad_norm": 0.0, + "learning_rate": 1.8468903957865735e-05, + "loss": 1.3916, + "step": 6918 + }, + { + "epoch": 0.2031534441247284, + "grad_norm": 0.0, + "learning_rate": 1.8468398229773478e-05, + "loss": 1.3164, + "step": 6919 + }, + { + "epoch": 0.2031828058018674, + "grad_norm": 0.0, + "learning_rate": 1.8467892425099322e-05, + "loss": 1.4121, + "step": 6920 + }, + { + "epoch": 0.2032121674790064, + "grad_norm": 0.0, + "learning_rate": 1.8467386543847848e-05, + "loss": 1.4014, + "step": 6921 + }, + { + "epoch": 0.2032415291561454, + "grad_norm": 0.0, + "learning_rate": 1.8466880586023627e-05, + "loss": 1.377, + "step": 6922 + }, + { + "epoch": 0.2032708908332844, + "grad_norm": 0.0, + "learning_rate": 1.8466374551631236e-05, + "loss": 1.5166, + "step": 6923 + }, + { + "epoch": 0.20330025251042338, + "grad_norm": 0.0, + "learning_rate": 1.8465868440675255e-05, + "loss": 1.4707, + "step": 6924 + }, + { + "epoch": 0.2033296141875624, + "grad_norm": 0.0, + "learning_rate": 1.8465362253160256e-05, + "loss": 1.3008, + "step": 6925 + }, + { + "epoch": 0.2033589758647014, + "grad_norm": 0.0, + "learning_rate": 1.846485598909082e-05, + "loss": 1.3594, + "step": 6926 + }, + { + "epoch": 0.20338833754184038, + "grad_norm": 0.0, + "learning_rate": 1.846434964847152e-05, + "loss": 1.3105, + "step": 6927 + }, + { + "epoch": 0.2034176992189794, + "grad_norm": 0.0, + "learning_rate": 1.846384323130694e-05, + "loss": 1.4795, + "step": 6928 + }, + { + "epoch": 0.2034470608961184, + "grad_norm": 0.0, + "learning_rate": 1.8463336737601656e-05, + "loss": 1.4961, + "step": 6929 + }, + { + "epoch": 0.20347642257325738, + "grad_norm": 0.0, + "learning_rate": 1.8462830167360254e-05, + "loss": 1.4531, + "step": 6930 + }, + { + "epoch": 0.2035057842503964, + "grad_norm": 0.0, + "learning_rate": 1.8462323520587307e-05, + "loss": 1.4238, + "step": 6931 + }, + { + "epoch": 0.20353514592753538, + "grad_norm": 0.0, + "learning_rate": 1.8461816797287407e-05, + "loss": 1.3926, + "step": 6932 + }, + { + "epoch": 0.20356450760467437, + "grad_norm": 0.0, + "learning_rate": 1.8461309997465125e-05, + "loss": 1.582, + "step": 6933 + }, + { + "epoch": 0.2035938692818134, + "grad_norm": 0.0, + "learning_rate": 1.8460803121125054e-05, + "loss": 1.3896, + "step": 6934 + }, + { + "epoch": 0.20362323095895238, + "grad_norm": 0.0, + "learning_rate": 1.846029616827177e-05, + "loss": 1.4336, + "step": 6935 + }, + { + "epoch": 0.20365259263609137, + "grad_norm": 0.0, + "learning_rate": 1.845978913890986e-05, + "loss": 1.4141, + "step": 6936 + }, + { + "epoch": 0.20368195431323038, + "grad_norm": 0.0, + "learning_rate": 1.8459282033043916e-05, + "loss": 1.4551, + "step": 6937 + }, + { + "epoch": 0.20371131599036937, + "grad_norm": 0.0, + "learning_rate": 1.8458774850678515e-05, + "loss": 1.4365, + "step": 6938 + }, + { + "epoch": 0.20374067766750836, + "grad_norm": 0.0, + "learning_rate": 1.8458267591818246e-05, + "loss": 1.376, + "step": 6939 + }, + { + "epoch": 0.20377003934464738, + "grad_norm": 0.0, + "learning_rate": 1.8457760256467697e-05, + "loss": 1.5361, + "step": 6940 + }, + { + "epoch": 0.20379940102178637, + "grad_norm": 0.0, + "learning_rate": 1.845725284463146e-05, + "loss": 1.4746, + "step": 6941 + }, + { + "epoch": 0.20382876269892536, + "grad_norm": 0.0, + "learning_rate": 1.8456745356314115e-05, + "loss": 1.4785, + "step": 6942 + }, + { + "epoch": 0.20385812437606435, + "grad_norm": 0.0, + "learning_rate": 1.8456237791520254e-05, + "loss": 1.2319, + "step": 6943 + }, + { + "epoch": 0.20388748605320337, + "grad_norm": 0.0, + "learning_rate": 1.8455730150254475e-05, + "loss": 1.4473, + "step": 6944 + }, + { + "epoch": 0.20391684773034235, + "grad_norm": 0.0, + "learning_rate": 1.8455222432521358e-05, + "loss": 1.417, + "step": 6945 + }, + { + "epoch": 0.20394620940748134, + "grad_norm": 0.0, + "learning_rate": 1.8454714638325503e-05, + "loss": 1.332, + "step": 6946 + }, + { + "epoch": 0.20397557108462036, + "grad_norm": 0.0, + "learning_rate": 1.8454206767671493e-05, + "loss": 1.5283, + "step": 6947 + }, + { + "epoch": 0.20400493276175935, + "grad_norm": 0.0, + "learning_rate": 1.8453698820563934e-05, + "loss": 1.2793, + "step": 6948 + }, + { + "epoch": 0.20403429443889834, + "grad_norm": 0.0, + "learning_rate": 1.8453190797007402e-05, + "loss": 1.4121, + "step": 6949 + }, + { + "epoch": 0.20406365611603736, + "grad_norm": 0.0, + "learning_rate": 1.8452682697006507e-05, + "loss": 1.375, + "step": 6950 + }, + { + "epoch": 0.20409301779317635, + "grad_norm": 0.0, + "learning_rate": 1.8452174520565838e-05, + "loss": 1.4258, + "step": 6951 + }, + { + "epoch": 0.20412237947031533, + "grad_norm": 0.0, + "learning_rate": 1.8451666267689986e-05, + "loss": 1.3638, + "step": 6952 + }, + { + "epoch": 0.20415174114745435, + "grad_norm": 0.0, + "learning_rate": 1.8451157938383556e-05, + "loss": 1.4844, + "step": 6953 + }, + { + "epoch": 0.20418110282459334, + "grad_norm": 0.0, + "learning_rate": 1.8450649532651136e-05, + "loss": 1.5391, + "step": 6954 + }, + { + "epoch": 0.20421046450173233, + "grad_norm": 0.0, + "learning_rate": 1.845014105049733e-05, + "loss": 1.5459, + "step": 6955 + }, + { + "epoch": 0.20423982617887135, + "grad_norm": 0.0, + "learning_rate": 1.8449632491926735e-05, + "loss": 1.5098, + "step": 6956 + }, + { + "epoch": 0.20426918785601034, + "grad_norm": 0.0, + "learning_rate": 1.844912385694395e-05, + "loss": 1.4219, + "step": 6957 + }, + { + "epoch": 0.20429854953314933, + "grad_norm": 0.0, + "learning_rate": 1.8448615145553575e-05, + "loss": 1.5166, + "step": 6958 + }, + { + "epoch": 0.20432791121028834, + "grad_norm": 0.0, + "learning_rate": 1.8448106357760205e-05, + "loss": 1.4746, + "step": 6959 + }, + { + "epoch": 0.20435727288742733, + "grad_norm": 0.0, + "learning_rate": 1.844759749356845e-05, + "loss": 1.4814, + "step": 6960 + }, + { + "epoch": 0.20438663456456632, + "grad_norm": 0.0, + "learning_rate": 1.8447088552982904e-05, + "loss": 1.4434, + "step": 6961 + }, + { + "epoch": 0.20441599624170534, + "grad_norm": 0.0, + "learning_rate": 1.8446579536008175e-05, + "loss": 1.4521, + "step": 6962 + }, + { + "epoch": 0.20444535791884433, + "grad_norm": 0.0, + "learning_rate": 1.8446070442648865e-05, + "loss": 1.5107, + "step": 6963 + }, + { + "epoch": 0.20447471959598332, + "grad_norm": 0.0, + "learning_rate": 1.8445561272909572e-05, + "loss": 1.3281, + "step": 6964 + }, + { + "epoch": 0.20450408127312233, + "grad_norm": 0.0, + "learning_rate": 1.844505202679491e-05, + "loss": 1.4658, + "step": 6965 + }, + { + "epoch": 0.20453344295026132, + "grad_norm": 0.0, + "learning_rate": 1.844454270430948e-05, + "loss": 1.4609, + "step": 6966 + }, + { + "epoch": 0.2045628046274003, + "grad_norm": 0.0, + "learning_rate": 1.8444033305457884e-05, + "loss": 1.4326, + "step": 6967 + }, + { + "epoch": 0.2045921663045393, + "grad_norm": 0.0, + "learning_rate": 1.8443523830244737e-05, + "loss": 1.3018, + "step": 6968 + }, + { + "epoch": 0.20462152798167832, + "grad_norm": 0.0, + "learning_rate": 1.8443014278674637e-05, + "loss": 1.4111, + "step": 6969 + }, + { + "epoch": 0.2046508896588173, + "grad_norm": 0.0, + "learning_rate": 1.84425046507522e-05, + "loss": 1.4736, + "step": 6970 + }, + { + "epoch": 0.2046802513359563, + "grad_norm": 0.0, + "learning_rate": 1.8441994946482033e-05, + "loss": 1.3066, + "step": 6971 + }, + { + "epoch": 0.20470961301309532, + "grad_norm": 0.0, + "learning_rate": 1.8441485165868738e-05, + "loss": 1.4985, + "step": 6972 + }, + { + "epoch": 0.2047389746902343, + "grad_norm": 0.0, + "learning_rate": 1.8440975308916934e-05, + "loss": 1.3896, + "step": 6973 + }, + { + "epoch": 0.2047683363673733, + "grad_norm": 0.0, + "learning_rate": 1.8440465375631226e-05, + "loss": 1.4258, + "step": 6974 + }, + { + "epoch": 0.2047976980445123, + "grad_norm": 0.0, + "learning_rate": 1.8439955366016228e-05, + "loss": 1.4043, + "step": 6975 + }, + { + "epoch": 0.2048270597216513, + "grad_norm": 0.0, + "learning_rate": 1.8439445280076555e-05, + "loss": 1.4131, + "step": 6976 + }, + { + "epoch": 0.2048564213987903, + "grad_norm": 0.0, + "learning_rate": 1.8438935117816818e-05, + "loss": 1.5078, + "step": 6977 + }, + { + "epoch": 0.2048857830759293, + "grad_norm": 0.0, + "learning_rate": 1.8438424879241628e-05, + "loss": 1.502, + "step": 6978 + }, + { + "epoch": 0.2049151447530683, + "grad_norm": 0.0, + "learning_rate": 1.84379145643556e-05, + "loss": 1.5195, + "step": 6979 + }, + { + "epoch": 0.20494450643020728, + "grad_norm": 0.0, + "learning_rate": 1.8437404173163348e-05, + "loss": 1.498, + "step": 6980 + }, + { + "epoch": 0.2049738681073463, + "grad_norm": 0.0, + "learning_rate": 1.843689370566949e-05, + "loss": 1.2598, + "step": 6981 + }, + { + "epoch": 0.2050032297844853, + "grad_norm": 0.0, + "learning_rate": 1.8436383161878644e-05, + "loss": 1.3535, + "step": 6982 + }, + { + "epoch": 0.20503259146162428, + "grad_norm": 0.0, + "learning_rate": 1.8435872541795423e-05, + "loss": 1.4531, + "step": 6983 + }, + { + "epoch": 0.2050619531387633, + "grad_norm": 0.0, + "learning_rate": 1.8435361845424445e-05, + "loss": 1.5635, + "step": 6984 + }, + { + "epoch": 0.2050913148159023, + "grad_norm": 0.0, + "learning_rate": 1.8434851072770333e-05, + "loss": 1.4502, + "step": 6985 + }, + { + "epoch": 0.20512067649304128, + "grad_norm": 0.0, + "learning_rate": 1.84343402238377e-05, + "loss": 1.4053, + "step": 6986 + }, + { + "epoch": 0.2051500381701803, + "grad_norm": 0.0, + "learning_rate": 1.8433829298631168e-05, + "loss": 1.5127, + "step": 6987 + }, + { + "epoch": 0.20517939984731928, + "grad_norm": 0.0, + "learning_rate": 1.8433318297155358e-05, + "loss": 1.4434, + "step": 6988 + }, + { + "epoch": 0.20520876152445827, + "grad_norm": 0.0, + "learning_rate": 1.843280721941489e-05, + "loss": 1.5352, + "step": 6989 + }, + { + "epoch": 0.2052381232015973, + "grad_norm": 0.0, + "learning_rate": 1.8432296065414388e-05, + "loss": 1.5068, + "step": 6990 + }, + { + "epoch": 0.20526748487873628, + "grad_norm": 0.0, + "learning_rate": 1.8431784835158475e-05, + "loss": 1.3682, + "step": 6991 + }, + { + "epoch": 0.20529684655587527, + "grad_norm": 0.0, + "learning_rate": 1.843127352865177e-05, + "loss": 1.5498, + "step": 6992 + }, + { + "epoch": 0.20532620823301426, + "grad_norm": 0.0, + "learning_rate": 1.8430762145898897e-05, + "loss": 1.3555, + "step": 6993 + }, + { + "epoch": 0.20535556991015327, + "grad_norm": 0.0, + "learning_rate": 1.8430250686904484e-05, + "loss": 1.4116, + "step": 6994 + }, + { + "epoch": 0.20538493158729226, + "grad_norm": 0.0, + "learning_rate": 1.842973915167316e-05, + "loss": 1.415, + "step": 6995 + }, + { + "epoch": 0.20541429326443125, + "grad_norm": 0.0, + "learning_rate": 1.842922754020954e-05, + "loss": 1.5518, + "step": 6996 + }, + { + "epoch": 0.20544365494157027, + "grad_norm": 0.0, + "learning_rate": 1.842871585251826e-05, + "loss": 1.4932, + "step": 6997 + }, + { + "epoch": 0.20547301661870926, + "grad_norm": 0.0, + "learning_rate": 1.842820408860394e-05, + "loss": 1.3857, + "step": 6998 + }, + { + "epoch": 0.20550237829584825, + "grad_norm": 0.0, + "learning_rate": 1.8427692248471218e-05, + "loss": 1.3682, + "step": 6999 + }, + { + "epoch": 0.20553173997298727, + "grad_norm": 0.0, + "learning_rate": 1.842718033212471e-05, + "loss": 1.4932, + "step": 7000 + }, + { + "epoch": 0.20556110165012625, + "grad_norm": 0.0, + "learning_rate": 1.8426668339569058e-05, + "loss": 1.2935, + "step": 7001 + }, + { + "epoch": 0.20559046332726524, + "grad_norm": 0.0, + "learning_rate": 1.842615627080888e-05, + "loss": 1.4268, + "step": 7002 + }, + { + "epoch": 0.20561982500440426, + "grad_norm": 0.0, + "learning_rate": 1.8425644125848817e-05, + "loss": 1.4395, + "step": 7003 + }, + { + "epoch": 0.20564918668154325, + "grad_norm": 0.0, + "learning_rate": 1.8425131904693498e-05, + "loss": 1.3359, + "step": 7004 + }, + { + "epoch": 0.20567854835868224, + "grad_norm": 0.0, + "learning_rate": 1.8424619607347548e-05, + "loss": 1.4424, + "step": 7005 + }, + { + "epoch": 0.20570791003582126, + "grad_norm": 0.0, + "learning_rate": 1.8424107233815606e-05, + "loss": 1.3174, + "step": 7006 + }, + { + "epoch": 0.20573727171296025, + "grad_norm": 0.0, + "learning_rate": 1.842359478410231e-05, + "loss": 1.4336, + "step": 7007 + }, + { + "epoch": 0.20576663339009924, + "grad_norm": 0.0, + "learning_rate": 1.8423082258212286e-05, + "loss": 1.4736, + "step": 7008 + }, + { + "epoch": 0.20579599506723825, + "grad_norm": 0.0, + "learning_rate": 1.8422569656150173e-05, + "loss": 1.5732, + "step": 7009 + }, + { + "epoch": 0.20582535674437724, + "grad_norm": 0.0, + "learning_rate": 1.8422056977920602e-05, + "loss": 1.4189, + "step": 7010 + }, + { + "epoch": 0.20585471842151623, + "grad_norm": 0.0, + "learning_rate": 1.8421544223528217e-05, + "loss": 1.5371, + "step": 7011 + }, + { + "epoch": 0.20588408009865525, + "grad_norm": 0.0, + "learning_rate": 1.842103139297765e-05, + "loss": 1.3311, + "step": 7012 + }, + { + "epoch": 0.20591344177579424, + "grad_norm": 0.0, + "learning_rate": 1.8420518486273538e-05, + "loss": 1.291, + "step": 7013 + }, + { + "epoch": 0.20594280345293323, + "grad_norm": 0.0, + "learning_rate": 1.842000550342052e-05, + "loss": 1.4053, + "step": 7014 + }, + { + "epoch": 0.20597216513007224, + "grad_norm": 0.0, + "learning_rate": 1.841949244442324e-05, + "loss": 1.1895, + "step": 7015 + }, + { + "epoch": 0.20600152680721123, + "grad_norm": 0.0, + "learning_rate": 1.841897930928633e-05, + "loss": 1.4707, + "step": 7016 + }, + { + "epoch": 0.20603088848435022, + "grad_norm": 0.0, + "learning_rate": 1.8418466098014435e-05, + "loss": 1.4141, + "step": 7017 + }, + { + "epoch": 0.2060602501614892, + "grad_norm": 0.0, + "learning_rate": 1.8417952810612195e-05, + "loss": 1.5781, + "step": 7018 + }, + { + "epoch": 0.20608961183862823, + "grad_norm": 0.0, + "learning_rate": 1.841743944708425e-05, + "loss": 1.4365, + "step": 7019 + }, + { + "epoch": 0.20611897351576722, + "grad_norm": 0.0, + "learning_rate": 1.8416926007435246e-05, + "loss": 1.313, + "step": 7020 + }, + { + "epoch": 0.2061483351929062, + "grad_norm": 0.0, + "learning_rate": 1.8416412491669827e-05, + "loss": 1.5439, + "step": 7021 + }, + { + "epoch": 0.20617769687004522, + "grad_norm": 0.0, + "learning_rate": 1.8415898899792633e-05, + "loss": 1.4463, + "step": 7022 + }, + { + "epoch": 0.2062070585471842, + "grad_norm": 0.0, + "learning_rate": 1.841538523180831e-05, + "loss": 1.417, + "step": 7023 + }, + { + "epoch": 0.2062364202243232, + "grad_norm": 0.0, + "learning_rate": 1.8414871487721496e-05, + "loss": 1.5391, + "step": 7024 + }, + { + "epoch": 0.20626578190146222, + "grad_norm": 0.0, + "learning_rate": 1.8414357667536854e-05, + "loss": 1.5742, + "step": 7025 + }, + { + "epoch": 0.2062951435786012, + "grad_norm": 0.0, + "learning_rate": 1.8413843771259014e-05, + "loss": 1.4521, + "step": 7026 + }, + { + "epoch": 0.2063245052557402, + "grad_norm": 0.0, + "learning_rate": 1.8413329798892637e-05, + "loss": 1.4658, + "step": 7027 + }, + { + "epoch": 0.20635386693287922, + "grad_norm": 0.0, + "learning_rate": 1.841281575044236e-05, + "loss": 1.335, + "step": 7028 + }, + { + "epoch": 0.2063832286100182, + "grad_norm": 0.0, + "learning_rate": 1.841230162591283e-05, + "loss": 1.3789, + "step": 7029 + }, + { + "epoch": 0.2064125902871572, + "grad_norm": 0.0, + "learning_rate": 1.841178742530871e-05, + "loss": 1.4229, + "step": 7030 + }, + { + "epoch": 0.2064419519642962, + "grad_norm": 0.0, + "learning_rate": 1.8411273148634642e-05, + "loss": 1.4141, + "step": 7031 + }, + { + "epoch": 0.2064713136414352, + "grad_norm": 0.0, + "learning_rate": 1.8410758795895273e-05, + "loss": 1.2324, + "step": 7032 + }, + { + "epoch": 0.2065006753185742, + "grad_norm": 0.0, + "learning_rate": 1.841024436709526e-05, + "loss": 1.4688, + "step": 7033 + }, + { + "epoch": 0.2065300369957132, + "grad_norm": 0.0, + "learning_rate": 1.840972986223925e-05, + "loss": 1.583, + "step": 7034 + }, + { + "epoch": 0.2065593986728522, + "grad_norm": 0.0, + "learning_rate": 1.84092152813319e-05, + "loss": 1.4102, + "step": 7035 + }, + { + "epoch": 0.20658876034999119, + "grad_norm": 0.0, + "learning_rate": 1.8408700624377868e-05, + "loss": 1.4648, + "step": 7036 + }, + { + "epoch": 0.2066181220271302, + "grad_norm": 0.0, + "learning_rate": 1.8408185891381797e-05, + "loss": 1.373, + "step": 7037 + }, + { + "epoch": 0.2066474837042692, + "grad_norm": 0.0, + "learning_rate": 1.840767108234835e-05, + "loss": 1.4863, + "step": 7038 + }, + { + "epoch": 0.20667684538140818, + "grad_norm": 0.0, + "learning_rate": 1.8407156197282178e-05, + "loss": 1.6357, + "step": 7039 + }, + { + "epoch": 0.2067062070585472, + "grad_norm": 0.0, + "learning_rate": 1.8406641236187944e-05, + "loss": 1.3979, + "step": 7040 + }, + { + "epoch": 0.2067355687356862, + "grad_norm": 0.0, + "learning_rate": 1.8406126199070294e-05, + "loss": 1.3789, + "step": 7041 + }, + { + "epoch": 0.20676493041282518, + "grad_norm": 0.0, + "learning_rate": 1.8405611085933897e-05, + "loss": 1.5332, + "step": 7042 + }, + { + "epoch": 0.20679429208996417, + "grad_norm": 0.0, + "learning_rate": 1.8405095896783404e-05, + "loss": 1.4639, + "step": 7043 + }, + { + "epoch": 0.20682365376710318, + "grad_norm": 0.0, + "learning_rate": 1.840458063162348e-05, + "loss": 1.3613, + "step": 7044 + }, + { + "epoch": 0.20685301544424217, + "grad_norm": 0.0, + "learning_rate": 1.8404065290458778e-05, + "loss": 1.5127, + "step": 7045 + }, + { + "epoch": 0.20688237712138116, + "grad_norm": 0.0, + "learning_rate": 1.840354987329396e-05, + "loss": 1.46, + "step": 7046 + }, + { + "epoch": 0.20691173879852018, + "grad_norm": 0.0, + "learning_rate": 1.840303438013369e-05, + "loss": 1.4404, + "step": 7047 + }, + { + "epoch": 0.20694110047565917, + "grad_norm": 0.0, + "learning_rate": 1.8402518810982623e-05, + "loss": 1.3574, + "step": 7048 + }, + { + "epoch": 0.20697046215279816, + "grad_norm": 0.0, + "learning_rate": 1.8402003165845435e-05, + "loss": 1.5547, + "step": 7049 + }, + { + "epoch": 0.20699982382993717, + "grad_norm": 0.0, + "learning_rate": 1.8401487444726776e-05, + "loss": 1.3584, + "step": 7050 + }, + { + "epoch": 0.20702918550707616, + "grad_norm": 0.0, + "learning_rate": 1.8400971647631315e-05, + "loss": 1.5293, + "step": 7051 + }, + { + "epoch": 0.20705854718421515, + "grad_norm": 0.0, + "learning_rate": 1.8400455774563713e-05, + "loss": 1.3428, + "step": 7052 + }, + { + "epoch": 0.20708790886135417, + "grad_norm": 0.0, + "learning_rate": 1.8399939825528644e-05, + "loss": 1.5449, + "step": 7053 + }, + { + "epoch": 0.20711727053849316, + "grad_norm": 0.0, + "learning_rate": 1.8399423800530766e-05, + "loss": 1.5254, + "step": 7054 + }, + { + "epoch": 0.20714663221563215, + "grad_norm": 0.0, + "learning_rate": 1.8398907699574743e-05, + "loss": 1.4541, + "step": 7055 + }, + { + "epoch": 0.20717599389277117, + "grad_norm": 0.0, + "learning_rate": 1.839839152266525e-05, + "loss": 1.375, + "step": 7056 + }, + { + "epoch": 0.20720535556991015, + "grad_norm": 0.0, + "learning_rate": 1.839787526980695e-05, + "loss": 1.4131, + "step": 7057 + }, + { + "epoch": 0.20723471724704914, + "grad_norm": 0.0, + "learning_rate": 1.8397358941004517e-05, + "loss": 1.4971, + "step": 7058 + }, + { + "epoch": 0.20726407892418816, + "grad_norm": 0.0, + "learning_rate": 1.839684253626261e-05, + "loss": 1.3799, + "step": 7059 + }, + { + "epoch": 0.20729344060132715, + "grad_norm": 0.0, + "learning_rate": 1.8396326055585914e-05, + "loss": 1.3887, + "step": 7060 + }, + { + "epoch": 0.20732280227846614, + "grad_norm": 0.0, + "learning_rate": 1.8395809498979085e-05, + "loss": 1.375, + "step": 7061 + }, + { + "epoch": 0.20735216395560516, + "grad_norm": 0.0, + "learning_rate": 1.8395292866446805e-05, + "loss": 1.5, + "step": 7062 + }, + { + "epoch": 0.20738152563274415, + "grad_norm": 0.0, + "learning_rate": 1.8394776157993735e-05, + "loss": 1.5273, + "step": 7063 + }, + { + "epoch": 0.20741088730988314, + "grad_norm": 0.0, + "learning_rate": 1.839425937362456e-05, + "loss": 1.3643, + "step": 7064 + }, + { + "epoch": 0.20744024898702215, + "grad_norm": 0.0, + "learning_rate": 1.8393742513343946e-05, + "loss": 1.4961, + "step": 7065 + }, + { + "epoch": 0.20746961066416114, + "grad_norm": 0.0, + "learning_rate": 1.8393225577156565e-05, + "loss": 1.4414, + "step": 7066 + }, + { + "epoch": 0.20749897234130013, + "grad_norm": 0.0, + "learning_rate": 1.83927085650671e-05, + "loss": 1.5244, + "step": 7067 + }, + { + "epoch": 0.20752833401843912, + "grad_norm": 0.0, + "learning_rate": 1.8392191477080224e-05, + "loss": 1.3916, + "step": 7068 + }, + { + "epoch": 0.20755769569557814, + "grad_norm": 0.0, + "learning_rate": 1.8391674313200608e-05, + "loss": 1.3789, + "step": 7069 + }, + { + "epoch": 0.20758705737271713, + "grad_norm": 0.0, + "learning_rate": 1.8391157073432932e-05, + "loss": 1.417, + "step": 7070 + }, + { + "epoch": 0.20761641904985612, + "grad_norm": 0.0, + "learning_rate": 1.839063975778187e-05, + "loss": 1.4775, + "step": 7071 + }, + { + "epoch": 0.20764578072699513, + "grad_norm": 0.0, + "learning_rate": 1.839012236625211e-05, + "loss": 1.3701, + "step": 7072 + }, + { + "epoch": 0.20767514240413412, + "grad_norm": 0.0, + "learning_rate": 1.8389604898848323e-05, + "loss": 1.416, + "step": 7073 + }, + { + "epoch": 0.2077045040812731, + "grad_norm": 0.0, + "learning_rate": 1.838908735557519e-05, + "loss": 1.4639, + "step": 7074 + }, + { + "epoch": 0.20773386575841213, + "grad_norm": 0.0, + "learning_rate": 1.838856973643739e-05, + "loss": 1.502, + "step": 7075 + }, + { + "epoch": 0.20776322743555112, + "grad_norm": 0.0, + "learning_rate": 1.838805204143961e-05, + "loss": 1.5186, + "step": 7076 + }, + { + "epoch": 0.2077925891126901, + "grad_norm": 0.0, + "learning_rate": 1.838753427058652e-05, + "loss": 1.5273, + "step": 7077 + }, + { + "epoch": 0.20782195078982912, + "grad_norm": 0.0, + "learning_rate": 1.8387016423882814e-05, + "loss": 1.5547, + "step": 7078 + }, + { + "epoch": 0.2078513124669681, + "grad_norm": 0.0, + "learning_rate": 1.8386498501333172e-05, + "loss": 1.4043, + "step": 7079 + }, + { + "epoch": 0.2078806741441071, + "grad_norm": 0.0, + "learning_rate": 1.8385980502942274e-05, + "loss": 1.5566, + "step": 7080 + }, + { + "epoch": 0.20791003582124612, + "grad_norm": 0.0, + "learning_rate": 1.8385462428714808e-05, + "loss": 1.5156, + "step": 7081 + }, + { + "epoch": 0.2079393974983851, + "grad_norm": 0.0, + "learning_rate": 1.8384944278655457e-05, + "loss": 1.4834, + "step": 7082 + }, + { + "epoch": 0.2079687591755241, + "grad_norm": 0.0, + "learning_rate": 1.8384426052768907e-05, + "loss": 1.4336, + "step": 7083 + }, + { + "epoch": 0.20799812085266312, + "grad_norm": 0.0, + "learning_rate": 1.8383907751059843e-05, + "loss": 1.4365, + "step": 7084 + }, + { + "epoch": 0.2080274825298021, + "grad_norm": 0.0, + "learning_rate": 1.838338937353296e-05, + "loss": 1.4092, + "step": 7085 + }, + { + "epoch": 0.2080568442069411, + "grad_norm": 0.0, + "learning_rate": 1.8382870920192932e-05, + "loss": 1.4463, + "step": 7086 + }, + { + "epoch": 0.2080862058840801, + "grad_norm": 0.0, + "learning_rate": 1.838235239104446e-05, + "loss": 1.5146, + "step": 7087 + }, + { + "epoch": 0.2081155675612191, + "grad_norm": 0.0, + "learning_rate": 1.8381833786092228e-05, + "loss": 1.5186, + "step": 7088 + }, + { + "epoch": 0.2081449292383581, + "grad_norm": 0.0, + "learning_rate": 1.8381315105340928e-05, + "loss": 1.458, + "step": 7089 + }, + { + "epoch": 0.2081742909154971, + "grad_norm": 0.0, + "learning_rate": 1.8380796348795246e-05, + "loss": 1.4307, + "step": 7090 + }, + { + "epoch": 0.2082036525926361, + "grad_norm": 0.0, + "learning_rate": 1.8380277516459876e-05, + "loss": 1.3115, + "step": 7091 + }, + { + "epoch": 0.20823301426977509, + "grad_norm": 0.0, + "learning_rate": 1.837975860833951e-05, + "loss": 1.3975, + "step": 7092 + }, + { + "epoch": 0.20826237594691407, + "grad_norm": 0.0, + "learning_rate": 1.8379239624438843e-05, + "loss": 1.5205, + "step": 7093 + }, + { + "epoch": 0.2082917376240531, + "grad_norm": 0.0, + "learning_rate": 1.8378720564762567e-05, + "loss": 1.5059, + "step": 7094 + }, + { + "epoch": 0.20832109930119208, + "grad_norm": 0.0, + "learning_rate": 1.8378201429315375e-05, + "loss": 1.4697, + "step": 7095 + }, + { + "epoch": 0.20835046097833107, + "grad_norm": 0.0, + "learning_rate": 1.837768221810196e-05, + "loss": 1.3701, + "step": 7096 + }, + { + "epoch": 0.2083798226554701, + "grad_norm": 0.0, + "learning_rate": 1.8377162931127022e-05, + "loss": 1.4502, + "step": 7097 + }, + { + "epoch": 0.20840918433260908, + "grad_norm": 0.0, + "learning_rate": 1.837664356839525e-05, + "loss": 1.3955, + "step": 7098 + }, + { + "epoch": 0.20843854600974807, + "grad_norm": 0.0, + "learning_rate": 1.8376124129911348e-05, + "loss": 1.4355, + "step": 7099 + }, + { + "epoch": 0.20846790768688708, + "grad_norm": 0.0, + "learning_rate": 1.8375604615680008e-05, + "loss": 1.459, + "step": 7100 + }, + { + "epoch": 0.20849726936402607, + "grad_norm": 0.0, + "learning_rate": 1.837508502570593e-05, + "loss": 1.376, + "step": 7101 + }, + { + "epoch": 0.20852663104116506, + "grad_norm": 0.0, + "learning_rate": 1.8374565359993817e-05, + "loss": 1.5596, + "step": 7102 + }, + { + "epoch": 0.20855599271830408, + "grad_norm": 0.0, + "learning_rate": 1.8374045618548365e-05, + "loss": 1.4043, + "step": 7103 + }, + { + "epoch": 0.20858535439544307, + "grad_norm": 0.0, + "learning_rate": 1.8373525801374268e-05, + "loss": 1.4648, + "step": 7104 + }, + { + "epoch": 0.20861471607258206, + "grad_norm": 0.0, + "learning_rate": 1.8373005908476238e-05, + "loss": 1.292, + "step": 7105 + }, + { + "epoch": 0.20864407774972107, + "grad_norm": 0.0, + "learning_rate": 1.837248593985897e-05, + "loss": 1.4229, + "step": 7106 + }, + { + "epoch": 0.20867343942686006, + "grad_norm": 0.0, + "learning_rate": 1.8371965895527166e-05, + "loss": 1.4072, + "step": 7107 + }, + { + "epoch": 0.20870280110399905, + "grad_norm": 0.0, + "learning_rate": 1.837144577548553e-05, + "loss": 1.4229, + "step": 7108 + }, + { + "epoch": 0.20873216278113807, + "grad_norm": 0.0, + "learning_rate": 1.8370925579738767e-05, + "loss": 1.4785, + "step": 7109 + }, + { + "epoch": 0.20876152445827706, + "grad_norm": 0.0, + "learning_rate": 1.837040530829158e-05, + "loss": 1.3408, + "step": 7110 + }, + { + "epoch": 0.20879088613541605, + "grad_norm": 0.0, + "learning_rate": 1.8369884961148673e-05, + "loss": 1.6074, + "step": 7111 + }, + { + "epoch": 0.20882024781255507, + "grad_norm": 0.0, + "learning_rate": 1.8369364538314756e-05, + "loss": 1.4414, + "step": 7112 + }, + { + "epoch": 0.20884960948969405, + "grad_norm": 0.0, + "learning_rate": 1.8368844039794533e-05, + "loss": 1.3818, + "step": 7113 + }, + { + "epoch": 0.20887897116683304, + "grad_norm": 0.0, + "learning_rate": 1.8368323465592704e-05, + "loss": 1.4648, + "step": 7114 + }, + { + "epoch": 0.20890833284397206, + "grad_norm": 0.0, + "learning_rate": 1.8367802815713988e-05, + "loss": 1.3965, + "step": 7115 + }, + { + "epoch": 0.20893769452111105, + "grad_norm": 0.0, + "learning_rate": 1.8367282090163083e-05, + "loss": 1.5391, + "step": 7116 + }, + { + "epoch": 0.20896705619825004, + "grad_norm": 0.0, + "learning_rate": 1.8366761288944705e-05, + "loss": 1.4629, + "step": 7117 + }, + { + "epoch": 0.20899641787538903, + "grad_norm": 0.0, + "learning_rate": 1.8366240412063565e-05, + "loss": 1.3789, + "step": 7118 + }, + { + "epoch": 0.20902577955252805, + "grad_norm": 0.0, + "learning_rate": 1.8365719459524367e-05, + "loss": 1.4424, + "step": 7119 + }, + { + "epoch": 0.20905514122966704, + "grad_norm": 0.0, + "learning_rate": 1.836519843133183e-05, + "loss": 1.459, + "step": 7120 + }, + { + "epoch": 0.20908450290680602, + "grad_norm": 0.0, + "learning_rate": 1.8364677327490657e-05, + "loss": 1.4873, + "step": 7121 + }, + { + "epoch": 0.20911386458394504, + "grad_norm": 0.0, + "learning_rate": 1.8364156148005566e-05, + "loss": 1.4033, + "step": 7122 + }, + { + "epoch": 0.20914322626108403, + "grad_norm": 0.0, + "learning_rate": 1.8363634892881266e-05, + "loss": 1.5146, + "step": 7123 + }, + { + "epoch": 0.20917258793822302, + "grad_norm": 0.0, + "learning_rate": 1.8363113562122478e-05, + "loss": 1.4385, + "step": 7124 + }, + { + "epoch": 0.20920194961536204, + "grad_norm": 0.0, + "learning_rate": 1.836259215573391e-05, + "loss": 1.5918, + "step": 7125 + }, + { + "epoch": 0.20923131129250103, + "grad_norm": 0.0, + "learning_rate": 1.8362070673720282e-05, + "loss": 1.4082, + "step": 7126 + }, + { + "epoch": 0.20926067296964002, + "grad_norm": 0.0, + "learning_rate": 1.8361549116086304e-05, + "loss": 1.4395, + "step": 7127 + }, + { + "epoch": 0.20929003464677903, + "grad_norm": 0.0, + "learning_rate": 1.8361027482836698e-05, + "loss": 1.3438, + "step": 7128 + }, + { + "epoch": 0.20931939632391802, + "grad_norm": 0.0, + "learning_rate": 1.8360505773976174e-05, + "loss": 1.4668, + "step": 7129 + }, + { + "epoch": 0.209348758001057, + "grad_norm": 0.0, + "learning_rate": 1.8359983989509463e-05, + "loss": 1.4287, + "step": 7130 + }, + { + "epoch": 0.20937811967819603, + "grad_norm": 0.0, + "learning_rate": 1.8359462129441273e-05, + "loss": 1.4902, + "step": 7131 + }, + { + "epoch": 0.20940748135533502, + "grad_norm": 0.0, + "learning_rate": 1.835894019377632e-05, + "loss": 1.3438, + "step": 7132 + }, + { + "epoch": 0.209436843032474, + "grad_norm": 0.0, + "learning_rate": 1.8358418182519337e-05, + "loss": 1.3477, + "step": 7133 + }, + { + "epoch": 0.20946620470961302, + "grad_norm": 0.0, + "learning_rate": 1.835789609567504e-05, + "loss": 1.459, + "step": 7134 + }, + { + "epoch": 0.209495566386752, + "grad_norm": 0.0, + "learning_rate": 1.8357373933248143e-05, + "loss": 1.3799, + "step": 7135 + }, + { + "epoch": 0.209524928063891, + "grad_norm": 0.0, + "learning_rate": 1.8356851695243376e-05, + "loss": 1.4717, + "step": 7136 + }, + { + "epoch": 0.20955428974103002, + "grad_norm": 0.0, + "learning_rate": 1.8356329381665455e-05, + "loss": 1.502, + "step": 7137 + }, + { + "epoch": 0.209583651418169, + "grad_norm": 0.0, + "learning_rate": 1.835580699251911e-05, + "loss": 1.3984, + "step": 7138 + }, + { + "epoch": 0.209613013095308, + "grad_norm": 0.0, + "learning_rate": 1.8355284527809062e-05, + "loss": 1.3711, + "step": 7139 + }, + { + "epoch": 0.20964237477244702, + "grad_norm": 0.0, + "learning_rate": 1.8354761987540035e-05, + "loss": 1.4238, + "step": 7140 + }, + { + "epoch": 0.209671736449586, + "grad_norm": 0.0, + "learning_rate": 1.8354239371716758e-05, + "loss": 1.4414, + "step": 7141 + }, + { + "epoch": 0.209701098126725, + "grad_norm": 0.0, + "learning_rate": 1.8353716680343955e-05, + "loss": 1.4902, + "step": 7142 + }, + { + "epoch": 0.20973045980386398, + "grad_norm": 0.0, + "learning_rate": 1.835319391342635e-05, + "loss": 1.4492, + "step": 7143 + }, + { + "epoch": 0.209759821481003, + "grad_norm": 0.0, + "learning_rate": 1.8352671070968674e-05, + "loss": 1.4238, + "step": 7144 + }, + { + "epoch": 0.209789183158142, + "grad_norm": 0.0, + "learning_rate": 1.8352148152975656e-05, + "loss": 1.4922, + "step": 7145 + }, + { + "epoch": 0.20981854483528098, + "grad_norm": 0.0, + "learning_rate": 1.8351625159452022e-05, + "loss": 1.4609, + "step": 7146 + }, + { + "epoch": 0.20984790651242, + "grad_norm": 0.0, + "learning_rate": 1.8351102090402502e-05, + "loss": 1.3789, + "step": 7147 + }, + { + "epoch": 0.20987726818955899, + "grad_norm": 0.0, + "learning_rate": 1.835057894583183e-05, + "loss": 1.3848, + "step": 7148 + }, + { + "epoch": 0.20990662986669797, + "grad_norm": 0.0, + "learning_rate": 1.835005572574473e-05, + "loss": 1.5889, + "step": 7149 + }, + { + "epoch": 0.209935991543837, + "grad_norm": 0.0, + "learning_rate": 1.8349532430145938e-05, + "loss": 1.4834, + "step": 7150 + }, + { + "epoch": 0.20996535322097598, + "grad_norm": 0.0, + "learning_rate": 1.8349009059040187e-05, + "loss": 1.4453, + "step": 7151 + }, + { + "epoch": 0.20999471489811497, + "grad_norm": 0.0, + "learning_rate": 1.8348485612432212e-05, + "loss": 1.4229, + "step": 7152 + }, + { + "epoch": 0.210024076575254, + "grad_norm": 0.0, + "learning_rate": 1.834796209032674e-05, + "loss": 1.4502, + "step": 7153 + }, + { + "epoch": 0.21005343825239298, + "grad_norm": 0.0, + "learning_rate": 1.834743849272851e-05, + "loss": 1.4355, + "step": 7154 + }, + { + "epoch": 0.21008279992953197, + "grad_norm": 0.0, + "learning_rate": 1.8346914819642254e-05, + "loss": 1.54, + "step": 7155 + }, + { + "epoch": 0.21011216160667098, + "grad_norm": 0.0, + "learning_rate": 1.8346391071072715e-05, + "loss": 1.5059, + "step": 7156 + }, + { + "epoch": 0.21014152328380997, + "grad_norm": 0.0, + "learning_rate": 1.834586724702462e-05, + "loss": 1.5039, + "step": 7157 + }, + { + "epoch": 0.21017088496094896, + "grad_norm": 0.0, + "learning_rate": 1.834534334750271e-05, + "loss": 1.3936, + "step": 7158 + }, + { + "epoch": 0.21020024663808798, + "grad_norm": 0.0, + "learning_rate": 1.834481937251172e-05, + "loss": 1.4268, + "step": 7159 + }, + { + "epoch": 0.21022960831522697, + "grad_norm": 0.0, + "learning_rate": 1.8344295322056397e-05, + "loss": 1.4307, + "step": 7160 + }, + { + "epoch": 0.21025896999236596, + "grad_norm": 0.0, + "learning_rate": 1.834377119614147e-05, + "loss": 1.4805, + "step": 7161 + }, + { + "epoch": 0.21028833166950497, + "grad_norm": 0.0, + "learning_rate": 1.8343246994771685e-05, + "loss": 1.3477, + "step": 7162 + }, + { + "epoch": 0.21031769334664396, + "grad_norm": 0.0, + "learning_rate": 1.834272271795178e-05, + "loss": 1.3984, + "step": 7163 + }, + { + "epoch": 0.21034705502378295, + "grad_norm": 0.0, + "learning_rate": 1.83421983656865e-05, + "loss": 1.4824, + "step": 7164 + }, + { + "epoch": 0.21037641670092197, + "grad_norm": 0.0, + "learning_rate": 1.834167393798058e-05, + "loss": 1.459, + "step": 7165 + }, + { + "epoch": 0.21040577837806096, + "grad_norm": 0.0, + "learning_rate": 1.8341149434838767e-05, + "loss": 1.4707, + "step": 7166 + }, + { + "epoch": 0.21043514005519995, + "grad_norm": 0.0, + "learning_rate": 1.8340624856265804e-05, + "loss": 1.498, + "step": 7167 + }, + { + "epoch": 0.21046450173233894, + "grad_norm": 0.0, + "learning_rate": 1.8340100202266434e-05, + "loss": 1.4717, + "step": 7168 + }, + { + "epoch": 0.21049386340947795, + "grad_norm": 0.0, + "learning_rate": 1.8339575472845404e-05, + "loss": 1.4229, + "step": 7169 + }, + { + "epoch": 0.21052322508661694, + "grad_norm": 0.0, + "learning_rate": 1.8339050668007454e-05, + "loss": 1.415, + "step": 7170 + }, + { + "epoch": 0.21055258676375593, + "grad_norm": 0.0, + "learning_rate": 1.8338525787757337e-05, + "loss": 1.3906, + "step": 7171 + }, + { + "epoch": 0.21058194844089495, + "grad_norm": 0.0, + "learning_rate": 1.833800083209979e-05, + "loss": 1.3359, + "step": 7172 + }, + { + "epoch": 0.21061131011803394, + "grad_norm": 0.0, + "learning_rate": 1.833747580103957e-05, + "loss": 1.5137, + "step": 7173 + }, + { + "epoch": 0.21064067179517293, + "grad_norm": 0.0, + "learning_rate": 1.833695069458142e-05, + "loss": 1.4453, + "step": 7174 + }, + { + "epoch": 0.21067003347231195, + "grad_norm": 0.0, + "learning_rate": 1.833642551273009e-05, + "loss": 1.4482, + "step": 7175 + }, + { + "epoch": 0.21069939514945094, + "grad_norm": 0.0, + "learning_rate": 1.833590025549033e-05, + "loss": 1.3721, + "step": 7176 + }, + { + "epoch": 0.21072875682658992, + "grad_norm": 0.0, + "learning_rate": 1.8335374922866892e-05, + "loss": 1.4824, + "step": 7177 + }, + { + "epoch": 0.21075811850372894, + "grad_norm": 0.0, + "learning_rate": 1.8334849514864518e-05, + "loss": 1.3086, + "step": 7178 + }, + { + "epoch": 0.21078748018086793, + "grad_norm": 0.0, + "learning_rate": 1.8334324031487966e-05, + "loss": 1.2593, + "step": 7179 + }, + { + "epoch": 0.21081684185800692, + "grad_norm": 0.0, + "learning_rate": 1.8333798472741988e-05, + "loss": 1.3208, + "step": 7180 + }, + { + "epoch": 0.21084620353514594, + "grad_norm": 0.0, + "learning_rate": 1.833327283863134e-05, + "loss": 1.292, + "step": 7181 + }, + { + "epoch": 0.21087556521228493, + "grad_norm": 0.0, + "learning_rate": 1.8332747129160768e-05, + "loss": 1.5752, + "step": 7182 + }, + { + "epoch": 0.21090492688942392, + "grad_norm": 0.0, + "learning_rate": 1.833222134433503e-05, + "loss": 1.6084, + "step": 7183 + }, + { + "epoch": 0.21093428856656293, + "grad_norm": 0.0, + "learning_rate": 1.833169548415888e-05, + "loss": 1.4043, + "step": 7184 + }, + { + "epoch": 0.21096365024370192, + "grad_norm": 0.0, + "learning_rate": 1.8331169548637074e-05, + "loss": 1.3936, + "step": 7185 + }, + { + "epoch": 0.2109930119208409, + "grad_norm": 0.0, + "learning_rate": 1.833064353777437e-05, + "loss": 1.541, + "step": 7186 + }, + { + "epoch": 0.21102237359797993, + "grad_norm": 0.0, + "learning_rate": 1.8330117451575523e-05, + "loss": 1.4873, + "step": 7187 + }, + { + "epoch": 0.21105173527511892, + "grad_norm": 0.0, + "learning_rate": 1.8329591290045288e-05, + "loss": 1.3096, + "step": 7188 + }, + { + "epoch": 0.2110810969522579, + "grad_norm": 0.0, + "learning_rate": 1.8329065053188426e-05, + "loss": 1.3926, + "step": 7189 + }, + { + "epoch": 0.21111045862939692, + "grad_norm": 0.0, + "learning_rate": 1.8328538741009697e-05, + "loss": 1.3926, + "step": 7190 + }, + { + "epoch": 0.2111398203065359, + "grad_norm": 0.0, + "learning_rate": 1.832801235351386e-05, + "loss": 1.4521, + "step": 7191 + }, + { + "epoch": 0.2111691819836749, + "grad_norm": 0.0, + "learning_rate": 1.832748589070567e-05, + "loss": 1.4502, + "step": 7192 + }, + { + "epoch": 0.2111985436608139, + "grad_norm": 0.0, + "learning_rate": 1.8326959352589896e-05, + "loss": 1.4912, + "step": 7193 + }, + { + "epoch": 0.2112279053379529, + "grad_norm": 0.0, + "learning_rate": 1.8326432739171296e-05, + "loss": 1.332, + "step": 7194 + }, + { + "epoch": 0.2112572670150919, + "grad_norm": 0.0, + "learning_rate": 1.8325906050454632e-05, + "loss": 1.3408, + "step": 7195 + }, + { + "epoch": 0.2112866286922309, + "grad_norm": 0.0, + "learning_rate": 1.8325379286444667e-05, + "loss": 1.3789, + "step": 7196 + }, + { + "epoch": 0.2113159903693699, + "grad_norm": 0.0, + "learning_rate": 1.832485244714617e-05, + "loss": 1.4902, + "step": 7197 + }, + { + "epoch": 0.2113453520465089, + "grad_norm": 0.0, + "learning_rate": 1.832432553256389e-05, + "loss": 1.3867, + "step": 7198 + }, + { + "epoch": 0.21137471372364788, + "grad_norm": 0.0, + "learning_rate": 1.832379854270261e-05, + "loss": 1.4453, + "step": 7199 + }, + { + "epoch": 0.2114040754007869, + "grad_norm": 0.0, + "learning_rate": 1.8323271477567084e-05, + "loss": 1.4268, + "step": 7200 + }, + { + "epoch": 0.2114334370779259, + "grad_norm": 0.0, + "learning_rate": 1.8322744337162085e-05, + "loss": 1.3428, + "step": 7201 + }, + { + "epoch": 0.21146279875506488, + "grad_norm": 0.0, + "learning_rate": 1.8322217121492377e-05, + "loss": 1.5342, + "step": 7202 + }, + { + "epoch": 0.2114921604322039, + "grad_norm": 0.0, + "learning_rate": 1.8321689830562728e-05, + "loss": 1.4697, + "step": 7203 + }, + { + "epoch": 0.21152152210934289, + "grad_norm": 0.0, + "learning_rate": 1.8321162464377908e-05, + "loss": 1.3799, + "step": 7204 + }, + { + "epoch": 0.21155088378648187, + "grad_norm": 0.0, + "learning_rate": 1.8320635022942685e-05, + "loss": 1.4932, + "step": 7205 + }, + { + "epoch": 0.2115802454636209, + "grad_norm": 0.0, + "learning_rate": 1.8320107506261825e-05, + "loss": 1.4072, + "step": 7206 + }, + { + "epoch": 0.21160960714075988, + "grad_norm": 0.0, + "learning_rate": 1.8319579914340102e-05, + "loss": 1.5488, + "step": 7207 + }, + { + "epoch": 0.21163896881789887, + "grad_norm": 0.0, + "learning_rate": 1.831905224718229e-05, + "loss": 1.4961, + "step": 7208 + }, + { + "epoch": 0.2116683304950379, + "grad_norm": 0.0, + "learning_rate": 1.8318524504793157e-05, + "loss": 1.4326, + "step": 7209 + }, + { + "epoch": 0.21169769217217688, + "grad_norm": 0.0, + "learning_rate": 1.8317996687177476e-05, + "loss": 1.4609, + "step": 7210 + }, + { + "epoch": 0.21172705384931587, + "grad_norm": 0.0, + "learning_rate": 1.831746879434002e-05, + "loss": 1.7959, + "step": 7211 + }, + { + "epoch": 0.21175641552645488, + "grad_norm": 0.0, + "learning_rate": 1.8316940826285567e-05, + "loss": 1.3008, + "step": 7212 + }, + { + "epoch": 0.21178577720359387, + "grad_norm": 0.0, + "learning_rate": 1.8316412783018885e-05, + "loss": 1.4697, + "step": 7213 + }, + { + "epoch": 0.21181513888073286, + "grad_norm": 0.0, + "learning_rate": 1.8315884664544752e-05, + "loss": 1.4375, + "step": 7214 + }, + { + "epoch": 0.21184450055787188, + "grad_norm": 0.0, + "learning_rate": 1.8315356470867947e-05, + "loss": 1.4717, + "step": 7215 + }, + { + "epoch": 0.21187386223501087, + "grad_norm": 0.0, + "learning_rate": 1.831482820199324e-05, + "loss": 1.4688, + "step": 7216 + }, + { + "epoch": 0.21190322391214986, + "grad_norm": 0.0, + "learning_rate": 1.8314299857925414e-05, + "loss": 1.5586, + "step": 7217 + }, + { + "epoch": 0.21193258558928885, + "grad_norm": 0.0, + "learning_rate": 1.8313771438669247e-05, + "loss": 1.4854, + "step": 7218 + }, + { + "epoch": 0.21196194726642786, + "grad_norm": 0.0, + "learning_rate": 1.8313242944229515e-05, + "loss": 1.4316, + "step": 7219 + }, + { + "epoch": 0.21199130894356685, + "grad_norm": 0.0, + "learning_rate": 1.8312714374610997e-05, + "loss": 1.4043, + "step": 7220 + }, + { + "epoch": 0.21202067062070584, + "grad_norm": 0.0, + "learning_rate": 1.831218572981847e-05, + "loss": 1.3701, + "step": 7221 + }, + { + "epoch": 0.21205003229784486, + "grad_norm": 0.0, + "learning_rate": 1.8311657009856728e-05, + "loss": 1.3271, + "step": 7222 + }, + { + "epoch": 0.21207939397498385, + "grad_norm": 0.0, + "learning_rate": 1.8311128214730538e-05, + "loss": 1.501, + "step": 7223 + }, + { + "epoch": 0.21210875565212284, + "grad_norm": 0.0, + "learning_rate": 1.8310599344444685e-05, + "loss": 1.4844, + "step": 7224 + }, + { + "epoch": 0.21213811732926185, + "grad_norm": 0.0, + "learning_rate": 1.8310070399003957e-05, + "loss": 1.4932, + "step": 7225 + }, + { + "epoch": 0.21216747900640084, + "grad_norm": 0.0, + "learning_rate": 1.830954137841313e-05, + "loss": 1.4131, + "step": 7226 + }, + { + "epoch": 0.21219684068353983, + "grad_norm": 0.0, + "learning_rate": 1.8309012282676995e-05, + "loss": 1.4658, + "step": 7227 + }, + { + "epoch": 0.21222620236067885, + "grad_norm": 0.0, + "learning_rate": 1.8308483111800336e-05, + "loss": 1.3369, + "step": 7228 + }, + { + "epoch": 0.21225556403781784, + "grad_norm": 0.0, + "learning_rate": 1.8307953865787933e-05, + "loss": 1.6143, + "step": 7229 + }, + { + "epoch": 0.21228492571495683, + "grad_norm": 0.0, + "learning_rate": 1.8307424544644576e-05, + "loss": 1.3486, + "step": 7230 + }, + { + "epoch": 0.21231428739209585, + "grad_norm": 0.0, + "learning_rate": 1.830689514837505e-05, + "loss": 1.5869, + "step": 7231 + }, + { + "epoch": 0.21234364906923484, + "grad_norm": 0.0, + "learning_rate": 1.830636567698415e-05, + "loss": 1.4209, + "step": 7232 + }, + { + "epoch": 0.21237301074637382, + "grad_norm": 0.0, + "learning_rate": 1.8305836130476652e-05, + "loss": 1.5088, + "step": 7233 + }, + { + "epoch": 0.21240237242351284, + "grad_norm": 0.0, + "learning_rate": 1.8305306508857353e-05, + "loss": 1.5498, + "step": 7234 + }, + { + "epoch": 0.21243173410065183, + "grad_norm": 0.0, + "learning_rate": 1.830477681213104e-05, + "loss": 1.4287, + "step": 7235 + }, + { + "epoch": 0.21246109577779082, + "grad_norm": 0.0, + "learning_rate": 1.8304247040302504e-05, + "loss": 1.5674, + "step": 7236 + }, + { + "epoch": 0.21249045745492984, + "grad_norm": 0.0, + "learning_rate": 1.8303717193376533e-05, + "loss": 1.4746, + "step": 7237 + }, + { + "epoch": 0.21251981913206883, + "grad_norm": 0.0, + "learning_rate": 1.8303187271357924e-05, + "loss": 1.5137, + "step": 7238 + }, + { + "epoch": 0.21254918080920782, + "grad_norm": 0.0, + "learning_rate": 1.8302657274251463e-05, + "loss": 1.4048, + "step": 7239 + }, + { + "epoch": 0.21257854248634683, + "grad_norm": 0.0, + "learning_rate": 1.830212720206195e-05, + "loss": 1.4121, + "step": 7240 + }, + { + "epoch": 0.21260790416348582, + "grad_norm": 0.0, + "learning_rate": 1.8301597054794168e-05, + "loss": 1.5186, + "step": 7241 + }, + { + "epoch": 0.2126372658406248, + "grad_norm": 0.0, + "learning_rate": 1.8301066832452922e-05, + "loss": 1.4902, + "step": 7242 + }, + { + "epoch": 0.2126666275177638, + "grad_norm": 0.0, + "learning_rate": 1.8300536535043002e-05, + "loss": 1.4434, + "step": 7243 + }, + { + "epoch": 0.21269598919490282, + "grad_norm": 0.0, + "learning_rate": 1.8300006162569205e-05, + "loss": 1.3662, + "step": 7244 + }, + { + "epoch": 0.2127253508720418, + "grad_norm": 0.0, + "learning_rate": 1.8299475715036325e-05, + "loss": 1.4111, + "step": 7245 + }, + { + "epoch": 0.2127547125491808, + "grad_norm": 0.0, + "learning_rate": 1.829894519244916e-05, + "loss": 1.335, + "step": 7246 + }, + { + "epoch": 0.2127840742263198, + "grad_norm": 0.0, + "learning_rate": 1.829841459481251e-05, + "loss": 1.5352, + "step": 7247 + }, + { + "epoch": 0.2128134359034588, + "grad_norm": 0.0, + "learning_rate": 1.8297883922131173e-05, + "loss": 1.4658, + "step": 7248 + }, + { + "epoch": 0.2128427975805978, + "grad_norm": 0.0, + "learning_rate": 1.8297353174409945e-05, + "loss": 1.5166, + "step": 7249 + }, + { + "epoch": 0.2128721592577368, + "grad_norm": 0.0, + "learning_rate": 1.8296822351653627e-05, + "loss": 1.5811, + "step": 7250 + }, + { + "epoch": 0.2129015209348758, + "grad_norm": 0.0, + "learning_rate": 1.829629145386702e-05, + "loss": 1.5498, + "step": 7251 + }, + { + "epoch": 0.2129308826120148, + "grad_norm": 0.0, + "learning_rate": 1.8295760481054926e-05, + "loss": 1.4561, + "step": 7252 + }, + { + "epoch": 0.2129602442891538, + "grad_norm": 0.0, + "learning_rate": 1.8295229433222143e-05, + "loss": 1.4268, + "step": 7253 + }, + { + "epoch": 0.2129896059662928, + "grad_norm": 0.0, + "learning_rate": 1.8294698310373476e-05, + "loss": 1.4121, + "step": 7254 + }, + { + "epoch": 0.21301896764343178, + "grad_norm": 0.0, + "learning_rate": 1.829416711251373e-05, + "loss": 1.4033, + "step": 7255 + }, + { + "epoch": 0.2130483293205708, + "grad_norm": 0.0, + "learning_rate": 1.8293635839647704e-05, + "loss": 1.4541, + "step": 7256 + }, + { + "epoch": 0.2130776909977098, + "grad_norm": 0.0, + "learning_rate": 1.8293104491780208e-05, + "loss": 1.4424, + "step": 7257 + }, + { + "epoch": 0.21310705267484878, + "grad_norm": 0.0, + "learning_rate": 1.829257306891604e-05, + "loss": 1.4453, + "step": 7258 + }, + { + "epoch": 0.2131364143519878, + "grad_norm": 0.0, + "learning_rate": 1.829204157106001e-05, + "loss": 1.4561, + "step": 7259 + }, + { + "epoch": 0.21316577602912679, + "grad_norm": 0.0, + "learning_rate": 1.829150999821693e-05, + "loss": 1.5547, + "step": 7260 + }, + { + "epoch": 0.21319513770626577, + "grad_norm": 0.0, + "learning_rate": 1.8290978350391596e-05, + "loss": 1.3984, + "step": 7261 + }, + { + "epoch": 0.2132244993834048, + "grad_norm": 0.0, + "learning_rate": 1.8290446627588824e-05, + "loss": 1.4502, + "step": 7262 + }, + { + "epoch": 0.21325386106054378, + "grad_norm": 0.0, + "learning_rate": 1.828991482981342e-05, + "loss": 1.4863, + "step": 7263 + }, + { + "epoch": 0.21328322273768277, + "grad_norm": 0.0, + "learning_rate": 1.8289382957070193e-05, + "loss": 1.4258, + "step": 7264 + }, + { + "epoch": 0.2133125844148218, + "grad_norm": 0.0, + "learning_rate": 1.8288851009363956e-05, + "loss": 1.5029, + "step": 7265 + }, + { + "epoch": 0.21334194609196078, + "grad_norm": 0.0, + "learning_rate": 1.828831898669951e-05, + "loss": 1.5029, + "step": 7266 + }, + { + "epoch": 0.21337130776909977, + "grad_norm": 0.0, + "learning_rate": 1.828778688908168e-05, + "loss": 1.5645, + "step": 7267 + }, + { + "epoch": 0.21340066944623876, + "grad_norm": 0.0, + "learning_rate": 1.8287254716515264e-05, + "loss": 1.3779, + "step": 7268 + }, + { + "epoch": 0.21343003112337777, + "grad_norm": 0.0, + "learning_rate": 1.8286722469005083e-05, + "loss": 1.4434, + "step": 7269 + }, + { + "epoch": 0.21345939280051676, + "grad_norm": 0.0, + "learning_rate": 1.828619014655595e-05, + "loss": 1.4883, + "step": 7270 + }, + { + "epoch": 0.21348875447765575, + "grad_norm": 0.0, + "learning_rate": 1.8285657749172675e-05, + "loss": 1.3262, + "step": 7271 + }, + { + "epoch": 0.21351811615479477, + "grad_norm": 0.0, + "learning_rate": 1.828512527686008e-05, + "loss": 1.3584, + "step": 7272 + }, + { + "epoch": 0.21354747783193376, + "grad_norm": 0.0, + "learning_rate": 1.828459272962297e-05, + "loss": 1.3071, + "step": 7273 + }, + { + "epoch": 0.21357683950907275, + "grad_norm": 0.0, + "learning_rate": 1.8284060107466167e-05, + "loss": 1.4258, + "step": 7274 + }, + { + "epoch": 0.21360620118621176, + "grad_norm": 0.0, + "learning_rate": 1.828352741039449e-05, + "loss": 1.6533, + "step": 7275 + }, + { + "epoch": 0.21363556286335075, + "grad_norm": 0.0, + "learning_rate": 1.8282994638412752e-05, + "loss": 1.4365, + "step": 7276 + }, + { + "epoch": 0.21366492454048974, + "grad_norm": 0.0, + "learning_rate": 1.828246179152577e-05, + "loss": 1.3691, + "step": 7277 + }, + { + "epoch": 0.21369428621762876, + "grad_norm": 0.0, + "learning_rate": 1.828192886973837e-05, + "loss": 1.4648, + "step": 7278 + }, + { + "epoch": 0.21372364789476775, + "grad_norm": 0.0, + "learning_rate": 1.8281395873055362e-05, + "loss": 1.4473, + "step": 7279 + }, + { + "epoch": 0.21375300957190674, + "grad_norm": 0.0, + "learning_rate": 1.8280862801481568e-05, + "loss": 1.3145, + "step": 7280 + }, + { + "epoch": 0.21378237124904576, + "grad_norm": 0.0, + "learning_rate": 1.8280329655021814e-05, + "loss": 1.4482, + "step": 7281 + }, + { + "epoch": 0.21381173292618474, + "grad_norm": 0.0, + "learning_rate": 1.827979643368092e-05, + "loss": 1.4067, + "step": 7282 + }, + { + "epoch": 0.21384109460332373, + "grad_norm": 0.0, + "learning_rate": 1.82792631374637e-05, + "loss": 1.3389, + "step": 7283 + }, + { + "epoch": 0.21387045628046275, + "grad_norm": 0.0, + "learning_rate": 1.827872976637499e-05, + "loss": 1.4404, + "step": 7284 + }, + { + "epoch": 0.21389981795760174, + "grad_norm": 0.0, + "learning_rate": 1.8278196320419604e-05, + "loss": 1.3818, + "step": 7285 + }, + { + "epoch": 0.21392917963474073, + "grad_norm": 0.0, + "learning_rate": 1.827766279960237e-05, + "loss": 1.3271, + "step": 7286 + }, + { + "epoch": 0.21395854131187975, + "grad_norm": 0.0, + "learning_rate": 1.827712920392811e-05, + "loss": 1.4375, + "step": 7287 + }, + { + "epoch": 0.21398790298901874, + "grad_norm": 0.0, + "learning_rate": 1.827659553340165e-05, + "loss": 1.4043, + "step": 7288 + }, + { + "epoch": 0.21401726466615772, + "grad_norm": 0.0, + "learning_rate": 1.827606178802782e-05, + "loss": 1.4473, + "step": 7289 + }, + { + "epoch": 0.21404662634329674, + "grad_norm": 0.0, + "learning_rate": 1.8275527967811438e-05, + "loss": 1.3037, + "step": 7290 + }, + { + "epoch": 0.21407598802043573, + "grad_norm": 0.0, + "learning_rate": 1.8274994072757342e-05, + "loss": 1.3164, + "step": 7291 + }, + { + "epoch": 0.21410534969757472, + "grad_norm": 0.0, + "learning_rate": 1.8274460102870352e-05, + "loss": 1.2744, + "step": 7292 + }, + { + "epoch": 0.2141347113747137, + "grad_norm": 0.0, + "learning_rate": 1.8273926058155303e-05, + "loss": 1.5, + "step": 7293 + }, + { + "epoch": 0.21416407305185273, + "grad_norm": 0.0, + "learning_rate": 1.827339193861702e-05, + "loss": 1.4375, + "step": 7294 + }, + { + "epoch": 0.21419343472899172, + "grad_norm": 0.0, + "learning_rate": 1.8272857744260337e-05, + "loss": 1.3223, + "step": 7295 + }, + { + "epoch": 0.2142227964061307, + "grad_norm": 0.0, + "learning_rate": 1.827232347509008e-05, + "loss": 1.4326, + "step": 7296 + }, + { + "epoch": 0.21425215808326972, + "grad_norm": 0.0, + "learning_rate": 1.8271789131111085e-05, + "loss": 1.4268, + "step": 7297 + }, + { + "epoch": 0.2142815197604087, + "grad_norm": 0.0, + "learning_rate": 1.8271254712328183e-05, + "loss": 1.5088, + "step": 7298 + }, + { + "epoch": 0.2143108814375477, + "grad_norm": 0.0, + "learning_rate": 1.8270720218746204e-05, + "loss": 1.4824, + "step": 7299 + }, + { + "epoch": 0.21434024311468672, + "grad_norm": 0.0, + "learning_rate": 1.8270185650369985e-05, + "loss": 1.3062, + "step": 7300 + }, + { + "epoch": 0.2143696047918257, + "grad_norm": 0.0, + "learning_rate": 1.8269651007204362e-05, + "loss": 1.5342, + "step": 7301 + }, + { + "epoch": 0.2143989664689647, + "grad_norm": 0.0, + "learning_rate": 1.8269116289254163e-05, + "loss": 1.6934, + "step": 7302 + }, + { + "epoch": 0.2144283281461037, + "grad_norm": 0.0, + "learning_rate": 1.826858149652423e-05, + "loss": 1.4766, + "step": 7303 + }, + { + "epoch": 0.2144576898232427, + "grad_norm": 0.0, + "learning_rate": 1.8268046629019394e-05, + "loss": 1.4678, + "step": 7304 + }, + { + "epoch": 0.2144870515003817, + "grad_norm": 0.0, + "learning_rate": 1.82675116867445e-05, + "loss": 1.5117, + "step": 7305 + }, + { + "epoch": 0.2145164131775207, + "grad_norm": 0.0, + "learning_rate": 1.8266976669704375e-05, + "loss": 1.4316, + "step": 7306 + }, + { + "epoch": 0.2145457748546597, + "grad_norm": 0.0, + "learning_rate": 1.8266441577903865e-05, + "loss": 1.417, + "step": 7307 + }, + { + "epoch": 0.2145751365317987, + "grad_norm": 0.0, + "learning_rate": 1.8265906411347806e-05, + "loss": 1.5156, + "step": 7308 + }, + { + "epoch": 0.2146044982089377, + "grad_norm": 0.0, + "learning_rate": 1.826537117004104e-05, + "loss": 1.5537, + "step": 7309 + }, + { + "epoch": 0.2146338598860767, + "grad_norm": 0.0, + "learning_rate": 1.8264835853988407e-05, + "loss": 1.4395, + "step": 7310 + }, + { + "epoch": 0.21466322156321568, + "grad_norm": 0.0, + "learning_rate": 1.8264300463194743e-05, + "loss": 1.5283, + "step": 7311 + }, + { + "epoch": 0.2146925832403547, + "grad_norm": 0.0, + "learning_rate": 1.8263764997664893e-05, + "loss": 1.4277, + "step": 7312 + }, + { + "epoch": 0.2147219449174937, + "grad_norm": 0.0, + "learning_rate": 1.8263229457403705e-05, + "loss": 1.4834, + "step": 7313 + }, + { + "epoch": 0.21475130659463268, + "grad_norm": 0.0, + "learning_rate": 1.8262693842416013e-05, + "loss": 1.3945, + "step": 7314 + }, + { + "epoch": 0.2147806682717717, + "grad_norm": 0.0, + "learning_rate": 1.8262158152706665e-05, + "loss": 1.3672, + "step": 7315 + }, + { + "epoch": 0.21481002994891069, + "grad_norm": 0.0, + "learning_rate": 1.8261622388280504e-05, + "loss": 1.2842, + "step": 7316 + }, + { + "epoch": 0.21483939162604967, + "grad_norm": 0.0, + "learning_rate": 1.826108654914238e-05, + "loss": 1.4971, + "step": 7317 + }, + { + "epoch": 0.21486875330318866, + "grad_norm": 0.0, + "learning_rate": 1.826055063529713e-05, + "loss": 1.376, + "step": 7318 + }, + { + "epoch": 0.21489811498032768, + "grad_norm": 0.0, + "learning_rate": 1.8260014646749604e-05, + "loss": 1.4258, + "step": 7319 + }, + { + "epoch": 0.21492747665746667, + "grad_norm": 0.0, + "learning_rate": 1.8259478583504653e-05, + "loss": 1.4766, + "step": 7320 + }, + { + "epoch": 0.21495683833460566, + "grad_norm": 0.0, + "learning_rate": 1.8258942445567124e-05, + "loss": 1.3916, + "step": 7321 + }, + { + "epoch": 0.21498620001174468, + "grad_norm": 0.0, + "learning_rate": 1.8258406232941858e-05, + "loss": 1.5, + "step": 7322 + }, + { + "epoch": 0.21501556168888367, + "grad_norm": 0.0, + "learning_rate": 1.8257869945633713e-05, + "loss": 1.5039, + "step": 7323 + }, + { + "epoch": 0.21504492336602266, + "grad_norm": 0.0, + "learning_rate": 1.8257333583647533e-05, + "loss": 1.54, + "step": 7324 + }, + { + "epoch": 0.21507428504316167, + "grad_norm": 0.0, + "learning_rate": 1.825679714698817e-05, + "loss": 1.5361, + "step": 7325 + }, + { + "epoch": 0.21510364672030066, + "grad_norm": 0.0, + "learning_rate": 1.8256260635660484e-05, + "loss": 1.4775, + "step": 7326 + }, + { + "epoch": 0.21513300839743965, + "grad_norm": 0.0, + "learning_rate": 1.825572404966931e-05, + "loss": 1.4023, + "step": 7327 + }, + { + "epoch": 0.21516237007457867, + "grad_norm": 0.0, + "learning_rate": 1.825518738901951e-05, + "loss": 1.4463, + "step": 7328 + }, + { + "epoch": 0.21519173175171766, + "grad_norm": 0.0, + "learning_rate": 1.825465065371594e-05, + "loss": 1.5977, + "step": 7329 + }, + { + "epoch": 0.21522109342885665, + "grad_norm": 0.0, + "learning_rate": 1.8254113843763445e-05, + "loss": 1.4419, + "step": 7330 + }, + { + "epoch": 0.21525045510599566, + "grad_norm": 0.0, + "learning_rate": 1.825357695916689e-05, + "loss": 1.4502, + "step": 7331 + }, + { + "epoch": 0.21527981678313465, + "grad_norm": 0.0, + "learning_rate": 1.8253039999931125e-05, + "loss": 1.374, + "step": 7332 + }, + { + "epoch": 0.21530917846027364, + "grad_norm": 0.0, + "learning_rate": 1.8252502966061e-05, + "loss": 1.4482, + "step": 7333 + }, + { + "epoch": 0.21533854013741266, + "grad_norm": 0.0, + "learning_rate": 1.825196585756138e-05, + "loss": 1.5693, + "step": 7334 + }, + { + "epoch": 0.21536790181455165, + "grad_norm": 0.0, + "learning_rate": 1.8251428674437122e-05, + "loss": 1.4365, + "step": 7335 + }, + { + "epoch": 0.21539726349169064, + "grad_norm": 0.0, + "learning_rate": 1.8250891416693078e-05, + "loss": 1.3926, + "step": 7336 + }, + { + "epoch": 0.21542662516882966, + "grad_norm": 0.0, + "learning_rate": 1.825035408433411e-05, + "loss": 1.3965, + "step": 7337 + }, + { + "epoch": 0.21545598684596864, + "grad_norm": 0.0, + "learning_rate": 1.824981667736508e-05, + "loss": 1.5674, + "step": 7338 + }, + { + "epoch": 0.21548534852310763, + "grad_norm": 0.0, + "learning_rate": 1.824927919579084e-05, + "loss": 1.4482, + "step": 7339 + }, + { + "epoch": 0.21551471020024665, + "grad_norm": 0.0, + "learning_rate": 1.8248741639616258e-05, + "loss": 1.5146, + "step": 7340 + }, + { + "epoch": 0.21554407187738564, + "grad_norm": 0.0, + "learning_rate": 1.8248204008846192e-05, + "loss": 1.3174, + "step": 7341 + }, + { + "epoch": 0.21557343355452463, + "grad_norm": 0.0, + "learning_rate": 1.8247666303485507e-05, + "loss": 1.4609, + "step": 7342 + }, + { + "epoch": 0.21560279523166365, + "grad_norm": 0.0, + "learning_rate": 1.824712852353906e-05, + "loss": 1.3408, + "step": 7343 + }, + { + "epoch": 0.21563215690880264, + "grad_norm": 0.0, + "learning_rate": 1.8246590669011723e-05, + "loss": 1.3135, + "step": 7344 + }, + { + "epoch": 0.21566151858594163, + "grad_norm": 0.0, + "learning_rate": 1.824605273990835e-05, + "loss": 1.4287, + "step": 7345 + }, + { + "epoch": 0.21569088026308061, + "grad_norm": 0.0, + "learning_rate": 1.8245514736233813e-05, + "loss": 1.5791, + "step": 7346 + }, + { + "epoch": 0.21572024194021963, + "grad_norm": 0.0, + "learning_rate": 1.824497665799297e-05, + "loss": 1.4688, + "step": 7347 + }, + { + "epoch": 0.21574960361735862, + "grad_norm": 0.0, + "learning_rate": 1.8244438505190695e-05, + "loss": 1.4531, + "step": 7348 + }, + { + "epoch": 0.2157789652944976, + "grad_norm": 0.0, + "learning_rate": 1.8243900277831855e-05, + "loss": 1.4541, + "step": 7349 + }, + { + "epoch": 0.21580832697163663, + "grad_norm": 0.0, + "learning_rate": 1.8243361975921308e-05, + "loss": 1.4346, + "step": 7350 + }, + { + "epoch": 0.21583768864877562, + "grad_norm": 0.0, + "learning_rate": 1.824282359946393e-05, + "loss": 1.4463, + "step": 7351 + }, + { + "epoch": 0.2158670503259146, + "grad_norm": 0.0, + "learning_rate": 1.8242285148464586e-05, + "loss": 1.5, + "step": 7352 + }, + { + "epoch": 0.21589641200305362, + "grad_norm": 0.0, + "learning_rate": 1.8241746622928146e-05, + "loss": 1.4307, + "step": 7353 + }, + { + "epoch": 0.2159257736801926, + "grad_norm": 0.0, + "learning_rate": 1.824120802285948e-05, + "loss": 1.3047, + "step": 7354 + }, + { + "epoch": 0.2159551353573316, + "grad_norm": 0.0, + "learning_rate": 1.824066934826346e-05, + "loss": 1.3926, + "step": 7355 + }, + { + "epoch": 0.21598449703447062, + "grad_norm": 0.0, + "learning_rate": 1.8240130599144958e-05, + "loss": 1.3789, + "step": 7356 + }, + { + "epoch": 0.2160138587116096, + "grad_norm": 0.0, + "learning_rate": 1.8239591775508844e-05, + "loss": 1.3457, + "step": 7357 + }, + { + "epoch": 0.2160432203887486, + "grad_norm": 0.0, + "learning_rate": 1.823905287735999e-05, + "loss": 1.4229, + "step": 7358 + }, + { + "epoch": 0.21607258206588761, + "grad_norm": 0.0, + "learning_rate": 1.823851390470327e-05, + "loss": 1.373, + "step": 7359 + }, + { + "epoch": 0.2161019437430266, + "grad_norm": 0.0, + "learning_rate": 1.823797485754356e-05, + "loss": 1.417, + "step": 7360 + }, + { + "epoch": 0.2161313054201656, + "grad_norm": 0.0, + "learning_rate": 1.8237435735885736e-05, + "loss": 1.4277, + "step": 7361 + }, + { + "epoch": 0.2161606670973046, + "grad_norm": 0.0, + "learning_rate": 1.823689653973467e-05, + "loss": 1.6104, + "step": 7362 + }, + { + "epoch": 0.2161900287744436, + "grad_norm": 0.0, + "learning_rate": 1.8236357269095238e-05, + "loss": 1.4785, + "step": 7363 + }, + { + "epoch": 0.2162193904515826, + "grad_norm": 0.0, + "learning_rate": 1.823581792397232e-05, + "loss": 1.4482, + "step": 7364 + }, + { + "epoch": 0.2162487521287216, + "grad_norm": 0.0, + "learning_rate": 1.823527850437079e-05, + "loss": 1.3262, + "step": 7365 + }, + { + "epoch": 0.2162781138058606, + "grad_norm": 0.0, + "learning_rate": 1.8234739010295523e-05, + "loss": 1.4922, + "step": 7366 + }, + { + "epoch": 0.21630747548299958, + "grad_norm": 0.0, + "learning_rate": 1.823419944175141e-05, + "loss": 1.4307, + "step": 7367 + }, + { + "epoch": 0.2163368371601386, + "grad_norm": 0.0, + "learning_rate": 1.8233659798743313e-05, + "loss": 1.4824, + "step": 7368 + }, + { + "epoch": 0.2163661988372776, + "grad_norm": 0.0, + "learning_rate": 1.823312008127613e-05, + "loss": 1.4551, + "step": 7369 + }, + { + "epoch": 0.21639556051441658, + "grad_norm": 0.0, + "learning_rate": 1.8232580289354733e-05, + "loss": 1.4697, + "step": 7370 + }, + { + "epoch": 0.21642492219155557, + "grad_norm": 0.0, + "learning_rate": 1.8232040422984002e-05, + "loss": 1.5059, + "step": 7371 + }, + { + "epoch": 0.21645428386869459, + "grad_norm": 0.0, + "learning_rate": 1.823150048216882e-05, + "loss": 1.418, + "step": 7372 + }, + { + "epoch": 0.21648364554583358, + "grad_norm": 0.0, + "learning_rate": 1.8230960466914074e-05, + "loss": 1.4951, + "step": 7373 + }, + { + "epoch": 0.21651300722297256, + "grad_norm": 0.0, + "learning_rate": 1.823042037722464e-05, + "loss": 1.3887, + "step": 7374 + }, + { + "epoch": 0.21654236890011158, + "grad_norm": 0.0, + "learning_rate": 1.822988021310541e-05, + "loss": 1.417, + "step": 7375 + }, + { + "epoch": 0.21657173057725057, + "grad_norm": 0.0, + "learning_rate": 1.8229339974561267e-05, + "loss": 1.5205, + "step": 7376 + }, + { + "epoch": 0.21660109225438956, + "grad_norm": 0.0, + "learning_rate": 1.822879966159709e-05, + "loss": 1.3936, + "step": 7377 + }, + { + "epoch": 0.21663045393152858, + "grad_norm": 0.0, + "learning_rate": 1.8228259274217776e-05, + "loss": 1.4609, + "step": 7378 + }, + { + "epoch": 0.21665981560866757, + "grad_norm": 0.0, + "learning_rate": 1.8227718812428203e-05, + "loss": 1.4912, + "step": 7379 + }, + { + "epoch": 0.21668917728580656, + "grad_norm": 0.0, + "learning_rate": 1.8227178276233264e-05, + "loss": 1.5732, + "step": 7380 + }, + { + "epoch": 0.21671853896294557, + "grad_norm": 0.0, + "learning_rate": 1.822663766563784e-05, + "loss": 1.376, + "step": 7381 + }, + { + "epoch": 0.21674790064008456, + "grad_norm": 0.0, + "learning_rate": 1.822609698064683e-05, + "loss": 1.4131, + "step": 7382 + }, + { + "epoch": 0.21677726231722355, + "grad_norm": 0.0, + "learning_rate": 1.822555622126511e-05, + "loss": 1.3623, + "step": 7383 + }, + { + "epoch": 0.21680662399436257, + "grad_norm": 0.0, + "learning_rate": 1.822501538749759e-05, + "loss": 1.4717, + "step": 7384 + }, + { + "epoch": 0.21683598567150156, + "grad_norm": 0.0, + "learning_rate": 1.8224474479349142e-05, + "loss": 1.5361, + "step": 7385 + }, + { + "epoch": 0.21686534734864055, + "grad_norm": 0.0, + "learning_rate": 1.8223933496824665e-05, + "loss": 1.3828, + "step": 7386 + }, + { + "epoch": 0.21689470902577956, + "grad_norm": 0.0, + "learning_rate": 1.8223392439929055e-05, + "loss": 1.5498, + "step": 7387 + }, + { + "epoch": 0.21692407070291855, + "grad_norm": 0.0, + "learning_rate": 1.82228513086672e-05, + "loss": 1.4033, + "step": 7388 + }, + { + "epoch": 0.21695343238005754, + "grad_norm": 0.0, + "learning_rate": 1.822231010304399e-05, + "loss": 1.4229, + "step": 7389 + }, + { + "epoch": 0.21698279405719656, + "grad_norm": 0.0, + "learning_rate": 1.8221768823064332e-05, + "loss": 1.5078, + "step": 7390 + }, + { + "epoch": 0.21701215573433555, + "grad_norm": 0.0, + "learning_rate": 1.8221227468733106e-05, + "loss": 1.4834, + "step": 7391 + }, + { + "epoch": 0.21704151741147454, + "grad_norm": 0.0, + "learning_rate": 1.8220686040055217e-05, + "loss": 1.4766, + "step": 7392 + }, + { + "epoch": 0.21707087908861356, + "grad_norm": 0.0, + "learning_rate": 1.822014453703556e-05, + "loss": 1.3042, + "step": 7393 + }, + { + "epoch": 0.21710024076575254, + "grad_norm": 0.0, + "learning_rate": 1.8219602959679032e-05, + "loss": 1.4404, + "step": 7394 + }, + { + "epoch": 0.21712960244289153, + "grad_norm": 0.0, + "learning_rate": 1.8219061307990524e-05, + "loss": 1.4019, + "step": 7395 + }, + { + "epoch": 0.21715896412003052, + "grad_norm": 0.0, + "learning_rate": 1.8218519581974946e-05, + "loss": 1.4473, + "step": 7396 + }, + { + "epoch": 0.21718832579716954, + "grad_norm": 0.0, + "learning_rate": 1.8217977781637187e-05, + "loss": 1.5381, + "step": 7397 + }, + { + "epoch": 0.21721768747430853, + "grad_norm": 0.0, + "learning_rate": 1.821743590698215e-05, + "loss": 1.5186, + "step": 7398 + }, + { + "epoch": 0.21724704915144752, + "grad_norm": 0.0, + "learning_rate": 1.8216893958014734e-05, + "loss": 1.3467, + "step": 7399 + }, + { + "epoch": 0.21727641082858654, + "grad_norm": 0.0, + "learning_rate": 1.8216351934739844e-05, + "loss": 1.3867, + "step": 7400 + }, + { + "epoch": 0.21730577250572553, + "grad_norm": 0.0, + "learning_rate": 1.8215809837162378e-05, + "loss": 1.4111, + "step": 7401 + }, + { + "epoch": 0.21733513418286451, + "grad_norm": 0.0, + "learning_rate": 1.821526766528724e-05, + "loss": 1.4346, + "step": 7402 + }, + { + "epoch": 0.21736449586000353, + "grad_norm": 0.0, + "learning_rate": 1.8214725419119335e-05, + "loss": 1.498, + "step": 7403 + }, + { + "epoch": 0.21739385753714252, + "grad_norm": 0.0, + "learning_rate": 1.8214183098663563e-05, + "loss": 1.4287, + "step": 7404 + }, + { + "epoch": 0.2174232192142815, + "grad_norm": 0.0, + "learning_rate": 1.8213640703924827e-05, + "loss": 1.3496, + "step": 7405 + }, + { + "epoch": 0.21745258089142053, + "grad_norm": 0.0, + "learning_rate": 1.8213098234908038e-05, + "loss": 1.4092, + "step": 7406 + }, + { + "epoch": 0.21748194256855952, + "grad_norm": 0.0, + "learning_rate": 1.8212555691618098e-05, + "loss": 1.2979, + "step": 7407 + }, + { + "epoch": 0.2175113042456985, + "grad_norm": 0.0, + "learning_rate": 1.821201307405991e-05, + "loss": 1.2764, + "step": 7408 + }, + { + "epoch": 0.21754066592283752, + "grad_norm": 0.0, + "learning_rate": 1.8211470382238388e-05, + "loss": 1.3857, + "step": 7409 + }, + { + "epoch": 0.2175700275999765, + "grad_norm": 0.0, + "learning_rate": 1.8210927616158433e-05, + "loss": 1.4873, + "step": 7410 + }, + { + "epoch": 0.2175993892771155, + "grad_norm": 0.0, + "learning_rate": 1.821038477582496e-05, + "loss": 1.2017, + "step": 7411 + }, + { + "epoch": 0.21762875095425452, + "grad_norm": 0.0, + "learning_rate": 1.8209841861242876e-05, + "loss": 1.5781, + "step": 7412 + }, + { + "epoch": 0.2176581126313935, + "grad_norm": 0.0, + "learning_rate": 1.820929887241708e-05, + "loss": 1.4219, + "step": 7413 + }, + { + "epoch": 0.2176874743085325, + "grad_norm": 0.0, + "learning_rate": 1.82087558093525e-05, + "loss": 1.5254, + "step": 7414 + }, + { + "epoch": 0.21771683598567151, + "grad_norm": 0.0, + "learning_rate": 1.8208212672054042e-05, + "loss": 1.5215, + "step": 7415 + }, + { + "epoch": 0.2177461976628105, + "grad_norm": 0.0, + "learning_rate": 1.8207669460526613e-05, + "loss": 1.335, + "step": 7416 + }, + { + "epoch": 0.2177755593399495, + "grad_norm": 0.0, + "learning_rate": 1.8207126174775123e-05, + "loss": 1.3857, + "step": 7417 + }, + { + "epoch": 0.2178049210170885, + "grad_norm": 0.0, + "learning_rate": 1.820658281480449e-05, + "loss": 1.3965, + "step": 7418 + }, + { + "epoch": 0.2178342826942275, + "grad_norm": 0.0, + "learning_rate": 1.8206039380619627e-05, + "loss": 1.4541, + "step": 7419 + }, + { + "epoch": 0.2178636443713665, + "grad_norm": 0.0, + "learning_rate": 1.820549587222545e-05, + "loss": 1.5771, + "step": 7420 + }, + { + "epoch": 0.21789300604850548, + "grad_norm": 0.0, + "learning_rate": 1.8204952289626868e-05, + "loss": 1.3115, + "step": 7421 + }, + { + "epoch": 0.2179223677256445, + "grad_norm": 0.0, + "learning_rate": 1.8204408632828808e-05, + "loss": 1.3887, + "step": 7422 + }, + { + "epoch": 0.21795172940278348, + "grad_norm": 0.0, + "learning_rate": 1.8203864901836174e-05, + "loss": 1.3525, + "step": 7423 + }, + { + "epoch": 0.21798109107992247, + "grad_norm": 0.0, + "learning_rate": 1.820332109665389e-05, + "loss": 1.3457, + "step": 7424 + }, + { + "epoch": 0.2180104527570615, + "grad_norm": 0.0, + "learning_rate": 1.8202777217286877e-05, + "loss": 1.4639, + "step": 7425 + }, + { + "epoch": 0.21803981443420048, + "grad_norm": 0.0, + "learning_rate": 1.8202233263740043e-05, + "loss": 1.5107, + "step": 7426 + }, + { + "epoch": 0.21806917611133947, + "grad_norm": 0.0, + "learning_rate": 1.8201689236018313e-05, + "loss": 1.3027, + "step": 7427 + }, + { + "epoch": 0.21809853778847849, + "grad_norm": 0.0, + "learning_rate": 1.820114513412661e-05, + "loss": 1.3525, + "step": 7428 + }, + { + "epoch": 0.21812789946561748, + "grad_norm": 0.0, + "learning_rate": 1.820060095806985e-05, + "loss": 1.3467, + "step": 7429 + }, + { + "epoch": 0.21815726114275646, + "grad_norm": 0.0, + "learning_rate": 1.8200056707852954e-05, + "loss": 1.4893, + "step": 7430 + }, + { + "epoch": 0.21818662281989548, + "grad_norm": 0.0, + "learning_rate": 1.8199512383480845e-05, + "loss": 1.4287, + "step": 7431 + }, + { + "epoch": 0.21821598449703447, + "grad_norm": 0.0, + "learning_rate": 1.819896798495845e-05, + "loss": 1.4209, + "step": 7432 + }, + { + "epoch": 0.21824534617417346, + "grad_norm": 0.0, + "learning_rate": 1.8198423512290685e-05, + "loss": 1.4736, + "step": 7433 + }, + { + "epoch": 0.21827470785131248, + "grad_norm": 0.0, + "learning_rate": 1.8197878965482476e-05, + "loss": 1.3584, + "step": 7434 + }, + { + "epoch": 0.21830406952845147, + "grad_norm": 0.0, + "learning_rate": 1.8197334344538745e-05, + "loss": 1.4502, + "step": 7435 + }, + { + "epoch": 0.21833343120559046, + "grad_norm": 0.0, + "learning_rate": 1.8196789649464425e-05, + "loss": 1.5771, + "step": 7436 + }, + { + "epoch": 0.21836279288272947, + "grad_norm": 0.0, + "learning_rate": 1.8196244880264436e-05, + "loss": 1.4531, + "step": 7437 + }, + { + "epoch": 0.21839215455986846, + "grad_norm": 0.0, + "learning_rate": 1.8195700036943704e-05, + "loss": 1.498, + "step": 7438 + }, + { + "epoch": 0.21842151623700745, + "grad_norm": 0.0, + "learning_rate": 1.819515511950716e-05, + "loss": 1.5176, + "step": 7439 + }, + { + "epoch": 0.21845087791414647, + "grad_norm": 0.0, + "learning_rate": 1.8194610127959727e-05, + "loss": 1.2646, + "step": 7440 + }, + { + "epoch": 0.21848023959128546, + "grad_norm": 0.0, + "learning_rate": 1.8194065062306338e-05, + "loss": 1.4492, + "step": 7441 + }, + { + "epoch": 0.21850960126842445, + "grad_norm": 0.0, + "learning_rate": 1.8193519922551918e-05, + "loss": 1.4092, + "step": 7442 + }, + { + "epoch": 0.21853896294556346, + "grad_norm": 0.0, + "learning_rate": 1.8192974708701397e-05, + "loss": 1.3877, + "step": 7443 + }, + { + "epoch": 0.21856832462270245, + "grad_norm": 0.0, + "learning_rate": 1.819242942075971e-05, + "loss": 1.4385, + "step": 7444 + }, + { + "epoch": 0.21859768629984144, + "grad_norm": 0.0, + "learning_rate": 1.8191884058731788e-05, + "loss": 1.5908, + "step": 7445 + }, + { + "epoch": 0.21862704797698043, + "grad_norm": 0.0, + "learning_rate": 1.819133862262256e-05, + "loss": 1.4629, + "step": 7446 + }, + { + "epoch": 0.21865640965411945, + "grad_norm": 0.0, + "learning_rate": 1.8190793112436955e-05, + "loss": 1.4736, + "step": 7447 + }, + { + "epoch": 0.21868577133125844, + "grad_norm": 0.0, + "learning_rate": 1.8190247528179913e-05, + "loss": 1.3926, + "step": 7448 + }, + { + "epoch": 0.21871513300839743, + "grad_norm": 0.0, + "learning_rate": 1.8189701869856364e-05, + "loss": 1.3311, + "step": 7449 + }, + { + "epoch": 0.21874449468553644, + "grad_norm": 0.0, + "learning_rate": 1.8189156137471243e-05, + "loss": 1.5059, + "step": 7450 + }, + { + "epoch": 0.21877385636267543, + "grad_norm": 0.0, + "learning_rate": 1.818861033102949e-05, + "loss": 1.4756, + "step": 7451 + }, + { + "epoch": 0.21880321803981442, + "grad_norm": 0.0, + "learning_rate": 1.8188064450536033e-05, + "loss": 1.416, + "step": 7452 + }, + { + "epoch": 0.21883257971695344, + "grad_norm": 0.0, + "learning_rate": 1.8187518495995813e-05, + "loss": 1.5947, + "step": 7453 + }, + { + "epoch": 0.21886194139409243, + "grad_norm": 0.0, + "learning_rate": 1.8186972467413767e-05, + "loss": 1.4541, + "step": 7454 + }, + { + "epoch": 0.21889130307123142, + "grad_norm": 0.0, + "learning_rate": 1.818642636479483e-05, + "loss": 1.5781, + "step": 7455 + }, + { + "epoch": 0.21892066474837044, + "grad_norm": 0.0, + "learning_rate": 1.8185880188143944e-05, + "loss": 1.4502, + "step": 7456 + }, + { + "epoch": 0.21895002642550943, + "grad_norm": 0.0, + "learning_rate": 1.8185333937466048e-05, + "loss": 1.459, + "step": 7457 + }, + { + "epoch": 0.21897938810264841, + "grad_norm": 0.0, + "learning_rate": 1.818478761276608e-05, + "loss": 1.3896, + "step": 7458 + }, + { + "epoch": 0.21900874977978743, + "grad_norm": 0.0, + "learning_rate": 1.818424121404898e-05, + "loss": 1.4209, + "step": 7459 + }, + { + "epoch": 0.21903811145692642, + "grad_norm": 0.0, + "learning_rate": 1.8183694741319692e-05, + "loss": 1.4131, + "step": 7460 + }, + { + "epoch": 0.2190674731340654, + "grad_norm": 0.0, + "learning_rate": 1.8183148194583156e-05, + "loss": 1.4326, + "step": 7461 + }, + { + "epoch": 0.21909683481120443, + "grad_norm": 0.0, + "learning_rate": 1.8182601573844317e-05, + "loss": 1.377, + "step": 7462 + }, + { + "epoch": 0.21912619648834342, + "grad_norm": 0.0, + "learning_rate": 1.8182054879108117e-05, + "loss": 1.3701, + "step": 7463 + }, + { + "epoch": 0.2191555581654824, + "grad_norm": 0.0, + "learning_rate": 1.8181508110379496e-05, + "loss": 1.4014, + "step": 7464 + }, + { + "epoch": 0.21918491984262142, + "grad_norm": 0.0, + "learning_rate": 1.8180961267663405e-05, + "loss": 1.5205, + "step": 7465 + }, + { + "epoch": 0.2192142815197604, + "grad_norm": 0.0, + "learning_rate": 1.8180414350964786e-05, + "loss": 1.3408, + "step": 7466 + }, + { + "epoch": 0.2192436431968994, + "grad_norm": 0.0, + "learning_rate": 1.8179867360288583e-05, + "loss": 1.3779, + "step": 7467 + }, + { + "epoch": 0.21927300487403842, + "grad_norm": 0.0, + "learning_rate": 1.8179320295639746e-05, + "loss": 1.4492, + "step": 7468 + }, + { + "epoch": 0.2193023665511774, + "grad_norm": 0.0, + "learning_rate": 1.8178773157023217e-05, + "loss": 1.3594, + "step": 7469 + }, + { + "epoch": 0.2193317282283164, + "grad_norm": 0.0, + "learning_rate": 1.8178225944443952e-05, + "loss": 1.5625, + "step": 7470 + }, + { + "epoch": 0.2193610899054554, + "grad_norm": 0.0, + "learning_rate": 1.8177678657906896e-05, + "loss": 1.4551, + "step": 7471 + }, + { + "epoch": 0.2193904515825944, + "grad_norm": 0.0, + "learning_rate": 1.8177131297416992e-05, + "loss": 1.4785, + "step": 7472 + }, + { + "epoch": 0.2194198132597334, + "grad_norm": 0.0, + "learning_rate": 1.81765838629792e-05, + "loss": 1.4541, + "step": 7473 + }, + { + "epoch": 0.21944917493687238, + "grad_norm": 0.0, + "learning_rate": 1.8176036354598467e-05, + "loss": 1.3711, + "step": 7474 + }, + { + "epoch": 0.2194785366140114, + "grad_norm": 0.0, + "learning_rate": 1.8175488772279738e-05, + "loss": 1.2881, + "step": 7475 + }, + { + "epoch": 0.2195078982911504, + "grad_norm": 0.0, + "learning_rate": 1.8174941116027976e-05, + "loss": 1.3242, + "step": 7476 + }, + { + "epoch": 0.21953725996828938, + "grad_norm": 0.0, + "learning_rate": 1.8174393385848127e-05, + "loss": 1.5049, + "step": 7477 + }, + { + "epoch": 0.2195666216454284, + "grad_norm": 0.0, + "learning_rate": 1.817384558174514e-05, + "loss": 1.4785, + "step": 7478 + }, + { + "epoch": 0.21959598332256738, + "grad_norm": 0.0, + "learning_rate": 1.817329770372398e-05, + "loss": 1.4434, + "step": 7479 + }, + { + "epoch": 0.21962534499970637, + "grad_norm": 0.0, + "learning_rate": 1.8172749751789595e-05, + "loss": 1.5879, + "step": 7480 + }, + { + "epoch": 0.2196547066768454, + "grad_norm": 0.0, + "learning_rate": 1.8172201725946936e-05, + "loss": 1.2646, + "step": 7481 + }, + { + "epoch": 0.21968406835398438, + "grad_norm": 0.0, + "learning_rate": 1.817165362620097e-05, + "loss": 1.5742, + "step": 7482 + }, + { + "epoch": 0.21971343003112337, + "grad_norm": 0.0, + "learning_rate": 1.8171105452556642e-05, + "loss": 1.4326, + "step": 7483 + }, + { + "epoch": 0.21974279170826239, + "grad_norm": 0.0, + "learning_rate": 1.8170557205018918e-05, + "loss": 1.3027, + "step": 7484 + }, + { + "epoch": 0.21977215338540138, + "grad_norm": 0.0, + "learning_rate": 1.8170008883592755e-05, + "loss": 1.4258, + "step": 7485 + }, + { + "epoch": 0.21980151506254036, + "grad_norm": 0.0, + "learning_rate": 1.8169460488283103e-05, + "loss": 1.4062, + "step": 7486 + }, + { + "epoch": 0.21983087673967938, + "grad_norm": 0.0, + "learning_rate": 1.8168912019094932e-05, + "loss": 1.4941, + "step": 7487 + }, + { + "epoch": 0.21986023841681837, + "grad_norm": 0.0, + "learning_rate": 1.8168363476033197e-05, + "loss": 1.3936, + "step": 7488 + }, + { + "epoch": 0.21988960009395736, + "grad_norm": 0.0, + "learning_rate": 1.8167814859102856e-05, + "loss": 1.3555, + "step": 7489 + }, + { + "epoch": 0.21991896177109638, + "grad_norm": 0.0, + "learning_rate": 1.816726616830888e-05, + "loss": 1.3428, + "step": 7490 + }, + { + "epoch": 0.21994832344823537, + "grad_norm": 0.0, + "learning_rate": 1.8166717403656217e-05, + "loss": 1.3779, + "step": 7491 + }, + { + "epoch": 0.21997768512537436, + "grad_norm": 0.0, + "learning_rate": 1.8166168565149844e-05, + "loss": 1.5664, + "step": 7492 + }, + { + "epoch": 0.22000704680251337, + "grad_norm": 0.0, + "learning_rate": 1.816561965279471e-05, + "loss": 1.4443, + "step": 7493 + }, + { + "epoch": 0.22003640847965236, + "grad_norm": 0.0, + "learning_rate": 1.816507066659579e-05, + "loss": 1.4824, + "step": 7494 + }, + { + "epoch": 0.22006577015679135, + "grad_norm": 0.0, + "learning_rate": 1.8164521606558047e-05, + "loss": 1.5723, + "step": 7495 + }, + { + "epoch": 0.22009513183393034, + "grad_norm": 0.0, + "learning_rate": 1.8163972472686442e-05, + "loss": 1.4414, + "step": 7496 + }, + { + "epoch": 0.22012449351106936, + "grad_norm": 0.0, + "learning_rate": 1.8163423264985942e-05, + "loss": 1.4639, + "step": 7497 + }, + { + "epoch": 0.22015385518820835, + "grad_norm": 0.0, + "learning_rate": 1.8162873983461513e-05, + "loss": 1.5518, + "step": 7498 + }, + { + "epoch": 0.22018321686534734, + "grad_norm": 0.0, + "learning_rate": 1.816232462811813e-05, + "loss": 1.459, + "step": 7499 + }, + { + "epoch": 0.22021257854248635, + "grad_norm": 0.0, + "learning_rate": 1.816177519896075e-05, + "loss": 1.3623, + "step": 7500 + }, + { + "epoch": 0.22024194021962534, + "grad_norm": 0.0, + "learning_rate": 1.816122569599435e-05, + "loss": 1.4043, + "step": 7501 + }, + { + "epoch": 0.22027130189676433, + "grad_norm": 0.0, + "learning_rate": 1.8160676119223894e-05, + "loss": 1.3291, + "step": 7502 + }, + { + "epoch": 0.22030066357390335, + "grad_norm": 0.0, + "learning_rate": 1.8160126468654355e-05, + "loss": 1.3809, + "step": 7503 + }, + { + "epoch": 0.22033002525104234, + "grad_norm": 0.0, + "learning_rate": 1.81595767442907e-05, + "loss": 1.4736, + "step": 7504 + }, + { + "epoch": 0.22035938692818133, + "grad_norm": 0.0, + "learning_rate": 1.8159026946137904e-05, + "loss": 1.3574, + "step": 7505 + }, + { + "epoch": 0.22038874860532034, + "grad_norm": 0.0, + "learning_rate": 1.815847707420094e-05, + "loss": 1.3799, + "step": 7506 + }, + { + "epoch": 0.22041811028245933, + "grad_norm": 0.0, + "learning_rate": 1.8157927128484773e-05, + "loss": 1.4141, + "step": 7507 + }, + { + "epoch": 0.22044747195959832, + "grad_norm": 0.0, + "learning_rate": 1.8157377108994387e-05, + "loss": 1.4609, + "step": 7508 + }, + { + "epoch": 0.22047683363673734, + "grad_norm": 0.0, + "learning_rate": 1.8156827015734748e-05, + "loss": 1.3916, + "step": 7509 + }, + { + "epoch": 0.22050619531387633, + "grad_norm": 0.0, + "learning_rate": 1.8156276848710837e-05, + "loss": 1.416, + "step": 7510 + }, + { + "epoch": 0.22053555699101532, + "grad_norm": 0.0, + "learning_rate": 1.815572660792762e-05, + "loss": 1.4062, + "step": 7511 + }, + { + "epoch": 0.22056491866815434, + "grad_norm": 0.0, + "learning_rate": 1.815517629339008e-05, + "loss": 1.334, + "step": 7512 + }, + { + "epoch": 0.22059428034529333, + "grad_norm": 0.0, + "learning_rate": 1.8154625905103197e-05, + "loss": 1.5059, + "step": 7513 + }, + { + "epoch": 0.22062364202243231, + "grad_norm": 0.0, + "learning_rate": 1.8154075443071938e-05, + "loss": 1.3262, + "step": 7514 + }, + { + "epoch": 0.22065300369957133, + "grad_norm": 0.0, + "learning_rate": 1.815352490730129e-05, + "loss": 1.3496, + "step": 7515 + }, + { + "epoch": 0.22068236537671032, + "grad_norm": 0.0, + "learning_rate": 1.8152974297796228e-05, + "loss": 1.4932, + "step": 7516 + }, + { + "epoch": 0.2207117270538493, + "grad_norm": 0.0, + "learning_rate": 1.8152423614561728e-05, + "loss": 1.3311, + "step": 7517 + }, + { + "epoch": 0.22074108873098833, + "grad_norm": 0.0, + "learning_rate": 1.8151872857602778e-05, + "loss": 1.5156, + "step": 7518 + }, + { + "epoch": 0.22077045040812732, + "grad_norm": 0.0, + "learning_rate": 1.8151322026924352e-05, + "loss": 1.3633, + "step": 7519 + }, + { + "epoch": 0.2207998120852663, + "grad_norm": 0.0, + "learning_rate": 1.815077112253143e-05, + "loss": 1.3916, + "step": 7520 + }, + { + "epoch": 0.2208291737624053, + "grad_norm": 0.0, + "learning_rate": 1.8150220144429e-05, + "loss": 1.5293, + "step": 7521 + }, + { + "epoch": 0.2208585354395443, + "grad_norm": 0.0, + "learning_rate": 1.8149669092622045e-05, + "loss": 1.4551, + "step": 7522 + }, + { + "epoch": 0.2208878971166833, + "grad_norm": 0.0, + "learning_rate": 1.8149117967115543e-05, + "loss": 1.4395, + "step": 7523 + }, + { + "epoch": 0.2209172587938223, + "grad_norm": 0.0, + "learning_rate": 1.8148566767914478e-05, + "loss": 1.5254, + "step": 7524 + }, + { + "epoch": 0.2209466204709613, + "grad_norm": 0.0, + "learning_rate": 1.814801549502384e-05, + "loss": 1.499, + "step": 7525 + }, + { + "epoch": 0.2209759821481003, + "grad_norm": 0.0, + "learning_rate": 1.814746414844861e-05, + "loss": 1.4238, + "step": 7526 + }, + { + "epoch": 0.2210053438252393, + "grad_norm": 0.0, + "learning_rate": 1.8146912728193774e-05, + "loss": 1.4014, + "step": 7527 + }, + { + "epoch": 0.2210347055023783, + "grad_norm": 0.0, + "learning_rate": 1.814636123426432e-05, + "loss": 1.5059, + "step": 7528 + }, + { + "epoch": 0.2210640671795173, + "grad_norm": 0.0, + "learning_rate": 1.8145809666665233e-05, + "loss": 1.4561, + "step": 7529 + }, + { + "epoch": 0.22109342885665628, + "grad_norm": 0.0, + "learning_rate": 1.8145258025401507e-05, + "loss": 1.2637, + "step": 7530 + }, + { + "epoch": 0.2211227905337953, + "grad_norm": 0.0, + "learning_rate": 1.8144706310478123e-05, + "loss": 1.3369, + "step": 7531 + }, + { + "epoch": 0.2211521522109343, + "grad_norm": 0.0, + "learning_rate": 1.8144154521900078e-05, + "loss": 1.5107, + "step": 7532 + }, + { + "epoch": 0.22118151388807328, + "grad_norm": 0.0, + "learning_rate": 1.814360265967236e-05, + "loss": 1.4336, + "step": 7533 + }, + { + "epoch": 0.2212108755652123, + "grad_norm": 0.0, + "learning_rate": 1.814305072379995e-05, + "loss": 1.5322, + "step": 7534 + }, + { + "epoch": 0.22124023724235128, + "grad_norm": 0.0, + "learning_rate": 1.814249871428785e-05, + "loss": 1.3828, + "step": 7535 + }, + { + "epoch": 0.22126959891949027, + "grad_norm": 0.0, + "learning_rate": 1.8141946631141046e-05, + "loss": 1.4639, + "step": 7536 + }, + { + "epoch": 0.2212989605966293, + "grad_norm": 0.0, + "learning_rate": 1.8141394474364538e-05, + "loss": 1.4775, + "step": 7537 + }, + { + "epoch": 0.22132832227376828, + "grad_norm": 0.0, + "learning_rate": 1.8140842243963313e-05, + "loss": 1.4287, + "step": 7538 + }, + { + "epoch": 0.22135768395090727, + "grad_norm": 0.0, + "learning_rate": 1.8140289939942368e-05, + "loss": 1.4424, + "step": 7539 + }, + { + "epoch": 0.2213870456280463, + "grad_norm": 0.0, + "learning_rate": 1.8139737562306698e-05, + "loss": 1.4365, + "step": 7540 + }, + { + "epoch": 0.22141640730518528, + "grad_norm": 0.0, + "learning_rate": 1.813918511106129e-05, + "loss": 1.4658, + "step": 7541 + }, + { + "epoch": 0.22144576898232426, + "grad_norm": 0.0, + "learning_rate": 1.813863258621115e-05, + "loss": 1.3555, + "step": 7542 + }, + { + "epoch": 0.22147513065946328, + "grad_norm": 0.0, + "learning_rate": 1.8138079987761272e-05, + "loss": 1.4863, + "step": 7543 + }, + { + "epoch": 0.22150449233660227, + "grad_norm": 0.0, + "learning_rate": 1.813752731571665e-05, + "loss": 1.2222, + "step": 7544 + }, + { + "epoch": 0.22153385401374126, + "grad_norm": 0.0, + "learning_rate": 1.813697457008229e-05, + "loss": 1.4404, + "step": 7545 + }, + { + "epoch": 0.22156321569088025, + "grad_norm": 0.0, + "learning_rate": 1.8136421750863183e-05, + "loss": 1.5537, + "step": 7546 + }, + { + "epoch": 0.22159257736801927, + "grad_norm": 0.0, + "learning_rate": 1.8135868858064328e-05, + "loss": 1.4014, + "step": 7547 + }, + { + "epoch": 0.22162193904515826, + "grad_norm": 0.0, + "learning_rate": 1.8135315891690727e-05, + "loss": 1.3555, + "step": 7548 + }, + { + "epoch": 0.22165130072229725, + "grad_norm": 0.0, + "learning_rate": 1.8134762851747384e-05, + "loss": 1.3105, + "step": 7549 + }, + { + "epoch": 0.22168066239943626, + "grad_norm": 0.0, + "learning_rate": 1.8134209738239295e-05, + "loss": 1.4053, + "step": 7550 + }, + { + "epoch": 0.22171002407657525, + "grad_norm": 0.0, + "learning_rate": 1.8133656551171463e-05, + "loss": 1.4307, + "step": 7551 + }, + { + "epoch": 0.22173938575371424, + "grad_norm": 0.0, + "learning_rate": 1.813310329054889e-05, + "loss": 1.4375, + "step": 7552 + }, + { + "epoch": 0.22176874743085326, + "grad_norm": 0.0, + "learning_rate": 1.813254995637659e-05, + "loss": 1.5127, + "step": 7553 + }, + { + "epoch": 0.22179810910799225, + "grad_norm": 0.0, + "learning_rate": 1.8131996548659548e-05, + "loss": 1.375, + "step": 7554 + }, + { + "epoch": 0.22182747078513124, + "grad_norm": 0.0, + "learning_rate": 1.8131443067402784e-05, + "loss": 1.4189, + "step": 7555 + }, + { + "epoch": 0.22185683246227025, + "grad_norm": 0.0, + "learning_rate": 1.8130889512611295e-05, + "loss": 1.5215, + "step": 7556 + }, + { + "epoch": 0.22188619413940924, + "grad_norm": 0.0, + "learning_rate": 1.8130335884290092e-05, + "loss": 1.3711, + "step": 7557 + }, + { + "epoch": 0.22191555581654823, + "grad_norm": 0.0, + "learning_rate": 1.812978218244418e-05, + "loss": 1.3438, + "step": 7558 + }, + { + "epoch": 0.22194491749368725, + "grad_norm": 0.0, + "learning_rate": 1.8129228407078565e-05, + "loss": 1.5195, + "step": 7559 + }, + { + "epoch": 0.22197427917082624, + "grad_norm": 0.0, + "learning_rate": 1.8128674558198253e-05, + "loss": 1.3594, + "step": 7560 + }, + { + "epoch": 0.22200364084796523, + "grad_norm": 0.0, + "learning_rate": 1.8128120635808258e-05, + "loss": 1.4336, + "step": 7561 + }, + { + "epoch": 0.22203300252510424, + "grad_norm": 0.0, + "learning_rate": 1.8127566639913587e-05, + "loss": 1.3936, + "step": 7562 + }, + { + "epoch": 0.22206236420224323, + "grad_norm": 0.0, + "learning_rate": 1.812701257051925e-05, + "loss": 1.4854, + "step": 7563 + }, + { + "epoch": 0.22209172587938222, + "grad_norm": 0.0, + "learning_rate": 1.812645842763025e-05, + "loss": 1.4854, + "step": 7564 + }, + { + "epoch": 0.22212108755652124, + "grad_norm": 0.0, + "learning_rate": 1.8125904211251613e-05, + "loss": 1.5625, + "step": 7565 + }, + { + "epoch": 0.22215044923366023, + "grad_norm": 0.0, + "learning_rate": 1.8125349921388343e-05, + "loss": 1.4863, + "step": 7566 + }, + { + "epoch": 0.22217981091079922, + "grad_norm": 0.0, + "learning_rate": 1.8124795558045447e-05, + "loss": 1.3311, + "step": 7567 + }, + { + "epoch": 0.22220917258793824, + "grad_norm": 0.0, + "learning_rate": 1.812424112122795e-05, + "loss": 1.2979, + "step": 7568 + }, + { + "epoch": 0.22223853426507723, + "grad_norm": 0.0, + "learning_rate": 1.8123686610940852e-05, + "loss": 1.2617, + "step": 7569 + }, + { + "epoch": 0.22226789594221621, + "grad_norm": 0.0, + "learning_rate": 1.8123132027189182e-05, + "loss": 1.4688, + "step": 7570 + }, + { + "epoch": 0.2222972576193552, + "grad_norm": 0.0, + "learning_rate": 1.8122577369977946e-05, + "loss": 1.3926, + "step": 7571 + }, + { + "epoch": 0.22232661929649422, + "grad_norm": 0.0, + "learning_rate": 1.8122022639312168e-05, + "loss": 1.4131, + "step": 7572 + }, + { + "epoch": 0.2223559809736332, + "grad_norm": 0.0, + "learning_rate": 1.8121467835196853e-05, + "loss": 1.2339, + "step": 7573 + }, + { + "epoch": 0.2223853426507722, + "grad_norm": 0.0, + "learning_rate": 1.8120912957637026e-05, + "loss": 1.4658, + "step": 7574 + }, + { + "epoch": 0.22241470432791122, + "grad_norm": 0.0, + "learning_rate": 1.8120358006637705e-05, + "loss": 1.3916, + "step": 7575 + }, + { + "epoch": 0.2224440660050502, + "grad_norm": 0.0, + "learning_rate": 1.8119802982203906e-05, + "loss": 1.4229, + "step": 7576 + }, + { + "epoch": 0.2224734276821892, + "grad_norm": 0.0, + "learning_rate": 1.811924788434065e-05, + "loss": 1.5166, + "step": 7577 + }, + { + "epoch": 0.2225027893593282, + "grad_norm": 0.0, + "learning_rate": 1.8118692713052953e-05, + "loss": 1.5039, + "step": 7578 + }, + { + "epoch": 0.2225321510364672, + "grad_norm": 0.0, + "learning_rate": 1.811813746834584e-05, + "loss": 1.4424, + "step": 7579 + }, + { + "epoch": 0.2225615127136062, + "grad_norm": 0.0, + "learning_rate": 1.811758215022433e-05, + "loss": 1.4072, + "step": 7580 + }, + { + "epoch": 0.2225908743907452, + "grad_norm": 0.0, + "learning_rate": 1.811702675869345e-05, + "loss": 1.4287, + "step": 7581 + }, + { + "epoch": 0.2226202360678842, + "grad_norm": 0.0, + "learning_rate": 1.811647129375821e-05, + "loss": 1.4131, + "step": 7582 + }, + { + "epoch": 0.2226495977450232, + "grad_norm": 0.0, + "learning_rate": 1.811591575542365e-05, + "loss": 1.3262, + "step": 7583 + }, + { + "epoch": 0.2226789594221622, + "grad_norm": 0.0, + "learning_rate": 1.811536014369478e-05, + "loss": 1.418, + "step": 7584 + }, + { + "epoch": 0.2227083210993012, + "grad_norm": 0.0, + "learning_rate": 1.811480445857663e-05, + "loss": 1.4951, + "step": 7585 + }, + { + "epoch": 0.22273768277644018, + "grad_norm": 0.0, + "learning_rate": 1.811424870007423e-05, + "loss": 1.6006, + "step": 7586 + }, + { + "epoch": 0.2227670444535792, + "grad_norm": 0.0, + "learning_rate": 1.8113692868192597e-05, + "loss": 1.3203, + "step": 7587 + }, + { + "epoch": 0.2227964061307182, + "grad_norm": 0.0, + "learning_rate": 1.8113136962936758e-05, + "loss": 1.356, + "step": 7588 + }, + { + "epoch": 0.22282576780785718, + "grad_norm": 0.0, + "learning_rate": 1.811258098431175e-05, + "loss": 1.375, + "step": 7589 + }, + { + "epoch": 0.2228551294849962, + "grad_norm": 0.0, + "learning_rate": 1.8112024932322593e-05, + "loss": 1.333, + "step": 7590 + }, + { + "epoch": 0.22288449116213518, + "grad_norm": 0.0, + "learning_rate": 1.8111468806974316e-05, + "loss": 1.416, + "step": 7591 + }, + { + "epoch": 0.22291385283927417, + "grad_norm": 0.0, + "learning_rate": 1.811091260827195e-05, + "loss": 1.4102, + "step": 7592 + }, + { + "epoch": 0.2229432145164132, + "grad_norm": 0.0, + "learning_rate": 1.811035633622052e-05, + "loss": 1.5049, + "step": 7593 + }, + { + "epoch": 0.22297257619355218, + "grad_norm": 0.0, + "learning_rate": 1.8109799990825065e-05, + "loss": 1.4873, + "step": 7594 + }, + { + "epoch": 0.22300193787069117, + "grad_norm": 0.0, + "learning_rate": 1.810924357209061e-05, + "loss": 1.2969, + "step": 7595 + }, + { + "epoch": 0.22303129954783016, + "grad_norm": 0.0, + "learning_rate": 1.810868708002219e-05, + "loss": 1.4131, + "step": 7596 + }, + { + "epoch": 0.22306066122496918, + "grad_norm": 0.0, + "learning_rate": 1.8108130514624837e-05, + "loss": 1.4941, + "step": 7597 + }, + { + "epoch": 0.22309002290210816, + "grad_norm": 0.0, + "learning_rate": 1.8107573875903583e-05, + "loss": 1.2949, + "step": 7598 + }, + { + "epoch": 0.22311938457924715, + "grad_norm": 0.0, + "learning_rate": 1.8107017163863458e-05, + "loss": 1.3926, + "step": 7599 + }, + { + "epoch": 0.22314874625638617, + "grad_norm": 0.0, + "learning_rate": 1.8106460378509504e-05, + "loss": 1.4795, + "step": 7600 + }, + { + "epoch": 0.22317810793352516, + "grad_norm": 0.0, + "learning_rate": 1.8105903519846753e-05, + "loss": 1.4209, + "step": 7601 + }, + { + "epoch": 0.22320746961066415, + "grad_norm": 0.0, + "learning_rate": 1.810534658788024e-05, + "loss": 1.376, + "step": 7602 + }, + { + "epoch": 0.22323683128780317, + "grad_norm": 0.0, + "learning_rate": 1.8104789582615e-05, + "loss": 1.5479, + "step": 7603 + }, + { + "epoch": 0.22326619296494216, + "grad_norm": 0.0, + "learning_rate": 1.8104232504056072e-05, + "loss": 1.4561, + "step": 7604 + }, + { + "epoch": 0.22329555464208115, + "grad_norm": 0.0, + "learning_rate": 1.8103675352208496e-05, + "loss": 1.4629, + "step": 7605 + }, + { + "epoch": 0.22332491631922016, + "grad_norm": 0.0, + "learning_rate": 1.810311812707731e-05, + "loss": 1.4844, + "step": 7606 + }, + { + "epoch": 0.22335427799635915, + "grad_norm": 0.0, + "learning_rate": 1.8102560828667547e-05, + "loss": 1.623, + "step": 7607 + }, + { + "epoch": 0.22338363967349814, + "grad_norm": 0.0, + "learning_rate": 1.8102003456984255e-05, + "loss": 1.3652, + "step": 7608 + }, + { + "epoch": 0.22341300135063716, + "grad_norm": 0.0, + "learning_rate": 1.8101446012032467e-05, + "loss": 1.3438, + "step": 7609 + }, + { + "epoch": 0.22344236302777615, + "grad_norm": 0.0, + "learning_rate": 1.8100888493817232e-05, + "loss": 1.3506, + "step": 7610 + }, + { + "epoch": 0.22347172470491514, + "grad_norm": 0.0, + "learning_rate": 1.8100330902343584e-05, + "loss": 1.4033, + "step": 7611 + }, + { + "epoch": 0.22350108638205415, + "grad_norm": 0.0, + "learning_rate": 1.8099773237616572e-05, + "loss": 1.4219, + "step": 7612 + }, + { + "epoch": 0.22353044805919314, + "grad_norm": 0.0, + "learning_rate": 1.8099215499641238e-05, + "loss": 1.4434, + "step": 7613 + }, + { + "epoch": 0.22355980973633213, + "grad_norm": 0.0, + "learning_rate": 1.809865768842262e-05, + "loss": 1.377, + "step": 7614 + }, + { + "epoch": 0.22358917141347115, + "grad_norm": 0.0, + "learning_rate": 1.809809980396577e-05, + "loss": 1.3809, + "step": 7615 + }, + { + "epoch": 0.22361853309061014, + "grad_norm": 0.0, + "learning_rate": 1.8097541846275726e-05, + "loss": 1.4277, + "step": 7616 + }, + { + "epoch": 0.22364789476774913, + "grad_norm": 0.0, + "learning_rate": 1.8096983815357537e-05, + "loss": 1.4326, + "step": 7617 + }, + { + "epoch": 0.22367725644488815, + "grad_norm": 0.0, + "learning_rate": 1.8096425711216252e-05, + "loss": 1.4307, + "step": 7618 + }, + { + "epoch": 0.22370661812202713, + "grad_norm": 0.0, + "learning_rate": 1.809586753385692e-05, + "loss": 1.5381, + "step": 7619 + }, + { + "epoch": 0.22373597979916612, + "grad_norm": 0.0, + "learning_rate": 1.8095309283284576e-05, + "loss": 1.4961, + "step": 7620 + }, + { + "epoch": 0.2237653414763051, + "grad_norm": 0.0, + "learning_rate": 1.809475095950428e-05, + "loss": 1.4688, + "step": 7621 + }, + { + "epoch": 0.22379470315344413, + "grad_norm": 0.0, + "learning_rate": 1.8094192562521077e-05, + "loss": 1.3672, + "step": 7622 + }, + { + "epoch": 0.22382406483058312, + "grad_norm": 0.0, + "learning_rate": 1.809363409234002e-05, + "loss": 1.4385, + "step": 7623 + }, + { + "epoch": 0.2238534265077221, + "grad_norm": 0.0, + "learning_rate": 1.8093075548966156e-05, + "loss": 1.4541, + "step": 7624 + }, + { + "epoch": 0.22388278818486113, + "grad_norm": 0.0, + "learning_rate": 1.8092516932404538e-05, + "loss": 1.3701, + "step": 7625 + }, + { + "epoch": 0.22391214986200011, + "grad_norm": 0.0, + "learning_rate": 1.8091958242660215e-05, + "loss": 1.3984, + "step": 7626 + }, + { + "epoch": 0.2239415115391391, + "grad_norm": 0.0, + "learning_rate": 1.8091399479738246e-05, + "loss": 1.3545, + "step": 7627 + }, + { + "epoch": 0.22397087321627812, + "grad_norm": 0.0, + "learning_rate": 1.8090840643643673e-05, + "loss": 1.417, + "step": 7628 + }, + { + "epoch": 0.2240002348934171, + "grad_norm": 0.0, + "learning_rate": 1.809028173438156e-05, + "loss": 1.2549, + "step": 7629 + }, + { + "epoch": 0.2240295965705561, + "grad_norm": 0.0, + "learning_rate": 1.8089722751956956e-05, + "loss": 1.4053, + "step": 7630 + }, + { + "epoch": 0.22405895824769512, + "grad_norm": 0.0, + "learning_rate": 1.8089163696374915e-05, + "loss": 1.4375, + "step": 7631 + }, + { + "epoch": 0.2240883199248341, + "grad_norm": 0.0, + "learning_rate": 1.80886045676405e-05, + "loss": 1.3682, + "step": 7632 + }, + { + "epoch": 0.2241176816019731, + "grad_norm": 0.0, + "learning_rate": 1.8088045365758757e-05, + "loss": 1.3652, + "step": 7633 + }, + { + "epoch": 0.2241470432791121, + "grad_norm": 0.0, + "learning_rate": 1.8087486090734753e-05, + "loss": 1.2964, + "step": 7634 + }, + { + "epoch": 0.2241764049562511, + "grad_norm": 0.0, + "learning_rate": 1.808692674257354e-05, + "loss": 1.4873, + "step": 7635 + }, + { + "epoch": 0.2242057666333901, + "grad_norm": 0.0, + "learning_rate": 1.8086367321280176e-05, + "loss": 1.4805, + "step": 7636 + }, + { + "epoch": 0.2242351283105291, + "grad_norm": 0.0, + "learning_rate": 1.8085807826859724e-05, + "loss": 1.3857, + "step": 7637 + }, + { + "epoch": 0.2242644899876681, + "grad_norm": 0.0, + "learning_rate": 1.808524825931724e-05, + "loss": 1.4219, + "step": 7638 + }, + { + "epoch": 0.2242938516648071, + "grad_norm": 0.0, + "learning_rate": 1.808468861865778e-05, + "loss": 1.3799, + "step": 7639 + }, + { + "epoch": 0.2243232133419461, + "grad_norm": 0.0, + "learning_rate": 1.8084128904886417e-05, + "loss": 1.3384, + "step": 7640 + }, + { + "epoch": 0.2243525750190851, + "grad_norm": 0.0, + "learning_rate": 1.8083569118008203e-05, + "loss": 1.3115, + "step": 7641 + }, + { + "epoch": 0.22438193669622408, + "grad_norm": 0.0, + "learning_rate": 1.8083009258028205e-05, + "loss": 1.375, + "step": 7642 + }, + { + "epoch": 0.2244112983733631, + "grad_norm": 0.0, + "learning_rate": 1.808244932495148e-05, + "loss": 1.3799, + "step": 7643 + }, + { + "epoch": 0.2244406600505021, + "grad_norm": 0.0, + "learning_rate": 1.80818893187831e-05, + "loss": 1.6504, + "step": 7644 + }, + { + "epoch": 0.22447002172764108, + "grad_norm": 0.0, + "learning_rate": 1.8081329239528122e-05, + "loss": 1.249, + "step": 7645 + }, + { + "epoch": 0.22449938340478007, + "grad_norm": 0.0, + "learning_rate": 1.8080769087191617e-05, + "loss": 1.4844, + "step": 7646 + }, + { + "epoch": 0.22452874508191908, + "grad_norm": 0.0, + "learning_rate": 1.8080208861778648e-05, + "loss": 1.4033, + "step": 7647 + }, + { + "epoch": 0.22455810675905807, + "grad_norm": 0.0, + "learning_rate": 1.8079648563294278e-05, + "loss": 1.5137, + "step": 7648 + }, + { + "epoch": 0.22458746843619706, + "grad_norm": 0.0, + "learning_rate": 1.807908819174358e-05, + "loss": 1.46, + "step": 7649 + }, + { + "epoch": 0.22461683011333608, + "grad_norm": 0.0, + "learning_rate": 1.8078527747131615e-05, + "loss": 1.4727, + "step": 7650 + }, + { + "epoch": 0.22464619179047507, + "grad_norm": 0.0, + "learning_rate": 1.8077967229463457e-05, + "loss": 1.4141, + "step": 7651 + }, + { + "epoch": 0.22467555346761406, + "grad_norm": 0.0, + "learning_rate": 1.807740663874417e-05, + "loss": 1.3428, + "step": 7652 + }, + { + "epoch": 0.22470491514475308, + "grad_norm": 0.0, + "learning_rate": 1.8076845974978828e-05, + "loss": 1.4521, + "step": 7653 + }, + { + "epoch": 0.22473427682189207, + "grad_norm": 0.0, + "learning_rate": 1.80762852381725e-05, + "loss": 1.5469, + "step": 7654 + }, + { + "epoch": 0.22476363849903105, + "grad_norm": 0.0, + "learning_rate": 1.8075724428330256e-05, + "loss": 1.4404, + "step": 7655 + }, + { + "epoch": 0.22479300017617007, + "grad_norm": 0.0, + "learning_rate": 1.8075163545457168e-05, + "loss": 1.4297, + "step": 7656 + }, + { + "epoch": 0.22482236185330906, + "grad_norm": 0.0, + "learning_rate": 1.8074602589558306e-05, + "loss": 1.4834, + "step": 7657 + }, + { + "epoch": 0.22485172353044805, + "grad_norm": 0.0, + "learning_rate": 1.8074041560638747e-05, + "loss": 1.3535, + "step": 7658 + }, + { + "epoch": 0.22488108520758707, + "grad_norm": 0.0, + "learning_rate": 1.807348045870356e-05, + "loss": 1.5156, + "step": 7659 + }, + { + "epoch": 0.22491044688472606, + "grad_norm": 0.0, + "learning_rate": 1.8072919283757823e-05, + "loss": 1.4619, + "step": 7660 + }, + { + "epoch": 0.22493980856186505, + "grad_norm": 0.0, + "learning_rate": 1.8072358035806612e-05, + "loss": 1.2852, + "step": 7661 + }, + { + "epoch": 0.22496917023900406, + "grad_norm": 0.0, + "learning_rate": 1.8071796714854995e-05, + "loss": 1.3877, + "step": 7662 + }, + { + "epoch": 0.22499853191614305, + "grad_norm": 0.0, + "learning_rate": 1.8071235320908058e-05, + "loss": 1.3643, + "step": 7663 + }, + { + "epoch": 0.22502789359328204, + "grad_norm": 0.0, + "learning_rate": 1.8070673853970868e-05, + "loss": 1.542, + "step": 7664 + }, + { + "epoch": 0.22505725527042106, + "grad_norm": 0.0, + "learning_rate": 1.807011231404851e-05, + "loss": 1.4033, + "step": 7665 + }, + { + "epoch": 0.22508661694756005, + "grad_norm": 0.0, + "learning_rate": 1.806955070114606e-05, + "loss": 1.3799, + "step": 7666 + }, + { + "epoch": 0.22511597862469904, + "grad_norm": 0.0, + "learning_rate": 1.8068989015268595e-05, + "loss": 1.4082, + "step": 7667 + }, + { + "epoch": 0.22514534030183805, + "grad_norm": 0.0, + "learning_rate": 1.80684272564212e-05, + "loss": 1.3145, + "step": 7668 + }, + { + "epoch": 0.22517470197897704, + "grad_norm": 0.0, + "learning_rate": 1.8067865424608944e-05, + "loss": 1.4502, + "step": 7669 + }, + { + "epoch": 0.22520406365611603, + "grad_norm": 0.0, + "learning_rate": 1.8067303519836917e-05, + "loss": 1.4873, + "step": 7670 + }, + { + "epoch": 0.22523342533325502, + "grad_norm": 0.0, + "learning_rate": 1.8066741542110204e-05, + "loss": 1.3682, + "step": 7671 + }, + { + "epoch": 0.22526278701039404, + "grad_norm": 0.0, + "learning_rate": 1.806617949143387e-05, + "loss": 1.418, + "step": 7672 + }, + { + "epoch": 0.22529214868753303, + "grad_norm": 0.0, + "learning_rate": 1.806561736781302e-05, + "loss": 1.3584, + "step": 7673 + }, + { + "epoch": 0.22532151036467202, + "grad_norm": 0.0, + "learning_rate": 1.8065055171252723e-05, + "loss": 1.5303, + "step": 7674 + }, + { + "epoch": 0.22535087204181103, + "grad_norm": 0.0, + "learning_rate": 1.8064492901758063e-05, + "loss": 1.4316, + "step": 7675 + }, + { + "epoch": 0.22538023371895002, + "grad_norm": 0.0, + "learning_rate": 1.8063930559334135e-05, + "loss": 1.5039, + "step": 7676 + }, + { + "epoch": 0.225409595396089, + "grad_norm": 0.0, + "learning_rate": 1.8063368143986012e-05, + "loss": 1.457, + "step": 7677 + }, + { + "epoch": 0.22543895707322803, + "grad_norm": 0.0, + "learning_rate": 1.806280565571879e-05, + "loss": 1.271, + "step": 7678 + }, + { + "epoch": 0.22546831875036702, + "grad_norm": 0.0, + "learning_rate": 1.806224309453755e-05, + "loss": 1.4521, + "step": 7679 + }, + { + "epoch": 0.225497680427506, + "grad_norm": 0.0, + "learning_rate": 1.806168046044738e-05, + "loss": 1.458, + "step": 7680 + }, + { + "epoch": 0.22552704210464503, + "grad_norm": 0.0, + "learning_rate": 1.806111775345337e-05, + "loss": 1.4238, + "step": 7681 + }, + { + "epoch": 0.22555640378178402, + "grad_norm": 0.0, + "learning_rate": 1.8060554973560603e-05, + "loss": 1.4551, + "step": 7682 + }, + { + "epoch": 0.225585765458923, + "grad_norm": 0.0, + "learning_rate": 1.805999212077418e-05, + "loss": 1.3945, + "step": 7683 + }, + { + "epoch": 0.22561512713606202, + "grad_norm": 0.0, + "learning_rate": 1.805942919509918e-05, + "loss": 1.5215, + "step": 7684 + }, + { + "epoch": 0.225644488813201, + "grad_norm": 0.0, + "learning_rate": 1.8058866196540698e-05, + "loss": 1.3506, + "step": 7685 + }, + { + "epoch": 0.22567385049034, + "grad_norm": 0.0, + "learning_rate": 1.8058303125103823e-05, + "loss": 1.4404, + "step": 7686 + }, + { + "epoch": 0.22570321216747902, + "grad_norm": 0.0, + "learning_rate": 1.8057739980793654e-05, + "loss": 1.4141, + "step": 7687 + }, + { + "epoch": 0.225732573844618, + "grad_norm": 0.0, + "learning_rate": 1.8057176763615277e-05, + "loss": 1.3398, + "step": 7688 + }, + { + "epoch": 0.225761935521757, + "grad_norm": 0.0, + "learning_rate": 1.8056613473573788e-05, + "loss": 1.5371, + "step": 7689 + }, + { + "epoch": 0.225791297198896, + "grad_norm": 0.0, + "learning_rate": 1.805605011067428e-05, + "loss": 1.2959, + "step": 7690 + }, + { + "epoch": 0.225820658876035, + "grad_norm": 0.0, + "learning_rate": 1.8055486674921844e-05, + "loss": 1.501, + "step": 7691 + }, + { + "epoch": 0.225850020553174, + "grad_norm": 0.0, + "learning_rate": 1.8054923166321582e-05, + "loss": 1.311, + "step": 7692 + }, + { + "epoch": 0.225879382230313, + "grad_norm": 0.0, + "learning_rate": 1.8054359584878587e-05, + "loss": 1.3809, + "step": 7693 + }, + { + "epoch": 0.225908743907452, + "grad_norm": 0.0, + "learning_rate": 1.8053795930597955e-05, + "loss": 1.4141, + "step": 7694 + }, + { + "epoch": 0.225938105584591, + "grad_norm": 0.0, + "learning_rate": 1.8053232203484784e-05, + "loss": 1.3555, + "step": 7695 + }, + { + "epoch": 0.22596746726172998, + "grad_norm": 0.0, + "learning_rate": 1.805266840354417e-05, + "loss": 1.3887, + "step": 7696 + }, + { + "epoch": 0.225996828938869, + "grad_norm": 0.0, + "learning_rate": 1.8052104530781216e-05, + "loss": 1.4199, + "step": 7697 + }, + { + "epoch": 0.22602619061600798, + "grad_norm": 0.0, + "learning_rate": 1.8051540585201014e-05, + "loss": 1.4961, + "step": 7698 + }, + { + "epoch": 0.22605555229314697, + "grad_norm": 0.0, + "learning_rate": 1.8050976566808673e-05, + "loss": 1.5293, + "step": 7699 + }, + { + "epoch": 0.226084913970286, + "grad_norm": 0.0, + "learning_rate": 1.805041247560929e-05, + "loss": 1.4023, + "step": 7700 + }, + { + "epoch": 0.22611427564742498, + "grad_norm": 0.0, + "learning_rate": 1.804984831160796e-05, + "loss": 1.4287, + "step": 7701 + }, + { + "epoch": 0.22614363732456397, + "grad_norm": 0.0, + "learning_rate": 1.804928407480979e-05, + "loss": 1.4746, + "step": 7702 + }, + { + "epoch": 0.22617299900170298, + "grad_norm": 0.0, + "learning_rate": 1.8048719765219892e-05, + "loss": 1.4043, + "step": 7703 + }, + { + "epoch": 0.22620236067884197, + "grad_norm": 0.0, + "learning_rate": 1.8048155382843352e-05, + "loss": 1.4707, + "step": 7704 + }, + { + "epoch": 0.22623172235598096, + "grad_norm": 0.0, + "learning_rate": 1.8047590927685282e-05, + "loss": 1.4082, + "step": 7705 + }, + { + "epoch": 0.22626108403311998, + "grad_norm": 0.0, + "learning_rate": 1.8047026399750785e-05, + "loss": 1.4014, + "step": 7706 + }, + { + "epoch": 0.22629044571025897, + "grad_norm": 0.0, + "learning_rate": 1.804646179904497e-05, + "loss": 1.5195, + "step": 7707 + }, + { + "epoch": 0.22631980738739796, + "grad_norm": 0.0, + "learning_rate": 1.804589712557294e-05, + "loss": 1.4033, + "step": 7708 + }, + { + "epoch": 0.22634916906453698, + "grad_norm": 0.0, + "learning_rate": 1.8045332379339805e-05, + "loss": 1.3047, + "step": 7709 + }, + { + "epoch": 0.22637853074167597, + "grad_norm": 0.0, + "learning_rate": 1.8044767560350665e-05, + "loss": 1.4175, + "step": 7710 + }, + { + "epoch": 0.22640789241881495, + "grad_norm": 0.0, + "learning_rate": 1.8044202668610632e-05, + "loss": 1.415, + "step": 7711 + }, + { + "epoch": 0.22643725409595397, + "grad_norm": 0.0, + "learning_rate": 1.8043637704124814e-05, + "loss": 1.4014, + "step": 7712 + }, + { + "epoch": 0.22646661577309296, + "grad_norm": 0.0, + "learning_rate": 1.8043072666898323e-05, + "loss": 1.4282, + "step": 7713 + }, + { + "epoch": 0.22649597745023195, + "grad_norm": 0.0, + "learning_rate": 1.8042507556936263e-05, + "loss": 1.4219, + "step": 7714 + }, + { + "epoch": 0.22652533912737097, + "grad_norm": 0.0, + "learning_rate": 1.804194237424375e-05, + "loss": 1.3477, + "step": 7715 + }, + { + "epoch": 0.22655470080450996, + "grad_norm": 0.0, + "learning_rate": 1.804137711882589e-05, + "loss": 1.4609, + "step": 7716 + }, + { + "epoch": 0.22658406248164895, + "grad_norm": 0.0, + "learning_rate": 1.8040811790687796e-05, + "loss": 1.3965, + "step": 7717 + }, + { + "epoch": 0.22661342415878796, + "grad_norm": 0.0, + "learning_rate": 1.8040246389834584e-05, + "loss": 1.293, + "step": 7718 + }, + { + "epoch": 0.22664278583592695, + "grad_norm": 0.0, + "learning_rate": 1.8039680916271367e-05, + "loss": 1.4658, + "step": 7719 + }, + { + "epoch": 0.22667214751306594, + "grad_norm": 0.0, + "learning_rate": 1.8039115370003252e-05, + "loss": 1.4609, + "step": 7720 + }, + { + "epoch": 0.22670150919020493, + "grad_norm": 0.0, + "learning_rate": 1.8038549751035357e-05, + "loss": 1.3633, + "step": 7721 + }, + { + "epoch": 0.22673087086734395, + "grad_norm": 0.0, + "learning_rate": 1.80379840593728e-05, + "loss": 1.376, + "step": 7722 + }, + { + "epoch": 0.22676023254448294, + "grad_norm": 0.0, + "learning_rate": 1.8037418295020695e-05, + "loss": 1.4717, + "step": 7723 + }, + { + "epoch": 0.22678959422162193, + "grad_norm": 0.0, + "learning_rate": 1.803685245798416e-05, + "loss": 1.4961, + "step": 7724 + }, + { + "epoch": 0.22681895589876094, + "grad_norm": 0.0, + "learning_rate": 1.803628654826831e-05, + "loss": 1.2725, + "step": 7725 + }, + { + "epoch": 0.22684831757589993, + "grad_norm": 0.0, + "learning_rate": 1.803572056587826e-05, + "loss": 1.4248, + "step": 7726 + }, + { + "epoch": 0.22687767925303892, + "grad_norm": 0.0, + "learning_rate": 1.803515451081913e-05, + "loss": 1.4883, + "step": 7727 + }, + { + "epoch": 0.22690704093017794, + "grad_norm": 0.0, + "learning_rate": 1.803458838309604e-05, + "loss": 1.4004, + "step": 7728 + }, + { + "epoch": 0.22693640260731693, + "grad_norm": 0.0, + "learning_rate": 1.8034022182714115e-05, + "loss": 1.2764, + "step": 7729 + }, + { + "epoch": 0.22696576428445592, + "grad_norm": 0.0, + "learning_rate": 1.8033455909678467e-05, + "loss": 1.5576, + "step": 7730 + }, + { + "epoch": 0.22699512596159493, + "grad_norm": 0.0, + "learning_rate": 1.8032889563994216e-05, + "loss": 1.4854, + "step": 7731 + }, + { + "epoch": 0.22702448763873392, + "grad_norm": 0.0, + "learning_rate": 1.8032323145666492e-05, + "loss": 1.3057, + "step": 7732 + }, + { + "epoch": 0.2270538493158729, + "grad_norm": 0.0, + "learning_rate": 1.803175665470041e-05, + "loss": 1.3311, + "step": 7733 + }, + { + "epoch": 0.22708321099301193, + "grad_norm": 0.0, + "learning_rate": 1.8031190091101098e-05, + "loss": 1.4961, + "step": 7734 + }, + { + "epoch": 0.22711257267015092, + "grad_norm": 0.0, + "learning_rate": 1.8030623454873673e-05, + "loss": 1.3076, + "step": 7735 + }, + { + "epoch": 0.2271419343472899, + "grad_norm": 0.0, + "learning_rate": 1.8030056746023267e-05, + "loss": 1.3936, + "step": 7736 + }, + { + "epoch": 0.22717129602442893, + "grad_norm": 0.0, + "learning_rate": 1.8029489964555002e-05, + "loss": 1.3057, + "step": 7737 + }, + { + "epoch": 0.22720065770156792, + "grad_norm": 0.0, + "learning_rate": 1.8028923110474e-05, + "loss": 1.4521, + "step": 7738 + }, + { + "epoch": 0.2272300193787069, + "grad_norm": 0.0, + "learning_rate": 1.8028356183785392e-05, + "loss": 1.4336, + "step": 7739 + }, + { + "epoch": 0.22725938105584592, + "grad_norm": 0.0, + "learning_rate": 1.8027789184494303e-05, + "loss": 1.5264, + "step": 7740 + }, + { + "epoch": 0.2272887427329849, + "grad_norm": 0.0, + "learning_rate": 1.8027222112605857e-05, + "loss": 1.4834, + "step": 7741 + }, + { + "epoch": 0.2273181044101239, + "grad_norm": 0.0, + "learning_rate": 1.802665496812519e-05, + "loss": 1.3066, + "step": 7742 + }, + { + "epoch": 0.22734746608726292, + "grad_norm": 0.0, + "learning_rate": 1.8026087751057427e-05, + "loss": 1.3359, + "step": 7743 + }, + { + "epoch": 0.2273768277644019, + "grad_norm": 0.0, + "learning_rate": 1.8025520461407693e-05, + "loss": 1.4844, + "step": 7744 + }, + { + "epoch": 0.2274061894415409, + "grad_norm": 0.0, + "learning_rate": 1.8024953099181123e-05, + "loss": 1.4375, + "step": 7745 + }, + { + "epoch": 0.2274355511186799, + "grad_norm": 0.0, + "learning_rate": 1.8024385664382847e-05, + "loss": 1.3838, + "step": 7746 + }, + { + "epoch": 0.2274649127958189, + "grad_norm": 0.0, + "learning_rate": 1.8023818157018e-05, + "loss": 1.4453, + "step": 7747 + }, + { + "epoch": 0.2274942744729579, + "grad_norm": 0.0, + "learning_rate": 1.8023250577091706e-05, + "loss": 1.4004, + "step": 7748 + }, + { + "epoch": 0.22752363615009688, + "grad_norm": 0.0, + "learning_rate": 1.8022682924609104e-05, + "loss": 1.3164, + "step": 7749 + }, + { + "epoch": 0.2275529978272359, + "grad_norm": 0.0, + "learning_rate": 1.8022115199575324e-05, + "loss": 1.3955, + "step": 7750 + }, + { + "epoch": 0.2275823595043749, + "grad_norm": 0.0, + "learning_rate": 1.8021547401995506e-05, + "loss": 1.4609, + "step": 7751 + }, + { + "epoch": 0.22761172118151388, + "grad_norm": 0.0, + "learning_rate": 1.8020979531874778e-05, + "loss": 1.3101, + "step": 7752 + }, + { + "epoch": 0.2276410828586529, + "grad_norm": 0.0, + "learning_rate": 1.8020411589218278e-05, + "loss": 1.416, + "step": 7753 + }, + { + "epoch": 0.22767044453579188, + "grad_norm": 0.0, + "learning_rate": 1.801984357403114e-05, + "loss": 1.4082, + "step": 7754 + }, + { + "epoch": 0.22769980621293087, + "grad_norm": 0.0, + "learning_rate": 1.8019275486318505e-05, + "loss": 1.5146, + "step": 7755 + }, + { + "epoch": 0.2277291678900699, + "grad_norm": 0.0, + "learning_rate": 1.8018707326085506e-05, + "loss": 1.4453, + "step": 7756 + }, + { + "epoch": 0.22775852956720888, + "grad_norm": 0.0, + "learning_rate": 1.8018139093337287e-05, + "loss": 1.4541, + "step": 7757 + }, + { + "epoch": 0.22778789124434787, + "grad_norm": 0.0, + "learning_rate": 1.801757078807898e-05, + "loss": 1.4424, + "step": 7758 + }, + { + "epoch": 0.22781725292148688, + "grad_norm": 0.0, + "learning_rate": 1.8017002410315725e-05, + "loss": 1.3145, + "step": 7759 + }, + { + "epoch": 0.22784661459862587, + "grad_norm": 0.0, + "learning_rate": 1.8016433960052665e-05, + "loss": 1.4072, + "step": 7760 + }, + { + "epoch": 0.22787597627576486, + "grad_norm": 0.0, + "learning_rate": 1.8015865437294944e-05, + "loss": 1.4033, + "step": 7761 + }, + { + "epoch": 0.22790533795290388, + "grad_norm": 0.0, + "learning_rate": 1.8015296842047693e-05, + "loss": 1.4268, + "step": 7762 + }, + { + "epoch": 0.22793469963004287, + "grad_norm": 0.0, + "learning_rate": 1.8014728174316066e-05, + "loss": 1.3936, + "step": 7763 + }, + { + "epoch": 0.22796406130718186, + "grad_norm": 0.0, + "learning_rate": 1.8014159434105194e-05, + "loss": 1.2871, + "step": 7764 + }, + { + "epoch": 0.22799342298432088, + "grad_norm": 0.0, + "learning_rate": 1.801359062142023e-05, + "loss": 1.4062, + "step": 7765 + }, + { + "epoch": 0.22802278466145987, + "grad_norm": 0.0, + "learning_rate": 1.801302173626631e-05, + "loss": 1.4824, + "step": 7766 + }, + { + "epoch": 0.22805214633859885, + "grad_norm": 0.0, + "learning_rate": 1.8012452778648585e-05, + "loss": 1.4082, + "step": 7767 + }, + { + "epoch": 0.22808150801573787, + "grad_norm": 0.0, + "learning_rate": 1.8011883748572194e-05, + "loss": 1.4526, + "step": 7768 + }, + { + "epoch": 0.22811086969287686, + "grad_norm": 0.0, + "learning_rate": 1.801131464604229e-05, + "loss": 1.502, + "step": 7769 + }, + { + "epoch": 0.22814023137001585, + "grad_norm": 0.0, + "learning_rate": 1.8010745471064014e-05, + "loss": 1.4395, + "step": 7770 + }, + { + "epoch": 0.22816959304715487, + "grad_norm": 0.0, + "learning_rate": 1.8010176223642516e-05, + "loss": 1.3237, + "step": 7771 + }, + { + "epoch": 0.22819895472429386, + "grad_norm": 0.0, + "learning_rate": 1.8009606903782943e-05, + "loss": 1.3887, + "step": 7772 + }, + { + "epoch": 0.22822831640143285, + "grad_norm": 0.0, + "learning_rate": 1.8009037511490445e-05, + "loss": 1.4775, + "step": 7773 + }, + { + "epoch": 0.22825767807857184, + "grad_norm": 0.0, + "learning_rate": 1.8008468046770166e-05, + "loss": 1.3984, + "step": 7774 + }, + { + "epoch": 0.22828703975571085, + "grad_norm": 0.0, + "learning_rate": 1.800789850962726e-05, + "loss": 1.3623, + "step": 7775 + }, + { + "epoch": 0.22831640143284984, + "grad_norm": 0.0, + "learning_rate": 1.800732890006688e-05, + "loss": 1.3955, + "step": 7776 + }, + { + "epoch": 0.22834576310998883, + "grad_norm": 0.0, + "learning_rate": 1.8006759218094175e-05, + "loss": 1.4932, + "step": 7777 + }, + { + "epoch": 0.22837512478712785, + "grad_norm": 0.0, + "learning_rate": 1.800618946371429e-05, + "loss": 1.4697, + "step": 7778 + }, + { + "epoch": 0.22840448646426684, + "grad_norm": 0.0, + "learning_rate": 1.800561963693239e-05, + "loss": 1.4072, + "step": 7779 + }, + { + "epoch": 0.22843384814140583, + "grad_norm": 0.0, + "learning_rate": 1.8005049737753614e-05, + "loss": 1.3818, + "step": 7780 + }, + { + "epoch": 0.22846320981854484, + "grad_norm": 0.0, + "learning_rate": 1.8004479766183127e-05, + "loss": 1.502, + "step": 7781 + }, + { + "epoch": 0.22849257149568383, + "grad_norm": 0.0, + "learning_rate": 1.800390972222608e-05, + "loss": 1.5391, + "step": 7782 + }, + { + "epoch": 0.22852193317282282, + "grad_norm": 0.0, + "learning_rate": 1.800333960588763e-05, + "loss": 1.4082, + "step": 7783 + }, + { + "epoch": 0.22855129484996184, + "grad_norm": 0.0, + "learning_rate": 1.8002769417172924e-05, + "loss": 1.4551, + "step": 7784 + }, + { + "epoch": 0.22858065652710083, + "grad_norm": 0.0, + "learning_rate": 1.8002199156087128e-05, + "loss": 1.2559, + "step": 7785 + }, + { + "epoch": 0.22861001820423982, + "grad_norm": 0.0, + "learning_rate": 1.8001628822635394e-05, + "loss": 1.4756, + "step": 7786 + }, + { + "epoch": 0.22863937988137883, + "grad_norm": 0.0, + "learning_rate": 1.8001058416822885e-05, + "loss": 1.3008, + "step": 7787 + }, + { + "epoch": 0.22866874155851782, + "grad_norm": 0.0, + "learning_rate": 1.8000487938654752e-05, + "loss": 1.3994, + "step": 7788 + }, + { + "epoch": 0.2286981032356568, + "grad_norm": 0.0, + "learning_rate": 1.799991738813616e-05, + "loss": 1.6562, + "step": 7789 + }, + { + "epoch": 0.22872746491279583, + "grad_norm": 0.0, + "learning_rate": 1.7999346765272267e-05, + "loss": 1.293, + "step": 7790 + }, + { + "epoch": 0.22875682658993482, + "grad_norm": 0.0, + "learning_rate": 1.799877607006823e-05, + "loss": 1.3574, + "step": 7791 + }, + { + "epoch": 0.2287861882670738, + "grad_norm": 0.0, + "learning_rate": 1.799820530252921e-05, + "loss": 1.5479, + "step": 7792 + }, + { + "epoch": 0.22881554994421283, + "grad_norm": 0.0, + "learning_rate": 1.7997634462660375e-05, + "loss": 1.3584, + "step": 7793 + }, + { + "epoch": 0.22884491162135182, + "grad_norm": 0.0, + "learning_rate": 1.7997063550466888e-05, + "loss": 1.5645, + "step": 7794 + }, + { + "epoch": 0.2288742732984908, + "grad_norm": 0.0, + "learning_rate": 1.79964925659539e-05, + "loss": 1.4023, + "step": 7795 + }, + { + "epoch": 0.22890363497562982, + "grad_norm": 0.0, + "learning_rate": 1.799592150912658e-05, + "loss": 1.4033, + "step": 7796 + }, + { + "epoch": 0.2289329966527688, + "grad_norm": 0.0, + "learning_rate": 1.79953503799901e-05, + "loss": 1.5469, + "step": 7797 + }, + { + "epoch": 0.2289623583299078, + "grad_norm": 0.0, + "learning_rate": 1.7994779178549617e-05, + "loss": 1.2734, + "step": 7798 + }, + { + "epoch": 0.2289917200070468, + "grad_norm": 0.0, + "learning_rate": 1.7994207904810298e-05, + "loss": 1.3008, + "step": 7799 + }, + { + "epoch": 0.2290210816841858, + "grad_norm": 0.0, + "learning_rate": 1.799363655877731e-05, + "loss": 1.4102, + "step": 7800 + }, + { + "epoch": 0.2290504433613248, + "grad_norm": 0.0, + "learning_rate": 1.799306514045582e-05, + "loss": 1.3965, + "step": 7801 + }, + { + "epoch": 0.22907980503846379, + "grad_norm": 0.0, + "learning_rate": 1.7992493649850994e-05, + "loss": 1.4131, + "step": 7802 + }, + { + "epoch": 0.2291091667156028, + "grad_norm": 0.0, + "learning_rate": 1.7991922086968e-05, + "loss": 1.3975, + "step": 7803 + }, + { + "epoch": 0.2291385283927418, + "grad_norm": 0.0, + "learning_rate": 1.7991350451812007e-05, + "loss": 1.3672, + "step": 7804 + }, + { + "epoch": 0.22916789006988078, + "grad_norm": 0.0, + "learning_rate": 1.7990778744388187e-05, + "loss": 1.418, + "step": 7805 + }, + { + "epoch": 0.2291972517470198, + "grad_norm": 0.0, + "learning_rate": 1.799020696470171e-05, + "loss": 1.4502, + "step": 7806 + }, + { + "epoch": 0.2292266134241588, + "grad_norm": 0.0, + "learning_rate": 1.7989635112757745e-05, + "loss": 1.2793, + "step": 7807 + }, + { + "epoch": 0.22925597510129778, + "grad_norm": 0.0, + "learning_rate": 1.798906318856146e-05, + "loss": 1.3633, + "step": 7808 + }, + { + "epoch": 0.2292853367784368, + "grad_norm": 0.0, + "learning_rate": 1.798849119211803e-05, + "loss": 1.3662, + "step": 7809 + }, + { + "epoch": 0.22931469845557578, + "grad_norm": 0.0, + "learning_rate": 1.7987919123432632e-05, + "loss": 1.4463, + "step": 7810 + }, + { + "epoch": 0.22934406013271477, + "grad_norm": 0.0, + "learning_rate": 1.7987346982510435e-05, + "loss": 1.3398, + "step": 7811 + }, + { + "epoch": 0.2293734218098538, + "grad_norm": 0.0, + "learning_rate": 1.7986774769356615e-05, + "loss": 1.3408, + "step": 7812 + }, + { + "epoch": 0.22940278348699278, + "grad_norm": 0.0, + "learning_rate": 1.7986202483976343e-05, + "loss": 1.3945, + "step": 7813 + }, + { + "epoch": 0.22943214516413177, + "grad_norm": 0.0, + "learning_rate": 1.7985630126374796e-05, + "loss": 1.4248, + "step": 7814 + }, + { + "epoch": 0.22946150684127078, + "grad_norm": 0.0, + "learning_rate": 1.7985057696557152e-05, + "loss": 1.501, + "step": 7815 + }, + { + "epoch": 0.22949086851840977, + "grad_norm": 0.0, + "learning_rate": 1.7984485194528583e-05, + "loss": 1.4756, + "step": 7816 + }, + { + "epoch": 0.22952023019554876, + "grad_norm": 0.0, + "learning_rate": 1.7983912620294274e-05, + "loss": 1.3975, + "step": 7817 + }, + { + "epoch": 0.22954959187268778, + "grad_norm": 0.0, + "learning_rate": 1.7983339973859398e-05, + "loss": 1.3945, + "step": 7818 + }, + { + "epoch": 0.22957895354982677, + "grad_norm": 0.0, + "learning_rate": 1.7982767255229133e-05, + "loss": 1.4531, + "step": 7819 + }, + { + "epoch": 0.22960831522696576, + "grad_norm": 0.0, + "learning_rate": 1.798219446440866e-05, + "loss": 1.4297, + "step": 7820 + }, + { + "epoch": 0.22963767690410478, + "grad_norm": 0.0, + "learning_rate": 1.7981621601403155e-05, + "loss": 1.3838, + "step": 7821 + }, + { + "epoch": 0.22966703858124377, + "grad_norm": 0.0, + "learning_rate": 1.7981048666217807e-05, + "loss": 1.4658, + "step": 7822 + }, + { + "epoch": 0.22969640025838275, + "grad_norm": 0.0, + "learning_rate": 1.7980475658857788e-05, + "loss": 1.4824, + "step": 7823 + }, + { + "epoch": 0.22972576193552174, + "grad_norm": 0.0, + "learning_rate": 1.7979902579328282e-05, + "loss": 1.3301, + "step": 7824 + }, + { + "epoch": 0.22975512361266076, + "grad_norm": 0.0, + "learning_rate": 1.797932942763448e-05, + "loss": 1.4121, + "step": 7825 + }, + { + "epoch": 0.22978448528979975, + "grad_norm": 0.0, + "learning_rate": 1.7978756203781552e-05, + "loss": 1.4482, + "step": 7826 + }, + { + "epoch": 0.22981384696693874, + "grad_norm": 0.0, + "learning_rate": 1.797818290777469e-05, + "loss": 1.3525, + "step": 7827 + }, + { + "epoch": 0.22984320864407776, + "grad_norm": 0.0, + "learning_rate": 1.797760953961908e-05, + "loss": 1.293, + "step": 7828 + }, + { + "epoch": 0.22987257032121675, + "grad_norm": 0.0, + "learning_rate": 1.7977036099319898e-05, + "loss": 1.4873, + "step": 7829 + }, + { + "epoch": 0.22990193199835574, + "grad_norm": 0.0, + "learning_rate": 1.797646258688234e-05, + "loss": 1.4023, + "step": 7830 + }, + { + "epoch": 0.22993129367549475, + "grad_norm": 0.0, + "learning_rate": 1.797588900231159e-05, + "loss": 1.4717, + "step": 7831 + }, + { + "epoch": 0.22996065535263374, + "grad_norm": 0.0, + "learning_rate": 1.7975315345612826e-05, + "loss": 1.4141, + "step": 7832 + }, + { + "epoch": 0.22999001702977273, + "grad_norm": 0.0, + "learning_rate": 1.7974741616791247e-05, + "loss": 1.5742, + "step": 7833 + }, + { + "epoch": 0.23001937870691175, + "grad_norm": 0.0, + "learning_rate": 1.797416781585204e-05, + "loss": 1.4385, + "step": 7834 + }, + { + "epoch": 0.23004874038405074, + "grad_norm": 0.0, + "learning_rate": 1.7973593942800387e-05, + "loss": 1.3047, + "step": 7835 + }, + { + "epoch": 0.23007810206118973, + "grad_norm": 0.0, + "learning_rate": 1.7973019997641484e-05, + "loss": 1.4961, + "step": 7836 + }, + { + "epoch": 0.23010746373832874, + "grad_norm": 0.0, + "learning_rate": 1.7972445980380518e-05, + "loss": 1.5322, + "step": 7837 + }, + { + "epoch": 0.23013682541546773, + "grad_norm": 0.0, + "learning_rate": 1.7971871891022682e-05, + "loss": 1.5342, + "step": 7838 + }, + { + "epoch": 0.23016618709260672, + "grad_norm": 0.0, + "learning_rate": 1.7971297729573164e-05, + "loss": 1.5996, + "step": 7839 + }, + { + "epoch": 0.23019554876974574, + "grad_norm": 0.0, + "learning_rate": 1.7970723496037165e-05, + "loss": 1.4043, + "step": 7840 + }, + { + "epoch": 0.23022491044688473, + "grad_norm": 0.0, + "learning_rate": 1.7970149190419864e-05, + "loss": 1.4639, + "step": 7841 + }, + { + "epoch": 0.23025427212402372, + "grad_norm": 0.0, + "learning_rate": 1.796957481272647e-05, + "loss": 1.2705, + "step": 7842 + }, + { + "epoch": 0.23028363380116273, + "grad_norm": 0.0, + "learning_rate": 1.7969000362962166e-05, + "loss": 1.6182, + "step": 7843 + }, + { + "epoch": 0.23031299547830172, + "grad_norm": 0.0, + "learning_rate": 1.7968425841132154e-05, + "loss": 1.5176, + "step": 7844 + }, + { + "epoch": 0.2303423571554407, + "grad_norm": 0.0, + "learning_rate": 1.7967851247241627e-05, + "loss": 1.3994, + "step": 7845 + }, + { + "epoch": 0.23037171883257973, + "grad_norm": 0.0, + "learning_rate": 1.7967276581295775e-05, + "loss": 1.5283, + "step": 7846 + }, + { + "epoch": 0.23040108050971872, + "grad_norm": 0.0, + "learning_rate": 1.7966701843299807e-05, + "loss": 1.498, + "step": 7847 + }, + { + "epoch": 0.2304304421868577, + "grad_norm": 0.0, + "learning_rate": 1.7966127033258906e-05, + "loss": 1.5225, + "step": 7848 + }, + { + "epoch": 0.2304598038639967, + "grad_norm": 0.0, + "learning_rate": 1.7965552151178286e-05, + "loss": 1.4473, + "step": 7849 + }, + { + "epoch": 0.23048916554113572, + "grad_norm": 0.0, + "learning_rate": 1.7964977197063132e-05, + "loss": 1.5547, + "step": 7850 + }, + { + "epoch": 0.2305185272182747, + "grad_norm": 0.0, + "learning_rate": 1.7964402170918654e-05, + "loss": 1.541, + "step": 7851 + }, + { + "epoch": 0.2305478888954137, + "grad_norm": 0.0, + "learning_rate": 1.7963827072750043e-05, + "loss": 1.5215, + "step": 7852 + }, + { + "epoch": 0.2305772505725527, + "grad_norm": 0.0, + "learning_rate": 1.796325190256251e-05, + "loss": 1.3193, + "step": 7853 + }, + { + "epoch": 0.2306066122496917, + "grad_norm": 0.0, + "learning_rate": 1.7962676660361246e-05, + "loss": 1.3848, + "step": 7854 + }, + { + "epoch": 0.2306359739268307, + "grad_norm": 0.0, + "learning_rate": 1.7962101346151457e-05, + "loss": 1.4658, + "step": 7855 + }, + { + "epoch": 0.2306653356039697, + "grad_norm": 0.0, + "learning_rate": 1.796152595993835e-05, + "loss": 1.4141, + "step": 7856 + }, + { + "epoch": 0.2306946972811087, + "grad_norm": 0.0, + "learning_rate": 1.796095050172712e-05, + "loss": 1.4795, + "step": 7857 + }, + { + "epoch": 0.23072405895824769, + "grad_norm": 0.0, + "learning_rate": 1.796037497152298e-05, + "loss": 1.4365, + "step": 7858 + }, + { + "epoch": 0.2307534206353867, + "grad_norm": 0.0, + "learning_rate": 1.795979936933113e-05, + "loss": 1.4102, + "step": 7859 + }, + { + "epoch": 0.2307827823125257, + "grad_norm": 0.0, + "learning_rate": 1.7959223695156774e-05, + "loss": 1.3984, + "step": 7860 + }, + { + "epoch": 0.23081214398966468, + "grad_norm": 0.0, + "learning_rate": 1.7958647949005123e-05, + "loss": 1.4121, + "step": 7861 + }, + { + "epoch": 0.2308415056668037, + "grad_norm": 0.0, + "learning_rate": 1.7958072130881376e-05, + "loss": 1.582, + "step": 7862 + }, + { + "epoch": 0.2308708673439427, + "grad_norm": 0.0, + "learning_rate": 1.795749624079075e-05, + "loss": 1.4971, + "step": 7863 + }, + { + "epoch": 0.23090022902108168, + "grad_norm": 0.0, + "learning_rate": 1.795692027873844e-05, + "loss": 1.4375, + "step": 7864 + }, + { + "epoch": 0.2309295906982207, + "grad_norm": 0.0, + "learning_rate": 1.795634424472967e-05, + "loss": 1.3506, + "step": 7865 + }, + { + "epoch": 0.23095895237535968, + "grad_norm": 0.0, + "learning_rate": 1.7955768138769636e-05, + "loss": 1.3643, + "step": 7866 + }, + { + "epoch": 0.23098831405249867, + "grad_norm": 0.0, + "learning_rate": 1.7955191960863553e-05, + "loss": 1.4482, + "step": 7867 + }, + { + "epoch": 0.2310176757296377, + "grad_norm": 0.0, + "learning_rate": 1.7954615711016634e-05, + "loss": 1.4209, + "step": 7868 + }, + { + "epoch": 0.23104703740677668, + "grad_norm": 0.0, + "learning_rate": 1.7954039389234087e-05, + "loss": 1.3799, + "step": 7869 + }, + { + "epoch": 0.23107639908391567, + "grad_norm": 0.0, + "learning_rate": 1.7953462995521126e-05, + "loss": 1.374, + "step": 7870 + }, + { + "epoch": 0.23110576076105468, + "grad_norm": 0.0, + "learning_rate": 1.7952886529882962e-05, + "loss": 1.3809, + "step": 7871 + }, + { + "epoch": 0.23113512243819367, + "grad_norm": 0.0, + "learning_rate": 1.795230999232481e-05, + "loss": 1.4131, + "step": 7872 + }, + { + "epoch": 0.23116448411533266, + "grad_norm": 0.0, + "learning_rate": 1.795173338285188e-05, + "loss": 1.4346, + "step": 7873 + }, + { + "epoch": 0.23119384579247165, + "grad_norm": 0.0, + "learning_rate": 1.7951156701469388e-05, + "loss": 1.3491, + "step": 7874 + }, + { + "epoch": 0.23122320746961067, + "grad_norm": 0.0, + "learning_rate": 1.795057994818255e-05, + "loss": 1.6016, + "step": 7875 + }, + { + "epoch": 0.23125256914674966, + "grad_norm": 0.0, + "learning_rate": 1.795000312299658e-05, + "loss": 1.4395, + "step": 7876 + }, + { + "epoch": 0.23128193082388865, + "grad_norm": 0.0, + "learning_rate": 1.79494262259167e-05, + "loss": 1.6221, + "step": 7877 + }, + { + "epoch": 0.23131129250102767, + "grad_norm": 0.0, + "learning_rate": 1.7948849256948116e-05, + "loss": 1.3779, + "step": 7878 + }, + { + "epoch": 0.23134065417816665, + "grad_norm": 0.0, + "learning_rate": 1.794827221609606e-05, + "loss": 1.4424, + "step": 7879 + }, + { + "epoch": 0.23137001585530564, + "grad_norm": 0.0, + "learning_rate": 1.794769510336574e-05, + "loss": 1.2832, + "step": 7880 + }, + { + "epoch": 0.23139937753244466, + "grad_norm": 0.0, + "learning_rate": 1.7947117918762377e-05, + "loss": 1.7412, + "step": 7881 + }, + { + "epoch": 0.23142873920958365, + "grad_norm": 0.0, + "learning_rate": 1.7946540662291193e-05, + "loss": 1.3838, + "step": 7882 + }, + { + "epoch": 0.23145810088672264, + "grad_norm": 0.0, + "learning_rate": 1.7945963333957403e-05, + "loss": 1.4805, + "step": 7883 + }, + { + "epoch": 0.23148746256386166, + "grad_norm": 0.0, + "learning_rate": 1.7945385933766233e-05, + "loss": 1.5371, + "step": 7884 + }, + { + "epoch": 0.23151682424100065, + "grad_norm": 0.0, + "learning_rate": 1.79448084617229e-05, + "loss": 1.4111, + "step": 7885 + }, + { + "epoch": 0.23154618591813964, + "grad_norm": 0.0, + "learning_rate": 1.7944230917832635e-05, + "loss": 1.3105, + "step": 7886 + }, + { + "epoch": 0.23157554759527865, + "grad_norm": 0.0, + "learning_rate": 1.7943653302100657e-05, + "loss": 1.334, + "step": 7887 + }, + { + "epoch": 0.23160490927241764, + "grad_norm": 0.0, + "learning_rate": 1.794307561453218e-05, + "loss": 1.4668, + "step": 7888 + }, + { + "epoch": 0.23163427094955663, + "grad_norm": 0.0, + "learning_rate": 1.794249785513244e-05, + "loss": 1.4902, + "step": 7889 + }, + { + "epoch": 0.23166363262669565, + "grad_norm": 0.0, + "learning_rate": 1.794192002390666e-05, + "loss": 1.4521, + "step": 7890 + }, + { + "epoch": 0.23169299430383464, + "grad_norm": 0.0, + "learning_rate": 1.794134212086006e-05, + "loss": 1.5479, + "step": 7891 + }, + { + "epoch": 0.23172235598097363, + "grad_norm": 0.0, + "learning_rate": 1.7940764145997873e-05, + "loss": 1.3311, + "step": 7892 + }, + { + "epoch": 0.23175171765811264, + "grad_norm": 0.0, + "learning_rate": 1.794018609932532e-05, + "loss": 1.4072, + "step": 7893 + }, + { + "epoch": 0.23178107933525163, + "grad_norm": 0.0, + "learning_rate": 1.793960798084763e-05, + "loss": 1.4248, + "step": 7894 + }, + { + "epoch": 0.23181044101239062, + "grad_norm": 0.0, + "learning_rate": 1.7939029790570034e-05, + "loss": 1.4189, + "step": 7895 + }, + { + "epoch": 0.23183980268952964, + "grad_norm": 0.0, + "learning_rate": 1.7938451528497754e-05, + "loss": 1.4316, + "step": 7896 + }, + { + "epoch": 0.23186916436666863, + "grad_norm": 0.0, + "learning_rate": 1.7937873194636027e-05, + "loss": 1.3076, + "step": 7897 + }, + { + "epoch": 0.23189852604380762, + "grad_norm": 0.0, + "learning_rate": 1.7937294788990078e-05, + "loss": 1.5791, + "step": 7898 + }, + { + "epoch": 0.2319278877209466, + "grad_norm": 0.0, + "learning_rate": 1.793671631156514e-05, + "loss": 1.4258, + "step": 7899 + }, + { + "epoch": 0.23195724939808562, + "grad_norm": 0.0, + "learning_rate": 1.7936137762366445e-05, + "loss": 1.2837, + "step": 7900 + }, + { + "epoch": 0.2319866110752246, + "grad_norm": 0.0, + "learning_rate": 1.793555914139922e-05, + "loss": 1.6699, + "step": 7901 + }, + { + "epoch": 0.2320159727523636, + "grad_norm": 0.0, + "learning_rate": 1.793498044866871e-05, + "loss": 1.5234, + "step": 7902 + }, + { + "epoch": 0.23204533442950262, + "grad_norm": 0.0, + "learning_rate": 1.7934401684180128e-05, + "loss": 1.3574, + "step": 7903 + }, + { + "epoch": 0.2320746961066416, + "grad_norm": 0.0, + "learning_rate": 1.7933822847938728e-05, + "loss": 1.4775, + "step": 7904 + }, + { + "epoch": 0.2321040577837806, + "grad_norm": 0.0, + "learning_rate": 1.7933243939949735e-05, + "loss": 1.3184, + "step": 7905 + }, + { + "epoch": 0.23213341946091962, + "grad_norm": 0.0, + "learning_rate": 1.7932664960218386e-05, + "loss": 1.457, + "step": 7906 + }, + { + "epoch": 0.2321627811380586, + "grad_norm": 0.0, + "learning_rate": 1.7932085908749912e-05, + "loss": 1.4639, + "step": 7907 + }, + { + "epoch": 0.2321921428151976, + "grad_norm": 0.0, + "learning_rate": 1.7931506785549556e-05, + "loss": 1.4209, + "step": 7908 + }, + { + "epoch": 0.2322215044923366, + "grad_norm": 0.0, + "learning_rate": 1.793092759062255e-05, + "loss": 1.5186, + "step": 7909 + }, + { + "epoch": 0.2322508661694756, + "grad_norm": 0.0, + "learning_rate": 1.793034832397414e-05, + "loss": 1.3535, + "step": 7910 + }, + { + "epoch": 0.2322802278466146, + "grad_norm": 0.0, + "learning_rate": 1.7929768985609555e-05, + "loss": 1.3779, + "step": 7911 + }, + { + "epoch": 0.2323095895237536, + "grad_norm": 0.0, + "learning_rate": 1.792918957553404e-05, + "loss": 1.3691, + "step": 7912 + }, + { + "epoch": 0.2323389512008926, + "grad_norm": 0.0, + "learning_rate": 1.7928610093752833e-05, + "loss": 1.3457, + "step": 7913 + }, + { + "epoch": 0.23236831287803159, + "grad_norm": 0.0, + "learning_rate": 1.7928030540271174e-05, + "loss": 1.3545, + "step": 7914 + }, + { + "epoch": 0.2323976745551706, + "grad_norm": 0.0, + "learning_rate": 1.7927450915094305e-05, + "loss": 1.3291, + "step": 7915 + }, + { + "epoch": 0.2324270362323096, + "grad_norm": 0.0, + "learning_rate": 1.7926871218227465e-05, + "loss": 1.4375, + "step": 7916 + }, + { + "epoch": 0.23245639790944858, + "grad_norm": 0.0, + "learning_rate": 1.79262914496759e-05, + "loss": 1.4385, + "step": 7917 + }, + { + "epoch": 0.2324857595865876, + "grad_norm": 0.0, + "learning_rate": 1.792571160944485e-05, + "loss": 1.4092, + "step": 7918 + }, + { + "epoch": 0.2325151212637266, + "grad_norm": 0.0, + "learning_rate": 1.7925131697539564e-05, + "loss": 1.3984, + "step": 7919 + }, + { + "epoch": 0.23254448294086558, + "grad_norm": 0.0, + "learning_rate": 1.7924551713965278e-05, + "loss": 1.4629, + "step": 7920 + }, + { + "epoch": 0.2325738446180046, + "grad_norm": 0.0, + "learning_rate": 1.7923971658727244e-05, + "loss": 1.4844, + "step": 7921 + }, + { + "epoch": 0.23260320629514358, + "grad_norm": 0.0, + "learning_rate": 1.7923391531830704e-05, + "loss": 1.4717, + "step": 7922 + }, + { + "epoch": 0.23263256797228257, + "grad_norm": 0.0, + "learning_rate": 1.7922811333280906e-05, + "loss": 1.4277, + "step": 7923 + }, + { + "epoch": 0.23266192964942156, + "grad_norm": 0.0, + "learning_rate": 1.7922231063083095e-05, + "loss": 1.502, + "step": 7924 + }, + { + "epoch": 0.23269129132656058, + "grad_norm": 0.0, + "learning_rate": 1.792165072124252e-05, + "loss": 1.2627, + "step": 7925 + }, + { + "epoch": 0.23272065300369957, + "grad_norm": 0.0, + "learning_rate": 1.792107030776443e-05, + "loss": 1.4785, + "step": 7926 + }, + { + "epoch": 0.23275001468083856, + "grad_norm": 0.0, + "learning_rate": 1.792048982265407e-05, + "loss": 1.5293, + "step": 7927 + }, + { + "epoch": 0.23277937635797757, + "grad_norm": 0.0, + "learning_rate": 1.7919909265916696e-05, + "loss": 1.4912, + "step": 7928 + }, + { + "epoch": 0.23280873803511656, + "grad_norm": 0.0, + "learning_rate": 1.791932863755755e-05, + "loss": 1.3906, + "step": 7929 + }, + { + "epoch": 0.23283809971225555, + "grad_norm": 0.0, + "learning_rate": 1.7918747937581888e-05, + "loss": 1.4326, + "step": 7930 + }, + { + "epoch": 0.23286746138939457, + "grad_norm": 0.0, + "learning_rate": 1.791816716599496e-05, + "loss": 1.416, + "step": 7931 + }, + { + "epoch": 0.23289682306653356, + "grad_norm": 0.0, + "learning_rate": 1.791758632280202e-05, + "loss": 1.4424, + "step": 7932 + }, + { + "epoch": 0.23292618474367255, + "grad_norm": 0.0, + "learning_rate": 1.791700540800832e-05, + "loss": 1.4014, + "step": 7933 + }, + { + "epoch": 0.23295554642081157, + "grad_norm": 0.0, + "learning_rate": 1.791642442161911e-05, + "loss": 1.5635, + "step": 7934 + }, + { + "epoch": 0.23298490809795055, + "grad_norm": 0.0, + "learning_rate": 1.7915843363639647e-05, + "loss": 1.3799, + "step": 7935 + }, + { + "epoch": 0.23301426977508954, + "grad_norm": 0.0, + "learning_rate": 1.7915262234075187e-05, + "loss": 1.3574, + "step": 7936 + }, + { + "epoch": 0.23304363145222856, + "grad_norm": 0.0, + "learning_rate": 1.7914681032930982e-05, + "loss": 1.4131, + "step": 7937 + }, + { + "epoch": 0.23307299312936755, + "grad_norm": 0.0, + "learning_rate": 1.791409976021229e-05, + "loss": 1.3496, + "step": 7938 + }, + { + "epoch": 0.23310235480650654, + "grad_norm": 0.0, + "learning_rate": 1.7913518415924365e-05, + "loss": 1.4111, + "step": 7939 + }, + { + "epoch": 0.23313171648364556, + "grad_norm": 0.0, + "learning_rate": 1.791293700007247e-05, + "loss": 1.4912, + "step": 7940 + }, + { + "epoch": 0.23316107816078455, + "grad_norm": 0.0, + "learning_rate": 1.7912355512661855e-05, + "loss": 1.4854, + "step": 7941 + }, + { + "epoch": 0.23319043983792354, + "grad_norm": 0.0, + "learning_rate": 1.7911773953697787e-05, + "loss": 1.3877, + "step": 7942 + }, + { + "epoch": 0.23321980151506255, + "grad_norm": 0.0, + "learning_rate": 1.7911192323185516e-05, + "loss": 1.3096, + "step": 7943 + }, + { + "epoch": 0.23324916319220154, + "grad_norm": 0.0, + "learning_rate": 1.7910610621130312e-05, + "loss": 1.3018, + "step": 7944 + }, + { + "epoch": 0.23327852486934053, + "grad_norm": 0.0, + "learning_rate": 1.791002884753743e-05, + "loss": 1.4707, + "step": 7945 + }, + { + "epoch": 0.23330788654647955, + "grad_norm": 0.0, + "learning_rate": 1.7909447002412126e-05, + "loss": 1.3965, + "step": 7946 + }, + { + "epoch": 0.23333724822361854, + "grad_norm": 0.0, + "learning_rate": 1.7908865085759672e-05, + "loss": 1.3604, + "step": 7947 + }, + { + "epoch": 0.23336660990075753, + "grad_norm": 0.0, + "learning_rate": 1.790828309758532e-05, + "loss": 1.4287, + "step": 7948 + }, + { + "epoch": 0.23339597157789652, + "grad_norm": 0.0, + "learning_rate": 1.7907701037894344e-05, + "loss": 1.4482, + "step": 7949 + }, + { + "epoch": 0.23342533325503553, + "grad_norm": 0.0, + "learning_rate": 1.7907118906692e-05, + "loss": 1.3154, + "step": 7950 + }, + { + "epoch": 0.23345469493217452, + "grad_norm": 0.0, + "learning_rate": 1.790653670398356e-05, + "loss": 1.5596, + "step": 7951 + }, + { + "epoch": 0.2334840566093135, + "grad_norm": 0.0, + "learning_rate": 1.7905954429774274e-05, + "loss": 1.3584, + "step": 7952 + }, + { + "epoch": 0.23351341828645253, + "grad_norm": 0.0, + "learning_rate": 1.7905372084069426e-05, + "loss": 1.5215, + "step": 7953 + }, + { + "epoch": 0.23354277996359152, + "grad_norm": 0.0, + "learning_rate": 1.790478966687427e-05, + "loss": 1.4023, + "step": 7954 + }, + { + "epoch": 0.2335721416407305, + "grad_norm": 0.0, + "learning_rate": 1.7904207178194078e-05, + "loss": 1.4424, + "step": 7955 + }, + { + "epoch": 0.23360150331786952, + "grad_norm": 0.0, + "learning_rate": 1.7903624618034112e-05, + "loss": 1.3994, + "step": 7956 + }, + { + "epoch": 0.2336308649950085, + "grad_norm": 0.0, + "learning_rate": 1.790304198639965e-05, + "loss": 1.4326, + "step": 7957 + }, + { + "epoch": 0.2336602266721475, + "grad_norm": 0.0, + "learning_rate": 1.790245928329595e-05, + "loss": 1.4121, + "step": 7958 + }, + { + "epoch": 0.23368958834928652, + "grad_norm": 0.0, + "learning_rate": 1.790187650872829e-05, + "loss": 1.4385, + "step": 7959 + }, + { + "epoch": 0.2337189500264255, + "grad_norm": 0.0, + "learning_rate": 1.7901293662701937e-05, + "loss": 1.4492, + "step": 7960 + }, + { + "epoch": 0.2337483117035645, + "grad_norm": 0.0, + "learning_rate": 1.7900710745222162e-05, + "loss": 1.4062, + "step": 7961 + }, + { + "epoch": 0.23377767338070352, + "grad_norm": 0.0, + "learning_rate": 1.7900127756294233e-05, + "loss": 1.5332, + "step": 7962 + }, + { + "epoch": 0.2338070350578425, + "grad_norm": 0.0, + "learning_rate": 1.789954469592343e-05, + "loss": 1.4385, + "step": 7963 + }, + { + "epoch": 0.2338363967349815, + "grad_norm": 0.0, + "learning_rate": 1.7898961564115018e-05, + "loss": 1.4961, + "step": 7964 + }, + { + "epoch": 0.2338657584121205, + "grad_norm": 0.0, + "learning_rate": 1.7898378360874275e-05, + "loss": 1.4229, + "step": 7965 + }, + { + "epoch": 0.2338951200892595, + "grad_norm": 0.0, + "learning_rate": 1.7897795086206473e-05, + "loss": 1.4336, + "step": 7966 + }, + { + "epoch": 0.2339244817663985, + "grad_norm": 0.0, + "learning_rate": 1.789721174011689e-05, + "loss": 1.3872, + "step": 7967 + }, + { + "epoch": 0.2339538434435375, + "grad_norm": 0.0, + "learning_rate": 1.7896628322610795e-05, + "loss": 1.3477, + "step": 7968 + }, + { + "epoch": 0.2339832051206765, + "grad_norm": 0.0, + "learning_rate": 1.7896044833693472e-05, + "loss": 1.335, + "step": 7969 + }, + { + "epoch": 0.23401256679781549, + "grad_norm": 0.0, + "learning_rate": 1.789546127337019e-05, + "loss": 1.4834, + "step": 7970 + }, + { + "epoch": 0.2340419284749545, + "grad_norm": 0.0, + "learning_rate": 1.7894877641646232e-05, + "loss": 1.4268, + "step": 7971 + }, + { + "epoch": 0.2340712901520935, + "grad_norm": 0.0, + "learning_rate": 1.789429393852687e-05, + "loss": 1.5342, + "step": 7972 + }, + { + "epoch": 0.23410065182923248, + "grad_norm": 0.0, + "learning_rate": 1.7893710164017388e-05, + "loss": 1.4717, + "step": 7973 + }, + { + "epoch": 0.23413001350637147, + "grad_norm": 0.0, + "learning_rate": 1.7893126318123065e-05, + "loss": 1.4629, + "step": 7974 + }, + { + "epoch": 0.2341593751835105, + "grad_norm": 0.0, + "learning_rate": 1.7892542400849174e-05, + "loss": 1.459, + "step": 7975 + }, + { + "epoch": 0.23418873686064948, + "grad_norm": 0.0, + "learning_rate": 1.7891958412201007e-05, + "loss": 1.458, + "step": 7976 + }, + { + "epoch": 0.23421809853778847, + "grad_norm": 0.0, + "learning_rate": 1.7891374352183836e-05, + "loss": 1.2852, + "step": 7977 + }, + { + "epoch": 0.23424746021492748, + "grad_norm": 0.0, + "learning_rate": 1.7890790220802947e-05, + "loss": 1.4268, + "step": 7978 + }, + { + "epoch": 0.23427682189206647, + "grad_norm": 0.0, + "learning_rate": 1.789020601806362e-05, + "loss": 1.5498, + "step": 7979 + }, + { + "epoch": 0.23430618356920546, + "grad_norm": 0.0, + "learning_rate": 1.7889621743971138e-05, + "loss": 1.3418, + "step": 7980 + }, + { + "epoch": 0.23433554524634448, + "grad_norm": 0.0, + "learning_rate": 1.788903739853079e-05, + "loss": 1.3911, + "step": 7981 + }, + { + "epoch": 0.23436490692348347, + "grad_norm": 0.0, + "learning_rate": 1.788845298174785e-05, + "loss": 1.498, + "step": 7982 + }, + { + "epoch": 0.23439426860062246, + "grad_norm": 0.0, + "learning_rate": 1.7887868493627614e-05, + "loss": 1.3379, + "step": 7983 + }, + { + "epoch": 0.23442363027776147, + "grad_norm": 0.0, + "learning_rate": 1.7887283934175366e-05, + "loss": 1.4102, + "step": 7984 + }, + { + "epoch": 0.23445299195490046, + "grad_norm": 0.0, + "learning_rate": 1.7886699303396383e-05, + "loss": 1.4268, + "step": 7985 + }, + { + "epoch": 0.23448235363203945, + "grad_norm": 0.0, + "learning_rate": 1.7886114601295963e-05, + "loss": 1.5527, + "step": 7986 + }, + { + "epoch": 0.23451171530917847, + "grad_norm": 0.0, + "learning_rate": 1.7885529827879387e-05, + "loss": 1.3857, + "step": 7987 + }, + { + "epoch": 0.23454107698631746, + "grad_norm": 0.0, + "learning_rate": 1.7884944983151947e-05, + "loss": 1.416, + "step": 7988 + }, + { + "epoch": 0.23457043866345645, + "grad_norm": 0.0, + "learning_rate": 1.788436006711893e-05, + "loss": 1.4336, + "step": 7989 + }, + { + "epoch": 0.23459980034059547, + "grad_norm": 0.0, + "learning_rate": 1.7883775079785623e-05, + "loss": 1.4502, + "step": 7990 + }, + { + "epoch": 0.23462916201773446, + "grad_norm": 0.0, + "learning_rate": 1.7883190021157322e-05, + "loss": 1.2925, + "step": 7991 + }, + { + "epoch": 0.23465852369487344, + "grad_norm": 0.0, + "learning_rate": 1.7882604891239314e-05, + "loss": 1.4629, + "step": 7992 + }, + { + "epoch": 0.23468788537201246, + "grad_norm": 0.0, + "learning_rate": 1.788201969003689e-05, + "loss": 1.4668, + "step": 7993 + }, + { + "epoch": 0.23471724704915145, + "grad_norm": 0.0, + "learning_rate": 1.7881434417555345e-05, + "loss": 1.3828, + "step": 7994 + }, + { + "epoch": 0.23474660872629044, + "grad_norm": 0.0, + "learning_rate": 1.7880849073799965e-05, + "loss": 1.5439, + "step": 7995 + }, + { + "epoch": 0.23477597040342946, + "grad_norm": 0.0, + "learning_rate": 1.7880263658776054e-05, + "loss": 1.4551, + "step": 7996 + }, + { + "epoch": 0.23480533208056845, + "grad_norm": 0.0, + "learning_rate": 1.7879678172488896e-05, + "loss": 1.5723, + "step": 7997 + }, + { + "epoch": 0.23483469375770744, + "grad_norm": 0.0, + "learning_rate": 1.7879092614943796e-05, + "loss": 1.418, + "step": 7998 + }, + { + "epoch": 0.23486405543484642, + "grad_norm": 0.0, + "learning_rate": 1.7878506986146042e-05, + "loss": 1.5508, + "step": 7999 + }, + { + "epoch": 0.23489341711198544, + "grad_norm": 0.0, + "learning_rate": 1.787792128610093e-05, + "loss": 1.3462, + "step": 8000 + }, + { + "epoch": 0.23492277878912443, + "grad_norm": 0.0, + "learning_rate": 1.7877335514813755e-05, + "loss": 1.4072, + "step": 8001 + }, + { + "epoch": 0.23495214046626342, + "grad_norm": 0.0, + "learning_rate": 1.7876749672289825e-05, + "loss": 1.3828, + "step": 8002 + }, + { + "epoch": 0.23498150214340244, + "grad_norm": 0.0, + "learning_rate": 1.7876163758534425e-05, + "loss": 1.3643, + "step": 8003 + }, + { + "epoch": 0.23501086382054143, + "grad_norm": 0.0, + "learning_rate": 1.787557777355286e-05, + "loss": 1.4658, + "step": 8004 + }, + { + "epoch": 0.23504022549768042, + "grad_norm": 0.0, + "learning_rate": 1.7874991717350427e-05, + "loss": 1.4258, + "step": 8005 + }, + { + "epoch": 0.23506958717481943, + "grad_norm": 0.0, + "learning_rate": 1.787440558993243e-05, + "loss": 1.3965, + "step": 8006 + }, + { + "epoch": 0.23509894885195842, + "grad_norm": 0.0, + "learning_rate": 1.7873819391304167e-05, + "loss": 1.5576, + "step": 8007 + }, + { + "epoch": 0.2351283105290974, + "grad_norm": 0.0, + "learning_rate": 1.7873233121470936e-05, + "loss": 1.3311, + "step": 8008 + }, + { + "epoch": 0.23515767220623643, + "grad_norm": 0.0, + "learning_rate": 1.7872646780438043e-05, + "loss": 1.5771, + "step": 8009 + }, + { + "epoch": 0.23518703388337542, + "grad_norm": 0.0, + "learning_rate": 1.7872060368210787e-05, + "loss": 1.5312, + "step": 8010 + }, + { + "epoch": 0.2352163955605144, + "grad_norm": 0.0, + "learning_rate": 1.7871473884794475e-05, + "loss": 1.4658, + "step": 8011 + }, + { + "epoch": 0.23524575723765342, + "grad_norm": 0.0, + "learning_rate": 1.7870887330194412e-05, + "loss": 1.377, + "step": 8012 + }, + { + "epoch": 0.2352751189147924, + "grad_norm": 0.0, + "learning_rate": 1.7870300704415893e-05, + "loss": 1.4307, + "step": 8013 + }, + { + "epoch": 0.2353044805919314, + "grad_norm": 0.0, + "learning_rate": 1.786971400746423e-05, + "loss": 1.3574, + "step": 8014 + }, + { + "epoch": 0.23533384226907042, + "grad_norm": 0.0, + "learning_rate": 1.786912723934473e-05, + "loss": 1.2871, + "step": 8015 + }, + { + "epoch": 0.2353632039462094, + "grad_norm": 0.0, + "learning_rate": 1.7868540400062697e-05, + "loss": 1.5137, + "step": 8016 + }, + { + "epoch": 0.2353925656233484, + "grad_norm": 0.0, + "learning_rate": 1.7867953489623438e-05, + "loss": 1.4443, + "step": 8017 + }, + { + "epoch": 0.23542192730048742, + "grad_norm": 0.0, + "learning_rate": 1.786736650803226e-05, + "loss": 1.6084, + "step": 8018 + }, + { + "epoch": 0.2354512889776264, + "grad_norm": 0.0, + "learning_rate": 1.7866779455294472e-05, + "loss": 1.4434, + "step": 8019 + }, + { + "epoch": 0.2354806506547654, + "grad_norm": 0.0, + "learning_rate": 1.7866192331415383e-05, + "loss": 1.4209, + "step": 8020 + }, + { + "epoch": 0.2355100123319044, + "grad_norm": 0.0, + "learning_rate": 1.7865605136400298e-05, + "loss": 1.4639, + "step": 8021 + }, + { + "epoch": 0.2355393740090434, + "grad_norm": 0.0, + "learning_rate": 1.7865017870254538e-05, + "loss": 1.542, + "step": 8022 + }, + { + "epoch": 0.2355687356861824, + "grad_norm": 0.0, + "learning_rate": 1.7864430532983403e-05, + "loss": 1.3672, + "step": 8023 + }, + { + "epoch": 0.23559809736332138, + "grad_norm": 0.0, + "learning_rate": 1.786384312459221e-05, + "loss": 1.376, + "step": 8024 + }, + { + "epoch": 0.2356274590404604, + "grad_norm": 0.0, + "learning_rate": 1.786325564508627e-05, + "loss": 1.459, + "step": 8025 + }, + { + "epoch": 0.23565682071759939, + "grad_norm": 0.0, + "learning_rate": 1.7862668094470892e-05, + "loss": 1.4404, + "step": 8026 + }, + { + "epoch": 0.23568618239473837, + "grad_norm": 0.0, + "learning_rate": 1.7862080472751397e-05, + "loss": 1.4932, + "step": 8027 + }, + { + "epoch": 0.2357155440718774, + "grad_norm": 0.0, + "learning_rate": 1.7861492779933094e-05, + "loss": 1.3818, + "step": 8028 + }, + { + "epoch": 0.23574490574901638, + "grad_norm": 0.0, + "learning_rate": 1.78609050160213e-05, + "loss": 1.3633, + "step": 8029 + }, + { + "epoch": 0.23577426742615537, + "grad_norm": 0.0, + "learning_rate": 1.7860317181021324e-05, + "loss": 1.3066, + "step": 8030 + }, + { + "epoch": 0.2358036291032944, + "grad_norm": 0.0, + "learning_rate": 1.7859729274938493e-05, + "loss": 1.3652, + "step": 8031 + }, + { + "epoch": 0.23583299078043338, + "grad_norm": 0.0, + "learning_rate": 1.7859141297778107e-05, + "loss": 1.4951, + "step": 8032 + }, + { + "epoch": 0.23586235245757237, + "grad_norm": 0.0, + "learning_rate": 1.78585532495455e-05, + "loss": 1.2695, + "step": 8033 + }, + { + "epoch": 0.23589171413471138, + "grad_norm": 0.0, + "learning_rate": 1.7857965130245988e-05, + "loss": 1.4023, + "step": 8034 + }, + { + "epoch": 0.23592107581185037, + "grad_norm": 0.0, + "learning_rate": 1.785737693988488e-05, + "loss": 1.4434, + "step": 8035 + }, + { + "epoch": 0.23595043748898936, + "grad_norm": 0.0, + "learning_rate": 1.78567886784675e-05, + "loss": 1.3877, + "step": 8036 + }, + { + "epoch": 0.23597979916612838, + "grad_norm": 0.0, + "learning_rate": 1.7856200345999166e-05, + "loss": 1.2715, + "step": 8037 + }, + { + "epoch": 0.23600916084326737, + "grad_norm": 0.0, + "learning_rate": 1.78556119424852e-05, + "loss": 1.3613, + "step": 8038 + }, + { + "epoch": 0.23603852252040636, + "grad_norm": 0.0, + "learning_rate": 1.785502346793093e-05, + "loss": 1.4043, + "step": 8039 + }, + { + "epoch": 0.23606788419754537, + "grad_norm": 0.0, + "learning_rate": 1.7854434922341667e-05, + "loss": 1.4424, + "step": 8040 + }, + { + "epoch": 0.23609724587468436, + "grad_norm": 0.0, + "learning_rate": 1.7853846305722737e-05, + "loss": 1.3525, + "step": 8041 + }, + { + "epoch": 0.23612660755182335, + "grad_norm": 0.0, + "learning_rate": 1.7853257618079464e-05, + "loss": 1.4736, + "step": 8042 + }, + { + "epoch": 0.23615596922896237, + "grad_norm": 0.0, + "learning_rate": 1.785266885941717e-05, + "loss": 1.4727, + "step": 8043 + }, + { + "epoch": 0.23618533090610136, + "grad_norm": 0.0, + "learning_rate": 1.785208002974118e-05, + "loss": 1.417, + "step": 8044 + }, + { + "epoch": 0.23621469258324035, + "grad_norm": 0.0, + "learning_rate": 1.7851491129056822e-05, + "loss": 1.4033, + "step": 8045 + }, + { + "epoch": 0.23624405426037937, + "grad_norm": 0.0, + "learning_rate": 1.7850902157369415e-05, + "loss": 1.2671, + "step": 8046 + }, + { + "epoch": 0.23627341593751836, + "grad_norm": 0.0, + "learning_rate": 1.7850313114684293e-05, + "loss": 1.3486, + "step": 8047 + }, + { + "epoch": 0.23630277761465734, + "grad_norm": 0.0, + "learning_rate": 1.784972400100678e-05, + "loss": 1.5205, + "step": 8048 + }, + { + "epoch": 0.23633213929179633, + "grad_norm": 0.0, + "learning_rate": 1.7849134816342197e-05, + "loss": 1.5527, + "step": 8049 + }, + { + "epoch": 0.23636150096893535, + "grad_norm": 0.0, + "learning_rate": 1.784854556069588e-05, + "loss": 1.4355, + "step": 8050 + }, + { + "epoch": 0.23639086264607434, + "grad_norm": 0.0, + "learning_rate": 1.7847956234073156e-05, + "loss": 1.4561, + "step": 8051 + }, + { + "epoch": 0.23642022432321333, + "grad_norm": 0.0, + "learning_rate": 1.7847366836479355e-05, + "loss": 1.3643, + "step": 8052 + }, + { + "epoch": 0.23644958600035235, + "grad_norm": 0.0, + "learning_rate": 1.7846777367919806e-05, + "loss": 1.4326, + "step": 8053 + }, + { + "epoch": 0.23647894767749134, + "grad_norm": 0.0, + "learning_rate": 1.784618782839984e-05, + "loss": 1.4092, + "step": 8054 + }, + { + "epoch": 0.23650830935463033, + "grad_norm": 0.0, + "learning_rate": 1.7845598217924784e-05, + "loss": 1.4023, + "step": 8055 + }, + { + "epoch": 0.23653767103176934, + "grad_norm": 0.0, + "learning_rate": 1.7845008536499977e-05, + "loss": 1.46, + "step": 8056 + }, + { + "epoch": 0.23656703270890833, + "grad_norm": 0.0, + "learning_rate": 1.784441878413075e-05, + "loss": 1.3965, + "step": 8057 + }, + { + "epoch": 0.23659639438604732, + "grad_norm": 0.0, + "learning_rate": 1.7843828960822432e-05, + "loss": 1.3838, + "step": 8058 + }, + { + "epoch": 0.23662575606318634, + "grad_norm": 0.0, + "learning_rate": 1.784323906658036e-05, + "loss": 1.4062, + "step": 8059 + }, + { + "epoch": 0.23665511774032533, + "grad_norm": 0.0, + "learning_rate": 1.784264910140987e-05, + "loss": 1.3125, + "step": 8060 + }, + { + "epoch": 0.23668447941746432, + "grad_norm": 0.0, + "learning_rate": 1.7842059065316294e-05, + "loss": 1.3857, + "step": 8061 + }, + { + "epoch": 0.23671384109460333, + "grad_norm": 0.0, + "learning_rate": 1.7841468958304973e-05, + "loss": 1.458, + "step": 8062 + }, + { + "epoch": 0.23674320277174232, + "grad_norm": 0.0, + "learning_rate": 1.7840878780381236e-05, + "loss": 1.4951, + "step": 8063 + }, + { + "epoch": 0.2367725644488813, + "grad_norm": 0.0, + "learning_rate": 1.784028853155043e-05, + "loss": 1.4268, + "step": 8064 + }, + { + "epoch": 0.23680192612602033, + "grad_norm": 0.0, + "learning_rate": 1.783969821181788e-05, + "loss": 1.25, + "step": 8065 + }, + { + "epoch": 0.23683128780315932, + "grad_norm": 0.0, + "learning_rate": 1.7839107821188934e-05, + "loss": 1.4883, + "step": 8066 + }, + { + "epoch": 0.2368606494802983, + "grad_norm": 0.0, + "learning_rate": 1.7838517359668928e-05, + "loss": 1.6074, + "step": 8067 + }, + { + "epoch": 0.23689001115743732, + "grad_norm": 0.0, + "learning_rate": 1.7837926827263207e-05, + "loss": 1.4951, + "step": 8068 + }, + { + "epoch": 0.23691937283457631, + "grad_norm": 0.0, + "learning_rate": 1.78373362239771e-05, + "loss": 1.4043, + "step": 8069 + }, + { + "epoch": 0.2369487345117153, + "grad_norm": 0.0, + "learning_rate": 1.7836745549815955e-05, + "loss": 1.3955, + "step": 8070 + }, + { + "epoch": 0.23697809618885432, + "grad_norm": 0.0, + "learning_rate": 1.7836154804785114e-05, + "loss": 1.3486, + "step": 8071 + }, + { + "epoch": 0.2370074578659933, + "grad_norm": 0.0, + "learning_rate": 1.7835563988889917e-05, + "loss": 1.4502, + "step": 8072 + }, + { + "epoch": 0.2370368195431323, + "grad_norm": 0.0, + "learning_rate": 1.783497310213571e-05, + "loss": 1.5088, + "step": 8073 + }, + { + "epoch": 0.2370661812202713, + "grad_norm": 0.0, + "learning_rate": 1.7834382144527837e-05, + "loss": 1.3887, + "step": 8074 + }, + { + "epoch": 0.2370955428974103, + "grad_norm": 0.0, + "learning_rate": 1.7833791116071636e-05, + "loss": 1.3701, + "step": 8075 + }, + { + "epoch": 0.2371249045745493, + "grad_norm": 0.0, + "learning_rate": 1.7833200016772456e-05, + "loss": 1.4326, + "step": 8076 + }, + { + "epoch": 0.23715426625168828, + "grad_norm": 0.0, + "learning_rate": 1.7832608846635643e-05, + "loss": 1.4922, + "step": 8077 + }, + { + "epoch": 0.2371836279288273, + "grad_norm": 0.0, + "learning_rate": 1.7832017605666543e-05, + "loss": 1.4775, + "step": 8078 + }, + { + "epoch": 0.2372129896059663, + "grad_norm": 0.0, + "learning_rate": 1.78314262938705e-05, + "loss": 1.2949, + "step": 8079 + }, + { + "epoch": 0.23724235128310528, + "grad_norm": 0.0, + "learning_rate": 1.7830834911252863e-05, + "loss": 1.3535, + "step": 8080 + }, + { + "epoch": 0.2372717129602443, + "grad_norm": 0.0, + "learning_rate": 1.7830243457818982e-05, + "loss": 1.4336, + "step": 8081 + }, + { + "epoch": 0.23730107463738329, + "grad_norm": 0.0, + "learning_rate": 1.7829651933574204e-05, + "loss": 1.3574, + "step": 8082 + }, + { + "epoch": 0.23733043631452228, + "grad_norm": 0.0, + "learning_rate": 1.782906033852388e-05, + "loss": 1.3076, + "step": 8083 + }, + { + "epoch": 0.2373597979916613, + "grad_norm": 0.0, + "learning_rate": 1.7828468672673356e-05, + "loss": 1.4453, + "step": 8084 + }, + { + "epoch": 0.23738915966880028, + "grad_norm": 0.0, + "learning_rate": 1.7827876936027982e-05, + "loss": 1.3457, + "step": 8085 + }, + { + "epoch": 0.23741852134593927, + "grad_norm": 0.0, + "learning_rate": 1.782728512859312e-05, + "loss": 1.4561, + "step": 8086 + }, + { + "epoch": 0.2374478830230783, + "grad_norm": 0.0, + "learning_rate": 1.7826693250374107e-05, + "loss": 1.5107, + "step": 8087 + }, + { + "epoch": 0.23747724470021728, + "grad_norm": 0.0, + "learning_rate": 1.7826101301376306e-05, + "loss": 1.4229, + "step": 8088 + }, + { + "epoch": 0.23750660637735627, + "grad_norm": 0.0, + "learning_rate": 1.7825509281605064e-05, + "loss": 1.4951, + "step": 8089 + }, + { + "epoch": 0.23753596805449528, + "grad_norm": 0.0, + "learning_rate": 1.7824917191065743e-05, + "loss": 1.4375, + "step": 8090 + }, + { + "epoch": 0.23756532973163427, + "grad_norm": 0.0, + "learning_rate": 1.7824325029763687e-05, + "loss": 1.3945, + "step": 8091 + }, + { + "epoch": 0.23759469140877326, + "grad_norm": 0.0, + "learning_rate": 1.7823732797704257e-05, + "loss": 1.377, + "step": 8092 + }, + { + "epoch": 0.23762405308591228, + "grad_norm": 0.0, + "learning_rate": 1.7823140494892808e-05, + "loss": 1.3838, + "step": 8093 + }, + { + "epoch": 0.23765341476305127, + "grad_norm": 0.0, + "learning_rate": 1.7822548121334694e-05, + "loss": 1.416, + "step": 8094 + }, + { + "epoch": 0.23768277644019026, + "grad_norm": 0.0, + "learning_rate": 1.7821955677035278e-05, + "loss": 1.3193, + "step": 8095 + }, + { + "epoch": 0.23771213811732927, + "grad_norm": 0.0, + "learning_rate": 1.7821363161999907e-05, + "loss": 1.4443, + "step": 8096 + }, + { + "epoch": 0.23774149979446826, + "grad_norm": 0.0, + "learning_rate": 1.782077057623395e-05, + "loss": 1.3965, + "step": 8097 + }, + { + "epoch": 0.23777086147160725, + "grad_norm": 0.0, + "learning_rate": 1.7820177919742763e-05, + "loss": 1.4199, + "step": 8098 + }, + { + "epoch": 0.23780022314874624, + "grad_norm": 0.0, + "learning_rate": 1.7819585192531704e-05, + "loss": 1.6318, + "step": 8099 + }, + { + "epoch": 0.23782958482588526, + "grad_norm": 0.0, + "learning_rate": 1.781899239460613e-05, + "loss": 1.3311, + "step": 8100 + }, + { + "epoch": 0.23785894650302425, + "grad_norm": 0.0, + "learning_rate": 1.781839952597141e-05, + "loss": 1.3955, + "step": 8101 + }, + { + "epoch": 0.23788830818016324, + "grad_norm": 0.0, + "learning_rate": 1.78178065866329e-05, + "loss": 1.3506, + "step": 8102 + }, + { + "epoch": 0.23791766985730226, + "grad_norm": 0.0, + "learning_rate": 1.781721357659596e-05, + "loss": 1.5342, + "step": 8103 + }, + { + "epoch": 0.23794703153444124, + "grad_norm": 0.0, + "learning_rate": 1.7816620495865955e-05, + "loss": 1.4082, + "step": 8104 + }, + { + "epoch": 0.23797639321158023, + "grad_norm": 0.0, + "learning_rate": 1.781602734444825e-05, + "loss": 1.2158, + "step": 8105 + }, + { + "epoch": 0.23800575488871925, + "grad_norm": 0.0, + "learning_rate": 1.781543412234821e-05, + "loss": 1.248, + "step": 8106 + }, + { + "epoch": 0.23803511656585824, + "grad_norm": 0.0, + "learning_rate": 1.7814840829571197e-05, + "loss": 1.46, + "step": 8107 + }, + { + "epoch": 0.23806447824299723, + "grad_norm": 0.0, + "learning_rate": 1.7814247466122575e-05, + "loss": 1.4346, + "step": 8108 + }, + { + "epoch": 0.23809383992013625, + "grad_norm": 0.0, + "learning_rate": 1.7813654032007714e-05, + "loss": 1.4551, + "step": 8109 + }, + { + "epoch": 0.23812320159727524, + "grad_norm": 0.0, + "learning_rate": 1.7813060527231976e-05, + "loss": 1.4658, + "step": 8110 + }, + { + "epoch": 0.23815256327441423, + "grad_norm": 0.0, + "learning_rate": 1.7812466951800734e-05, + "loss": 1.3535, + "step": 8111 + }, + { + "epoch": 0.23818192495155324, + "grad_norm": 0.0, + "learning_rate": 1.7811873305719352e-05, + "loss": 1.5, + "step": 8112 + }, + { + "epoch": 0.23821128662869223, + "grad_norm": 0.0, + "learning_rate": 1.78112795889932e-05, + "loss": 1.3994, + "step": 8113 + }, + { + "epoch": 0.23824064830583122, + "grad_norm": 0.0, + "learning_rate": 1.7810685801627642e-05, + "loss": 1.2939, + "step": 8114 + }, + { + "epoch": 0.23827000998297024, + "grad_norm": 0.0, + "learning_rate": 1.7810091943628056e-05, + "loss": 1.3203, + "step": 8115 + }, + { + "epoch": 0.23829937166010923, + "grad_norm": 0.0, + "learning_rate": 1.7809498014999806e-05, + "loss": 1.4814, + "step": 8116 + }, + { + "epoch": 0.23832873333724822, + "grad_norm": 0.0, + "learning_rate": 1.7808904015748266e-05, + "loss": 1.292, + "step": 8117 + }, + { + "epoch": 0.23835809501438723, + "grad_norm": 0.0, + "learning_rate": 1.7808309945878807e-05, + "loss": 1.4009, + "step": 8118 + }, + { + "epoch": 0.23838745669152622, + "grad_norm": 0.0, + "learning_rate": 1.7807715805396802e-05, + "loss": 1.5059, + "step": 8119 + }, + { + "epoch": 0.2384168183686652, + "grad_norm": 0.0, + "learning_rate": 1.7807121594307625e-05, + "loss": 1.3701, + "step": 8120 + }, + { + "epoch": 0.23844618004580423, + "grad_norm": 0.0, + "learning_rate": 1.7806527312616642e-05, + "loss": 1.46, + "step": 8121 + }, + { + "epoch": 0.23847554172294322, + "grad_norm": 0.0, + "learning_rate": 1.780593296032924e-05, + "loss": 1.4629, + "step": 8122 + }, + { + "epoch": 0.2385049034000822, + "grad_norm": 0.0, + "learning_rate": 1.7805338537450785e-05, + "loss": 1.4844, + "step": 8123 + }, + { + "epoch": 0.23853426507722122, + "grad_norm": 0.0, + "learning_rate": 1.7804744043986654e-05, + "loss": 1.3877, + "step": 8124 + }, + { + "epoch": 0.23856362675436021, + "grad_norm": 0.0, + "learning_rate": 1.7804149479942222e-05, + "loss": 1.3862, + "step": 8125 + }, + { + "epoch": 0.2385929884314992, + "grad_norm": 0.0, + "learning_rate": 1.780355484532287e-05, + "loss": 1.5234, + "step": 8126 + }, + { + "epoch": 0.2386223501086382, + "grad_norm": 0.0, + "learning_rate": 1.780296014013397e-05, + "loss": 1.4775, + "step": 8127 + }, + { + "epoch": 0.2386517117857772, + "grad_norm": 0.0, + "learning_rate": 1.7802365364380907e-05, + "loss": 1.373, + "step": 8128 + }, + { + "epoch": 0.2386810734629162, + "grad_norm": 0.0, + "learning_rate": 1.7801770518069055e-05, + "loss": 1.373, + "step": 8129 + }, + { + "epoch": 0.2387104351400552, + "grad_norm": 0.0, + "learning_rate": 1.7801175601203793e-05, + "loss": 1.291, + "step": 8130 + }, + { + "epoch": 0.2387397968171942, + "grad_norm": 0.0, + "learning_rate": 1.7800580613790505e-05, + "loss": 1.3691, + "step": 8131 + }, + { + "epoch": 0.2387691584943332, + "grad_norm": 0.0, + "learning_rate": 1.779998555583456e-05, + "loss": 1.4561, + "step": 8132 + }, + { + "epoch": 0.23879852017147218, + "grad_norm": 0.0, + "learning_rate": 1.779939042734136e-05, + "loss": 1.4443, + "step": 8133 + }, + { + "epoch": 0.2388278818486112, + "grad_norm": 0.0, + "learning_rate": 1.7798795228316267e-05, + "loss": 1.3818, + "step": 8134 + }, + { + "epoch": 0.2388572435257502, + "grad_norm": 0.0, + "learning_rate": 1.7798199958764676e-05, + "loss": 1.4717, + "step": 8135 + }, + { + "epoch": 0.23888660520288918, + "grad_norm": 0.0, + "learning_rate": 1.7797604618691958e-05, + "loss": 1.4629, + "step": 8136 + }, + { + "epoch": 0.2389159668800282, + "grad_norm": 0.0, + "learning_rate": 1.7797009208103514e-05, + "loss": 1.3994, + "step": 8137 + }, + { + "epoch": 0.23894532855716719, + "grad_norm": 0.0, + "learning_rate": 1.7796413727004713e-05, + "loss": 1.4863, + "step": 8138 + }, + { + "epoch": 0.23897469023430618, + "grad_norm": 0.0, + "learning_rate": 1.7795818175400946e-05, + "loss": 1.2764, + "step": 8139 + }, + { + "epoch": 0.2390040519114452, + "grad_norm": 0.0, + "learning_rate": 1.7795222553297604e-05, + "loss": 1.4619, + "step": 8140 + }, + { + "epoch": 0.23903341358858418, + "grad_norm": 0.0, + "learning_rate": 1.7794626860700063e-05, + "loss": 1.4639, + "step": 8141 + }, + { + "epoch": 0.23906277526572317, + "grad_norm": 0.0, + "learning_rate": 1.7794031097613714e-05, + "loss": 1.3789, + "step": 8142 + }, + { + "epoch": 0.2390921369428622, + "grad_norm": 0.0, + "learning_rate": 1.779343526404395e-05, + "loss": 1.4082, + "step": 8143 + }, + { + "epoch": 0.23912149862000118, + "grad_norm": 0.0, + "learning_rate": 1.7792839359996152e-05, + "loss": 1.3877, + "step": 8144 + }, + { + "epoch": 0.23915086029714017, + "grad_norm": 0.0, + "learning_rate": 1.7792243385475712e-05, + "loss": 1.5088, + "step": 8145 + }, + { + "epoch": 0.23918022197427918, + "grad_norm": 0.0, + "learning_rate": 1.779164734048802e-05, + "loss": 1.416, + "step": 8146 + }, + { + "epoch": 0.23920958365141817, + "grad_norm": 0.0, + "learning_rate": 1.7791051225038466e-05, + "loss": 1.3682, + "step": 8147 + }, + { + "epoch": 0.23923894532855716, + "grad_norm": 0.0, + "learning_rate": 1.7790455039132443e-05, + "loss": 1.3652, + "step": 8148 + }, + { + "epoch": 0.23926830700569618, + "grad_norm": 0.0, + "learning_rate": 1.7789858782775336e-05, + "loss": 1.5088, + "step": 8149 + }, + { + "epoch": 0.23929766868283517, + "grad_norm": 0.0, + "learning_rate": 1.7789262455972545e-05, + "loss": 1.3604, + "step": 8150 + }, + { + "epoch": 0.23932703035997416, + "grad_norm": 0.0, + "learning_rate": 1.7788666058729455e-05, + "loss": 1.5, + "step": 8151 + }, + { + "epoch": 0.23935639203711315, + "grad_norm": 0.0, + "learning_rate": 1.7788069591051464e-05, + "loss": 1.4541, + "step": 8152 + }, + { + "epoch": 0.23938575371425216, + "grad_norm": 0.0, + "learning_rate": 1.7787473052943965e-05, + "loss": 1.374, + "step": 8153 + }, + { + "epoch": 0.23941511539139115, + "grad_norm": 0.0, + "learning_rate": 1.7786876444412355e-05, + "loss": 1.3975, + "step": 8154 + }, + { + "epoch": 0.23944447706853014, + "grad_norm": 0.0, + "learning_rate": 1.7786279765462023e-05, + "loss": 1.4619, + "step": 8155 + }, + { + "epoch": 0.23947383874566916, + "grad_norm": 0.0, + "learning_rate": 1.778568301609837e-05, + "loss": 1.3867, + "step": 8156 + }, + { + "epoch": 0.23950320042280815, + "grad_norm": 0.0, + "learning_rate": 1.7785086196326796e-05, + "loss": 1.416, + "step": 8157 + }, + { + "epoch": 0.23953256209994714, + "grad_norm": 0.0, + "learning_rate": 1.7784489306152692e-05, + "loss": 1.291, + "step": 8158 + }, + { + "epoch": 0.23956192377708616, + "grad_norm": 0.0, + "learning_rate": 1.7783892345581452e-05, + "loss": 1.4961, + "step": 8159 + }, + { + "epoch": 0.23959128545422514, + "grad_norm": 0.0, + "learning_rate": 1.778329531461849e-05, + "loss": 1.3809, + "step": 8160 + }, + { + "epoch": 0.23962064713136413, + "grad_norm": 0.0, + "learning_rate": 1.7782698213269184e-05, + "loss": 1.4297, + "step": 8161 + }, + { + "epoch": 0.23965000880850315, + "grad_norm": 0.0, + "learning_rate": 1.778210104153895e-05, + "loss": 1.4209, + "step": 8162 + }, + { + "epoch": 0.23967937048564214, + "grad_norm": 0.0, + "learning_rate": 1.7781503799433183e-05, + "loss": 1.4482, + "step": 8163 + }, + { + "epoch": 0.23970873216278113, + "grad_norm": 0.0, + "learning_rate": 1.7780906486957284e-05, + "loss": 1.3594, + "step": 8164 + }, + { + "epoch": 0.23973809383992015, + "grad_norm": 0.0, + "learning_rate": 1.7780309104116657e-05, + "loss": 1.4258, + "step": 8165 + }, + { + "epoch": 0.23976745551705914, + "grad_norm": 0.0, + "learning_rate": 1.7779711650916704e-05, + "loss": 1.3838, + "step": 8166 + }, + { + "epoch": 0.23979681719419813, + "grad_norm": 0.0, + "learning_rate": 1.777911412736282e-05, + "loss": 1.3936, + "step": 8167 + }, + { + "epoch": 0.23982617887133714, + "grad_norm": 0.0, + "learning_rate": 1.7778516533460418e-05, + "loss": 1.3545, + "step": 8168 + }, + { + "epoch": 0.23985554054847613, + "grad_norm": 0.0, + "learning_rate": 1.7777918869214898e-05, + "loss": 1.5215, + "step": 8169 + }, + { + "epoch": 0.23988490222561512, + "grad_norm": 0.0, + "learning_rate": 1.7777321134631666e-05, + "loss": 1.4092, + "step": 8170 + }, + { + "epoch": 0.23991426390275414, + "grad_norm": 0.0, + "learning_rate": 1.7776723329716128e-05, + "loss": 1.335, + "step": 8171 + }, + { + "epoch": 0.23994362557989313, + "grad_norm": 0.0, + "learning_rate": 1.777612545447369e-05, + "loss": 1.5459, + "step": 8172 + }, + { + "epoch": 0.23997298725703212, + "grad_norm": 0.0, + "learning_rate": 1.7775527508909755e-05, + "loss": 1.2598, + "step": 8173 + }, + { + "epoch": 0.24000234893417113, + "grad_norm": 0.0, + "learning_rate": 1.7774929493029734e-05, + "loss": 1.5234, + "step": 8174 + }, + { + "epoch": 0.24003171061131012, + "grad_norm": 0.0, + "learning_rate": 1.7774331406839037e-05, + "loss": 1.501, + "step": 8175 + }, + { + "epoch": 0.2400610722884491, + "grad_norm": 0.0, + "learning_rate": 1.7773733250343067e-05, + "loss": 1.4639, + "step": 8176 + }, + { + "epoch": 0.2400904339655881, + "grad_norm": 0.0, + "learning_rate": 1.7773135023547238e-05, + "loss": 1.4385, + "step": 8177 + }, + { + "epoch": 0.24011979564272712, + "grad_norm": 0.0, + "learning_rate": 1.7772536726456958e-05, + "loss": 1.3848, + "step": 8178 + }, + { + "epoch": 0.2401491573198661, + "grad_norm": 0.0, + "learning_rate": 1.777193835907764e-05, + "loss": 1.3145, + "step": 8179 + }, + { + "epoch": 0.2401785189970051, + "grad_norm": 0.0, + "learning_rate": 1.777133992141469e-05, + "loss": 1.5107, + "step": 8180 + }, + { + "epoch": 0.24020788067414411, + "grad_norm": 0.0, + "learning_rate": 1.7770741413473524e-05, + "loss": 1.4717, + "step": 8181 + }, + { + "epoch": 0.2402372423512831, + "grad_norm": 0.0, + "learning_rate": 1.777014283525955e-05, + "loss": 1.3486, + "step": 8182 + }, + { + "epoch": 0.2402666040284221, + "grad_norm": 0.0, + "learning_rate": 1.7769544186778192e-05, + "loss": 1.4043, + "step": 8183 + }, + { + "epoch": 0.2402959657055611, + "grad_norm": 0.0, + "learning_rate": 1.776894546803485e-05, + "loss": 1.3486, + "step": 8184 + }, + { + "epoch": 0.2403253273827001, + "grad_norm": 0.0, + "learning_rate": 1.7768346679034946e-05, + "loss": 1.3818, + "step": 8185 + }, + { + "epoch": 0.2403546890598391, + "grad_norm": 0.0, + "learning_rate": 1.7767747819783895e-05, + "loss": 1.5137, + "step": 8186 + }, + { + "epoch": 0.2403840507369781, + "grad_norm": 0.0, + "learning_rate": 1.7767148890287108e-05, + "loss": 1.3896, + "step": 8187 + }, + { + "epoch": 0.2404134124141171, + "grad_norm": 0.0, + "learning_rate": 1.7766549890550004e-05, + "loss": 1.4209, + "step": 8188 + }, + { + "epoch": 0.24044277409125608, + "grad_norm": 0.0, + "learning_rate": 1.7765950820578003e-05, + "loss": 1.4717, + "step": 8189 + }, + { + "epoch": 0.2404721357683951, + "grad_norm": 0.0, + "learning_rate": 1.7765351680376518e-05, + "loss": 1.3623, + "step": 8190 + }, + { + "epoch": 0.2405014974455341, + "grad_norm": 0.0, + "learning_rate": 1.776475246995097e-05, + "loss": 1.4941, + "step": 8191 + }, + { + "epoch": 0.24053085912267308, + "grad_norm": 0.0, + "learning_rate": 1.7764153189306778e-05, + "loss": 1.4053, + "step": 8192 + }, + { + "epoch": 0.2405602207998121, + "grad_norm": 0.0, + "learning_rate": 1.776355383844936e-05, + "loss": 1.4697, + "step": 8193 + }, + { + "epoch": 0.24058958247695109, + "grad_norm": 0.0, + "learning_rate": 1.7762954417384135e-05, + "loss": 1.4873, + "step": 8194 + }, + { + "epoch": 0.24061894415409008, + "grad_norm": 0.0, + "learning_rate": 1.7762354926116526e-05, + "loss": 1.3838, + "step": 8195 + }, + { + "epoch": 0.2406483058312291, + "grad_norm": 0.0, + "learning_rate": 1.7761755364651953e-05, + "loss": 1.4258, + "step": 8196 + }, + { + "epoch": 0.24067766750836808, + "grad_norm": 0.0, + "learning_rate": 1.776115573299584e-05, + "loss": 1.4131, + "step": 8197 + }, + { + "epoch": 0.24070702918550707, + "grad_norm": 0.0, + "learning_rate": 1.7760556031153604e-05, + "loss": 1.2803, + "step": 8198 + }, + { + "epoch": 0.2407363908626461, + "grad_norm": 0.0, + "learning_rate": 1.7759956259130678e-05, + "loss": 1.3867, + "step": 8199 + }, + { + "epoch": 0.24076575253978508, + "grad_norm": 0.0, + "learning_rate": 1.7759356416932477e-05, + "loss": 1.3838, + "step": 8200 + }, + { + "epoch": 0.24079511421692407, + "grad_norm": 0.0, + "learning_rate": 1.7758756504564428e-05, + "loss": 1.3506, + "step": 8201 + }, + { + "epoch": 0.24082447589406306, + "grad_norm": 0.0, + "learning_rate": 1.7758156522031957e-05, + "loss": 1.4863, + "step": 8202 + }, + { + "epoch": 0.24085383757120207, + "grad_norm": 0.0, + "learning_rate": 1.775755646934049e-05, + "loss": 1.4307, + "step": 8203 + }, + { + "epoch": 0.24088319924834106, + "grad_norm": 0.0, + "learning_rate": 1.7756956346495452e-05, + "loss": 1.3555, + "step": 8204 + }, + { + "epoch": 0.24091256092548005, + "grad_norm": 0.0, + "learning_rate": 1.7756356153502273e-05, + "loss": 1.4072, + "step": 8205 + }, + { + "epoch": 0.24094192260261907, + "grad_norm": 0.0, + "learning_rate": 1.775575589036638e-05, + "loss": 1.4316, + "step": 8206 + }, + { + "epoch": 0.24097128427975806, + "grad_norm": 0.0, + "learning_rate": 1.77551555570932e-05, + "loss": 1.4736, + "step": 8207 + }, + { + "epoch": 0.24100064595689705, + "grad_norm": 0.0, + "learning_rate": 1.7754555153688156e-05, + "loss": 1.3828, + "step": 8208 + }, + { + "epoch": 0.24103000763403606, + "grad_norm": 0.0, + "learning_rate": 1.775395468015669e-05, + "loss": 1.5049, + "step": 8209 + }, + { + "epoch": 0.24105936931117505, + "grad_norm": 0.0, + "learning_rate": 1.7753354136504227e-05, + "loss": 1.5166, + "step": 8210 + }, + { + "epoch": 0.24108873098831404, + "grad_norm": 0.0, + "learning_rate": 1.7752753522736194e-05, + "loss": 1.4482, + "step": 8211 + }, + { + "epoch": 0.24111809266545306, + "grad_norm": 0.0, + "learning_rate": 1.7752152838858027e-05, + "loss": 1.4746, + "step": 8212 + }, + { + "epoch": 0.24114745434259205, + "grad_norm": 0.0, + "learning_rate": 1.7751552084875154e-05, + "loss": 1.3467, + "step": 8213 + }, + { + "epoch": 0.24117681601973104, + "grad_norm": 0.0, + "learning_rate": 1.7750951260793014e-05, + "loss": 1.5459, + "step": 8214 + }, + { + "epoch": 0.24120617769687006, + "grad_norm": 0.0, + "learning_rate": 1.7750350366617034e-05, + "loss": 1.4873, + "step": 8215 + }, + { + "epoch": 0.24123553937400904, + "grad_norm": 0.0, + "learning_rate": 1.774974940235265e-05, + "loss": 1.4727, + "step": 8216 + }, + { + "epoch": 0.24126490105114803, + "grad_norm": 0.0, + "learning_rate": 1.77491483680053e-05, + "loss": 1.4326, + "step": 8217 + }, + { + "epoch": 0.24129426272828705, + "grad_norm": 0.0, + "learning_rate": 1.7748547263580414e-05, + "loss": 1.4639, + "step": 8218 + }, + { + "epoch": 0.24132362440542604, + "grad_norm": 0.0, + "learning_rate": 1.7747946089083433e-05, + "loss": 1.4248, + "step": 8219 + }, + { + "epoch": 0.24135298608256503, + "grad_norm": 0.0, + "learning_rate": 1.7747344844519786e-05, + "loss": 1.4189, + "step": 8220 + }, + { + "epoch": 0.24138234775970405, + "grad_norm": 0.0, + "learning_rate": 1.774674352989492e-05, + "loss": 1.3867, + "step": 8221 + }, + { + "epoch": 0.24141170943684304, + "grad_norm": 0.0, + "learning_rate": 1.7746142145214272e-05, + "loss": 1.4434, + "step": 8222 + }, + { + "epoch": 0.24144107111398203, + "grad_norm": 0.0, + "learning_rate": 1.774554069048327e-05, + "loss": 1.3564, + "step": 8223 + }, + { + "epoch": 0.24147043279112104, + "grad_norm": 0.0, + "learning_rate": 1.7744939165707363e-05, + "loss": 1.3887, + "step": 8224 + }, + { + "epoch": 0.24149979446826003, + "grad_norm": 0.0, + "learning_rate": 1.7744337570891987e-05, + "loss": 1.3145, + "step": 8225 + }, + { + "epoch": 0.24152915614539902, + "grad_norm": 0.0, + "learning_rate": 1.774373590604258e-05, + "loss": 1.2422, + "step": 8226 + }, + { + "epoch": 0.241558517822538, + "grad_norm": 0.0, + "learning_rate": 1.7743134171164592e-05, + "loss": 1.4209, + "step": 8227 + }, + { + "epoch": 0.24158787949967703, + "grad_norm": 0.0, + "learning_rate": 1.7742532366263456e-05, + "loss": 1.4551, + "step": 8228 + }, + { + "epoch": 0.24161724117681602, + "grad_norm": 0.0, + "learning_rate": 1.7741930491344618e-05, + "loss": 1.3545, + "step": 8229 + }, + { + "epoch": 0.241646602853955, + "grad_norm": 0.0, + "learning_rate": 1.774132854641352e-05, + "loss": 1.458, + "step": 8230 + }, + { + "epoch": 0.24167596453109402, + "grad_norm": 0.0, + "learning_rate": 1.7740726531475602e-05, + "loss": 1.3105, + "step": 8231 + }, + { + "epoch": 0.241705326208233, + "grad_norm": 0.0, + "learning_rate": 1.7740124446536314e-05, + "loss": 1.3076, + "step": 8232 + }, + { + "epoch": 0.241734687885372, + "grad_norm": 0.0, + "learning_rate": 1.7739522291601096e-05, + "loss": 1.4238, + "step": 8233 + }, + { + "epoch": 0.24176404956251102, + "grad_norm": 0.0, + "learning_rate": 1.77389200666754e-05, + "loss": 1.3145, + "step": 8234 + }, + { + "epoch": 0.24179341123965, + "grad_norm": 0.0, + "learning_rate": 1.7738317771764663e-05, + "loss": 1.3184, + "step": 8235 + }, + { + "epoch": 0.241822772916789, + "grad_norm": 0.0, + "learning_rate": 1.7737715406874342e-05, + "loss": 1.3369, + "step": 8236 + }, + { + "epoch": 0.24185213459392801, + "grad_norm": 0.0, + "learning_rate": 1.7737112972009876e-05, + "loss": 1.2559, + "step": 8237 + }, + { + "epoch": 0.241881496271067, + "grad_norm": 0.0, + "learning_rate": 1.7736510467176715e-05, + "loss": 1.3955, + "step": 8238 + }, + { + "epoch": 0.241910857948206, + "grad_norm": 0.0, + "learning_rate": 1.773590789238031e-05, + "loss": 1.2939, + "step": 8239 + }, + { + "epoch": 0.241940219625345, + "grad_norm": 0.0, + "learning_rate": 1.7735305247626108e-05, + "loss": 1.3506, + "step": 8240 + }, + { + "epoch": 0.241969581302484, + "grad_norm": 0.0, + "learning_rate": 1.7734702532919558e-05, + "loss": 1.335, + "step": 8241 + }, + { + "epoch": 0.241998942979623, + "grad_norm": 0.0, + "learning_rate": 1.7734099748266115e-05, + "loss": 1.417, + "step": 8242 + }, + { + "epoch": 0.242028304656762, + "grad_norm": 0.0, + "learning_rate": 1.7733496893671226e-05, + "loss": 1.4902, + "step": 8243 + }, + { + "epoch": 0.242057666333901, + "grad_norm": 0.0, + "learning_rate": 1.7732893969140345e-05, + "loss": 1.3574, + "step": 8244 + }, + { + "epoch": 0.24208702801103998, + "grad_norm": 0.0, + "learning_rate": 1.7732290974678924e-05, + "loss": 1.4004, + "step": 8245 + }, + { + "epoch": 0.242116389688179, + "grad_norm": 0.0, + "learning_rate": 1.7731687910292413e-05, + "loss": 1.3818, + "step": 8246 + }, + { + "epoch": 0.242145751365318, + "grad_norm": 0.0, + "learning_rate": 1.7731084775986266e-05, + "loss": 1.4707, + "step": 8247 + }, + { + "epoch": 0.24217511304245698, + "grad_norm": 0.0, + "learning_rate": 1.7730481571765942e-05, + "loss": 1.3779, + "step": 8248 + }, + { + "epoch": 0.242204474719596, + "grad_norm": 0.0, + "learning_rate": 1.7729878297636896e-05, + "loss": 1.4707, + "step": 8249 + }, + { + "epoch": 0.242233836396735, + "grad_norm": 0.0, + "learning_rate": 1.7729274953604578e-05, + "loss": 1.415, + "step": 8250 + }, + { + "epoch": 0.24226319807387398, + "grad_norm": 0.0, + "learning_rate": 1.7728671539674448e-05, + "loss": 1.2812, + "step": 8251 + }, + { + "epoch": 0.24229255975101296, + "grad_norm": 0.0, + "learning_rate": 1.772806805585196e-05, + "loss": 1.4385, + "step": 8252 + }, + { + "epoch": 0.24232192142815198, + "grad_norm": 0.0, + "learning_rate": 1.772746450214258e-05, + "loss": 1.4629, + "step": 8253 + }, + { + "epoch": 0.24235128310529097, + "grad_norm": 0.0, + "learning_rate": 1.772686087855175e-05, + "loss": 1.4258, + "step": 8254 + }, + { + "epoch": 0.24238064478242996, + "grad_norm": 0.0, + "learning_rate": 1.7726257185084945e-05, + "loss": 1.4365, + "step": 8255 + }, + { + "epoch": 0.24241000645956898, + "grad_norm": 0.0, + "learning_rate": 1.7725653421747615e-05, + "loss": 1.4766, + "step": 8256 + }, + { + "epoch": 0.24243936813670797, + "grad_norm": 0.0, + "learning_rate": 1.7725049588545222e-05, + "loss": 1.4785, + "step": 8257 + }, + { + "epoch": 0.24246872981384696, + "grad_norm": 0.0, + "learning_rate": 1.7724445685483228e-05, + "loss": 1.458, + "step": 8258 + }, + { + "epoch": 0.24249809149098597, + "grad_norm": 0.0, + "learning_rate": 1.7723841712567094e-05, + "loss": 1.3281, + "step": 8259 + }, + { + "epoch": 0.24252745316812496, + "grad_norm": 0.0, + "learning_rate": 1.772323766980228e-05, + "loss": 1.4111, + "step": 8260 + }, + { + "epoch": 0.24255681484526395, + "grad_norm": 0.0, + "learning_rate": 1.7722633557194253e-05, + "loss": 1.5049, + "step": 8261 + }, + { + "epoch": 0.24258617652240297, + "grad_norm": 0.0, + "learning_rate": 1.7722029374748467e-05, + "loss": 1.3242, + "step": 8262 + }, + { + "epoch": 0.24261553819954196, + "grad_norm": 0.0, + "learning_rate": 1.7721425122470398e-05, + "loss": 1.2959, + "step": 8263 + }, + { + "epoch": 0.24264489987668095, + "grad_norm": 0.0, + "learning_rate": 1.77208208003655e-05, + "loss": 1.501, + "step": 8264 + }, + { + "epoch": 0.24267426155381996, + "grad_norm": 0.0, + "learning_rate": 1.7720216408439244e-05, + "loss": 1.3359, + "step": 8265 + }, + { + "epoch": 0.24270362323095895, + "grad_norm": 0.0, + "learning_rate": 1.7719611946697093e-05, + "loss": 1.4121, + "step": 8266 + }, + { + "epoch": 0.24273298490809794, + "grad_norm": 0.0, + "learning_rate": 1.7719007415144512e-05, + "loss": 1.2334, + "step": 8267 + }, + { + "epoch": 0.24276234658523696, + "grad_norm": 0.0, + "learning_rate": 1.7718402813786974e-05, + "loss": 1.4727, + "step": 8268 + }, + { + "epoch": 0.24279170826237595, + "grad_norm": 0.0, + "learning_rate": 1.7717798142629943e-05, + "loss": 1.4951, + "step": 8269 + }, + { + "epoch": 0.24282106993951494, + "grad_norm": 0.0, + "learning_rate": 1.7717193401678885e-05, + "loss": 1.4609, + "step": 8270 + }, + { + "epoch": 0.24285043161665396, + "grad_norm": 0.0, + "learning_rate": 1.771658859093927e-05, + "loss": 1.3633, + "step": 8271 + }, + { + "epoch": 0.24287979329379294, + "grad_norm": 0.0, + "learning_rate": 1.771598371041657e-05, + "loss": 1.3652, + "step": 8272 + }, + { + "epoch": 0.24290915497093193, + "grad_norm": 0.0, + "learning_rate": 1.771537876011625e-05, + "loss": 1.3398, + "step": 8273 + }, + { + "epoch": 0.24293851664807095, + "grad_norm": 0.0, + "learning_rate": 1.7714773740043787e-05, + "loss": 1.4033, + "step": 8274 + }, + { + "epoch": 0.24296787832520994, + "grad_norm": 0.0, + "learning_rate": 1.771416865020465e-05, + "loss": 1.4209, + "step": 8275 + }, + { + "epoch": 0.24299724000234893, + "grad_norm": 0.0, + "learning_rate": 1.7713563490604307e-05, + "loss": 1.3447, + "step": 8276 + }, + { + "epoch": 0.24302660167948792, + "grad_norm": 0.0, + "learning_rate": 1.7712958261248237e-05, + "loss": 1.4043, + "step": 8277 + }, + { + "epoch": 0.24305596335662694, + "grad_norm": 0.0, + "learning_rate": 1.7712352962141905e-05, + "loss": 1.4697, + "step": 8278 + }, + { + "epoch": 0.24308532503376593, + "grad_norm": 0.0, + "learning_rate": 1.7711747593290794e-05, + "loss": 1.3887, + "step": 8279 + }, + { + "epoch": 0.24311468671090491, + "grad_norm": 0.0, + "learning_rate": 1.7711142154700375e-05, + "loss": 1.5439, + "step": 8280 + }, + { + "epoch": 0.24314404838804393, + "grad_norm": 0.0, + "learning_rate": 1.7710536646376125e-05, + "loss": 1.4658, + "step": 8281 + }, + { + "epoch": 0.24317341006518292, + "grad_norm": 0.0, + "learning_rate": 1.770993106832351e-05, + "loss": 1.499, + "step": 8282 + }, + { + "epoch": 0.2432027717423219, + "grad_norm": 0.0, + "learning_rate": 1.7709325420548018e-05, + "loss": 1.4219, + "step": 8283 + }, + { + "epoch": 0.24323213341946093, + "grad_norm": 0.0, + "learning_rate": 1.770871970305512e-05, + "loss": 1.3682, + "step": 8284 + }, + { + "epoch": 0.24326149509659992, + "grad_norm": 0.0, + "learning_rate": 1.77081139158503e-05, + "loss": 1.4424, + "step": 8285 + }, + { + "epoch": 0.2432908567737389, + "grad_norm": 0.0, + "learning_rate": 1.770750805893903e-05, + "loss": 1.4053, + "step": 8286 + }, + { + "epoch": 0.24332021845087792, + "grad_norm": 0.0, + "learning_rate": 1.7706902132326787e-05, + "loss": 1.3828, + "step": 8287 + }, + { + "epoch": 0.2433495801280169, + "grad_norm": 0.0, + "learning_rate": 1.770629613601906e-05, + "loss": 1.4062, + "step": 8288 + }, + { + "epoch": 0.2433789418051559, + "grad_norm": 0.0, + "learning_rate": 1.770569007002132e-05, + "loss": 1.5107, + "step": 8289 + }, + { + "epoch": 0.24340830348229492, + "grad_norm": 0.0, + "learning_rate": 1.770508393433905e-05, + "loss": 1.4277, + "step": 8290 + }, + { + "epoch": 0.2434376651594339, + "grad_norm": 0.0, + "learning_rate": 1.7704477728977737e-05, + "loss": 1.3232, + "step": 8291 + }, + { + "epoch": 0.2434670268365729, + "grad_norm": 0.0, + "learning_rate": 1.7703871453942855e-05, + "loss": 1.4404, + "step": 8292 + }, + { + "epoch": 0.24349638851371191, + "grad_norm": 0.0, + "learning_rate": 1.7703265109239892e-05, + "loss": 1.5566, + "step": 8293 + }, + { + "epoch": 0.2435257501908509, + "grad_norm": 0.0, + "learning_rate": 1.7702658694874325e-05, + "loss": 1.4795, + "step": 8294 + }, + { + "epoch": 0.2435551118679899, + "grad_norm": 0.0, + "learning_rate": 1.770205221085165e-05, + "loss": 1.5146, + "step": 8295 + }, + { + "epoch": 0.2435844735451289, + "grad_norm": 0.0, + "learning_rate": 1.7701445657177342e-05, + "loss": 1.2876, + "step": 8296 + }, + { + "epoch": 0.2436138352222679, + "grad_norm": 0.0, + "learning_rate": 1.7700839033856888e-05, + "loss": 1.4746, + "step": 8297 + }, + { + "epoch": 0.2436431968994069, + "grad_norm": 0.0, + "learning_rate": 1.7700232340895773e-05, + "loss": 1.3291, + "step": 8298 + }, + { + "epoch": 0.2436725585765459, + "grad_norm": 0.0, + "learning_rate": 1.7699625578299487e-05, + "loss": 1.4531, + "step": 8299 + }, + { + "epoch": 0.2437019202536849, + "grad_norm": 0.0, + "learning_rate": 1.7699018746073515e-05, + "loss": 1.3125, + "step": 8300 + }, + { + "epoch": 0.24373128193082388, + "grad_norm": 0.0, + "learning_rate": 1.7698411844223346e-05, + "loss": 1.3232, + "step": 8301 + }, + { + "epoch": 0.24376064360796287, + "grad_norm": 0.0, + "learning_rate": 1.7697804872754464e-05, + "loss": 1.3711, + "step": 8302 + }, + { + "epoch": 0.2437900052851019, + "grad_norm": 0.0, + "learning_rate": 1.769719783167236e-05, + "loss": 1.4775, + "step": 8303 + }, + { + "epoch": 0.24381936696224088, + "grad_norm": 0.0, + "learning_rate": 1.7696590720982527e-05, + "loss": 1.5039, + "step": 8304 + }, + { + "epoch": 0.24384872863937987, + "grad_norm": 0.0, + "learning_rate": 1.7695983540690453e-05, + "loss": 1.4453, + "step": 8305 + }, + { + "epoch": 0.2438780903165189, + "grad_norm": 0.0, + "learning_rate": 1.7695376290801628e-05, + "loss": 1.1411, + "step": 8306 + }, + { + "epoch": 0.24390745199365788, + "grad_norm": 0.0, + "learning_rate": 1.7694768971321545e-05, + "loss": 1.4658, + "step": 8307 + }, + { + "epoch": 0.24393681367079686, + "grad_norm": 0.0, + "learning_rate": 1.7694161582255697e-05, + "loss": 1.3438, + "step": 8308 + }, + { + "epoch": 0.24396617534793588, + "grad_norm": 0.0, + "learning_rate": 1.769355412360957e-05, + "loss": 1.5107, + "step": 8309 + }, + { + "epoch": 0.24399553702507487, + "grad_norm": 0.0, + "learning_rate": 1.769294659538867e-05, + "loss": 1.3096, + "step": 8310 + }, + { + "epoch": 0.24402489870221386, + "grad_norm": 0.0, + "learning_rate": 1.769233899759848e-05, + "loss": 1.4326, + "step": 8311 + }, + { + "epoch": 0.24405426037935288, + "grad_norm": 0.0, + "learning_rate": 1.76917313302445e-05, + "loss": 1.458, + "step": 8312 + }, + { + "epoch": 0.24408362205649187, + "grad_norm": 0.0, + "learning_rate": 1.769112359333222e-05, + "loss": 1.2954, + "step": 8313 + }, + { + "epoch": 0.24411298373363086, + "grad_norm": 0.0, + "learning_rate": 1.7690515786867144e-05, + "loss": 1.4941, + "step": 8314 + }, + { + "epoch": 0.24414234541076987, + "grad_norm": 0.0, + "learning_rate": 1.7689907910854763e-05, + "loss": 1.4668, + "step": 8315 + }, + { + "epoch": 0.24417170708790886, + "grad_norm": 0.0, + "learning_rate": 1.7689299965300574e-05, + "loss": 1.4863, + "step": 8316 + }, + { + "epoch": 0.24420106876504785, + "grad_norm": 0.0, + "learning_rate": 1.7688691950210076e-05, + "loss": 1.3838, + "step": 8317 + }, + { + "epoch": 0.24423043044218687, + "grad_norm": 0.0, + "learning_rate": 1.768808386558877e-05, + "loss": 1.4072, + "step": 8318 + }, + { + "epoch": 0.24425979211932586, + "grad_norm": 0.0, + "learning_rate": 1.768747571144215e-05, + "loss": 1.4014, + "step": 8319 + }, + { + "epoch": 0.24428915379646485, + "grad_norm": 0.0, + "learning_rate": 1.7686867487775718e-05, + "loss": 1.4619, + "step": 8320 + }, + { + "epoch": 0.24431851547360386, + "grad_norm": 0.0, + "learning_rate": 1.7686259194594976e-05, + "loss": 1.4033, + "step": 8321 + }, + { + "epoch": 0.24434787715074285, + "grad_norm": 0.0, + "learning_rate": 1.7685650831905426e-05, + "loss": 1.418, + "step": 8322 + }, + { + "epoch": 0.24437723882788184, + "grad_norm": 0.0, + "learning_rate": 1.7685042399712563e-05, + "loss": 1.4336, + "step": 8323 + }, + { + "epoch": 0.24440660050502086, + "grad_norm": 0.0, + "learning_rate": 1.7684433898021898e-05, + "loss": 1.3389, + "step": 8324 + }, + { + "epoch": 0.24443596218215985, + "grad_norm": 0.0, + "learning_rate": 1.7683825326838925e-05, + "loss": 1.502, + "step": 8325 + }, + { + "epoch": 0.24446532385929884, + "grad_norm": 0.0, + "learning_rate": 1.7683216686169153e-05, + "loss": 1.479, + "step": 8326 + }, + { + "epoch": 0.24449468553643783, + "grad_norm": 0.0, + "learning_rate": 1.7682607976018086e-05, + "loss": 1.376, + "step": 8327 + }, + { + "epoch": 0.24452404721357685, + "grad_norm": 0.0, + "learning_rate": 1.7681999196391225e-05, + "loss": 1.5234, + "step": 8328 + }, + { + "epoch": 0.24455340889071583, + "grad_norm": 0.0, + "learning_rate": 1.7681390347294082e-05, + "loss": 1.4375, + "step": 8329 + }, + { + "epoch": 0.24458277056785482, + "grad_norm": 0.0, + "learning_rate": 1.7680781428732153e-05, + "loss": 1.4863, + "step": 8330 + }, + { + "epoch": 0.24461213224499384, + "grad_norm": 0.0, + "learning_rate": 1.7680172440710955e-05, + "loss": 1.4219, + "step": 8331 + }, + { + "epoch": 0.24464149392213283, + "grad_norm": 0.0, + "learning_rate": 1.767956338323599e-05, + "loss": 1.3506, + "step": 8332 + }, + { + "epoch": 0.24467085559927182, + "grad_norm": 0.0, + "learning_rate": 1.7678954256312765e-05, + "loss": 1.377, + "step": 8333 + }, + { + "epoch": 0.24470021727641084, + "grad_norm": 0.0, + "learning_rate": 1.7678345059946795e-05, + "loss": 1.373, + "step": 8334 + }, + { + "epoch": 0.24472957895354983, + "grad_norm": 0.0, + "learning_rate": 1.7677735794143574e-05, + "loss": 1.4072, + "step": 8335 + }, + { + "epoch": 0.24475894063068881, + "grad_norm": 0.0, + "learning_rate": 1.7677126458908632e-05, + "loss": 1.3477, + "step": 8336 + }, + { + "epoch": 0.24478830230782783, + "grad_norm": 0.0, + "learning_rate": 1.7676517054247465e-05, + "loss": 1.4795, + "step": 8337 + }, + { + "epoch": 0.24481766398496682, + "grad_norm": 0.0, + "learning_rate": 1.767590758016559e-05, + "loss": 1.3452, + "step": 8338 + }, + { + "epoch": 0.2448470256621058, + "grad_norm": 0.0, + "learning_rate": 1.7675298036668515e-05, + "loss": 1.4941, + "step": 8339 + }, + { + "epoch": 0.24487638733924483, + "grad_norm": 0.0, + "learning_rate": 1.7674688423761753e-05, + "loss": 1.3848, + "step": 8340 + }, + { + "epoch": 0.24490574901638382, + "grad_norm": 0.0, + "learning_rate": 1.7674078741450823e-05, + "loss": 1.3672, + "step": 8341 + }, + { + "epoch": 0.2449351106935228, + "grad_norm": 0.0, + "learning_rate": 1.7673468989741226e-05, + "loss": 1.3623, + "step": 8342 + }, + { + "epoch": 0.24496447237066182, + "grad_norm": 0.0, + "learning_rate": 1.767285916863849e-05, + "loss": 1.3125, + "step": 8343 + }, + { + "epoch": 0.2449938340478008, + "grad_norm": 0.0, + "learning_rate": 1.767224927814812e-05, + "loss": 1.4502, + "step": 8344 + }, + { + "epoch": 0.2450231957249398, + "grad_norm": 0.0, + "learning_rate": 1.7671639318275637e-05, + "loss": 1.5566, + "step": 8345 + }, + { + "epoch": 0.24505255740207882, + "grad_norm": 0.0, + "learning_rate": 1.7671029289026558e-05, + "loss": 1.3438, + "step": 8346 + }, + { + "epoch": 0.2450819190792178, + "grad_norm": 0.0, + "learning_rate": 1.767041919040639e-05, + "loss": 1.3721, + "step": 8347 + }, + { + "epoch": 0.2451112807563568, + "grad_norm": 0.0, + "learning_rate": 1.766980902242066e-05, + "loss": 1.2422, + "step": 8348 + }, + { + "epoch": 0.24514064243349581, + "grad_norm": 0.0, + "learning_rate": 1.7669198785074884e-05, + "loss": 1.3711, + "step": 8349 + }, + { + "epoch": 0.2451700041106348, + "grad_norm": 0.0, + "learning_rate": 1.766858847837458e-05, + "loss": 1.4346, + "step": 8350 + }, + { + "epoch": 0.2451993657877738, + "grad_norm": 0.0, + "learning_rate": 1.766797810232526e-05, + "loss": 1.2842, + "step": 8351 + }, + { + "epoch": 0.24522872746491278, + "grad_norm": 0.0, + "learning_rate": 1.7667367656932458e-05, + "loss": 1.4541, + "step": 8352 + }, + { + "epoch": 0.2452580891420518, + "grad_norm": 0.0, + "learning_rate": 1.7666757142201684e-05, + "loss": 1.4531, + "step": 8353 + }, + { + "epoch": 0.2452874508191908, + "grad_norm": 0.0, + "learning_rate": 1.766614655813846e-05, + "loss": 1.4688, + "step": 8354 + }, + { + "epoch": 0.24531681249632978, + "grad_norm": 0.0, + "learning_rate": 1.766553590474831e-05, + "loss": 1.4121, + "step": 8355 + }, + { + "epoch": 0.2453461741734688, + "grad_norm": 0.0, + "learning_rate": 1.7664925182036755e-05, + "loss": 1.46, + "step": 8356 + }, + { + "epoch": 0.24537553585060778, + "grad_norm": 0.0, + "learning_rate": 1.7664314390009322e-05, + "loss": 1.3535, + "step": 8357 + }, + { + "epoch": 0.24540489752774677, + "grad_norm": 0.0, + "learning_rate": 1.7663703528671524e-05, + "loss": 1.3975, + "step": 8358 + }, + { + "epoch": 0.2454342592048858, + "grad_norm": 0.0, + "learning_rate": 1.7663092598028898e-05, + "loss": 1.4961, + "step": 8359 + }, + { + "epoch": 0.24546362088202478, + "grad_norm": 0.0, + "learning_rate": 1.7662481598086963e-05, + "loss": 1.4189, + "step": 8360 + }, + { + "epoch": 0.24549298255916377, + "grad_norm": 0.0, + "learning_rate": 1.7661870528851244e-05, + "loss": 1.5684, + "step": 8361 + }, + { + "epoch": 0.2455223442363028, + "grad_norm": 0.0, + "learning_rate": 1.7661259390327264e-05, + "loss": 1.3926, + "step": 8362 + }, + { + "epoch": 0.24555170591344178, + "grad_norm": 0.0, + "learning_rate": 1.7660648182520556e-05, + "loss": 1.3838, + "step": 8363 + }, + { + "epoch": 0.24558106759058076, + "grad_norm": 0.0, + "learning_rate": 1.7660036905436647e-05, + "loss": 1.4395, + "step": 8364 + }, + { + "epoch": 0.24561042926771978, + "grad_norm": 0.0, + "learning_rate": 1.7659425559081062e-05, + "loss": 1.3691, + "step": 8365 + }, + { + "epoch": 0.24563979094485877, + "grad_norm": 0.0, + "learning_rate": 1.7658814143459327e-05, + "loss": 1.4072, + "step": 8366 + }, + { + "epoch": 0.24566915262199776, + "grad_norm": 0.0, + "learning_rate": 1.7658202658576975e-05, + "loss": 1.5352, + "step": 8367 + }, + { + "epoch": 0.24569851429913678, + "grad_norm": 0.0, + "learning_rate": 1.765759110443954e-05, + "loss": 1.4814, + "step": 8368 + }, + { + "epoch": 0.24572787597627577, + "grad_norm": 0.0, + "learning_rate": 1.7656979481052538e-05, + "loss": 1.3291, + "step": 8369 + }, + { + "epoch": 0.24575723765341476, + "grad_norm": 0.0, + "learning_rate": 1.7656367788421515e-05, + "loss": 1.3789, + "step": 8370 + }, + { + "epoch": 0.24578659933055377, + "grad_norm": 0.0, + "learning_rate": 1.7655756026551997e-05, + "loss": 1.4229, + "step": 8371 + }, + { + "epoch": 0.24581596100769276, + "grad_norm": 0.0, + "learning_rate": 1.765514419544952e-05, + "loss": 1.4062, + "step": 8372 + }, + { + "epoch": 0.24584532268483175, + "grad_norm": 0.0, + "learning_rate": 1.7654532295119607e-05, + "loss": 1.3906, + "step": 8373 + }, + { + "epoch": 0.24587468436197077, + "grad_norm": 0.0, + "learning_rate": 1.7653920325567805e-05, + "loss": 1.3828, + "step": 8374 + }, + { + "epoch": 0.24590404603910976, + "grad_norm": 0.0, + "learning_rate": 1.7653308286799634e-05, + "loss": 1.4404, + "step": 8375 + }, + { + "epoch": 0.24593340771624875, + "grad_norm": 0.0, + "learning_rate": 1.765269617882064e-05, + "loss": 1.5479, + "step": 8376 + }, + { + "epoch": 0.24596276939338774, + "grad_norm": 0.0, + "learning_rate": 1.7652084001636355e-05, + "loss": 1.4082, + "step": 8377 + }, + { + "epoch": 0.24599213107052675, + "grad_norm": 0.0, + "learning_rate": 1.7651471755252315e-05, + "loss": 1.4395, + "step": 8378 + }, + { + "epoch": 0.24602149274766574, + "grad_norm": 0.0, + "learning_rate": 1.7650859439674053e-05, + "loss": 1.583, + "step": 8379 + }, + { + "epoch": 0.24605085442480473, + "grad_norm": 0.0, + "learning_rate": 1.7650247054907113e-05, + "loss": 1.4482, + "step": 8380 + }, + { + "epoch": 0.24608021610194375, + "grad_norm": 0.0, + "learning_rate": 1.764963460095703e-05, + "loss": 1.2695, + "step": 8381 + }, + { + "epoch": 0.24610957777908274, + "grad_norm": 0.0, + "learning_rate": 1.7649022077829338e-05, + "loss": 1.498, + "step": 8382 + }, + { + "epoch": 0.24613893945622173, + "grad_norm": 0.0, + "learning_rate": 1.7648409485529585e-05, + "loss": 1.4951, + "step": 8383 + }, + { + "epoch": 0.24616830113336075, + "grad_norm": 0.0, + "learning_rate": 1.7647796824063304e-05, + "loss": 1.5146, + "step": 8384 + }, + { + "epoch": 0.24619766281049973, + "grad_norm": 0.0, + "learning_rate": 1.7647184093436037e-05, + "loss": 1.4902, + "step": 8385 + }, + { + "epoch": 0.24622702448763872, + "grad_norm": 0.0, + "learning_rate": 1.7646571293653328e-05, + "loss": 1.5713, + "step": 8386 + }, + { + "epoch": 0.24625638616477774, + "grad_norm": 0.0, + "learning_rate": 1.7645958424720716e-05, + "loss": 1.2949, + "step": 8387 + }, + { + "epoch": 0.24628574784191673, + "grad_norm": 0.0, + "learning_rate": 1.7645345486643744e-05, + "loss": 1.4355, + "step": 8388 + }, + { + "epoch": 0.24631510951905572, + "grad_norm": 0.0, + "learning_rate": 1.764473247942795e-05, + "loss": 1.416, + "step": 8389 + }, + { + "epoch": 0.24634447119619474, + "grad_norm": 0.0, + "learning_rate": 1.7644119403078888e-05, + "loss": 1.4209, + "step": 8390 + }, + { + "epoch": 0.24637383287333373, + "grad_norm": 0.0, + "learning_rate": 1.7643506257602094e-05, + "loss": 1.415, + "step": 8391 + }, + { + "epoch": 0.24640319455047272, + "grad_norm": 0.0, + "learning_rate": 1.7642893043003115e-05, + "loss": 1.418, + "step": 8392 + }, + { + "epoch": 0.24643255622761173, + "grad_norm": 0.0, + "learning_rate": 1.7642279759287498e-05, + "loss": 1.3008, + "step": 8393 + }, + { + "epoch": 0.24646191790475072, + "grad_norm": 0.0, + "learning_rate": 1.764166640646079e-05, + "loss": 1.4395, + "step": 8394 + }, + { + "epoch": 0.2464912795818897, + "grad_norm": 0.0, + "learning_rate": 1.7641052984528534e-05, + "loss": 1.2559, + "step": 8395 + }, + { + "epoch": 0.24652064125902873, + "grad_norm": 0.0, + "learning_rate": 1.7640439493496277e-05, + "loss": 1.4316, + "step": 8396 + }, + { + "epoch": 0.24655000293616772, + "grad_norm": 0.0, + "learning_rate": 1.763982593336957e-05, + "loss": 1.3047, + "step": 8397 + }, + { + "epoch": 0.2465793646133067, + "grad_norm": 0.0, + "learning_rate": 1.763921230415396e-05, + "loss": 1.4365, + "step": 8398 + }, + { + "epoch": 0.24660872629044572, + "grad_norm": 0.0, + "learning_rate": 1.7638598605855e-05, + "loss": 1.4688, + "step": 8399 + }, + { + "epoch": 0.2466380879675847, + "grad_norm": 0.0, + "learning_rate": 1.7637984838478232e-05, + "loss": 1.4414, + "step": 8400 + }, + { + "epoch": 0.2466674496447237, + "grad_norm": 0.0, + "learning_rate": 1.7637371002029215e-05, + "loss": 1.4023, + "step": 8401 + }, + { + "epoch": 0.2466968113218627, + "grad_norm": 0.0, + "learning_rate": 1.7636757096513495e-05, + "loss": 1.4141, + "step": 8402 + }, + { + "epoch": 0.2467261729990017, + "grad_norm": 0.0, + "learning_rate": 1.7636143121936623e-05, + "loss": 1.4492, + "step": 8403 + }, + { + "epoch": 0.2467555346761407, + "grad_norm": 0.0, + "learning_rate": 1.7635529078304154e-05, + "loss": 1.4297, + "step": 8404 + }, + { + "epoch": 0.2467848963532797, + "grad_norm": 0.0, + "learning_rate": 1.763491496562164e-05, + "loss": 1.3984, + "step": 8405 + }, + { + "epoch": 0.2468142580304187, + "grad_norm": 0.0, + "learning_rate": 1.7634300783894635e-05, + "loss": 1.4873, + "step": 8406 + }, + { + "epoch": 0.2468436197075577, + "grad_norm": 0.0, + "learning_rate": 1.7633686533128692e-05, + "loss": 1.3115, + "step": 8407 + }, + { + "epoch": 0.24687298138469668, + "grad_norm": 0.0, + "learning_rate": 1.7633072213329367e-05, + "loss": 1.4131, + "step": 8408 + }, + { + "epoch": 0.2469023430618357, + "grad_norm": 0.0, + "learning_rate": 1.7632457824502216e-05, + "loss": 1.3672, + "step": 8409 + }, + { + "epoch": 0.2469317047389747, + "grad_norm": 0.0, + "learning_rate": 1.7631843366652794e-05, + "loss": 1.3926, + "step": 8410 + }, + { + "epoch": 0.24696106641611368, + "grad_norm": 0.0, + "learning_rate": 1.7631228839786656e-05, + "loss": 1.3857, + "step": 8411 + }, + { + "epoch": 0.2469904280932527, + "grad_norm": 0.0, + "learning_rate": 1.7630614243909363e-05, + "loss": 1.4414, + "step": 8412 + }, + { + "epoch": 0.24701978977039168, + "grad_norm": 0.0, + "learning_rate": 1.7629999579026474e-05, + "loss": 1.501, + "step": 8413 + }, + { + "epoch": 0.24704915144753067, + "grad_norm": 0.0, + "learning_rate": 1.762938484514354e-05, + "loss": 1.3184, + "step": 8414 + }, + { + "epoch": 0.2470785131246697, + "grad_norm": 0.0, + "learning_rate": 1.7628770042266125e-05, + "loss": 1.4521, + "step": 8415 + }, + { + "epoch": 0.24710787480180868, + "grad_norm": 0.0, + "learning_rate": 1.7628155170399792e-05, + "loss": 1.5361, + "step": 8416 + }, + { + "epoch": 0.24713723647894767, + "grad_norm": 0.0, + "learning_rate": 1.7627540229550096e-05, + "loss": 1.4834, + "step": 8417 + }, + { + "epoch": 0.2471665981560867, + "grad_norm": 0.0, + "learning_rate": 1.76269252197226e-05, + "loss": 1.4033, + "step": 8418 + }, + { + "epoch": 0.24719595983322568, + "grad_norm": 0.0, + "learning_rate": 1.7626310140922865e-05, + "loss": 1.3398, + "step": 8419 + }, + { + "epoch": 0.24722532151036467, + "grad_norm": 0.0, + "learning_rate": 1.7625694993156458e-05, + "loss": 1.3613, + "step": 8420 + }, + { + "epoch": 0.24725468318750368, + "grad_norm": 0.0, + "learning_rate": 1.7625079776428937e-05, + "loss": 1.5215, + "step": 8421 + }, + { + "epoch": 0.24728404486464267, + "grad_norm": 0.0, + "learning_rate": 1.7624464490745863e-05, + "loss": 1.4512, + "step": 8422 + }, + { + "epoch": 0.24731340654178166, + "grad_norm": 0.0, + "learning_rate": 1.762384913611281e-05, + "loss": 1.3389, + "step": 8423 + }, + { + "epoch": 0.24734276821892068, + "grad_norm": 0.0, + "learning_rate": 1.762323371253533e-05, + "loss": 1.4521, + "step": 8424 + }, + { + "epoch": 0.24737212989605967, + "grad_norm": 0.0, + "learning_rate": 1.7622618220019002e-05, + "loss": 1.3125, + "step": 8425 + }, + { + "epoch": 0.24740149157319866, + "grad_norm": 0.0, + "learning_rate": 1.762200265856938e-05, + "loss": 1.3086, + "step": 8426 + }, + { + "epoch": 0.24743085325033765, + "grad_norm": 0.0, + "learning_rate": 1.762138702819204e-05, + "loss": 1.3564, + "step": 8427 + }, + { + "epoch": 0.24746021492747666, + "grad_norm": 0.0, + "learning_rate": 1.7620771328892542e-05, + "loss": 1.4775, + "step": 8428 + }, + { + "epoch": 0.24748957660461565, + "grad_norm": 0.0, + "learning_rate": 1.762015556067646e-05, + "loss": 1.3135, + "step": 8429 + }, + { + "epoch": 0.24751893828175464, + "grad_norm": 0.0, + "learning_rate": 1.7619539723549357e-05, + "loss": 1.4385, + "step": 8430 + }, + { + "epoch": 0.24754829995889366, + "grad_norm": 0.0, + "learning_rate": 1.7618923817516808e-05, + "loss": 1.3887, + "step": 8431 + }, + { + "epoch": 0.24757766163603265, + "grad_norm": 0.0, + "learning_rate": 1.7618307842584377e-05, + "loss": 1.4717, + "step": 8432 + }, + { + "epoch": 0.24760702331317164, + "grad_norm": 0.0, + "learning_rate": 1.7617691798757637e-05, + "loss": 1.1665, + "step": 8433 + }, + { + "epoch": 0.24763638499031065, + "grad_norm": 0.0, + "learning_rate": 1.761707568604216e-05, + "loss": 1.5557, + "step": 8434 + }, + { + "epoch": 0.24766574666744964, + "grad_norm": 0.0, + "learning_rate": 1.7616459504443514e-05, + "loss": 1.3975, + "step": 8435 + }, + { + "epoch": 0.24769510834458863, + "grad_norm": 0.0, + "learning_rate": 1.761584325396728e-05, + "loss": 1.2939, + "step": 8436 + }, + { + "epoch": 0.24772447002172765, + "grad_norm": 0.0, + "learning_rate": 1.7615226934619022e-05, + "loss": 1.4004, + "step": 8437 + }, + { + "epoch": 0.24775383169886664, + "grad_norm": 0.0, + "learning_rate": 1.7614610546404315e-05, + "loss": 1.543, + "step": 8438 + }, + { + "epoch": 0.24778319337600563, + "grad_norm": 0.0, + "learning_rate": 1.7613994089328736e-05, + "loss": 1.4268, + "step": 8439 + }, + { + "epoch": 0.24781255505314465, + "grad_norm": 0.0, + "learning_rate": 1.7613377563397856e-05, + "loss": 1.459, + "step": 8440 + }, + { + "epoch": 0.24784191673028363, + "grad_norm": 0.0, + "learning_rate": 1.761276096861726e-05, + "loss": 1.3779, + "step": 8441 + }, + { + "epoch": 0.24787127840742262, + "grad_norm": 0.0, + "learning_rate": 1.7612144304992513e-05, + "loss": 1.3613, + "step": 8442 + }, + { + "epoch": 0.24790064008456164, + "grad_norm": 0.0, + "learning_rate": 1.7611527572529193e-05, + "loss": 1.3613, + "step": 8443 + }, + { + "epoch": 0.24793000176170063, + "grad_norm": 0.0, + "learning_rate": 1.7610910771232882e-05, + "loss": 1.4316, + "step": 8444 + }, + { + "epoch": 0.24795936343883962, + "grad_norm": 0.0, + "learning_rate": 1.7610293901109154e-05, + "loss": 1.5088, + "step": 8445 + }, + { + "epoch": 0.24798872511597864, + "grad_norm": 0.0, + "learning_rate": 1.760967696216359e-05, + "loss": 1.3711, + "step": 8446 + }, + { + "epoch": 0.24801808679311763, + "grad_norm": 0.0, + "learning_rate": 1.760905995440177e-05, + "loss": 1.3438, + "step": 8447 + }, + { + "epoch": 0.24804744847025662, + "grad_norm": 0.0, + "learning_rate": 1.760844287782927e-05, + "loss": 1.4277, + "step": 8448 + }, + { + "epoch": 0.24807681014739563, + "grad_norm": 0.0, + "learning_rate": 1.7607825732451676e-05, + "loss": 1.3994, + "step": 8449 + }, + { + "epoch": 0.24810617182453462, + "grad_norm": 0.0, + "learning_rate": 1.7607208518274563e-05, + "loss": 1.4375, + "step": 8450 + }, + { + "epoch": 0.2481355335016736, + "grad_norm": 0.0, + "learning_rate": 1.7606591235303514e-05, + "loss": 1.4434, + "step": 8451 + }, + { + "epoch": 0.2481648951788126, + "grad_norm": 0.0, + "learning_rate": 1.7605973883544116e-05, + "loss": 1.3584, + "step": 8452 + }, + { + "epoch": 0.24819425685595162, + "grad_norm": 0.0, + "learning_rate": 1.7605356463001943e-05, + "loss": 1.4746, + "step": 8453 + }, + { + "epoch": 0.2482236185330906, + "grad_norm": 0.0, + "learning_rate": 1.7604738973682586e-05, + "loss": 1.4785, + "step": 8454 + }, + { + "epoch": 0.2482529802102296, + "grad_norm": 0.0, + "learning_rate": 1.7604121415591627e-05, + "loss": 1.6191, + "step": 8455 + }, + { + "epoch": 0.2482823418873686, + "grad_norm": 0.0, + "learning_rate": 1.7603503788734654e-05, + "loss": 1.3701, + "step": 8456 + }, + { + "epoch": 0.2483117035645076, + "grad_norm": 0.0, + "learning_rate": 1.7602886093117248e-05, + "loss": 1.5205, + "step": 8457 + }, + { + "epoch": 0.2483410652416466, + "grad_norm": 0.0, + "learning_rate": 1.760226832874499e-05, + "loss": 1.4678, + "step": 8458 + }, + { + "epoch": 0.2483704269187856, + "grad_norm": 0.0, + "learning_rate": 1.7601650495623478e-05, + "loss": 1.3486, + "step": 8459 + }, + { + "epoch": 0.2483997885959246, + "grad_norm": 0.0, + "learning_rate": 1.7601032593758292e-05, + "loss": 1.2729, + "step": 8460 + }, + { + "epoch": 0.2484291502730636, + "grad_norm": 0.0, + "learning_rate": 1.760041462315502e-05, + "loss": 1.3887, + "step": 8461 + }, + { + "epoch": 0.2484585119502026, + "grad_norm": 0.0, + "learning_rate": 1.7599796583819257e-05, + "loss": 1.4209, + "step": 8462 + }, + { + "epoch": 0.2484878736273416, + "grad_norm": 0.0, + "learning_rate": 1.7599178475756586e-05, + "loss": 1.3672, + "step": 8463 + }, + { + "epoch": 0.24851723530448058, + "grad_norm": 0.0, + "learning_rate": 1.7598560298972593e-05, + "loss": 1.335, + "step": 8464 + }, + { + "epoch": 0.2485465969816196, + "grad_norm": 0.0, + "learning_rate": 1.7597942053472876e-05, + "loss": 1.5059, + "step": 8465 + }, + { + "epoch": 0.2485759586587586, + "grad_norm": 0.0, + "learning_rate": 1.7597323739263022e-05, + "loss": 1.2891, + "step": 8466 + }, + { + "epoch": 0.24860532033589758, + "grad_norm": 0.0, + "learning_rate": 1.7596705356348625e-05, + "loss": 1.4834, + "step": 8467 + }, + { + "epoch": 0.2486346820130366, + "grad_norm": 0.0, + "learning_rate": 1.759608690473528e-05, + "loss": 1.4033, + "step": 8468 + }, + { + "epoch": 0.24866404369017558, + "grad_norm": 0.0, + "learning_rate": 1.759546838442857e-05, + "loss": 1.4307, + "step": 8469 + }, + { + "epoch": 0.24869340536731457, + "grad_norm": 0.0, + "learning_rate": 1.7594849795434096e-05, + "loss": 1.3428, + "step": 8470 + }, + { + "epoch": 0.2487227670444536, + "grad_norm": 0.0, + "learning_rate": 1.759423113775745e-05, + "loss": 1.5098, + "step": 8471 + }, + { + "epoch": 0.24875212872159258, + "grad_norm": 0.0, + "learning_rate": 1.7593612411404228e-05, + "loss": 1.4473, + "step": 8472 + }, + { + "epoch": 0.24878149039873157, + "grad_norm": 0.0, + "learning_rate": 1.7592993616380024e-05, + "loss": 1.4678, + "step": 8473 + }, + { + "epoch": 0.2488108520758706, + "grad_norm": 0.0, + "learning_rate": 1.759237475269043e-05, + "loss": 1.5361, + "step": 8474 + }, + { + "epoch": 0.24884021375300958, + "grad_norm": 0.0, + "learning_rate": 1.7591755820341052e-05, + "loss": 1.4062, + "step": 8475 + }, + { + "epoch": 0.24886957543014857, + "grad_norm": 0.0, + "learning_rate": 1.759113681933748e-05, + "loss": 1.583, + "step": 8476 + }, + { + "epoch": 0.24889893710728755, + "grad_norm": 0.0, + "learning_rate": 1.759051774968531e-05, + "loss": 1.4404, + "step": 8477 + }, + { + "epoch": 0.24892829878442657, + "grad_norm": 0.0, + "learning_rate": 1.758989861139015e-05, + "loss": 1.46, + "step": 8478 + }, + { + "epoch": 0.24895766046156556, + "grad_norm": 0.0, + "learning_rate": 1.7589279404457592e-05, + "loss": 1.4551, + "step": 8479 + }, + { + "epoch": 0.24898702213870455, + "grad_norm": 0.0, + "learning_rate": 1.7588660128893234e-05, + "loss": 1.3057, + "step": 8480 + }, + { + "epoch": 0.24901638381584357, + "grad_norm": 0.0, + "learning_rate": 1.7588040784702684e-05, + "loss": 1.3682, + "step": 8481 + }, + { + "epoch": 0.24904574549298256, + "grad_norm": 0.0, + "learning_rate": 1.758742137189153e-05, + "loss": 1.4912, + "step": 8482 + }, + { + "epoch": 0.24907510717012155, + "grad_norm": 0.0, + "learning_rate": 1.7586801890465387e-05, + "loss": 1.3701, + "step": 8483 + }, + { + "epoch": 0.24910446884726056, + "grad_norm": 0.0, + "learning_rate": 1.7586182340429848e-05, + "loss": 1.374, + "step": 8484 + }, + { + "epoch": 0.24913383052439955, + "grad_norm": 0.0, + "learning_rate": 1.7585562721790523e-05, + "loss": 1.3867, + "step": 8485 + }, + { + "epoch": 0.24916319220153854, + "grad_norm": 0.0, + "learning_rate": 1.7584943034553007e-05, + "loss": 1.3857, + "step": 8486 + }, + { + "epoch": 0.24919255387867756, + "grad_norm": 0.0, + "learning_rate": 1.7584323278722913e-05, + "loss": 1.3984, + "step": 8487 + }, + { + "epoch": 0.24922191555581655, + "grad_norm": 0.0, + "learning_rate": 1.758370345430584e-05, + "loss": 1.3203, + "step": 8488 + }, + { + "epoch": 0.24925127723295554, + "grad_norm": 0.0, + "learning_rate": 1.7583083561307394e-05, + "loss": 1.332, + "step": 8489 + }, + { + "epoch": 0.24928063891009455, + "grad_norm": 0.0, + "learning_rate": 1.7582463599733183e-05, + "loss": 1.4346, + "step": 8490 + }, + { + "epoch": 0.24931000058723354, + "grad_norm": 0.0, + "learning_rate": 1.758184356958881e-05, + "loss": 1.4336, + "step": 8491 + }, + { + "epoch": 0.24933936226437253, + "grad_norm": 0.0, + "learning_rate": 1.7581223470879886e-05, + "loss": 1.3018, + "step": 8492 + }, + { + "epoch": 0.24936872394151155, + "grad_norm": 0.0, + "learning_rate": 1.758060330361201e-05, + "loss": 1.457, + "step": 8493 + }, + { + "epoch": 0.24939808561865054, + "grad_norm": 0.0, + "learning_rate": 1.7579983067790803e-05, + "loss": 1.3877, + "step": 8494 + }, + { + "epoch": 0.24942744729578953, + "grad_norm": 0.0, + "learning_rate": 1.7579362763421867e-05, + "loss": 1.418, + "step": 8495 + }, + { + "epoch": 0.24945680897292855, + "grad_norm": 0.0, + "learning_rate": 1.757874239051081e-05, + "loss": 1.498, + "step": 8496 + }, + { + "epoch": 0.24948617065006753, + "grad_norm": 0.0, + "learning_rate": 1.757812194906325e-05, + "loss": 1.3525, + "step": 8497 + }, + { + "epoch": 0.24951553232720652, + "grad_norm": 0.0, + "learning_rate": 1.757750143908479e-05, + "loss": 1.624, + "step": 8498 + }, + { + "epoch": 0.24954489400434554, + "grad_norm": 0.0, + "learning_rate": 1.757688086058104e-05, + "loss": 1.417, + "step": 8499 + }, + { + "epoch": 0.24957425568148453, + "grad_norm": 0.0, + "learning_rate": 1.757626021355762e-05, + "loss": 1.4404, + "step": 8500 + }, + { + "epoch": 0.24960361735862352, + "grad_norm": 0.0, + "learning_rate": 1.7575639498020135e-05, + "loss": 1.3887, + "step": 8501 + }, + { + "epoch": 0.2496329790357625, + "grad_norm": 0.0, + "learning_rate": 1.7575018713974205e-05, + "loss": 1.5293, + "step": 8502 + }, + { + "epoch": 0.24966234071290153, + "grad_norm": 0.0, + "learning_rate": 1.7574397861425437e-05, + "loss": 1.4102, + "step": 8503 + }, + { + "epoch": 0.24969170239004052, + "grad_norm": 0.0, + "learning_rate": 1.7573776940379453e-05, + "loss": 1.4473, + "step": 8504 + }, + { + "epoch": 0.2497210640671795, + "grad_norm": 0.0, + "learning_rate": 1.7573155950841866e-05, + "loss": 1.3916, + "step": 8505 + }, + { + "epoch": 0.24975042574431852, + "grad_norm": 0.0, + "learning_rate": 1.7572534892818288e-05, + "loss": 1.4111, + "step": 8506 + }, + { + "epoch": 0.2497797874214575, + "grad_norm": 0.0, + "learning_rate": 1.7571913766314335e-05, + "loss": 1.3271, + "step": 8507 + }, + { + "epoch": 0.2498091490985965, + "grad_norm": 0.0, + "learning_rate": 1.757129257133563e-05, + "loss": 1.4268, + "step": 8508 + }, + { + "epoch": 0.24983851077573552, + "grad_norm": 0.0, + "learning_rate": 1.7570671307887787e-05, + "loss": 1.4131, + "step": 8509 + }, + { + "epoch": 0.2498678724528745, + "grad_norm": 0.0, + "learning_rate": 1.757004997597642e-05, + "loss": 1.4902, + "step": 8510 + }, + { + "epoch": 0.2498972341300135, + "grad_norm": 0.0, + "learning_rate": 1.756942857560716e-05, + "loss": 1.3896, + "step": 8511 + }, + { + "epoch": 0.2499265958071525, + "grad_norm": 0.0, + "learning_rate": 1.7568807106785616e-05, + "loss": 1.2324, + "step": 8512 + }, + { + "epoch": 0.2499559574842915, + "grad_norm": 0.0, + "learning_rate": 1.756818556951741e-05, + "loss": 1.4395, + "step": 8513 + }, + { + "epoch": 0.2499853191614305, + "grad_norm": 0.0, + "learning_rate": 1.7567563963808162e-05, + "loss": 1.4473, + "step": 8514 + }, + { + "epoch": 0.2500146808385695, + "grad_norm": 0.0, + "learning_rate": 1.7566942289663497e-05, + "loss": 1.4023, + "step": 8515 + }, + { + "epoch": 0.2500440425157085, + "grad_norm": 0.0, + "learning_rate": 1.7566320547089032e-05, + "loss": 1.4463, + "step": 8516 + }, + { + "epoch": 0.2500734041928475, + "grad_norm": 0.0, + "learning_rate": 1.7565698736090396e-05, + "loss": 1.4932, + "step": 8517 + }, + { + "epoch": 0.2501027658699865, + "grad_norm": 0.0, + "learning_rate": 1.756507685667321e-05, + "loss": 1.3271, + "step": 8518 + }, + { + "epoch": 0.2501321275471255, + "grad_norm": 0.0, + "learning_rate": 1.756445490884309e-05, + "loss": 1.3081, + "step": 8519 + }, + { + "epoch": 0.2501614892242645, + "grad_norm": 0.0, + "learning_rate": 1.7563832892605675e-05, + "loss": 1.376, + "step": 8520 + }, + { + "epoch": 0.25019085090140347, + "grad_norm": 0.0, + "learning_rate": 1.7563210807966577e-05, + "loss": 1.3369, + "step": 8521 + }, + { + "epoch": 0.2502202125785425, + "grad_norm": 0.0, + "learning_rate": 1.756258865493143e-05, + "loss": 1.4707, + "step": 8522 + }, + { + "epoch": 0.2502495742556815, + "grad_norm": 0.0, + "learning_rate": 1.7561966433505854e-05, + "loss": 1.3711, + "step": 8523 + }, + { + "epoch": 0.25027893593282047, + "grad_norm": 0.0, + "learning_rate": 1.7561344143695478e-05, + "loss": 1.3486, + "step": 8524 + }, + { + "epoch": 0.2503082976099595, + "grad_norm": 0.0, + "learning_rate": 1.756072178550593e-05, + "loss": 1.4043, + "step": 8525 + }, + { + "epoch": 0.2503376592870985, + "grad_norm": 0.0, + "learning_rate": 1.7560099358942844e-05, + "loss": 1.4326, + "step": 8526 + }, + { + "epoch": 0.25036702096423746, + "grad_norm": 0.0, + "learning_rate": 1.755947686401184e-05, + "loss": 1.3545, + "step": 8527 + }, + { + "epoch": 0.2503963826413765, + "grad_norm": 0.0, + "learning_rate": 1.755885430071855e-05, + "loss": 1.3643, + "step": 8528 + }, + { + "epoch": 0.2504257443185155, + "grad_norm": 0.0, + "learning_rate": 1.7558231669068607e-05, + "loss": 1.4756, + "step": 8529 + }, + { + "epoch": 0.25045510599565446, + "grad_norm": 0.0, + "learning_rate": 1.7557608969067638e-05, + "loss": 1.3105, + "step": 8530 + }, + { + "epoch": 0.2504844676727935, + "grad_norm": 0.0, + "learning_rate": 1.7556986200721275e-05, + "loss": 1.4141, + "step": 8531 + }, + { + "epoch": 0.2505138293499325, + "grad_norm": 0.0, + "learning_rate": 1.755636336403515e-05, + "loss": 1.4375, + "step": 8532 + }, + { + "epoch": 0.25054319102707145, + "grad_norm": 0.0, + "learning_rate": 1.7555740459014897e-05, + "loss": 1.3682, + "step": 8533 + }, + { + "epoch": 0.25057255270421047, + "grad_norm": 0.0, + "learning_rate": 1.7555117485666148e-05, + "loss": 1.5791, + "step": 8534 + }, + { + "epoch": 0.2506019143813495, + "grad_norm": 0.0, + "learning_rate": 1.7554494443994535e-05, + "loss": 1.4531, + "step": 8535 + }, + { + "epoch": 0.25063127605848845, + "grad_norm": 0.0, + "learning_rate": 1.7553871334005696e-05, + "loss": 1.4482, + "step": 8536 + }, + { + "epoch": 0.25066063773562747, + "grad_norm": 0.0, + "learning_rate": 1.7553248155705264e-05, + "loss": 1.501, + "step": 8537 + }, + { + "epoch": 0.2506899994127665, + "grad_norm": 0.0, + "learning_rate": 1.7552624909098874e-05, + "loss": 1.4043, + "step": 8538 + }, + { + "epoch": 0.25071936108990545, + "grad_norm": 0.0, + "learning_rate": 1.7552001594192162e-05, + "loss": 1.5273, + "step": 8539 + }, + { + "epoch": 0.25074872276704446, + "grad_norm": 0.0, + "learning_rate": 1.7551378210990767e-05, + "loss": 1.2456, + "step": 8540 + }, + { + "epoch": 0.2507780844441834, + "grad_norm": 0.0, + "learning_rate": 1.7550754759500324e-05, + "loss": 1.4023, + "step": 8541 + }, + { + "epoch": 0.25080744612132244, + "grad_norm": 0.0, + "learning_rate": 1.755013123972647e-05, + "loss": 1.4844, + "step": 8542 + }, + { + "epoch": 0.25083680779846146, + "grad_norm": 0.0, + "learning_rate": 1.754950765167485e-05, + "loss": 1.4043, + "step": 8543 + }, + { + "epoch": 0.2508661694756004, + "grad_norm": 0.0, + "learning_rate": 1.7548883995351094e-05, + "loss": 1.4854, + "step": 8544 + }, + { + "epoch": 0.25089553115273944, + "grad_norm": 0.0, + "learning_rate": 1.754826027076085e-05, + "loss": 1.5029, + "step": 8545 + }, + { + "epoch": 0.25092489282987845, + "grad_norm": 0.0, + "learning_rate": 1.7547636477909754e-05, + "loss": 1.4707, + "step": 8546 + }, + { + "epoch": 0.2509542545070174, + "grad_norm": 0.0, + "learning_rate": 1.754701261680345e-05, + "loss": 1.5264, + "step": 8547 + }, + { + "epoch": 0.25098361618415643, + "grad_norm": 0.0, + "learning_rate": 1.7546388687447575e-05, + "loss": 1.4775, + "step": 8548 + }, + { + "epoch": 0.25101297786129545, + "grad_norm": 0.0, + "learning_rate": 1.7545764689847778e-05, + "loss": 1.4697, + "step": 8549 + }, + { + "epoch": 0.2510423395384344, + "grad_norm": 0.0, + "learning_rate": 1.7545140624009696e-05, + "loss": 1.3965, + "step": 8550 + }, + { + "epoch": 0.25107170121557343, + "grad_norm": 0.0, + "learning_rate": 1.7544516489938976e-05, + "loss": 1.4102, + "step": 8551 + }, + { + "epoch": 0.25110106289271245, + "grad_norm": 0.0, + "learning_rate": 1.754389228764126e-05, + "loss": 1.4551, + "step": 8552 + }, + { + "epoch": 0.2511304245698514, + "grad_norm": 0.0, + "learning_rate": 1.7543268017122196e-05, + "loss": 1.5137, + "step": 8553 + }, + { + "epoch": 0.2511597862469904, + "grad_norm": 0.0, + "learning_rate": 1.7542643678387425e-05, + "loss": 1.4482, + "step": 8554 + }, + { + "epoch": 0.25118914792412944, + "grad_norm": 0.0, + "learning_rate": 1.75420192714426e-05, + "loss": 1.3711, + "step": 8555 + }, + { + "epoch": 0.2512185096012684, + "grad_norm": 0.0, + "learning_rate": 1.7541394796293358e-05, + "loss": 1.3662, + "step": 8556 + }, + { + "epoch": 0.2512478712784074, + "grad_norm": 0.0, + "learning_rate": 1.754077025294535e-05, + "loss": 1.3389, + "step": 8557 + }, + { + "epoch": 0.25127723295554644, + "grad_norm": 0.0, + "learning_rate": 1.7540145641404232e-05, + "loss": 1.418, + "step": 8558 + }, + { + "epoch": 0.2513065946326854, + "grad_norm": 0.0, + "learning_rate": 1.7539520961675643e-05, + "loss": 1.2842, + "step": 8559 + }, + { + "epoch": 0.2513359563098244, + "grad_norm": 0.0, + "learning_rate": 1.7538896213765232e-05, + "loss": 1.377, + "step": 8560 + }, + { + "epoch": 0.25136531798696343, + "grad_norm": 0.0, + "learning_rate": 1.7538271397678657e-05, + "loss": 1.4629, + "step": 8561 + }, + { + "epoch": 0.2513946796641024, + "grad_norm": 0.0, + "learning_rate": 1.753764651342156e-05, + "loss": 1.335, + "step": 8562 + }, + { + "epoch": 0.2514240413412414, + "grad_norm": 0.0, + "learning_rate": 1.7537021560999597e-05, + "loss": 1.3687, + "step": 8563 + }, + { + "epoch": 0.25145340301838043, + "grad_norm": 0.0, + "learning_rate": 1.7536396540418414e-05, + "loss": 1.582, + "step": 8564 + }, + { + "epoch": 0.2514827646955194, + "grad_norm": 0.0, + "learning_rate": 1.7535771451683673e-05, + "loss": 1.4414, + "step": 8565 + }, + { + "epoch": 0.2515121263726584, + "grad_norm": 0.0, + "learning_rate": 1.7535146294801015e-05, + "loss": 1.4268, + "step": 8566 + }, + { + "epoch": 0.2515414880497974, + "grad_norm": 0.0, + "learning_rate": 1.75345210697761e-05, + "loss": 1.4033, + "step": 8567 + }, + { + "epoch": 0.2515708497269364, + "grad_norm": 0.0, + "learning_rate": 1.7533895776614587e-05, + "loss": 1.2437, + "step": 8568 + }, + { + "epoch": 0.2516002114040754, + "grad_norm": 0.0, + "learning_rate": 1.753327041532212e-05, + "loss": 1.4434, + "step": 8569 + }, + { + "epoch": 0.2516295730812144, + "grad_norm": 0.0, + "learning_rate": 1.753264498590436e-05, + "loss": 1.4463, + "step": 8570 + }, + { + "epoch": 0.2516589347583534, + "grad_norm": 0.0, + "learning_rate": 1.7532019488366962e-05, + "loss": 1.4209, + "step": 8571 + }, + { + "epoch": 0.2516882964354924, + "grad_norm": 0.0, + "learning_rate": 1.7531393922715585e-05, + "loss": 1.4189, + "step": 8572 + }, + { + "epoch": 0.2517176581126314, + "grad_norm": 0.0, + "learning_rate": 1.7530768288955882e-05, + "loss": 1.4502, + "step": 8573 + }, + { + "epoch": 0.2517470197897704, + "grad_norm": 0.0, + "learning_rate": 1.7530142587093513e-05, + "loss": 1.3672, + "step": 8574 + }, + { + "epoch": 0.2517763814669094, + "grad_norm": 0.0, + "learning_rate": 1.7529516817134137e-05, + "loss": 1.3818, + "step": 8575 + }, + { + "epoch": 0.2518057431440484, + "grad_norm": 0.0, + "learning_rate": 1.7528890979083412e-05, + "loss": 1.4785, + "step": 8576 + }, + { + "epoch": 0.25183510482118737, + "grad_norm": 0.0, + "learning_rate": 1.7528265072946996e-05, + "loss": 1.3281, + "step": 8577 + }, + { + "epoch": 0.2518644664983264, + "grad_norm": 0.0, + "learning_rate": 1.752763909873055e-05, + "loss": 1.3877, + "step": 8578 + }, + { + "epoch": 0.2518938281754654, + "grad_norm": 0.0, + "learning_rate": 1.752701305643974e-05, + "loss": 1.3857, + "step": 8579 + }, + { + "epoch": 0.25192318985260437, + "grad_norm": 0.0, + "learning_rate": 1.752638694608022e-05, + "loss": 1.373, + "step": 8580 + }, + { + "epoch": 0.2519525515297434, + "grad_norm": 0.0, + "learning_rate": 1.7525760767657657e-05, + "loss": 1.4795, + "step": 8581 + }, + { + "epoch": 0.2519819132068824, + "grad_norm": 0.0, + "learning_rate": 1.7525134521177708e-05, + "loss": 1.5039, + "step": 8582 + }, + { + "epoch": 0.25201127488402136, + "grad_norm": 0.0, + "learning_rate": 1.7524508206646044e-05, + "loss": 1.3857, + "step": 8583 + }, + { + "epoch": 0.2520406365611604, + "grad_norm": 0.0, + "learning_rate": 1.7523881824068327e-05, + "loss": 1.4512, + "step": 8584 + }, + { + "epoch": 0.2520699982382994, + "grad_norm": 0.0, + "learning_rate": 1.752325537345022e-05, + "loss": 1.3057, + "step": 8585 + }, + { + "epoch": 0.25209935991543836, + "grad_norm": 0.0, + "learning_rate": 1.7522628854797384e-05, + "loss": 1.3623, + "step": 8586 + }, + { + "epoch": 0.2521287215925774, + "grad_norm": 0.0, + "learning_rate": 1.752200226811549e-05, + "loss": 1.4668, + "step": 8587 + }, + { + "epoch": 0.2521580832697164, + "grad_norm": 0.0, + "learning_rate": 1.7521375613410205e-05, + "loss": 1.4746, + "step": 8588 + }, + { + "epoch": 0.25218744494685535, + "grad_norm": 0.0, + "learning_rate": 1.752074889068719e-05, + "loss": 1.4219, + "step": 8589 + }, + { + "epoch": 0.25221680662399437, + "grad_norm": 0.0, + "learning_rate": 1.752012209995212e-05, + "loss": 1.4893, + "step": 8590 + }, + { + "epoch": 0.25224616830113333, + "grad_norm": 0.0, + "learning_rate": 1.7519495241210657e-05, + "loss": 1.4092, + "step": 8591 + }, + { + "epoch": 0.25227552997827235, + "grad_norm": 0.0, + "learning_rate": 1.7518868314468475e-05, + "loss": 1.4492, + "step": 8592 + }, + { + "epoch": 0.25230489165541137, + "grad_norm": 0.0, + "learning_rate": 1.751824131973124e-05, + "loss": 1.4902, + "step": 8593 + }, + { + "epoch": 0.25233425333255033, + "grad_norm": 0.0, + "learning_rate": 1.7517614257004624e-05, + "loss": 1.4033, + "step": 8594 + }, + { + "epoch": 0.25236361500968935, + "grad_norm": 0.0, + "learning_rate": 1.75169871262943e-05, + "loss": 1.4482, + "step": 8595 + }, + { + "epoch": 0.25239297668682836, + "grad_norm": 0.0, + "learning_rate": 1.751635992760593e-05, + "loss": 1.2725, + "step": 8596 + }, + { + "epoch": 0.2524223383639673, + "grad_norm": 0.0, + "learning_rate": 1.7515732660945196e-05, + "loss": 1.4395, + "step": 8597 + }, + { + "epoch": 0.25245170004110634, + "grad_norm": 0.0, + "learning_rate": 1.7515105326317767e-05, + "loss": 1.2783, + "step": 8598 + }, + { + "epoch": 0.25248106171824536, + "grad_norm": 0.0, + "learning_rate": 1.7514477923729318e-05, + "loss": 1.4111, + "step": 8599 + }, + { + "epoch": 0.2525104233953843, + "grad_norm": 0.0, + "learning_rate": 1.7513850453185515e-05, + "loss": 1.3809, + "step": 8600 + }, + { + "epoch": 0.25253978507252334, + "grad_norm": 0.0, + "learning_rate": 1.7513222914692042e-05, + "loss": 1.5371, + "step": 8601 + }, + { + "epoch": 0.25256914674966235, + "grad_norm": 0.0, + "learning_rate": 1.751259530825457e-05, + "loss": 1.3848, + "step": 8602 + }, + { + "epoch": 0.2525985084268013, + "grad_norm": 0.0, + "learning_rate": 1.7511967633878774e-05, + "loss": 1.375, + "step": 8603 + }, + { + "epoch": 0.25262787010394033, + "grad_norm": 0.0, + "learning_rate": 1.751133989157033e-05, + "loss": 1.5098, + "step": 8604 + }, + { + "epoch": 0.25265723178107935, + "grad_norm": 0.0, + "learning_rate": 1.7510712081334917e-05, + "loss": 1.4297, + "step": 8605 + }, + { + "epoch": 0.2526865934582183, + "grad_norm": 0.0, + "learning_rate": 1.751008420317821e-05, + "loss": 1.4297, + "step": 8606 + }, + { + "epoch": 0.25271595513535733, + "grad_norm": 0.0, + "learning_rate": 1.7509456257105886e-05, + "loss": 1.2461, + "step": 8607 + }, + { + "epoch": 0.25274531681249635, + "grad_norm": 0.0, + "learning_rate": 1.750882824312363e-05, + "loss": 1.3887, + "step": 8608 + }, + { + "epoch": 0.2527746784896353, + "grad_norm": 0.0, + "learning_rate": 1.7508200161237115e-05, + "loss": 1.4453, + "step": 8609 + }, + { + "epoch": 0.2528040401667743, + "grad_norm": 0.0, + "learning_rate": 1.7507572011452023e-05, + "loss": 1.4404, + "step": 8610 + }, + { + "epoch": 0.25283340184391334, + "grad_norm": 0.0, + "learning_rate": 1.7506943793774037e-05, + "loss": 1.3984, + "step": 8611 + }, + { + "epoch": 0.2528627635210523, + "grad_norm": 0.0, + "learning_rate": 1.750631550820883e-05, + "loss": 1.4785, + "step": 8612 + }, + { + "epoch": 0.2528921251981913, + "grad_norm": 0.0, + "learning_rate": 1.7505687154762093e-05, + "loss": 1.4678, + "step": 8613 + }, + { + "epoch": 0.25292148687533034, + "grad_norm": 0.0, + "learning_rate": 1.7505058733439505e-05, + "loss": 1.4365, + "step": 8614 + }, + { + "epoch": 0.2529508485524693, + "grad_norm": 0.0, + "learning_rate": 1.750443024424675e-05, + "loss": 1.3672, + "step": 8615 + }, + { + "epoch": 0.2529802102296083, + "grad_norm": 0.0, + "learning_rate": 1.750380168718951e-05, + "loss": 1.4854, + "step": 8616 + }, + { + "epoch": 0.25300957190674733, + "grad_norm": 0.0, + "learning_rate": 1.750317306227347e-05, + "loss": 1.4932, + "step": 8617 + }, + { + "epoch": 0.2530389335838863, + "grad_norm": 0.0, + "learning_rate": 1.7502544369504314e-05, + "loss": 1.4766, + "step": 8618 + }, + { + "epoch": 0.2530682952610253, + "grad_norm": 0.0, + "learning_rate": 1.7501915608887724e-05, + "loss": 1.4443, + "step": 8619 + }, + { + "epoch": 0.25309765693816433, + "grad_norm": 0.0, + "learning_rate": 1.7501286780429398e-05, + "loss": 1.4746, + "step": 8620 + }, + { + "epoch": 0.2531270186153033, + "grad_norm": 0.0, + "learning_rate": 1.7500657884135005e-05, + "loss": 1.416, + "step": 8621 + }, + { + "epoch": 0.2531563802924423, + "grad_norm": 0.0, + "learning_rate": 1.7500028920010248e-05, + "loss": 1.4727, + "step": 8622 + }, + { + "epoch": 0.2531857419695813, + "grad_norm": 0.0, + "learning_rate": 1.7499399888060805e-05, + "loss": 1.5332, + "step": 8623 + }, + { + "epoch": 0.2532151036467203, + "grad_norm": 0.0, + "learning_rate": 1.749877078829237e-05, + "loss": 1.3271, + "step": 8624 + }, + { + "epoch": 0.2532444653238593, + "grad_norm": 0.0, + "learning_rate": 1.7498141620710633e-05, + "loss": 1.4736, + "step": 8625 + }, + { + "epoch": 0.2532738270009983, + "grad_norm": 0.0, + "learning_rate": 1.7497512385321277e-05, + "loss": 1.3652, + "step": 8626 + }, + { + "epoch": 0.2533031886781373, + "grad_norm": 0.0, + "learning_rate": 1.749688308213e-05, + "loss": 1.3564, + "step": 8627 + }, + { + "epoch": 0.2533325503552763, + "grad_norm": 0.0, + "learning_rate": 1.7496253711142488e-05, + "loss": 1.4219, + "step": 8628 + }, + { + "epoch": 0.2533619120324153, + "grad_norm": 0.0, + "learning_rate": 1.7495624272364435e-05, + "loss": 1.4336, + "step": 8629 + }, + { + "epoch": 0.2533912737095543, + "grad_norm": 0.0, + "learning_rate": 1.7494994765801527e-05, + "loss": 1.4268, + "step": 8630 + }, + { + "epoch": 0.2534206353866933, + "grad_norm": 0.0, + "learning_rate": 1.7494365191459466e-05, + "loss": 1.4121, + "step": 8631 + }, + { + "epoch": 0.2534499970638323, + "grad_norm": 0.0, + "learning_rate": 1.749373554934394e-05, + "loss": 1.3154, + "step": 8632 + }, + { + "epoch": 0.2534793587409713, + "grad_norm": 0.0, + "learning_rate": 1.7493105839460646e-05, + "loss": 1.3506, + "step": 8633 + }, + { + "epoch": 0.2535087204181103, + "grad_norm": 0.0, + "learning_rate": 1.7492476061815272e-05, + "loss": 1.4238, + "step": 8634 + }, + { + "epoch": 0.2535380820952493, + "grad_norm": 0.0, + "learning_rate": 1.7491846216413523e-05, + "loss": 1.3394, + "step": 8635 + }, + { + "epoch": 0.25356744377238827, + "grad_norm": 0.0, + "learning_rate": 1.749121630326109e-05, + "loss": 1.2861, + "step": 8636 + }, + { + "epoch": 0.2535968054495273, + "grad_norm": 0.0, + "learning_rate": 1.7490586322363667e-05, + "loss": 1.3809, + "step": 8637 + }, + { + "epoch": 0.2536261671266663, + "grad_norm": 0.0, + "learning_rate": 1.7489956273726954e-05, + "loss": 1.4189, + "step": 8638 + }, + { + "epoch": 0.25365552880380526, + "grad_norm": 0.0, + "learning_rate": 1.7489326157356648e-05, + "loss": 1.4424, + "step": 8639 + }, + { + "epoch": 0.2536848904809443, + "grad_norm": 0.0, + "learning_rate": 1.7488695973258448e-05, + "loss": 1.4893, + "step": 8640 + }, + { + "epoch": 0.25371425215808324, + "grad_norm": 0.0, + "learning_rate": 1.7488065721438053e-05, + "loss": 1.4629, + "step": 8641 + }, + { + "epoch": 0.25374361383522226, + "grad_norm": 0.0, + "learning_rate": 1.748743540190116e-05, + "loss": 1.3418, + "step": 8642 + }, + { + "epoch": 0.2537729755123613, + "grad_norm": 0.0, + "learning_rate": 1.7486805014653474e-05, + "loss": 1.4678, + "step": 8643 + }, + { + "epoch": 0.25380233718950024, + "grad_norm": 0.0, + "learning_rate": 1.7486174559700694e-05, + "loss": 1.4014, + "step": 8644 + }, + { + "epoch": 0.25383169886663925, + "grad_norm": 0.0, + "learning_rate": 1.7485544037048514e-05, + "loss": 1.3584, + "step": 8645 + }, + { + "epoch": 0.25386106054377827, + "grad_norm": 0.0, + "learning_rate": 1.7484913446702647e-05, + "loss": 1.4004, + "step": 8646 + }, + { + "epoch": 0.25389042222091723, + "grad_norm": 0.0, + "learning_rate": 1.748428278866879e-05, + "loss": 1.3867, + "step": 8647 + }, + { + "epoch": 0.25391978389805625, + "grad_norm": 0.0, + "learning_rate": 1.7483652062952646e-05, + "loss": 1.3555, + "step": 8648 + }, + { + "epoch": 0.25394914557519527, + "grad_norm": 0.0, + "learning_rate": 1.748302126955992e-05, + "loss": 1.4072, + "step": 8649 + }, + { + "epoch": 0.25397850725233423, + "grad_norm": 0.0, + "learning_rate": 1.7482390408496316e-05, + "loss": 1.3877, + "step": 8650 + }, + { + "epoch": 0.25400786892947325, + "grad_norm": 0.0, + "learning_rate": 1.7481759479767542e-05, + "loss": 1.4053, + "step": 8651 + }, + { + "epoch": 0.25403723060661226, + "grad_norm": 0.0, + "learning_rate": 1.7481128483379298e-05, + "loss": 1.3721, + "step": 8652 + }, + { + "epoch": 0.2540665922837512, + "grad_norm": 0.0, + "learning_rate": 1.7480497419337296e-05, + "loss": 1.3887, + "step": 8653 + }, + { + "epoch": 0.25409595396089024, + "grad_norm": 0.0, + "learning_rate": 1.7479866287647238e-05, + "loss": 1.4336, + "step": 8654 + }, + { + "epoch": 0.25412531563802926, + "grad_norm": 0.0, + "learning_rate": 1.7479235088314836e-05, + "loss": 1.335, + "step": 8655 + }, + { + "epoch": 0.2541546773151682, + "grad_norm": 0.0, + "learning_rate": 1.747860382134579e-05, + "loss": 1.4678, + "step": 8656 + }, + { + "epoch": 0.25418403899230724, + "grad_norm": 0.0, + "learning_rate": 1.7477972486745817e-05, + "loss": 1.5205, + "step": 8657 + }, + { + "epoch": 0.25421340066944625, + "grad_norm": 0.0, + "learning_rate": 1.7477341084520625e-05, + "loss": 1.4375, + "step": 8658 + }, + { + "epoch": 0.2542427623465852, + "grad_norm": 0.0, + "learning_rate": 1.7476709614675922e-05, + "loss": 1.4824, + "step": 8659 + }, + { + "epoch": 0.25427212402372423, + "grad_norm": 0.0, + "learning_rate": 1.747607807721742e-05, + "loss": 1.4443, + "step": 8660 + }, + { + "epoch": 0.25430148570086325, + "grad_norm": 0.0, + "learning_rate": 1.7475446472150827e-05, + "loss": 1.29, + "step": 8661 + }, + { + "epoch": 0.2543308473780022, + "grad_norm": 0.0, + "learning_rate": 1.747481479948186e-05, + "loss": 1.3213, + "step": 8662 + }, + { + "epoch": 0.25436020905514123, + "grad_norm": 0.0, + "learning_rate": 1.7474183059216225e-05, + "loss": 1.5, + "step": 8663 + }, + { + "epoch": 0.25438957073228025, + "grad_norm": 0.0, + "learning_rate": 1.747355125135964e-05, + "loss": 1.332, + "step": 8664 + }, + { + "epoch": 0.2544189324094192, + "grad_norm": 0.0, + "learning_rate": 1.747291937591782e-05, + "loss": 1.3604, + "step": 8665 + }, + { + "epoch": 0.2544482940865582, + "grad_norm": 0.0, + "learning_rate": 1.7472287432896472e-05, + "loss": 1.4863, + "step": 8666 + }, + { + "epoch": 0.25447765576369724, + "grad_norm": 0.0, + "learning_rate": 1.7471655422301314e-05, + "loss": 1.3389, + "step": 8667 + }, + { + "epoch": 0.2545070174408362, + "grad_norm": 0.0, + "learning_rate": 1.7471023344138067e-05, + "loss": 1.4521, + "step": 8668 + }, + { + "epoch": 0.2545363791179752, + "grad_norm": 0.0, + "learning_rate": 1.7470391198412443e-05, + "loss": 1.3496, + "step": 8669 + }, + { + "epoch": 0.25456574079511424, + "grad_norm": 0.0, + "learning_rate": 1.7469758985130155e-05, + "loss": 1.2607, + "step": 8670 + }, + { + "epoch": 0.2545951024722532, + "grad_norm": 0.0, + "learning_rate": 1.7469126704296924e-05, + "loss": 1.4482, + "step": 8671 + }, + { + "epoch": 0.2546244641493922, + "grad_norm": 0.0, + "learning_rate": 1.7468494355918468e-05, + "loss": 1.2344, + "step": 8672 + }, + { + "epoch": 0.25465382582653123, + "grad_norm": 0.0, + "learning_rate": 1.74678619400005e-05, + "loss": 1.3589, + "step": 8673 + }, + { + "epoch": 0.2546831875036702, + "grad_norm": 0.0, + "learning_rate": 1.746722945654875e-05, + "loss": 1.4238, + "step": 8674 + }, + { + "epoch": 0.2547125491808092, + "grad_norm": 0.0, + "learning_rate": 1.7466596905568933e-05, + "loss": 1.29, + "step": 8675 + }, + { + "epoch": 0.25474191085794823, + "grad_norm": 0.0, + "learning_rate": 1.7465964287066764e-05, + "loss": 1.459, + "step": 8676 + }, + { + "epoch": 0.2547712725350872, + "grad_norm": 0.0, + "learning_rate": 1.7465331601047968e-05, + "loss": 1.2568, + "step": 8677 + }, + { + "epoch": 0.2548006342122262, + "grad_norm": 0.0, + "learning_rate": 1.746469884751827e-05, + "loss": 1.4658, + "step": 8678 + }, + { + "epoch": 0.2548299958893652, + "grad_norm": 0.0, + "learning_rate": 1.7464066026483384e-05, + "loss": 1.4277, + "step": 8679 + }, + { + "epoch": 0.2548593575665042, + "grad_norm": 0.0, + "learning_rate": 1.746343313794904e-05, + "loss": 1.2832, + "step": 8680 + }, + { + "epoch": 0.2548887192436432, + "grad_norm": 0.0, + "learning_rate": 1.7462800181920957e-05, + "loss": 1.3418, + "step": 8681 + }, + { + "epoch": 0.2549180809207822, + "grad_norm": 0.0, + "learning_rate": 1.746216715840486e-05, + "loss": 1.4121, + "step": 8682 + }, + { + "epoch": 0.2549474425979212, + "grad_norm": 0.0, + "learning_rate": 1.7461534067406477e-05, + "loss": 1.5059, + "step": 8683 + }, + { + "epoch": 0.2549768042750602, + "grad_norm": 0.0, + "learning_rate": 1.7460900908931527e-05, + "loss": 1.2568, + "step": 8684 + }, + { + "epoch": 0.2550061659521992, + "grad_norm": 0.0, + "learning_rate": 1.7460267682985745e-05, + "loss": 1.3105, + "step": 8685 + }, + { + "epoch": 0.2550355276293382, + "grad_norm": 0.0, + "learning_rate": 1.7459634389574848e-05, + "loss": 1.3887, + "step": 8686 + }, + { + "epoch": 0.2550648893064772, + "grad_norm": 0.0, + "learning_rate": 1.7459001028704567e-05, + "loss": 1.4844, + "step": 8687 + }, + { + "epoch": 0.2550942509836162, + "grad_norm": 0.0, + "learning_rate": 1.745836760038063e-05, + "loss": 1.4502, + "step": 8688 + }, + { + "epoch": 0.2551236126607552, + "grad_norm": 0.0, + "learning_rate": 1.7457734104608762e-05, + "loss": 1.5254, + "step": 8689 + }, + { + "epoch": 0.2551529743378942, + "grad_norm": 0.0, + "learning_rate": 1.7457100541394697e-05, + "loss": 1.3662, + "step": 8690 + }, + { + "epoch": 0.25518233601503315, + "grad_norm": 0.0, + "learning_rate": 1.7456466910744162e-05, + "loss": 1.207, + "step": 8691 + }, + { + "epoch": 0.25521169769217217, + "grad_norm": 0.0, + "learning_rate": 1.745583321266289e-05, + "loss": 1.502, + "step": 8692 + }, + { + "epoch": 0.2552410593693112, + "grad_norm": 0.0, + "learning_rate": 1.7455199447156607e-05, + "loss": 1.4023, + "step": 8693 + }, + { + "epoch": 0.25527042104645015, + "grad_norm": 0.0, + "learning_rate": 1.7454565614231042e-05, + "loss": 1.2363, + "step": 8694 + }, + { + "epoch": 0.25529978272358916, + "grad_norm": 0.0, + "learning_rate": 1.7453931713891935e-05, + "loss": 1.376, + "step": 8695 + }, + { + "epoch": 0.2553291444007282, + "grad_norm": 0.0, + "learning_rate": 1.7453297746145014e-05, + "loss": 1.3428, + "step": 8696 + }, + { + "epoch": 0.25535850607786714, + "grad_norm": 0.0, + "learning_rate": 1.7452663710996012e-05, + "loss": 1.4951, + "step": 8697 + }, + { + "epoch": 0.25538786775500616, + "grad_norm": 0.0, + "learning_rate": 1.7452029608450662e-05, + "loss": 1.4902, + "step": 8698 + }, + { + "epoch": 0.2554172294321452, + "grad_norm": 0.0, + "learning_rate": 1.7451395438514703e-05, + "loss": 1.3896, + "step": 8699 + }, + { + "epoch": 0.25544659110928414, + "grad_norm": 0.0, + "learning_rate": 1.7450761201193865e-05, + "loss": 1.4307, + "step": 8700 + }, + { + "epoch": 0.25547595278642315, + "grad_norm": 0.0, + "learning_rate": 1.7450126896493883e-05, + "loss": 1.2358, + "step": 8701 + }, + { + "epoch": 0.25550531446356217, + "grad_norm": 0.0, + "learning_rate": 1.74494925244205e-05, + "loss": 1.3262, + "step": 8702 + }, + { + "epoch": 0.25553467614070113, + "grad_norm": 0.0, + "learning_rate": 1.7448858084979448e-05, + "loss": 1.2344, + "step": 8703 + }, + { + "epoch": 0.25556403781784015, + "grad_norm": 0.0, + "learning_rate": 1.744822357817646e-05, + "loss": 1.333, + "step": 8704 + }, + { + "epoch": 0.25559339949497917, + "grad_norm": 0.0, + "learning_rate": 1.7447589004017283e-05, + "loss": 1.375, + "step": 8705 + }, + { + "epoch": 0.25562276117211813, + "grad_norm": 0.0, + "learning_rate": 1.7446954362507647e-05, + "loss": 1.5371, + "step": 8706 + }, + { + "epoch": 0.25565212284925715, + "grad_norm": 0.0, + "learning_rate": 1.74463196536533e-05, + "loss": 1.3193, + "step": 8707 + }, + { + "epoch": 0.25568148452639616, + "grad_norm": 0.0, + "learning_rate": 1.7445684877459978e-05, + "loss": 1.3149, + "step": 8708 + }, + { + "epoch": 0.2557108462035351, + "grad_norm": 0.0, + "learning_rate": 1.7445050033933414e-05, + "loss": 1.333, + "step": 8709 + }, + { + "epoch": 0.25574020788067414, + "grad_norm": 0.0, + "learning_rate": 1.7444415123079362e-05, + "loss": 1.3711, + "step": 8710 + }, + { + "epoch": 0.25576956955781316, + "grad_norm": 0.0, + "learning_rate": 1.7443780144903555e-05, + "loss": 1.3389, + "step": 8711 + }, + { + "epoch": 0.2557989312349521, + "grad_norm": 0.0, + "learning_rate": 1.744314509941174e-05, + "loss": 1.376, + "step": 8712 + }, + { + "epoch": 0.25582829291209114, + "grad_norm": 0.0, + "learning_rate": 1.7442509986609654e-05, + "loss": 1.3672, + "step": 8713 + }, + { + "epoch": 0.25585765458923015, + "grad_norm": 0.0, + "learning_rate": 1.7441874806503046e-05, + "loss": 1.3887, + "step": 8714 + }, + { + "epoch": 0.2558870162663691, + "grad_norm": 0.0, + "learning_rate": 1.7441239559097658e-05, + "loss": 1.4365, + "step": 8715 + }, + { + "epoch": 0.25591637794350813, + "grad_norm": 0.0, + "learning_rate": 1.7440604244399236e-05, + "loss": 1.4189, + "step": 8716 + }, + { + "epoch": 0.25594573962064715, + "grad_norm": 0.0, + "learning_rate": 1.7439968862413522e-05, + "loss": 1.4258, + "step": 8717 + }, + { + "epoch": 0.2559751012977861, + "grad_norm": 0.0, + "learning_rate": 1.7439333413146267e-05, + "loss": 1.4824, + "step": 8718 + }, + { + "epoch": 0.25600446297492513, + "grad_norm": 0.0, + "learning_rate": 1.7438697896603214e-05, + "loss": 1.3799, + "step": 8719 + }, + { + "epoch": 0.25603382465206415, + "grad_norm": 0.0, + "learning_rate": 1.743806231279011e-05, + "loss": 1.4697, + "step": 8720 + }, + { + "epoch": 0.2560631863292031, + "grad_norm": 0.0, + "learning_rate": 1.7437426661712704e-05, + "loss": 1.3643, + "step": 8721 + }, + { + "epoch": 0.2560925480063421, + "grad_norm": 0.0, + "learning_rate": 1.7436790943376746e-05, + "loss": 1.4248, + "step": 8722 + }, + { + "epoch": 0.25612190968348114, + "grad_norm": 0.0, + "learning_rate": 1.743615515778798e-05, + "loss": 1.3379, + "step": 8723 + }, + { + "epoch": 0.2561512713606201, + "grad_norm": 0.0, + "learning_rate": 1.7435519304952157e-05, + "loss": 1.4307, + "step": 8724 + }, + { + "epoch": 0.2561806330377591, + "grad_norm": 0.0, + "learning_rate": 1.7434883384875032e-05, + "loss": 1.3867, + "step": 8725 + }, + { + "epoch": 0.25620999471489814, + "grad_norm": 0.0, + "learning_rate": 1.7434247397562353e-05, + "loss": 1.4863, + "step": 8726 + }, + { + "epoch": 0.2562393563920371, + "grad_norm": 0.0, + "learning_rate": 1.743361134301987e-05, + "loss": 1.3213, + "step": 8727 + }, + { + "epoch": 0.2562687180691761, + "grad_norm": 0.0, + "learning_rate": 1.7432975221253333e-05, + "loss": 1.5537, + "step": 8728 + }, + { + "epoch": 0.25629807974631513, + "grad_norm": 0.0, + "learning_rate": 1.74323390322685e-05, + "loss": 1.3564, + "step": 8729 + }, + { + "epoch": 0.2563274414234541, + "grad_norm": 0.0, + "learning_rate": 1.743170277607112e-05, + "loss": 1.4482, + "step": 8730 + }, + { + "epoch": 0.2563568031005931, + "grad_norm": 0.0, + "learning_rate": 1.743106645266695e-05, + "loss": 1.4863, + "step": 8731 + }, + { + "epoch": 0.25638616477773213, + "grad_norm": 0.0, + "learning_rate": 1.7430430062061742e-05, + "loss": 1.4961, + "step": 8732 + }, + { + "epoch": 0.2564155264548711, + "grad_norm": 0.0, + "learning_rate": 1.742979360426125e-05, + "loss": 1.457, + "step": 8733 + }, + { + "epoch": 0.2564448881320101, + "grad_norm": 0.0, + "learning_rate": 1.7429157079271237e-05, + "loss": 1.3877, + "step": 8734 + }, + { + "epoch": 0.2564742498091491, + "grad_norm": 0.0, + "learning_rate": 1.742852048709745e-05, + "loss": 1.3291, + "step": 8735 + }, + { + "epoch": 0.2565036114862881, + "grad_norm": 0.0, + "learning_rate": 1.742788382774565e-05, + "loss": 1.375, + "step": 8736 + }, + { + "epoch": 0.2565329731634271, + "grad_norm": 0.0, + "learning_rate": 1.7427247101221596e-05, + "loss": 1.4805, + "step": 8737 + }, + { + "epoch": 0.2565623348405661, + "grad_norm": 0.0, + "learning_rate": 1.7426610307531043e-05, + "loss": 1.4785, + "step": 8738 + }, + { + "epoch": 0.2565916965177051, + "grad_norm": 0.0, + "learning_rate": 1.742597344667975e-05, + "loss": 1.4707, + "step": 8739 + }, + { + "epoch": 0.2566210581948441, + "grad_norm": 0.0, + "learning_rate": 1.7425336518673482e-05, + "loss": 1.4062, + "step": 8740 + }, + { + "epoch": 0.2566504198719831, + "grad_norm": 0.0, + "learning_rate": 1.742469952351799e-05, + "loss": 1.3682, + "step": 8741 + }, + { + "epoch": 0.2566797815491221, + "grad_norm": 0.0, + "learning_rate": 1.742406246121904e-05, + "loss": 1.3906, + "step": 8742 + }, + { + "epoch": 0.2567091432262611, + "grad_norm": 0.0, + "learning_rate": 1.742342533178239e-05, + "loss": 1.458, + "step": 8743 + }, + { + "epoch": 0.25673850490340006, + "grad_norm": 0.0, + "learning_rate": 1.7422788135213806e-05, + "loss": 1.5186, + "step": 8744 + }, + { + "epoch": 0.2567678665805391, + "grad_norm": 0.0, + "learning_rate": 1.7422150871519047e-05, + "loss": 1.3936, + "step": 8745 + }, + { + "epoch": 0.2567972282576781, + "grad_norm": 0.0, + "learning_rate": 1.7421513540703876e-05, + "loss": 1.4219, + "step": 8746 + }, + { + "epoch": 0.25682658993481705, + "grad_norm": 0.0, + "learning_rate": 1.742087614277406e-05, + "loss": 1.2788, + "step": 8747 + }, + { + "epoch": 0.25685595161195607, + "grad_norm": 0.0, + "learning_rate": 1.7420238677735358e-05, + "loss": 1.3711, + "step": 8748 + }, + { + "epoch": 0.2568853132890951, + "grad_norm": 0.0, + "learning_rate": 1.741960114559354e-05, + "loss": 1.4287, + "step": 8749 + }, + { + "epoch": 0.25691467496623405, + "grad_norm": 0.0, + "learning_rate": 1.7418963546354363e-05, + "loss": 1.3281, + "step": 8750 + }, + { + "epoch": 0.25694403664337306, + "grad_norm": 0.0, + "learning_rate": 1.7418325880023604e-05, + "loss": 1.4932, + "step": 8751 + }, + { + "epoch": 0.2569733983205121, + "grad_norm": 0.0, + "learning_rate": 1.7417688146607022e-05, + "loss": 1.4854, + "step": 8752 + }, + { + "epoch": 0.25700275999765104, + "grad_norm": 0.0, + "learning_rate": 1.7417050346110388e-05, + "loss": 1.3906, + "step": 8753 + }, + { + "epoch": 0.25703212167479006, + "grad_norm": 0.0, + "learning_rate": 1.7416412478539467e-05, + "loss": 1.2822, + "step": 8754 + }, + { + "epoch": 0.2570614833519291, + "grad_norm": 0.0, + "learning_rate": 1.7415774543900027e-05, + "loss": 1.4648, + "step": 8755 + }, + { + "epoch": 0.25709084502906804, + "grad_norm": 0.0, + "learning_rate": 1.741513654219784e-05, + "loss": 1.3638, + "step": 8756 + }, + { + "epoch": 0.25712020670620706, + "grad_norm": 0.0, + "learning_rate": 1.7414498473438673e-05, + "loss": 1.3662, + "step": 8757 + }, + { + "epoch": 0.25714956838334607, + "grad_norm": 0.0, + "learning_rate": 1.7413860337628302e-05, + "loss": 1.4385, + "step": 8758 + }, + { + "epoch": 0.25717893006048503, + "grad_norm": 0.0, + "learning_rate": 1.7413222134772488e-05, + "loss": 1.4326, + "step": 8759 + }, + { + "epoch": 0.25720829173762405, + "grad_norm": 0.0, + "learning_rate": 1.741258386487701e-05, + "loss": 1.2998, + "step": 8760 + }, + { + "epoch": 0.25723765341476307, + "grad_norm": 0.0, + "learning_rate": 1.7411945527947638e-05, + "loss": 1.3936, + "step": 8761 + }, + { + "epoch": 0.25726701509190203, + "grad_norm": 0.0, + "learning_rate": 1.7411307123990143e-05, + "loss": 1.4961, + "step": 8762 + }, + { + "epoch": 0.25729637676904105, + "grad_norm": 0.0, + "learning_rate": 1.74106686530103e-05, + "loss": 1.4102, + "step": 8763 + }, + { + "epoch": 0.25732573844618006, + "grad_norm": 0.0, + "learning_rate": 1.7410030115013884e-05, + "loss": 1.4316, + "step": 8764 + }, + { + "epoch": 0.257355100123319, + "grad_norm": 0.0, + "learning_rate": 1.7409391510006667e-05, + "loss": 1.3057, + "step": 8765 + }, + { + "epoch": 0.25738446180045804, + "grad_norm": 0.0, + "learning_rate": 1.7408752837994425e-05, + "loss": 1.4268, + "step": 8766 + }, + { + "epoch": 0.25741382347759706, + "grad_norm": 0.0, + "learning_rate": 1.7408114098982934e-05, + "loss": 1.4209, + "step": 8767 + }, + { + "epoch": 0.257443185154736, + "grad_norm": 0.0, + "learning_rate": 1.740747529297797e-05, + "loss": 1.2114, + "step": 8768 + }, + { + "epoch": 0.25747254683187504, + "grad_norm": 0.0, + "learning_rate": 1.7406836419985308e-05, + "loss": 1.3994, + "step": 8769 + }, + { + "epoch": 0.25750190850901405, + "grad_norm": 0.0, + "learning_rate": 1.740619748001073e-05, + "loss": 1.3691, + "step": 8770 + }, + { + "epoch": 0.257531270186153, + "grad_norm": 0.0, + "learning_rate": 1.7405558473060007e-05, + "loss": 1.4258, + "step": 8771 + }, + { + "epoch": 0.25756063186329203, + "grad_norm": 0.0, + "learning_rate": 1.7404919399138925e-05, + "loss": 1.208, + "step": 8772 + }, + { + "epoch": 0.25758999354043105, + "grad_norm": 0.0, + "learning_rate": 1.7404280258253262e-05, + "loss": 1.46, + "step": 8773 + }, + { + "epoch": 0.25761935521757, + "grad_norm": 0.0, + "learning_rate": 1.7403641050408794e-05, + "loss": 1.458, + "step": 8774 + }, + { + "epoch": 0.25764871689470903, + "grad_norm": 0.0, + "learning_rate": 1.7403001775611307e-05, + "loss": 1.4385, + "step": 8775 + }, + { + "epoch": 0.25767807857184805, + "grad_norm": 0.0, + "learning_rate": 1.7402362433866577e-05, + "loss": 1.3379, + "step": 8776 + }, + { + "epoch": 0.257707440248987, + "grad_norm": 0.0, + "learning_rate": 1.7401723025180385e-05, + "loss": 1.4189, + "step": 8777 + }, + { + "epoch": 0.257736801926126, + "grad_norm": 0.0, + "learning_rate": 1.7401083549558522e-05, + "loss": 1.3379, + "step": 8778 + }, + { + "epoch": 0.25776616360326504, + "grad_norm": 0.0, + "learning_rate": 1.740044400700676e-05, + "loss": 1.4229, + "step": 8779 + }, + { + "epoch": 0.257795525280404, + "grad_norm": 0.0, + "learning_rate": 1.739980439753089e-05, + "loss": 1.4824, + "step": 8780 + }, + { + "epoch": 0.257824886957543, + "grad_norm": 0.0, + "learning_rate": 1.739916472113669e-05, + "loss": 1.3623, + "step": 8781 + }, + { + "epoch": 0.25785424863468204, + "grad_norm": 0.0, + "learning_rate": 1.7398524977829954e-05, + "loss": 1.3828, + "step": 8782 + }, + { + "epoch": 0.257883610311821, + "grad_norm": 0.0, + "learning_rate": 1.739788516761646e-05, + "loss": 1.4053, + "step": 8783 + }, + { + "epoch": 0.25791297198896, + "grad_norm": 0.0, + "learning_rate": 1.7397245290501994e-05, + "loss": 1.5273, + "step": 8784 + }, + { + "epoch": 0.25794233366609903, + "grad_norm": 0.0, + "learning_rate": 1.7396605346492345e-05, + "loss": 1.5752, + "step": 8785 + }, + { + "epoch": 0.257971695343238, + "grad_norm": 0.0, + "learning_rate": 1.73959653355933e-05, + "loss": 1.4727, + "step": 8786 + }, + { + "epoch": 0.258001057020377, + "grad_norm": 0.0, + "learning_rate": 1.7395325257810647e-05, + "loss": 1.2666, + "step": 8787 + }, + { + "epoch": 0.25803041869751603, + "grad_norm": 0.0, + "learning_rate": 1.7394685113150175e-05, + "loss": 1.5674, + "step": 8788 + }, + { + "epoch": 0.258059780374655, + "grad_norm": 0.0, + "learning_rate": 1.7394044901617668e-05, + "loss": 1.4053, + "step": 8789 + }, + { + "epoch": 0.258089142051794, + "grad_norm": 0.0, + "learning_rate": 1.739340462321892e-05, + "loss": 1.2983, + "step": 8790 + }, + { + "epoch": 0.258118503728933, + "grad_norm": 0.0, + "learning_rate": 1.7392764277959725e-05, + "loss": 1.4404, + "step": 8791 + }, + { + "epoch": 0.258147865406072, + "grad_norm": 0.0, + "learning_rate": 1.7392123865845866e-05, + "loss": 1.3779, + "step": 8792 + }, + { + "epoch": 0.258177227083211, + "grad_norm": 0.0, + "learning_rate": 1.7391483386883137e-05, + "loss": 1.4111, + "step": 8793 + }, + { + "epoch": 0.25820658876034996, + "grad_norm": 0.0, + "learning_rate": 1.739084284107733e-05, + "loss": 1.3984, + "step": 8794 + }, + { + "epoch": 0.258235950437489, + "grad_norm": 0.0, + "learning_rate": 1.739020222843424e-05, + "loss": 1.5713, + "step": 8795 + }, + { + "epoch": 0.258265312114628, + "grad_norm": 0.0, + "learning_rate": 1.7389561548959658e-05, + "loss": 1.4902, + "step": 8796 + }, + { + "epoch": 0.25829467379176696, + "grad_norm": 0.0, + "learning_rate": 1.7388920802659378e-05, + "loss": 1.4863, + "step": 8797 + }, + { + "epoch": 0.258324035468906, + "grad_norm": 0.0, + "learning_rate": 1.7388279989539197e-05, + "loss": 1.4375, + "step": 8798 + }, + { + "epoch": 0.258353397146045, + "grad_norm": 0.0, + "learning_rate": 1.7387639109604907e-05, + "loss": 1.4756, + "step": 8799 + }, + { + "epoch": 0.25838275882318396, + "grad_norm": 0.0, + "learning_rate": 1.7386998162862304e-05, + "loss": 1.4424, + "step": 8800 + }, + { + "epoch": 0.258412120500323, + "grad_norm": 0.0, + "learning_rate": 1.7386357149317183e-05, + "loss": 1.2393, + "step": 8801 + }, + { + "epoch": 0.258441482177462, + "grad_norm": 0.0, + "learning_rate": 1.7385716068975345e-05, + "loss": 1.5146, + "step": 8802 + }, + { + "epoch": 0.25847084385460095, + "grad_norm": 0.0, + "learning_rate": 1.7385074921842585e-05, + "loss": 1.3115, + "step": 8803 + }, + { + "epoch": 0.25850020553173997, + "grad_norm": 0.0, + "learning_rate": 1.73844337079247e-05, + "loss": 1.2529, + "step": 8804 + }, + { + "epoch": 0.258529567208879, + "grad_norm": 0.0, + "learning_rate": 1.7383792427227494e-05, + "loss": 1.3818, + "step": 8805 + }, + { + "epoch": 0.25855892888601795, + "grad_norm": 0.0, + "learning_rate": 1.7383151079756757e-05, + "loss": 1.4277, + "step": 8806 + }, + { + "epoch": 0.25858829056315696, + "grad_norm": 0.0, + "learning_rate": 1.7382509665518296e-05, + "loss": 1.3916, + "step": 8807 + }, + { + "epoch": 0.258617652240296, + "grad_norm": 0.0, + "learning_rate": 1.7381868184517907e-05, + "loss": 1.3447, + "step": 8808 + }, + { + "epoch": 0.25864701391743494, + "grad_norm": 0.0, + "learning_rate": 1.7381226636761396e-05, + "loss": 1.3262, + "step": 8809 + }, + { + "epoch": 0.25867637559457396, + "grad_norm": 0.0, + "learning_rate": 1.738058502225456e-05, + "loss": 1.3076, + "step": 8810 + }, + { + "epoch": 0.258705737271713, + "grad_norm": 0.0, + "learning_rate": 1.7379943341003203e-05, + "loss": 1.4688, + "step": 8811 + }, + { + "epoch": 0.25873509894885194, + "grad_norm": 0.0, + "learning_rate": 1.7379301593013134e-05, + "loss": 1.3936, + "step": 8812 + }, + { + "epoch": 0.25876446062599096, + "grad_norm": 0.0, + "learning_rate": 1.7378659778290147e-05, + "loss": 1.4268, + "step": 8813 + }, + { + "epoch": 0.25879382230313, + "grad_norm": 0.0, + "learning_rate": 1.737801789684005e-05, + "loss": 1.3408, + "step": 8814 + }, + { + "epoch": 0.25882318398026893, + "grad_norm": 0.0, + "learning_rate": 1.737737594866865e-05, + "loss": 1.3516, + "step": 8815 + }, + { + "epoch": 0.25885254565740795, + "grad_norm": 0.0, + "learning_rate": 1.7376733933781748e-05, + "loss": 1.3584, + "step": 8816 + }, + { + "epoch": 0.25888190733454697, + "grad_norm": 0.0, + "learning_rate": 1.7376091852185147e-05, + "loss": 1.3975, + "step": 8817 + }, + { + "epoch": 0.25891126901168593, + "grad_norm": 0.0, + "learning_rate": 1.7375449703884664e-05, + "loss": 1.4229, + "step": 8818 + }, + { + "epoch": 0.25894063068882495, + "grad_norm": 0.0, + "learning_rate": 1.73748074888861e-05, + "loss": 1.4277, + "step": 8819 + }, + { + "epoch": 0.25896999236596396, + "grad_norm": 0.0, + "learning_rate": 1.737416520719526e-05, + "loss": 1.3984, + "step": 8820 + }, + { + "epoch": 0.2589993540431029, + "grad_norm": 0.0, + "learning_rate": 1.737352285881796e-05, + "loss": 1.3545, + "step": 8821 + }, + { + "epoch": 0.25902871572024194, + "grad_norm": 0.0, + "learning_rate": 1.737288044376e-05, + "loss": 1.4336, + "step": 8822 + }, + { + "epoch": 0.25905807739738096, + "grad_norm": 0.0, + "learning_rate": 1.7372237962027198e-05, + "loss": 1.4863, + "step": 8823 + }, + { + "epoch": 0.2590874390745199, + "grad_norm": 0.0, + "learning_rate": 1.737159541362536e-05, + "loss": 1.3818, + "step": 8824 + }, + { + "epoch": 0.25911680075165894, + "grad_norm": 0.0, + "learning_rate": 1.7370952798560293e-05, + "loss": 1.46, + "step": 8825 + }, + { + "epoch": 0.25914616242879795, + "grad_norm": 0.0, + "learning_rate": 1.7370310116837817e-05, + "loss": 1.249, + "step": 8826 + }, + { + "epoch": 0.2591755241059369, + "grad_norm": 0.0, + "learning_rate": 1.7369667368463734e-05, + "loss": 1.3496, + "step": 8827 + }, + { + "epoch": 0.25920488578307593, + "grad_norm": 0.0, + "learning_rate": 1.7369024553443865e-05, + "loss": 1.3525, + "step": 8828 + }, + { + "epoch": 0.25923424746021495, + "grad_norm": 0.0, + "learning_rate": 1.7368381671784015e-05, + "loss": 1.3301, + "step": 8829 + }, + { + "epoch": 0.2592636091373539, + "grad_norm": 0.0, + "learning_rate": 1.7367738723490007e-05, + "loss": 1.3828, + "step": 8830 + }, + { + "epoch": 0.25929297081449293, + "grad_norm": 0.0, + "learning_rate": 1.736709570856765e-05, + "loss": 1.4189, + "step": 8831 + }, + { + "epoch": 0.25932233249163195, + "grad_norm": 0.0, + "learning_rate": 1.7366452627022762e-05, + "loss": 1.3652, + "step": 8832 + }, + { + "epoch": 0.2593516941687709, + "grad_norm": 0.0, + "learning_rate": 1.736580947886115e-05, + "loss": 1.3994, + "step": 8833 + }, + { + "epoch": 0.2593810558459099, + "grad_norm": 0.0, + "learning_rate": 1.7365166264088642e-05, + "loss": 1.3799, + "step": 8834 + }, + { + "epoch": 0.25941041752304894, + "grad_norm": 0.0, + "learning_rate": 1.7364522982711047e-05, + "loss": 1.417, + "step": 8835 + }, + { + "epoch": 0.2594397792001879, + "grad_norm": 0.0, + "learning_rate": 1.7363879634734186e-05, + "loss": 1.3613, + "step": 8836 + }, + { + "epoch": 0.2594691408773269, + "grad_norm": 0.0, + "learning_rate": 1.7363236220163875e-05, + "loss": 1.4404, + "step": 8837 + }, + { + "epoch": 0.25949850255446594, + "grad_norm": 0.0, + "learning_rate": 1.7362592739005933e-05, + "loss": 1.2324, + "step": 8838 + }, + { + "epoch": 0.2595278642316049, + "grad_norm": 0.0, + "learning_rate": 1.736194919126618e-05, + "loss": 1.4922, + "step": 8839 + }, + { + "epoch": 0.2595572259087439, + "grad_norm": 0.0, + "learning_rate": 1.7361305576950434e-05, + "loss": 1.4136, + "step": 8840 + }, + { + "epoch": 0.25958658758588293, + "grad_norm": 0.0, + "learning_rate": 1.7360661896064518e-05, + "loss": 1.5176, + "step": 8841 + }, + { + "epoch": 0.2596159492630219, + "grad_norm": 0.0, + "learning_rate": 1.7360018148614247e-05, + "loss": 1.5029, + "step": 8842 + }, + { + "epoch": 0.2596453109401609, + "grad_norm": 0.0, + "learning_rate": 1.735937433460545e-05, + "loss": 1.3711, + "step": 8843 + }, + { + "epoch": 0.2596746726172999, + "grad_norm": 0.0, + "learning_rate": 1.7358730454043947e-05, + "loss": 1.3711, + "step": 8844 + }, + { + "epoch": 0.2597040342944389, + "grad_norm": 0.0, + "learning_rate": 1.7358086506935555e-05, + "loss": 1.4717, + "step": 8845 + }, + { + "epoch": 0.2597333959715779, + "grad_norm": 0.0, + "learning_rate": 1.735744249328611e-05, + "loss": 1.4219, + "step": 8846 + }, + { + "epoch": 0.25976275764871687, + "grad_norm": 0.0, + "learning_rate": 1.735679841310142e-05, + "loss": 1.3906, + "step": 8847 + }, + { + "epoch": 0.2597921193258559, + "grad_norm": 0.0, + "learning_rate": 1.7356154266387328e-05, + "loss": 1.4307, + "step": 8848 + }, + { + "epoch": 0.2598214810029949, + "grad_norm": 0.0, + "learning_rate": 1.735551005314964e-05, + "loss": 1.3467, + "step": 8849 + }, + { + "epoch": 0.25985084268013386, + "grad_norm": 0.0, + "learning_rate": 1.7354865773394193e-05, + "loss": 1.3306, + "step": 8850 + }, + { + "epoch": 0.2598802043572729, + "grad_norm": 0.0, + "learning_rate": 1.7354221427126817e-05, + "loss": 1.3555, + "step": 8851 + }, + { + "epoch": 0.2599095660344119, + "grad_norm": 0.0, + "learning_rate": 1.7353577014353325e-05, + "loss": 1.4844, + "step": 8852 + }, + { + "epoch": 0.25993892771155086, + "grad_norm": 0.0, + "learning_rate": 1.7352932535079558e-05, + "loss": 1.2988, + "step": 8853 + }, + { + "epoch": 0.2599682893886899, + "grad_norm": 0.0, + "learning_rate": 1.7352287989311336e-05, + "loss": 1.3438, + "step": 8854 + }, + { + "epoch": 0.2599976510658289, + "grad_norm": 0.0, + "learning_rate": 1.7351643377054493e-05, + "loss": 1.5869, + "step": 8855 + }, + { + "epoch": 0.26002701274296786, + "grad_norm": 0.0, + "learning_rate": 1.7350998698314855e-05, + "loss": 1.4395, + "step": 8856 + }, + { + "epoch": 0.2600563744201069, + "grad_norm": 0.0, + "learning_rate": 1.7350353953098254e-05, + "loss": 1.4932, + "step": 8857 + }, + { + "epoch": 0.2600857360972459, + "grad_norm": 0.0, + "learning_rate": 1.7349709141410516e-05, + "loss": 1.3691, + "step": 8858 + }, + { + "epoch": 0.26011509777438485, + "grad_norm": 0.0, + "learning_rate": 1.7349064263257482e-05, + "loss": 1.3291, + "step": 8859 + }, + { + "epoch": 0.26014445945152387, + "grad_norm": 0.0, + "learning_rate": 1.7348419318644972e-05, + "loss": 1.5166, + "step": 8860 + }, + { + "epoch": 0.2601738211286629, + "grad_norm": 0.0, + "learning_rate": 1.7347774307578828e-05, + "loss": 1.415, + "step": 8861 + }, + { + "epoch": 0.26020318280580185, + "grad_norm": 0.0, + "learning_rate": 1.734712923006488e-05, + "loss": 1.4365, + "step": 8862 + }, + { + "epoch": 0.26023254448294086, + "grad_norm": 0.0, + "learning_rate": 1.734648408610896e-05, + "loss": 1.2603, + "step": 8863 + }, + { + "epoch": 0.2602619061600799, + "grad_norm": 0.0, + "learning_rate": 1.7345838875716898e-05, + "loss": 1.332, + "step": 8864 + }, + { + "epoch": 0.26029126783721884, + "grad_norm": 0.0, + "learning_rate": 1.734519359889454e-05, + "loss": 1.4268, + "step": 8865 + }, + { + "epoch": 0.26032062951435786, + "grad_norm": 0.0, + "learning_rate": 1.7344548255647714e-05, + "loss": 1.3955, + "step": 8866 + }, + { + "epoch": 0.2603499911914969, + "grad_norm": 0.0, + "learning_rate": 1.7343902845982254e-05, + "loss": 1.3408, + "step": 8867 + }, + { + "epoch": 0.26037935286863584, + "grad_norm": 0.0, + "learning_rate": 1.7343257369904e-05, + "loss": 1.4189, + "step": 8868 + }, + { + "epoch": 0.26040871454577486, + "grad_norm": 0.0, + "learning_rate": 1.734261182741879e-05, + "loss": 1.3232, + "step": 8869 + }, + { + "epoch": 0.2604380762229139, + "grad_norm": 0.0, + "learning_rate": 1.7341966218532462e-05, + "loss": 1.4883, + "step": 8870 + }, + { + "epoch": 0.26046743790005283, + "grad_norm": 0.0, + "learning_rate": 1.7341320543250853e-05, + "loss": 1.4434, + "step": 8871 + }, + { + "epoch": 0.26049679957719185, + "grad_norm": 0.0, + "learning_rate": 1.73406748015798e-05, + "loss": 1.4248, + "step": 8872 + }, + { + "epoch": 0.26052616125433087, + "grad_norm": 0.0, + "learning_rate": 1.7340028993525147e-05, + "loss": 1.2812, + "step": 8873 + }, + { + "epoch": 0.26055552293146983, + "grad_norm": 0.0, + "learning_rate": 1.7339383119092733e-05, + "loss": 1.3145, + "step": 8874 + }, + { + "epoch": 0.26058488460860885, + "grad_norm": 0.0, + "learning_rate": 1.7338737178288396e-05, + "loss": 1.4521, + "step": 8875 + }, + { + "epoch": 0.26061424628574786, + "grad_norm": 0.0, + "learning_rate": 1.733809117111798e-05, + "loss": 1.3564, + "step": 8876 + }, + { + "epoch": 0.2606436079628868, + "grad_norm": 0.0, + "learning_rate": 1.7337445097587324e-05, + "loss": 1.4326, + "step": 8877 + }, + { + "epoch": 0.26067296964002584, + "grad_norm": 0.0, + "learning_rate": 1.7336798957702275e-05, + "loss": 1.5459, + "step": 8878 + }, + { + "epoch": 0.26070233131716486, + "grad_norm": 0.0, + "learning_rate": 1.7336152751468675e-05, + "loss": 1.4717, + "step": 8879 + }, + { + "epoch": 0.2607316929943038, + "grad_norm": 0.0, + "learning_rate": 1.7335506478892364e-05, + "loss": 1.3867, + "step": 8880 + }, + { + "epoch": 0.26076105467144284, + "grad_norm": 0.0, + "learning_rate": 1.733486013997919e-05, + "loss": 1.4707, + "step": 8881 + }, + { + "epoch": 0.26079041634858185, + "grad_norm": 0.0, + "learning_rate": 1.7334213734735e-05, + "loss": 1.2998, + "step": 8882 + }, + { + "epoch": 0.2608197780257208, + "grad_norm": 0.0, + "learning_rate": 1.7333567263165635e-05, + "loss": 1.3906, + "step": 8883 + }, + { + "epoch": 0.26084913970285983, + "grad_norm": 0.0, + "learning_rate": 1.733292072527694e-05, + "loss": 1.4717, + "step": 8884 + }, + { + "epoch": 0.26087850137999885, + "grad_norm": 0.0, + "learning_rate": 1.733227412107477e-05, + "loss": 1.4463, + "step": 8885 + }, + { + "epoch": 0.2609078630571378, + "grad_norm": 0.0, + "learning_rate": 1.7331627450564966e-05, + "loss": 1.3828, + "step": 8886 + }, + { + "epoch": 0.26093722473427683, + "grad_norm": 0.0, + "learning_rate": 1.7330980713753376e-05, + "loss": 1.3037, + "step": 8887 + }, + { + "epoch": 0.26096658641141585, + "grad_norm": 0.0, + "learning_rate": 1.7330333910645853e-05, + "loss": 1.4678, + "step": 8888 + }, + { + "epoch": 0.2609959480885548, + "grad_norm": 0.0, + "learning_rate": 1.7329687041248237e-05, + "loss": 1.3213, + "step": 8889 + }, + { + "epoch": 0.2610253097656938, + "grad_norm": 0.0, + "learning_rate": 1.732904010556639e-05, + "loss": 1.5068, + "step": 8890 + }, + { + "epoch": 0.26105467144283284, + "grad_norm": 0.0, + "learning_rate": 1.7328393103606152e-05, + "loss": 1.4961, + "step": 8891 + }, + { + "epoch": 0.2610840331199718, + "grad_norm": 0.0, + "learning_rate": 1.732774603537338e-05, + "loss": 1.4668, + "step": 8892 + }, + { + "epoch": 0.2611133947971108, + "grad_norm": 0.0, + "learning_rate": 1.7327098900873926e-05, + "loss": 1.3867, + "step": 8893 + }, + { + "epoch": 0.2611427564742498, + "grad_norm": 0.0, + "learning_rate": 1.7326451700113636e-05, + "loss": 1.3799, + "step": 8894 + }, + { + "epoch": 0.2611721181513888, + "grad_norm": 0.0, + "learning_rate": 1.732580443309837e-05, + "loss": 1.5, + "step": 8895 + }, + { + "epoch": 0.2612014798285278, + "grad_norm": 0.0, + "learning_rate": 1.7325157099833977e-05, + "loss": 1.498, + "step": 8896 + }, + { + "epoch": 0.2612308415056668, + "grad_norm": 0.0, + "learning_rate": 1.7324509700326313e-05, + "loss": 1.3809, + "step": 8897 + }, + { + "epoch": 0.2612602031828058, + "grad_norm": 0.0, + "learning_rate": 1.732386223458123e-05, + "loss": 1.2959, + "step": 8898 + }, + { + "epoch": 0.2612895648599448, + "grad_norm": 0.0, + "learning_rate": 1.732321470260459e-05, + "loss": 1.5742, + "step": 8899 + }, + { + "epoch": 0.2613189265370838, + "grad_norm": 0.0, + "learning_rate": 1.732256710440224e-05, + "loss": 1.583, + "step": 8900 + }, + { + "epoch": 0.2613482882142228, + "grad_norm": 0.0, + "learning_rate": 1.732191943998004e-05, + "loss": 1.5635, + "step": 8901 + }, + { + "epoch": 0.2613776498913618, + "grad_norm": 0.0, + "learning_rate": 1.732127170934385e-05, + "loss": 1.5518, + "step": 8902 + }, + { + "epoch": 0.26140701156850077, + "grad_norm": 0.0, + "learning_rate": 1.7320623912499522e-05, + "loss": 1.4268, + "step": 8903 + }, + { + "epoch": 0.2614363732456398, + "grad_norm": 0.0, + "learning_rate": 1.731997604945292e-05, + "loss": 1.5137, + "step": 8904 + }, + { + "epoch": 0.2614657349227788, + "grad_norm": 0.0, + "learning_rate": 1.7319328120209898e-05, + "loss": 1.4531, + "step": 8905 + }, + { + "epoch": 0.26149509659991776, + "grad_norm": 0.0, + "learning_rate": 1.7318680124776314e-05, + "loss": 1.4854, + "step": 8906 + }, + { + "epoch": 0.2615244582770568, + "grad_norm": 0.0, + "learning_rate": 1.7318032063158036e-05, + "loss": 1.415, + "step": 8907 + }, + { + "epoch": 0.2615538199541958, + "grad_norm": 0.0, + "learning_rate": 1.7317383935360918e-05, + "loss": 1.4561, + "step": 8908 + }, + { + "epoch": 0.26158318163133476, + "grad_norm": 0.0, + "learning_rate": 1.7316735741390824e-05, + "loss": 1.4268, + "step": 8909 + }, + { + "epoch": 0.2616125433084738, + "grad_norm": 0.0, + "learning_rate": 1.7316087481253617e-05, + "loss": 1.3936, + "step": 8910 + }, + { + "epoch": 0.2616419049856128, + "grad_norm": 0.0, + "learning_rate": 1.7315439154955154e-05, + "loss": 1.4111, + "step": 8911 + }, + { + "epoch": 0.26167126666275176, + "grad_norm": 0.0, + "learning_rate": 1.7314790762501304e-05, + "loss": 1.3154, + "step": 8912 + }, + { + "epoch": 0.2617006283398908, + "grad_norm": 0.0, + "learning_rate": 1.7314142303897924e-05, + "loss": 1.334, + "step": 8913 + }, + { + "epoch": 0.2617299900170298, + "grad_norm": 0.0, + "learning_rate": 1.7313493779150884e-05, + "loss": 1.5801, + "step": 8914 + }, + { + "epoch": 0.26175935169416875, + "grad_norm": 0.0, + "learning_rate": 1.7312845188266047e-05, + "loss": 1.3271, + "step": 8915 + }, + { + "epoch": 0.26178871337130777, + "grad_norm": 0.0, + "learning_rate": 1.731219653124928e-05, + "loss": 1.4883, + "step": 8916 + }, + { + "epoch": 0.2618180750484468, + "grad_norm": 0.0, + "learning_rate": 1.7311547808106444e-05, + "loss": 1.498, + "step": 8917 + }, + { + "epoch": 0.26184743672558575, + "grad_norm": 0.0, + "learning_rate": 1.731089901884341e-05, + "loss": 1.4282, + "step": 8918 + }, + { + "epoch": 0.26187679840272476, + "grad_norm": 0.0, + "learning_rate": 1.731025016346604e-05, + "loss": 1.4805, + "step": 8919 + }, + { + "epoch": 0.2619061600798638, + "grad_norm": 0.0, + "learning_rate": 1.730960124198021e-05, + "loss": 1.2212, + "step": 8920 + }, + { + "epoch": 0.26193552175700274, + "grad_norm": 0.0, + "learning_rate": 1.7308952254391785e-05, + "loss": 1.3691, + "step": 8921 + }, + { + "epoch": 0.26196488343414176, + "grad_norm": 0.0, + "learning_rate": 1.7308303200706632e-05, + "loss": 1.5156, + "step": 8922 + }, + { + "epoch": 0.2619942451112808, + "grad_norm": 0.0, + "learning_rate": 1.7307654080930618e-05, + "loss": 1.3672, + "step": 8923 + }, + { + "epoch": 0.26202360678841974, + "grad_norm": 0.0, + "learning_rate": 1.730700489506962e-05, + "loss": 1.5752, + "step": 8924 + }, + { + "epoch": 0.26205296846555876, + "grad_norm": 0.0, + "learning_rate": 1.7306355643129506e-05, + "loss": 1.4443, + "step": 8925 + }, + { + "epoch": 0.2620823301426978, + "grad_norm": 0.0, + "learning_rate": 1.7305706325116144e-05, + "loss": 1.4189, + "step": 8926 + }, + { + "epoch": 0.26211169181983673, + "grad_norm": 0.0, + "learning_rate": 1.730505694103541e-05, + "loss": 1.4209, + "step": 8927 + }, + { + "epoch": 0.26214105349697575, + "grad_norm": 0.0, + "learning_rate": 1.7304407490893176e-05, + "loss": 1.4355, + "step": 8928 + }, + { + "epoch": 0.26217041517411477, + "grad_norm": 0.0, + "learning_rate": 1.730375797469531e-05, + "loss": 1.4316, + "step": 8929 + }, + { + "epoch": 0.26219977685125373, + "grad_norm": 0.0, + "learning_rate": 1.7303108392447697e-05, + "loss": 1.5244, + "step": 8930 + }, + { + "epoch": 0.26222913852839275, + "grad_norm": 0.0, + "learning_rate": 1.73024587441562e-05, + "loss": 1.4238, + "step": 8931 + }, + { + "epoch": 0.26225850020553176, + "grad_norm": 0.0, + "learning_rate": 1.73018090298267e-05, + "loss": 1.4473, + "step": 8932 + }, + { + "epoch": 0.2622878618826707, + "grad_norm": 0.0, + "learning_rate": 1.7301159249465068e-05, + "loss": 1.417, + "step": 8933 + }, + { + "epoch": 0.26231722355980974, + "grad_norm": 0.0, + "learning_rate": 1.7300509403077185e-05, + "loss": 1.3408, + "step": 8934 + }, + { + "epoch": 0.26234658523694876, + "grad_norm": 0.0, + "learning_rate": 1.7299859490668925e-05, + "loss": 1.4043, + "step": 8935 + }, + { + "epoch": 0.2623759469140877, + "grad_norm": 0.0, + "learning_rate": 1.729920951224617e-05, + "loss": 1.3643, + "step": 8936 + }, + { + "epoch": 0.26240530859122674, + "grad_norm": 0.0, + "learning_rate": 1.729855946781479e-05, + "loss": 1.4102, + "step": 8937 + }, + { + "epoch": 0.26243467026836576, + "grad_norm": 0.0, + "learning_rate": 1.729790935738067e-05, + "loss": 1.498, + "step": 8938 + }, + { + "epoch": 0.2624640319455047, + "grad_norm": 0.0, + "learning_rate": 1.7297259180949682e-05, + "loss": 1.4062, + "step": 8939 + }, + { + "epoch": 0.26249339362264373, + "grad_norm": 0.0, + "learning_rate": 1.7296608938527713e-05, + "loss": 1.3906, + "step": 8940 + }, + { + "epoch": 0.26252275529978275, + "grad_norm": 0.0, + "learning_rate": 1.7295958630120644e-05, + "loss": 1.3721, + "step": 8941 + }, + { + "epoch": 0.2625521169769217, + "grad_norm": 0.0, + "learning_rate": 1.729530825573435e-05, + "loss": 1.4697, + "step": 8942 + }, + { + "epoch": 0.26258147865406073, + "grad_norm": 0.0, + "learning_rate": 1.7294657815374716e-05, + "loss": 1.418, + "step": 8943 + }, + { + "epoch": 0.2626108403311997, + "grad_norm": 0.0, + "learning_rate": 1.729400730904762e-05, + "loss": 1.3887, + "step": 8944 + }, + { + "epoch": 0.2626402020083387, + "grad_norm": 0.0, + "learning_rate": 1.7293356736758953e-05, + "loss": 1.335, + "step": 8945 + }, + { + "epoch": 0.2626695636854777, + "grad_norm": 0.0, + "learning_rate": 1.7292706098514588e-05, + "loss": 1.3789, + "step": 8946 + }, + { + "epoch": 0.2626989253626167, + "grad_norm": 0.0, + "learning_rate": 1.7292055394320417e-05, + "loss": 1.4297, + "step": 8947 + }, + { + "epoch": 0.2627282870397557, + "grad_norm": 0.0, + "learning_rate": 1.7291404624182322e-05, + "loss": 1.3906, + "step": 8948 + }, + { + "epoch": 0.2627576487168947, + "grad_norm": 0.0, + "learning_rate": 1.729075378810619e-05, + "loss": 1.375, + "step": 8949 + }, + { + "epoch": 0.2627870103940337, + "grad_norm": 0.0, + "learning_rate": 1.7290102886097898e-05, + "loss": 1.4258, + "step": 8950 + }, + { + "epoch": 0.2628163720711727, + "grad_norm": 0.0, + "learning_rate": 1.728945191816334e-05, + "loss": 1.4492, + "step": 8951 + }, + { + "epoch": 0.2628457337483117, + "grad_norm": 0.0, + "learning_rate": 1.7288800884308404e-05, + "loss": 1.5498, + "step": 8952 + }, + { + "epoch": 0.2628750954254507, + "grad_norm": 0.0, + "learning_rate": 1.7288149784538974e-05, + "loss": 1.3174, + "step": 8953 + }, + { + "epoch": 0.2629044571025897, + "grad_norm": 0.0, + "learning_rate": 1.7287498618860938e-05, + "loss": 1.4233, + "step": 8954 + }, + { + "epoch": 0.2629338187797287, + "grad_norm": 0.0, + "learning_rate": 1.7286847387280188e-05, + "loss": 1.4883, + "step": 8955 + }, + { + "epoch": 0.2629631804568677, + "grad_norm": 0.0, + "learning_rate": 1.7286196089802606e-05, + "loss": 1.3906, + "step": 8956 + }, + { + "epoch": 0.2629925421340067, + "grad_norm": 0.0, + "learning_rate": 1.7285544726434093e-05, + "loss": 1.3457, + "step": 8957 + }, + { + "epoch": 0.2630219038111457, + "grad_norm": 0.0, + "learning_rate": 1.728489329718053e-05, + "loss": 1.2881, + "step": 8958 + }, + { + "epoch": 0.26305126548828467, + "grad_norm": 0.0, + "learning_rate": 1.7284241802047812e-05, + "loss": 1.543, + "step": 8959 + }, + { + "epoch": 0.2630806271654237, + "grad_norm": 0.0, + "learning_rate": 1.728359024104183e-05, + "loss": 1.3037, + "step": 8960 + }, + { + "epoch": 0.2631099888425627, + "grad_norm": 0.0, + "learning_rate": 1.7282938614168473e-05, + "loss": 1.3301, + "step": 8961 + }, + { + "epoch": 0.26313935051970166, + "grad_norm": 0.0, + "learning_rate": 1.7282286921433642e-05, + "loss": 1.3789, + "step": 8962 + }, + { + "epoch": 0.2631687121968407, + "grad_norm": 0.0, + "learning_rate": 1.728163516284322e-05, + "loss": 1.3252, + "step": 8963 + }, + { + "epoch": 0.2631980738739797, + "grad_norm": 0.0, + "learning_rate": 1.728098333840311e-05, + "loss": 1.418, + "step": 8964 + }, + { + "epoch": 0.26322743555111866, + "grad_norm": 0.0, + "learning_rate": 1.7280331448119202e-05, + "loss": 1.3018, + "step": 8965 + }, + { + "epoch": 0.2632567972282577, + "grad_norm": 0.0, + "learning_rate": 1.7279679491997396e-05, + "loss": 1.4824, + "step": 8966 + }, + { + "epoch": 0.2632861589053967, + "grad_norm": 0.0, + "learning_rate": 1.7279027470043576e-05, + "loss": 1.3408, + "step": 8967 + }, + { + "epoch": 0.26331552058253566, + "grad_norm": 0.0, + "learning_rate": 1.727837538226365e-05, + "loss": 1.251, + "step": 8968 + }, + { + "epoch": 0.2633448822596747, + "grad_norm": 0.0, + "learning_rate": 1.7277723228663515e-05, + "loss": 1.498, + "step": 8969 + }, + { + "epoch": 0.2633742439368137, + "grad_norm": 0.0, + "learning_rate": 1.727707100924906e-05, + "loss": 1.4336, + "step": 8970 + }, + { + "epoch": 0.26340360561395265, + "grad_norm": 0.0, + "learning_rate": 1.727641872402619e-05, + "loss": 1.415, + "step": 8971 + }, + { + "epoch": 0.26343296729109167, + "grad_norm": 0.0, + "learning_rate": 1.7275766373000803e-05, + "loss": 1.4238, + "step": 8972 + }, + { + "epoch": 0.2634623289682307, + "grad_norm": 0.0, + "learning_rate": 1.7275113956178796e-05, + "loss": 1.5693, + "step": 8973 + }, + { + "epoch": 0.26349169064536965, + "grad_norm": 0.0, + "learning_rate": 1.727446147356607e-05, + "loss": 1.2842, + "step": 8974 + }, + { + "epoch": 0.26352105232250866, + "grad_norm": 0.0, + "learning_rate": 1.727380892516853e-05, + "loss": 1.5088, + "step": 8975 + }, + { + "epoch": 0.2635504139996477, + "grad_norm": 0.0, + "learning_rate": 1.727315631099207e-05, + "loss": 1.5205, + "step": 8976 + }, + { + "epoch": 0.26357977567678664, + "grad_norm": 0.0, + "learning_rate": 1.7272503631042594e-05, + "loss": 1.5, + "step": 8977 + }, + { + "epoch": 0.26360913735392566, + "grad_norm": 0.0, + "learning_rate": 1.7271850885326005e-05, + "loss": 1.5449, + "step": 8978 + }, + { + "epoch": 0.2636384990310647, + "grad_norm": 0.0, + "learning_rate": 1.7271198073848204e-05, + "loss": 1.373, + "step": 8979 + }, + { + "epoch": 0.26366786070820364, + "grad_norm": 0.0, + "learning_rate": 1.7270545196615104e-05, + "loss": 1.5693, + "step": 8980 + }, + { + "epoch": 0.26369722238534266, + "grad_norm": 0.0, + "learning_rate": 1.7269892253632595e-05, + "loss": 1.4062, + "step": 8981 + }, + { + "epoch": 0.2637265840624817, + "grad_norm": 0.0, + "learning_rate": 1.726923924490659e-05, + "loss": 1.3501, + "step": 8982 + }, + { + "epoch": 0.26375594573962063, + "grad_norm": 0.0, + "learning_rate": 1.7268586170442993e-05, + "loss": 1.4482, + "step": 8983 + }, + { + "epoch": 0.26378530741675965, + "grad_norm": 0.0, + "learning_rate": 1.726793303024771e-05, + "loss": 1.4131, + "step": 8984 + }, + { + "epoch": 0.26381466909389867, + "grad_norm": 0.0, + "learning_rate": 1.726727982432665e-05, + "loss": 1.4434, + "step": 8985 + }, + { + "epoch": 0.26384403077103763, + "grad_norm": 0.0, + "learning_rate": 1.726662655268571e-05, + "loss": 1.46, + "step": 8986 + }, + { + "epoch": 0.26387339244817665, + "grad_norm": 0.0, + "learning_rate": 1.7265973215330815e-05, + "loss": 1.4189, + "step": 8987 + }, + { + "epoch": 0.26390275412531566, + "grad_norm": 0.0, + "learning_rate": 1.7265319812267856e-05, + "loss": 1.46, + "step": 8988 + }, + { + "epoch": 0.2639321158024546, + "grad_norm": 0.0, + "learning_rate": 1.7264666343502754e-05, + "loss": 1.416, + "step": 8989 + }, + { + "epoch": 0.26396147747959364, + "grad_norm": 0.0, + "learning_rate": 1.726401280904141e-05, + "loss": 1.4014, + "step": 8990 + }, + { + "epoch": 0.26399083915673266, + "grad_norm": 0.0, + "learning_rate": 1.726335920888974e-05, + "loss": 1.2769, + "step": 8991 + }, + { + "epoch": 0.2640202008338716, + "grad_norm": 0.0, + "learning_rate": 1.726270554305365e-05, + "loss": 1.4678, + "step": 8992 + }, + { + "epoch": 0.26404956251101064, + "grad_norm": 0.0, + "learning_rate": 1.7262051811539056e-05, + "loss": 1.4951, + "step": 8993 + }, + { + "epoch": 0.2640789241881496, + "grad_norm": 0.0, + "learning_rate": 1.7261398014351867e-05, + "loss": 1.4336, + "step": 8994 + }, + { + "epoch": 0.2641082858652886, + "grad_norm": 0.0, + "learning_rate": 1.7260744151497998e-05, + "loss": 1.375, + "step": 8995 + }, + { + "epoch": 0.26413764754242763, + "grad_norm": 0.0, + "learning_rate": 1.7260090222983358e-05, + "loss": 1.3955, + "step": 8996 + }, + { + "epoch": 0.2641670092195666, + "grad_norm": 0.0, + "learning_rate": 1.7259436228813863e-05, + "loss": 1.5312, + "step": 8997 + }, + { + "epoch": 0.2641963708967056, + "grad_norm": 0.0, + "learning_rate": 1.725878216899543e-05, + "loss": 1.4883, + "step": 8998 + }, + { + "epoch": 0.26422573257384463, + "grad_norm": 0.0, + "learning_rate": 1.7258128043533963e-05, + "loss": 1.374, + "step": 8999 + }, + { + "epoch": 0.2642550942509836, + "grad_norm": 0.0, + "learning_rate": 1.7257473852435393e-05, + "loss": 1.3984, + "step": 9000 + }, + { + "epoch": 0.2642844559281226, + "grad_norm": 0.0, + "learning_rate": 1.7256819595705626e-05, + "loss": 1.4111, + "step": 9001 + }, + { + "epoch": 0.2643138176052616, + "grad_norm": 0.0, + "learning_rate": 1.7256165273350577e-05, + "loss": 1.5205, + "step": 9002 + }, + { + "epoch": 0.2643431792824006, + "grad_norm": 0.0, + "learning_rate": 1.7255510885376172e-05, + "loss": 1.4336, + "step": 9003 + }, + { + "epoch": 0.2643725409595396, + "grad_norm": 0.0, + "learning_rate": 1.7254856431788322e-05, + "loss": 1.3857, + "step": 9004 + }, + { + "epoch": 0.2644019026366786, + "grad_norm": 0.0, + "learning_rate": 1.7254201912592946e-05, + "loss": 1.3613, + "step": 9005 + }, + { + "epoch": 0.2644312643138176, + "grad_norm": 0.0, + "learning_rate": 1.725354732779597e-05, + "loss": 1.5156, + "step": 9006 + }, + { + "epoch": 0.2644606259909566, + "grad_norm": 0.0, + "learning_rate": 1.72528926774033e-05, + "loss": 1.3926, + "step": 9007 + }, + { + "epoch": 0.2644899876680956, + "grad_norm": 0.0, + "learning_rate": 1.725223796142087e-05, + "loss": 1.335, + "step": 9008 + }, + { + "epoch": 0.2645193493452346, + "grad_norm": 0.0, + "learning_rate": 1.725158317985459e-05, + "loss": 1.4102, + "step": 9009 + }, + { + "epoch": 0.2645487110223736, + "grad_norm": 0.0, + "learning_rate": 1.725092833271039e-05, + "loss": 1.5352, + "step": 9010 + }, + { + "epoch": 0.2645780726995126, + "grad_norm": 0.0, + "learning_rate": 1.7250273419994185e-05, + "loss": 1.4053, + "step": 9011 + }, + { + "epoch": 0.2646074343766516, + "grad_norm": 0.0, + "learning_rate": 1.7249618441711904e-05, + "loss": 1.29, + "step": 9012 + }, + { + "epoch": 0.2646367960537906, + "grad_norm": 0.0, + "learning_rate": 1.7248963397869464e-05, + "loss": 1.3457, + "step": 9013 + }, + { + "epoch": 0.2646661577309296, + "grad_norm": 0.0, + "learning_rate": 1.72483082884728e-05, + "loss": 1.5957, + "step": 9014 + }, + { + "epoch": 0.26469551940806857, + "grad_norm": 0.0, + "learning_rate": 1.7247653113527815e-05, + "loss": 1.3555, + "step": 9015 + }, + { + "epoch": 0.2647248810852076, + "grad_norm": 0.0, + "learning_rate": 1.7246997873040455e-05, + "loss": 1.5078, + "step": 9016 + }, + { + "epoch": 0.2647542427623466, + "grad_norm": 0.0, + "learning_rate": 1.724634256701663e-05, + "loss": 1.4141, + "step": 9017 + }, + { + "epoch": 0.26478360443948556, + "grad_norm": 0.0, + "learning_rate": 1.7245687195462283e-05, + "loss": 1.4424, + "step": 9018 + }, + { + "epoch": 0.2648129661166246, + "grad_norm": 0.0, + "learning_rate": 1.7245031758383322e-05, + "loss": 1.3838, + "step": 9019 + }, + { + "epoch": 0.2648423277937636, + "grad_norm": 0.0, + "learning_rate": 1.724437625578569e-05, + "loss": 1.5303, + "step": 9020 + }, + { + "epoch": 0.26487168947090256, + "grad_norm": 0.0, + "learning_rate": 1.7243720687675303e-05, + "loss": 1.3164, + "step": 9021 + }, + { + "epoch": 0.2649010511480416, + "grad_norm": 0.0, + "learning_rate": 1.72430650540581e-05, + "loss": 1.3623, + "step": 9022 + }, + { + "epoch": 0.2649304128251806, + "grad_norm": 0.0, + "learning_rate": 1.7242409354940003e-05, + "loss": 1.2607, + "step": 9023 + }, + { + "epoch": 0.26495977450231956, + "grad_norm": 0.0, + "learning_rate": 1.724175359032694e-05, + "loss": 1.3545, + "step": 9024 + }, + { + "epoch": 0.2649891361794586, + "grad_norm": 0.0, + "learning_rate": 1.7241097760224845e-05, + "loss": 1.4766, + "step": 9025 + }, + { + "epoch": 0.2650184978565976, + "grad_norm": 0.0, + "learning_rate": 1.7240441864639653e-05, + "loss": 1.4233, + "step": 9026 + }, + { + "epoch": 0.26504785953373655, + "grad_norm": 0.0, + "learning_rate": 1.723978590357729e-05, + "loss": 1.4893, + "step": 9027 + }, + { + "epoch": 0.26507722121087557, + "grad_norm": 0.0, + "learning_rate": 1.7239129877043685e-05, + "loss": 1.4775, + "step": 9028 + }, + { + "epoch": 0.2651065828880146, + "grad_norm": 0.0, + "learning_rate": 1.723847378504478e-05, + "loss": 1.3672, + "step": 9029 + }, + { + "epoch": 0.26513594456515355, + "grad_norm": 0.0, + "learning_rate": 1.7237817627586497e-05, + "loss": 1.3325, + "step": 9030 + }, + { + "epoch": 0.26516530624229256, + "grad_norm": 0.0, + "learning_rate": 1.723716140467478e-05, + "loss": 1.5029, + "step": 9031 + }, + { + "epoch": 0.2651946679194316, + "grad_norm": 0.0, + "learning_rate": 1.7236505116315557e-05, + "loss": 1.248, + "step": 9032 + }, + { + "epoch": 0.26522402959657054, + "grad_norm": 0.0, + "learning_rate": 1.7235848762514763e-05, + "loss": 1.5254, + "step": 9033 + }, + { + "epoch": 0.26525339127370956, + "grad_norm": 0.0, + "learning_rate": 1.723519234327834e-05, + "loss": 1.4492, + "step": 9034 + }, + { + "epoch": 0.2652827529508486, + "grad_norm": 0.0, + "learning_rate": 1.7234535858612214e-05, + "loss": 1.4492, + "step": 9035 + }, + { + "epoch": 0.26531211462798754, + "grad_norm": 0.0, + "learning_rate": 1.723387930852233e-05, + "loss": 1.3574, + "step": 9036 + }, + { + "epoch": 0.26534147630512656, + "grad_norm": 0.0, + "learning_rate": 1.7233222693014625e-05, + "loss": 1.4053, + "step": 9037 + }, + { + "epoch": 0.2653708379822656, + "grad_norm": 0.0, + "learning_rate": 1.7232566012095033e-05, + "loss": 1.3398, + "step": 9038 + }, + { + "epoch": 0.26540019965940453, + "grad_norm": 0.0, + "learning_rate": 1.7231909265769494e-05, + "loss": 1.2881, + "step": 9039 + }, + { + "epoch": 0.26542956133654355, + "grad_norm": 0.0, + "learning_rate": 1.723125245404395e-05, + "loss": 1.4561, + "step": 9040 + }, + { + "epoch": 0.26545892301368257, + "grad_norm": 0.0, + "learning_rate": 1.7230595576924332e-05, + "loss": 1.2861, + "step": 9041 + }, + { + "epoch": 0.26548828469082153, + "grad_norm": 0.0, + "learning_rate": 1.722993863441659e-05, + "loss": 1.4346, + "step": 9042 + }, + { + "epoch": 0.26551764636796055, + "grad_norm": 0.0, + "learning_rate": 1.7229281626526665e-05, + "loss": 1.3955, + "step": 9043 + }, + { + "epoch": 0.2655470080450995, + "grad_norm": 0.0, + "learning_rate": 1.722862455326049e-05, + "loss": 1.3535, + "step": 9044 + }, + { + "epoch": 0.2655763697222385, + "grad_norm": 0.0, + "learning_rate": 1.7227967414624015e-05, + "loss": 1.251, + "step": 9045 + }, + { + "epoch": 0.26560573139937754, + "grad_norm": 0.0, + "learning_rate": 1.722731021062318e-05, + "loss": 1.5117, + "step": 9046 + }, + { + "epoch": 0.2656350930765165, + "grad_norm": 0.0, + "learning_rate": 1.7226652941263924e-05, + "loss": 1.334, + "step": 9047 + }, + { + "epoch": 0.2656644547536555, + "grad_norm": 0.0, + "learning_rate": 1.7225995606552196e-05, + "loss": 1.4893, + "step": 9048 + }, + { + "epoch": 0.26569381643079454, + "grad_norm": 0.0, + "learning_rate": 1.722533820649394e-05, + "loss": 1.4824, + "step": 9049 + }, + { + "epoch": 0.2657231781079335, + "grad_norm": 0.0, + "learning_rate": 1.72246807410951e-05, + "loss": 1.502, + "step": 9050 + }, + { + "epoch": 0.2657525397850725, + "grad_norm": 0.0, + "learning_rate": 1.722402321036162e-05, + "loss": 1.3652, + "step": 9051 + }, + { + "epoch": 0.26578190146221153, + "grad_norm": 0.0, + "learning_rate": 1.722336561429945e-05, + "loss": 1.3594, + "step": 9052 + }, + { + "epoch": 0.2658112631393505, + "grad_norm": 0.0, + "learning_rate": 1.722270795291454e-05, + "loss": 1.29, + "step": 9053 + }, + { + "epoch": 0.2658406248164895, + "grad_norm": 0.0, + "learning_rate": 1.7222050226212822e-05, + "loss": 1.3564, + "step": 9054 + }, + { + "epoch": 0.26586998649362853, + "grad_norm": 0.0, + "learning_rate": 1.722139243420026e-05, + "loss": 1.2656, + "step": 9055 + }, + { + "epoch": 0.2658993481707675, + "grad_norm": 0.0, + "learning_rate": 1.7220734576882792e-05, + "loss": 1.3926, + "step": 9056 + }, + { + "epoch": 0.2659287098479065, + "grad_norm": 0.0, + "learning_rate": 1.7220076654266374e-05, + "loss": 1.4111, + "step": 9057 + }, + { + "epoch": 0.2659580715250455, + "grad_norm": 0.0, + "learning_rate": 1.721941866635696e-05, + "loss": 1.4688, + "step": 9058 + }, + { + "epoch": 0.2659874332021845, + "grad_norm": 0.0, + "learning_rate": 1.721876061316049e-05, + "loss": 1.3906, + "step": 9059 + }, + { + "epoch": 0.2660167948793235, + "grad_norm": 0.0, + "learning_rate": 1.7218102494682917e-05, + "loss": 1.3936, + "step": 9060 + }, + { + "epoch": 0.2660461565564625, + "grad_norm": 0.0, + "learning_rate": 1.7217444310930195e-05, + "loss": 1.5098, + "step": 9061 + }, + { + "epoch": 0.2660755182336015, + "grad_norm": 0.0, + "learning_rate": 1.721678606190828e-05, + "loss": 1.5879, + "step": 9062 + }, + { + "epoch": 0.2661048799107405, + "grad_norm": 0.0, + "learning_rate": 1.7216127747623114e-05, + "loss": 1.3975, + "step": 9063 + }, + { + "epoch": 0.2661342415878795, + "grad_norm": 0.0, + "learning_rate": 1.721546936808066e-05, + "loss": 1.3384, + "step": 9064 + }, + { + "epoch": 0.2661636032650185, + "grad_norm": 0.0, + "learning_rate": 1.721481092328687e-05, + "loss": 1.4668, + "step": 9065 + }, + { + "epoch": 0.2661929649421575, + "grad_norm": 0.0, + "learning_rate": 1.7214152413247694e-05, + "loss": 1.4512, + "step": 9066 + }, + { + "epoch": 0.2662223266192965, + "grad_norm": 0.0, + "learning_rate": 1.721349383796909e-05, + "loss": 1.3086, + "step": 9067 + }, + { + "epoch": 0.2662516882964355, + "grad_norm": 0.0, + "learning_rate": 1.7212835197457015e-05, + "loss": 1.5078, + "step": 9068 + }, + { + "epoch": 0.2662810499735745, + "grad_norm": 0.0, + "learning_rate": 1.7212176491717424e-05, + "loss": 1.4619, + "step": 9069 + }, + { + "epoch": 0.2663104116507135, + "grad_norm": 0.0, + "learning_rate": 1.7211517720756275e-05, + "loss": 1.2646, + "step": 9070 + }, + { + "epoch": 0.26633977332785247, + "grad_norm": 0.0, + "learning_rate": 1.7210858884579526e-05, + "loss": 1.4766, + "step": 9071 + }, + { + "epoch": 0.2663691350049915, + "grad_norm": 0.0, + "learning_rate": 1.721019998319313e-05, + "loss": 1.3164, + "step": 9072 + }, + { + "epoch": 0.2663984966821305, + "grad_norm": 0.0, + "learning_rate": 1.720954101660305e-05, + "loss": 1.3555, + "step": 9073 + }, + { + "epoch": 0.26642785835926946, + "grad_norm": 0.0, + "learning_rate": 1.7208881984815245e-05, + "loss": 1.3662, + "step": 9074 + }, + { + "epoch": 0.2664572200364085, + "grad_norm": 0.0, + "learning_rate": 1.7208222887835672e-05, + "loss": 1.4443, + "step": 9075 + }, + { + "epoch": 0.2664865817135475, + "grad_norm": 0.0, + "learning_rate": 1.7207563725670297e-05, + "loss": 1.3574, + "step": 9076 + }, + { + "epoch": 0.26651594339068646, + "grad_norm": 0.0, + "learning_rate": 1.7206904498325072e-05, + "loss": 1.3799, + "step": 9077 + }, + { + "epoch": 0.2665453050678255, + "grad_norm": 0.0, + "learning_rate": 1.7206245205805968e-05, + "loss": 1.3955, + "step": 9078 + }, + { + "epoch": 0.2665746667449645, + "grad_norm": 0.0, + "learning_rate": 1.720558584811894e-05, + "loss": 1.415, + "step": 9079 + }, + { + "epoch": 0.26660402842210346, + "grad_norm": 0.0, + "learning_rate": 1.720492642526996e-05, + "loss": 1.4287, + "step": 9080 + }, + { + "epoch": 0.2666333900992425, + "grad_norm": 0.0, + "learning_rate": 1.720426693726498e-05, + "loss": 1.5059, + "step": 9081 + }, + { + "epoch": 0.2666627517763815, + "grad_norm": 0.0, + "learning_rate": 1.7203607384109968e-05, + "loss": 1.4648, + "step": 9082 + }, + { + "epoch": 0.26669211345352045, + "grad_norm": 0.0, + "learning_rate": 1.7202947765810893e-05, + "loss": 1.3389, + "step": 9083 + }, + { + "epoch": 0.26672147513065947, + "grad_norm": 0.0, + "learning_rate": 1.7202288082373714e-05, + "loss": 1.4277, + "step": 9084 + }, + { + "epoch": 0.2667508368077985, + "grad_norm": 0.0, + "learning_rate": 1.72016283338044e-05, + "loss": 1.4717, + "step": 9085 + }, + { + "epoch": 0.26678019848493745, + "grad_norm": 0.0, + "learning_rate": 1.7200968520108915e-05, + "loss": 1.3203, + "step": 9086 + }, + { + "epoch": 0.26680956016207646, + "grad_norm": 0.0, + "learning_rate": 1.7200308641293232e-05, + "loss": 1.4199, + "step": 9087 + }, + { + "epoch": 0.2668389218392155, + "grad_norm": 0.0, + "learning_rate": 1.719964869736331e-05, + "loss": 1.3379, + "step": 9088 + }, + { + "epoch": 0.26686828351635444, + "grad_norm": 0.0, + "learning_rate": 1.7198988688325126e-05, + "loss": 1.3291, + "step": 9089 + }, + { + "epoch": 0.26689764519349346, + "grad_norm": 0.0, + "learning_rate": 1.7198328614184638e-05, + "loss": 1.3457, + "step": 9090 + }, + { + "epoch": 0.2669270068706325, + "grad_norm": 0.0, + "learning_rate": 1.7197668474947825e-05, + "loss": 1.5479, + "step": 9091 + }, + { + "epoch": 0.26695636854777144, + "grad_norm": 0.0, + "learning_rate": 1.7197008270620647e-05, + "loss": 1.3818, + "step": 9092 + }, + { + "epoch": 0.26698573022491046, + "grad_norm": 0.0, + "learning_rate": 1.7196348001209083e-05, + "loss": 1.2939, + "step": 9093 + }, + { + "epoch": 0.2670150919020494, + "grad_norm": 0.0, + "learning_rate": 1.7195687666719104e-05, + "loss": 1.3394, + "step": 9094 + }, + { + "epoch": 0.26704445357918843, + "grad_norm": 0.0, + "learning_rate": 1.7195027267156676e-05, + "loss": 1.3613, + "step": 9095 + }, + { + "epoch": 0.26707381525632745, + "grad_norm": 0.0, + "learning_rate": 1.7194366802527772e-05, + "loss": 1.584, + "step": 9096 + }, + { + "epoch": 0.2671031769334664, + "grad_norm": 0.0, + "learning_rate": 1.719370627283837e-05, + "loss": 1.4932, + "step": 9097 + }, + { + "epoch": 0.26713253861060543, + "grad_norm": 0.0, + "learning_rate": 1.7193045678094438e-05, + "loss": 1.4082, + "step": 9098 + }, + { + "epoch": 0.26716190028774445, + "grad_norm": 0.0, + "learning_rate": 1.719238501830195e-05, + "loss": 1.3535, + "step": 9099 + }, + { + "epoch": 0.2671912619648834, + "grad_norm": 0.0, + "learning_rate": 1.7191724293466886e-05, + "loss": 1.4668, + "step": 9100 + }, + { + "epoch": 0.2672206236420224, + "grad_norm": 0.0, + "learning_rate": 1.7191063503595217e-05, + "loss": 1.5059, + "step": 9101 + }, + { + "epoch": 0.26724998531916144, + "grad_norm": 0.0, + "learning_rate": 1.7190402648692918e-05, + "loss": 1.3672, + "step": 9102 + }, + { + "epoch": 0.2672793469963004, + "grad_norm": 0.0, + "learning_rate": 1.7189741728765966e-05, + "loss": 1.3496, + "step": 9103 + }, + { + "epoch": 0.2673087086734394, + "grad_norm": 0.0, + "learning_rate": 1.718908074382034e-05, + "loss": 1.417, + "step": 9104 + }, + { + "epoch": 0.26733807035057844, + "grad_norm": 0.0, + "learning_rate": 1.718841969386201e-05, + "loss": 1.5176, + "step": 9105 + }, + { + "epoch": 0.2673674320277174, + "grad_norm": 0.0, + "learning_rate": 1.7187758578896963e-05, + "loss": 1.3008, + "step": 9106 + }, + { + "epoch": 0.2673967937048564, + "grad_norm": 0.0, + "learning_rate": 1.7187097398931175e-05, + "loss": 1.3535, + "step": 9107 + }, + { + "epoch": 0.26742615538199543, + "grad_norm": 0.0, + "learning_rate": 1.7186436153970627e-05, + "loss": 1.3945, + "step": 9108 + }, + { + "epoch": 0.2674555170591344, + "grad_norm": 0.0, + "learning_rate": 1.7185774844021294e-05, + "loss": 1.4521, + "step": 9109 + }, + { + "epoch": 0.2674848787362734, + "grad_norm": 0.0, + "learning_rate": 1.718511346908916e-05, + "loss": 1.4014, + "step": 9110 + }, + { + "epoch": 0.26751424041341243, + "grad_norm": 0.0, + "learning_rate": 1.7184452029180204e-05, + "loss": 1.4229, + "step": 9111 + }, + { + "epoch": 0.2675436020905514, + "grad_norm": 0.0, + "learning_rate": 1.7183790524300407e-05, + "loss": 1.4668, + "step": 9112 + }, + { + "epoch": 0.2675729637676904, + "grad_norm": 0.0, + "learning_rate": 1.718312895445575e-05, + "loss": 1.3789, + "step": 9113 + }, + { + "epoch": 0.2676023254448294, + "grad_norm": 0.0, + "learning_rate": 1.7182467319652226e-05, + "loss": 1.3359, + "step": 9114 + }, + { + "epoch": 0.2676316871219684, + "grad_norm": 0.0, + "learning_rate": 1.7181805619895803e-05, + "loss": 1.4561, + "step": 9115 + }, + { + "epoch": 0.2676610487991074, + "grad_norm": 0.0, + "learning_rate": 1.7181143855192478e-05, + "loss": 1.3252, + "step": 9116 + }, + { + "epoch": 0.2676904104762464, + "grad_norm": 0.0, + "learning_rate": 1.7180482025548228e-05, + "loss": 1.5283, + "step": 9117 + }, + { + "epoch": 0.2677197721533854, + "grad_norm": 0.0, + "learning_rate": 1.717982013096904e-05, + "loss": 1.3691, + "step": 9118 + }, + { + "epoch": 0.2677491338305244, + "grad_norm": 0.0, + "learning_rate": 1.7179158171460897e-05, + "loss": 1.3447, + "step": 9119 + }, + { + "epoch": 0.2677784955076634, + "grad_norm": 0.0, + "learning_rate": 1.7178496147029792e-05, + "loss": 1.3228, + "step": 9120 + }, + { + "epoch": 0.2678078571848024, + "grad_norm": 0.0, + "learning_rate": 1.7177834057681707e-05, + "loss": 1.332, + "step": 9121 + }, + { + "epoch": 0.2678372188619414, + "grad_norm": 0.0, + "learning_rate": 1.717717190342263e-05, + "loss": 1.248, + "step": 9122 + }, + { + "epoch": 0.2678665805390804, + "grad_norm": 0.0, + "learning_rate": 1.717650968425855e-05, + "loss": 1.5713, + "step": 9123 + }, + { + "epoch": 0.2678959422162194, + "grad_norm": 0.0, + "learning_rate": 1.7175847400195452e-05, + "loss": 1.5039, + "step": 9124 + }, + { + "epoch": 0.2679253038933584, + "grad_norm": 0.0, + "learning_rate": 1.717518505123933e-05, + "loss": 1.3691, + "step": 9125 + }, + { + "epoch": 0.2679546655704974, + "grad_norm": 0.0, + "learning_rate": 1.7174522637396175e-05, + "loss": 1.4268, + "step": 9126 + }, + { + "epoch": 0.26798402724763637, + "grad_norm": 0.0, + "learning_rate": 1.717386015867197e-05, + "loss": 1.4648, + "step": 9127 + }, + { + "epoch": 0.2680133889247754, + "grad_norm": 0.0, + "learning_rate": 1.7173197615072712e-05, + "loss": 1.4092, + "step": 9128 + }, + { + "epoch": 0.2680427506019144, + "grad_norm": 0.0, + "learning_rate": 1.717253500660439e-05, + "loss": 1.3999, + "step": 9129 + }, + { + "epoch": 0.26807211227905337, + "grad_norm": 0.0, + "learning_rate": 1.7171872333272998e-05, + "loss": 1.3662, + "step": 9130 + }, + { + "epoch": 0.2681014739561924, + "grad_norm": 0.0, + "learning_rate": 1.717120959508453e-05, + "loss": 1.4883, + "step": 9131 + }, + { + "epoch": 0.2681308356333314, + "grad_norm": 0.0, + "learning_rate": 1.7170546792044975e-05, + "loss": 1.3691, + "step": 9132 + }, + { + "epoch": 0.26816019731047036, + "grad_norm": 0.0, + "learning_rate": 1.716988392416033e-05, + "loss": 1.4131, + "step": 9133 + }, + { + "epoch": 0.2681895589876094, + "grad_norm": 0.0, + "learning_rate": 1.7169220991436587e-05, + "loss": 1.4189, + "step": 9134 + }, + { + "epoch": 0.2682189206647484, + "grad_norm": 0.0, + "learning_rate": 1.716855799387974e-05, + "loss": 1.4561, + "step": 9135 + }, + { + "epoch": 0.26824828234188736, + "grad_norm": 0.0, + "learning_rate": 1.716789493149579e-05, + "loss": 1.3174, + "step": 9136 + }, + { + "epoch": 0.2682776440190264, + "grad_norm": 0.0, + "learning_rate": 1.716723180429073e-05, + "loss": 1.3809, + "step": 9137 + }, + { + "epoch": 0.2683070056961654, + "grad_norm": 0.0, + "learning_rate": 1.716656861227056e-05, + "loss": 1.4043, + "step": 9138 + }, + { + "epoch": 0.26833636737330435, + "grad_norm": 0.0, + "learning_rate": 1.716590535544127e-05, + "loss": 1.4238, + "step": 9139 + }, + { + "epoch": 0.26836572905044337, + "grad_norm": 0.0, + "learning_rate": 1.7165242033808867e-05, + "loss": 1.4648, + "step": 9140 + }, + { + "epoch": 0.2683950907275824, + "grad_norm": 0.0, + "learning_rate": 1.7164578647379345e-05, + "loss": 1.3936, + "step": 9141 + }, + { + "epoch": 0.26842445240472135, + "grad_norm": 0.0, + "learning_rate": 1.7163915196158696e-05, + "loss": 1.292, + "step": 9142 + }, + { + "epoch": 0.26845381408186036, + "grad_norm": 0.0, + "learning_rate": 1.7163251680152937e-05, + "loss": 1.3818, + "step": 9143 + }, + { + "epoch": 0.2684831757589994, + "grad_norm": 0.0, + "learning_rate": 1.7162588099368052e-05, + "loss": 1.1821, + "step": 9144 + }, + { + "epoch": 0.26851253743613834, + "grad_norm": 0.0, + "learning_rate": 1.716192445381005e-05, + "loss": 1.5068, + "step": 9145 + }, + { + "epoch": 0.26854189911327736, + "grad_norm": 0.0, + "learning_rate": 1.7161260743484933e-05, + "loss": 1.4121, + "step": 9146 + }, + { + "epoch": 0.2685712607904163, + "grad_norm": 0.0, + "learning_rate": 1.71605969683987e-05, + "loss": 1.4434, + "step": 9147 + }, + { + "epoch": 0.26860062246755534, + "grad_norm": 0.0, + "learning_rate": 1.7159933128557353e-05, + "loss": 1.4287, + "step": 9148 + }, + { + "epoch": 0.26862998414469436, + "grad_norm": 0.0, + "learning_rate": 1.7159269223966897e-05, + "loss": 1.416, + "step": 9149 + }, + { + "epoch": 0.2686593458218333, + "grad_norm": 0.0, + "learning_rate": 1.715860525463334e-05, + "loss": 1.2695, + "step": 9150 + }, + { + "epoch": 0.26868870749897233, + "grad_norm": 0.0, + "learning_rate": 1.715794122056268e-05, + "loss": 1.3496, + "step": 9151 + }, + { + "epoch": 0.26871806917611135, + "grad_norm": 0.0, + "learning_rate": 1.715727712176092e-05, + "loss": 1.417, + "step": 9152 + }, + { + "epoch": 0.2687474308532503, + "grad_norm": 0.0, + "learning_rate": 1.715661295823408e-05, + "loss": 1.5166, + "step": 9153 + }, + { + "epoch": 0.26877679253038933, + "grad_norm": 0.0, + "learning_rate": 1.7155948729988147e-05, + "loss": 1.3779, + "step": 9154 + }, + { + "epoch": 0.26880615420752835, + "grad_norm": 0.0, + "learning_rate": 1.715528443702914e-05, + "loss": 1.4092, + "step": 9155 + }, + { + "epoch": 0.2688355158846673, + "grad_norm": 0.0, + "learning_rate": 1.7154620079363065e-05, + "loss": 1.3779, + "step": 9156 + }, + { + "epoch": 0.2688648775618063, + "grad_norm": 0.0, + "learning_rate": 1.7153955656995927e-05, + "loss": 1.3379, + "step": 9157 + }, + { + "epoch": 0.26889423923894534, + "grad_norm": 0.0, + "learning_rate": 1.7153291169933735e-05, + "loss": 1.3037, + "step": 9158 + }, + { + "epoch": 0.2689236009160843, + "grad_norm": 0.0, + "learning_rate": 1.71526266181825e-05, + "loss": 1.3984, + "step": 9159 + }, + { + "epoch": 0.2689529625932233, + "grad_norm": 0.0, + "learning_rate": 1.715196200174823e-05, + "loss": 1.4395, + "step": 9160 + }, + { + "epoch": 0.26898232427036234, + "grad_norm": 0.0, + "learning_rate": 1.7151297320636937e-05, + "loss": 1.4824, + "step": 9161 + }, + { + "epoch": 0.2690116859475013, + "grad_norm": 0.0, + "learning_rate": 1.715063257485463e-05, + "loss": 1.1855, + "step": 9162 + }, + { + "epoch": 0.2690410476246403, + "grad_norm": 0.0, + "learning_rate": 1.714996776440732e-05, + "loss": 1.3291, + "step": 9163 + }, + { + "epoch": 0.26907040930177933, + "grad_norm": 0.0, + "learning_rate": 1.7149302889301024e-05, + "loss": 1.4053, + "step": 9164 + }, + { + "epoch": 0.2690997709789183, + "grad_norm": 0.0, + "learning_rate": 1.714863794954175e-05, + "loss": 1.3042, + "step": 9165 + }, + { + "epoch": 0.2691291326560573, + "grad_norm": 0.0, + "learning_rate": 1.714797294513551e-05, + "loss": 1.3516, + "step": 9166 + }, + { + "epoch": 0.26915849433319633, + "grad_norm": 0.0, + "learning_rate": 1.714730787608832e-05, + "loss": 1.3828, + "step": 9167 + }, + { + "epoch": 0.2691878560103353, + "grad_norm": 0.0, + "learning_rate": 1.7146642742406195e-05, + "loss": 1.3564, + "step": 9168 + }, + { + "epoch": 0.2692172176874743, + "grad_norm": 0.0, + "learning_rate": 1.714597754409515e-05, + "loss": 1.3887, + "step": 9169 + }, + { + "epoch": 0.2692465793646133, + "grad_norm": 0.0, + "learning_rate": 1.71453122811612e-05, + "loss": 1.3975, + "step": 9170 + }, + { + "epoch": 0.2692759410417523, + "grad_norm": 0.0, + "learning_rate": 1.714464695361036e-05, + "loss": 1.2832, + "step": 9171 + }, + { + "epoch": 0.2693053027188913, + "grad_norm": 0.0, + "learning_rate": 1.7143981561448652e-05, + "loss": 1.3242, + "step": 9172 + }, + { + "epoch": 0.2693346643960303, + "grad_norm": 0.0, + "learning_rate": 1.7143316104682087e-05, + "loss": 1.3018, + "step": 9173 + }, + { + "epoch": 0.2693640260731693, + "grad_norm": 0.0, + "learning_rate": 1.7142650583316684e-05, + "loss": 1.4346, + "step": 9174 + }, + { + "epoch": 0.2693933877503083, + "grad_norm": 0.0, + "learning_rate": 1.714198499735846e-05, + "loss": 1.2881, + "step": 9175 + }, + { + "epoch": 0.2694227494274473, + "grad_norm": 0.0, + "learning_rate": 1.7141319346813442e-05, + "loss": 1.3398, + "step": 9176 + }, + { + "epoch": 0.2694521111045863, + "grad_norm": 0.0, + "learning_rate": 1.714065363168764e-05, + "loss": 1.2856, + "step": 9177 + }, + { + "epoch": 0.2694814727817253, + "grad_norm": 0.0, + "learning_rate": 1.713998785198708e-05, + "loss": 1.4531, + "step": 9178 + }, + { + "epoch": 0.2695108344588643, + "grad_norm": 0.0, + "learning_rate": 1.713932200771778e-05, + "loss": 1.2588, + "step": 9179 + }, + { + "epoch": 0.2695401961360033, + "grad_norm": 0.0, + "learning_rate": 1.7138656098885766e-05, + "loss": 1.3506, + "step": 9180 + }, + { + "epoch": 0.2695695578131423, + "grad_norm": 0.0, + "learning_rate": 1.7137990125497052e-05, + "loss": 1.5361, + "step": 9181 + }, + { + "epoch": 0.2695989194902813, + "grad_norm": 0.0, + "learning_rate": 1.713732408755767e-05, + "loss": 1.4238, + "step": 9182 + }, + { + "epoch": 0.26962828116742027, + "grad_norm": 0.0, + "learning_rate": 1.7136657985073633e-05, + "loss": 1.4414, + "step": 9183 + }, + { + "epoch": 0.2696576428445593, + "grad_norm": 0.0, + "learning_rate": 1.713599181805097e-05, + "loss": 1.4297, + "step": 9184 + }, + { + "epoch": 0.2696870045216983, + "grad_norm": 0.0, + "learning_rate": 1.713532558649571e-05, + "loss": 1.4092, + "step": 9185 + }, + { + "epoch": 0.26971636619883727, + "grad_norm": 0.0, + "learning_rate": 1.7134659290413874e-05, + "loss": 1.3496, + "step": 9186 + }, + { + "epoch": 0.2697457278759763, + "grad_norm": 0.0, + "learning_rate": 1.7133992929811485e-05, + "loss": 1.3467, + "step": 9187 + }, + { + "epoch": 0.2697750895531153, + "grad_norm": 0.0, + "learning_rate": 1.7133326504694568e-05, + "loss": 1.4092, + "step": 9188 + }, + { + "epoch": 0.26980445123025426, + "grad_norm": 0.0, + "learning_rate": 1.7132660015069157e-05, + "loss": 1.5898, + "step": 9189 + }, + { + "epoch": 0.2698338129073933, + "grad_norm": 0.0, + "learning_rate": 1.713199346094127e-05, + "loss": 1.4932, + "step": 9190 + }, + { + "epoch": 0.2698631745845323, + "grad_norm": 0.0, + "learning_rate": 1.7131326842316947e-05, + "loss": 1.3096, + "step": 9191 + }, + { + "epoch": 0.26989253626167126, + "grad_norm": 0.0, + "learning_rate": 1.7130660159202203e-05, + "loss": 1.4307, + "step": 9192 + }, + { + "epoch": 0.2699218979388103, + "grad_norm": 0.0, + "learning_rate": 1.7129993411603075e-05, + "loss": 1.4111, + "step": 9193 + }, + { + "epoch": 0.2699512596159493, + "grad_norm": 0.0, + "learning_rate": 1.712932659952559e-05, + "loss": 1.3535, + "step": 9194 + }, + { + "epoch": 0.26998062129308825, + "grad_norm": 0.0, + "learning_rate": 1.712865972297578e-05, + "loss": 1.4307, + "step": 9195 + }, + { + "epoch": 0.27000998297022727, + "grad_norm": 0.0, + "learning_rate": 1.7127992781959672e-05, + "loss": 1.292, + "step": 9196 + }, + { + "epoch": 0.27003934464736623, + "grad_norm": 0.0, + "learning_rate": 1.71273257764833e-05, + "loss": 1.4727, + "step": 9197 + }, + { + "epoch": 0.27006870632450525, + "grad_norm": 0.0, + "learning_rate": 1.7126658706552697e-05, + "loss": 1.4727, + "step": 9198 + }, + { + "epoch": 0.27009806800164426, + "grad_norm": 0.0, + "learning_rate": 1.7125991572173894e-05, + "loss": 1.5352, + "step": 9199 + }, + { + "epoch": 0.2701274296787832, + "grad_norm": 0.0, + "learning_rate": 1.7125324373352925e-05, + "loss": 1.3701, + "step": 9200 + }, + { + "epoch": 0.27015679135592224, + "grad_norm": 0.0, + "learning_rate": 1.7124657110095822e-05, + "loss": 1.3877, + "step": 9201 + }, + { + "epoch": 0.27018615303306126, + "grad_norm": 0.0, + "learning_rate": 1.712398978240862e-05, + "loss": 1.3506, + "step": 9202 + }, + { + "epoch": 0.2702155147102002, + "grad_norm": 0.0, + "learning_rate": 1.7123322390297355e-05, + "loss": 1.4922, + "step": 9203 + }, + { + "epoch": 0.27024487638733924, + "grad_norm": 0.0, + "learning_rate": 1.7122654933768056e-05, + "loss": 1.332, + "step": 9204 + }, + { + "epoch": 0.27027423806447826, + "grad_norm": 0.0, + "learning_rate": 1.7121987412826773e-05, + "loss": 1.5439, + "step": 9205 + }, + { + "epoch": 0.2703035997416172, + "grad_norm": 0.0, + "learning_rate": 1.7121319827479527e-05, + "loss": 1.3311, + "step": 9206 + }, + { + "epoch": 0.27033296141875623, + "grad_norm": 0.0, + "learning_rate": 1.7120652177732363e-05, + "loss": 1.3486, + "step": 9207 + }, + { + "epoch": 0.27036232309589525, + "grad_norm": 0.0, + "learning_rate": 1.7119984463591322e-05, + "loss": 1.4219, + "step": 9208 + }, + { + "epoch": 0.2703916847730342, + "grad_norm": 0.0, + "learning_rate": 1.7119316685062434e-05, + "loss": 1.5059, + "step": 9209 + }, + { + "epoch": 0.27042104645017323, + "grad_norm": 0.0, + "learning_rate": 1.7118648842151744e-05, + "loss": 1.3564, + "step": 9210 + }, + { + "epoch": 0.27045040812731225, + "grad_norm": 0.0, + "learning_rate": 1.7117980934865286e-05, + "loss": 1.5117, + "step": 9211 + }, + { + "epoch": 0.2704797698044512, + "grad_norm": 0.0, + "learning_rate": 1.711731296320911e-05, + "loss": 1.5039, + "step": 9212 + }, + { + "epoch": 0.2705091314815902, + "grad_norm": 0.0, + "learning_rate": 1.7116644927189242e-05, + "loss": 1.2949, + "step": 9213 + }, + { + "epoch": 0.27053849315872924, + "grad_norm": 0.0, + "learning_rate": 1.7115976826811738e-05, + "loss": 1.3232, + "step": 9214 + }, + { + "epoch": 0.2705678548358682, + "grad_norm": 0.0, + "learning_rate": 1.7115308662082633e-05, + "loss": 1.4512, + "step": 9215 + }, + { + "epoch": 0.2705972165130072, + "grad_norm": 0.0, + "learning_rate": 1.7114640433007966e-05, + "loss": 1.3447, + "step": 9216 + }, + { + "epoch": 0.27062657819014624, + "grad_norm": 0.0, + "learning_rate": 1.7113972139593783e-05, + "loss": 1.4102, + "step": 9217 + }, + { + "epoch": 0.2706559398672852, + "grad_norm": 0.0, + "learning_rate": 1.7113303781846128e-05, + "loss": 1.3691, + "step": 9218 + }, + { + "epoch": 0.2706853015444242, + "grad_norm": 0.0, + "learning_rate": 1.711263535977105e-05, + "loss": 1.4619, + "step": 9219 + }, + { + "epoch": 0.27071466322156323, + "grad_norm": 0.0, + "learning_rate": 1.7111966873374583e-05, + "loss": 1.4209, + "step": 9220 + }, + { + "epoch": 0.2707440248987022, + "grad_norm": 0.0, + "learning_rate": 1.7111298322662783e-05, + "loss": 1.4619, + "step": 9221 + }, + { + "epoch": 0.2707733865758412, + "grad_norm": 0.0, + "learning_rate": 1.711062970764169e-05, + "loss": 1.3623, + "step": 9222 + }, + { + "epoch": 0.27080274825298023, + "grad_norm": 0.0, + "learning_rate": 1.710996102831735e-05, + "loss": 1.417, + "step": 9223 + }, + { + "epoch": 0.2708321099301192, + "grad_norm": 0.0, + "learning_rate": 1.7109292284695807e-05, + "loss": 1.4502, + "step": 9224 + }, + { + "epoch": 0.2708614716072582, + "grad_norm": 0.0, + "learning_rate": 1.710862347678312e-05, + "loss": 1.4268, + "step": 9225 + }, + { + "epoch": 0.2708908332843972, + "grad_norm": 0.0, + "learning_rate": 1.7107954604585328e-05, + "loss": 1.2778, + "step": 9226 + }, + { + "epoch": 0.2709201949615362, + "grad_norm": 0.0, + "learning_rate": 1.7107285668108483e-05, + "loss": 1.4521, + "step": 9227 + }, + { + "epoch": 0.2709495566386752, + "grad_norm": 0.0, + "learning_rate": 1.710661666735863e-05, + "loss": 1.2378, + "step": 9228 + }, + { + "epoch": 0.2709789183158142, + "grad_norm": 0.0, + "learning_rate": 1.710594760234183e-05, + "loss": 1.4248, + "step": 9229 + }, + { + "epoch": 0.2710082799929532, + "grad_norm": 0.0, + "learning_rate": 1.7105278473064123e-05, + "loss": 1.3613, + "step": 9230 + }, + { + "epoch": 0.2710376416700922, + "grad_norm": 0.0, + "learning_rate": 1.710460927953156e-05, + "loss": 1.3945, + "step": 9231 + }, + { + "epoch": 0.2710670033472312, + "grad_norm": 0.0, + "learning_rate": 1.7103940021750197e-05, + "loss": 1.334, + "step": 9232 + }, + { + "epoch": 0.2710963650243702, + "grad_norm": 0.0, + "learning_rate": 1.7103270699726085e-05, + "loss": 1.4834, + "step": 9233 + }, + { + "epoch": 0.2711257267015092, + "grad_norm": 0.0, + "learning_rate": 1.7102601313465275e-05, + "loss": 1.3711, + "step": 9234 + }, + { + "epoch": 0.2711550883786482, + "grad_norm": 0.0, + "learning_rate": 1.7101931862973827e-05, + "loss": 1.2373, + "step": 9235 + }, + { + "epoch": 0.2711844500557872, + "grad_norm": 0.0, + "learning_rate": 1.7101262348257787e-05, + "loss": 1.3721, + "step": 9236 + }, + { + "epoch": 0.2712138117329262, + "grad_norm": 0.0, + "learning_rate": 1.7100592769323216e-05, + "loss": 1.417, + "step": 9237 + }, + { + "epoch": 0.2712431734100652, + "grad_norm": 0.0, + "learning_rate": 1.709992312617616e-05, + "loss": 1.417, + "step": 9238 + }, + { + "epoch": 0.27127253508720417, + "grad_norm": 0.0, + "learning_rate": 1.7099253418822687e-05, + "loss": 1.498, + "step": 9239 + }, + { + "epoch": 0.2713018967643432, + "grad_norm": 0.0, + "learning_rate": 1.7098583647268845e-05, + "loss": 1.416, + "step": 9240 + }, + { + "epoch": 0.2713312584414822, + "grad_norm": 0.0, + "learning_rate": 1.709791381152069e-05, + "loss": 1.4316, + "step": 9241 + }, + { + "epoch": 0.27136062011862117, + "grad_norm": 0.0, + "learning_rate": 1.709724391158429e-05, + "loss": 1.3984, + "step": 9242 + }, + { + "epoch": 0.2713899817957602, + "grad_norm": 0.0, + "learning_rate": 1.709657394746569e-05, + "loss": 1.4629, + "step": 9243 + }, + { + "epoch": 0.2714193434728992, + "grad_norm": 0.0, + "learning_rate": 1.7095903919170954e-05, + "loss": 1.2886, + "step": 9244 + }, + { + "epoch": 0.27144870515003816, + "grad_norm": 0.0, + "learning_rate": 1.7095233826706144e-05, + "loss": 1.4961, + "step": 9245 + }, + { + "epoch": 0.2714780668271772, + "grad_norm": 0.0, + "learning_rate": 1.7094563670077315e-05, + "loss": 1.54, + "step": 9246 + }, + { + "epoch": 0.27150742850431614, + "grad_norm": 0.0, + "learning_rate": 1.7093893449290534e-05, + "loss": 1.3936, + "step": 9247 + }, + { + "epoch": 0.27153679018145516, + "grad_norm": 0.0, + "learning_rate": 1.7093223164351853e-05, + "loss": 1.4346, + "step": 9248 + }, + { + "epoch": 0.2715661518585942, + "grad_norm": 0.0, + "learning_rate": 1.709255281526734e-05, + "loss": 1.3262, + "step": 9249 + }, + { + "epoch": 0.27159551353573314, + "grad_norm": 0.0, + "learning_rate": 1.7091882402043056e-05, + "loss": 1.4409, + "step": 9250 + }, + { + "epoch": 0.27162487521287215, + "grad_norm": 0.0, + "learning_rate": 1.7091211924685062e-05, + "loss": 1.3682, + "step": 9251 + }, + { + "epoch": 0.27165423689001117, + "grad_norm": 0.0, + "learning_rate": 1.7090541383199424e-05, + "loss": 1.3896, + "step": 9252 + }, + { + "epoch": 0.27168359856715013, + "grad_norm": 0.0, + "learning_rate": 1.7089870777592205e-05, + "loss": 1.4268, + "step": 9253 + }, + { + "epoch": 0.27171296024428915, + "grad_norm": 0.0, + "learning_rate": 1.708920010786947e-05, + "loss": 1.4648, + "step": 9254 + }, + { + "epoch": 0.27174232192142816, + "grad_norm": 0.0, + "learning_rate": 1.7088529374037278e-05, + "loss": 1.4629, + "step": 9255 + }, + { + "epoch": 0.2717716835985671, + "grad_norm": 0.0, + "learning_rate": 1.7087858576101705e-05, + "loss": 1.4023, + "step": 9256 + }, + { + "epoch": 0.27180104527570614, + "grad_norm": 0.0, + "learning_rate": 1.7087187714068804e-05, + "loss": 1.4336, + "step": 9257 + }, + { + "epoch": 0.27183040695284516, + "grad_norm": 0.0, + "learning_rate": 1.7086516787944654e-05, + "loss": 1.5186, + "step": 9258 + }, + { + "epoch": 0.2718597686299841, + "grad_norm": 0.0, + "learning_rate": 1.708584579773532e-05, + "loss": 1.4902, + "step": 9259 + }, + { + "epoch": 0.27188913030712314, + "grad_norm": 0.0, + "learning_rate": 1.7085174743446865e-05, + "loss": 1.3228, + "step": 9260 + }, + { + "epoch": 0.27191849198426216, + "grad_norm": 0.0, + "learning_rate": 1.7084503625085362e-05, + "loss": 1.4463, + "step": 9261 + }, + { + "epoch": 0.2719478536614011, + "grad_norm": 0.0, + "learning_rate": 1.708383244265688e-05, + "loss": 1.4697, + "step": 9262 + }, + { + "epoch": 0.27197721533854013, + "grad_norm": 0.0, + "learning_rate": 1.708316119616748e-05, + "loss": 1.5889, + "step": 9263 + }, + { + "epoch": 0.27200657701567915, + "grad_norm": 0.0, + "learning_rate": 1.7082489885623244e-05, + "loss": 1.4893, + "step": 9264 + }, + { + "epoch": 0.2720359386928181, + "grad_norm": 0.0, + "learning_rate": 1.708181851103024e-05, + "loss": 1.3945, + "step": 9265 + }, + { + "epoch": 0.27206530036995713, + "grad_norm": 0.0, + "learning_rate": 1.7081147072394534e-05, + "loss": 1.4785, + "step": 9266 + }, + { + "epoch": 0.27209466204709615, + "grad_norm": 0.0, + "learning_rate": 1.7080475569722202e-05, + "loss": 1.3955, + "step": 9267 + }, + { + "epoch": 0.2721240237242351, + "grad_norm": 0.0, + "learning_rate": 1.7079804003019316e-05, + "loss": 1.417, + "step": 9268 + }, + { + "epoch": 0.2721533854013741, + "grad_norm": 0.0, + "learning_rate": 1.707913237229195e-05, + "loss": 1.3789, + "step": 9269 + }, + { + "epoch": 0.27218274707851314, + "grad_norm": 0.0, + "learning_rate": 1.707846067754618e-05, + "loss": 1.4082, + "step": 9270 + }, + { + "epoch": 0.2722121087556521, + "grad_norm": 0.0, + "learning_rate": 1.707778891878807e-05, + "loss": 1.4941, + "step": 9271 + }, + { + "epoch": 0.2722414704327911, + "grad_norm": 0.0, + "learning_rate": 1.707711709602371e-05, + "loss": 1.4844, + "step": 9272 + }, + { + "epoch": 0.27227083210993014, + "grad_norm": 0.0, + "learning_rate": 1.7076445209259162e-05, + "loss": 1.4189, + "step": 9273 + }, + { + "epoch": 0.2723001937870691, + "grad_norm": 0.0, + "learning_rate": 1.7075773258500513e-05, + "loss": 1.5693, + "step": 9274 + }, + { + "epoch": 0.2723295554642081, + "grad_norm": 0.0, + "learning_rate": 1.707510124375383e-05, + "loss": 1.46, + "step": 9275 + }, + { + "epoch": 0.27235891714134713, + "grad_norm": 0.0, + "learning_rate": 1.7074429165025196e-05, + "loss": 1.4023, + "step": 9276 + }, + { + "epoch": 0.2723882788184861, + "grad_norm": 0.0, + "learning_rate": 1.7073757022320685e-05, + "loss": 1.3848, + "step": 9277 + }, + { + "epoch": 0.2724176404956251, + "grad_norm": 0.0, + "learning_rate": 1.707308481564638e-05, + "loss": 1.4258, + "step": 9278 + }, + { + "epoch": 0.27244700217276413, + "grad_norm": 0.0, + "learning_rate": 1.7072412545008356e-05, + "loss": 1.376, + "step": 9279 + }, + { + "epoch": 0.2724763638499031, + "grad_norm": 0.0, + "learning_rate": 1.7071740210412695e-05, + "loss": 1.4385, + "step": 9280 + }, + { + "epoch": 0.2725057255270421, + "grad_norm": 0.0, + "learning_rate": 1.7071067811865477e-05, + "loss": 1.3838, + "step": 9281 + }, + { + "epoch": 0.2725350872041811, + "grad_norm": 0.0, + "learning_rate": 1.707039534937278e-05, + "loss": 1.4785, + "step": 9282 + }, + { + "epoch": 0.2725644488813201, + "grad_norm": 0.0, + "learning_rate": 1.7069722822940688e-05, + "loss": 1.4863, + "step": 9283 + }, + { + "epoch": 0.2725938105584591, + "grad_norm": 0.0, + "learning_rate": 1.7069050232575283e-05, + "loss": 1.5479, + "step": 9284 + }, + { + "epoch": 0.2726231722355981, + "grad_norm": 0.0, + "learning_rate": 1.7068377578282645e-05, + "loss": 1.5498, + "step": 9285 + }, + { + "epoch": 0.2726525339127371, + "grad_norm": 0.0, + "learning_rate": 1.7067704860068862e-05, + "loss": 1.4316, + "step": 9286 + }, + { + "epoch": 0.2726818955898761, + "grad_norm": 0.0, + "learning_rate": 1.706703207794001e-05, + "loss": 1.3672, + "step": 9287 + }, + { + "epoch": 0.2727112572670151, + "grad_norm": 0.0, + "learning_rate": 1.7066359231902175e-05, + "loss": 1.417, + "step": 9288 + }, + { + "epoch": 0.2727406189441541, + "grad_norm": 0.0, + "learning_rate": 1.7065686321961448e-05, + "loss": 1.3848, + "step": 9289 + }, + { + "epoch": 0.2727699806212931, + "grad_norm": 0.0, + "learning_rate": 1.7065013348123906e-05, + "loss": 1.3262, + "step": 9290 + }, + { + "epoch": 0.2727993422984321, + "grad_norm": 0.0, + "learning_rate": 1.706434031039564e-05, + "loss": 1.3496, + "step": 9291 + }, + { + "epoch": 0.2728287039755711, + "grad_norm": 0.0, + "learning_rate": 1.706366720878274e-05, + "loss": 1.5312, + "step": 9292 + }, + { + "epoch": 0.2728580656527101, + "grad_norm": 0.0, + "learning_rate": 1.7062994043291284e-05, + "loss": 1.3906, + "step": 9293 + }, + { + "epoch": 0.2728874273298491, + "grad_norm": 0.0, + "learning_rate": 1.7062320813927368e-05, + "loss": 1.4629, + "step": 9294 + }, + { + "epoch": 0.27291678900698807, + "grad_norm": 0.0, + "learning_rate": 1.706164752069707e-05, + "loss": 1.4287, + "step": 9295 + }, + { + "epoch": 0.2729461506841271, + "grad_norm": 0.0, + "learning_rate": 1.706097416360649e-05, + "loss": 1.4736, + "step": 9296 + }, + { + "epoch": 0.27297551236126605, + "grad_norm": 0.0, + "learning_rate": 1.7060300742661713e-05, + "loss": 1.5146, + "step": 9297 + }, + { + "epoch": 0.27300487403840507, + "grad_norm": 0.0, + "learning_rate": 1.7059627257868822e-05, + "loss": 1.418, + "step": 9298 + }, + { + "epoch": 0.2730342357155441, + "grad_norm": 0.0, + "learning_rate": 1.705895370923392e-05, + "loss": 1.3994, + "step": 9299 + }, + { + "epoch": 0.27306359739268304, + "grad_norm": 0.0, + "learning_rate": 1.7058280096763086e-05, + "loss": 1.4043, + "step": 9300 + }, + { + "epoch": 0.27309295906982206, + "grad_norm": 0.0, + "learning_rate": 1.7057606420462422e-05, + "loss": 1.4141, + "step": 9301 + }, + { + "epoch": 0.2731223207469611, + "grad_norm": 0.0, + "learning_rate": 1.7056932680338015e-05, + "loss": 1.3008, + "step": 9302 + }, + { + "epoch": 0.27315168242410004, + "grad_norm": 0.0, + "learning_rate": 1.7056258876395957e-05, + "loss": 1.3662, + "step": 9303 + }, + { + "epoch": 0.27318104410123906, + "grad_norm": 0.0, + "learning_rate": 1.7055585008642347e-05, + "loss": 1.5312, + "step": 9304 + }, + { + "epoch": 0.2732104057783781, + "grad_norm": 0.0, + "learning_rate": 1.705491107708327e-05, + "loss": 1.4209, + "step": 9305 + }, + { + "epoch": 0.27323976745551704, + "grad_norm": 0.0, + "learning_rate": 1.7054237081724827e-05, + "loss": 1.2935, + "step": 9306 + }, + { + "epoch": 0.27326912913265605, + "grad_norm": 0.0, + "learning_rate": 1.705356302257311e-05, + "loss": 1.4297, + "step": 9307 + }, + { + "epoch": 0.27329849080979507, + "grad_norm": 0.0, + "learning_rate": 1.705288889963422e-05, + "loss": 1.3984, + "step": 9308 + }, + { + "epoch": 0.27332785248693403, + "grad_norm": 0.0, + "learning_rate": 1.7052214712914246e-05, + "loss": 1.4492, + "step": 9309 + }, + { + "epoch": 0.27335721416407305, + "grad_norm": 0.0, + "learning_rate": 1.7051540462419288e-05, + "loss": 1.4434, + "step": 9310 + }, + { + "epoch": 0.27338657584121207, + "grad_norm": 0.0, + "learning_rate": 1.7050866148155444e-05, + "loss": 1.4746, + "step": 9311 + }, + { + "epoch": 0.273415937518351, + "grad_norm": 0.0, + "learning_rate": 1.7050191770128814e-05, + "loss": 1.3555, + "step": 9312 + }, + { + "epoch": 0.27344529919549004, + "grad_norm": 0.0, + "learning_rate": 1.704951732834549e-05, + "loss": 1.3408, + "step": 9313 + }, + { + "epoch": 0.27347466087262906, + "grad_norm": 0.0, + "learning_rate": 1.704884282281158e-05, + "loss": 1.2085, + "step": 9314 + }, + { + "epoch": 0.273504022549768, + "grad_norm": 0.0, + "learning_rate": 1.7048168253533176e-05, + "loss": 1.3506, + "step": 9315 + }, + { + "epoch": 0.27353338422690704, + "grad_norm": 0.0, + "learning_rate": 1.7047493620516384e-05, + "loss": 1.3145, + "step": 9316 + }, + { + "epoch": 0.27356274590404606, + "grad_norm": 0.0, + "learning_rate": 1.7046818923767302e-05, + "loss": 1.5752, + "step": 9317 + }, + { + "epoch": 0.273592107581185, + "grad_norm": 0.0, + "learning_rate": 1.704614416329203e-05, + "loss": 1.3623, + "step": 9318 + }, + { + "epoch": 0.27362146925832403, + "grad_norm": 0.0, + "learning_rate": 1.704546933909667e-05, + "loss": 1.376, + "step": 9319 + }, + { + "epoch": 0.27365083093546305, + "grad_norm": 0.0, + "learning_rate": 1.7044794451187332e-05, + "loss": 1.4795, + "step": 9320 + }, + { + "epoch": 0.273680192612602, + "grad_norm": 0.0, + "learning_rate": 1.704411949957011e-05, + "loss": 1.3945, + "step": 9321 + }, + { + "epoch": 0.27370955428974103, + "grad_norm": 0.0, + "learning_rate": 1.704344448425111e-05, + "loss": 1.3672, + "step": 9322 + }, + { + "epoch": 0.27373891596688005, + "grad_norm": 0.0, + "learning_rate": 1.704276940523644e-05, + "loss": 1.2451, + "step": 9323 + }, + { + "epoch": 0.273768277644019, + "grad_norm": 0.0, + "learning_rate": 1.70420942625322e-05, + "loss": 1.5059, + "step": 9324 + }, + { + "epoch": 0.273797639321158, + "grad_norm": 0.0, + "learning_rate": 1.7041419056144503e-05, + "loss": 1.3828, + "step": 9325 + }, + { + "epoch": 0.27382700099829704, + "grad_norm": 0.0, + "learning_rate": 1.704074378607945e-05, + "loss": 1.376, + "step": 9326 + }, + { + "epoch": 0.273856362675436, + "grad_norm": 0.0, + "learning_rate": 1.7040068452343145e-05, + "loss": 1.4395, + "step": 9327 + }, + { + "epoch": 0.273885724352575, + "grad_norm": 0.0, + "learning_rate": 1.7039393054941697e-05, + "loss": 1.332, + "step": 9328 + }, + { + "epoch": 0.27391508602971404, + "grad_norm": 0.0, + "learning_rate": 1.7038717593881213e-05, + "loss": 1.5371, + "step": 9329 + }, + { + "epoch": 0.273944447706853, + "grad_norm": 0.0, + "learning_rate": 1.7038042069167806e-05, + "loss": 1.5039, + "step": 9330 + }, + { + "epoch": 0.273973809383992, + "grad_norm": 0.0, + "learning_rate": 1.703736648080758e-05, + "loss": 1.4561, + "step": 9331 + }, + { + "epoch": 0.27400317106113103, + "grad_norm": 0.0, + "learning_rate": 1.7036690828806653e-05, + "loss": 1.3887, + "step": 9332 + }, + { + "epoch": 0.27403253273827, + "grad_norm": 0.0, + "learning_rate": 1.7036015113171122e-05, + "loss": 1.4609, + "step": 9333 + }, + { + "epoch": 0.274061894415409, + "grad_norm": 0.0, + "learning_rate": 1.703533933390711e-05, + "loss": 1.3574, + "step": 9334 + }, + { + "epoch": 0.27409125609254803, + "grad_norm": 0.0, + "learning_rate": 1.7034663491020717e-05, + "loss": 1.3633, + "step": 9335 + }, + { + "epoch": 0.274120617769687, + "grad_norm": 0.0, + "learning_rate": 1.7033987584518065e-05, + "loss": 1.4863, + "step": 9336 + }, + { + "epoch": 0.274149979446826, + "grad_norm": 0.0, + "learning_rate": 1.703331161440526e-05, + "loss": 1.334, + "step": 9337 + }, + { + "epoch": 0.274179341123965, + "grad_norm": 0.0, + "learning_rate": 1.7032635580688417e-05, + "loss": 1.4141, + "step": 9338 + }, + { + "epoch": 0.274208702801104, + "grad_norm": 0.0, + "learning_rate": 1.703195948337365e-05, + "loss": 1.3086, + "step": 9339 + }, + { + "epoch": 0.274238064478243, + "grad_norm": 0.0, + "learning_rate": 1.7031283322467067e-05, + "loss": 1.3115, + "step": 9340 + }, + { + "epoch": 0.274267426155382, + "grad_norm": 0.0, + "learning_rate": 1.7030607097974795e-05, + "loss": 1.4248, + "step": 9341 + }, + { + "epoch": 0.274296787832521, + "grad_norm": 0.0, + "learning_rate": 1.702993080990294e-05, + "loss": 1.4736, + "step": 9342 + }, + { + "epoch": 0.27432614950966, + "grad_norm": 0.0, + "learning_rate": 1.702925445825762e-05, + "loss": 1.543, + "step": 9343 + }, + { + "epoch": 0.274355511186799, + "grad_norm": 0.0, + "learning_rate": 1.702857804304495e-05, + "loss": 1.3135, + "step": 9344 + }, + { + "epoch": 0.274384872863938, + "grad_norm": 0.0, + "learning_rate": 1.702790156427105e-05, + "loss": 1.4268, + "step": 9345 + }, + { + "epoch": 0.274414234541077, + "grad_norm": 0.0, + "learning_rate": 1.702722502194204e-05, + "loss": 1.3057, + "step": 9346 + }, + { + "epoch": 0.27444359621821596, + "grad_norm": 0.0, + "learning_rate": 1.702654841606403e-05, + "loss": 1.3984, + "step": 9347 + }, + { + "epoch": 0.274472957895355, + "grad_norm": 0.0, + "learning_rate": 1.7025871746643144e-05, + "loss": 1.4863, + "step": 9348 + }, + { + "epoch": 0.274502319572494, + "grad_norm": 0.0, + "learning_rate": 1.7025195013685502e-05, + "loss": 1.2734, + "step": 9349 + }, + { + "epoch": 0.27453168124963295, + "grad_norm": 0.0, + "learning_rate": 1.702451821719722e-05, + "loss": 1.5566, + "step": 9350 + }, + { + "epoch": 0.27456104292677197, + "grad_norm": 0.0, + "learning_rate": 1.7023841357184422e-05, + "loss": 1.3467, + "step": 9351 + }, + { + "epoch": 0.274590404603911, + "grad_norm": 0.0, + "learning_rate": 1.702316443365323e-05, + "loss": 1.46, + "step": 9352 + }, + { + "epoch": 0.27461976628104995, + "grad_norm": 0.0, + "learning_rate": 1.702248744660976e-05, + "loss": 1.4307, + "step": 9353 + }, + { + "epoch": 0.27464912795818897, + "grad_norm": 0.0, + "learning_rate": 1.702181039606014e-05, + "loss": 1.4473, + "step": 9354 + }, + { + "epoch": 0.274678489635328, + "grad_norm": 0.0, + "learning_rate": 1.7021133282010488e-05, + "loss": 1.4619, + "step": 9355 + }, + { + "epoch": 0.27470785131246694, + "grad_norm": 0.0, + "learning_rate": 1.702045610446693e-05, + "loss": 1.4336, + "step": 9356 + }, + { + "epoch": 0.27473721298960596, + "grad_norm": 0.0, + "learning_rate": 1.701977886343559e-05, + "loss": 1.4756, + "step": 9357 + }, + { + "epoch": 0.274766574666745, + "grad_norm": 0.0, + "learning_rate": 1.7019101558922592e-05, + "loss": 1.4346, + "step": 9358 + }, + { + "epoch": 0.27479593634388394, + "grad_norm": 0.0, + "learning_rate": 1.7018424190934062e-05, + "loss": 1.501, + "step": 9359 + }, + { + "epoch": 0.27482529802102296, + "grad_norm": 0.0, + "learning_rate": 1.7017746759476124e-05, + "loss": 1.4072, + "step": 9360 + }, + { + "epoch": 0.274854659698162, + "grad_norm": 0.0, + "learning_rate": 1.7017069264554904e-05, + "loss": 1.4502, + "step": 9361 + }, + { + "epoch": 0.27488402137530094, + "grad_norm": 0.0, + "learning_rate": 1.701639170617653e-05, + "loss": 1.3564, + "step": 9362 + }, + { + "epoch": 0.27491338305243995, + "grad_norm": 0.0, + "learning_rate": 1.701571408434713e-05, + "loss": 1.4717, + "step": 9363 + }, + { + "epoch": 0.27494274472957897, + "grad_norm": 0.0, + "learning_rate": 1.7015036399072825e-05, + "loss": 1.4326, + "step": 9364 + }, + { + "epoch": 0.27497210640671793, + "grad_norm": 0.0, + "learning_rate": 1.701435865035976e-05, + "loss": 1.3418, + "step": 9365 + }, + { + "epoch": 0.27500146808385695, + "grad_norm": 0.0, + "learning_rate": 1.7013680838214045e-05, + "loss": 1.4453, + "step": 9366 + }, + { + "epoch": 0.27503082976099597, + "grad_norm": 0.0, + "learning_rate": 1.701300296264182e-05, + "loss": 1.3535, + "step": 9367 + }, + { + "epoch": 0.2750601914381349, + "grad_norm": 0.0, + "learning_rate": 1.7012325023649212e-05, + "loss": 1.4404, + "step": 9368 + }, + { + "epoch": 0.27508955311527394, + "grad_norm": 0.0, + "learning_rate": 1.7011647021242353e-05, + "loss": 1.2554, + "step": 9369 + }, + { + "epoch": 0.27511891479241296, + "grad_norm": 0.0, + "learning_rate": 1.7010968955427377e-05, + "loss": 1.3408, + "step": 9370 + }, + { + "epoch": 0.2751482764695519, + "grad_norm": 0.0, + "learning_rate": 1.7010290826210412e-05, + "loss": 1.3965, + "step": 9371 + }, + { + "epoch": 0.27517763814669094, + "grad_norm": 0.0, + "learning_rate": 1.7009612633597592e-05, + "loss": 1.3779, + "step": 9372 + }, + { + "epoch": 0.27520699982382996, + "grad_norm": 0.0, + "learning_rate": 1.700893437759505e-05, + "loss": 1.3633, + "step": 9373 + }, + { + "epoch": 0.2752363615009689, + "grad_norm": 0.0, + "learning_rate": 1.7008256058208918e-05, + "loss": 1.4473, + "step": 9374 + }, + { + "epoch": 0.27526572317810793, + "grad_norm": 0.0, + "learning_rate": 1.7007577675445333e-05, + "loss": 1.585, + "step": 9375 + }, + { + "epoch": 0.27529508485524695, + "grad_norm": 0.0, + "learning_rate": 1.7006899229310425e-05, + "loss": 1.332, + "step": 9376 + }, + { + "epoch": 0.2753244465323859, + "grad_norm": 0.0, + "learning_rate": 1.7006220719810338e-05, + "loss": 1.4014, + "step": 9377 + }, + { + "epoch": 0.27535380820952493, + "grad_norm": 0.0, + "learning_rate": 1.70055421469512e-05, + "loss": 1.3721, + "step": 9378 + }, + { + "epoch": 0.27538316988666395, + "grad_norm": 0.0, + "learning_rate": 1.700486351073915e-05, + "loss": 1.4355, + "step": 9379 + }, + { + "epoch": 0.2754125315638029, + "grad_norm": 0.0, + "learning_rate": 1.7004184811180325e-05, + "loss": 1.3594, + "step": 9380 + }, + { + "epoch": 0.2754418932409419, + "grad_norm": 0.0, + "learning_rate": 1.700350604828086e-05, + "loss": 1.4717, + "step": 9381 + }, + { + "epoch": 0.27547125491808094, + "grad_norm": 0.0, + "learning_rate": 1.7002827222046902e-05, + "loss": 1.4697, + "step": 9382 + }, + { + "epoch": 0.2755006165952199, + "grad_norm": 0.0, + "learning_rate": 1.700214833248458e-05, + "loss": 1.4326, + "step": 9383 + }, + { + "epoch": 0.2755299782723589, + "grad_norm": 0.0, + "learning_rate": 1.7001469379600043e-05, + "loss": 1.3408, + "step": 9384 + }, + { + "epoch": 0.27555933994949794, + "grad_norm": 0.0, + "learning_rate": 1.700079036339942e-05, + "loss": 1.4404, + "step": 9385 + }, + { + "epoch": 0.2755887016266369, + "grad_norm": 0.0, + "learning_rate": 1.700011128388886e-05, + "loss": 1.3262, + "step": 9386 + }, + { + "epoch": 0.2756180633037759, + "grad_norm": 0.0, + "learning_rate": 1.69994321410745e-05, + "loss": 1.417, + "step": 9387 + }, + { + "epoch": 0.27564742498091493, + "grad_norm": 0.0, + "learning_rate": 1.6998752934962478e-05, + "loss": 1.4541, + "step": 9388 + }, + { + "epoch": 0.2756767866580539, + "grad_norm": 0.0, + "learning_rate": 1.6998073665558946e-05, + "loss": 1.4707, + "step": 9389 + }, + { + "epoch": 0.2757061483351929, + "grad_norm": 0.0, + "learning_rate": 1.6997394332870038e-05, + "loss": 1.4688, + "step": 9390 + }, + { + "epoch": 0.27573551001233193, + "grad_norm": 0.0, + "learning_rate": 1.6996714936901907e-05, + "loss": 1.3516, + "step": 9391 + }, + { + "epoch": 0.2757648716894709, + "grad_norm": 0.0, + "learning_rate": 1.6996035477660683e-05, + "loss": 1.4492, + "step": 9392 + }, + { + "epoch": 0.2757942333666099, + "grad_norm": 0.0, + "learning_rate": 1.6995355955152522e-05, + "loss": 1.3389, + "step": 9393 + }, + { + "epoch": 0.2758235950437489, + "grad_norm": 0.0, + "learning_rate": 1.6994676369383566e-05, + "loss": 1.4678, + "step": 9394 + }, + { + "epoch": 0.2758529567208879, + "grad_norm": 0.0, + "learning_rate": 1.6993996720359962e-05, + "loss": 1.3877, + "step": 9395 + }, + { + "epoch": 0.2758823183980269, + "grad_norm": 0.0, + "learning_rate": 1.6993317008087852e-05, + "loss": 1.415, + "step": 9396 + }, + { + "epoch": 0.27591168007516587, + "grad_norm": 0.0, + "learning_rate": 1.699263723257339e-05, + "loss": 1.4141, + "step": 9397 + }, + { + "epoch": 0.2759410417523049, + "grad_norm": 0.0, + "learning_rate": 1.6991957393822712e-05, + "loss": 1.3604, + "step": 9398 + }, + { + "epoch": 0.2759704034294439, + "grad_norm": 0.0, + "learning_rate": 1.6991277491841975e-05, + "loss": 1.3643, + "step": 9399 + }, + { + "epoch": 0.27599976510658286, + "grad_norm": 0.0, + "learning_rate": 1.6990597526637326e-05, + "loss": 1.4121, + "step": 9400 + }, + { + "epoch": 0.2760291267837219, + "grad_norm": 0.0, + "learning_rate": 1.698991749821491e-05, + "loss": 1.3174, + "step": 9401 + }, + { + "epoch": 0.2760584884608609, + "grad_norm": 0.0, + "learning_rate": 1.6989237406580884e-05, + "loss": 1.3809, + "step": 9402 + }, + { + "epoch": 0.27608785013799986, + "grad_norm": 0.0, + "learning_rate": 1.6988557251741395e-05, + "loss": 1.2871, + "step": 9403 + }, + { + "epoch": 0.2761172118151389, + "grad_norm": 0.0, + "learning_rate": 1.6987877033702588e-05, + "loss": 1.5215, + "step": 9404 + }, + { + "epoch": 0.2761465734922779, + "grad_norm": 0.0, + "learning_rate": 1.6987196752470624e-05, + "loss": 1.3574, + "step": 9405 + }, + { + "epoch": 0.27617593516941685, + "grad_norm": 0.0, + "learning_rate": 1.698651640805165e-05, + "loss": 1.3857, + "step": 9406 + }, + { + "epoch": 0.27620529684655587, + "grad_norm": 0.0, + "learning_rate": 1.6985836000451815e-05, + "loss": 1.3672, + "step": 9407 + }, + { + "epoch": 0.2762346585236949, + "grad_norm": 0.0, + "learning_rate": 1.6985155529677276e-05, + "loss": 1.3438, + "step": 9408 + }, + { + "epoch": 0.27626402020083385, + "grad_norm": 0.0, + "learning_rate": 1.698447499573419e-05, + "loss": 1.4287, + "step": 9409 + }, + { + "epoch": 0.27629338187797287, + "grad_norm": 0.0, + "learning_rate": 1.698379439862871e-05, + "loss": 1.6006, + "step": 9410 + }, + { + "epoch": 0.2763227435551119, + "grad_norm": 0.0, + "learning_rate": 1.6983113738366984e-05, + "loss": 1.3975, + "step": 9411 + }, + { + "epoch": 0.27635210523225084, + "grad_norm": 0.0, + "learning_rate": 1.6982433014955172e-05, + "loss": 1.3975, + "step": 9412 + }, + { + "epoch": 0.27638146690938986, + "grad_norm": 0.0, + "learning_rate": 1.698175222839943e-05, + "loss": 1.4482, + "step": 9413 + }, + { + "epoch": 0.2764108285865289, + "grad_norm": 0.0, + "learning_rate": 1.6981071378705916e-05, + "loss": 1.251, + "step": 9414 + }, + { + "epoch": 0.27644019026366784, + "grad_norm": 0.0, + "learning_rate": 1.698039046588079e-05, + "loss": 1.5645, + "step": 9415 + }, + { + "epoch": 0.27646955194080686, + "grad_norm": 0.0, + "learning_rate": 1.69797094899302e-05, + "loss": 1.4893, + "step": 9416 + }, + { + "epoch": 0.2764989136179459, + "grad_norm": 0.0, + "learning_rate": 1.6979028450860308e-05, + "loss": 1.4258, + "step": 9417 + }, + { + "epoch": 0.27652827529508484, + "grad_norm": 0.0, + "learning_rate": 1.697834734867728e-05, + "loss": 1.4502, + "step": 9418 + }, + { + "epoch": 0.27655763697222385, + "grad_norm": 0.0, + "learning_rate": 1.6977666183387263e-05, + "loss": 1.4277, + "step": 9419 + }, + { + "epoch": 0.27658699864936287, + "grad_norm": 0.0, + "learning_rate": 1.6976984954996427e-05, + "loss": 1.2949, + "step": 9420 + }, + { + "epoch": 0.27661636032650183, + "grad_norm": 0.0, + "learning_rate": 1.697630366351093e-05, + "loss": 1.4492, + "step": 9421 + }, + { + "epoch": 0.27664572200364085, + "grad_norm": 0.0, + "learning_rate": 1.6975622308936932e-05, + "loss": 1.4531, + "step": 9422 + }, + { + "epoch": 0.27667508368077987, + "grad_norm": 0.0, + "learning_rate": 1.6974940891280595e-05, + "loss": 1.4248, + "step": 9423 + }, + { + "epoch": 0.2767044453579188, + "grad_norm": 0.0, + "learning_rate": 1.6974259410548083e-05, + "loss": 1.4297, + "step": 9424 + }, + { + "epoch": 0.27673380703505784, + "grad_norm": 0.0, + "learning_rate": 1.6973577866745555e-05, + "loss": 1.415, + "step": 9425 + }, + { + "epoch": 0.27676316871219686, + "grad_norm": 0.0, + "learning_rate": 1.697289625987918e-05, + "loss": 1.4609, + "step": 9426 + }, + { + "epoch": 0.2767925303893358, + "grad_norm": 0.0, + "learning_rate": 1.697221458995511e-05, + "loss": 1.4668, + "step": 9427 + }, + { + "epoch": 0.27682189206647484, + "grad_norm": 0.0, + "learning_rate": 1.6971532856979522e-05, + "loss": 1.4277, + "step": 9428 + }, + { + "epoch": 0.27685125374361386, + "grad_norm": 0.0, + "learning_rate": 1.697085106095858e-05, + "loss": 1.4141, + "step": 9429 + }, + { + "epoch": 0.2768806154207528, + "grad_norm": 0.0, + "learning_rate": 1.6970169201898442e-05, + "loss": 1.4902, + "step": 9430 + }, + { + "epoch": 0.27690997709789184, + "grad_norm": 0.0, + "learning_rate": 1.696948727980528e-05, + "loss": 1.3496, + "step": 9431 + }, + { + "epoch": 0.27693933877503085, + "grad_norm": 0.0, + "learning_rate": 1.6968805294685262e-05, + "loss": 1.4238, + "step": 9432 + }, + { + "epoch": 0.2769687004521698, + "grad_norm": 0.0, + "learning_rate": 1.696812324654455e-05, + "loss": 1.2773, + "step": 9433 + }, + { + "epoch": 0.27699806212930883, + "grad_norm": 0.0, + "learning_rate": 1.696744113538932e-05, + "loss": 1.3838, + "step": 9434 + }, + { + "epoch": 0.27702742380644785, + "grad_norm": 0.0, + "learning_rate": 1.696675896122573e-05, + "loss": 1.3145, + "step": 9435 + }, + { + "epoch": 0.2770567854835868, + "grad_norm": 0.0, + "learning_rate": 1.6966076724059956e-05, + "loss": 1.293, + "step": 9436 + }, + { + "epoch": 0.2770861471607258, + "grad_norm": 0.0, + "learning_rate": 1.6965394423898166e-05, + "loss": 1.3877, + "step": 9437 + }, + { + "epoch": 0.27711550883786484, + "grad_norm": 0.0, + "learning_rate": 1.6964712060746533e-05, + "loss": 1.2979, + "step": 9438 + }, + { + "epoch": 0.2771448705150038, + "grad_norm": 0.0, + "learning_rate": 1.696402963461122e-05, + "loss": 1.4844, + "step": 9439 + }, + { + "epoch": 0.2771742321921428, + "grad_norm": 0.0, + "learning_rate": 1.696334714549841e-05, + "loss": 1.2656, + "step": 9440 + }, + { + "epoch": 0.27720359386928184, + "grad_norm": 0.0, + "learning_rate": 1.6962664593414265e-05, + "loss": 1.4834, + "step": 9441 + }, + { + "epoch": 0.2772329555464208, + "grad_norm": 0.0, + "learning_rate": 1.696198197836496e-05, + "loss": 1.4209, + "step": 9442 + }, + { + "epoch": 0.2772623172235598, + "grad_norm": 0.0, + "learning_rate": 1.6961299300356667e-05, + "loss": 1.4033, + "step": 9443 + }, + { + "epoch": 0.27729167890069883, + "grad_norm": 0.0, + "learning_rate": 1.696061655939557e-05, + "loss": 1.333, + "step": 9444 + }, + { + "epoch": 0.2773210405778378, + "grad_norm": 0.0, + "learning_rate": 1.695993375548783e-05, + "loss": 1.3701, + "step": 9445 + }, + { + "epoch": 0.2773504022549768, + "grad_norm": 0.0, + "learning_rate": 1.695925088863962e-05, + "loss": 1.4785, + "step": 9446 + }, + { + "epoch": 0.2773797639321158, + "grad_norm": 0.0, + "learning_rate": 1.695856795885713e-05, + "loss": 1.5107, + "step": 9447 + }, + { + "epoch": 0.2774091256092548, + "grad_norm": 0.0, + "learning_rate": 1.6957884966146524e-05, + "loss": 1.4414, + "step": 9448 + }, + { + "epoch": 0.2774384872863938, + "grad_norm": 0.0, + "learning_rate": 1.6957201910513984e-05, + "loss": 1.4414, + "step": 9449 + }, + { + "epoch": 0.27746784896353277, + "grad_norm": 0.0, + "learning_rate": 1.6956518791965687e-05, + "loss": 1.3252, + "step": 9450 + }, + { + "epoch": 0.2774972106406718, + "grad_norm": 0.0, + "learning_rate": 1.6955835610507806e-05, + "loss": 1.4043, + "step": 9451 + }, + { + "epoch": 0.2775265723178108, + "grad_norm": 0.0, + "learning_rate": 1.6955152366146524e-05, + "loss": 1.3291, + "step": 9452 + }, + { + "epoch": 0.27755593399494977, + "grad_norm": 0.0, + "learning_rate": 1.6954469058888018e-05, + "loss": 1.3486, + "step": 9453 + }, + { + "epoch": 0.2775852956720888, + "grad_norm": 0.0, + "learning_rate": 1.6953785688738467e-05, + "loss": 1.3486, + "step": 9454 + }, + { + "epoch": 0.2776146573492278, + "grad_norm": 0.0, + "learning_rate": 1.6953102255704052e-05, + "loss": 1.46, + "step": 9455 + }, + { + "epoch": 0.27764401902636676, + "grad_norm": 0.0, + "learning_rate": 1.695241875979095e-05, + "loss": 1.374, + "step": 9456 + }, + { + "epoch": 0.2776733807035058, + "grad_norm": 0.0, + "learning_rate": 1.6951735201005345e-05, + "loss": 1.3604, + "step": 9457 + }, + { + "epoch": 0.2777027423806448, + "grad_norm": 0.0, + "learning_rate": 1.695105157935342e-05, + "loss": 1.3174, + "step": 9458 + }, + { + "epoch": 0.27773210405778376, + "grad_norm": 0.0, + "learning_rate": 1.6950367894841356e-05, + "loss": 1.4648, + "step": 9459 + }, + { + "epoch": 0.2777614657349228, + "grad_norm": 0.0, + "learning_rate": 1.6949684147475332e-05, + "loss": 1.4248, + "step": 9460 + }, + { + "epoch": 0.2777908274120618, + "grad_norm": 0.0, + "learning_rate": 1.6949000337261536e-05, + "loss": 1.3193, + "step": 9461 + }, + { + "epoch": 0.27782018908920075, + "grad_norm": 0.0, + "learning_rate": 1.6948316464206153e-05, + "loss": 1.4023, + "step": 9462 + }, + { + "epoch": 0.27784955076633977, + "grad_norm": 0.0, + "learning_rate": 1.694763252831536e-05, + "loss": 1.3623, + "step": 9463 + }, + { + "epoch": 0.2778789124434788, + "grad_norm": 0.0, + "learning_rate": 1.694694852959535e-05, + "loss": 1.2764, + "step": 9464 + }, + { + "epoch": 0.27790827412061775, + "grad_norm": 0.0, + "learning_rate": 1.6946264468052303e-05, + "loss": 1.3105, + "step": 9465 + }, + { + "epoch": 0.27793763579775677, + "grad_norm": 0.0, + "learning_rate": 1.694558034369241e-05, + "loss": 1.3486, + "step": 9466 + }, + { + "epoch": 0.2779669974748958, + "grad_norm": 0.0, + "learning_rate": 1.6944896156521853e-05, + "loss": 1.418, + "step": 9467 + }, + { + "epoch": 0.27799635915203474, + "grad_norm": 0.0, + "learning_rate": 1.694421190654682e-05, + "loss": 1.2627, + "step": 9468 + }, + { + "epoch": 0.27802572082917376, + "grad_norm": 0.0, + "learning_rate": 1.6943527593773503e-05, + "loss": 1.3623, + "step": 9469 + }, + { + "epoch": 0.2780550825063128, + "grad_norm": 0.0, + "learning_rate": 1.694284321820809e-05, + "loss": 1.3633, + "step": 9470 + }, + { + "epoch": 0.27808444418345174, + "grad_norm": 0.0, + "learning_rate": 1.694215877985676e-05, + "loss": 1.3164, + "step": 9471 + }, + { + "epoch": 0.27811380586059076, + "grad_norm": 0.0, + "learning_rate": 1.6941474278725715e-05, + "loss": 1.4121, + "step": 9472 + }, + { + "epoch": 0.2781431675377298, + "grad_norm": 0.0, + "learning_rate": 1.6940789714821142e-05, + "loss": 1.4834, + "step": 9473 + }, + { + "epoch": 0.27817252921486874, + "grad_norm": 0.0, + "learning_rate": 1.694010508814923e-05, + "loss": 1.3486, + "step": 9474 + }, + { + "epoch": 0.27820189089200775, + "grad_norm": 0.0, + "learning_rate": 1.693942039871617e-05, + "loss": 1.4375, + "step": 9475 + }, + { + "epoch": 0.27823125256914677, + "grad_norm": 0.0, + "learning_rate": 1.693873564652815e-05, + "loss": 1.4121, + "step": 9476 + }, + { + "epoch": 0.27826061424628573, + "grad_norm": 0.0, + "learning_rate": 1.693805083159137e-05, + "loss": 1.4766, + "step": 9477 + }, + { + "epoch": 0.27828997592342475, + "grad_norm": 0.0, + "learning_rate": 1.6937365953912018e-05, + "loss": 1.5264, + "step": 9478 + }, + { + "epoch": 0.27831933760056377, + "grad_norm": 0.0, + "learning_rate": 1.693668101349629e-05, + "loss": 1.3945, + "step": 9479 + }, + { + "epoch": 0.2783486992777027, + "grad_norm": 0.0, + "learning_rate": 1.693599601035038e-05, + "loss": 1.3477, + "step": 9480 + }, + { + "epoch": 0.27837806095484174, + "grad_norm": 0.0, + "learning_rate": 1.6935310944480477e-05, + "loss": 1.4395, + "step": 9481 + }, + { + "epoch": 0.27840742263198076, + "grad_norm": 0.0, + "learning_rate": 1.6934625815892788e-05, + "loss": 1.4248, + "step": 9482 + }, + { + "epoch": 0.2784367843091197, + "grad_norm": 0.0, + "learning_rate": 1.69339406245935e-05, + "loss": 1.5117, + "step": 9483 + }, + { + "epoch": 0.27846614598625874, + "grad_norm": 0.0, + "learning_rate": 1.693325537058881e-05, + "loss": 1.3936, + "step": 9484 + }, + { + "epoch": 0.27849550766339776, + "grad_norm": 0.0, + "learning_rate": 1.6932570053884915e-05, + "loss": 1.4756, + "step": 9485 + }, + { + "epoch": 0.2785248693405367, + "grad_norm": 0.0, + "learning_rate": 1.6931884674488012e-05, + "loss": 1.5342, + "step": 9486 + }, + { + "epoch": 0.27855423101767574, + "grad_norm": 0.0, + "learning_rate": 1.6931199232404304e-05, + "loss": 1.3301, + "step": 9487 + }, + { + "epoch": 0.27858359269481475, + "grad_norm": 0.0, + "learning_rate": 1.6930513727639986e-05, + "loss": 1.3594, + "step": 9488 + }, + { + "epoch": 0.2786129543719537, + "grad_norm": 0.0, + "learning_rate": 1.6929828160201257e-05, + "loss": 1.3789, + "step": 9489 + }, + { + "epoch": 0.27864231604909273, + "grad_norm": 0.0, + "learning_rate": 1.6929142530094315e-05, + "loss": 1.4873, + "step": 9490 + }, + { + "epoch": 0.27867167772623175, + "grad_norm": 0.0, + "learning_rate": 1.6928456837325362e-05, + "loss": 1.3506, + "step": 9491 + }, + { + "epoch": 0.2787010394033707, + "grad_norm": 0.0, + "learning_rate": 1.69277710819006e-05, + "loss": 1.4258, + "step": 9492 + }, + { + "epoch": 0.2787304010805097, + "grad_norm": 0.0, + "learning_rate": 1.6927085263826235e-05, + "loss": 1.3408, + "step": 9493 + }, + { + "epoch": 0.27875976275764874, + "grad_norm": 0.0, + "learning_rate": 1.692639938310846e-05, + "loss": 1.3613, + "step": 9494 + }, + { + "epoch": 0.2787891244347877, + "grad_norm": 0.0, + "learning_rate": 1.692571343975348e-05, + "loss": 1.3994, + "step": 9495 + }, + { + "epoch": 0.2788184861119267, + "grad_norm": 0.0, + "learning_rate": 1.6925027433767502e-05, + "loss": 1.4375, + "step": 9496 + }, + { + "epoch": 0.2788478477890657, + "grad_norm": 0.0, + "learning_rate": 1.6924341365156726e-05, + "loss": 1.4043, + "step": 9497 + }, + { + "epoch": 0.2788772094662047, + "grad_norm": 0.0, + "learning_rate": 1.692365523392736e-05, + "loss": 1.3887, + "step": 9498 + }, + { + "epoch": 0.2789065711433437, + "grad_norm": 0.0, + "learning_rate": 1.6922969040085603e-05, + "loss": 1.4277, + "step": 9499 + }, + { + "epoch": 0.2789359328204827, + "grad_norm": 0.0, + "learning_rate": 1.692228278363767e-05, + "loss": 1.3623, + "step": 9500 + }, + { + "epoch": 0.2789652944976217, + "grad_norm": 0.0, + "learning_rate": 1.6921596464589755e-05, + "loss": 1.4707, + "step": 9501 + }, + { + "epoch": 0.2789946561747607, + "grad_norm": 0.0, + "learning_rate": 1.6920910082948073e-05, + "loss": 1.4707, + "step": 9502 + }, + { + "epoch": 0.2790240178518997, + "grad_norm": 0.0, + "learning_rate": 1.692022363871883e-05, + "loss": 1.4248, + "step": 9503 + }, + { + "epoch": 0.2790533795290387, + "grad_norm": 0.0, + "learning_rate": 1.6919537131908228e-05, + "loss": 1.4609, + "step": 9504 + }, + { + "epoch": 0.2790827412061777, + "grad_norm": 0.0, + "learning_rate": 1.6918850562522483e-05, + "loss": 1.2891, + "step": 9505 + }, + { + "epoch": 0.27911210288331667, + "grad_norm": 0.0, + "learning_rate": 1.69181639305678e-05, + "loss": 1.4336, + "step": 9506 + }, + { + "epoch": 0.2791414645604557, + "grad_norm": 0.0, + "learning_rate": 1.691747723605039e-05, + "loss": 1.4629, + "step": 9507 + }, + { + "epoch": 0.2791708262375947, + "grad_norm": 0.0, + "learning_rate": 1.6916790478976457e-05, + "loss": 1.4268, + "step": 9508 + }, + { + "epoch": 0.27920018791473367, + "grad_norm": 0.0, + "learning_rate": 1.6916103659352222e-05, + "loss": 1.4443, + "step": 9509 + }, + { + "epoch": 0.2792295495918727, + "grad_norm": 0.0, + "learning_rate": 1.6915416777183888e-05, + "loss": 1.3682, + "step": 9510 + }, + { + "epoch": 0.2792589112690117, + "grad_norm": 0.0, + "learning_rate": 1.6914729832477667e-05, + "loss": 1.4316, + "step": 9511 + }, + { + "epoch": 0.27928827294615066, + "grad_norm": 0.0, + "learning_rate": 1.6914042825239775e-05, + "loss": 1.3262, + "step": 9512 + }, + { + "epoch": 0.2793176346232897, + "grad_norm": 0.0, + "learning_rate": 1.6913355755476424e-05, + "loss": 1.4531, + "step": 9513 + }, + { + "epoch": 0.2793469963004287, + "grad_norm": 0.0, + "learning_rate": 1.6912668623193826e-05, + "loss": 1.3477, + "step": 9514 + }, + { + "epoch": 0.27937635797756766, + "grad_norm": 0.0, + "learning_rate": 1.6911981428398194e-05, + "loss": 1.3394, + "step": 9515 + }, + { + "epoch": 0.2794057196547067, + "grad_norm": 0.0, + "learning_rate": 1.6911294171095745e-05, + "loss": 1.5137, + "step": 9516 + }, + { + "epoch": 0.2794350813318457, + "grad_norm": 0.0, + "learning_rate": 1.6910606851292693e-05, + "loss": 1.4658, + "step": 9517 + }, + { + "epoch": 0.27946444300898465, + "grad_norm": 0.0, + "learning_rate": 1.6909919468995252e-05, + "loss": 1.4209, + "step": 9518 + }, + { + "epoch": 0.27949380468612367, + "grad_norm": 0.0, + "learning_rate": 1.690923202420964e-05, + "loss": 1.5088, + "step": 9519 + }, + { + "epoch": 0.2795231663632627, + "grad_norm": 0.0, + "learning_rate": 1.6908544516942073e-05, + "loss": 1.4092, + "step": 9520 + }, + { + "epoch": 0.27955252804040165, + "grad_norm": 0.0, + "learning_rate": 1.6907856947198768e-05, + "loss": 1.4629, + "step": 9521 + }, + { + "epoch": 0.27958188971754067, + "grad_norm": 0.0, + "learning_rate": 1.6907169314985944e-05, + "loss": 1.4346, + "step": 9522 + }, + { + "epoch": 0.2796112513946797, + "grad_norm": 0.0, + "learning_rate": 1.690648162030982e-05, + "loss": 1.5215, + "step": 9523 + }, + { + "epoch": 0.27964061307181864, + "grad_norm": 0.0, + "learning_rate": 1.6905793863176617e-05, + "loss": 1.4473, + "step": 9524 + }, + { + "epoch": 0.27966997474895766, + "grad_norm": 0.0, + "learning_rate": 1.6905106043592545e-05, + "loss": 1.4092, + "step": 9525 + }, + { + "epoch": 0.2796993364260967, + "grad_norm": 0.0, + "learning_rate": 1.6904418161563833e-05, + "loss": 1.252, + "step": 9526 + }, + { + "epoch": 0.27972869810323564, + "grad_norm": 0.0, + "learning_rate": 1.69037302170967e-05, + "loss": 1.4014, + "step": 9527 + }, + { + "epoch": 0.27975805978037466, + "grad_norm": 0.0, + "learning_rate": 1.6903042210197367e-05, + "loss": 1.4678, + "step": 9528 + }, + { + "epoch": 0.2797874214575137, + "grad_norm": 0.0, + "learning_rate": 1.6902354140872055e-05, + "loss": 1.4941, + "step": 9529 + }, + { + "epoch": 0.27981678313465264, + "grad_norm": 0.0, + "learning_rate": 1.6901666009126988e-05, + "loss": 1.3926, + "step": 9530 + }, + { + "epoch": 0.27984614481179165, + "grad_norm": 0.0, + "learning_rate": 1.6900977814968386e-05, + "loss": 1.3774, + "step": 9531 + }, + { + "epoch": 0.27987550648893067, + "grad_norm": 0.0, + "learning_rate": 1.6900289558402474e-05, + "loss": 1.4209, + "step": 9532 + }, + { + "epoch": 0.27990486816606963, + "grad_norm": 0.0, + "learning_rate": 1.6899601239435478e-05, + "loss": 1.4512, + "step": 9533 + }, + { + "epoch": 0.27993422984320865, + "grad_norm": 0.0, + "learning_rate": 1.6898912858073623e-05, + "loss": 1.3076, + "step": 9534 + }, + { + "epoch": 0.27996359152034767, + "grad_norm": 0.0, + "learning_rate": 1.6898224414323126e-05, + "loss": 1.4971, + "step": 9535 + }, + { + "epoch": 0.2799929531974866, + "grad_norm": 0.0, + "learning_rate": 1.6897535908190225e-05, + "loss": 1.3477, + "step": 9536 + }, + { + "epoch": 0.28002231487462564, + "grad_norm": 0.0, + "learning_rate": 1.6896847339681136e-05, + "loss": 1.4814, + "step": 9537 + }, + { + "epoch": 0.28005167655176466, + "grad_norm": 0.0, + "learning_rate": 1.689615870880209e-05, + "loss": 1.4062, + "step": 9538 + }, + { + "epoch": 0.2800810382289036, + "grad_norm": 0.0, + "learning_rate": 1.6895470015559317e-05, + "loss": 1.3647, + "step": 9539 + }, + { + "epoch": 0.28011039990604264, + "grad_norm": 0.0, + "learning_rate": 1.6894781259959044e-05, + "loss": 1.6514, + "step": 9540 + }, + { + "epoch": 0.28013976158318166, + "grad_norm": 0.0, + "learning_rate": 1.6894092442007494e-05, + "loss": 1.374, + "step": 9541 + }, + { + "epoch": 0.2801691232603206, + "grad_norm": 0.0, + "learning_rate": 1.6893403561710903e-05, + "loss": 1.4619, + "step": 9542 + }, + { + "epoch": 0.28019848493745964, + "grad_norm": 0.0, + "learning_rate": 1.68927146190755e-05, + "loss": 1.3037, + "step": 9543 + }, + { + "epoch": 0.28022784661459865, + "grad_norm": 0.0, + "learning_rate": 1.689202561410751e-05, + "loss": 1.4043, + "step": 9544 + }, + { + "epoch": 0.2802572082917376, + "grad_norm": 0.0, + "learning_rate": 1.689133654681317e-05, + "loss": 1.4521, + "step": 9545 + }, + { + "epoch": 0.28028656996887663, + "grad_norm": 0.0, + "learning_rate": 1.6890647417198708e-05, + "loss": 1.3608, + "step": 9546 + }, + { + "epoch": 0.28031593164601565, + "grad_norm": 0.0, + "learning_rate": 1.6889958225270355e-05, + "loss": 1.418, + "step": 9547 + }, + { + "epoch": 0.2803452933231546, + "grad_norm": 0.0, + "learning_rate": 1.6889268971034346e-05, + "loss": 1.541, + "step": 9548 + }, + { + "epoch": 0.2803746550002936, + "grad_norm": 0.0, + "learning_rate": 1.6888579654496912e-05, + "loss": 1.2959, + "step": 9549 + }, + { + "epoch": 0.2804040166774326, + "grad_norm": 0.0, + "learning_rate": 1.6887890275664288e-05, + "loss": 1.4688, + "step": 9550 + }, + { + "epoch": 0.2804333783545716, + "grad_norm": 0.0, + "learning_rate": 1.688720083454271e-05, + "loss": 1.4795, + "step": 9551 + }, + { + "epoch": 0.2804627400317106, + "grad_norm": 0.0, + "learning_rate": 1.688651133113841e-05, + "loss": 1.3633, + "step": 9552 + }, + { + "epoch": 0.2804921017088496, + "grad_norm": 0.0, + "learning_rate": 1.6885821765457628e-05, + "loss": 1.3208, + "step": 9553 + }, + { + "epoch": 0.2805214633859886, + "grad_norm": 0.0, + "learning_rate": 1.6885132137506593e-05, + "loss": 1.2812, + "step": 9554 + }, + { + "epoch": 0.2805508250631276, + "grad_norm": 0.0, + "learning_rate": 1.6884442447291545e-05, + "loss": 1.3828, + "step": 9555 + }, + { + "epoch": 0.2805801867402666, + "grad_norm": 0.0, + "learning_rate": 1.6883752694818722e-05, + "loss": 1.4346, + "step": 9556 + }, + { + "epoch": 0.2806095484174056, + "grad_norm": 0.0, + "learning_rate": 1.6883062880094358e-05, + "loss": 1.4492, + "step": 9557 + }, + { + "epoch": 0.2806389100945446, + "grad_norm": 0.0, + "learning_rate": 1.6882373003124697e-05, + "loss": 1.3945, + "step": 9558 + }, + { + "epoch": 0.2806682717716836, + "grad_norm": 0.0, + "learning_rate": 1.6881683063915973e-05, + "loss": 1.4092, + "step": 9559 + }, + { + "epoch": 0.2806976334488226, + "grad_norm": 0.0, + "learning_rate": 1.6880993062474423e-05, + "loss": 1.3564, + "step": 9560 + }, + { + "epoch": 0.2807269951259616, + "grad_norm": 0.0, + "learning_rate": 1.6880302998806297e-05, + "loss": 1.5107, + "step": 9561 + }, + { + "epoch": 0.28075635680310057, + "grad_norm": 0.0, + "learning_rate": 1.6879612872917827e-05, + "loss": 1.4932, + "step": 9562 + }, + { + "epoch": 0.2807857184802396, + "grad_norm": 0.0, + "learning_rate": 1.6878922684815254e-05, + "loss": 1.4209, + "step": 9563 + }, + { + "epoch": 0.2808150801573786, + "grad_norm": 0.0, + "learning_rate": 1.687823243450482e-05, + "loss": 1.2578, + "step": 9564 + }, + { + "epoch": 0.28084444183451757, + "grad_norm": 0.0, + "learning_rate": 1.687754212199277e-05, + "loss": 1.3594, + "step": 9565 + }, + { + "epoch": 0.2808738035116566, + "grad_norm": 0.0, + "learning_rate": 1.6876851747285345e-05, + "loss": 1.499, + "step": 9566 + }, + { + "epoch": 0.2809031651887956, + "grad_norm": 0.0, + "learning_rate": 1.687616131038879e-05, + "loss": 1.5, + "step": 9567 + }, + { + "epoch": 0.28093252686593456, + "grad_norm": 0.0, + "learning_rate": 1.6875470811309345e-05, + "loss": 1.2969, + "step": 9568 + }, + { + "epoch": 0.2809618885430736, + "grad_norm": 0.0, + "learning_rate": 1.687478025005326e-05, + "loss": 1.3965, + "step": 9569 + }, + { + "epoch": 0.2809912502202126, + "grad_norm": 0.0, + "learning_rate": 1.6874089626626773e-05, + "loss": 1.5215, + "step": 9570 + }, + { + "epoch": 0.28102061189735156, + "grad_norm": 0.0, + "learning_rate": 1.6873398941036137e-05, + "loss": 1.5088, + "step": 9571 + }, + { + "epoch": 0.2810499735744906, + "grad_norm": 0.0, + "learning_rate": 1.687270819328759e-05, + "loss": 1.3887, + "step": 9572 + }, + { + "epoch": 0.2810793352516296, + "grad_norm": 0.0, + "learning_rate": 1.6872017383387383e-05, + "loss": 1.3496, + "step": 9573 + }, + { + "epoch": 0.28110869692876855, + "grad_norm": 0.0, + "learning_rate": 1.6871326511341766e-05, + "loss": 1.3301, + "step": 9574 + }, + { + "epoch": 0.28113805860590757, + "grad_norm": 0.0, + "learning_rate": 1.687063557715698e-05, + "loss": 1.3418, + "step": 9575 + }, + { + "epoch": 0.2811674202830466, + "grad_norm": 0.0, + "learning_rate": 1.686994458083928e-05, + "loss": 1.4727, + "step": 9576 + }, + { + "epoch": 0.28119678196018555, + "grad_norm": 0.0, + "learning_rate": 1.686925352239491e-05, + "loss": 1.4375, + "step": 9577 + }, + { + "epoch": 0.28122614363732457, + "grad_norm": 0.0, + "learning_rate": 1.686856240183012e-05, + "loss": 1.4551, + "step": 9578 + }, + { + "epoch": 0.2812555053144636, + "grad_norm": 0.0, + "learning_rate": 1.6867871219151163e-05, + "loss": 1.3701, + "step": 9579 + }, + { + "epoch": 0.28128486699160254, + "grad_norm": 0.0, + "learning_rate": 1.686717997436429e-05, + "loss": 1.4844, + "step": 9580 + }, + { + "epoch": 0.28131422866874156, + "grad_norm": 0.0, + "learning_rate": 1.6866488667475748e-05, + "loss": 1.3506, + "step": 9581 + }, + { + "epoch": 0.2813435903458806, + "grad_norm": 0.0, + "learning_rate": 1.6865797298491787e-05, + "loss": 1.4287, + "step": 9582 + }, + { + "epoch": 0.28137295202301954, + "grad_norm": 0.0, + "learning_rate": 1.6865105867418665e-05, + "loss": 1.4746, + "step": 9583 + }, + { + "epoch": 0.28140231370015856, + "grad_norm": 0.0, + "learning_rate": 1.686441437426263e-05, + "loss": 1.4424, + "step": 9584 + }, + { + "epoch": 0.2814316753772976, + "grad_norm": 0.0, + "learning_rate": 1.6863722819029942e-05, + "loss": 1.4209, + "step": 9585 + }, + { + "epoch": 0.28146103705443654, + "grad_norm": 0.0, + "learning_rate": 1.686303120172685e-05, + "loss": 1.3408, + "step": 9586 + }, + { + "epoch": 0.28149039873157555, + "grad_norm": 0.0, + "learning_rate": 1.686233952235961e-05, + "loss": 1.3633, + "step": 9587 + }, + { + "epoch": 0.28151976040871457, + "grad_norm": 0.0, + "learning_rate": 1.6861647780934474e-05, + "loss": 1.4141, + "step": 9588 + }, + { + "epoch": 0.28154912208585353, + "grad_norm": 0.0, + "learning_rate": 1.68609559774577e-05, + "loss": 1.3223, + "step": 9589 + }, + { + "epoch": 0.28157848376299255, + "grad_norm": 0.0, + "learning_rate": 1.6860264111935546e-05, + "loss": 1.3682, + "step": 9590 + }, + { + "epoch": 0.28160784544013157, + "grad_norm": 0.0, + "learning_rate": 1.6859572184374262e-05, + "loss": 1.2607, + "step": 9591 + }, + { + "epoch": 0.2816372071172705, + "grad_norm": 0.0, + "learning_rate": 1.6858880194780116e-05, + "loss": 1.4482, + "step": 9592 + }, + { + "epoch": 0.28166656879440954, + "grad_norm": 0.0, + "learning_rate": 1.6858188143159354e-05, + "loss": 1.335, + "step": 9593 + }, + { + "epoch": 0.28169593047154856, + "grad_norm": 0.0, + "learning_rate": 1.6857496029518246e-05, + "loss": 1.3701, + "step": 9594 + }, + { + "epoch": 0.2817252921486875, + "grad_norm": 0.0, + "learning_rate": 1.685680385386304e-05, + "loss": 1.3262, + "step": 9595 + }, + { + "epoch": 0.28175465382582654, + "grad_norm": 0.0, + "learning_rate": 1.6856111616200002e-05, + "loss": 1.4717, + "step": 9596 + }, + { + "epoch": 0.28178401550296556, + "grad_norm": 0.0, + "learning_rate": 1.6855419316535392e-05, + "loss": 1.4482, + "step": 9597 + }, + { + "epoch": 0.2818133771801045, + "grad_norm": 0.0, + "learning_rate": 1.6854726954875472e-05, + "loss": 1.4883, + "step": 9598 + }, + { + "epoch": 0.28184273885724354, + "grad_norm": 0.0, + "learning_rate": 1.6854034531226496e-05, + "loss": 1.3525, + "step": 9599 + }, + { + "epoch": 0.2818721005343825, + "grad_norm": 0.0, + "learning_rate": 1.685334204559473e-05, + "loss": 1.334, + "step": 9600 + }, + { + "epoch": 0.2819014622115215, + "grad_norm": 0.0, + "learning_rate": 1.685264949798644e-05, + "loss": 1.4551, + "step": 9601 + }, + { + "epoch": 0.28193082388866053, + "grad_norm": 0.0, + "learning_rate": 1.6851956888407888e-05, + "loss": 1.584, + "step": 9602 + }, + { + "epoch": 0.2819601855657995, + "grad_norm": 0.0, + "learning_rate": 1.6851264216865328e-05, + "loss": 1.4971, + "step": 9603 + }, + { + "epoch": 0.2819895472429385, + "grad_norm": 0.0, + "learning_rate": 1.6850571483365038e-05, + "loss": 1.4883, + "step": 9604 + }, + { + "epoch": 0.2820189089200775, + "grad_norm": 0.0, + "learning_rate": 1.684987868791327e-05, + "loss": 1.3506, + "step": 9605 + }, + { + "epoch": 0.2820482705972165, + "grad_norm": 0.0, + "learning_rate": 1.68491858305163e-05, + "loss": 1.4609, + "step": 9606 + }, + { + "epoch": 0.2820776322743555, + "grad_norm": 0.0, + "learning_rate": 1.6848492911180383e-05, + "loss": 1.3652, + "step": 9607 + }, + { + "epoch": 0.2821069939514945, + "grad_norm": 0.0, + "learning_rate": 1.6847799929911793e-05, + "loss": 1.5078, + "step": 9608 + }, + { + "epoch": 0.2821363556286335, + "grad_norm": 0.0, + "learning_rate": 1.6847106886716793e-05, + "loss": 1.4668, + "step": 9609 + }, + { + "epoch": 0.2821657173057725, + "grad_norm": 0.0, + "learning_rate": 1.6846413781601655e-05, + "loss": 1.4756, + "step": 9610 + }, + { + "epoch": 0.2821950789829115, + "grad_norm": 0.0, + "learning_rate": 1.6845720614572644e-05, + "loss": 1.3916, + "step": 9611 + }, + { + "epoch": 0.2822244406600505, + "grad_norm": 0.0, + "learning_rate": 1.6845027385636025e-05, + "loss": 1.3271, + "step": 9612 + }, + { + "epoch": 0.2822538023371895, + "grad_norm": 0.0, + "learning_rate": 1.684433409479807e-05, + "loss": 1.4004, + "step": 9613 + }, + { + "epoch": 0.2822831640143285, + "grad_norm": 0.0, + "learning_rate": 1.6843640742065052e-05, + "loss": 1.4443, + "step": 9614 + }, + { + "epoch": 0.2823125256914675, + "grad_norm": 0.0, + "learning_rate": 1.6842947327443235e-05, + "loss": 1.3545, + "step": 9615 + }, + { + "epoch": 0.2823418873686065, + "grad_norm": 0.0, + "learning_rate": 1.6842253850938895e-05, + "loss": 1.5088, + "step": 9616 + }, + { + "epoch": 0.2823712490457455, + "grad_norm": 0.0, + "learning_rate": 1.6841560312558304e-05, + "loss": 1.4414, + "step": 9617 + }, + { + "epoch": 0.28240061072288447, + "grad_norm": 0.0, + "learning_rate": 1.6840866712307726e-05, + "loss": 1.4492, + "step": 9618 + }, + { + "epoch": 0.2824299724000235, + "grad_norm": 0.0, + "learning_rate": 1.684017305019344e-05, + "loss": 1.335, + "step": 9619 + }, + { + "epoch": 0.2824593340771625, + "grad_norm": 0.0, + "learning_rate": 1.683947932622172e-05, + "loss": 1.4863, + "step": 9620 + }, + { + "epoch": 0.28248869575430147, + "grad_norm": 0.0, + "learning_rate": 1.6838785540398832e-05, + "loss": 1.248, + "step": 9621 + }, + { + "epoch": 0.2825180574314405, + "grad_norm": 0.0, + "learning_rate": 1.6838091692731055e-05, + "loss": 1.3955, + "step": 9622 + }, + { + "epoch": 0.2825474191085795, + "grad_norm": 0.0, + "learning_rate": 1.683739778322467e-05, + "loss": 1.4971, + "step": 9623 + }, + { + "epoch": 0.28257678078571846, + "grad_norm": 0.0, + "learning_rate": 1.683670381188594e-05, + "loss": 1.3936, + "step": 9624 + }, + { + "epoch": 0.2826061424628575, + "grad_norm": 0.0, + "learning_rate": 1.6836009778721147e-05, + "loss": 1.2227, + "step": 9625 + }, + { + "epoch": 0.2826355041399965, + "grad_norm": 0.0, + "learning_rate": 1.6835315683736568e-05, + "loss": 1.2695, + "step": 9626 + }, + { + "epoch": 0.28266486581713546, + "grad_norm": 0.0, + "learning_rate": 1.683462152693848e-05, + "loss": 1.4277, + "step": 9627 + }, + { + "epoch": 0.2826942274942745, + "grad_norm": 0.0, + "learning_rate": 1.6833927308333154e-05, + "loss": 1.3867, + "step": 9628 + }, + { + "epoch": 0.2827235891714135, + "grad_norm": 0.0, + "learning_rate": 1.6833233027926876e-05, + "loss": 1.3018, + "step": 9629 + }, + { + "epoch": 0.28275295084855245, + "grad_norm": 0.0, + "learning_rate": 1.6832538685725923e-05, + "loss": 1.4414, + "step": 9630 + }, + { + "epoch": 0.28278231252569147, + "grad_norm": 0.0, + "learning_rate": 1.6831844281736573e-05, + "loss": 1.4258, + "step": 9631 + }, + { + "epoch": 0.2828116742028305, + "grad_norm": 0.0, + "learning_rate": 1.6831149815965106e-05, + "loss": 1.584, + "step": 9632 + }, + { + "epoch": 0.28284103587996945, + "grad_norm": 0.0, + "learning_rate": 1.6830455288417803e-05, + "loss": 1.5195, + "step": 9633 + }, + { + "epoch": 0.28287039755710847, + "grad_norm": 0.0, + "learning_rate": 1.682976069910094e-05, + "loss": 1.5, + "step": 9634 + }, + { + "epoch": 0.2828997592342475, + "grad_norm": 0.0, + "learning_rate": 1.68290660480208e-05, + "loss": 1.4023, + "step": 9635 + }, + { + "epoch": 0.28292912091138644, + "grad_norm": 0.0, + "learning_rate": 1.682837133518367e-05, + "loss": 1.4844, + "step": 9636 + }, + { + "epoch": 0.28295848258852546, + "grad_norm": 0.0, + "learning_rate": 1.682767656059583e-05, + "loss": 1.4023, + "step": 9637 + }, + { + "epoch": 0.2829878442656645, + "grad_norm": 0.0, + "learning_rate": 1.682698172426356e-05, + "loss": 1.3535, + "step": 9638 + }, + { + "epoch": 0.28301720594280344, + "grad_norm": 0.0, + "learning_rate": 1.682628682619315e-05, + "loss": 1.4248, + "step": 9639 + }, + { + "epoch": 0.28304656761994246, + "grad_norm": 0.0, + "learning_rate": 1.6825591866390875e-05, + "loss": 1.3555, + "step": 9640 + }, + { + "epoch": 0.2830759292970815, + "grad_norm": 0.0, + "learning_rate": 1.6824896844863026e-05, + "loss": 1.3672, + "step": 9641 + }, + { + "epoch": 0.28310529097422044, + "grad_norm": 0.0, + "learning_rate": 1.682420176161589e-05, + "loss": 1.4668, + "step": 9642 + }, + { + "epoch": 0.28313465265135945, + "grad_norm": 0.0, + "learning_rate": 1.6823506616655746e-05, + "loss": 1.3896, + "step": 9643 + }, + { + "epoch": 0.28316401432849847, + "grad_norm": 0.0, + "learning_rate": 1.6822811409988884e-05, + "loss": 1.3447, + "step": 9644 + }, + { + "epoch": 0.28319337600563743, + "grad_norm": 0.0, + "learning_rate": 1.6822116141621595e-05, + "loss": 1.3154, + "step": 9645 + }, + { + "epoch": 0.28322273768277645, + "grad_norm": 0.0, + "learning_rate": 1.682142081156016e-05, + "loss": 1.5225, + "step": 9646 + }, + { + "epoch": 0.28325209935991547, + "grad_norm": 0.0, + "learning_rate": 1.6820725419810872e-05, + "loss": 1.4336, + "step": 9647 + }, + { + "epoch": 0.2832814610370544, + "grad_norm": 0.0, + "learning_rate": 1.6820029966380012e-05, + "loss": 1.4141, + "step": 9648 + }, + { + "epoch": 0.28331082271419344, + "grad_norm": 0.0, + "learning_rate": 1.681933445127388e-05, + "loss": 1.2842, + "step": 9649 + }, + { + "epoch": 0.2833401843913324, + "grad_norm": 0.0, + "learning_rate": 1.6818638874498755e-05, + "loss": 1.3882, + "step": 9650 + }, + { + "epoch": 0.2833695460684714, + "grad_norm": 0.0, + "learning_rate": 1.6817943236060935e-05, + "loss": 1.3984, + "step": 9651 + }, + { + "epoch": 0.28339890774561044, + "grad_norm": 0.0, + "learning_rate": 1.681724753596671e-05, + "loss": 1.417, + "step": 9652 + }, + { + "epoch": 0.2834282694227494, + "grad_norm": 0.0, + "learning_rate": 1.6816551774222368e-05, + "loss": 1.4863, + "step": 9653 + }, + { + "epoch": 0.2834576310998884, + "grad_norm": 0.0, + "learning_rate": 1.6815855950834202e-05, + "loss": 1.459, + "step": 9654 + }, + { + "epoch": 0.28348699277702744, + "grad_norm": 0.0, + "learning_rate": 1.6815160065808505e-05, + "loss": 1.4629, + "step": 9655 + }, + { + "epoch": 0.2835163544541664, + "grad_norm": 0.0, + "learning_rate": 1.681446411915157e-05, + "loss": 1.4609, + "step": 9656 + }, + { + "epoch": 0.2835457161313054, + "grad_norm": 0.0, + "learning_rate": 1.681376811086969e-05, + "loss": 1.2866, + "step": 9657 + }, + { + "epoch": 0.28357507780844443, + "grad_norm": 0.0, + "learning_rate": 1.681307204096916e-05, + "loss": 1.3955, + "step": 9658 + }, + { + "epoch": 0.2836044394855834, + "grad_norm": 0.0, + "learning_rate": 1.6812375909456277e-05, + "loss": 1.3604, + "step": 9659 + }, + { + "epoch": 0.2836338011627224, + "grad_norm": 0.0, + "learning_rate": 1.6811679716337335e-05, + "loss": 1.4805, + "step": 9660 + }, + { + "epoch": 0.2836631628398614, + "grad_norm": 0.0, + "learning_rate": 1.6810983461618623e-05, + "loss": 1.3916, + "step": 9661 + }, + { + "epoch": 0.2836925245170004, + "grad_norm": 0.0, + "learning_rate": 1.681028714530645e-05, + "loss": 1.3213, + "step": 9662 + }, + { + "epoch": 0.2837218861941394, + "grad_norm": 0.0, + "learning_rate": 1.6809590767407102e-05, + "loss": 1.3691, + "step": 9663 + }, + { + "epoch": 0.2837512478712784, + "grad_norm": 0.0, + "learning_rate": 1.6808894327926882e-05, + "loss": 1.2793, + "step": 9664 + }, + { + "epoch": 0.2837806095484174, + "grad_norm": 0.0, + "learning_rate": 1.6808197826872087e-05, + "loss": 1.3662, + "step": 9665 + }, + { + "epoch": 0.2838099712255564, + "grad_norm": 0.0, + "learning_rate": 1.6807501264249015e-05, + "loss": 1.3945, + "step": 9666 + }, + { + "epoch": 0.2838393329026954, + "grad_norm": 0.0, + "learning_rate": 1.6806804640063964e-05, + "loss": 1.4883, + "step": 9667 + }, + { + "epoch": 0.2838686945798344, + "grad_norm": 0.0, + "learning_rate": 1.680610795432324e-05, + "loss": 1.4688, + "step": 9668 + }, + { + "epoch": 0.2838980562569734, + "grad_norm": 0.0, + "learning_rate": 1.6805411207033135e-05, + "loss": 1.5322, + "step": 9669 + }, + { + "epoch": 0.2839274179341124, + "grad_norm": 0.0, + "learning_rate": 1.6804714398199953e-05, + "loss": 1.4355, + "step": 9670 + }, + { + "epoch": 0.2839567796112514, + "grad_norm": 0.0, + "learning_rate": 1.6804017527829998e-05, + "loss": 1.4238, + "step": 9671 + }, + { + "epoch": 0.2839861412883904, + "grad_norm": 0.0, + "learning_rate": 1.680332059592957e-05, + "loss": 1.3125, + "step": 9672 + }, + { + "epoch": 0.2840155029655294, + "grad_norm": 0.0, + "learning_rate": 1.680262360250497e-05, + "loss": 1.4502, + "step": 9673 + }, + { + "epoch": 0.28404486464266837, + "grad_norm": 0.0, + "learning_rate": 1.6801926547562504e-05, + "loss": 1.5264, + "step": 9674 + }, + { + "epoch": 0.2840742263198074, + "grad_norm": 0.0, + "learning_rate": 1.680122943110847e-05, + "loss": 1.4844, + "step": 9675 + }, + { + "epoch": 0.2841035879969464, + "grad_norm": 0.0, + "learning_rate": 1.6800532253149183e-05, + "loss": 1.4844, + "step": 9676 + }, + { + "epoch": 0.28413294967408537, + "grad_norm": 0.0, + "learning_rate": 1.679983501369094e-05, + "loss": 1.4883, + "step": 9677 + }, + { + "epoch": 0.2841623113512244, + "grad_norm": 0.0, + "learning_rate": 1.6799137712740042e-05, + "loss": 1.4727, + "step": 9678 + }, + { + "epoch": 0.2841916730283634, + "grad_norm": 0.0, + "learning_rate": 1.6798440350302806e-05, + "loss": 1.251, + "step": 9679 + }, + { + "epoch": 0.28422103470550236, + "grad_norm": 0.0, + "learning_rate": 1.6797742926385533e-05, + "loss": 1.4658, + "step": 9680 + }, + { + "epoch": 0.2842503963826414, + "grad_norm": 0.0, + "learning_rate": 1.679704544099453e-05, + "loss": 1.5078, + "step": 9681 + }, + { + "epoch": 0.2842797580597804, + "grad_norm": 0.0, + "learning_rate": 1.67963478941361e-05, + "loss": 1.4365, + "step": 9682 + }, + { + "epoch": 0.28430911973691936, + "grad_norm": 0.0, + "learning_rate": 1.6795650285816556e-05, + "loss": 1.3604, + "step": 9683 + }, + { + "epoch": 0.2843384814140584, + "grad_norm": 0.0, + "learning_rate": 1.6794952616042207e-05, + "loss": 1.2422, + "step": 9684 + }, + { + "epoch": 0.2843678430911974, + "grad_norm": 0.0, + "learning_rate": 1.6794254884819362e-05, + "loss": 1.2754, + "step": 9685 + }, + { + "epoch": 0.28439720476833635, + "grad_norm": 0.0, + "learning_rate": 1.6793557092154332e-05, + "loss": 1.4092, + "step": 9686 + }, + { + "epoch": 0.28442656644547537, + "grad_norm": 0.0, + "learning_rate": 1.6792859238053424e-05, + "loss": 1.3154, + "step": 9687 + }, + { + "epoch": 0.2844559281226144, + "grad_norm": 0.0, + "learning_rate": 1.6792161322522948e-05, + "loss": 1.3926, + "step": 9688 + }, + { + "epoch": 0.28448528979975335, + "grad_norm": 0.0, + "learning_rate": 1.6791463345569218e-05, + "loss": 1.4473, + "step": 9689 + }, + { + "epoch": 0.28451465147689237, + "grad_norm": 0.0, + "learning_rate": 1.6790765307198548e-05, + "loss": 1.5049, + "step": 9690 + }, + { + "epoch": 0.2845440131540314, + "grad_norm": 0.0, + "learning_rate": 1.6790067207417247e-05, + "loss": 1.2812, + "step": 9691 + }, + { + "epoch": 0.28457337483117034, + "grad_norm": 0.0, + "learning_rate": 1.6789369046231628e-05, + "loss": 1.3574, + "step": 9692 + }, + { + "epoch": 0.28460273650830936, + "grad_norm": 0.0, + "learning_rate": 1.678867082364801e-05, + "loss": 1.4727, + "step": 9693 + }, + { + "epoch": 0.2846320981854484, + "grad_norm": 0.0, + "learning_rate": 1.67879725396727e-05, + "loss": 1.4766, + "step": 9694 + }, + { + "epoch": 0.28466145986258734, + "grad_norm": 0.0, + "learning_rate": 1.6787274194312014e-05, + "loss": 1.418, + "step": 9695 + }, + { + "epoch": 0.28469082153972636, + "grad_norm": 0.0, + "learning_rate": 1.6786575787572272e-05, + "loss": 1.3584, + "step": 9696 + }, + { + "epoch": 0.2847201832168654, + "grad_norm": 0.0, + "learning_rate": 1.6785877319459786e-05, + "loss": 1.4629, + "step": 9697 + }, + { + "epoch": 0.28474954489400434, + "grad_norm": 0.0, + "learning_rate": 1.6785178789980877e-05, + "loss": 1.4072, + "step": 9698 + }, + { + "epoch": 0.28477890657114335, + "grad_norm": 0.0, + "learning_rate": 1.6784480199141854e-05, + "loss": 1.4336, + "step": 9699 + }, + { + "epoch": 0.2848082682482823, + "grad_norm": 0.0, + "learning_rate": 1.678378154694904e-05, + "loss": 1.3115, + "step": 9700 + }, + { + "epoch": 0.28483762992542133, + "grad_norm": 0.0, + "learning_rate": 1.6783082833408754e-05, + "loss": 1.5166, + "step": 9701 + }, + { + "epoch": 0.28486699160256035, + "grad_norm": 0.0, + "learning_rate": 1.678238405852731e-05, + "loss": 1.4668, + "step": 9702 + }, + { + "epoch": 0.2848963532796993, + "grad_norm": 0.0, + "learning_rate": 1.6781685222311032e-05, + "loss": 1.5078, + "step": 9703 + }, + { + "epoch": 0.2849257149568383, + "grad_norm": 0.0, + "learning_rate": 1.6780986324766234e-05, + "loss": 1.4795, + "step": 9704 + }, + { + "epoch": 0.28495507663397734, + "grad_norm": 0.0, + "learning_rate": 1.6780287365899246e-05, + "loss": 1.3828, + "step": 9705 + }, + { + "epoch": 0.2849844383111163, + "grad_norm": 0.0, + "learning_rate": 1.677958834571638e-05, + "loss": 1.4502, + "step": 9706 + }, + { + "epoch": 0.2850137999882553, + "grad_norm": 0.0, + "learning_rate": 1.6778889264223962e-05, + "loss": 1.4932, + "step": 9707 + }, + { + "epoch": 0.28504316166539434, + "grad_norm": 0.0, + "learning_rate": 1.677819012142831e-05, + "loss": 1.3789, + "step": 9708 + }, + { + "epoch": 0.2850725233425333, + "grad_norm": 0.0, + "learning_rate": 1.677749091733575e-05, + "loss": 1.46, + "step": 9709 + }, + { + "epoch": 0.2851018850196723, + "grad_norm": 0.0, + "learning_rate": 1.6776791651952603e-05, + "loss": 1.459, + "step": 9710 + }, + { + "epoch": 0.28513124669681134, + "grad_norm": 0.0, + "learning_rate": 1.6776092325285194e-05, + "loss": 1.3438, + "step": 9711 + }, + { + "epoch": 0.2851606083739503, + "grad_norm": 0.0, + "learning_rate": 1.6775392937339846e-05, + "loss": 1.3975, + "step": 9712 + }, + { + "epoch": 0.2851899700510893, + "grad_norm": 0.0, + "learning_rate": 1.6774693488122883e-05, + "loss": 1.4785, + "step": 9713 + }, + { + "epoch": 0.28521933172822833, + "grad_norm": 0.0, + "learning_rate": 1.6773993977640633e-05, + "loss": 1.2832, + "step": 9714 + }, + { + "epoch": 0.2852486934053673, + "grad_norm": 0.0, + "learning_rate": 1.6773294405899423e-05, + "loss": 1.4521, + "step": 9715 + }, + { + "epoch": 0.2852780550825063, + "grad_norm": 0.0, + "learning_rate": 1.6772594772905573e-05, + "loss": 1.4385, + "step": 9716 + }, + { + "epoch": 0.2853074167596453, + "grad_norm": 0.0, + "learning_rate": 1.6771895078665418e-05, + "loss": 1.4326, + "step": 9717 + }, + { + "epoch": 0.2853367784367843, + "grad_norm": 0.0, + "learning_rate": 1.677119532318528e-05, + "loss": 1.5654, + "step": 9718 + }, + { + "epoch": 0.2853661401139233, + "grad_norm": 0.0, + "learning_rate": 1.677049550647149e-05, + "loss": 1.3691, + "step": 9719 + }, + { + "epoch": 0.2853955017910623, + "grad_norm": 0.0, + "learning_rate": 1.6769795628530373e-05, + "loss": 1.3184, + "step": 9720 + }, + { + "epoch": 0.2854248634682013, + "grad_norm": 0.0, + "learning_rate": 1.6769095689368262e-05, + "loss": 1.3623, + "step": 9721 + }, + { + "epoch": 0.2854542251453403, + "grad_norm": 0.0, + "learning_rate": 1.6768395688991487e-05, + "loss": 1.3691, + "step": 9722 + }, + { + "epoch": 0.2854835868224793, + "grad_norm": 0.0, + "learning_rate": 1.676769562740637e-05, + "loss": 1.457, + "step": 9723 + }, + { + "epoch": 0.2855129484996183, + "grad_norm": 0.0, + "learning_rate": 1.6766995504619252e-05, + "loss": 1.4893, + "step": 9724 + }, + { + "epoch": 0.2855423101767573, + "grad_norm": 0.0, + "learning_rate": 1.676629532063646e-05, + "loss": 1.4678, + "step": 9725 + }, + { + "epoch": 0.2855716718538963, + "grad_norm": 0.0, + "learning_rate": 1.6765595075464327e-05, + "loss": 1.3369, + "step": 9726 + }, + { + "epoch": 0.2856010335310353, + "grad_norm": 0.0, + "learning_rate": 1.6764894769109185e-05, + "loss": 1.3672, + "step": 9727 + }, + { + "epoch": 0.2856303952081743, + "grad_norm": 0.0, + "learning_rate": 1.6764194401577366e-05, + "loss": 1.3623, + "step": 9728 + }, + { + "epoch": 0.2856597568853133, + "grad_norm": 0.0, + "learning_rate": 1.676349397287521e-05, + "loss": 1.4316, + "step": 9729 + }, + { + "epoch": 0.28568911856245227, + "grad_norm": 0.0, + "learning_rate": 1.6762793483009038e-05, + "loss": 1.3896, + "step": 9730 + }, + { + "epoch": 0.2857184802395913, + "grad_norm": 0.0, + "learning_rate": 1.67620929319852e-05, + "loss": 1.4619, + "step": 9731 + }, + { + "epoch": 0.2857478419167303, + "grad_norm": 0.0, + "learning_rate": 1.676139231981002e-05, + "loss": 1.3613, + "step": 9732 + }, + { + "epoch": 0.28577720359386927, + "grad_norm": 0.0, + "learning_rate": 1.6760691646489836e-05, + "loss": 1.3262, + "step": 9733 + }, + { + "epoch": 0.2858065652710083, + "grad_norm": 0.0, + "learning_rate": 1.6759990912030984e-05, + "loss": 1.415, + "step": 9734 + }, + { + "epoch": 0.2858359269481473, + "grad_norm": 0.0, + "learning_rate": 1.6759290116439807e-05, + "loss": 1.4619, + "step": 9735 + }, + { + "epoch": 0.28586528862528626, + "grad_norm": 0.0, + "learning_rate": 1.675858925972264e-05, + "loss": 1.5059, + "step": 9736 + }, + { + "epoch": 0.2858946503024253, + "grad_norm": 0.0, + "learning_rate": 1.6757888341885816e-05, + "loss": 1.4453, + "step": 9737 + }, + { + "epoch": 0.2859240119795643, + "grad_norm": 0.0, + "learning_rate": 1.6757187362935678e-05, + "loss": 1.4453, + "step": 9738 + }, + { + "epoch": 0.28595337365670326, + "grad_norm": 0.0, + "learning_rate": 1.6756486322878564e-05, + "loss": 1.3389, + "step": 9739 + }, + { + "epoch": 0.2859827353338423, + "grad_norm": 0.0, + "learning_rate": 1.6755785221720813e-05, + "loss": 1.4111, + "step": 9740 + }, + { + "epoch": 0.2860120970109813, + "grad_norm": 0.0, + "learning_rate": 1.6755084059468768e-05, + "loss": 1.3955, + "step": 9741 + }, + { + "epoch": 0.28604145868812025, + "grad_norm": 0.0, + "learning_rate": 1.6754382836128768e-05, + "loss": 1.4121, + "step": 9742 + }, + { + "epoch": 0.28607082036525927, + "grad_norm": 0.0, + "learning_rate": 1.675368155170715e-05, + "loss": 1.3447, + "step": 9743 + }, + { + "epoch": 0.2861001820423983, + "grad_norm": 0.0, + "learning_rate": 1.6752980206210268e-05, + "loss": 1.3281, + "step": 9744 + }, + { + "epoch": 0.28612954371953725, + "grad_norm": 0.0, + "learning_rate": 1.675227879964445e-05, + "loss": 1.501, + "step": 9745 + }, + { + "epoch": 0.28615890539667627, + "grad_norm": 0.0, + "learning_rate": 1.675157733201605e-05, + "loss": 1.4443, + "step": 9746 + }, + { + "epoch": 0.2861882670738153, + "grad_norm": 0.0, + "learning_rate": 1.6750875803331405e-05, + "loss": 1.3633, + "step": 9747 + }, + { + "epoch": 0.28621762875095424, + "grad_norm": 0.0, + "learning_rate": 1.675017421359686e-05, + "loss": 1.5244, + "step": 9748 + }, + { + "epoch": 0.28624699042809326, + "grad_norm": 0.0, + "learning_rate": 1.674947256281876e-05, + "loss": 1.3867, + "step": 9749 + }, + { + "epoch": 0.2862763521052322, + "grad_norm": 0.0, + "learning_rate": 1.6748770851003454e-05, + "loss": 1.3018, + "step": 9750 + }, + { + "epoch": 0.28630571378237124, + "grad_norm": 0.0, + "learning_rate": 1.6748069078157283e-05, + "loss": 1.3984, + "step": 9751 + }, + { + "epoch": 0.28633507545951026, + "grad_norm": 0.0, + "learning_rate": 1.6747367244286597e-05, + "loss": 1.4424, + "step": 9752 + }, + { + "epoch": 0.2863644371366492, + "grad_norm": 0.0, + "learning_rate": 1.6746665349397738e-05, + "loss": 1.3848, + "step": 9753 + }, + { + "epoch": 0.28639379881378824, + "grad_norm": 0.0, + "learning_rate": 1.6745963393497057e-05, + "loss": 1.3555, + "step": 9754 + }, + { + "epoch": 0.28642316049092725, + "grad_norm": 0.0, + "learning_rate": 1.6745261376590903e-05, + "loss": 1.4805, + "step": 9755 + }, + { + "epoch": 0.2864525221680662, + "grad_norm": 0.0, + "learning_rate": 1.6744559298685623e-05, + "loss": 1.4062, + "step": 9756 + }, + { + "epoch": 0.28648188384520523, + "grad_norm": 0.0, + "learning_rate": 1.6743857159787564e-05, + "loss": 1.3833, + "step": 9757 + }, + { + "epoch": 0.28651124552234425, + "grad_norm": 0.0, + "learning_rate": 1.674315495990308e-05, + "loss": 1.417, + "step": 9758 + }, + { + "epoch": 0.2865406071994832, + "grad_norm": 0.0, + "learning_rate": 1.6742452699038517e-05, + "loss": 1.4619, + "step": 9759 + }, + { + "epoch": 0.2865699688766222, + "grad_norm": 0.0, + "learning_rate": 1.6741750377200227e-05, + "loss": 1.4209, + "step": 9760 + }, + { + "epoch": 0.28659933055376124, + "grad_norm": 0.0, + "learning_rate": 1.6741047994394562e-05, + "loss": 1.2559, + "step": 9761 + }, + { + "epoch": 0.2866286922309002, + "grad_norm": 0.0, + "learning_rate": 1.674034555062788e-05, + "loss": 1.4395, + "step": 9762 + }, + { + "epoch": 0.2866580539080392, + "grad_norm": 0.0, + "learning_rate": 1.673964304590652e-05, + "loss": 1.2915, + "step": 9763 + }, + { + "epoch": 0.28668741558517824, + "grad_norm": 0.0, + "learning_rate": 1.673894048023684e-05, + "loss": 1.3516, + "step": 9764 + }, + { + "epoch": 0.2867167772623172, + "grad_norm": 0.0, + "learning_rate": 1.6738237853625198e-05, + "loss": 1.4326, + "step": 9765 + }, + { + "epoch": 0.2867461389394562, + "grad_norm": 0.0, + "learning_rate": 1.6737535166077946e-05, + "loss": 1.3438, + "step": 9766 + }, + { + "epoch": 0.28677550061659524, + "grad_norm": 0.0, + "learning_rate": 1.6736832417601435e-05, + "loss": 1.3584, + "step": 9767 + }, + { + "epoch": 0.2868048622937342, + "grad_norm": 0.0, + "learning_rate": 1.673612960820203e-05, + "loss": 1.3438, + "step": 9768 + }, + { + "epoch": 0.2868342239708732, + "grad_norm": 0.0, + "learning_rate": 1.6735426737886074e-05, + "loss": 1.2568, + "step": 9769 + }, + { + "epoch": 0.28686358564801223, + "grad_norm": 0.0, + "learning_rate": 1.6734723806659935e-05, + "loss": 1.3135, + "step": 9770 + }, + { + "epoch": 0.2868929473251512, + "grad_norm": 0.0, + "learning_rate": 1.6734020814529956e-05, + "loss": 1.4873, + "step": 9771 + }, + { + "epoch": 0.2869223090022902, + "grad_norm": 0.0, + "learning_rate": 1.6733317761502506e-05, + "loss": 1.2173, + "step": 9772 + }, + { + "epoch": 0.2869516706794292, + "grad_norm": 0.0, + "learning_rate": 1.6732614647583937e-05, + "loss": 1.4756, + "step": 9773 + }, + { + "epoch": 0.2869810323565682, + "grad_norm": 0.0, + "learning_rate": 1.6731911472780616e-05, + "loss": 1.3418, + "step": 9774 + }, + { + "epoch": 0.2870103940337072, + "grad_norm": 0.0, + "learning_rate": 1.673120823709889e-05, + "loss": 1.4082, + "step": 9775 + }, + { + "epoch": 0.2870397557108462, + "grad_norm": 0.0, + "learning_rate": 1.6730504940545123e-05, + "loss": 1.3975, + "step": 9776 + }, + { + "epoch": 0.2870691173879852, + "grad_norm": 0.0, + "learning_rate": 1.6729801583125677e-05, + "loss": 1.333, + "step": 9777 + }, + { + "epoch": 0.2870984790651242, + "grad_norm": 0.0, + "learning_rate": 1.6729098164846913e-05, + "loss": 1.2256, + "step": 9778 + }, + { + "epoch": 0.2871278407422632, + "grad_norm": 0.0, + "learning_rate": 1.672839468571519e-05, + "loss": 1.4297, + "step": 9779 + }, + { + "epoch": 0.2871572024194022, + "grad_norm": 0.0, + "learning_rate": 1.672769114573687e-05, + "loss": 1.4004, + "step": 9780 + }, + { + "epoch": 0.2871865640965412, + "grad_norm": 0.0, + "learning_rate": 1.672698754491832e-05, + "loss": 1.4639, + "step": 9781 + }, + { + "epoch": 0.2872159257736802, + "grad_norm": 0.0, + "learning_rate": 1.6726283883265896e-05, + "loss": 1.3555, + "step": 9782 + }, + { + "epoch": 0.2872452874508192, + "grad_norm": 0.0, + "learning_rate": 1.6725580160785962e-05, + "loss": 1.3408, + "step": 9783 + }, + { + "epoch": 0.2872746491279582, + "grad_norm": 0.0, + "learning_rate": 1.6724876377484892e-05, + "loss": 1.3672, + "step": 9784 + }, + { + "epoch": 0.2873040108050972, + "grad_norm": 0.0, + "learning_rate": 1.6724172533369035e-05, + "loss": 1.3525, + "step": 9785 + }, + { + "epoch": 0.28733337248223617, + "grad_norm": 0.0, + "learning_rate": 1.6723468628444764e-05, + "loss": 1.3301, + "step": 9786 + }, + { + "epoch": 0.2873627341593752, + "grad_norm": 0.0, + "learning_rate": 1.6722764662718446e-05, + "loss": 1.5225, + "step": 9787 + }, + { + "epoch": 0.2873920958365142, + "grad_norm": 0.0, + "learning_rate": 1.672206063619645e-05, + "loss": 1.457, + "step": 9788 + }, + { + "epoch": 0.28742145751365317, + "grad_norm": 0.0, + "learning_rate": 1.6721356548885134e-05, + "loss": 1.5771, + "step": 9789 + }, + { + "epoch": 0.2874508191907922, + "grad_norm": 0.0, + "learning_rate": 1.6720652400790867e-05, + "loss": 1.417, + "step": 9790 + }, + { + "epoch": 0.2874801808679312, + "grad_norm": 0.0, + "learning_rate": 1.671994819192002e-05, + "loss": 1.3262, + "step": 9791 + }, + { + "epoch": 0.28750954254507016, + "grad_norm": 0.0, + "learning_rate": 1.671924392227896e-05, + "loss": 1.3848, + "step": 9792 + }, + { + "epoch": 0.2875389042222092, + "grad_norm": 0.0, + "learning_rate": 1.671853959187406e-05, + "loss": 1.3887, + "step": 9793 + }, + { + "epoch": 0.2875682658993482, + "grad_norm": 0.0, + "learning_rate": 1.6717835200711684e-05, + "loss": 1.3691, + "step": 9794 + }, + { + "epoch": 0.28759762757648716, + "grad_norm": 0.0, + "learning_rate": 1.6717130748798205e-05, + "loss": 1.3174, + "step": 9795 + }, + { + "epoch": 0.2876269892536262, + "grad_norm": 0.0, + "learning_rate": 1.671642623613999e-05, + "loss": 1.4463, + "step": 9796 + }, + { + "epoch": 0.2876563509307652, + "grad_norm": 0.0, + "learning_rate": 1.6715721662743414e-05, + "loss": 1.3877, + "step": 9797 + }, + { + "epoch": 0.28768571260790415, + "grad_norm": 0.0, + "learning_rate": 1.6715017028614848e-05, + "loss": 1.2622, + "step": 9798 + }, + { + "epoch": 0.28771507428504317, + "grad_norm": 0.0, + "learning_rate": 1.6714312333760664e-05, + "loss": 1.3281, + "step": 9799 + }, + { + "epoch": 0.28774443596218213, + "grad_norm": 0.0, + "learning_rate": 1.671360757818723e-05, + "loss": 1.3169, + "step": 9800 + }, + { + "epoch": 0.28777379763932115, + "grad_norm": 0.0, + "learning_rate": 1.6712902761900924e-05, + "loss": 1.4189, + "step": 9801 + }, + { + "epoch": 0.28780315931646017, + "grad_norm": 0.0, + "learning_rate": 1.671219788490812e-05, + "loss": 1.4492, + "step": 9802 + }, + { + "epoch": 0.28783252099359913, + "grad_norm": 0.0, + "learning_rate": 1.6711492947215192e-05, + "loss": 1.5332, + "step": 9803 + }, + { + "epoch": 0.28786188267073815, + "grad_norm": 0.0, + "learning_rate": 1.6710787948828513e-05, + "loss": 1.5098, + "step": 9804 + }, + { + "epoch": 0.28789124434787716, + "grad_norm": 0.0, + "learning_rate": 1.6710082889754465e-05, + "loss": 1.2832, + "step": 9805 + }, + { + "epoch": 0.2879206060250161, + "grad_norm": 0.0, + "learning_rate": 1.6709377769999414e-05, + "loss": 1.46, + "step": 9806 + }, + { + "epoch": 0.28794996770215514, + "grad_norm": 0.0, + "learning_rate": 1.6708672589569743e-05, + "loss": 1.3457, + "step": 9807 + }, + { + "epoch": 0.28797932937929416, + "grad_norm": 0.0, + "learning_rate": 1.6707967348471828e-05, + "loss": 1.3652, + "step": 9808 + }, + { + "epoch": 0.2880086910564331, + "grad_norm": 0.0, + "learning_rate": 1.6707262046712045e-05, + "loss": 1.2334, + "step": 9809 + }, + { + "epoch": 0.28803805273357214, + "grad_norm": 0.0, + "learning_rate": 1.6706556684296776e-05, + "loss": 1.2334, + "step": 9810 + }, + { + "epoch": 0.28806741441071115, + "grad_norm": 0.0, + "learning_rate": 1.6705851261232393e-05, + "loss": 1.5723, + "step": 9811 + }, + { + "epoch": 0.2880967760878501, + "grad_norm": 0.0, + "learning_rate": 1.670514577752528e-05, + "loss": 1.4316, + "step": 9812 + }, + { + "epoch": 0.28812613776498913, + "grad_norm": 0.0, + "learning_rate": 1.670444023318182e-05, + "loss": 1.2471, + "step": 9813 + }, + { + "epoch": 0.28815549944212815, + "grad_norm": 0.0, + "learning_rate": 1.670373462820839e-05, + "loss": 1.3535, + "step": 9814 + }, + { + "epoch": 0.2881848611192671, + "grad_norm": 0.0, + "learning_rate": 1.6703028962611368e-05, + "loss": 1.416, + "step": 9815 + }, + { + "epoch": 0.2882142227964061, + "grad_norm": 0.0, + "learning_rate": 1.6702323236397142e-05, + "loss": 1.4707, + "step": 9816 + }, + { + "epoch": 0.28824358447354514, + "grad_norm": 0.0, + "learning_rate": 1.6701617449572087e-05, + "loss": 1.4863, + "step": 9817 + }, + { + "epoch": 0.2882729461506841, + "grad_norm": 0.0, + "learning_rate": 1.670091160214259e-05, + "loss": 1.4102, + "step": 9818 + }, + { + "epoch": 0.2883023078278231, + "grad_norm": 0.0, + "learning_rate": 1.6700205694115033e-05, + "loss": 1.4033, + "step": 9819 + }, + { + "epoch": 0.28833166950496214, + "grad_norm": 0.0, + "learning_rate": 1.6699499725495804e-05, + "loss": 1.5195, + "step": 9820 + }, + { + "epoch": 0.2883610311821011, + "grad_norm": 0.0, + "learning_rate": 1.669879369629128e-05, + "loss": 1.5215, + "step": 9821 + }, + { + "epoch": 0.2883903928592401, + "grad_norm": 0.0, + "learning_rate": 1.669808760650785e-05, + "loss": 1.4424, + "step": 9822 + }, + { + "epoch": 0.28841975453637914, + "grad_norm": 0.0, + "learning_rate": 1.6697381456151896e-05, + "loss": 1.4941, + "step": 9823 + }, + { + "epoch": 0.2884491162135181, + "grad_norm": 0.0, + "learning_rate": 1.6696675245229808e-05, + "loss": 1.5293, + "step": 9824 + }, + { + "epoch": 0.2884784778906571, + "grad_norm": 0.0, + "learning_rate": 1.6695968973747968e-05, + "loss": 1.6035, + "step": 9825 + }, + { + "epoch": 0.28850783956779613, + "grad_norm": 0.0, + "learning_rate": 1.669526264171277e-05, + "loss": 1.6152, + "step": 9826 + }, + { + "epoch": 0.2885372012449351, + "grad_norm": 0.0, + "learning_rate": 1.6694556249130595e-05, + "loss": 1.4307, + "step": 9827 + }, + { + "epoch": 0.2885665629220741, + "grad_norm": 0.0, + "learning_rate": 1.6693849796007835e-05, + "loss": 1.5508, + "step": 9828 + }, + { + "epoch": 0.2885959245992131, + "grad_norm": 0.0, + "learning_rate": 1.6693143282350873e-05, + "loss": 1.5039, + "step": 9829 + }, + { + "epoch": 0.2886252862763521, + "grad_norm": 0.0, + "learning_rate": 1.6692436708166105e-05, + "loss": 1.4688, + "step": 9830 + }, + { + "epoch": 0.2886546479534911, + "grad_norm": 0.0, + "learning_rate": 1.669173007345992e-05, + "loss": 1.457, + "step": 9831 + }, + { + "epoch": 0.2886840096306301, + "grad_norm": 0.0, + "learning_rate": 1.6691023378238703e-05, + "loss": 1.4131, + "step": 9832 + }, + { + "epoch": 0.2887133713077691, + "grad_norm": 0.0, + "learning_rate": 1.669031662250885e-05, + "loss": 1.4756, + "step": 9833 + }, + { + "epoch": 0.2887427329849081, + "grad_norm": 0.0, + "learning_rate": 1.668960980627675e-05, + "loss": 1.4482, + "step": 9834 + }, + { + "epoch": 0.2887720946620471, + "grad_norm": 0.0, + "learning_rate": 1.6688902929548794e-05, + "loss": 1.4502, + "step": 9835 + }, + { + "epoch": 0.2888014563391861, + "grad_norm": 0.0, + "learning_rate": 1.6688195992331377e-05, + "loss": 1.376, + "step": 9836 + }, + { + "epoch": 0.2888308180163251, + "grad_norm": 0.0, + "learning_rate": 1.6687488994630887e-05, + "loss": 1.499, + "step": 9837 + }, + { + "epoch": 0.2888601796934641, + "grad_norm": 0.0, + "learning_rate": 1.6686781936453726e-05, + "loss": 1.4014, + "step": 9838 + }, + { + "epoch": 0.2888895413706031, + "grad_norm": 0.0, + "learning_rate": 1.668607481780628e-05, + "loss": 1.3457, + "step": 9839 + }, + { + "epoch": 0.2889189030477421, + "grad_norm": 0.0, + "learning_rate": 1.668536763869495e-05, + "loss": 1.4629, + "step": 9840 + }, + { + "epoch": 0.2889482647248811, + "grad_norm": 0.0, + "learning_rate": 1.6684660399126126e-05, + "loss": 1.5322, + "step": 9841 + }, + { + "epoch": 0.28897762640202007, + "grad_norm": 0.0, + "learning_rate": 1.668395309910621e-05, + "loss": 1.3506, + "step": 9842 + }, + { + "epoch": 0.2890069880791591, + "grad_norm": 0.0, + "learning_rate": 1.668324573864159e-05, + "loss": 1.4756, + "step": 9843 + }, + { + "epoch": 0.2890363497562981, + "grad_norm": 0.0, + "learning_rate": 1.668253831773867e-05, + "loss": 1.4014, + "step": 9844 + }, + { + "epoch": 0.28906571143343707, + "grad_norm": 0.0, + "learning_rate": 1.668183083640384e-05, + "loss": 1.4824, + "step": 9845 + }, + { + "epoch": 0.2890950731105761, + "grad_norm": 0.0, + "learning_rate": 1.668112329464351e-05, + "loss": 1.2881, + "step": 9846 + }, + { + "epoch": 0.2891244347877151, + "grad_norm": 0.0, + "learning_rate": 1.6680415692464066e-05, + "loss": 1.4238, + "step": 9847 + }, + { + "epoch": 0.28915379646485406, + "grad_norm": 0.0, + "learning_rate": 1.6679708029871912e-05, + "loss": 1.4033, + "step": 9848 + }, + { + "epoch": 0.2891831581419931, + "grad_norm": 0.0, + "learning_rate": 1.6679000306873448e-05, + "loss": 1.4883, + "step": 9849 + }, + { + "epoch": 0.28921251981913204, + "grad_norm": 0.0, + "learning_rate": 1.6678292523475076e-05, + "loss": 1.4414, + "step": 9850 + }, + { + "epoch": 0.28924188149627106, + "grad_norm": 0.0, + "learning_rate": 1.6677584679683188e-05, + "loss": 1.3457, + "step": 9851 + }, + { + "epoch": 0.2892712431734101, + "grad_norm": 0.0, + "learning_rate": 1.6676876775504197e-05, + "loss": 1.4238, + "step": 9852 + }, + { + "epoch": 0.28930060485054904, + "grad_norm": 0.0, + "learning_rate": 1.66761688109445e-05, + "loss": 1.2725, + "step": 9853 + }, + { + "epoch": 0.28932996652768805, + "grad_norm": 0.0, + "learning_rate": 1.6675460786010495e-05, + "loss": 1.3936, + "step": 9854 + }, + { + "epoch": 0.28935932820482707, + "grad_norm": 0.0, + "learning_rate": 1.6674752700708592e-05, + "loss": 1.4336, + "step": 9855 + }, + { + "epoch": 0.28938868988196603, + "grad_norm": 0.0, + "learning_rate": 1.667404455504519e-05, + "loss": 1.4531, + "step": 9856 + }, + { + "epoch": 0.28941805155910505, + "grad_norm": 0.0, + "learning_rate": 1.6673336349026696e-05, + "loss": 1.3154, + "step": 9857 + }, + { + "epoch": 0.28944741323624407, + "grad_norm": 0.0, + "learning_rate": 1.6672628082659506e-05, + "loss": 1.4307, + "step": 9858 + }, + { + "epoch": 0.28947677491338303, + "grad_norm": 0.0, + "learning_rate": 1.6671919755950038e-05, + "loss": 1.3887, + "step": 9859 + }, + { + "epoch": 0.28950613659052205, + "grad_norm": 0.0, + "learning_rate": 1.6671211368904686e-05, + "loss": 1.3232, + "step": 9860 + }, + { + "epoch": 0.28953549826766106, + "grad_norm": 0.0, + "learning_rate": 1.6670502921529862e-05, + "loss": 1.3906, + "step": 9861 + }, + { + "epoch": 0.2895648599448, + "grad_norm": 0.0, + "learning_rate": 1.6669794413831974e-05, + "loss": 1.4346, + "step": 9862 + }, + { + "epoch": 0.28959422162193904, + "grad_norm": 0.0, + "learning_rate": 1.6669085845817426e-05, + "loss": 1.3975, + "step": 9863 + }, + { + "epoch": 0.28962358329907806, + "grad_norm": 0.0, + "learning_rate": 1.6668377217492628e-05, + "loss": 1.4229, + "step": 9864 + }, + { + "epoch": 0.289652944976217, + "grad_norm": 0.0, + "learning_rate": 1.6667668528863983e-05, + "loss": 1.3623, + "step": 9865 + }, + { + "epoch": 0.28968230665335604, + "grad_norm": 0.0, + "learning_rate": 1.6666959779937908e-05, + "loss": 1.4014, + "step": 9866 + }, + { + "epoch": 0.28971166833049505, + "grad_norm": 0.0, + "learning_rate": 1.6666250970720805e-05, + "loss": 1.4121, + "step": 9867 + }, + { + "epoch": 0.289741030007634, + "grad_norm": 0.0, + "learning_rate": 1.666554210121909e-05, + "loss": 1.335, + "step": 9868 + }, + { + "epoch": 0.28977039168477303, + "grad_norm": 0.0, + "learning_rate": 1.666483317143917e-05, + "loss": 1.2764, + "step": 9869 + }, + { + "epoch": 0.28979975336191205, + "grad_norm": 0.0, + "learning_rate": 1.6664124181387455e-05, + "loss": 1.4531, + "step": 9870 + }, + { + "epoch": 0.289829115039051, + "grad_norm": 0.0, + "learning_rate": 1.666341513107036e-05, + "loss": 1.4648, + "step": 9871 + }, + { + "epoch": 0.28985847671619, + "grad_norm": 0.0, + "learning_rate": 1.6662706020494296e-05, + "loss": 1.2168, + "step": 9872 + }, + { + "epoch": 0.28988783839332904, + "grad_norm": 0.0, + "learning_rate": 1.6661996849665675e-05, + "loss": 1.2764, + "step": 9873 + }, + { + "epoch": 0.289917200070468, + "grad_norm": 0.0, + "learning_rate": 1.6661287618590906e-05, + "loss": 1.2471, + "step": 9874 + }, + { + "epoch": 0.289946561747607, + "grad_norm": 0.0, + "learning_rate": 1.666057832727641e-05, + "loss": 1.3091, + "step": 9875 + }, + { + "epoch": 0.28997592342474604, + "grad_norm": 0.0, + "learning_rate": 1.6659868975728598e-05, + "loss": 1.4688, + "step": 9876 + }, + { + "epoch": 0.290005285101885, + "grad_norm": 0.0, + "learning_rate": 1.6659159563953892e-05, + "loss": 1.4697, + "step": 9877 + }, + { + "epoch": 0.290034646779024, + "grad_norm": 0.0, + "learning_rate": 1.665845009195869e-05, + "loss": 1.3906, + "step": 9878 + }, + { + "epoch": 0.29006400845616304, + "grad_norm": 0.0, + "learning_rate": 1.6657740559749425e-05, + "loss": 1.3945, + "step": 9879 + }, + { + "epoch": 0.290093370133302, + "grad_norm": 0.0, + "learning_rate": 1.6657030967332505e-05, + "loss": 1.4385, + "step": 9880 + }, + { + "epoch": 0.290122731810441, + "grad_norm": 0.0, + "learning_rate": 1.6656321314714352e-05, + "loss": 1.4619, + "step": 9881 + }, + { + "epoch": 0.29015209348758003, + "grad_norm": 0.0, + "learning_rate": 1.6655611601901375e-05, + "loss": 1.5107, + "step": 9882 + }, + { + "epoch": 0.290181455164719, + "grad_norm": 0.0, + "learning_rate": 1.6654901828900003e-05, + "loss": 1.4502, + "step": 9883 + }, + { + "epoch": 0.290210816841858, + "grad_norm": 0.0, + "learning_rate": 1.6654191995716648e-05, + "loss": 1.4121, + "step": 9884 + }, + { + "epoch": 0.290240178518997, + "grad_norm": 0.0, + "learning_rate": 1.6653482102357728e-05, + "loss": 1.375, + "step": 9885 + }, + { + "epoch": 0.290269540196136, + "grad_norm": 0.0, + "learning_rate": 1.6652772148829666e-05, + "loss": 1.4238, + "step": 9886 + }, + { + "epoch": 0.290298901873275, + "grad_norm": 0.0, + "learning_rate": 1.6652062135138884e-05, + "loss": 1.2705, + "step": 9887 + }, + { + "epoch": 0.290328263550414, + "grad_norm": 0.0, + "learning_rate": 1.6651352061291802e-05, + "loss": 1.3965, + "step": 9888 + }, + { + "epoch": 0.290357625227553, + "grad_norm": 0.0, + "learning_rate": 1.6650641927294837e-05, + "loss": 1.4033, + "step": 9889 + }, + { + "epoch": 0.290386986904692, + "grad_norm": 0.0, + "learning_rate": 1.6649931733154412e-05, + "loss": 1.4629, + "step": 9890 + }, + { + "epoch": 0.290416348581831, + "grad_norm": 0.0, + "learning_rate": 1.6649221478876952e-05, + "loss": 1.4229, + "step": 9891 + }, + { + "epoch": 0.29044571025897, + "grad_norm": 0.0, + "learning_rate": 1.6648511164468884e-05, + "loss": 1.4004, + "step": 9892 + }, + { + "epoch": 0.290475071936109, + "grad_norm": 0.0, + "learning_rate": 1.6647800789936625e-05, + "loss": 1.3789, + "step": 9893 + }, + { + "epoch": 0.290504433613248, + "grad_norm": 0.0, + "learning_rate": 1.6647090355286595e-05, + "loss": 1.4404, + "step": 9894 + }, + { + "epoch": 0.290533795290387, + "grad_norm": 0.0, + "learning_rate": 1.664637986052523e-05, + "loss": 1.3506, + "step": 9895 + }, + { + "epoch": 0.290563156967526, + "grad_norm": 0.0, + "learning_rate": 1.6645669305658948e-05, + "loss": 1.4551, + "step": 9896 + }, + { + "epoch": 0.290592518644665, + "grad_norm": 0.0, + "learning_rate": 1.6644958690694178e-05, + "loss": 1.4746, + "step": 9897 + }, + { + "epoch": 0.29062188032180397, + "grad_norm": 0.0, + "learning_rate": 1.6644248015637347e-05, + "loss": 1.4648, + "step": 9898 + }, + { + "epoch": 0.290651241998943, + "grad_norm": 0.0, + "learning_rate": 1.6643537280494874e-05, + "loss": 1.3955, + "step": 9899 + }, + { + "epoch": 0.29068060367608195, + "grad_norm": 0.0, + "learning_rate": 1.6642826485273196e-05, + "loss": 1.4141, + "step": 9900 + }, + { + "epoch": 0.29070996535322097, + "grad_norm": 0.0, + "learning_rate": 1.6642115629978734e-05, + "loss": 1.5195, + "step": 9901 + }, + { + "epoch": 0.29073932703036, + "grad_norm": 0.0, + "learning_rate": 1.664140471461792e-05, + "loss": 1.3145, + "step": 9902 + }, + { + "epoch": 0.29076868870749895, + "grad_norm": 0.0, + "learning_rate": 1.6640693739197185e-05, + "loss": 1.4961, + "step": 9903 + }, + { + "epoch": 0.29079805038463796, + "grad_norm": 0.0, + "learning_rate": 1.6639982703722952e-05, + "loss": 1.2363, + "step": 9904 + }, + { + "epoch": 0.290827412061777, + "grad_norm": 0.0, + "learning_rate": 1.6639271608201656e-05, + "loss": 1.4277, + "step": 9905 + }, + { + "epoch": 0.29085677373891594, + "grad_norm": 0.0, + "learning_rate": 1.6638560452639727e-05, + "loss": 1.3779, + "step": 9906 + }, + { + "epoch": 0.29088613541605496, + "grad_norm": 0.0, + "learning_rate": 1.6637849237043596e-05, + "loss": 1.4883, + "step": 9907 + }, + { + "epoch": 0.290915497093194, + "grad_norm": 0.0, + "learning_rate": 1.6637137961419697e-05, + "loss": 1.333, + "step": 9908 + }, + { + "epoch": 0.29094485877033294, + "grad_norm": 0.0, + "learning_rate": 1.6636426625774453e-05, + "loss": 1.4951, + "step": 9909 + }, + { + "epoch": 0.29097422044747195, + "grad_norm": 0.0, + "learning_rate": 1.663571523011431e-05, + "loss": 1.4229, + "step": 9910 + }, + { + "epoch": 0.29100358212461097, + "grad_norm": 0.0, + "learning_rate": 1.663500377444569e-05, + "loss": 1.4365, + "step": 9911 + }, + { + "epoch": 0.29103294380174993, + "grad_norm": 0.0, + "learning_rate": 1.6634292258775032e-05, + "loss": 1.4043, + "step": 9912 + }, + { + "epoch": 0.29106230547888895, + "grad_norm": 0.0, + "learning_rate": 1.6633580683108773e-05, + "loss": 1.3867, + "step": 9913 + }, + { + "epoch": 0.29109166715602797, + "grad_norm": 0.0, + "learning_rate": 1.6632869047453344e-05, + "loss": 1.4688, + "step": 9914 + }, + { + "epoch": 0.29112102883316693, + "grad_norm": 0.0, + "learning_rate": 1.663215735181518e-05, + "loss": 1.3066, + "step": 9915 + }, + { + "epoch": 0.29115039051030595, + "grad_norm": 0.0, + "learning_rate": 1.6631445596200723e-05, + "loss": 1.3467, + "step": 9916 + }, + { + "epoch": 0.29117975218744496, + "grad_norm": 0.0, + "learning_rate": 1.6630733780616403e-05, + "loss": 1.6396, + "step": 9917 + }, + { + "epoch": 0.2912091138645839, + "grad_norm": 0.0, + "learning_rate": 1.6630021905068656e-05, + "loss": 1.3467, + "step": 9918 + }, + { + "epoch": 0.29123847554172294, + "grad_norm": 0.0, + "learning_rate": 1.6629309969563925e-05, + "loss": 1.501, + "step": 9919 + }, + { + "epoch": 0.29126783721886196, + "grad_norm": 0.0, + "learning_rate": 1.6628597974108646e-05, + "loss": 1.3975, + "step": 9920 + }, + { + "epoch": 0.2912971988960009, + "grad_norm": 0.0, + "learning_rate": 1.6627885918709258e-05, + "loss": 1.4297, + "step": 9921 + }, + { + "epoch": 0.29132656057313994, + "grad_norm": 0.0, + "learning_rate": 1.66271738033722e-05, + "loss": 1.3398, + "step": 9922 + }, + { + "epoch": 0.29135592225027895, + "grad_norm": 0.0, + "learning_rate": 1.662646162810391e-05, + "loss": 1.416, + "step": 9923 + }, + { + "epoch": 0.2913852839274179, + "grad_norm": 0.0, + "learning_rate": 1.6625749392910835e-05, + "loss": 1.3877, + "step": 9924 + }, + { + "epoch": 0.29141464560455693, + "grad_norm": 0.0, + "learning_rate": 1.662503709779941e-05, + "loss": 1.417, + "step": 9925 + }, + { + "epoch": 0.29144400728169595, + "grad_norm": 0.0, + "learning_rate": 1.6624324742776075e-05, + "loss": 1.3848, + "step": 9926 + }, + { + "epoch": 0.2914733689588349, + "grad_norm": 0.0, + "learning_rate": 1.6623612327847278e-05, + "loss": 1.3711, + "step": 9927 + }, + { + "epoch": 0.29150273063597393, + "grad_norm": 0.0, + "learning_rate": 1.6622899853019458e-05, + "loss": 1.3506, + "step": 9928 + }, + { + "epoch": 0.29153209231311294, + "grad_norm": 0.0, + "learning_rate": 1.6622187318299053e-05, + "loss": 1.186, + "step": 9929 + }, + { + "epoch": 0.2915614539902519, + "grad_norm": 0.0, + "learning_rate": 1.6621474723692518e-05, + "loss": 1.2119, + "step": 9930 + }, + { + "epoch": 0.2915908156673909, + "grad_norm": 0.0, + "learning_rate": 1.662076206920629e-05, + "loss": 1.3838, + "step": 9931 + }, + { + "epoch": 0.29162017734452994, + "grad_norm": 0.0, + "learning_rate": 1.6620049354846815e-05, + "loss": 1.2568, + "step": 9932 + }, + { + "epoch": 0.2916495390216689, + "grad_norm": 0.0, + "learning_rate": 1.6619336580620537e-05, + "loss": 1.3486, + "step": 9933 + }, + { + "epoch": 0.2916789006988079, + "grad_norm": 0.0, + "learning_rate": 1.6618623746533908e-05, + "loss": 1.3418, + "step": 9934 + }, + { + "epoch": 0.29170826237594694, + "grad_norm": 0.0, + "learning_rate": 1.6617910852593366e-05, + "loss": 1.3936, + "step": 9935 + }, + { + "epoch": 0.2917376240530859, + "grad_norm": 0.0, + "learning_rate": 1.661719789880536e-05, + "loss": 1.3594, + "step": 9936 + }, + { + "epoch": 0.2917669857302249, + "grad_norm": 0.0, + "learning_rate": 1.661648488517634e-05, + "loss": 1.6016, + "step": 9937 + }, + { + "epoch": 0.29179634740736393, + "grad_norm": 0.0, + "learning_rate": 1.6615771811712753e-05, + "loss": 1.4131, + "step": 9938 + }, + { + "epoch": 0.2918257090845029, + "grad_norm": 0.0, + "learning_rate": 1.6615058678421045e-05, + "loss": 1.3477, + "step": 9939 + }, + { + "epoch": 0.2918550707616419, + "grad_norm": 0.0, + "learning_rate": 1.661434548530767e-05, + "loss": 1.4844, + "step": 9940 + }, + { + "epoch": 0.2918844324387809, + "grad_norm": 0.0, + "learning_rate": 1.661363223237907e-05, + "loss": 1.3203, + "step": 9941 + }, + { + "epoch": 0.2919137941159199, + "grad_norm": 0.0, + "learning_rate": 1.6612918919641707e-05, + "loss": 1.4541, + "step": 9942 + }, + { + "epoch": 0.2919431557930589, + "grad_norm": 0.0, + "learning_rate": 1.661220554710202e-05, + "loss": 1.4365, + "step": 9943 + }, + { + "epoch": 0.2919725174701979, + "grad_norm": 0.0, + "learning_rate": 1.6611492114766466e-05, + "loss": 1.4463, + "step": 9944 + }, + { + "epoch": 0.2920018791473369, + "grad_norm": 0.0, + "learning_rate": 1.6610778622641495e-05, + "loss": 1.3613, + "step": 9945 + }, + { + "epoch": 0.2920312408244759, + "grad_norm": 0.0, + "learning_rate": 1.6610065070733563e-05, + "loss": 1.3896, + "step": 9946 + }, + { + "epoch": 0.2920606025016149, + "grad_norm": 0.0, + "learning_rate": 1.6609351459049116e-05, + "loss": 1.4482, + "step": 9947 + }, + { + "epoch": 0.2920899641787539, + "grad_norm": 0.0, + "learning_rate": 1.6608637787594613e-05, + "loss": 1.5449, + "step": 9948 + }, + { + "epoch": 0.2921193258558929, + "grad_norm": 0.0, + "learning_rate": 1.6607924056376507e-05, + "loss": 1.3457, + "step": 9949 + }, + { + "epoch": 0.2921486875330319, + "grad_norm": 0.0, + "learning_rate": 1.6607210265401247e-05, + "loss": 1.4062, + "step": 9950 + }, + { + "epoch": 0.2921780492101709, + "grad_norm": 0.0, + "learning_rate": 1.6606496414675296e-05, + "loss": 1.4502, + "step": 9951 + }, + { + "epoch": 0.2922074108873099, + "grad_norm": 0.0, + "learning_rate": 1.6605782504205103e-05, + "loss": 1.2969, + "step": 9952 + }, + { + "epoch": 0.29223677256444885, + "grad_norm": 0.0, + "learning_rate": 1.660506853399713e-05, + "loss": 1.4512, + "step": 9953 + }, + { + "epoch": 0.29226613424158787, + "grad_norm": 0.0, + "learning_rate": 1.6604354504057832e-05, + "loss": 1.5195, + "step": 9954 + }, + { + "epoch": 0.2922954959187269, + "grad_norm": 0.0, + "learning_rate": 1.660364041439366e-05, + "loss": 1.4258, + "step": 9955 + }, + { + "epoch": 0.29232485759586585, + "grad_norm": 0.0, + "learning_rate": 1.660292626501108e-05, + "loss": 1.3174, + "step": 9956 + }, + { + "epoch": 0.29235421927300487, + "grad_norm": 0.0, + "learning_rate": 1.6602212055916546e-05, + "loss": 1.4805, + "step": 9957 + }, + { + "epoch": 0.2923835809501439, + "grad_norm": 0.0, + "learning_rate": 1.660149778711652e-05, + "loss": 1.373, + "step": 9958 + }, + { + "epoch": 0.29241294262728285, + "grad_norm": 0.0, + "learning_rate": 1.6600783458617457e-05, + "loss": 1.4951, + "step": 9959 + }, + { + "epoch": 0.29244230430442186, + "grad_norm": 0.0, + "learning_rate": 1.660006907042581e-05, + "loss": 1.293, + "step": 9960 + }, + { + "epoch": 0.2924716659815609, + "grad_norm": 0.0, + "learning_rate": 1.659935462254806e-05, + "loss": 1.4121, + "step": 9961 + }, + { + "epoch": 0.29250102765869984, + "grad_norm": 0.0, + "learning_rate": 1.6598640114990652e-05, + "loss": 1.3779, + "step": 9962 + }, + { + "epoch": 0.29253038933583886, + "grad_norm": 0.0, + "learning_rate": 1.6597925547760053e-05, + "loss": 1.2393, + "step": 9963 + }, + { + "epoch": 0.2925597510129779, + "grad_norm": 0.0, + "learning_rate": 1.6597210920862722e-05, + "loss": 1.3486, + "step": 9964 + }, + { + "epoch": 0.29258911269011684, + "grad_norm": 0.0, + "learning_rate": 1.659649623430512e-05, + "loss": 1.4482, + "step": 9965 + }, + { + "epoch": 0.29261847436725585, + "grad_norm": 0.0, + "learning_rate": 1.659578148809372e-05, + "loss": 1.415, + "step": 9966 + }, + { + "epoch": 0.29264783604439487, + "grad_norm": 0.0, + "learning_rate": 1.6595066682234976e-05, + "loss": 1.373, + "step": 9967 + }, + { + "epoch": 0.29267719772153383, + "grad_norm": 0.0, + "learning_rate": 1.6594351816735352e-05, + "loss": 1.3604, + "step": 9968 + }, + { + "epoch": 0.29270655939867285, + "grad_norm": 0.0, + "learning_rate": 1.6593636891601315e-05, + "loss": 1.3408, + "step": 9969 + }, + { + "epoch": 0.29273592107581187, + "grad_norm": 0.0, + "learning_rate": 1.6592921906839337e-05, + "loss": 1.5234, + "step": 9970 + }, + { + "epoch": 0.29276528275295083, + "grad_norm": 0.0, + "learning_rate": 1.6592206862455876e-05, + "loss": 1.5146, + "step": 9971 + }, + { + "epoch": 0.29279464443008985, + "grad_norm": 0.0, + "learning_rate": 1.6591491758457398e-05, + "loss": 1.334, + "step": 9972 + }, + { + "epoch": 0.29282400610722886, + "grad_norm": 0.0, + "learning_rate": 1.659077659485037e-05, + "loss": 1.4355, + "step": 9973 + }, + { + "epoch": 0.2928533677843678, + "grad_norm": 0.0, + "learning_rate": 1.6590061371641264e-05, + "loss": 1.3926, + "step": 9974 + }, + { + "epoch": 0.29288272946150684, + "grad_norm": 0.0, + "learning_rate": 1.6589346088836548e-05, + "loss": 1.5205, + "step": 9975 + }, + { + "epoch": 0.29291209113864586, + "grad_norm": 0.0, + "learning_rate": 1.6588630746442683e-05, + "loss": 1.3916, + "step": 9976 + }, + { + "epoch": 0.2929414528157848, + "grad_norm": 0.0, + "learning_rate": 1.6587915344466147e-05, + "loss": 1.2842, + "step": 9977 + }, + { + "epoch": 0.29297081449292384, + "grad_norm": 0.0, + "learning_rate": 1.6587199882913402e-05, + "loss": 1.3164, + "step": 9978 + }, + { + "epoch": 0.29300017617006285, + "grad_norm": 0.0, + "learning_rate": 1.658648436179092e-05, + "loss": 1.3467, + "step": 9979 + }, + { + "epoch": 0.2930295378472018, + "grad_norm": 0.0, + "learning_rate": 1.6585768781105177e-05, + "loss": 1.4229, + "step": 9980 + }, + { + "epoch": 0.29305889952434083, + "grad_norm": 0.0, + "learning_rate": 1.6585053140862636e-05, + "loss": 1.3647, + "step": 9981 + }, + { + "epoch": 0.29308826120147985, + "grad_norm": 0.0, + "learning_rate": 1.6584337441069776e-05, + "loss": 1.4023, + "step": 9982 + }, + { + "epoch": 0.2931176228786188, + "grad_norm": 0.0, + "learning_rate": 1.6583621681733067e-05, + "loss": 1.3154, + "step": 9983 + }, + { + "epoch": 0.29314698455575783, + "grad_norm": 0.0, + "learning_rate": 1.658290586285898e-05, + "loss": 1.4229, + "step": 9984 + }, + { + "epoch": 0.29317634623289685, + "grad_norm": 0.0, + "learning_rate": 1.6582189984453992e-05, + "loss": 1.417, + "step": 9985 + }, + { + "epoch": 0.2932057079100358, + "grad_norm": 0.0, + "learning_rate": 1.658147404652457e-05, + "loss": 1.2778, + "step": 9986 + }, + { + "epoch": 0.2932350695871748, + "grad_norm": 0.0, + "learning_rate": 1.6580758049077195e-05, + "loss": 1.3154, + "step": 9987 + }, + { + "epoch": 0.29326443126431384, + "grad_norm": 0.0, + "learning_rate": 1.658004199211834e-05, + "loss": 1.2017, + "step": 9988 + }, + { + "epoch": 0.2932937929414528, + "grad_norm": 0.0, + "learning_rate": 1.6579325875654476e-05, + "loss": 1.4531, + "step": 9989 + }, + { + "epoch": 0.2933231546185918, + "grad_norm": 0.0, + "learning_rate": 1.6578609699692087e-05, + "loss": 1.3691, + "step": 9990 + }, + { + "epoch": 0.29335251629573084, + "grad_norm": 0.0, + "learning_rate": 1.6577893464237644e-05, + "loss": 1.2324, + "step": 9991 + }, + { + "epoch": 0.2933818779728698, + "grad_norm": 0.0, + "learning_rate": 1.6577177169297625e-05, + "loss": 1.3379, + "step": 9992 + }, + { + "epoch": 0.2934112396500088, + "grad_norm": 0.0, + "learning_rate": 1.6576460814878513e-05, + "loss": 1.4258, + "step": 9993 + }, + { + "epoch": 0.29344060132714783, + "grad_norm": 0.0, + "learning_rate": 1.6575744400986778e-05, + "loss": 1.5098, + "step": 9994 + }, + { + "epoch": 0.2934699630042868, + "grad_norm": 0.0, + "learning_rate": 1.65750279276289e-05, + "loss": 1.4092, + "step": 9995 + }, + { + "epoch": 0.2934993246814258, + "grad_norm": 0.0, + "learning_rate": 1.6574311394811366e-05, + "loss": 1.5127, + "step": 9996 + }, + { + "epoch": 0.2935286863585648, + "grad_norm": 0.0, + "learning_rate": 1.6573594802540648e-05, + "loss": 1.4932, + "step": 9997 + }, + { + "epoch": 0.2935580480357038, + "grad_norm": 0.0, + "learning_rate": 1.6572878150823227e-05, + "loss": 1.4502, + "step": 9998 + }, + { + "epoch": 0.2935874097128428, + "grad_norm": 0.0, + "learning_rate": 1.6572161439665587e-05, + "loss": 1.3564, + "step": 9999 + }, + { + "epoch": 0.2936167713899818, + "grad_norm": 0.0, + "learning_rate": 1.657144466907421e-05, + "loss": 1.3223, + "step": 10000 + }, + { + "epoch": 0.2936461330671208, + "grad_norm": 0.0, + "learning_rate": 1.6570727839055575e-05, + "loss": 1.2852, + "step": 10001 + }, + { + "epoch": 0.2936754947442598, + "grad_norm": 0.0, + "learning_rate": 1.6570010949616162e-05, + "loss": 1.4092, + "step": 10002 + }, + { + "epoch": 0.29370485642139876, + "grad_norm": 0.0, + "learning_rate": 1.656929400076246e-05, + "loss": 1.4248, + "step": 10003 + }, + { + "epoch": 0.2937342180985378, + "grad_norm": 0.0, + "learning_rate": 1.656857699250095e-05, + "loss": 1.4893, + "step": 10004 + }, + { + "epoch": 0.2937635797756768, + "grad_norm": 0.0, + "learning_rate": 1.6567859924838113e-05, + "loss": 1.3545, + "step": 10005 + }, + { + "epoch": 0.29379294145281576, + "grad_norm": 0.0, + "learning_rate": 1.6567142797780442e-05, + "loss": 1.4463, + "step": 10006 + }, + { + "epoch": 0.2938223031299548, + "grad_norm": 0.0, + "learning_rate": 1.6566425611334415e-05, + "loss": 1.373, + "step": 10007 + }, + { + "epoch": 0.2938516648070938, + "grad_norm": 0.0, + "learning_rate": 1.656570836550652e-05, + "loss": 1.3301, + "step": 10008 + }, + { + "epoch": 0.29388102648423275, + "grad_norm": 0.0, + "learning_rate": 1.656499106030324e-05, + "loss": 1.5654, + "step": 10009 + }, + { + "epoch": 0.29391038816137177, + "grad_norm": 0.0, + "learning_rate": 1.6564273695731063e-05, + "loss": 1.4639, + "step": 10010 + }, + { + "epoch": 0.2939397498385108, + "grad_norm": 0.0, + "learning_rate": 1.656355627179648e-05, + "loss": 1.4355, + "step": 10011 + }, + { + "epoch": 0.29396911151564975, + "grad_norm": 0.0, + "learning_rate": 1.656283878850598e-05, + "loss": 1.3652, + "step": 10012 + }, + { + "epoch": 0.29399847319278877, + "grad_norm": 0.0, + "learning_rate": 1.6562121245866044e-05, + "loss": 1.3789, + "step": 10013 + }, + { + "epoch": 0.2940278348699278, + "grad_norm": 0.0, + "learning_rate": 1.6561403643883166e-05, + "loss": 1.3691, + "step": 10014 + }, + { + "epoch": 0.29405719654706675, + "grad_norm": 0.0, + "learning_rate": 1.6560685982563834e-05, + "loss": 1.2637, + "step": 10015 + }, + { + "epoch": 0.29408655822420576, + "grad_norm": 0.0, + "learning_rate": 1.655996826191454e-05, + "loss": 1.3477, + "step": 10016 + }, + { + "epoch": 0.2941159199013448, + "grad_norm": 0.0, + "learning_rate": 1.6559250481941773e-05, + "loss": 1.4785, + "step": 10017 + }, + { + "epoch": 0.29414528157848374, + "grad_norm": 0.0, + "learning_rate": 1.6558532642652023e-05, + "loss": 1.4824, + "step": 10018 + }, + { + "epoch": 0.29417464325562276, + "grad_norm": 0.0, + "learning_rate": 1.655781474405178e-05, + "loss": 1.4668, + "step": 10019 + }, + { + "epoch": 0.2942040049327618, + "grad_norm": 0.0, + "learning_rate": 1.6557096786147545e-05, + "loss": 1.4102, + "step": 10020 + }, + { + "epoch": 0.29423336660990074, + "grad_norm": 0.0, + "learning_rate": 1.65563787689458e-05, + "loss": 1.3145, + "step": 10021 + }, + { + "epoch": 0.29426272828703975, + "grad_norm": 0.0, + "learning_rate": 1.6555660692453045e-05, + "loss": 1.3691, + "step": 10022 + }, + { + "epoch": 0.29429208996417877, + "grad_norm": 0.0, + "learning_rate": 1.655494255667577e-05, + "loss": 1.3662, + "step": 10023 + }, + { + "epoch": 0.29432145164131773, + "grad_norm": 0.0, + "learning_rate": 1.655422436162047e-05, + "loss": 1.3613, + "step": 10024 + }, + { + "epoch": 0.29435081331845675, + "grad_norm": 0.0, + "learning_rate": 1.6553506107293643e-05, + "loss": 1.3711, + "step": 10025 + }, + { + "epoch": 0.29438017499559577, + "grad_norm": 0.0, + "learning_rate": 1.655278779370178e-05, + "loss": 1.2979, + "step": 10026 + }, + { + "epoch": 0.29440953667273473, + "grad_norm": 0.0, + "learning_rate": 1.6552069420851378e-05, + "loss": 1.4111, + "step": 10027 + }, + { + "epoch": 0.29443889834987375, + "grad_norm": 0.0, + "learning_rate": 1.6551350988748937e-05, + "loss": 1.3135, + "step": 10028 + }, + { + "epoch": 0.29446826002701276, + "grad_norm": 0.0, + "learning_rate": 1.655063249740095e-05, + "loss": 1.376, + "step": 10029 + }, + { + "epoch": 0.2944976217041517, + "grad_norm": 0.0, + "learning_rate": 1.6549913946813917e-05, + "loss": 1.3047, + "step": 10030 + }, + { + "epoch": 0.29452698338129074, + "grad_norm": 0.0, + "learning_rate": 1.654919533699433e-05, + "loss": 1.5371, + "step": 10031 + }, + { + "epoch": 0.29455634505842976, + "grad_norm": 0.0, + "learning_rate": 1.6548476667948695e-05, + "loss": 1.4902, + "step": 10032 + }, + { + "epoch": 0.2945857067355687, + "grad_norm": 0.0, + "learning_rate": 1.654775793968351e-05, + "loss": 1.4824, + "step": 10033 + }, + { + "epoch": 0.29461506841270774, + "grad_norm": 0.0, + "learning_rate": 1.654703915220527e-05, + "loss": 1.4922, + "step": 10034 + }, + { + "epoch": 0.29464443008984675, + "grad_norm": 0.0, + "learning_rate": 1.654632030552048e-05, + "loss": 1.5205, + "step": 10035 + }, + { + "epoch": 0.2946737917669857, + "grad_norm": 0.0, + "learning_rate": 1.654560139963564e-05, + "loss": 1.4619, + "step": 10036 + }, + { + "epoch": 0.29470315344412473, + "grad_norm": 0.0, + "learning_rate": 1.654488243455725e-05, + "loss": 1.3623, + "step": 10037 + }, + { + "epoch": 0.29473251512126375, + "grad_norm": 0.0, + "learning_rate": 1.6544163410291808e-05, + "loss": 1.2764, + "step": 10038 + }, + { + "epoch": 0.2947618767984027, + "grad_norm": 0.0, + "learning_rate": 1.6543444326845825e-05, + "loss": 1.4951, + "step": 10039 + }, + { + "epoch": 0.29479123847554173, + "grad_norm": 0.0, + "learning_rate": 1.65427251842258e-05, + "loss": 1.3652, + "step": 10040 + }, + { + "epoch": 0.29482060015268075, + "grad_norm": 0.0, + "learning_rate": 1.654200598243823e-05, + "loss": 1.4316, + "step": 10041 + }, + { + "epoch": 0.2948499618298197, + "grad_norm": 0.0, + "learning_rate": 1.654128672148963e-05, + "loss": 1.4873, + "step": 10042 + }, + { + "epoch": 0.2948793235069587, + "grad_norm": 0.0, + "learning_rate": 1.6540567401386493e-05, + "loss": 1.3945, + "step": 10043 + }, + { + "epoch": 0.29490868518409774, + "grad_norm": 0.0, + "learning_rate": 1.6539848022135334e-05, + "loss": 1.3428, + "step": 10044 + }, + { + "epoch": 0.2949380468612367, + "grad_norm": 0.0, + "learning_rate": 1.6539128583742658e-05, + "loss": 1.3877, + "step": 10045 + }, + { + "epoch": 0.2949674085383757, + "grad_norm": 0.0, + "learning_rate": 1.6538409086214963e-05, + "loss": 1.3721, + "step": 10046 + }, + { + "epoch": 0.29499677021551474, + "grad_norm": 0.0, + "learning_rate": 1.6537689529558757e-05, + "loss": 1.4414, + "step": 10047 + }, + { + "epoch": 0.2950261318926537, + "grad_norm": 0.0, + "learning_rate": 1.6536969913780553e-05, + "loss": 1.375, + "step": 10048 + }, + { + "epoch": 0.2950554935697927, + "grad_norm": 0.0, + "learning_rate": 1.653625023888686e-05, + "loss": 1.248, + "step": 10049 + }, + { + "epoch": 0.29508485524693173, + "grad_norm": 0.0, + "learning_rate": 1.6535530504884176e-05, + "loss": 1.4736, + "step": 10050 + }, + { + "epoch": 0.2951142169240707, + "grad_norm": 0.0, + "learning_rate": 1.6534810711779018e-05, + "loss": 1.4756, + "step": 10051 + }, + { + "epoch": 0.2951435786012097, + "grad_norm": 0.0, + "learning_rate": 1.6534090859577896e-05, + "loss": 1.4277, + "step": 10052 + }, + { + "epoch": 0.29517294027834867, + "grad_norm": 0.0, + "learning_rate": 1.6533370948287314e-05, + "loss": 1.4932, + "step": 10053 + }, + { + "epoch": 0.2952023019554877, + "grad_norm": 0.0, + "learning_rate": 1.6532650977913786e-05, + "loss": 1.4678, + "step": 10054 + }, + { + "epoch": 0.2952316636326267, + "grad_norm": 0.0, + "learning_rate": 1.6531930948463823e-05, + "loss": 1.313, + "step": 10055 + }, + { + "epoch": 0.29526102530976567, + "grad_norm": 0.0, + "learning_rate": 1.6531210859943934e-05, + "loss": 1.3506, + "step": 10056 + }, + { + "epoch": 0.2952903869869047, + "grad_norm": 0.0, + "learning_rate": 1.6530490712360634e-05, + "loss": 1.3447, + "step": 10057 + }, + { + "epoch": 0.2953197486640437, + "grad_norm": 0.0, + "learning_rate": 1.6529770505720432e-05, + "loss": 1.3633, + "step": 10058 + }, + { + "epoch": 0.29534911034118266, + "grad_norm": 0.0, + "learning_rate": 1.6529050240029846e-05, + "loss": 1.418, + "step": 10059 + }, + { + "epoch": 0.2953784720183217, + "grad_norm": 0.0, + "learning_rate": 1.652832991529538e-05, + "loss": 1.4189, + "step": 10060 + }, + { + "epoch": 0.2954078336954607, + "grad_norm": 0.0, + "learning_rate": 1.6527609531523565e-05, + "loss": 1.3164, + "step": 10061 + }, + { + "epoch": 0.29543719537259966, + "grad_norm": 0.0, + "learning_rate": 1.6526889088720897e-05, + "loss": 1.3672, + "step": 10062 + }, + { + "epoch": 0.2954665570497387, + "grad_norm": 0.0, + "learning_rate": 1.6526168586893903e-05, + "loss": 1.2969, + "step": 10063 + }, + { + "epoch": 0.2954959187268777, + "grad_norm": 0.0, + "learning_rate": 1.6525448026049096e-05, + "loss": 1.5166, + "step": 10064 + }, + { + "epoch": 0.29552528040401665, + "grad_norm": 0.0, + "learning_rate": 1.6524727406192988e-05, + "loss": 1.3154, + "step": 10065 + }, + { + "epoch": 0.29555464208115567, + "grad_norm": 0.0, + "learning_rate": 1.6524006727332104e-05, + "loss": 1.2236, + "step": 10066 + }, + { + "epoch": 0.2955840037582947, + "grad_norm": 0.0, + "learning_rate": 1.6523285989472952e-05, + "loss": 1.5156, + "step": 10067 + }, + { + "epoch": 0.29561336543543365, + "grad_norm": 0.0, + "learning_rate": 1.6522565192622052e-05, + "loss": 1.5303, + "step": 10068 + }, + { + "epoch": 0.29564272711257267, + "grad_norm": 0.0, + "learning_rate": 1.652184433678593e-05, + "loss": 1.3389, + "step": 10069 + }, + { + "epoch": 0.2956720887897117, + "grad_norm": 0.0, + "learning_rate": 1.6521123421971096e-05, + "loss": 1.4014, + "step": 10070 + }, + { + "epoch": 0.29570145046685065, + "grad_norm": 0.0, + "learning_rate": 1.6520402448184073e-05, + "loss": 1.4248, + "step": 10071 + }, + { + "epoch": 0.29573081214398966, + "grad_norm": 0.0, + "learning_rate": 1.6519681415431383e-05, + "loss": 1.3291, + "step": 10072 + }, + { + "epoch": 0.2957601738211287, + "grad_norm": 0.0, + "learning_rate": 1.6518960323719543e-05, + "loss": 1.3037, + "step": 10073 + }, + { + "epoch": 0.29578953549826764, + "grad_norm": 0.0, + "learning_rate": 1.6518239173055076e-05, + "loss": 1.3564, + "step": 10074 + }, + { + "epoch": 0.29581889717540666, + "grad_norm": 0.0, + "learning_rate": 1.65175179634445e-05, + "loss": 1.4424, + "step": 10075 + }, + { + "epoch": 0.2958482588525457, + "grad_norm": 0.0, + "learning_rate": 1.6516796694894344e-05, + "loss": 1.3916, + "step": 10076 + }, + { + "epoch": 0.29587762052968464, + "grad_norm": 0.0, + "learning_rate": 1.6516075367411123e-05, + "loss": 1.4175, + "step": 10077 + }, + { + "epoch": 0.29590698220682365, + "grad_norm": 0.0, + "learning_rate": 1.6515353981001365e-05, + "loss": 1.5635, + "step": 10078 + }, + { + "epoch": 0.29593634388396267, + "grad_norm": 0.0, + "learning_rate": 1.651463253567159e-05, + "loss": 1.3857, + "step": 10079 + }, + { + "epoch": 0.29596570556110163, + "grad_norm": 0.0, + "learning_rate": 1.651391103142833e-05, + "loss": 1.4619, + "step": 10080 + }, + { + "epoch": 0.29599506723824065, + "grad_norm": 0.0, + "learning_rate": 1.65131894682781e-05, + "loss": 1.4678, + "step": 10081 + }, + { + "epoch": 0.29602442891537967, + "grad_norm": 0.0, + "learning_rate": 1.651246784622743e-05, + "loss": 1.4033, + "step": 10082 + }, + { + "epoch": 0.29605379059251863, + "grad_norm": 0.0, + "learning_rate": 1.6511746165282846e-05, + "loss": 1.4961, + "step": 10083 + }, + { + "epoch": 0.29608315226965765, + "grad_norm": 0.0, + "learning_rate": 1.6511024425450877e-05, + "loss": 1.4346, + "step": 10084 + }, + { + "epoch": 0.29611251394679666, + "grad_norm": 0.0, + "learning_rate": 1.651030262673804e-05, + "loss": 1.3809, + "step": 10085 + }, + { + "epoch": 0.2961418756239356, + "grad_norm": 0.0, + "learning_rate": 1.6509580769150874e-05, + "loss": 1.4033, + "step": 10086 + }, + { + "epoch": 0.29617123730107464, + "grad_norm": 0.0, + "learning_rate": 1.65088588526959e-05, + "loss": 1.4639, + "step": 10087 + }, + { + "epoch": 0.29620059897821366, + "grad_norm": 0.0, + "learning_rate": 1.650813687737965e-05, + "loss": 1.4424, + "step": 10088 + }, + { + "epoch": 0.2962299606553526, + "grad_norm": 0.0, + "learning_rate": 1.650741484320865e-05, + "loss": 1.5527, + "step": 10089 + }, + { + "epoch": 0.29625932233249164, + "grad_norm": 0.0, + "learning_rate": 1.650669275018943e-05, + "loss": 1.2559, + "step": 10090 + }, + { + "epoch": 0.29628868400963065, + "grad_norm": 0.0, + "learning_rate": 1.6505970598328525e-05, + "loss": 1.3115, + "step": 10091 + }, + { + "epoch": 0.2963180456867696, + "grad_norm": 0.0, + "learning_rate": 1.650524838763246e-05, + "loss": 1.2734, + "step": 10092 + }, + { + "epoch": 0.29634740736390863, + "grad_norm": 0.0, + "learning_rate": 1.6504526118107766e-05, + "loss": 1.3311, + "step": 10093 + }, + { + "epoch": 0.29637676904104765, + "grad_norm": 0.0, + "learning_rate": 1.6503803789760978e-05, + "loss": 1.3516, + "step": 10094 + }, + { + "epoch": 0.2964061307181866, + "grad_norm": 0.0, + "learning_rate": 1.6503081402598625e-05, + "loss": 1.3506, + "step": 10095 + }, + { + "epoch": 0.29643549239532563, + "grad_norm": 0.0, + "learning_rate": 1.6502358956627245e-05, + "loss": 1.4658, + "step": 10096 + }, + { + "epoch": 0.29646485407246465, + "grad_norm": 0.0, + "learning_rate": 1.6501636451853364e-05, + "loss": 1.3174, + "step": 10097 + }, + { + "epoch": 0.2964942157496036, + "grad_norm": 0.0, + "learning_rate": 1.650091388828352e-05, + "loss": 1.3691, + "step": 10098 + }, + { + "epoch": 0.2965235774267426, + "grad_norm": 0.0, + "learning_rate": 1.650019126592425e-05, + "loss": 1.3345, + "step": 10099 + }, + { + "epoch": 0.29655293910388164, + "grad_norm": 0.0, + "learning_rate": 1.649946858478208e-05, + "loss": 1.3096, + "step": 10100 + }, + { + "epoch": 0.2965823007810206, + "grad_norm": 0.0, + "learning_rate": 1.6498745844863555e-05, + "loss": 1.2842, + "step": 10101 + }, + { + "epoch": 0.2966116624581596, + "grad_norm": 0.0, + "learning_rate": 1.6498023046175208e-05, + "loss": 1.4668, + "step": 10102 + }, + { + "epoch": 0.2966410241352986, + "grad_norm": 0.0, + "learning_rate": 1.6497300188723574e-05, + "loss": 1.3633, + "step": 10103 + }, + { + "epoch": 0.2966703858124376, + "grad_norm": 0.0, + "learning_rate": 1.649657727251519e-05, + "loss": 1.4971, + "step": 10104 + }, + { + "epoch": 0.2966997474895766, + "grad_norm": 0.0, + "learning_rate": 1.649585429755659e-05, + "loss": 1.4189, + "step": 10105 + }, + { + "epoch": 0.2967291091667156, + "grad_norm": 0.0, + "learning_rate": 1.649513126385432e-05, + "loss": 1.3574, + "step": 10106 + }, + { + "epoch": 0.2967584708438546, + "grad_norm": 0.0, + "learning_rate": 1.6494408171414912e-05, + "loss": 1.4199, + "step": 10107 + }, + { + "epoch": 0.2967878325209936, + "grad_norm": 0.0, + "learning_rate": 1.649368502024491e-05, + "loss": 1.4531, + "step": 10108 + }, + { + "epoch": 0.2968171941981326, + "grad_norm": 0.0, + "learning_rate": 1.649296181035085e-05, + "loss": 1.4258, + "step": 10109 + }, + { + "epoch": 0.2968465558752716, + "grad_norm": 0.0, + "learning_rate": 1.6492238541739273e-05, + "loss": 1.3369, + "step": 10110 + }, + { + "epoch": 0.2968759175524106, + "grad_norm": 0.0, + "learning_rate": 1.649151521441672e-05, + "loss": 1.3936, + "step": 10111 + }, + { + "epoch": 0.29690527922954957, + "grad_norm": 0.0, + "learning_rate": 1.6490791828389735e-05, + "loss": 1.415, + "step": 10112 + }, + { + "epoch": 0.2969346409066886, + "grad_norm": 0.0, + "learning_rate": 1.6490068383664857e-05, + "loss": 1.4131, + "step": 10113 + }, + { + "epoch": 0.2969640025838276, + "grad_norm": 0.0, + "learning_rate": 1.6489344880248628e-05, + "loss": 1.3838, + "step": 10114 + }, + { + "epoch": 0.29699336426096656, + "grad_norm": 0.0, + "learning_rate": 1.6488621318147592e-05, + "loss": 1.4092, + "step": 10115 + }, + { + "epoch": 0.2970227259381056, + "grad_norm": 0.0, + "learning_rate": 1.648789769736829e-05, + "loss": 1.416, + "step": 10116 + }, + { + "epoch": 0.2970520876152446, + "grad_norm": 0.0, + "learning_rate": 1.6487174017917267e-05, + "loss": 1.4805, + "step": 10117 + }, + { + "epoch": 0.29708144929238356, + "grad_norm": 0.0, + "learning_rate": 1.6486450279801066e-05, + "loss": 1.3867, + "step": 10118 + }, + { + "epoch": 0.2971108109695226, + "grad_norm": 0.0, + "learning_rate": 1.648572648302624e-05, + "loss": 1.4336, + "step": 10119 + }, + { + "epoch": 0.2971401726466616, + "grad_norm": 0.0, + "learning_rate": 1.6485002627599326e-05, + "loss": 1.3682, + "step": 10120 + }, + { + "epoch": 0.29716953432380055, + "grad_norm": 0.0, + "learning_rate": 1.6484278713526867e-05, + "loss": 1.3662, + "step": 10121 + }, + { + "epoch": 0.29719889600093957, + "grad_norm": 0.0, + "learning_rate": 1.6483554740815422e-05, + "loss": 1.4053, + "step": 10122 + }, + { + "epoch": 0.2972282576780786, + "grad_norm": 0.0, + "learning_rate": 1.648283070947153e-05, + "loss": 1.3701, + "step": 10123 + }, + { + "epoch": 0.29725761935521755, + "grad_norm": 0.0, + "learning_rate": 1.6482106619501736e-05, + "loss": 1.4502, + "step": 10124 + }, + { + "epoch": 0.29728698103235657, + "grad_norm": 0.0, + "learning_rate": 1.6481382470912594e-05, + "loss": 1.3525, + "step": 10125 + }, + { + "epoch": 0.2973163427094956, + "grad_norm": 0.0, + "learning_rate": 1.6480658263710648e-05, + "loss": 1.4014, + "step": 10126 + }, + { + "epoch": 0.29734570438663455, + "grad_norm": 0.0, + "learning_rate": 1.6479933997902453e-05, + "loss": 1.3398, + "step": 10127 + }, + { + "epoch": 0.29737506606377356, + "grad_norm": 0.0, + "learning_rate": 1.6479209673494552e-05, + "loss": 1.4258, + "step": 10128 + }, + { + "epoch": 0.2974044277409126, + "grad_norm": 0.0, + "learning_rate": 1.64784852904935e-05, + "loss": 1.4326, + "step": 10129 + }, + { + "epoch": 0.29743378941805154, + "grad_norm": 0.0, + "learning_rate": 1.647776084890585e-05, + "loss": 1.3252, + "step": 10130 + }, + { + "epoch": 0.29746315109519056, + "grad_norm": 0.0, + "learning_rate": 1.6477036348738145e-05, + "loss": 1.3594, + "step": 10131 + }, + { + "epoch": 0.2974925127723296, + "grad_norm": 0.0, + "learning_rate": 1.6476311789996944e-05, + "loss": 1.292, + "step": 10132 + }, + { + "epoch": 0.29752187444946854, + "grad_norm": 0.0, + "learning_rate": 1.6475587172688798e-05, + "loss": 1.5088, + "step": 10133 + }, + { + "epoch": 0.29755123612660755, + "grad_norm": 0.0, + "learning_rate": 1.6474862496820256e-05, + "loss": 1.3389, + "step": 10134 + }, + { + "epoch": 0.29758059780374657, + "grad_norm": 0.0, + "learning_rate": 1.6474137762397876e-05, + "loss": 1.3389, + "step": 10135 + }, + { + "epoch": 0.29760995948088553, + "grad_norm": 0.0, + "learning_rate": 1.6473412969428205e-05, + "loss": 1.4248, + "step": 10136 + }, + { + "epoch": 0.29763932115802455, + "grad_norm": 0.0, + "learning_rate": 1.647268811791781e-05, + "loss": 1.4355, + "step": 10137 + }, + { + "epoch": 0.29766868283516357, + "grad_norm": 0.0, + "learning_rate": 1.6471963207873235e-05, + "loss": 1.5098, + "step": 10138 + }, + { + "epoch": 0.29769804451230253, + "grad_norm": 0.0, + "learning_rate": 1.647123823930104e-05, + "loss": 1.4521, + "step": 10139 + }, + { + "epoch": 0.29772740618944155, + "grad_norm": 0.0, + "learning_rate": 1.647051321220778e-05, + "loss": 1.3379, + "step": 10140 + }, + { + "epoch": 0.29775676786658056, + "grad_norm": 0.0, + "learning_rate": 1.646978812660001e-05, + "loss": 1.333, + "step": 10141 + }, + { + "epoch": 0.2977861295437195, + "grad_norm": 0.0, + "learning_rate": 1.646906298248429e-05, + "loss": 1.3438, + "step": 10142 + }, + { + "epoch": 0.29781549122085854, + "grad_norm": 0.0, + "learning_rate": 1.6468337779867182e-05, + "loss": 1.3838, + "step": 10143 + }, + { + "epoch": 0.29784485289799756, + "grad_norm": 0.0, + "learning_rate": 1.6467612518755233e-05, + "loss": 1.3091, + "step": 10144 + }, + { + "epoch": 0.2978742145751365, + "grad_norm": 0.0, + "learning_rate": 1.6466887199155008e-05, + "loss": 1.3525, + "step": 10145 + }, + { + "epoch": 0.29790357625227554, + "grad_norm": 0.0, + "learning_rate": 1.6466161821073066e-05, + "loss": 1.5283, + "step": 10146 + }, + { + "epoch": 0.29793293792941455, + "grad_norm": 0.0, + "learning_rate": 1.6465436384515968e-05, + "loss": 1.3223, + "step": 10147 + }, + { + "epoch": 0.2979622996065535, + "grad_norm": 0.0, + "learning_rate": 1.6464710889490273e-05, + "loss": 1.3428, + "step": 10148 + }, + { + "epoch": 0.29799166128369253, + "grad_norm": 0.0, + "learning_rate": 1.6463985336002543e-05, + "loss": 1.2632, + "step": 10149 + }, + { + "epoch": 0.29802102296083155, + "grad_norm": 0.0, + "learning_rate": 1.646325972405934e-05, + "loss": 1.502, + "step": 10150 + }, + { + "epoch": 0.2980503846379705, + "grad_norm": 0.0, + "learning_rate": 1.6462534053667215e-05, + "loss": 1.4219, + "step": 10151 + }, + { + "epoch": 0.29807974631510953, + "grad_norm": 0.0, + "learning_rate": 1.6461808324832745e-05, + "loss": 1.5938, + "step": 10152 + }, + { + "epoch": 0.2981091079922485, + "grad_norm": 0.0, + "learning_rate": 1.646108253756249e-05, + "loss": 1.3066, + "step": 10153 + }, + { + "epoch": 0.2981384696693875, + "grad_norm": 0.0, + "learning_rate": 1.646035669186301e-05, + "loss": 1.3662, + "step": 10154 + }, + { + "epoch": 0.2981678313465265, + "grad_norm": 0.0, + "learning_rate": 1.6459630787740864e-05, + "loss": 1.3984, + "step": 10155 + }, + { + "epoch": 0.2981971930236655, + "grad_norm": 0.0, + "learning_rate": 1.645890482520263e-05, + "loss": 1.4521, + "step": 10156 + }, + { + "epoch": 0.2982265547008045, + "grad_norm": 0.0, + "learning_rate": 1.645817880425486e-05, + "loss": 1.3887, + "step": 10157 + }, + { + "epoch": 0.2982559163779435, + "grad_norm": 0.0, + "learning_rate": 1.6457452724904127e-05, + "loss": 1.4932, + "step": 10158 + }, + { + "epoch": 0.2982852780550825, + "grad_norm": 0.0, + "learning_rate": 1.6456726587156997e-05, + "loss": 1.4873, + "step": 10159 + }, + { + "epoch": 0.2983146397322215, + "grad_norm": 0.0, + "learning_rate": 1.6456000391020034e-05, + "loss": 1.4609, + "step": 10160 + }, + { + "epoch": 0.2983440014093605, + "grad_norm": 0.0, + "learning_rate": 1.6455274136499807e-05, + "loss": 1.3574, + "step": 10161 + }, + { + "epoch": 0.2983733630864995, + "grad_norm": 0.0, + "learning_rate": 1.645454782360288e-05, + "loss": 1.3545, + "step": 10162 + }, + { + "epoch": 0.2984027247636385, + "grad_norm": 0.0, + "learning_rate": 1.6453821452335827e-05, + "loss": 1.3213, + "step": 10163 + }, + { + "epoch": 0.2984320864407775, + "grad_norm": 0.0, + "learning_rate": 1.645309502270521e-05, + "loss": 1.3486, + "step": 10164 + }, + { + "epoch": 0.2984614481179165, + "grad_norm": 0.0, + "learning_rate": 1.6452368534717603e-05, + "loss": 1.4727, + "step": 10165 + }, + { + "epoch": 0.2984908097950555, + "grad_norm": 0.0, + "learning_rate": 1.6451641988379577e-05, + "loss": 1.3838, + "step": 10166 + }, + { + "epoch": 0.2985201714721945, + "grad_norm": 0.0, + "learning_rate": 1.6450915383697702e-05, + "loss": 1.4033, + "step": 10167 + }, + { + "epoch": 0.29854953314933347, + "grad_norm": 0.0, + "learning_rate": 1.6450188720678543e-05, + "loss": 1.417, + "step": 10168 + }, + { + "epoch": 0.2985788948264725, + "grad_norm": 0.0, + "learning_rate": 1.6449461999328674e-05, + "loss": 1.4199, + "step": 10169 + }, + { + "epoch": 0.2986082565036115, + "grad_norm": 0.0, + "learning_rate": 1.6448735219654674e-05, + "loss": 1.3613, + "step": 10170 + }, + { + "epoch": 0.29863761818075046, + "grad_norm": 0.0, + "learning_rate": 1.6448008381663106e-05, + "loss": 1.3691, + "step": 10171 + }, + { + "epoch": 0.2986669798578895, + "grad_norm": 0.0, + "learning_rate": 1.644728148536055e-05, + "loss": 1.3096, + "step": 10172 + }, + { + "epoch": 0.2986963415350285, + "grad_norm": 0.0, + "learning_rate": 1.6446554530753572e-05, + "loss": 1.4326, + "step": 10173 + }, + { + "epoch": 0.29872570321216746, + "grad_norm": 0.0, + "learning_rate": 1.6445827517848752e-05, + "loss": 1.5039, + "step": 10174 + }, + { + "epoch": 0.2987550648893065, + "grad_norm": 0.0, + "learning_rate": 1.6445100446652663e-05, + "loss": 1.3574, + "step": 10175 + }, + { + "epoch": 0.2987844265664455, + "grad_norm": 0.0, + "learning_rate": 1.644437331717188e-05, + "loss": 1.4883, + "step": 10176 + }, + { + "epoch": 0.29881378824358446, + "grad_norm": 0.0, + "learning_rate": 1.6443646129412982e-05, + "loss": 1.4053, + "step": 10177 + }, + { + "epoch": 0.29884314992072347, + "grad_norm": 0.0, + "learning_rate": 1.6442918883382537e-05, + "loss": 1.4121, + "step": 10178 + }, + { + "epoch": 0.2988725115978625, + "grad_norm": 0.0, + "learning_rate": 1.644219157908713e-05, + "loss": 1.3994, + "step": 10179 + }, + { + "epoch": 0.29890187327500145, + "grad_norm": 0.0, + "learning_rate": 1.6441464216533334e-05, + "loss": 1.418, + "step": 10180 + }, + { + "epoch": 0.29893123495214047, + "grad_norm": 0.0, + "learning_rate": 1.6440736795727726e-05, + "loss": 1.5166, + "step": 10181 + }, + { + "epoch": 0.2989605966292795, + "grad_norm": 0.0, + "learning_rate": 1.6440009316676887e-05, + "loss": 1.4443, + "step": 10182 + }, + { + "epoch": 0.29898995830641845, + "grad_norm": 0.0, + "learning_rate": 1.6439281779387393e-05, + "loss": 1.3926, + "step": 10183 + }, + { + "epoch": 0.29901931998355746, + "grad_norm": 0.0, + "learning_rate": 1.6438554183865828e-05, + "loss": 1.3457, + "step": 10184 + }, + { + "epoch": 0.2990486816606965, + "grad_norm": 0.0, + "learning_rate": 1.6437826530118763e-05, + "loss": 1.3916, + "step": 10185 + }, + { + "epoch": 0.29907804333783544, + "grad_norm": 0.0, + "learning_rate": 1.6437098818152786e-05, + "loss": 1.269, + "step": 10186 + }, + { + "epoch": 0.29910740501497446, + "grad_norm": 0.0, + "learning_rate": 1.6436371047974477e-05, + "loss": 1.2949, + "step": 10187 + }, + { + "epoch": 0.2991367666921135, + "grad_norm": 0.0, + "learning_rate": 1.643564321959042e-05, + "loss": 1.334, + "step": 10188 + }, + { + "epoch": 0.29916612836925244, + "grad_norm": 0.0, + "learning_rate": 1.6434915333007187e-05, + "loss": 1.3965, + "step": 10189 + }, + { + "epoch": 0.29919549004639145, + "grad_norm": 0.0, + "learning_rate": 1.643418738823137e-05, + "loss": 1.46, + "step": 10190 + }, + { + "epoch": 0.29922485172353047, + "grad_norm": 0.0, + "learning_rate": 1.6433459385269547e-05, + "loss": 1.4326, + "step": 10191 + }, + { + "epoch": 0.29925421340066943, + "grad_norm": 0.0, + "learning_rate": 1.6432731324128305e-05, + "loss": 1.4189, + "step": 10192 + }, + { + "epoch": 0.29928357507780845, + "grad_norm": 0.0, + "learning_rate": 1.6432003204814223e-05, + "loss": 1.3379, + "step": 10193 + }, + { + "epoch": 0.29931293675494747, + "grad_norm": 0.0, + "learning_rate": 1.643127502733389e-05, + "loss": 1.4473, + "step": 10194 + }, + { + "epoch": 0.29934229843208643, + "grad_norm": 0.0, + "learning_rate": 1.643054679169389e-05, + "loss": 1.4004, + "step": 10195 + }, + { + "epoch": 0.29937166010922545, + "grad_norm": 0.0, + "learning_rate": 1.642981849790081e-05, + "loss": 1.2754, + "step": 10196 + }, + { + "epoch": 0.29940102178636446, + "grad_norm": 0.0, + "learning_rate": 1.6429090145961233e-05, + "loss": 1.3564, + "step": 10197 + }, + { + "epoch": 0.2994303834635034, + "grad_norm": 0.0, + "learning_rate": 1.6428361735881747e-05, + "loss": 1.4062, + "step": 10198 + }, + { + "epoch": 0.29945974514064244, + "grad_norm": 0.0, + "learning_rate": 1.6427633267668942e-05, + "loss": 1.418, + "step": 10199 + }, + { + "epoch": 0.29948910681778146, + "grad_norm": 0.0, + "learning_rate": 1.6426904741329403e-05, + "loss": 1.3633, + "step": 10200 + }, + { + "epoch": 0.2995184684949204, + "grad_norm": 0.0, + "learning_rate": 1.6426176156869712e-05, + "loss": 1.459, + "step": 10201 + }, + { + "epoch": 0.29954783017205944, + "grad_norm": 0.0, + "learning_rate": 1.642544751429647e-05, + "loss": 1.3916, + "step": 10202 + }, + { + "epoch": 0.2995771918491984, + "grad_norm": 0.0, + "learning_rate": 1.6424718813616262e-05, + "loss": 1.5166, + "step": 10203 + }, + { + "epoch": 0.2996065535263374, + "grad_norm": 0.0, + "learning_rate": 1.6423990054835673e-05, + "loss": 1.3789, + "step": 10204 + }, + { + "epoch": 0.29963591520347643, + "grad_norm": 0.0, + "learning_rate": 1.64232612379613e-05, + "loss": 1.3926, + "step": 10205 + }, + { + "epoch": 0.2996652768806154, + "grad_norm": 0.0, + "learning_rate": 1.642253236299973e-05, + "loss": 1.4355, + "step": 10206 + }, + { + "epoch": 0.2996946385577544, + "grad_norm": 0.0, + "learning_rate": 1.6421803429957553e-05, + "loss": 1.4375, + "step": 10207 + }, + { + "epoch": 0.29972400023489343, + "grad_norm": 0.0, + "learning_rate": 1.6421074438841363e-05, + "loss": 1.334, + "step": 10208 + }, + { + "epoch": 0.2997533619120324, + "grad_norm": 0.0, + "learning_rate": 1.642034538965775e-05, + "loss": 1.3857, + "step": 10209 + }, + { + "epoch": 0.2997827235891714, + "grad_norm": 0.0, + "learning_rate": 1.6419616282413312e-05, + "loss": 1.4199, + "step": 10210 + }, + { + "epoch": 0.2998120852663104, + "grad_norm": 0.0, + "learning_rate": 1.6418887117114642e-05, + "loss": 1.3916, + "step": 10211 + }, + { + "epoch": 0.2998414469434494, + "grad_norm": 0.0, + "learning_rate": 1.6418157893768333e-05, + "loss": 1.3848, + "step": 10212 + }, + { + "epoch": 0.2998708086205884, + "grad_norm": 0.0, + "learning_rate": 1.641742861238097e-05, + "loss": 1.3525, + "step": 10213 + }, + { + "epoch": 0.2999001702977274, + "grad_norm": 0.0, + "learning_rate": 1.6416699272959165e-05, + "loss": 1.3652, + "step": 10214 + }, + { + "epoch": 0.2999295319748664, + "grad_norm": 0.0, + "learning_rate": 1.6415969875509502e-05, + "loss": 1.4443, + "step": 10215 + }, + { + "epoch": 0.2999588936520054, + "grad_norm": 0.0, + "learning_rate": 1.6415240420038578e-05, + "loss": 1.4336, + "step": 10216 + }, + { + "epoch": 0.2999882553291444, + "grad_norm": 0.0, + "learning_rate": 1.6414510906552998e-05, + "loss": 1.3936, + "step": 10217 + }, + { + "epoch": 0.3000176170062834, + "grad_norm": 0.0, + "learning_rate": 1.641378133505935e-05, + "loss": 1.3135, + "step": 10218 + }, + { + "epoch": 0.3000469786834224, + "grad_norm": 0.0, + "learning_rate": 1.641305170556423e-05, + "loss": 1.332, + "step": 10219 + }, + { + "epoch": 0.3000763403605614, + "grad_norm": 0.0, + "learning_rate": 1.6412322018074245e-05, + "loss": 1.4365, + "step": 10220 + }, + { + "epoch": 0.3001057020377004, + "grad_norm": 0.0, + "learning_rate": 1.641159227259599e-05, + "loss": 1.4531, + "step": 10221 + }, + { + "epoch": 0.3001350637148394, + "grad_norm": 0.0, + "learning_rate": 1.6410862469136067e-05, + "loss": 1.3887, + "step": 10222 + }, + { + "epoch": 0.3001644253919784, + "grad_norm": 0.0, + "learning_rate": 1.641013260770107e-05, + "loss": 1.334, + "step": 10223 + }, + { + "epoch": 0.30019378706911737, + "grad_norm": 0.0, + "learning_rate": 1.64094026882976e-05, + "loss": 1.3877, + "step": 10224 + }, + { + "epoch": 0.3002231487462564, + "grad_norm": 0.0, + "learning_rate": 1.6408672710932264e-05, + "loss": 1.3867, + "step": 10225 + }, + { + "epoch": 0.3002525104233954, + "grad_norm": 0.0, + "learning_rate": 1.6407942675611653e-05, + "loss": 1.5215, + "step": 10226 + }, + { + "epoch": 0.30028187210053436, + "grad_norm": 0.0, + "learning_rate": 1.6407212582342382e-05, + "loss": 1.3633, + "step": 10227 + }, + { + "epoch": 0.3003112337776734, + "grad_norm": 0.0, + "learning_rate": 1.6406482431131046e-05, + "loss": 1.1919, + "step": 10228 + }, + { + "epoch": 0.3003405954548124, + "grad_norm": 0.0, + "learning_rate": 1.6405752221984245e-05, + "loss": 1.3633, + "step": 10229 + }, + { + "epoch": 0.30036995713195136, + "grad_norm": 0.0, + "learning_rate": 1.6405021954908587e-05, + "loss": 1.4668, + "step": 10230 + }, + { + "epoch": 0.3003993188090904, + "grad_norm": 0.0, + "learning_rate": 1.640429162991068e-05, + "loss": 1.3613, + "step": 10231 + }, + { + "epoch": 0.3004286804862294, + "grad_norm": 0.0, + "learning_rate": 1.6403561246997118e-05, + "loss": 1.333, + "step": 10232 + }, + { + "epoch": 0.30045804216336836, + "grad_norm": 0.0, + "learning_rate": 1.6402830806174514e-05, + "loss": 1.458, + "step": 10233 + }, + { + "epoch": 0.30048740384050737, + "grad_norm": 0.0, + "learning_rate": 1.640210030744947e-05, + "loss": 1.4443, + "step": 10234 + }, + { + "epoch": 0.3005167655176464, + "grad_norm": 0.0, + "learning_rate": 1.6401369750828596e-05, + "loss": 1.3477, + "step": 10235 + }, + { + "epoch": 0.30054612719478535, + "grad_norm": 0.0, + "learning_rate": 1.6400639136318494e-05, + "loss": 1.3994, + "step": 10236 + }, + { + "epoch": 0.30057548887192437, + "grad_norm": 0.0, + "learning_rate": 1.639990846392577e-05, + "loss": 1.3271, + "step": 10237 + }, + { + "epoch": 0.3006048505490634, + "grad_norm": 0.0, + "learning_rate": 1.6399177733657043e-05, + "loss": 1.4014, + "step": 10238 + }, + { + "epoch": 0.30063421222620235, + "grad_norm": 0.0, + "learning_rate": 1.6398446945518906e-05, + "loss": 1.2373, + "step": 10239 + }, + { + "epoch": 0.30066357390334136, + "grad_norm": 0.0, + "learning_rate": 1.6397716099517973e-05, + "loss": 1.4219, + "step": 10240 + }, + { + "epoch": 0.3006929355804804, + "grad_norm": 0.0, + "learning_rate": 1.6396985195660858e-05, + "loss": 1.502, + "step": 10241 + }, + { + "epoch": 0.30072229725761934, + "grad_norm": 0.0, + "learning_rate": 1.639625423395417e-05, + "loss": 1.2988, + "step": 10242 + }, + { + "epoch": 0.30075165893475836, + "grad_norm": 0.0, + "learning_rate": 1.6395523214404516e-05, + "loss": 1.3916, + "step": 10243 + }, + { + "epoch": 0.3007810206118974, + "grad_norm": 0.0, + "learning_rate": 1.639479213701851e-05, + "loss": 1.4268, + "step": 10244 + }, + { + "epoch": 0.30081038228903634, + "grad_norm": 0.0, + "learning_rate": 1.6394061001802753e-05, + "loss": 1.4277, + "step": 10245 + }, + { + "epoch": 0.30083974396617535, + "grad_norm": 0.0, + "learning_rate": 1.639332980876387e-05, + "loss": 1.5469, + "step": 10246 + }, + { + "epoch": 0.30086910564331437, + "grad_norm": 0.0, + "learning_rate": 1.6392598557908467e-05, + "loss": 1.2896, + "step": 10247 + }, + { + "epoch": 0.30089846732045333, + "grad_norm": 0.0, + "learning_rate": 1.6391867249243156e-05, + "loss": 1.3047, + "step": 10248 + }, + { + "epoch": 0.30092782899759235, + "grad_norm": 0.0, + "learning_rate": 1.6391135882774555e-05, + "loss": 1.4131, + "step": 10249 + }, + { + "epoch": 0.30095719067473137, + "grad_norm": 0.0, + "learning_rate": 1.6390404458509274e-05, + "loss": 1.373, + "step": 10250 + }, + { + "epoch": 0.30098655235187033, + "grad_norm": 0.0, + "learning_rate": 1.6389672976453934e-05, + "loss": 1.4697, + "step": 10251 + }, + { + "epoch": 0.30101591402900935, + "grad_norm": 0.0, + "learning_rate": 1.638894143661514e-05, + "loss": 1.3848, + "step": 10252 + }, + { + "epoch": 0.3010452757061483, + "grad_norm": 0.0, + "learning_rate": 1.6388209838999515e-05, + "loss": 1.4238, + "step": 10253 + }, + { + "epoch": 0.3010746373832873, + "grad_norm": 0.0, + "learning_rate": 1.6387478183613668e-05, + "loss": 1.374, + "step": 10254 + }, + { + "epoch": 0.30110399906042634, + "grad_norm": 0.0, + "learning_rate": 1.6386746470464223e-05, + "loss": 1.373, + "step": 10255 + }, + { + "epoch": 0.3011333607375653, + "grad_norm": 0.0, + "learning_rate": 1.6386014699557793e-05, + "loss": 1.3232, + "step": 10256 + }, + { + "epoch": 0.3011627224147043, + "grad_norm": 0.0, + "learning_rate": 1.6385282870901e-05, + "loss": 1.3457, + "step": 10257 + }, + { + "epoch": 0.30119208409184334, + "grad_norm": 0.0, + "learning_rate": 1.6384550984500454e-05, + "loss": 1.4932, + "step": 10258 + }, + { + "epoch": 0.3012214457689823, + "grad_norm": 0.0, + "learning_rate": 1.638381904036278e-05, + "loss": 1.3955, + "step": 10259 + }, + { + "epoch": 0.3012508074461213, + "grad_norm": 0.0, + "learning_rate": 1.6383087038494595e-05, + "loss": 1.3623, + "step": 10260 + }, + { + "epoch": 0.30128016912326033, + "grad_norm": 0.0, + "learning_rate": 1.638235497890252e-05, + "loss": 1.335, + "step": 10261 + }, + { + "epoch": 0.3013095308003993, + "grad_norm": 0.0, + "learning_rate": 1.6381622861593175e-05, + "loss": 1.4443, + "step": 10262 + }, + { + "epoch": 0.3013388924775383, + "grad_norm": 0.0, + "learning_rate": 1.6380890686573177e-05, + "loss": 1.543, + "step": 10263 + }, + { + "epoch": 0.30136825415467733, + "grad_norm": 0.0, + "learning_rate": 1.6380158453849155e-05, + "loss": 1.3887, + "step": 10264 + }, + { + "epoch": 0.3013976158318163, + "grad_norm": 0.0, + "learning_rate": 1.637942616342772e-05, + "loss": 1.458, + "step": 10265 + }, + { + "epoch": 0.3014269775089553, + "grad_norm": 0.0, + "learning_rate": 1.6378693815315508e-05, + "loss": 1.3086, + "step": 10266 + }, + { + "epoch": 0.3014563391860943, + "grad_norm": 0.0, + "learning_rate": 1.637796140951913e-05, + "loss": 1.4912, + "step": 10267 + }, + { + "epoch": 0.3014857008632333, + "grad_norm": 0.0, + "learning_rate": 1.6377228946045213e-05, + "loss": 1.4961, + "step": 10268 + }, + { + "epoch": 0.3015150625403723, + "grad_norm": 0.0, + "learning_rate": 1.6376496424900384e-05, + "loss": 1.4961, + "step": 10269 + }, + { + "epoch": 0.3015444242175113, + "grad_norm": 0.0, + "learning_rate": 1.6375763846091262e-05, + "loss": 1.2871, + "step": 10270 + }, + { + "epoch": 0.3015737858946503, + "grad_norm": 0.0, + "learning_rate": 1.637503120962448e-05, + "loss": 1.5498, + "step": 10271 + }, + { + "epoch": 0.3016031475717893, + "grad_norm": 0.0, + "learning_rate": 1.6374298515506652e-05, + "loss": 1.3193, + "step": 10272 + }, + { + "epoch": 0.3016325092489283, + "grad_norm": 0.0, + "learning_rate": 1.6373565763744414e-05, + "loss": 1.4443, + "step": 10273 + }, + { + "epoch": 0.3016618709260673, + "grad_norm": 0.0, + "learning_rate": 1.6372832954344386e-05, + "loss": 1.3848, + "step": 10274 + }, + { + "epoch": 0.3016912326032063, + "grad_norm": 0.0, + "learning_rate": 1.6372100087313195e-05, + "loss": 1.3018, + "step": 10275 + }, + { + "epoch": 0.3017205942803453, + "grad_norm": 0.0, + "learning_rate": 1.6371367162657475e-05, + "loss": 1.2852, + "step": 10276 + }, + { + "epoch": 0.3017499559574843, + "grad_norm": 0.0, + "learning_rate": 1.637063418038385e-05, + "loss": 1.2666, + "step": 10277 + }, + { + "epoch": 0.3017793176346233, + "grad_norm": 0.0, + "learning_rate": 1.6369901140498944e-05, + "loss": 1.3018, + "step": 10278 + }, + { + "epoch": 0.3018086793117623, + "grad_norm": 0.0, + "learning_rate": 1.6369168043009397e-05, + "loss": 1.3613, + "step": 10279 + }, + { + "epoch": 0.30183804098890127, + "grad_norm": 0.0, + "learning_rate": 1.636843488792183e-05, + "loss": 1.5225, + "step": 10280 + }, + { + "epoch": 0.3018674026660403, + "grad_norm": 0.0, + "learning_rate": 1.6367701675242877e-05, + "loss": 1.3779, + "step": 10281 + }, + { + "epoch": 0.3018967643431793, + "grad_norm": 0.0, + "learning_rate": 1.6366968404979164e-05, + "loss": 1.3682, + "step": 10282 + }, + { + "epoch": 0.30192612602031826, + "grad_norm": 0.0, + "learning_rate": 1.6366235077137324e-05, + "loss": 1.5273, + "step": 10283 + }, + { + "epoch": 0.3019554876974573, + "grad_norm": 0.0, + "learning_rate": 1.6365501691723992e-05, + "loss": 1.3721, + "step": 10284 + }, + { + "epoch": 0.3019848493745963, + "grad_norm": 0.0, + "learning_rate": 1.6364768248745796e-05, + "loss": 1.4014, + "step": 10285 + }, + { + "epoch": 0.30201421105173526, + "grad_norm": 0.0, + "learning_rate": 1.6364034748209374e-05, + "loss": 1.3672, + "step": 10286 + }, + { + "epoch": 0.3020435727288743, + "grad_norm": 0.0, + "learning_rate": 1.6363301190121355e-05, + "loss": 1.3125, + "step": 10287 + }, + { + "epoch": 0.3020729344060133, + "grad_norm": 0.0, + "learning_rate": 1.636256757448837e-05, + "loss": 1.4609, + "step": 10288 + }, + { + "epoch": 0.30210229608315226, + "grad_norm": 0.0, + "learning_rate": 1.6361833901317063e-05, + "loss": 1.4082, + "step": 10289 + }, + { + "epoch": 0.3021316577602913, + "grad_norm": 0.0, + "learning_rate": 1.6361100170614057e-05, + "loss": 1.4375, + "step": 10290 + }, + { + "epoch": 0.3021610194374303, + "grad_norm": 0.0, + "learning_rate": 1.6360366382385997e-05, + "loss": 1.4941, + "step": 10291 + }, + { + "epoch": 0.30219038111456925, + "grad_norm": 0.0, + "learning_rate": 1.6359632536639515e-05, + "loss": 1.3838, + "step": 10292 + }, + { + "epoch": 0.30221974279170827, + "grad_norm": 0.0, + "learning_rate": 1.6358898633381244e-05, + "loss": 1.4561, + "step": 10293 + }, + { + "epoch": 0.3022491044688473, + "grad_norm": 0.0, + "learning_rate": 1.6358164672617825e-05, + "loss": 1.3716, + "step": 10294 + }, + { + "epoch": 0.30227846614598625, + "grad_norm": 0.0, + "learning_rate": 1.63574306543559e-05, + "loss": 1.3447, + "step": 10295 + }, + { + "epoch": 0.30230782782312526, + "grad_norm": 0.0, + "learning_rate": 1.6356696578602095e-05, + "loss": 1.375, + "step": 10296 + }, + { + "epoch": 0.3023371895002643, + "grad_norm": 0.0, + "learning_rate": 1.6355962445363056e-05, + "loss": 1.5, + "step": 10297 + }, + { + "epoch": 0.30236655117740324, + "grad_norm": 0.0, + "learning_rate": 1.635522825464542e-05, + "loss": 1.3232, + "step": 10298 + }, + { + "epoch": 0.30239591285454226, + "grad_norm": 0.0, + "learning_rate": 1.635449400645583e-05, + "loss": 1.3418, + "step": 10299 + }, + { + "epoch": 0.3024252745316813, + "grad_norm": 0.0, + "learning_rate": 1.6353759700800922e-05, + "loss": 1.4453, + "step": 10300 + }, + { + "epoch": 0.30245463620882024, + "grad_norm": 0.0, + "learning_rate": 1.635302533768734e-05, + "loss": 1.3296, + "step": 10301 + }, + { + "epoch": 0.30248399788595925, + "grad_norm": 0.0, + "learning_rate": 1.6352290917121717e-05, + "loss": 1.4912, + "step": 10302 + }, + { + "epoch": 0.30251335956309827, + "grad_norm": 0.0, + "learning_rate": 1.6351556439110707e-05, + "loss": 1.4346, + "step": 10303 + }, + { + "epoch": 0.30254272124023723, + "grad_norm": 0.0, + "learning_rate": 1.6350821903660942e-05, + "loss": 1.2891, + "step": 10304 + }, + { + "epoch": 0.30257208291737625, + "grad_norm": 0.0, + "learning_rate": 1.6350087310779067e-05, + "loss": 1.4414, + "step": 10305 + }, + { + "epoch": 0.3026014445945152, + "grad_norm": 0.0, + "learning_rate": 1.634935266047173e-05, + "loss": 1.5332, + "step": 10306 + }, + { + "epoch": 0.30263080627165423, + "grad_norm": 0.0, + "learning_rate": 1.6348617952745567e-05, + "loss": 1.2788, + "step": 10307 + }, + { + "epoch": 0.30266016794879325, + "grad_norm": 0.0, + "learning_rate": 1.6347883187607227e-05, + "loss": 1.3232, + "step": 10308 + }, + { + "epoch": 0.3026895296259322, + "grad_norm": 0.0, + "learning_rate": 1.6347148365063356e-05, + "loss": 1.4414, + "step": 10309 + }, + { + "epoch": 0.3027188913030712, + "grad_norm": 0.0, + "learning_rate": 1.6346413485120596e-05, + "loss": 1.3052, + "step": 10310 + }, + { + "epoch": 0.30274825298021024, + "grad_norm": 0.0, + "learning_rate": 1.634567854778559e-05, + "loss": 1.3516, + "step": 10311 + }, + { + "epoch": 0.3027776146573492, + "grad_norm": 0.0, + "learning_rate": 1.634494355306499e-05, + "loss": 1.3936, + "step": 10312 + }, + { + "epoch": 0.3028069763344882, + "grad_norm": 0.0, + "learning_rate": 1.6344208500965443e-05, + "loss": 1.3057, + "step": 10313 + }, + { + "epoch": 0.30283633801162724, + "grad_norm": 0.0, + "learning_rate": 1.634347339149359e-05, + "loss": 1.375, + "step": 10314 + }, + { + "epoch": 0.3028656996887662, + "grad_norm": 0.0, + "learning_rate": 1.6342738224656085e-05, + "loss": 1.4131, + "step": 10315 + }, + { + "epoch": 0.3028950613659052, + "grad_norm": 0.0, + "learning_rate": 1.6342003000459568e-05, + "loss": 1.2129, + "step": 10316 + }, + { + "epoch": 0.30292442304304423, + "grad_norm": 0.0, + "learning_rate": 1.63412677189107e-05, + "loss": 1.3203, + "step": 10317 + }, + { + "epoch": 0.3029537847201832, + "grad_norm": 0.0, + "learning_rate": 1.6340532380016126e-05, + "loss": 1.4131, + "step": 10318 + }, + { + "epoch": 0.3029831463973222, + "grad_norm": 0.0, + "learning_rate": 1.6339796983782488e-05, + "loss": 1.3691, + "step": 10319 + }, + { + "epoch": 0.30301250807446123, + "grad_norm": 0.0, + "learning_rate": 1.6339061530216446e-05, + "loss": 1.3125, + "step": 10320 + }, + { + "epoch": 0.3030418697516002, + "grad_norm": 0.0, + "learning_rate": 1.6338326019324647e-05, + "loss": 1.4023, + "step": 10321 + }, + { + "epoch": 0.3030712314287392, + "grad_norm": 0.0, + "learning_rate": 1.633759045111374e-05, + "loss": 1.417, + "step": 10322 + }, + { + "epoch": 0.3031005931058782, + "grad_norm": 0.0, + "learning_rate": 1.633685482559038e-05, + "loss": 1.4336, + "step": 10323 + }, + { + "epoch": 0.3031299547830172, + "grad_norm": 0.0, + "learning_rate": 1.633611914276122e-05, + "loss": 1.3896, + "step": 10324 + }, + { + "epoch": 0.3031593164601562, + "grad_norm": 0.0, + "learning_rate": 1.6335383402632913e-05, + "loss": 1.4277, + "step": 10325 + }, + { + "epoch": 0.3031886781372952, + "grad_norm": 0.0, + "learning_rate": 1.6334647605212108e-05, + "loss": 1.4121, + "step": 10326 + }, + { + "epoch": 0.3032180398144342, + "grad_norm": 0.0, + "learning_rate": 1.6333911750505465e-05, + "loss": 1.2959, + "step": 10327 + }, + { + "epoch": 0.3032474014915732, + "grad_norm": 0.0, + "learning_rate": 1.6333175838519632e-05, + "loss": 1.3975, + "step": 10328 + }, + { + "epoch": 0.3032767631687122, + "grad_norm": 0.0, + "learning_rate": 1.6332439869261273e-05, + "loss": 1.3438, + "step": 10329 + }, + { + "epoch": 0.3033061248458512, + "grad_norm": 0.0, + "learning_rate": 1.633170384273704e-05, + "loss": 1.4287, + "step": 10330 + }, + { + "epoch": 0.3033354865229902, + "grad_norm": 0.0, + "learning_rate": 1.6330967758953582e-05, + "loss": 1.459, + "step": 10331 + }, + { + "epoch": 0.3033648482001292, + "grad_norm": 0.0, + "learning_rate": 1.6330231617917562e-05, + "loss": 1.5117, + "step": 10332 + }, + { + "epoch": 0.3033942098772682, + "grad_norm": 0.0, + "learning_rate": 1.632949541963564e-05, + "loss": 1.3975, + "step": 10333 + }, + { + "epoch": 0.3034235715544072, + "grad_norm": 0.0, + "learning_rate": 1.6328759164114466e-05, + "loss": 1.3525, + "step": 10334 + }, + { + "epoch": 0.3034529332315462, + "grad_norm": 0.0, + "learning_rate": 1.6328022851360703e-05, + "loss": 1.3613, + "step": 10335 + }, + { + "epoch": 0.30348229490868517, + "grad_norm": 0.0, + "learning_rate": 1.6327286481381008e-05, + "loss": 1.4512, + "step": 10336 + }, + { + "epoch": 0.3035116565858242, + "grad_norm": 0.0, + "learning_rate": 1.6326550054182045e-05, + "loss": 1.4609, + "step": 10337 + }, + { + "epoch": 0.3035410182629632, + "grad_norm": 0.0, + "learning_rate": 1.6325813569770465e-05, + "loss": 1.373, + "step": 10338 + }, + { + "epoch": 0.30357037994010216, + "grad_norm": 0.0, + "learning_rate": 1.6325077028152937e-05, + "loss": 1.4248, + "step": 10339 + }, + { + "epoch": 0.3035997416172412, + "grad_norm": 0.0, + "learning_rate": 1.6324340429336113e-05, + "loss": 1.3398, + "step": 10340 + }, + { + "epoch": 0.3036291032943802, + "grad_norm": 0.0, + "learning_rate": 1.6323603773326663e-05, + "loss": 1.4492, + "step": 10341 + }, + { + "epoch": 0.30365846497151916, + "grad_norm": 0.0, + "learning_rate": 1.6322867060131244e-05, + "loss": 1.3525, + "step": 10342 + }, + { + "epoch": 0.3036878266486582, + "grad_norm": 0.0, + "learning_rate": 1.6322130289756513e-05, + "loss": 1.4668, + "step": 10343 + }, + { + "epoch": 0.3037171883257972, + "grad_norm": 0.0, + "learning_rate": 1.6321393462209145e-05, + "loss": 1.417, + "step": 10344 + }, + { + "epoch": 0.30374655000293616, + "grad_norm": 0.0, + "learning_rate": 1.6320656577495795e-05, + "loss": 1.3174, + "step": 10345 + }, + { + "epoch": 0.3037759116800752, + "grad_norm": 0.0, + "learning_rate": 1.631991963562313e-05, + "loss": 1.3721, + "step": 10346 + }, + { + "epoch": 0.3038052733572142, + "grad_norm": 0.0, + "learning_rate": 1.6319182636597812e-05, + "loss": 1.376, + "step": 10347 + }, + { + "epoch": 0.30383463503435315, + "grad_norm": 0.0, + "learning_rate": 1.6318445580426504e-05, + "loss": 1.2812, + "step": 10348 + }, + { + "epoch": 0.30386399671149217, + "grad_norm": 0.0, + "learning_rate": 1.6317708467115882e-05, + "loss": 1.3433, + "step": 10349 + }, + { + "epoch": 0.3038933583886312, + "grad_norm": 0.0, + "learning_rate": 1.63169712966726e-05, + "loss": 1.4111, + "step": 10350 + }, + { + "epoch": 0.30392272006577015, + "grad_norm": 0.0, + "learning_rate": 1.6316234069103327e-05, + "loss": 1.2021, + "step": 10351 + }, + { + "epoch": 0.30395208174290916, + "grad_norm": 0.0, + "learning_rate": 1.6315496784414735e-05, + "loss": 1.3955, + "step": 10352 + }, + { + "epoch": 0.3039814434200482, + "grad_norm": 0.0, + "learning_rate": 1.6314759442613485e-05, + "loss": 1.3452, + "step": 10353 + }, + { + "epoch": 0.30401080509718714, + "grad_norm": 0.0, + "learning_rate": 1.631402204370625e-05, + "loss": 1.3242, + "step": 10354 + }, + { + "epoch": 0.30404016677432616, + "grad_norm": 0.0, + "learning_rate": 1.6313284587699697e-05, + "loss": 1.3369, + "step": 10355 + }, + { + "epoch": 0.3040695284514651, + "grad_norm": 0.0, + "learning_rate": 1.6312547074600494e-05, + "loss": 1.3945, + "step": 10356 + }, + { + "epoch": 0.30409889012860414, + "grad_norm": 0.0, + "learning_rate": 1.631180950441531e-05, + "loss": 1.3027, + "step": 10357 + }, + { + "epoch": 0.30412825180574315, + "grad_norm": 0.0, + "learning_rate": 1.6311071877150818e-05, + "loss": 1.4521, + "step": 10358 + }, + { + "epoch": 0.3041576134828821, + "grad_norm": 0.0, + "learning_rate": 1.6310334192813686e-05, + "loss": 1.4131, + "step": 10359 + }, + { + "epoch": 0.30418697516002113, + "grad_norm": 0.0, + "learning_rate": 1.6309596451410586e-05, + "loss": 1.4961, + "step": 10360 + }, + { + "epoch": 0.30421633683716015, + "grad_norm": 0.0, + "learning_rate": 1.630885865294819e-05, + "loss": 1.3076, + "step": 10361 + }, + { + "epoch": 0.3042456985142991, + "grad_norm": 0.0, + "learning_rate": 1.6308120797433166e-05, + "loss": 1.416, + "step": 10362 + }, + { + "epoch": 0.30427506019143813, + "grad_norm": 0.0, + "learning_rate": 1.6307382884872192e-05, + "loss": 1.5068, + "step": 10363 + }, + { + "epoch": 0.30430442186857715, + "grad_norm": 0.0, + "learning_rate": 1.6306644915271942e-05, + "loss": 1.4766, + "step": 10364 + }, + { + "epoch": 0.3043337835457161, + "grad_norm": 0.0, + "learning_rate": 1.6305906888639086e-05, + "loss": 1.3428, + "step": 10365 + }, + { + "epoch": 0.3043631452228551, + "grad_norm": 0.0, + "learning_rate": 1.63051688049803e-05, + "loss": 1.333, + "step": 10366 + }, + { + "epoch": 0.30439250689999414, + "grad_norm": 0.0, + "learning_rate": 1.630443066430225e-05, + "loss": 1.4297, + "step": 10367 + }, + { + "epoch": 0.3044218685771331, + "grad_norm": 0.0, + "learning_rate": 1.6303692466611625e-05, + "loss": 1.3535, + "step": 10368 + }, + { + "epoch": 0.3044512302542721, + "grad_norm": 0.0, + "learning_rate": 1.6302954211915098e-05, + "loss": 1.5332, + "step": 10369 + }, + { + "epoch": 0.30448059193141114, + "grad_norm": 0.0, + "learning_rate": 1.6302215900219338e-05, + "loss": 1.3711, + "step": 10370 + }, + { + "epoch": 0.3045099536085501, + "grad_norm": 0.0, + "learning_rate": 1.6301477531531023e-05, + "loss": 1.334, + "step": 10371 + }, + { + "epoch": 0.3045393152856891, + "grad_norm": 0.0, + "learning_rate": 1.6300739105856836e-05, + "loss": 1.4551, + "step": 10372 + }, + { + "epoch": 0.30456867696282813, + "grad_norm": 0.0, + "learning_rate": 1.630000062320345e-05, + "loss": 1.3359, + "step": 10373 + }, + { + "epoch": 0.3045980386399671, + "grad_norm": 0.0, + "learning_rate": 1.6299262083577548e-05, + "loss": 1.5146, + "step": 10374 + }, + { + "epoch": 0.3046274003171061, + "grad_norm": 0.0, + "learning_rate": 1.6298523486985804e-05, + "loss": 1.4229, + "step": 10375 + }, + { + "epoch": 0.30465676199424513, + "grad_norm": 0.0, + "learning_rate": 1.62977848334349e-05, + "loss": 1.334, + "step": 10376 + }, + { + "epoch": 0.3046861236713841, + "grad_norm": 0.0, + "learning_rate": 1.629704612293151e-05, + "loss": 1.3633, + "step": 10377 + }, + { + "epoch": 0.3047154853485231, + "grad_norm": 0.0, + "learning_rate": 1.6296307355482324e-05, + "loss": 1.5586, + "step": 10378 + }, + { + "epoch": 0.3047448470256621, + "grad_norm": 0.0, + "learning_rate": 1.6295568531094018e-05, + "loss": 1.292, + "step": 10379 + }, + { + "epoch": 0.3047742087028011, + "grad_norm": 0.0, + "learning_rate": 1.6294829649773273e-05, + "loss": 1.334, + "step": 10380 + }, + { + "epoch": 0.3048035703799401, + "grad_norm": 0.0, + "learning_rate": 1.6294090711526768e-05, + "loss": 1.3604, + "step": 10381 + }, + { + "epoch": 0.3048329320570791, + "grad_norm": 0.0, + "learning_rate": 1.6293351716361193e-05, + "loss": 1.3682, + "step": 10382 + }, + { + "epoch": 0.3048622937342181, + "grad_norm": 0.0, + "learning_rate": 1.6292612664283224e-05, + "loss": 1.4775, + "step": 10383 + }, + { + "epoch": 0.3048916554113571, + "grad_norm": 0.0, + "learning_rate": 1.629187355529955e-05, + "loss": 1.4062, + "step": 10384 + }, + { + "epoch": 0.3049210170884961, + "grad_norm": 0.0, + "learning_rate": 1.629113438941685e-05, + "loss": 1.4463, + "step": 10385 + }, + { + "epoch": 0.3049503787656351, + "grad_norm": 0.0, + "learning_rate": 1.629039516664181e-05, + "loss": 1.3804, + "step": 10386 + }, + { + "epoch": 0.3049797404427741, + "grad_norm": 0.0, + "learning_rate": 1.6289655886981116e-05, + "loss": 1.5205, + "step": 10387 + }, + { + "epoch": 0.3050091021199131, + "grad_norm": 0.0, + "learning_rate": 1.628891655044145e-05, + "loss": 1.4219, + "step": 10388 + }, + { + "epoch": 0.3050384637970521, + "grad_norm": 0.0, + "learning_rate": 1.6288177157029508e-05, + "loss": 1.4355, + "step": 10389 + }, + { + "epoch": 0.3050678254741911, + "grad_norm": 0.0, + "learning_rate": 1.6287437706751966e-05, + "loss": 1.3223, + "step": 10390 + }, + { + "epoch": 0.3050971871513301, + "grad_norm": 0.0, + "learning_rate": 1.6286698199615517e-05, + "loss": 1.3682, + "step": 10391 + }, + { + "epoch": 0.30512654882846907, + "grad_norm": 0.0, + "learning_rate": 1.628595863562684e-05, + "loss": 1.3906, + "step": 10392 + }, + { + "epoch": 0.3051559105056081, + "grad_norm": 0.0, + "learning_rate": 1.6285219014792634e-05, + "loss": 1.4307, + "step": 10393 + }, + { + "epoch": 0.3051852721827471, + "grad_norm": 0.0, + "learning_rate": 1.628447933711958e-05, + "loss": 1.5342, + "step": 10394 + }, + { + "epoch": 0.30521463385988606, + "grad_norm": 0.0, + "learning_rate": 1.628373960261437e-05, + "loss": 1.377, + "step": 10395 + }, + { + "epoch": 0.3052439955370251, + "grad_norm": 0.0, + "learning_rate": 1.6282999811283695e-05, + "loss": 1.3438, + "step": 10396 + }, + { + "epoch": 0.3052733572141641, + "grad_norm": 0.0, + "learning_rate": 1.6282259963134245e-05, + "loss": 1.3945, + "step": 10397 + }, + { + "epoch": 0.30530271889130306, + "grad_norm": 0.0, + "learning_rate": 1.628152005817271e-05, + "loss": 1.2812, + "step": 10398 + }, + { + "epoch": 0.3053320805684421, + "grad_norm": 0.0, + "learning_rate": 1.6280780096405777e-05, + "loss": 1.2979, + "step": 10399 + }, + { + "epoch": 0.3053614422455811, + "grad_norm": 0.0, + "learning_rate": 1.6280040077840142e-05, + "loss": 1.4805, + "step": 10400 + }, + { + "epoch": 0.30539080392272006, + "grad_norm": 0.0, + "learning_rate": 1.6279300002482496e-05, + "loss": 1.4824, + "step": 10401 + }, + { + "epoch": 0.3054201655998591, + "grad_norm": 0.0, + "learning_rate": 1.627855987033953e-05, + "loss": 1.4082, + "step": 10402 + }, + { + "epoch": 0.3054495272769981, + "grad_norm": 0.0, + "learning_rate": 1.6277819681417945e-05, + "loss": 1.4668, + "step": 10403 + }, + { + "epoch": 0.30547888895413705, + "grad_norm": 0.0, + "learning_rate": 1.6277079435724427e-05, + "loss": 1.2744, + "step": 10404 + }, + { + "epoch": 0.30550825063127607, + "grad_norm": 0.0, + "learning_rate": 1.6276339133265668e-05, + "loss": 1.3574, + "step": 10405 + }, + { + "epoch": 0.30553761230841503, + "grad_norm": 0.0, + "learning_rate": 1.627559877404837e-05, + "loss": 1.4062, + "step": 10406 + }, + { + "epoch": 0.30556697398555405, + "grad_norm": 0.0, + "learning_rate": 1.6274858358079225e-05, + "loss": 1.4307, + "step": 10407 + }, + { + "epoch": 0.30559633566269306, + "grad_norm": 0.0, + "learning_rate": 1.6274117885364926e-05, + "loss": 1.3096, + "step": 10408 + }, + { + "epoch": 0.305625697339832, + "grad_norm": 0.0, + "learning_rate": 1.6273377355912178e-05, + "loss": 1.3701, + "step": 10409 + }, + { + "epoch": 0.30565505901697104, + "grad_norm": 0.0, + "learning_rate": 1.6272636769727665e-05, + "loss": 1.3496, + "step": 10410 + }, + { + "epoch": 0.30568442069411006, + "grad_norm": 0.0, + "learning_rate": 1.6271896126818096e-05, + "loss": 1.4131, + "step": 10411 + }, + { + "epoch": 0.305713782371249, + "grad_norm": 0.0, + "learning_rate": 1.6271155427190162e-05, + "loss": 1.4307, + "step": 10412 + }, + { + "epoch": 0.30574314404838804, + "grad_norm": 0.0, + "learning_rate": 1.6270414670850563e-05, + "loss": 1.3047, + "step": 10413 + }, + { + "epoch": 0.30577250572552706, + "grad_norm": 0.0, + "learning_rate": 1.6269673857806e-05, + "loss": 1.4551, + "step": 10414 + }, + { + "epoch": 0.305801867402666, + "grad_norm": 0.0, + "learning_rate": 1.626893298806317e-05, + "loss": 1.3057, + "step": 10415 + }, + { + "epoch": 0.30583122907980503, + "grad_norm": 0.0, + "learning_rate": 1.6268192061628775e-05, + "loss": 1.2202, + "step": 10416 + }, + { + "epoch": 0.30586059075694405, + "grad_norm": 0.0, + "learning_rate": 1.626745107850951e-05, + "loss": 1.3691, + "step": 10417 + }, + { + "epoch": 0.305889952434083, + "grad_norm": 0.0, + "learning_rate": 1.6266710038712084e-05, + "loss": 1.4619, + "step": 10418 + }, + { + "epoch": 0.30591931411122203, + "grad_norm": 0.0, + "learning_rate": 1.626596894224319e-05, + "loss": 1.3975, + "step": 10419 + }, + { + "epoch": 0.30594867578836105, + "grad_norm": 0.0, + "learning_rate": 1.6265227789109536e-05, + "loss": 1.5039, + "step": 10420 + }, + { + "epoch": 0.3059780374655, + "grad_norm": 0.0, + "learning_rate": 1.6264486579317825e-05, + "loss": 1.1924, + "step": 10421 + }, + { + "epoch": 0.306007399142639, + "grad_norm": 0.0, + "learning_rate": 1.6263745312874755e-05, + "loss": 1.4336, + "step": 10422 + }, + { + "epoch": 0.30603676081977804, + "grad_norm": 0.0, + "learning_rate": 1.6263003989787033e-05, + "loss": 1.374, + "step": 10423 + }, + { + "epoch": 0.306066122496917, + "grad_norm": 0.0, + "learning_rate": 1.626226261006136e-05, + "loss": 1.4248, + "step": 10424 + }, + { + "epoch": 0.306095484174056, + "grad_norm": 0.0, + "learning_rate": 1.6261521173704446e-05, + "loss": 1.3535, + "step": 10425 + }, + { + "epoch": 0.30612484585119504, + "grad_norm": 0.0, + "learning_rate": 1.626077968072299e-05, + "loss": 1.4668, + "step": 10426 + }, + { + "epoch": 0.306154207528334, + "grad_norm": 0.0, + "learning_rate": 1.62600381311237e-05, + "loss": 1.3647, + "step": 10427 + }, + { + "epoch": 0.306183569205473, + "grad_norm": 0.0, + "learning_rate": 1.625929652491328e-05, + "loss": 1.4893, + "step": 10428 + }, + { + "epoch": 0.30621293088261203, + "grad_norm": 0.0, + "learning_rate": 1.6258554862098443e-05, + "loss": 1.4092, + "step": 10429 + }, + { + "epoch": 0.306242292559751, + "grad_norm": 0.0, + "learning_rate": 1.625781314268589e-05, + "loss": 1.4795, + "step": 10430 + }, + { + "epoch": 0.30627165423689, + "grad_norm": 0.0, + "learning_rate": 1.6257071366682326e-05, + "loss": 1.4092, + "step": 10431 + }, + { + "epoch": 0.30630101591402903, + "grad_norm": 0.0, + "learning_rate": 1.6256329534094465e-05, + "loss": 1.4541, + "step": 10432 + }, + { + "epoch": 0.306330377591168, + "grad_norm": 0.0, + "learning_rate": 1.6255587644929018e-05, + "loss": 1.2402, + "step": 10433 + }, + { + "epoch": 0.306359739268307, + "grad_norm": 0.0, + "learning_rate": 1.6254845699192686e-05, + "loss": 1.3643, + "step": 10434 + }, + { + "epoch": 0.306389100945446, + "grad_norm": 0.0, + "learning_rate": 1.6254103696892182e-05, + "loss": 1.2705, + "step": 10435 + }, + { + "epoch": 0.306418462622585, + "grad_norm": 0.0, + "learning_rate": 1.625336163803422e-05, + "loss": 1.3481, + "step": 10436 + }, + { + "epoch": 0.306447824299724, + "grad_norm": 0.0, + "learning_rate": 1.6252619522625505e-05, + "loss": 1.4326, + "step": 10437 + }, + { + "epoch": 0.306477185976863, + "grad_norm": 0.0, + "learning_rate": 1.625187735067275e-05, + "loss": 1.4375, + "step": 10438 + }, + { + "epoch": 0.306506547654002, + "grad_norm": 0.0, + "learning_rate": 1.625113512218267e-05, + "loss": 1.3936, + "step": 10439 + }, + { + "epoch": 0.306535909331141, + "grad_norm": 0.0, + "learning_rate": 1.6250392837161972e-05, + "loss": 1.459, + "step": 10440 + }, + { + "epoch": 0.30656527100828, + "grad_norm": 0.0, + "learning_rate": 1.6249650495617366e-05, + "loss": 1.3906, + "step": 10441 + }, + { + "epoch": 0.306594632685419, + "grad_norm": 0.0, + "learning_rate": 1.6248908097555574e-05, + "loss": 1.248, + "step": 10442 + }, + { + "epoch": 0.306623994362558, + "grad_norm": 0.0, + "learning_rate": 1.624816564298331e-05, + "loss": 1.3291, + "step": 10443 + }, + { + "epoch": 0.306653356039697, + "grad_norm": 0.0, + "learning_rate": 1.6247423131907278e-05, + "loss": 1.3262, + "step": 10444 + }, + { + "epoch": 0.306682717716836, + "grad_norm": 0.0, + "learning_rate": 1.62466805643342e-05, + "loss": 1.4355, + "step": 10445 + }, + { + "epoch": 0.306712079393975, + "grad_norm": 0.0, + "learning_rate": 1.624593794027079e-05, + "loss": 1.4658, + "step": 10446 + }, + { + "epoch": 0.306741441071114, + "grad_norm": 0.0, + "learning_rate": 1.6245195259723766e-05, + "loss": 1.4385, + "step": 10447 + }, + { + "epoch": 0.30677080274825297, + "grad_norm": 0.0, + "learning_rate": 1.624445252269984e-05, + "loss": 1.5156, + "step": 10448 + }, + { + "epoch": 0.306800164425392, + "grad_norm": 0.0, + "learning_rate": 1.624370972920573e-05, + "loss": 1.5078, + "step": 10449 + }, + { + "epoch": 0.306829526102531, + "grad_norm": 0.0, + "learning_rate": 1.6242966879248155e-05, + "loss": 1.4238, + "step": 10450 + }, + { + "epoch": 0.30685888777966996, + "grad_norm": 0.0, + "learning_rate": 1.624222397283383e-05, + "loss": 1.4336, + "step": 10451 + }, + { + "epoch": 0.306888249456809, + "grad_norm": 0.0, + "learning_rate": 1.6241481009969478e-05, + "loss": 1.4795, + "step": 10452 + }, + { + "epoch": 0.306917611133948, + "grad_norm": 0.0, + "learning_rate": 1.624073799066181e-05, + "loss": 1.3506, + "step": 10453 + }, + { + "epoch": 0.30694697281108696, + "grad_norm": 0.0, + "learning_rate": 1.6239994914917553e-05, + "loss": 1.4473, + "step": 10454 + }, + { + "epoch": 0.306976334488226, + "grad_norm": 0.0, + "learning_rate": 1.6239251782743422e-05, + "loss": 1.4463, + "step": 10455 + }, + { + "epoch": 0.30700569616536494, + "grad_norm": 0.0, + "learning_rate": 1.6238508594146144e-05, + "loss": 1.3447, + "step": 10456 + }, + { + "epoch": 0.30703505784250396, + "grad_norm": 0.0, + "learning_rate": 1.6237765349132432e-05, + "loss": 1.4746, + "step": 10457 + }, + { + "epoch": 0.307064419519643, + "grad_norm": 0.0, + "learning_rate": 1.6237022047709013e-05, + "loss": 1.5059, + "step": 10458 + }, + { + "epoch": 0.30709378119678193, + "grad_norm": 0.0, + "learning_rate": 1.62362786898826e-05, + "loss": 1.3242, + "step": 10459 + }, + { + "epoch": 0.30712314287392095, + "grad_norm": 0.0, + "learning_rate": 1.6235535275659927e-05, + "loss": 1.3467, + "step": 10460 + }, + { + "epoch": 0.30715250455105997, + "grad_norm": 0.0, + "learning_rate": 1.623479180504771e-05, + "loss": 1.4209, + "step": 10461 + }, + { + "epoch": 0.30718186622819893, + "grad_norm": 0.0, + "learning_rate": 1.6234048278052675e-05, + "loss": 1.3408, + "step": 10462 + }, + { + "epoch": 0.30721122790533795, + "grad_norm": 0.0, + "learning_rate": 1.6233304694681543e-05, + "loss": 1.2773, + "step": 10463 + }, + { + "epoch": 0.30724058958247696, + "grad_norm": 0.0, + "learning_rate": 1.623256105494104e-05, + "loss": 1.3887, + "step": 10464 + }, + { + "epoch": 0.3072699512596159, + "grad_norm": 0.0, + "learning_rate": 1.6231817358837894e-05, + "loss": 1.4297, + "step": 10465 + }, + { + "epoch": 0.30729931293675494, + "grad_norm": 0.0, + "learning_rate": 1.6231073606378826e-05, + "loss": 1.4023, + "step": 10466 + }, + { + "epoch": 0.30732867461389396, + "grad_norm": 0.0, + "learning_rate": 1.6230329797570562e-05, + "loss": 1.4004, + "step": 10467 + }, + { + "epoch": 0.3073580362910329, + "grad_norm": 0.0, + "learning_rate": 1.6229585932419835e-05, + "loss": 1.4658, + "step": 10468 + }, + { + "epoch": 0.30738739796817194, + "grad_norm": 0.0, + "learning_rate": 1.6228842010933363e-05, + "loss": 1.3535, + "step": 10469 + }, + { + "epoch": 0.30741675964531096, + "grad_norm": 0.0, + "learning_rate": 1.622809803311788e-05, + "loss": 1.415, + "step": 10470 + }, + { + "epoch": 0.3074461213224499, + "grad_norm": 0.0, + "learning_rate": 1.622735399898011e-05, + "loss": 1.3291, + "step": 10471 + }, + { + "epoch": 0.30747548299958893, + "grad_norm": 0.0, + "learning_rate": 1.622660990852678e-05, + "loss": 1.4326, + "step": 10472 + }, + { + "epoch": 0.30750484467672795, + "grad_norm": 0.0, + "learning_rate": 1.622586576176462e-05, + "loss": 1.3516, + "step": 10473 + }, + { + "epoch": 0.3075342063538669, + "grad_norm": 0.0, + "learning_rate": 1.622512155870037e-05, + "loss": 1.3486, + "step": 10474 + }, + { + "epoch": 0.30756356803100593, + "grad_norm": 0.0, + "learning_rate": 1.622437729934075e-05, + "loss": 1.4209, + "step": 10475 + }, + { + "epoch": 0.30759292970814495, + "grad_norm": 0.0, + "learning_rate": 1.6223632983692487e-05, + "loss": 1.4609, + "step": 10476 + }, + { + "epoch": 0.3076222913852839, + "grad_norm": 0.0, + "learning_rate": 1.622288861176232e-05, + "loss": 1.2563, + "step": 10477 + }, + { + "epoch": 0.3076516530624229, + "grad_norm": 0.0, + "learning_rate": 1.622214418355698e-05, + "loss": 1.3389, + "step": 10478 + }, + { + "epoch": 0.30768101473956194, + "grad_norm": 0.0, + "learning_rate": 1.622139969908319e-05, + "loss": 1.4365, + "step": 10479 + }, + { + "epoch": 0.3077103764167009, + "grad_norm": 0.0, + "learning_rate": 1.6220655158347692e-05, + "loss": 1.459, + "step": 10480 + }, + { + "epoch": 0.3077397380938399, + "grad_norm": 0.0, + "learning_rate": 1.621991056135722e-05, + "loss": 1.3965, + "step": 10481 + }, + { + "epoch": 0.30776909977097894, + "grad_norm": 0.0, + "learning_rate": 1.62191659081185e-05, + "loss": 1.4678, + "step": 10482 + }, + { + "epoch": 0.3077984614481179, + "grad_norm": 0.0, + "learning_rate": 1.621842119863827e-05, + "loss": 1.4922, + "step": 10483 + }, + { + "epoch": 0.3078278231252569, + "grad_norm": 0.0, + "learning_rate": 1.6217676432923262e-05, + "loss": 1.3672, + "step": 10484 + }, + { + "epoch": 0.30785718480239593, + "grad_norm": 0.0, + "learning_rate": 1.621693161098022e-05, + "loss": 1.3906, + "step": 10485 + }, + { + "epoch": 0.3078865464795349, + "grad_norm": 0.0, + "learning_rate": 1.6216186732815868e-05, + "loss": 1.5693, + "step": 10486 + }, + { + "epoch": 0.3079159081566739, + "grad_norm": 0.0, + "learning_rate": 1.6215441798436952e-05, + "loss": 1.4639, + "step": 10487 + }, + { + "epoch": 0.30794526983381293, + "grad_norm": 0.0, + "learning_rate": 1.6214696807850204e-05, + "loss": 1.4297, + "step": 10488 + }, + { + "epoch": 0.3079746315109519, + "grad_norm": 0.0, + "learning_rate": 1.6213951761062356e-05, + "loss": 1.231, + "step": 10489 + }, + { + "epoch": 0.3080039931880909, + "grad_norm": 0.0, + "learning_rate": 1.6213206658080153e-05, + "loss": 1.3887, + "step": 10490 + }, + { + "epoch": 0.3080333548652299, + "grad_norm": 0.0, + "learning_rate": 1.6212461498910332e-05, + "loss": 1.375, + "step": 10491 + }, + { + "epoch": 0.3080627165423689, + "grad_norm": 0.0, + "learning_rate": 1.621171628355963e-05, + "loss": 1.4541, + "step": 10492 + }, + { + "epoch": 0.3080920782195079, + "grad_norm": 0.0, + "learning_rate": 1.6210971012034788e-05, + "loss": 1.5654, + "step": 10493 + }, + { + "epoch": 0.3081214398966469, + "grad_norm": 0.0, + "learning_rate": 1.6210225684342544e-05, + "loss": 1.3223, + "step": 10494 + }, + { + "epoch": 0.3081508015737859, + "grad_norm": 0.0, + "learning_rate": 1.6209480300489637e-05, + "loss": 1.2549, + "step": 10495 + }, + { + "epoch": 0.3081801632509249, + "grad_norm": 0.0, + "learning_rate": 1.620873486048281e-05, + "loss": 1.5352, + "step": 10496 + }, + { + "epoch": 0.3082095249280639, + "grad_norm": 0.0, + "learning_rate": 1.6207989364328804e-05, + "loss": 1.4434, + "step": 10497 + }, + { + "epoch": 0.3082388866052029, + "grad_norm": 0.0, + "learning_rate": 1.620724381203436e-05, + "loss": 1.3857, + "step": 10498 + }, + { + "epoch": 0.3082682482823419, + "grad_norm": 0.0, + "learning_rate": 1.6206498203606225e-05, + "loss": 1.4883, + "step": 10499 + }, + { + "epoch": 0.3082976099594809, + "grad_norm": 0.0, + "learning_rate": 1.6205752539051133e-05, + "loss": 1.457, + "step": 10500 + }, + { + "epoch": 0.3083269716366199, + "grad_norm": 0.0, + "learning_rate": 1.6205006818375832e-05, + "loss": 1.2842, + "step": 10501 + }, + { + "epoch": 0.3083563333137589, + "grad_norm": 0.0, + "learning_rate": 1.6204261041587065e-05, + "loss": 1.4648, + "step": 10502 + }, + { + "epoch": 0.3083856949908979, + "grad_norm": 0.0, + "learning_rate": 1.6203515208691577e-05, + "loss": 1.3203, + "step": 10503 + }, + { + "epoch": 0.30841505666803687, + "grad_norm": 0.0, + "learning_rate": 1.6202769319696108e-05, + "loss": 1.3369, + "step": 10504 + }, + { + "epoch": 0.3084444183451759, + "grad_norm": 0.0, + "learning_rate": 1.620202337460741e-05, + "loss": 1.5, + "step": 10505 + }, + { + "epoch": 0.30847378002231485, + "grad_norm": 0.0, + "learning_rate": 1.6201277373432226e-05, + "loss": 1.3076, + "step": 10506 + }, + { + "epoch": 0.30850314169945386, + "grad_norm": 0.0, + "learning_rate": 1.6200531316177308e-05, + "loss": 1.418, + "step": 10507 + }, + { + "epoch": 0.3085325033765929, + "grad_norm": 0.0, + "learning_rate": 1.6199785202849394e-05, + "loss": 1.3789, + "step": 10508 + }, + { + "epoch": 0.30856186505373184, + "grad_norm": 0.0, + "learning_rate": 1.6199039033455232e-05, + "loss": 1.4727, + "step": 10509 + }, + { + "epoch": 0.30859122673087086, + "grad_norm": 0.0, + "learning_rate": 1.6198292808001576e-05, + "loss": 1.4072, + "step": 10510 + }, + { + "epoch": 0.3086205884080099, + "grad_norm": 0.0, + "learning_rate": 1.6197546526495166e-05, + "loss": 1.5244, + "step": 10511 + }, + { + "epoch": 0.30864995008514884, + "grad_norm": 0.0, + "learning_rate": 1.6196800188942758e-05, + "loss": 1.4004, + "step": 10512 + }, + { + "epoch": 0.30867931176228786, + "grad_norm": 0.0, + "learning_rate": 1.61960537953511e-05, + "loss": 1.4287, + "step": 10513 + }, + { + "epoch": 0.3087086734394269, + "grad_norm": 0.0, + "learning_rate": 1.619530734572694e-05, + "loss": 1.4717, + "step": 10514 + }, + { + "epoch": 0.30873803511656583, + "grad_norm": 0.0, + "learning_rate": 1.619456084007703e-05, + "loss": 1.4277, + "step": 10515 + }, + { + "epoch": 0.30876739679370485, + "grad_norm": 0.0, + "learning_rate": 1.619381427840812e-05, + "loss": 1.417, + "step": 10516 + }, + { + "epoch": 0.30879675847084387, + "grad_norm": 0.0, + "learning_rate": 1.619306766072696e-05, + "loss": 1.3779, + "step": 10517 + }, + { + "epoch": 0.30882612014798283, + "grad_norm": 0.0, + "learning_rate": 1.6192320987040303e-05, + "loss": 1.3164, + "step": 10518 + }, + { + "epoch": 0.30885548182512185, + "grad_norm": 0.0, + "learning_rate": 1.61915742573549e-05, + "loss": 1.4482, + "step": 10519 + }, + { + "epoch": 0.30888484350226086, + "grad_norm": 0.0, + "learning_rate": 1.619082747167751e-05, + "loss": 1.2432, + "step": 10520 + }, + { + "epoch": 0.3089142051793998, + "grad_norm": 0.0, + "learning_rate": 1.6190080630014878e-05, + "loss": 1.4092, + "step": 10521 + }, + { + "epoch": 0.30894356685653884, + "grad_norm": 0.0, + "learning_rate": 1.618933373237376e-05, + "loss": 1.4033, + "step": 10522 + }, + { + "epoch": 0.30897292853367786, + "grad_norm": 0.0, + "learning_rate": 1.6188586778760914e-05, + "loss": 1.4648, + "step": 10523 + }, + { + "epoch": 0.3090022902108168, + "grad_norm": 0.0, + "learning_rate": 1.6187839769183095e-05, + "loss": 1.5625, + "step": 10524 + }, + { + "epoch": 0.30903165188795584, + "grad_norm": 0.0, + "learning_rate": 1.6187092703647055e-05, + "loss": 1.2949, + "step": 10525 + }, + { + "epoch": 0.30906101356509486, + "grad_norm": 0.0, + "learning_rate": 1.618634558215955e-05, + "loss": 1.3916, + "step": 10526 + }, + { + "epoch": 0.3090903752422338, + "grad_norm": 0.0, + "learning_rate": 1.618559840472734e-05, + "loss": 1.4805, + "step": 10527 + }, + { + "epoch": 0.30911973691937283, + "grad_norm": 0.0, + "learning_rate": 1.6184851171357175e-05, + "loss": 1.4512, + "step": 10528 + }, + { + "epoch": 0.30914909859651185, + "grad_norm": 0.0, + "learning_rate": 1.618410388205582e-05, + "loss": 1.4023, + "step": 10529 + }, + { + "epoch": 0.3091784602736508, + "grad_norm": 0.0, + "learning_rate": 1.618335653683003e-05, + "loss": 1.4199, + "step": 10530 + }, + { + "epoch": 0.30920782195078983, + "grad_norm": 0.0, + "learning_rate": 1.618260913568656e-05, + "loss": 1.3701, + "step": 10531 + }, + { + "epoch": 0.30923718362792885, + "grad_norm": 0.0, + "learning_rate": 1.6181861678632177e-05, + "loss": 1.3418, + "step": 10532 + }, + { + "epoch": 0.3092665453050678, + "grad_norm": 0.0, + "learning_rate": 1.6181114165673632e-05, + "loss": 1.2217, + "step": 10533 + }, + { + "epoch": 0.3092959069822068, + "grad_norm": 0.0, + "learning_rate": 1.618036659681769e-05, + "loss": 1.417, + "step": 10534 + }, + { + "epoch": 0.30932526865934584, + "grad_norm": 0.0, + "learning_rate": 1.6179618972071107e-05, + "loss": 1.335, + "step": 10535 + }, + { + "epoch": 0.3093546303364848, + "grad_norm": 0.0, + "learning_rate": 1.617887129144065e-05, + "loss": 1.416, + "step": 10536 + }, + { + "epoch": 0.3093839920136238, + "grad_norm": 0.0, + "learning_rate": 1.617812355493308e-05, + "loss": 1.5898, + "step": 10537 + }, + { + "epoch": 0.30941335369076284, + "grad_norm": 0.0, + "learning_rate": 1.6177375762555153e-05, + "loss": 1.3081, + "step": 10538 + }, + { + "epoch": 0.3094427153679018, + "grad_norm": 0.0, + "learning_rate": 1.6176627914313636e-05, + "loss": 1.4004, + "step": 10539 + }, + { + "epoch": 0.3094720770450408, + "grad_norm": 0.0, + "learning_rate": 1.6175880010215293e-05, + "loss": 1.4844, + "step": 10540 + }, + { + "epoch": 0.30950143872217983, + "grad_norm": 0.0, + "learning_rate": 1.617513205026688e-05, + "loss": 1.4043, + "step": 10541 + }, + { + "epoch": 0.3095308003993188, + "grad_norm": 0.0, + "learning_rate": 1.617438403447517e-05, + "loss": 1.3594, + "step": 10542 + }, + { + "epoch": 0.3095601620764578, + "grad_norm": 0.0, + "learning_rate": 1.617363596284692e-05, + "loss": 1.3047, + "step": 10543 + }, + { + "epoch": 0.30958952375359683, + "grad_norm": 0.0, + "learning_rate": 1.6172887835388907e-05, + "loss": 1.3379, + "step": 10544 + }, + { + "epoch": 0.3096188854307358, + "grad_norm": 0.0, + "learning_rate": 1.6172139652107885e-05, + "loss": 1.4238, + "step": 10545 + }, + { + "epoch": 0.3096482471078748, + "grad_norm": 0.0, + "learning_rate": 1.617139141301062e-05, + "loss": 1.4277, + "step": 10546 + }, + { + "epoch": 0.3096776087850138, + "grad_norm": 0.0, + "learning_rate": 1.6170643118103882e-05, + "loss": 1.4473, + "step": 10547 + }, + { + "epoch": 0.3097069704621528, + "grad_norm": 0.0, + "learning_rate": 1.616989476739444e-05, + "loss": 1.3735, + "step": 10548 + }, + { + "epoch": 0.3097363321392918, + "grad_norm": 0.0, + "learning_rate": 1.616914636088906e-05, + "loss": 1.3799, + "step": 10549 + }, + { + "epoch": 0.3097656938164308, + "grad_norm": 0.0, + "learning_rate": 1.616839789859451e-05, + "loss": 1.3779, + "step": 10550 + }, + { + "epoch": 0.3097950554935698, + "grad_norm": 0.0, + "learning_rate": 1.616764938051756e-05, + "loss": 1.4297, + "step": 10551 + }, + { + "epoch": 0.3098244171707088, + "grad_norm": 0.0, + "learning_rate": 1.6166900806664972e-05, + "loss": 1.3447, + "step": 10552 + }, + { + "epoch": 0.3098537788478478, + "grad_norm": 0.0, + "learning_rate": 1.6166152177043523e-05, + "loss": 1.3809, + "step": 10553 + }, + { + "epoch": 0.3098831405249868, + "grad_norm": 0.0, + "learning_rate": 1.616540349165998e-05, + "loss": 1.2876, + "step": 10554 + }, + { + "epoch": 0.3099125022021258, + "grad_norm": 0.0, + "learning_rate": 1.6164654750521114e-05, + "loss": 1.4473, + "step": 10555 + }, + { + "epoch": 0.30994186387926476, + "grad_norm": 0.0, + "learning_rate": 1.6163905953633697e-05, + "loss": 1.3115, + "step": 10556 + }, + { + "epoch": 0.3099712255564038, + "grad_norm": 0.0, + "learning_rate": 1.6163157101004502e-05, + "loss": 1.4365, + "step": 10557 + }, + { + "epoch": 0.3100005872335428, + "grad_norm": 0.0, + "learning_rate": 1.6162408192640295e-05, + "loss": 1.4277, + "step": 10558 + }, + { + "epoch": 0.31002994891068175, + "grad_norm": 0.0, + "learning_rate": 1.6161659228547856e-05, + "loss": 1.4443, + "step": 10559 + }, + { + "epoch": 0.31005931058782077, + "grad_norm": 0.0, + "learning_rate": 1.616091020873395e-05, + "loss": 1.3633, + "step": 10560 + }, + { + "epoch": 0.3100886722649598, + "grad_norm": 0.0, + "learning_rate": 1.6160161133205358e-05, + "loss": 1.3975, + "step": 10561 + }, + { + "epoch": 0.31011803394209875, + "grad_norm": 0.0, + "learning_rate": 1.6159412001968853e-05, + "loss": 1.4053, + "step": 10562 + }, + { + "epoch": 0.31014739561923776, + "grad_norm": 0.0, + "learning_rate": 1.6158662815031207e-05, + "loss": 1.3955, + "step": 10563 + }, + { + "epoch": 0.3101767572963768, + "grad_norm": 0.0, + "learning_rate": 1.6157913572399193e-05, + "loss": 1.3672, + "step": 10564 + }, + { + "epoch": 0.31020611897351574, + "grad_norm": 0.0, + "learning_rate": 1.615716427407959e-05, + "loss": 1.4077, + "step": 10565 + }, + { + "epoch": 0.31023548065065476, + "grad_norm": 0.0, + "learning_rate": 1.6156414920079176e-05, + "loss": 1.2905, + "step": 10566 + }, + { + "epoch": 0.3102648423277938, + "grad_norm": 0.0, + "learning_rate": 1.6155665510404727e-05, + "loss": 1.4199, + "step": 10567 + }, + { + "epoch": 0.31029420400493274, + "grad_norm": 0.0, + "learning_rate": 1.6154916045063014e-05, + "loss": 1.3643, + "step": 10568 + }, + { + "epoch": 0.31032356568207176, + "grad_norm": 0.0, + "learning_rate": 1.6154166524060818e-05, + "loss": 1.4902, + "step": 10569 + }, + { + "epoch": 0.3103529273592108, + "grad_norm": 0.0, + "learning_rate": 1.6153416947404922e-05, + "loss": 1.3291, + "step": 10570 + }, + { + "epoch": 0.31038228903634973, + "grad_norm": 0.0, + "learning_rate": 1.61526673151021e-05, + "loss": 1.3662, + "step": 10571 + }, + { + "epoch": 0.31041165071348875, + "grad_norm": 0.0, + "learning_rate": 1.6151917627159132e-05, + "loss": 1.4688, + "step": 10572 + }, + { + "epoch": 0.31044101239062777, + "grad_norm": 0.0, + "learning_rate": 1.6151167883582794e-05, + "loss": 1.3105, + "step": 10573 + }, + { + "epoch": 0.31047037406776673, + "grad_norm": 0.0, + "learning_rate": 1.615041808437987e-05, + "loss": 1.4297, + "step": 10574 + }, + { + "epoch": 0.31049973574490575, + "grad_norm": 0.0, + "learning_rate": 1.614966822955714e-05, + "loss": 1.3154, + "step": 10575 + }, + { + "epoch": 0.31052909742204476, + "grad_norm": 0.0, + "learning_rate": 1.614891831912139e-05, + "loss": 1.4658, + "step": 10576 + }, + { + "epoch": 0.3105584590991837, + "grad_norm": 0.0, + "learning_rate": 1.6148168353079397e-05, + "loss": 1.4248, + "step": 10577 + }, + { + "epoch": 0.31058782077632274, + "grad_norm": 0.0, + "learning_rate": 1.6147418331437938e-05, + "loss": 1.3828, + "step": 10578 + }, + { + "epoch": 0.31061718245346176, + "grad_norm": 0.0, + "learning_rate": 1.6146668254203805e-05, + "loss": 1.4082, + "step": 10579 + }, + { + "epoch": 0.3106465441306007, + "grad_norm": 0.0, + "learning_rate": 1.6145918121383772e-05, + "loss": 1.2676, + "step": 10580 + }, + { + "epoch": 0.31067590580773974, + "grad_norm": 0.0, + "learning_rate": 1.6145167932984632e-05, + "loss": 1.3213, + "step": 10581 + }, + { + "epoch": 0.31070526748487876, + "grad_norm": 0.0, + "learning_rate": 1.6144417689013158e-05, + "loss": 1.4111, + "step": 10582 + }, + { + "epoch": 0.3107346291620177, + "grad_norm": 0.0, + "learning_rate": 1.6143667389476146e-05, + "loss": 1.334, + "step": 10583 + }, + { + "epoch": 0.31076399083915673, + "grad_norm": 0.0, + "learning_rate": 1.614291703438038e-05, + "loss": 1.501, + "step": 10584 + }, + { + "epoch": 0.31079335251629575, + "grad_norm": 0.0, + "learning_rate": 1.6142166623732636e-05, + "loss": 1.4414, + "step": 10585 + }, + { + "epoch": 0.3108227141934347, + "grad_norm": 0.0, + "learning_rate": 1.614141615753971e-05, + "loss": 1.4277, + "step": 10586 + }, + { + "epoch": 0.31085207587057373, + "grad_norm": 0.0, + "learning_rate": 1.6140665635808382e-05, + "loss": 1.3242, + "step": 10587 + }, + { + "epoch": 0.31088143754771275, + "grad_norm": 0.0, + "learning_rate": 1.6139915058545442e-05, + "loss": 1.3301, + "step": 10588 + }, + { + "epoch": 0.3109107992248517, + "grad_norm": 0.0, + "learning_rate": 1.613916442575768e-05, + "loss": 1.4238, + "step": 10589 + }, + { + "epoch": 0.3109401609019907, + "grad_norm": 0.0, + "learning_rate": 1.613841373745188e-05, + "loss": 1.2856, + "step": 10590 + }, + { + "epoch": 0.31096952257912974, + "grad_norm": 0.0, + "learning_rate": 1.6137662993634834e-05, + "loss": 1.3662, + "step": 10591 + }, + { + "epoch": 0.3109988842562687, + "grad_norm": 0.0, + "learning_rate": 1.6136912194313326e-05, + "loss": 1.5312, + "step": 10592 + }, + { + "epoch": 0.3110282459334077, + "grad_norm": 0.0, + "learning_rate": 1.6136161339494152e-05, + "loss": 1.2744, + "step": 10593 + }, + { + "epoch": 0.31105760761054674, + "grad_norm": 0.0, + "learning_rate": 1.6135410429184102e-05, + "loss": 1.2959, + "step": 10594 + }, + { + "epoch": 0.3110869692876857, + "grad_norm": 0.0, + "learning_rate": 1.613465946338996e-05, + "loss": 1.4365, + "step": 10595 + }, + { + "epoch": 0.3111163309648247, + "grad_norm": 0.0, + "learning_rate": 1.6133908442118523e-05, + "loss": 1.332, + "step": 10596 + }, + { + "epoch": 0.31114569264196373, + "grad_norm": 0.0, + "learning_rate": 1.6133157365376585e-05, + "loss": 1.415, + "step": 10597 + }, + { + "epoch": 0.3111750543191027, + "grad_norm": 0.0, + "learning_rate": 1.613240623317093e-05, + "loss": 1.3311, + "step": 10598 + }, + { + "epoch": 0.3112044159962417, + "grad_norm": 0.0, + "learning_rate": 1.6131655045508353e-05, + "loss": 1.5107, + "step": 10599 + }, + { + "epoch": 0.31123377767338073, + "grad_norm": 0.0, + "learning_rate": 1.6130903802395653e-05, + "loss": 1.3198, + "step": 10600 + }, + { + "epoch": 0.3112631393505197, + "grad_norm": 0.0, + "learning_rate": 1.6130152503839614e-05, + "loss": 1.4678, + "step": 10601 + }, + { + "epoch": 0.3112925010276587, + "grad_norm": 0.0, + "learning_rate": 1.6129401149847045e-05, + "loss": 1.4316, + "step": 10602 + }, + { + "epoch": 0.3113218627047977, + "grad_norm": 0.0, + "learning_rate": 1.6128649740424727e-05, + "loss": 1.333, + "step": 10603 + }, + { + "epoch": 0.3113512243819367, + "grad_norm": 0.0, + "learning_rate": 1.6127898275579455e-05, + "loss": 1.2764, + "step": 10604 + }, + { + "epoch": 0.3113805860590757, + "grad_norm": 0.0, + "learning_rate": 1.612714675531804e-05, + "loss": 1.4463, + "step": 10605 + }, + { + "epoch": 0.31140994773621467, + "grad_norm": 0.0, + "learning_rate": 1.612639517964726e-05, + "loss": 1.4238, + "step": 10606 + }, + { + "epoch": 0.3114393094133537, + "grad_norm": 0.0, + "learning_rate": 1.612564354857392e-05, + "loss": 1.4131, + "step": 10607 + }, + { + "epoch": 0.3114686710904927, + "grad_norm": 0.0, + "learning_rate": 1.612489186210482e-05, + "loss": 1.2593, + "step": 10608 + }, + { + "epoch": 0.31149803276763166, + "grad_norm": 0.0, + "learning_rate": 1.612414012024675e-05, + "loss": 1.2949, + "step": 10609 + }, + { + "epoch": 0.3115273944447707, + "grad_norm": 0.0, + "learning_rate": 1.6123388323006513e-05, + "loss": 1.2871, + "step": 10610 + }, + { + "epoch": 0.3115567561219097, + "grad_norm": 0.0, + "learning_rate": 1.612263647039091e-05, + "loss": 1.3057, + "step": 10611 + }, + { + "epoch": 0.31158611779904866, + "grad_norm": 0.0, + "learning_rate": 1.6121884562406735e-05, + "loss": 1.3623, + "step": 10612 + }, + { + "epoch": 0.3116154794761877, + "grad_norm": 0.0, + "learning_rate": 1.6121132599060794e-05, + "loss": 1.2969, + "step": 10613 + }, + { + "epoch": 0.3116448411533267, + "grad_norm": 0.0, + "learning_rate": 1.6120380580359878e-05, + "loss": 1.2969, + "step": 10614 + }, + { + "epoch": 0.31167420283046565, + "grad_norm": 0.0, + "learning_rate": 1.6119628506310797e-05, + "loss": 1.3594, + "step": 10615 + }, + { + "epoch": 0.31170356450760467, + "grad_norm": 0.0, + "learning_rate": 1.6118876376920348e-05, + "loss": 1.3564, + "step": 10616 + }, + { + "epoch": 0.3117329261847437, + "grad_norm": 0.0, + "learning_rate": 1.611812419219533e-05, + "loss": 1.3613, + "step": 10617 + }, + { + "epoch": 0.31176228786188265, + "grad_norm": 0.0, + "learning_rate": 1.611737195214255e-05, + "loss": 1.3325, + "step": 10618 + }, + { + "epoch": 0.31179164953902166, + "grad_norm": 0.0, + "learning_rate": 1.6116619656768805e-05, + "loss": 1.3428, + "step": 10619 + }, + { + "epoch": 0.3118210112161607, + "grad_norm": 0.0, + "learning_rate": 1.6115867306080908e-05, + "loss": 1.4766, + "step": 10620 + }, + { + "epoch": 0.31185037289329964, + "grad_norm": 0.0, + "learning_rate": 1.6115114900085652e-05, + "loss": 1.373, + "step": 10621 + }, + { + "epoch": 0.31187973457043866, + "grad_norm": 0.0, + "learning_rate": 1.611436243878985e-05, + "loss": 1.4414, + "step": 10622 + }, + { + "epoch": 0.3119090962475777, + "grad_norm": 0.0, + "learning_rate": 1.6113609922200297e-05, + "loss": 1.3682, + "step": 10623 + }, + { + "epoch": 0.31193845792471664, + "grad_norm": 0.0, + "learning_rate": 1.611285735032381e-05, + "loss": 1.4297, + "step": 10624 + }, + { + "epoch": 0.31196781960185566, + "grad_norm": 0.0, + "learning_rate": 1.6112104723167185e-05, + "loss": 1.4922, + "step": 10625 + }, + { + "epoch": 0.3119971812789947, + "grad_norm": 0.0, + "learning_rate": 1.6111352040737235e-05, + "loss": 1.3604, + "step": 10626 + }, + { + "epoch": 0.31202654295613363, + "grad_norm": 0.0, + "learning_rate": 1.6110599303040757e-05, + "loss": 1.5557, + "step": 10627 + }, + { + "epoch": 0.31205590463327265, + "grad_norm": 0.0, + "learning_rate": 1.6109846510084568e-05, + "loss": 1.3018, + "step": 10628 + }, + { + "epoch": 0.31208526631041167, + "grad_norm": 0.0, + "learning_rate": 1.6109093661875474e-05, + "loss": 1.3564, + "step": 10629 + }, + { + "epoch": 0.31211462798755063, + "grad_norm": 0.0, + "learning_rate": 1.610834075842028e-05, + "loss": 1.3354, + "step": 10630 + }, + { + "epoch": 0.31214398966468965, + "grad_norm": 0.0, + "learning_rate": 1.6107587799725794e-05, + "loss": 1.3809, + "step": 10631 + }, + { + "epoch": 0.31217335134182866, + "grad_norm": 0.0, + "learning_rate": 1.6106834785798827e-05, + "loss": 1.415, + "step": 10632 + }, + { + "epoch": 0.3122027130189676, + "grad_norm": 0.0, + "learning_rate": 1.6106081716646194e-05, + "loss": 1.3359, + "step": 10633 + }, + { + "epoch": 0.31223207469610664, + "grad_norm": 0.0, + "learning_rate": 1.61053285922747e-05, + "loss": 1.2275, + "step": 10634 + }, + { + "epoch": 0.31226143637324566, + "grad_norm": 0.0, + "learning_rate": 1.6104575412691152e-05, + "loss": 1.2515, + "step": 10635 + }, + { + "epoch": 0.3122907980503846, + "grad_norm": 0.0, + "learning_rate": 1.6103822177902367e-05, + "loss": 1.2217, + "step": 10636 + }, + { + "epoch": 0.31232015972752364, + "grad_norm": 0.0, + "learning_rate": 1.6103068887915153e-05, + "loss": 1.417, + "step": 10637 + }, + { + "epoch": 0.31234952140466266, + "grad_norm": 0.0, + "learning_rate": 1.6102315542736328e-05, + "loss": 1.25, + "step": 10638 + }, + { + "epoch": 0.3123788830818016, + "grad_norm": 0.0, + "learning_rate": 1.6101562142372694e-05, + "loss": 1.4502, + "step": 10639 + }, + { + "epoch": 0.31240824475894063, + "grad_norm": 0.0, + "learning_rate": 1.6100808686831077e-05, + "loss": 1.3408, + "step": 10640 + }, + { + "epoch": 0.31243760643607965, + "grad_norm": 0.0, + "learning_rate": 1.6100055176118284e-05, + "loss": 1.3662, + "step": 10641 + }, + { + "epoch": 0.3124669681132186, + "grad_norm": 0.0, + "learning_rate": 1.6099301610241126e-05, + "loss": 1.4492, + "step": 10642 + }, + { + "epoch": 0.31249632979035763, + "grad_norm": 0.0, + "learning_rate": 1.6098547989206425e-05, + "loss": 1.3896, + "step": 10643 + }, + { + "epoch": 0.31252569146749665, + "grad_norm": 0.0, + "learning_rate": 1.6097794313020992e-05, + "loss": 1.4092, + "step": 10644 + }, + { + "epoch": 0.3125550531446356, + "grad_norm": 0.0, + "learning_rate": 1.6097040581691645e-05, + "loss": 1.2979, + "step": 10645 + }, + { + "epoch": 0.3125844148217746, + "grad_norm": 0.0, + "learning_rate": 1.6096286795225196e-05, + "loss": 1.3516, + "step": 10646 + }, + { + "epoch": 0.31261377649891364, + "grad_norm": 0.0, + "learning_rate": 1.6095532953628462e-05, + "loss": 1.3203, + "step": 10647 + }, + { + "epoch": 0.3126431381760526, + "grad_norm": 0.0, + "learning_rate": 1.6094779056908264e-05, + "loss": 1.3838, + "step": 10648 + }, + { + "epoch": 0.3126724998531916, + "grad_norm": 0.0, + "learning_rate": 1.6094025105071422e-05, + "loss": 1.3223, + "step": 10649 + }, + { + "epoch": 0.31270186153033064, + "grad_norm": 0.0, + "learning_rate": 1.6093271098124746e-05, + "loss": 1.4375, + "step": 10650 + }, + { + "epoch": 0.3127312232074696, + "grad_norm": 0.0, + "learning_rate": 1.609251703607506e-05, + "loss": 1.3135, + "step": 10651 + }, + { + "epoch": 0.3127605848846086, + "grad_norm": 0.0, + "learning_rate": 1.6091762918929185e-05, + "loss": 1.3857, + "step": 10652 + }, + { + "epoch": 0.31278994656174763, + "grad_norm": 0.0, + "learning_rate": 1.6091008746693933e-05, + "loss": 1.4482, + "step": 10653 + }, + { + "epoch": 0.3128193082388866, + "grad_norm": 0.0, + "learning_rate": 1.6090254519376132e-05, + "loss": 1.3291, + "step": 10654 + }, + { + "epoch": 0.3128486699160256, + "grad_norm": 0.0, + "learning_rate": 1.60895002369826e-05, + "loss": 1.3779, + "step": 10655 + }, + { + "epoch": 0.3128780315931646, + "grad_norm": 0.0, + "learning_rate": 1.6088745899520157e-05, + "loss": 1.375, + "step": 10656 + }, + { + "epoch": 0.3129073932703036, + "grad_norm": 0.0, + "learning_rate": 1.6087991506995627e-05, + "loss": 1.333, + "step": 10657 + }, + { + "epoch": 0.3129367549474426, + "grad_norm": 0.0, + "learning_rate": 1.6087237059415826e-05, + "loss": 1.3916, + "step": 10658 + }, + { + "epoch": 0.31296611662458157, + "grad_norm": 0.0, + "learning_rate": 1.6086482556787587e-05, + "loss": 1.4277, + "step": 10659 + }, + { + "epoch": 0.3129954783017206, + "grad_norm": 0.0, + "learning_rate": 1.608572799911772e-05, + "loss": 1.377, + "step": 10660 + }, + { + "epoch": 0.3130248399788596, + "grad_norm": 0.0, + "learning_rate": 1.6084973386413065e-05, + "loss": 1.3721, + "step": 10661 + }, + { + "epoch": 0.31305420165599857, + "grad_norm": 0.0, + "learning_rate": 1.6084218718680432e-05, + "loss": 1.2314, + "step": 10662 + }, + { + "epoch": 0.3130835633331376, + "grad_norm": 0.0, + "learning_rate": 1.608346399592665e-05, + "loss": 1.418, + "step": 10663 + }, + { + "epoch": 0.3131129250102766, + "grad_norm": 0.0, + "learning_rate": 1.608270921815855e-05, + "loss": 1.3047, + "step": 10664 + }, + { + "epoch": 0.31314228668741556, + "grad_norm": 0.0, + "learning_rate": 1.608195438538295e-05, + "loss": 1.3447, + "step": 10665 + }, + { + "epoch": 0.3131716483645546, + "grad_norm": 0.0, + "learning_rate": 1.6081199497606678e-05, + "loss": 1.4736, + "step": 10666 + }, + { + "epoch": 0.3132010100416936, + "grad_norm": 0.0, + "learning_rate": 1.6080444554836562e-05, + "loss": 1.335, + "step": 10667 + }, + { + "epoch": 0.31323037171883256, + "grad_norm": 0.0, + "learning_rate": 1.607968955707943e-05, + "loss": 1.3311, + "step": 10668 + }, + { + "epoch": 0.3132597333959716, + "grad_norm": 0.0, + "learning_rate": 1.6078934504342108e-05, + "loss": 1.4941, + "step": 10669 + }, + { + "epoch": 0.3132890950731106, + "grad_norm": 0.0, + "learning_rate": 1.6078179396631425e-05, + "loss": 1.2891, + "step": 10670 + }, + { + "epoch": 0.31331845675024955, + "grad_norm": 0.0, + "learning_rate": 1.6077424233954205e-05, + "loss": 1.418, + "step": 10671 + }, + { + "epoch": 0.31334781842738857, + "grad_norm": 0.0, + "learning_rate": 1.6076669016317283e-05, + "loss": 1.417, + "step": 10672 + }, + { + "epoch": 0.3133771801045276, + "grad_norm": 0.0, + "learning_rate": 1.607591374372749e-05, + "loss": 1.2446, + "step": 10673 + }, + { + "epoch": 0.31340654178166655, + "grad_norm": 0.0, + "learning_rate": 1.607515841619165e-05, + "loss": 1.3398, + "step": 10674 + }, + { + "epoch": 0.31343590345880556, + "grad_norm": 0.0, + "learning_rate": 1.60744030337166e-05, + "loss": 1.3604, + "step": 10675 + }, + { + "epoch": 0.3134652651359446, + "grad_norm": 0.0, + "learning_rate": 1.6073647596309162e-05, + "loss": 1.416, + "step": 10676 + }, + { + "epoch": 0.31349462681308354, + "grad_norm": 0.0, + "learning_rate": 1.6072892103976176e-05, + "loss": 1.3916, + "step": 10677 + }, + { + "epoch": 0.31352398849022256, + "grad_norm": 0.0, + "learning_rate": 1.6072136556724473e-05, + "loss": 1.4463, + "step": 10678 + }, + { + "epoch": 0.3135533501673616, + "grad_norm": 0.0, + "learning_rate": 1.607138095456088e-05, + "loss": 1.3799, + "step": 10679 + }, + { + "epoch": 0.31358271184450054, + "grad_norm": 0.0, + "learning_rate": 1.607062529749224e-05, + "loss": 1.374, + "step": 10680 + }, + { + "epoch": 0.31361207352163956, + "grad_norm": 0.0, + "learning_rate": 1.6069869585525377e-05, + "loss": 1.3711, + "step": 10681 + }, + { + "epoch": 0.3136414351987786, + "grad_norm": 0.0, + "learning_rate": 1.606911381866713e-05, + "loss": 1.3105, + "step": 10682 + }, + { + "epoch": 0.31367079687591753, + "grad_norm": 0.0, + "learning_rate": 1.606835799692433e-05, + "loss": 1.4092, + "step": 10683 + }, + { + "epoch": 0.31370015855305655, + "grad_norm": 0.0, + "learning_rate": 1.606760212030382e-05, + "loss": 1.3438, + "step": 10684 + }, + { + "epoch": 0.31372952023019557, + "grad_norm": 0.0, + "learning_rate": 1.6066846188812425e-05, + "loss": 1.542, + "step": 10685 + }, + { + "epoch": 0.31375888190733453, + "grad_norm": 0.0, + "learning_rate": 1.6066090202456986e-05, + "loss": 1.4541, + "step": 10686 + }, + { + "epoch": 0.31378824358447355, + "grad_norm": 0.0, + "learning_rate": 1.6065334161244343e-05, + "loss": 1.4355, + "step": 10687 + }, + { + "epoch": 0.31381760526161256, + "grad_norm": 0.0, + "learning_rate": 1.6064578065181325e-05, + "loss": 1.2578, + "step": 10688 + }, + { + "epoch": 0.3138469669387515, + "grad_norm": 0.0, + "learning_rate": 1.606382191427478e-05, + "loss": 1.3809, + "step": 10689 + }, + { + "epoch": 0.31387632861589054, + "grad_norm": 0.0, + "learning_rate": 1.6063065708531537e-05, + "loss": 1.4082, + "step": 10690 + }, + { + "epoch": 0.31390569029302956, + "grad_norm": 0.0, + "learning_rate": 1.606230944795844e-05, + "loss": 1.3203, + "step": 10691 + }, + { + "epoch": 0.3139350519701685, + "grad_norm": 0.0, + "learning_rate": 1.6061553132562323e-05, + "loss": 1.5615, + "step": 10692 + }, + { + "epoch": 0.31396441364730754, + "grad_norm": 0.0, + "learning_rate": 1.606079676235003e-05, + "loss": 1.4863, + "step": 10693 + }, + { + "epoch": 0.31399377532444656, + "grad_norm": 0.0, + "learning_rate": 1.6060040337328403e-05, + "loss": 1.3232, + "step": 10694 + }, + { + "epoch": 0.3140231370015855, + "grad_norm": 0.0, + "learning_rate": 1.6059283857504278e-05, + "loss": 1.3203, + "step": 10695 + }, + { + "epoch": 0.31405249867872453, + "grad_norm": 0.0, + "learning_rate": 1.6058527322884497e-05, + "loss": 1.4297, + "step": 10696 + }, + { + "epoch": 0.31408186035586355, + "grad_norm": 0.0, + "learning_rate": 1.60577707334759e-05, + "loss": 1.2461, + "step": 10697 + }, + { + "epoch": 0.3141112220330025, + "grad_norm": 0.0, + "learning_rate": 1.6057014089285334e-05, + "loss": 1.4424, + "step": 10698 + }, + { + "epoch": 0.31414058371014153, + "grad_norm": 0.0, + "learning_rate": 1.6056257390319633e-05, + "loss": 1.2881, + "step": 10699 + }, + { + "epoch": 0.31416994538728055, + "grad_norm": 0.0, + "learning_rate": 1.605550063658565e-05, + "loss": 1.4443, + "step": 10700 + }, + { + "epoch": 0.3141993070644195, + "grad_norm": 0.0, + "learning_rate": 1.605474382809022e-05, + "loss": 1.3003, + "step": 10701 + }, + { + "epoch": 0.3142286687415585, + "grad_norm": 0.0, + "learning_rate": 1.605398696484019e-05, + "loss": 1.3398, + "step": 10702 + }, + { + "epoch": 0.31425803041869754, + "grad_norm": 0.0, + "learning_rate": 1.605323004684241e-05, + "loss": 1.3574, + "step": 10703 + }, + { + "epoch": 0.3142873920958365, + "grad_norm": 0.0, + "learning_rate": 1.6052473074103723e-05, + "loss": 1.3672, + "step": 10704 + }, + { + "epoch": 0.3143167537729755, + "grad_norm": 0.0, + "learning_rate": 1.6051716046630964e-05, + "loss": 1.2563, + "step": 10705 + }, + { + "epoch": 0.31434611545011454, + "grad_norm": 0.0, + "learning_rate": 1.6050958964430993e-05, + "loss": 1.4531, + "step": 10706 + }, + { + "epoch": 0.3143754771272535, + "grad_norm": 0.0, + "learning_rate": 1.6050201827510647e-05, + "loss": 1.3984, + "step": 10707 + }, + { + "epoch": 0.3144048388043925, + "grad_norm": 0.0, + "learning_rate": 1.6049444635876778e-05, + "loss": 1.3281, + "step": 10708 + }, + { + "epoch": 0.3144342004815315, + "grad_norm": 0.0, + "learning_rate": 1.604868738953623e-05, + "loss": 1.4238, + "step": 10709 + }, + { + "epoch": 0.3144635621586705, + "grad_norm": 0.0, + "learning_rate": 1.6047930088495856e-05, + "loss": 1.3867, + "step": 10710 + }, + { + "epoch": 0.3144929238358095, + "grad_norm": 0.0, + "learning_rate": 1.60471727327625e-05, + "loss": 1.3965, + "step": 10711 + }, + { + "epoch": 0.3145222855129485, + "grad_norm": 0.0, + "learning_rate": 1.6046415322343013e-05, + "loss": 1.4258, + "step": 10712 + }, + { + "epoch": 0.3145516471900875, + "grad_norm": 0.0, + "learning_rate": 1.604565785724424e-05, + "loss": 1.292, + "step": 10713 + }, + { + "epoch": 0.3145810088672265, + "grad_norm": 0.0, + "learning_rate": 1.604490033747304e-05, + "loss": 1.4395, + "step": 10714 + }, + { + "epoch": 0.31461037054436547, + "grad_norm": 0.0, + "learning_rate": 1.6044142763036257e-05, + "loss": 1.4375, + "step": 10715 + }, + { + "epoch": 0.3146397322215045, + "grad_norm": 0.0, + "learning_rate": 1.604338513394074e-05, + "loss": 1.3701, + "step": 10716 + }, + { + "epoch": 0.3146690938986435, + "grad_norm": 0.0, + "learning_rate": 1.6042627450193348e-05, + "loss": 1.3438, + "step": 10717 + }, + { + "epoch": 0.31469845557578247, + "grad_norm": 0.0, + "learning_rate": 1.6041869711800923e-05, + "loss": 1.3877, + "step": 10718 + }, + { + "epoch": 0.3147278172529215, + "grad_norm": 0.0, + "learning_rate": 1.6041111918770324e-05, + "loss": 1.2407, + "step": 10719 + }, + { + "epoch": 0.3147571789300605, + "grad_norm": 0.0, + "learning_rate": 1.6040354071108406e-05, + "loss": 1.4131, + "step": 10720 + }, + { + "epoch": 0.31478654060719946, + "grad_norm": 0.0, + "learning_rate": 1.6039596168822015e-05, + "loss": 1.3262, + "step": 10721 + }, + { + "epoch": 0.3148159022843385, + "grad_norm": 0.0, + "learning_rate": 1.6038838211918015e-05, + "loss": 1.2729, + "step": 10722 + }, + { + "epoch": 0.3148452639614775, + "grad_norm": 0.0, + "learning_rate": 1.603808020040325e-05, + "loss": 1.4854, + "step": 10723 + }, + { + "epoch": 0.31487462563861646, + "grad_norm": 0.0, + "learning_rate": 1.603732213428458e-05, + "loss": 1.2896, + "step": 10724 + }, + { + "epoch": 0.3149039873157555, + "grad_norm": 0.0, + "learning_rate": 1.603656401356886e-05, + "loss": 1.2935, + "step": 10725 + }, + { + "epoch": 0.3149333489928945, + "grad_norm": 0.0, + "learning_rate": 1.6035805838262944e-05, + "loss": 1.3623, + "step": 10726 + }, + { + "epoch": 0.31496271067003345, + "grad_norm": 0.0, + "learning_rate": 1.6035047608373693e-05, + "loss": 1.5332, + "step": 10727 + }, + { + "epoch": 0.31499207234717247, + "grad_norm": 0.0, + "learning_rate": 1.603428932390796e-05, + "loss": 1.4043, + "step": 10728 + }, + { + "epoch": 0.3150214340243115, + "grad_norm": 0.0, + "learning_rate": 1.60335309848726e-05, + "loss": 1.3486, + "step": 10729 + }, + { + "epoch": 0.31505079570145045, + "grad_norm": 0.0, + "learning_rate": 1.603277259127448e-05, + "loss": 1.3774, + "step": 10730 + }, + { + "epoch": 0.31508015737858946, + "grad_norm": 0.0, + "learning_rate": 1.6032014143120447e-05, + "loss": 1.4219, + "step": 10731 + }, + { + "epoch": 0.3151095190557285, + "grad_norm": 0.0, + "learning_rate": 1.603125564041737e-05, + "loss": 1.46, + "step": 10732 + }, + { + "epoch": 0.31513888073286744, + "grad_norm": 0.0, + "learning_rate": 1.60304970831721e-05, + "loss": 1.3071, + "step": 10733 + }, + { + "epoch": 0.31516824241000646, + "grad_norm": 0.0, + "learning_rate": 1.60297384713915e-05, + "loss": 1.5137, + "step": 10734 + }, + { + "epoch": 0.3151976040871455, + "grad_norm": 0.0, + "learning_rate": 1.6028979805082435e-05, + "loss": 1.4307, + "step": 10735 + }, + { + "epoch": 0.31522696576428444, + "grad_norm": 0.0, + "learning_rate": 1.602822108425176e-05, + "loss": 1.4912, + "step": 10736 + }, + { + "epoch": 0.31525632744142346, + "grad_norm": 0.0, + "learning_rate": 1.6027462308906337e-05, + "loss": 1.5176, + "step": 10737 + }, + { + "epoch": 0.3152856891185625, + "grad_norm": 0.0, + "learning_rate": 1.6026703479053027e-05, + "loss": 1.4141, + "step": 10738 + }, + { + "epoch": 0.31531505079570143, + "grad_norm": 0.0, + "learning_rate": 1.6025944594698697e-05, + "loss": 1.4043, + "step": 10739 + }, + { + "epoch": 0.31534441247284045, + "grad_norm": 0.0, + "learning_rate": 1.60251856558502e-05, + "loss": 1.3896, + "step": 10740 + }, + { + "epoch": 0.31537377414997947, + "grad_norm": 0.0, + "learning_rate": 1.6024426662514416e-05, + "loss": 1.46, + "step": 10741 + }, + { + "epoch": 0.31540313582711843, + "grad_norm": 0.0, + "learning_rate": 1.6023667614698193e-05, + "loss": 1.3623, + "step": 10742 + }, + { + "epoch": 0.31543249750425745, + "grad_norm": 0.0, + "learning_rate": 1.6022908512408404e-05, + "loss": 1.417, + "step": 10743 + }, + { + "epoch": 0.31546185918139646, + "grad_norm": 0.0, + "learning_rate": 1.6022149355651907e-05, + "loss": 1.4805, + "step": 10744 + }, + { + "epoch": 0.3154912208585354, + "grad_norm": 0.0, + "learning_rate": 1.6021390144435575e-05, + "loss": 1.2456, + "step": 10745 + }, + { + "epoch": 0.31552058253567444, + "grad_norm": 0.0, + "learning_rate": 1.6020630878766267e-05, + "loss": 1.2734, + "step": 10746 + }, + { + "epoch": 0.31554994421281346, + "grad_norm": 0.0, + "learning_rate": 1.6019871558650853e-05, + "loss": 1.2783, + "step": 10747 + }, + { + "epoch": 0.3155793058899524, + "grad_norm": 0.0, + "learning_rate": 1.6019112184096197e-05, + "loss": 1.2305, + "step": 10748 + }, + { + "epoch": 0.31560866756709144, + "grad_norm": 0.0, + "learning_rate": 1.6018352755109172e-05, + "loss": 1.4609, + "step": 10749 + }, + { + "epoch": 0.31563802924423046, + "grad_norm": 0.0, + "learning_rate": 1.6017593271696638e-05, + "loss": 1.4307, + "step": 10750 + }, + { + "epoch": 0.3156673909213694, + "grad_norm": 0.0, + "learning_rate": 1.601683373386547e-05, + "loss": 1.373, + "step": 10751 + }, + { + "epoch": 0.31569675259850843, + "grad_norm": 0.0, + "learning_rate": 1.601607414162253e-05, + "loss": 1.2959, + "step": 10752 + }, + { + "epoch": 0.31572611427564745, + "grad_norm": 0.0, + "learning_rate": 1.6015314494974698e-05, + "loss": 1.3721, + "step": 10753 + }, + { + "epoch": 0.3157554759527864, + "grad_norm": 0.0, + "learning_rate": 1.6014554793928828e-05, + "loss": 1.374, + "step": 10754 + }, + { + "epoch": 0.31578483762992543, + "grad_norm": 0.0, + "learning_rate": 1.60137950384918e-05, + "loss": 1.4629, + "step": 10755 + }, + { + "epoch": 0.31581419930706445, + "grad_norm": 0.0, + "learning_rate": 1.6013035228670486e-05, + "loss": 1.3838, + "step": 10756 + }, + { + "epoch": 0.3158435609842034, + "grad_norm": 0.0, + "learning_rate": 1.6012275364471756e-05, + "loss": 1.3516, + "step": 10757 + }, + { + "epoch": 0.3158729226613424, + "grad_norm": 0.0, + "learning_rate": 1.6011515445902475e-05, + "loss": 1.3975, + "step": 10758 + }, + { + "epoch": 0.3159022843384814, + "grad_norm": 0.0, + "learning_rate": 1.6010755472969526e-05, + "loss": 1.4678, + "step": 10759 + }, + { + "epoch": 0.3159316460156204, + "grad_norm": 0.0, + "learning_rate": 1.6009995445679772e-05, + "loss": 1.2617, + "step": 10760 + }, + { + "epoch": 0.3159610076927594, + "grad_norm": 0.0, + "learning_rate": 1.600923536404009e-05, + "loss": 1.4766, + "step": 10761 + }, + { + "epoch": 0.3159903693698984, + "grad_norm": 0.0, + "learning_rate": 1.6008475228057356e-05, + "loss": 1.2246, + "step": 10762 + }, + { + "epoch": 0.3160197310470374, + "grad_norm": 0.0, + "learning_rate": 1.6007715037738442e-05, + "loss": 1.373, + "step": 10763 + }, + { + "epoch": 0.3160490927241764, + "grad_norm": 0.0, + "learning_rate": 1.600695479309022e-05, + "loss": 1.291, + "step": 10764 + }, + { + "epoch": 0.3160784544013154, + "grad_norm": 0.0, + "learning_rate": 1.6006194494119566e-05, + "loss": 1.4678, + "step": 10765 + }, + { + "epoch": 0.3161078160784544, + "grad_norm": 0.0, + "learning_rate": 1.600543414083336e-05, + "loss": 1.3984, + "step": 10766 + }, + { + "epoch": 0.3161371777555934, + "grad_norm": 0.0, + "learning_rate": 1.6004673733238474e-05, + "loss": 1.4688, + "step": 10767 + }, + { + "epoch": 0.3161665394327324, + "grad_norm": 0.0, + "learning_rate": 1.6003913271341787e-05, + "loss": 1.4551, + "step": 10768 + }, + { + "epoch": 0.3161959011098714, + "grad_norm": 0.0, + "learning_rate": 1.6003152755150172e-05, + "loss": 1.4316, + "step": 10769 + }, + { + "epoch": 0.3162252627870104, + "grad_norm": 0.0, + "learning_rate": 1.600239218467051e-05, + "loss": 1.377, + "step": 10770 + }, + { + "epoch": 0.31625462446414937, + "grad_norm": 0.0, + "learning_rate": 1.600163155990968e-05, + "loss": 1.4678, + "step": 10771 + }, + { + "epoch": 0.3162839861412884, + "grad_norm": 0.0, + "learning_rate": 1.6000870880874555e-05, + "loss": 1.3418, + "step": 10772 + }, + { + "epoch": 0.3163133478184274, + "grad_norm": 0.0, + "learning_rate": 1.600011014757202e-05, + "loss": 1.4902, + "step": 10773 + }, + { + "epoch": 0.31634270949556637, + "grad_norm": 0.0, + "learning_rate": 1.5999349360008957e-05, + "loss": 1.4668, + "step": 10774 + }, + { + "epoch": 0.3163720711727054, + "grad_norm": 0.0, + "learning_rate": 1.5998588518192236e-05, + "loss": 1.4971, + "step": 10775 + }, + { + "epoch": 0.3164014328498444, + "grad_norm": 0.0, + "learning_rate": 1.5997827622128744e-05, + "loss": 1.4639, + "step": 10776 + }, + { + "epoch": 0.31643079452698336, + "grad_norm": 0.0, + "learning_rate": 1.5997066671825358e-05, + "loss": 1.4951, + "step": 10777 + }, + { + "epoch": 0.3164601562041224, + "grad_norm": 0.0, + "learning_rate": 1.5996305667288966e-05, + "loss": 1.3965, + "step": 10778 + }, + { + "epoch": 0.3164895178812614, + "grad_norm": 0.0, + "learning_rate": 1.5995544608526447e-05, + "loss": 1.3691, + "step": 10779 + }, + { + "epoch": 0.31651887955840036, + "grad_norm": 0.0, + "learning_rate": 1.5994783495544677e-05, + "loss": 1.4414, + "step": 10780 + }, + { + "epoch": 0.3165482412355394, + "grad_norm": 0.0, + "learning_rate": 1.599402232835055e-05, + "loss": 1.4404, + "step": 10781 + }, + { + "epoch": 0.3165776029126784, + "grad_norm": 0.0, + "learning_rate": 1.5993261106950942e-05, + "loss": 1.4766, + "step": 10782 + }, + { + "epoch": 0.31660696458981735, + "grad_norm": 0.0, + "learning_rate": 1.5992499831352738e-05, + "loss": 1.2847, + "step": 10783 + }, + { + "epoch": 0.31663632626695637, + "grad_norm": 0.0, + "learning_rate": 1.5991738501562823e-05, + "loss": 1.2715, + "step": 10784 + }, + { + "epoch": 0.3166656879440954, + "grad_norm": 0.0, + "learning_rate": 1.5990977117588085e-05, + "loss": 1.4082, + "step": 10785 + }, + { + "epoch": 0.31669504962123435, + "grad_norm": 0.0, + "learning_rate": 1.5990215679435403e-05, + "loss": 1.3643, + "step": 10786 + }, + { + "epoch": 0.31672441129837337, + "grad_norm": 0.0, + "learning_rate": 1.598945418711167e-05, + "loss": 1.3945, + "step": 10787 + }, + { + "epoch": 0.3167537729755124, + "grad_norm": 0.0, + "learning_rate": 1.5988692640623767e-05, + "loss": 1.3564, + "step": 10788 + }, + { + "epoch": 0.31678313465265134, + "grad_norm": 0.0, + "learning_rate": 1.5987931039978584e-05, + "loss": 1.4131, + "step": 10789 + }, + { + "epoch": 0.31681249632979036, + "grad_norm": 0.0, + "learning_rate": 1.5987169385183005e-05, + "loss": 1.3008, + "step": 10790 + }, + { + "epoch": 0.3168418580069294, + "grad_norm": 0.0, + "learning_rate": 1.598640767624392e-05, + "loss": 1.4326, + "step": 10791 + }, + { + "epoch": 0.31687121968406834, + "grad_norm": 0.0, + "learning_rate": 1.598564591316822e-05, + "loss": 1.4229, + "step": 10792 + }, + { + "epoch": 0.31690058136120736, + "grad_norm": 0.0, + "learning_rate": 1.5984884095962788e-05, + "loss": 1.4434, + "step": 10793 + }, + { + "epoch": 0.3169299430383464, + "grad_norm": 0.0, + "learning_rate": 1.598412222463452e-05, + "loss": 1.3027, + "step": 10794 + }, + { + "epoch": 0.31695930471548533, + "grad_norm": 0.0, + "learning_rate": 1.59833602991903e-05, + "loss": 1.4648, + "step": 10795 + }, + { + "epoch": 0.31698866639262435, + "grad_norm": 0.0, + "learning_rate": 1.598259831963702e-05, + "loss": 1.4561, + "step": 10796 + }, + { + "epoch": 0.31701802806976337, + "grad_norm": 0.0, + "learning_rate": 1.5981836285981574e-05, + "loss": 1.4092, + "step": 10797 + }, + { + "epoch": 0.31704738974690233, + "grad_norm": 0.0, + "learning_rate": 1.5981074198230847e-05, + "loss": 1.4824, + "step": 10798 + }, + { + "epoch": 0.31707675142404135, + "grad_norm": 0.0, + "learning_rate": 1.5980312056391733e-05, + "loss": 1.3721, + "step": 10799 + }, + { + "epoch": 0.31710611310118036, + "grad_norm": 0.0, + "learning_rate": 1.597954986047113e-05, + "loss": 1.3105, + "step": 10800 + }, + { + "epoch": 0.3171354747783193, + "grad_norm": 0.0, + "learning_rate": 1.597878761047592e-05, + "loss": 1.3086, + "step": 10801 + }, + { + "epoch": 0.31716483645545834, + "grad_norm": 0.0, + "learning_rate": 1.597802530641301e-05, + "loss": 1.415, + "step": 10802 + }, + { + "epoch": 0.31719419813259736, + "grad_norm": 0.0, + "learning_rate": 1.597726294828928e-05, + "loss": 1.4521, + "step": 10803 + }, + { + "epoch": 0.3172235598097363, + "grad_norm": 0.0, + "learning_rate": 1.5976500536111634e-05, + "loss": 1.4629, + "step": 10804 + }, + { + "epoch": 0.31725292148687534, + "grad_norm": 0.0, + "learning_rate": 1.5975738069886965e-05, + "loss": 1.5146, + "step": 10805 + }, + { + "epoch": 0.31728228316401436, + "grad_norm": 0.0, + "learning_rate": 1.5974975549622163e-05, + "loss": 1.3652, + "step": 10806 + }, + { + "epoch": 0.3173116448411533, + "grad_norm": 0.0, + "learning_rate": 1.5974212975324127e-05, + "loss": 1.3721, + "step": 10807 + }, + { + "epoch": 0.31734100651829233, + "grad_norm": 0.0, + "learning_rate": 1.597345034699975e-05, + "loss": 1.3877, + "step": 10808 + }, + { + "epoch": 0.3173703681954313, + "grad_norm": 0.0, + "learning_rate": 1.5972687664655937e-05, + "loss": 1.374, + "step": 10809 + }, + { + "epoch": 0.3173997298725703, + "grad_norm": 0.0, + "learning_rate": 1.597192492829958e-05, + "loss": 1.416, + "step": 10810 + }, + { + "epoch": 0.31742909154970933, + "grad_norm": 0.0, + "learning_rate": 1.5971162137937574e-05, + "loss": 1.3955, + "step": 10811 + }, + { + "epoch": 0.3174584532268483, + "grad_norm": 0.0, + "learning_rate": 1.5970399293576818e-05, + "loss": 1.3672, + "step": 10812 + }, + { + "epoch": 0.3174878149039873, + "grad_norm": 0.0, + "learning_rate": 1.5969636395224213e-05, + "loss": 1.3647, + "step": 10813 + }, + { + "epoch": 0.3175171765811263, + "grad_norm": 0.0, + "learning_rate": 1.596887344288666e-05, + "loss": 1.1597, + "step": 10814 + }, + { + "epoch": 0.3175465382582653, + "grad_norm": 0.0, + "learning_rate": 1.5968110436571055e-05, + "loss": 1.4482, + "step": 10815 + }, + { + "epoch": 0.3175758999354043, + "grad_norm": 0.0, + "learning_rate": 1.5967347376284298e-05, + "loss": 1.4551, + "step": 10816 + }, + { + "epoch": 0.3176052616125433, + "grad_norm": 0.0, + "learning_rate": 1.5966584262033288e-05, + "loss": 1.3516, + "step": 10817 + }, + { + "epoch": 0.3176346232896823, + "grad_norm": 0.0, + "learning_rate": 1.596582109382493e-05, + "loss": 1.4727, + "step": 10818 + }, + { + "epoch": 0.3176639849668213, + "grad_norm": 0.0, + "learning_rate": 1.5965057871666125e-05, + "loss": 1.2119, + "step": 10819 + }, + { + "epoch": 0.3176933466439603, + "grad_norm": 0.0, + "learning_rate": 1.5964294595563776e-05, + "loss": 1.376, + "step": 10820 + }, + { + "epoch": 0.3177227083210993, + "grad_norm": 0.0, + "learning_rate": 1.596353126552478e-05, + "loss": 1.3203, + "step": 10821 + }, + { + "epoch": 0.3177520699982383, + "grad_norm": 0.0, + "learning_rate": 1.5962767881556044e-05, + "loss": 1.333, + "step": 10822 + }, + { + "epoch": 0.3177814316753773, + "grad_norm": 0.0, + "learning_rate": 1.5962004443664475e-05, + "loss": 1.4629, + "step": 10823 + }, + { + "epoch": 0.3178107933525163, + "grad_norm": 0.0, + "learning_rate": 1.596124095185697e-05, + "loss": 1.3408, + "step": 10824 + }, + { + "epoch": 0.3178401550296553, + "grad_norm": 0.0, + "learning_rate": 1.5960477406140434e-05, + "loss": 1.373, + "step": 10825 + }, + { + "epoch": 0.3178695167067943, + "grad_norm": 0.0, + "learning_rate": 1.5959713806521775e-05, + "loss": 1.4316, + "step": 10826 + }, + { + "epoch": 0.31789887838393327, + "grad_norm": 0.0, + "learning_rate": 1.59589501530079e-05, + "loss": 1.3643, + "step": 10827 + }, + { + "epoch": 0.3179282400610723, + "grad_norm": 0.0, + "learning_rate": 1.595818644560571e-05, + "loss": 1.4043, + "step": 10828 + }, + { + "epoch": 0.3179576017382113, + "grad_norm": 0.0, + "learning_rate": 1.5957422684322117e-05, + "loss": 1.3477, + "step": 10829 + }, + { + "epoch": 0.31798696341535027, + "grad_norm": 0.0, + "learning_rate": 1.5956658869164022e-05, + "loss": 1.3257, + "step": 10830 + }, + { + "epoch": 0.3180163250924893, + "grad_norm": 0.0, + "learning_rate": 1.5955895000138338e-05, + "loss": 1.3672, + "step": 10831 + }, + { + "epoch": 0.3180456867696283, + "grad_norm": 0.0, + "learning_rate": 1.595513107725197e-05, + "loss": 1.3936, + "step": 10832 + }, + { + "epoch": 0.31807504844676726, + "grad_norm": 0.0, + "learning_rate": 1.5954367100511827e-05, + "loss": 1.3711, + "step": 10833 + }, + { + "epoch": 0.3181044101239063, + "grad_norm": 0.0, + "learning_rate": 1.5953603069924813e-05, + "loss": 1.4209, + "step": 10834 + }, + { + "epoch": 0.3181337718010453, + "grad_norm": 0.0, + "learning_rate": 1.5952838985497848e-05, + "loss": 1.4434, + "step": 10835 + }, + { + "epoch": 0.31816313347818426, + "grad_norm": 0.0, + "learning_rate": 1.595207484723783e-05, + "loss": 1.4229, + "step": 10836 + }, + { + "epoch": 0.3181924951553233, + "grad_norm": 0.0, + "learning_rate": 1.595131065515168e-05, + "loss": 1.5625, + "step": 10837 + }, + { + "epoch": 0.3182218568324623, + "grad_norm": 0.0, + "learning_rate": 1.5950546409246298e-05, + "loss": 1.3262, + "step": 10838 + }, + { + "epoch": 0.31825121850960125, + "grad_norm": 0.0, + "learning_rate": 1.5949782109528606e-05, + "loss": 1.2246, + "step": 10839 + }, + { + "epoch": 0.31828058018674027, + "grad_norm": 0.0, + "learning_rate": 1.5949017756005504e-05, + "loss": 1.416, + "step": 10840 + }, + { + "epoch": 0.3183099418638793, + "grad_norm": 0.0, + "learning_rate": 1.5948253348683914e-05, + "loss": 1.3965, + "step": 10841 + }, + { + "epoch": 0.31833930354101825, + "grad_norm": 0.0, + "learning_rate": 1.594748888757075e-05, + "loss": 1.4521, + "step": 10842 + }, + { + "epoch": 0.31836866521815727, + "grad_norm": 0.0, + "learning_rate": 1.5946724372672914e-05, + "loss": 1.5391, + "step": 10843 + }, + { + "epoch": 0.3183980268952963, + "grad_norm": 0.0, + "learning_rate": 1.594595980399733e-05, + "loss": 1.314, + "step": 10844 + }, + { + "epoch": 0.31842738857243524, + "grad_norm": 0.0, + "learning_rate": 1.594519518155091e-05, + "loss": 1.3857, + "step": 10845 + }, + { + "epoch": 0.31845675024957426, + "grad_norm": 0.0, + "learning_rate": 1.5944430505340566e-05, + "loss": 1.3594, + "step": 10846 + }, + { + "epoch": 0.3184861119267133, + "grad_norm": 0.0, + "learning_rate": 1.5943665775373217e-05, + "loss": 1.4219, + "step": 10847 + }, + { + "epoch": 0.31851547360385224, + "grad_norm": 0.0, + "learning_rate": 1.5942900991655772e-05, + "loss": 1.5654, + "step": 10848 + }, + { + "epoch": 0.31854483528099126, + "grad_norm": 0.0, + "learning_rate": 1.5942136154195155e-05, + "loss": 1.3994, + "step": 10849 + }, + { + "epoch": 0.3185741969581303, + "grad_norm": 0.0, + "learning_rate": 1.5941371262998275e-05, + "loss": 1.3516, + "step": 10850 + }, + { + "epoch": 0.31860355863526924, + "grad_norm": 0.0, + "learning_rate": 1.5940606318072057e-05, + "loss": 1.3359, + "step": 10851 + }, + { + "epoch": 0.31863292031240825, + "grad_norm": 0.0, + "learning_rate": 1.5939841319423414e-05, + "loss": 1.3574, + "step": 10852 + }, + { + "epoch": 0.31866228198954727, + "grad_norm": 0.0, + "learning_rate": 1.5939076267059262e-05, + "loss": 1.3418, + "step": 10853 + }, + { + "epoch": 0.31869164366668623, + "grad_norm": 0.0, + "learning_rate": 1.5938311160986523e-05, + "loss": 1.4492, + "step": 10854 + }, + { + "epoch": 0.31872100534382525, + "grad_norm": 0.0, + "learning_rate": 1.5937546001212118e-05, + "loss": 1.4932, + "step": 10855 + }, + { + "epoch": 0.31875036702096426, + "grad_norm": 0.0, + "learning_rate": 1.593678078774296e-05, + "loss": 1.4434, + "step": 10856 + }, + { + "epoch": 0.3187797286981032, + "grad_norm": 0.0, + "learning_rate": 1.5936015520585974e-05, + "loss": 1.4932, + "step": 10857 + }, + { + "epoch": 0.31880909037524224, + "grad_norm": 0.0, + "learning_rate": 1.5935250199748082e-05, + "loss": 1.3975, + "step": 10858 + }, + { + "epoch": 0.3188384520523812, + "grad_norm": 0.0, + "learning_rate": 1.5934484825236202e-05, + "loss": 1.4473, + "step": 10859 + }, + { + "epoch": 0.3188678137295202, + "grad_norm": 0.0, + "learning_rate": 1.5933719397057253e-05, + "loss": 1.4717, + "step": 10860 + }, + { + "epoch": 0.31889717540665924, + "grad_norm": 0.0, + "learning_rate": 1.5932953915218164e-05, + "loss": 1.5508, + "step": 10861 + }, + { + "epoch": 0.3189265370837982, + "grad_norm": 0.0, + "learning_rate": 1.593218837972585e-05, + "loss": 1.3135, + "step": 10862 + }, + { + "epoch": 0.3189558987609372, + "grad_norm": 0.0, + "learning_rate": 1.5931422790587236e-05, + "loss": 1.3467, + "step": 10863 + }, + { + "epoch": 0.31898526043807623, + "grad_norm": 0.0, + "learning_rate": 1.593065714780925e-05, + "loss": 1.4375, + "step": 10864 + }, + { + "epoch": 0.3190146221152152, + "grad_norm": 0.0, + "learning_rate": 1.592989145139881e-05, + "loss": 1.3799, + "step": 10865 + }, + { + "epoch": 0.3190439837923542, + "grad_norm": 0.0, + "learning_rate": 1.5929125701362845e-05, + "loss": 1.3877, + "step": 10866 + }, + { + "epoch": 0.31907334546949323, + "grad_norm": 0.0, + "learning_rate": 1.5928359897708275e-05, + "loss": 1.2412, + "step": 10867 + }, + { + "epoch": 0.3191027071466322, + "grad_norm": 0.0, + "learning_rate": 1.5927594040442032e-05, + "loss": 1.333, + "step": 10868 + }, + { + "epoch": 0.3191320688237712, + "grad_norm": 0.0, + "learning_rate": 1.5926828129571034e-05, + "loss": 1.4092, + "step": 10869 + }, + { + "epoch": 0.3191614305009102, + "grad_norm": 0.0, + "learning_rate": 1.5926062165102215e-05, + "loss": 1.3721, + "step": 10870 + }, + { + "epoch": 0.3191907921780492, + "grad_norm": 0.0, + "learning_rate": 1.5925296147042498e-05, + "loss": 1.2314, + "step": 10871 + }, + { + "epoch": 0.3192201538551882, + "grad_norm": 0.0, + "learning_rate": 1.592453007539881e-05, + "loss": 1.5146, + "step": 10872 + }, + { + "epoch": 0.3192495155323272, + "grad_norm": 0.0, + "learning_rate": 1.5923763950178077e-05, + "loss": 1.291, + "step": 10873 + }, + { + "epoch": 0.3192788772094662, + "grad_norm": 0.0, + "learning_rate": 1.5922997771387228e-05, + "loss": 1.4385, + "step": 10874 + }, + { + "epoch": 0.3193082388866052, + "grad_norm": 0.0, + "learning_rate": 1.5922231539033197e-05, + "loss": 1.3428, + "step": 10875 + }, + { + "epoch": 0.3193376005637442, + "grad_norm": 0.0, + "learning_rate": 1.592146525312291e-05, + "loss": 1.4033, + "step": 10876 + }, + { + "epoch": 0.3193669622408832, + "grad_norm": 0.0, + "learning_rate": 1.5920698913663293e-05, + "loss": 1.3418, + "step": 10877 + }, + { + "epoch": 0.3193963239180222, + "grad_norm": 0.0, + "learning_rate": 1.591993252066128e-05, + "loss": 1.3975, + "step": 10878 + }, + { + "epoch": 0.3194256855951612, + "grad_norm": 0.0, + "learning_rate": 1.5919166074123804e-05, + "loss": 1.4414, + "step": 10879 + }, + { + "epoch": 0.3194550472723002, + "grad_norm": 0.0, + "learning_rate": 1.591839957405779e-05, + "loss": 1.2808, + "step": 10880 + }, + { + "epoch": 0.3194844089494392, + "grad_norm": 0.0, + "learning_rate": 1.5917633020470176e-05, + "loss": 1.373, + "step": 10881 + }, + { + "epoch": 0.3195137706265782, + "grad_norm": 0.0, + "learning_rate": 1.5916866413367888e-05, + "loss": 1.3643, + "step": 10882 + }, + { + "epoch": 0.31954313230371717, + "grad_norm": 0.0, + "learning_rate": 1.5916099752757864e-05, + "loss": 1.3281, + "step": 10883 + }, + { + "epoch": 0.3195724939808562, + "grad_norm": 0.0, + "learning_rate": 1.5915333038647034e-05, + "loss": 1.3682, + "step": 10884 + }, + { + "epoch": 0.3196018556579952, + "grad_norm": 0.0, + "learning_rate": 1.5914566271042332e-05, + "loss": 1.2461, + "step": 10885 + }, + { + "epoch": 0.31963121733513417, + "grad_norm": 0.0, + "learning_rate": 1.5913799449950692e-05, + "loss": 1.3369, + "step": 10886 + }, + { + "epoch": 0.3196605790122732, + "grad_norm": 0.0, + "learning_rate": 1.591303257537905e-05, + "loss": 1.3086, + "step": 10887 + }, + { + "epoch": 0.3196899406894122, + "grad_norm": 0.0, + "learning_rate": 1.5912265647334343e-05, + "loss": 1.459, + "step": 10888 + }, + { + "epoch": 0.31971930236655116, + "grad_norm": 0.0, + "learning_rate": 1.59114986658235e-05, + "loss": 1.3311, + "step": 10889 + }, + { + "epoch": 0.3197486640436902, + "grad_norm": 0.0, + "learning_rate": 1.591073163085346e-05, + "loss": 1.3535, + "step": 10890 + }, + { + "epoch": 0.3197780257208292, + "grad_norm": 0.0, + "learning_rate": 1.590996454243116e-05, + "loss": 1.2109, + "step": 10891 + }, + { + "epoch": 0.31980738739796816, + "grad_norm": 0.0, + "learning_rate": 1.590919740056354e-05, + "loss": 1.4199, + "step": 10892 + }, + { + "epoch": 0.3198367490751072, + "grad_norm": 0.0, + "learning_rate": 1.5908430205257534e-05, + "loss": 1.4043, + "step": 10893 + }, + { + "epoch": 0.3198661107522462, + "grad_norm": 0.0, + "learning_rate": 1.5907662956520077e-05, + "loss": 1.3691, + "step": 10894 + }, + { + "epoch": 0.31989547242938515, + "grad_norm": 0.0, + "learning_rate": 1.5906895654358113e-05, + "loss": 1.3662, + "step": 10895 + }, + { + "epoch": 0.31992483410652417, + "grad_norm": 0.0, + "learning_rate": 1.590612829877858e-05, + "loss": 1.4121, + "step": 10896 + }, + { + "epoch": 0.3199541957836632, + "grad_norm": 0.0, + "learning_rate": 1.5905360889788415e-05, + "loss": 1.3965, + "step": 10897 + }, + { + "epoch": 0.31998355746080215, + "grad_norm": 0.0, + "learning_rate": 1.590459342739456e-05, + "loss": 1.2617, + "step": 10898 + }, + { + "epoch": 0.32001291913794117, + "grad_norm": 0.0, + "learning_rate": 1.5903825911603953e-05, + "loss": 1.416, + "step": 10899 + }, + { + "epoch": 0.3200422808150802, + "grad_norm": 0.0, + "learning_rate": 1.5903058342423537e-05, + "loss": 1.3896, + "step": 10900 + }, + { + "epoch": 0.32007164249221914, + "grad_norm": 0.0, + "learning_rate": 1.5902290719860255e-05, + "loss": 1.3887, + "step": 10901 + }, + { + "epoch": 0.32010100416935816, + "grad_norm": 0.0, + "learning_rate": 1.5901523043921043e-05, + "loss": 1.4668, + "step": 10902 + }, + { + "epoch": 0.3201303658464972, + "grad_norm": 0.0, + "learning_rate": 1.590075531461285e-05, + "loss": 1.4805, + "step": 10903 + }, + { + "epoch": 0.32015972752363614, + "grad_norm": 0.0, + "learning_rate": 1.5899987531942615e-05, + "loss": 1.4717, + "step": 10904 + }, + { + "epoch": 0.32018908920077516, + "grad_norm": 0.0, + "learning_rate": 1.589921969591728e-05, + "loss": 1.2295, + "step": 10905 + }, + { + "epoch": 0.3202184508779142, + "grad_norm": 0.0, + "learning_rate": 1.5898451806543786e-05, + "loss": 1.3418, + "step": 10906 + }, + { + "epoch": 0.32024781255505314, + "grad_norm": 0.0, + "learning_rate": 1.5897683863829088e-05, + "loss": 1.376, + "step": 10907 + }, + { + "epoch": 0.32027717423219215, + "grad_norm": 0.0, + "learning_rate": 1.5896915867780123e-05, + "loss": 1.3955, + "step": 10908 + }, + { + "epoch": 0.3203065359093311, + "grad_norm": 0.0, + "learning_rate": 1.5896147818403837e-05, + "loss": 1.4141, + "step": 10909 + }, + { + "epoch": 0.32033589758647013, + "grad_norm": 0.0, + "learning_rate": 1.5895379715707176e-05, + "loss": 1.4268, + "step": 10910 + }, + { + "epoch": 0.32036525926360915, + "grad_norm": 0.0, + "learning_rate": 1.589461155969709e-05, + "loss": 1.5537, + "step": 10911 + }, + { + "epoch": 0.3203946209407481, + "grad_norm": 0.0, + "learning_rate": 1.5893843350380516e-05, + "loss": 1.4912, + "step": 10912 + }, + { + "epoch": 0.3204239826178871, + "grad_norm": 0.0, + "learning_rate": 1.589307508776441e-05, + "loss": 1.4287, + "step": 10913 + }, + { + "epoch": 0.32045334429502614, + "grad_norm": 0.0, + "learning_rate": 1.5892306771855717e-05, + "loss": 1.3584, + "step": 10914 + }, + { + "epoch": 0.3204827059721651, + "grad_norm": 0.0, + "learning_rate": 1.5891538402661387e-05, + "loss": 1.3574, + "step": 10915 + }, + { + "epoch": 0.3205120676493041, + "grad_norm": 0.0, + "learning_rate": 1.5890769980188362e-05, + "loss": 1.5234, + "step": 10916 + }, + { + "epoch": 0.32054142932644314, + "grad_norm": 0.0, + "learning_rate": 1.58900015044436e-05, + "loss": 1.3105, + "step": 10917 + }, + { + "epoch": 0.3205707910035821, + "grad_norm": 0.0, + "learning_rate": 1.5889232975434043e-05, + "loss": 1.3301, + "step": 10918 + }, + { + "epoch": 0.3206001526807211, + "grad_norm": 0.0, + "learning_rate": 1.5888464393166643e-05, + "loss": 1.3955, + "step": 10919 + }, + { + "epoch": 0.32062951435786013, + "grad_norm": 0.0, + "learning_rate": 1.5887695757648352e-05, + "loss": 1.5283, + "step": 10920 + }, + { + "epoch": 0.3206588760349991, + "grad_norm": 0.0, + "learning_rate": 1.5886927068886122e-05, + "loss": 1.2344, + "step": 10921 + }, + { + "epoch": 0.3206882377121381, + "grad_norm": 0.0, + "learning_rate": 1.58861583268869e-05, + "loss": 1.4033, + "step": 10922 + }, + { + "epoch": 0.32071759938927713, + "grad_norm": 0.0, + "learning_rate": 1.5885389531657643e-05, + "loss": 1.3574, + "step": 10923 + }, + { + "epoch": 0.3207469610664161, + "grad_norm": 0.0, + "learning_rate": 1.58846206832053e-05, + "loss": 1.3965, + "step": 10924 + }, + { + "epoch": 0.3207763227435551, + "grad_norm": 0.0, + "learning_rate": 1.5883851781536827e-05, + "loss": 1.3184, + "step": 10925 + }, + { + "epoch": 0.3208056844206941, + "grad_norm": 0.0, + "learning_rate": 1.5883082826659177e-05, + "loss": 1.5576, + "step": 10926 + }, + { + "epoch": 0.3208350460978331, + "grad_norm": 0.0, + "learning_rate": 1.58823138185793e-05, + "loss": 1.2715, + "step": 10927 + }, + { + "epoch": 0.3208644077749721, + "grad_norm": 0.0, + "learning_rate": 1.588154475730415e-05, + "loss": 1.4512, + "step": 10928 + }, + { + "epoch": 0.3208937694521111, + "grad_norm": 0.0, + "learning_rate": 1.5880775642840687e-05, + "loss": 1.4189, + "step": 10929 + }, + { + "epoch": 0.3209231311292501, + "grad_norm": 0.0, + "learning_rate": 1.5880006475195865e-05, + "loss": 1.3896, + "step": 10930 + }, + { + "epoch": 0.3209524928063891, + "grad_norm": 0.0, + "learning_rate": 1.587923725437664e-05, + "loss": 1.3271, + "step": 10931 + }, + { + "epoch": 0.3209818544835281, + "grad_norm": 0.0, + "learning_rate": 1.5878467980389964e-05, + "loss": 1.3945, + "step": 10932 + }, + { + "epoch": 0.3210112161606671, + "grad_norm": 0.0, + "learning_rate": 1.5877698653242793e-05, + "loss": 1.4043, + "step": 10933 + }, + { + "epoch": 0.3210405778378061, + "grad_norm": 0.0, + "learning_rate": 1.5876929272942093e-05, + "loss": 1.3359, + "step": 10934 + }, + { + "epoch": 0.3210699395149451, + "grad_norm": 0.0, + "learning_rate": 1.5876159839494815e-05, + "loss": 1.4326, + "step": 10935 + }, + { + "epoch": 0.3210993011920841, + "grad_norm": 0.0, + "learning_rate": 1.587539035290792e-05, + "loss": 1.2588, + "step": 10936 + }, + { + "epoch": 0.3211286628692231, + "grad_norm": 0.0, + "learning_rate": 1.587462081318836e-05, + "loss": 1.3599, + "step": 10937 + }, + { + "epoch": 0.3211580245463621, + "grad_norm": 0.0, + "learning_rate": 1.5873851220343105e-05, + "loss": 1.4922, + "step": 10938 + }, + { + "epoch": 0.32118738622350107, + "grad_norm": 0.0, + "learning_rate": 1.5873081574379107e-05, + "loss": 1.3447, + "step": 10939 + }, + { + "epoch": 0.3212167479006401, + "grad_norm": 0.0, + "learning_rate": 1.5872311875303333e-05, + "loss": 1.4766, + "step": 10940 + }, + { + "epoch": 0.3212461095777791, + "grad_norm": 0.0, + "learning_rate": 1.587154212312273e-05, + "loss": 1.3257, + "step": 10941 + }, + { + "epoch": 0.32127547125491807, + "grad_norm": 0.0, + "learning_rate": 1.5870772317844272e-05, + "loss": 1.3887, + "step": 10942 + }, + { + "epoch": 0.3213048329320571, + "grad_norm": 0.0, + "learning_rate": 1.5870002459474917e-05, + "loss": 1.2837, + "step": 10943 + }, + { + "epoch": 0.3213341946091961, + "grad_norm": 0.0, + "learning_rate": 1.5869232548021628e-05, + "loss": 1.4463, + "step": 10944 + }, + { + "epoch": 0.32136355628633506, + "grad_norm": 0.0, + "learning_rate": 1.5868462583491363e-05, + "loss": 1.4248, + "step": 10945 + }, + { + "epoch": 0.3213929179634741, + "grad_norm": 0.0, + "learning_rate": 1.586769256589109e-05, + "loss": 1.2529, + "step": 10946 + }, + { + "epoch": 0.3214222796406131, + "grad_norm": 0.0, + "learning_rate": 1.586692249522777e-05, + "loss": 1.3799, + "step": 10947 + }, + { + "epoch": 0.32145164131775206, + "grad_norm": 0.0, + "learning_rate": 1.5866152371508365e-05, + "loss": 1.4854, + "step": 10948 + }, + { + "epoch": 0.3214810029948911, + "grad_norm": 0.0, + "learning_rate": 1.5865382194739844e-05, + "loss": 1.377, + "step": 10949 + }, + { + "epoch": 0.3215103646720301, + "grad_norm": 0.0, + "learning_rate": 1.586461196492917e-05, + "loss": 1.3604, + "step": 10950 + }, + { + "epoch": 0.32153972634916905, + "grad_norm": 0.0, + "learning_rate": 1.5863841682083307e-05, + "loss": 1.1602, + "step": 10951 + }, + { + "epoch": 0.32156908802630807, + "grad_norm": 0.0, + "learning_rate": 1.586307134620922e-05, + "loss": 1.4131, + "step": 10952 + }, + { + "epoch": 0.3215984497034471, + "grad_norm": 0.0, + "learning_rate": 1.5862300957313885e-05, + "loss": 1.4062, + "step": 10953 + }, + { + "epoch": 0.32162781138058605, + "grad_norm": 0.0, + "learning_rate": 1.5861530515404254e-05, + "loss": 1.3154, + "step": 10954 + }, + { + "epoch": 0.32165717305772507, + "grad_norm": 0.0, + "learning_rate": 1.58607600204873e-05, + "loss": 1.4463, + "step": 10955 + }, + { + "epoch": 0.3216865347348641, + "grad_norm": 0.0, + "learning_rate": 1.5859989472569995e-05, + "loss": 1.4131, + "step": 10956 + }, + { + "epoch": 0.32171589641200304, + "grad_norm": 0.0, + "learning_rate": 1.5859218871659307e-05, + "loss": 1.4062, + "step": 10957 + }, + { + "epoch": 0.32174525808914206, + "grad_norm": 0.0, + "learning_rate": 1.5858448217762198e-05, + "loss": 1.2754, + "step": 10958 + }, + { + "epoch": 0.321774619766281, + "grad_norm": 0.0, + "learning_rate": 1.585767751088564e-05, + "loss": 1.3477, + "step": 10959 + }, + { + "epoch": 0.32180398144342004, + "grad_norm": 0.0, + "learning_rate": 1.585690675103661e-05, + "loss": 1.4434, + "step": 10960 + }, + { + "epoch": 0.32183334312055906, + "grad_norm": 0.0, + "learning_rate": 1.585613593822207e-05, + "loss": 1.3613, + "step": 10961 + }, + { + "epoch": 0.321862704797698, + "grad_norm": 0.0, + "learning_rate": 1.5855365072448993e-05, + "loss": 1.3564, + "step": 10962 + }, + { + "epoch": 0.32189206647483704, + "grad_norm": 0.0, + "learning_rate": 1.585459415372435e-05, + "loss": 1.3525, + "step": 10963 + }, + { + "epoch": 0.32192142815197605, + "grad_norm": 0.0, + "learning_rate": 1.585382318205511e-05, + "loss": 1.3838, + "step": 10964 + }, + { + "epoch": 0.321950789829115, + "grad_norm": 0.0, + "learning_rate": 1.585305215744825e-05, + "loss": 1.4033, + "step": 10965 + }, + { + "epoch": 0.32198015150625403, + "grad_norm": 0.0, + "learning_rate": 1.5852281079910744e-05, + "loss": 1.3359, + "step": 10966 + }, + { + "epoch": 0.32200951318339305, + "grad_norm": 0.0, + "learning_rate": 1.5851509949449554e-05, + "loss": 1.333, + "step": 10967 + }, + { + "epoch": 0.322038874860532, + "grad_norm": 0.0, + "learning_rate": 1.5850738766071666e-05, + "loss": 1.4297, + "step": 10968 + }, + { + "epoch": 0.322068236537671, + "grad_norm": 0.0, + "learning_rate": 1.5849967529784043e-05, + "loss": 1.4717, + "step": 10969 + }, + { + "epoch": 0.32209759821481004, + "grad_norm": 0.0, + "learning_rate": 1.5849196240593677e-05, + "loss": 1.3467, + "step": 10970 + }, + { + "epoch": 0.322126959891949, + "grad_norm": 0.0, + "learning_rate": 1.584842489850752e-05, + "loss": 1.2583, + "step": 10971 + }, + { + "epoch": 0.322156321569088, + "grad_norm": 0.0, + "learning_rate": 1.5847653503532565e-05, + "loss": 1.4062, + "step": 10972 + }, + { + "epoch": 0.32218568324622704, + "grad_norm": 0.0, + "learning_rate": 1.584688205567578e-05, + "loss": 1.3496, + "step": 10973 + }, + { + "epoch": 0.322215044923366, + "grad_norm": 0.0, + "learning_rate": 1.5846110554944143e-05, + "loss": 1.3809, + "step": 10974 + }, + { + "epoch": 0.322244406600505, + "grad_norm": 0.0, + "learning_rate": 1.584533900134463e-05, + "loss": 1.3398, + "step": 10975 + }, + { + "epoch": 0.32227376827764403, + "grad_norm": 0.0, + "learning_rate": 1.584456739488422e-05, + "loss": 1.3291, + "step": 10976 + }, + { + "epoch": 0.322303129954783, + "grad_norm": 0.0, + "learning_rate": 1.5843795735569895e-05, + "loss": 1.4297, + "step": 10977 + }, + { + "epoch": 0.322332491631922, + "grad_norm": 0.0, + "learning_rate": 1.5843024023408622e-05, + "loss": 1.2832, + "step": 10978 + }, + { + "epoch": 0.32236185330906103, + "grad_norm": 0.0, + "learning_rate": 1.5842252258407388e-05, + "loss": 1.5127, + "step": 10979 + }, + { + "epoch": 0.3223912149862, + "grad_norm": 0.0, + "learning_rate": 1.5841480440573172e-05, + "loss": 1.376, + "step": 10980 + }, + { + "epoch": 0.322420576663339, + "grad_norm": 0.0, + "learning_rate": 1.5840708569912953e-05, + "loss": 1.3926, + "step": 10981 + }, + { + "epoch": 0.322449938340478, + "grad_norm": 0.0, + "learning_rate": 1.5839936646433708e-05, + "loss": 1.3984, + "step": 10982 + }, + { + "epoch": 0.322479300017617, + "grad_norm": 0.0, + "learning_rate": 1.5839164670142423e-05, + "loss": 1.3623, + "step": 10983 + }, + { + "epoch": 0.322508661694756, + "grad_norm": 0.0, + "learning_rate": 1.5838392641046073e-05, + "loss": 1.3369, + "step": 10984 + }, + { + "epoch": 0.322538023371895, + "grad_norm": 0.0, + "learning_rate": 1.5837620559151646e-05, + "loss": 1.2852, + "step": 10985 + }, + { + "epoch": 0.322567385049034, + "grad_norm": 0.0, + "learning_rate": 1.583684842446612e-05, + "loss": 1.4277, + "step": 10986 + }, + { + "epoch": 0.322596746726173, + "grad_norm": 0.0, + "learning_rate": 1.5836076236996478e-05, + "loss": 1.2412, + "step": 10987 + }, + { + "epoch": 0.322626108403312, + "grad_norm": 0.0, + "learning_rate": 1.5835303996749706e-05, + "loss": 1.373, + "step": 10988 + }, + { + "epoch": 0.322655470080451, + "grad_norm": 0.0, + "learning_rate": 1.5834531703732784e-05, + "loss": 1.3223, + "step": 10989 + }, + { + "epoch": 0.32268483175759, + "grad_norm": 0.0, + "learning_rate": 1.58337593579527e-05, + "loss": 1.4375, + "step": 10990 + }, + { + "epoch": 0.322714193434729, + "grad_norm": 0.0, + "learning_rate": 1.5832986959416432e-05, + "loss": 1.4736, + "step": 10991 + }, + { + "epoch": 0.322743555111868, + "grad_norm": 0.0, + "learning_rate": 1.583221450813097e-05, + "loss": 1.4258, + "step": 10992 + }, + { + "epoch": 0.322772916789007, + "grad_norm": 0.0, + "learning_rate": 1.5831442004103303e-05, + "loss": 1.3057, + "step": 10993 + }, + { + "epoch": 0.322802278466146, + "grad_norm": 0.0, + "learning_rate": 1.583066944734041e-05, + "loss": 1.418, + "step": 10994 + }, + { + "epoch": 0.32283164014328497, + "grad_norm": 0.0, + "learning_rate": 1.582989683784928e-05, + "loss": 1.3857, + "step": 10995 + }, + { + "epoch": 0.322861001820424, + "grad_norm": 0.0, + "learning_rate": 1.5829124175636896e-05, + "loss": 1.2979, + "step": 10996 + }, + { + "epoch": 0.322890363497563, + "grad_norm": 0.0, + "learning_rate": 1.5828351460710255e-05, + "loss": 1.375, + "step": 10997 + }, + { + "epoch": 0.32291972517470197, + "grad_norm": 0.0, + "learning_rate": 1.5827578693076334e-05, + "loss": 1.332, + "step": 10998 + }, + { + "epoch": 0.322949086851841, + "grad_norm": 0.0, + "learning_rate": 1.582680587274213e-05, + "loss": 1.3975, + "step": 10999 + }, + { + "epoch": 0.32297844852898, + "grad_norm": 0.0, + "learning_rate": 1.5826032999714625e-05, + "loss": 1.291, + "step": 11000 + }, + { + "epoch": 0.32300781020611896, + "grad_norm": 0.0, + "learning_rate": 1.5825260074000816e-05, + "loss": 1.4102, + "step": 11001 + }, + { + "epoch": 0.323037171883258, + "grad_norm": 0.0, + "learning_rate": 1.5824487095607687e-05, + "loss": 1.2939, + "step": 11002 + }, + { + "epoch": 0.323066533560397, + "grad_norm": 0.0, + "learning_rate": 1.5823714064542227e-05, + "loss": 1.4736, + "step": 11003 + }, + { + "epoch": 0.32309589523753596, + "grad_norm": 0.0, + "learning_rate": 1.582294098081143e-05, + "loss": 1.376, + "step": 11004 + }, + { + "epoch": 0.323125256914675, + "grad_norm": 0.0, + "learning_rate": 1.5822167844422285e-05, + "loss": 1.1807, + "step": 11005 + }, + { + "epoch": 0.323154618591814, + "grad_norm": 0.0, + "learning_rate": 1.582139465538179e-05, + "loss": 1.3394, + "step": 11006 + }, + { + "epoch": 0.32318398026895295, + "grad_norm": 0.0, + "learning_rate": 1.582062141369693e-05, + "loss": 1.417, + "step": 11007 + }, + { + "epoch": 0.32321334194609197, + "grad_norm": 0.0, + "learning_rate": 1.5819848119374696e-05, + "loss": 1.2822, + "step": 11008 + }, + { + "epoch": 0.32324270362323093, + "grad_norm": 0.0, + "learning_rate": 1.5819074772422092e-05, + "loss": 1.3594, + "step": 11009 + }, + { + "epoch": 0.32327206530036995, + "grad_norm": 0.0, + "learning_rate": 1.5818301372846102e-05, + "loss": 1.3599, + "step": 11010 + }, + { + "epoch": 0.32330142697750897, + "grad_norm": 0.0, + "learning_rate": 1.581752792065372e-05, + "loss": 1.3555, + "step": 11011 + }, + { + "epoch": 0.3233307886546479, + "grad_norm": 0.0, + "learning_rate": 1.5816754415851945e-05, + "loss": 1.3633, + "step": 11012 + }, + { + "epoch": 0.32336015033178694, + "grad_norm": 0.0, + "learning_rate": 1.5815980858447768e-05, + "loss": 1.5283, + "step": 11013 + }, + { + "epoch": 0.32338951200892596, + "grad_norm": 0.0, + "learning_rate": 1.5815207248448187e-05, + "loss": 1.2803, + "step": 11014 + }, + { + "epoch": 0.3234188736860649, + "grad_norm": 0.0, + "learning_rate": 1.58144335858602e-05, + "loss": 1.2861, + "step": 11015 + }, + { + "epoch": 0.32344823536320394, + "grad_norm": 0.0, + "learning_rate": 1.58136598706908e-05, + "loss": 1.4736, + "step": 11016 + }, + { + "epoch": 0.32347759704034296, + "grad_norm": 0.0, + "learning_rate": 1.5812886102946986e-05, + "loss": 1.2959, + "step": 11017 + }, + { + "epoch": 0.3235069587174819, + "grad_norm": 0.0, + "learning_rate": 1.5812112282635748e-05, + "loss": 1.2725, + "step": 11018 + }, + { + "epoch": 0.32353632039462094, + "grad_norm": 0.0, + "learning_rate": 1.58113384097641e-05, + "loss": 1.4521, + "step": 11019 + }, + { + "epoch": 0.32356568207175995, + "grad_norm": 0.0, + "learning_rate": 1.5810564484339025e-05, + "loss": 1.2656, + "step": 11020 + }, + { + "epoch": 0.3235950437488989, + "grad_norm": 0.0, + "learning_rate": 1.580979050636753e-05, + "loss": 1.4785, + "step": 11021 + }, + { + "epoch": 0.32362440542603793, + "grad_norm": 0.0, + "learning_rate": 1.5809016475856606e-05, + "loss": 1.3672, + "step": 11022 + }, + { + "epoch": 0.32365376710317695, + "grad_norm": 0.0, + "learning_rate": 1.580824239281326e-05, + "loss": 1.3477, + "step": 11023 + }, + { + "epoch": 0.3236831287803159, + "grad_norm": 0.0, + "learning_rate": 1.5807468257244495e-05, + "loss": 1.3799, + "step": 11024 + }, + { + "epoch": 0.3237124904574549, + "grad_norm": 0.0, + "learning_rate": 1.5806694069157305e-05, + "loss": 1.499, + "step": 11025 + }, + { + "epoch": 0.32374185213459394, + "grad_norm": 0.0, + "learning_rate": 1.580591982855869e-05, + "loss": 1.29, + "step": 11026 + }, + { + "epoch": 0.3237712138117329, + "grad_norm": 0.0, + "learning_rate": 1.580514553545566e-05, + "loss": 1.4326, + "step": 11027 + }, + { + "epoch": 0.3238005754888719, + "grad_norm": 0.0, + "learning_rate": 1.580437118985521e-05, + "loss": 1.4653, + "step": 11028 + }, + { + "epoch": 0.32382993716601094, + "grad_norm": 0.0, + "learning_rate": 1.5803596791764344e-05, + "loss": 1.1172, + "step": 11029 + }, + { + "epoch": 0.3238592988431499, + "grad_norm": 0.0, + "learning_rate": 1.580282234119007e-05, + "loss": 1.3701, + "step": 11030 + }, + { + "epoch": 0.3238886605202889, + "grad_norm": 0.0, + "learning_rate": 1.580204783813938e-05, + "loss": 1.4248, + "step": 11031 + }, + { + "epoch": 0.32391802219742794, + "grad_norm": 0.0, + "learning_rate": 1.5801273282619292e-05, + "loss": 1.3682, + "step": 11032 + }, + { + "epoch": 0.3239473838745669, + "grad_norm": 0.0, + "learning_rate": 1.58004986746368e-05, + "loss": 1.4512, + "step": 11033 + }, + { + "epoch": 0.3239767455517059, + "grad_norm": 0.0, + "learning_rate": 1.5799724014198914e-05, + "loss": 1.3701, + "step": 11034 + }, + { + "epoch": 0.32400610722884493, + "grad_norm": 0.0, + "learning_rate": 1.579894930131264e-05, + "loss": 1.3682, + "step": 11035 + }, + { + "epoch": 0.3240354689059839, + "grad_norm": 0.0, + "learning_rate": 1.5798174535984983e-05, + "loss": 1.2837, + "step": 11036 + }, + { + "epoch": 0.3240648305831229, + "grad_norm": 0.0, + "learning_rate": 1.5797399718222942e-05, + "loss": 1.2656, + "step": 11037 + }, + { + "epoch": 0.3240941922602619, + "grad_norm": 0.0, + "learning_rate": 1.579662484803354e-05, + "loss": 1.3057, + "step": 11038 + }, + { + "epoch": 0.3241235539374009, + "grad_norm": 0.0, + "learning_rate": 1.579584992542377e-05, + "loss": 1.4941, + "step": 11039 + }, + { + "epoch": 0.3241529156145399, + "grad_norm": 0.0, + "learning_rate": 1.5795074950400646e-05, + "loss": 1.3887, + "step": 11040 + }, + { + "epoch": 0.3241822772916789, + "grad_norm": 0.0, + "learning_rate": 1.5794299922971175e-05, + "loss": 1.4131, + "step": 11041 + }, + { + "epoch": 0.3242116389688179, + "grad_norm": 0.0, + "learning_rate": 1.5793524843142365e-05, + "loss": 1.4023, + "step": 11042 + }, + { + "epoch": 0.3242410006459569, + "grad_norm": 0.0, + "learning_rate": 1.5792749710921227e-05, + "loss": 1.333, + "step": 11043 + }, + { + "epoch": 0.3242703623230959, + "grad_norm": 0.0, + "learning_rate": 1.579197452631477e-05, + "loss": 1.4424, + "step": 11044 + }, + { + "epoch": 0.3242997240002349, + "grad_norm": 0.0, + "learning_rate": 1.579119928933e-05, + "loss": 1.3936, + "step": 11045 + }, + { + "epoch": 0.3243290856773739, + "grad_norm": 0.0, + "learning_rate": 1.5790423999973935e-05, + "loss": 1.4141, + "step": 11046 + }, + { + "epoch": 0.3243584473545129, + "grad_norm": 0.0, + "learning_rate": 1.5789648658253583e-05, + "loss": 1.4199, + "step": 11047 + }, + { + "epoch": 0.3243878090316519, + "grad_norm": 0.0, + "learning_rate": 1.5788873264175956e-05, + "loss": 1.3809, + "step": 11048 + }, + { + "epoch": 0.3244171707087909, + "grad_norm": 0.0, + "learning_rate": 1.5788097817748064e-05, + "loss": 1.3262, + "step": 11049 + }, + { + "epoch": 0.3244465323859299, + "grad_norm": 0.0, + "learning_rate": 1.578732231897692e-05, + "loss": 1.291, + "step": 11050 + }, + { + "epoch": 0.32447589406306887, + "grad_norm": 0.0, + "learning_rate": 1.5786546767869544e-05, + "loss": 1.4209, + "step": 11051 + }, + { + "epoch": 0.3245052557402079, + "grad_norm": 0.0, + "learning_rate": 1.578577116443294e-05, + "loss": 1.3438, + "step": 11052 + }, + { + "epoch": 0.3245346174173469, + "grad_norm": 0.0, + "learning_rate": 1.5784995508674125e-05, + "loss": 1.2261, + "step": 11053 + }, + { + "epoch": 0.32456397909448587, + "grad_norm": 0.0, + "learning_rate": 1.5784219800600115e-05, + "loss": 1.4375, + "step": 11054 + }, + { + "epoch": 0.3245933407716249, + "grad_norm": 0.0, + "learning_rate": 1.578344404021792e-05, + "loss": 1.4346, + "step": 11055 + }, + { + "epoch": 0.3246227024487639, + "grad_norm": 0.0, + "learning_rate": 1.5782668227534564e-05, + "loss": 1.2832, + "step": 11056 + }, + { + "epoch": 0.32465206412590286, + "grad_norm": 0.0, + "learning_rate": 1.5781892362557058e-05, + "loss": 1.2422, + "step": 11057 + }, + { + "epoch": 0.3246814258030419, + "grad_norm": 0.0, + "learning_rate": 1.5781116445292418e-05, + "loss": 1.2646, + "step": 11058 + }, + { + "epoch": 0.32471078748018084, + "grad_norm": 0.0, + "learning_rate": 1.578034047574766e-05, + "loss": 1.3838, + "step": 11059 + }, + { + "epoch": 0.32474014915731986, + "grad_norm": 0.0, + "learning_rate": 1.5779564453929804e-05, + "loss": 1.3037, + "step": 11060 + }, + { + "epoch": 0.3247695108344589, + "grad_norm": 0.0, + "learning_rate": 1.5778788379845866e-05, + "loss": 1.3984, + "step": 11061 + }, + { + "epoch": 0.32479887251159784, + "grad_norm": 0.0, + "learning_rate": 1.5778012253502866e-05, + "loss": 1.4336, + "step": 11062 + }, + { + "epoch": 0.32482823418873685, + "grad_norm": 0.0, + "learning_rate": 1.577723607490782e-05, + "loss": 1.4541, + "step": 11063 + }, + { + "epoch": 0.32485759586587587, + "grad_norm": 0.0, + "learning_rate": 1.5776459844067747e-05, + "loss": 1.3608, + "step": 11064 + }, + { + "epoch": 0.32488695754301483, + "grad_norm": 0.0, + "learning_rate": 1.577568356098967e-05, + "loss": 1.4092, + "step": 11065 + }, + { + "epoch": 0.32491631922015385, + "grad_norm": 0.0, + "learning_rate": 1.5774907225680605e-05, + "loss": 1.4268, + "step": 11066 + }, + { + "epoch": 0.32494568089729287, + "grad_norm": 0.0, + "learning_rate": 1.577413083814758e-05, + "loss": 1.4141, + "step": 11067 + }, + { + "epoch": 0.3249750425744318, + "grad_norm": 0.0, + "learning_rate": 1.5773354398397608e-05, + "loss": 1.2549, + "step": 11068 + }, + { + "epoch": 0.32500440425157084, + "grad_norm": 0.0, + "learning_rate": 1.5772577906437715e-05, + "loss": 1.5312, + "step": 11069 + }, + { + "epoch": 0.32503376592870986, + "grad_norm": 0.0, + "learning_rate": 1.577180136227492e-05, + "loss": 1.3486, + "step": 11070 + }, + { + "epoch": 0.3250631276058488, + "grad_norm": 0.0, + "learning_rate": 1.577102476591625e-05, + "loss": 1.3564, + "step": 11071 + }, + { + "epoch": 0.32509248928298784, + "grad_norm": 0.0, + "learning_rate": 1.5770248117368722e-05, + "loss": 1.3271, + "step": 11072 + }, + { + "epoch": 0.32512185096012686, + "grad_norm": 0.0, + "learning_rate": 1.5769471416639365e-05, + "loss": 1.3691, + "step": 11073 + }, + { + "epoch": 0.3251512126372658, + "grad_norm": 0.0, + "learning_rate": 1.5768694663735194e-05, + "loss": 1.2275, + "step": 11074 + }, + { + "epoch": 0.32518057431440484, + "grad_norm": 0.0, + "learning_rate": 1.5767917858663247e-05, + "loss": 1.3242, + "step": 11075 + }, + { + "epoch": 0.32520993599154385, + "grad_norm": 0.0, + "learning_rate": 1.576714100143054e-05, + "loss": 1.3887, + "step": 11076 + }, + { + "epoch": 0.3252392976686828, + "grad_norm": 0.0, + "learning_rate": 1.5766364092044096e-05, + "loss": 1.4482, + "step": 11077 + }, + { + "epoch": 0.32526865934582183, + "grad_norm": 0.0, + "learning_rate": 1.576558713051095e-05, + "loss": 1.3408, + "step": 11078 + }, + { + "epoch": 0.32529802102296085, + "grad_norm": 0.0, + "learning_rate": 1.576481011683812e-05, + "loss": 1.2705, + "step": 11079 + }, + { + "epoch": 0.3253273827000998, + "grad_norm": 0.0, + "learning_rate": 1.5764033051032638e-05, + "loss": 1.4434, + "step": 11080 + }, + { + "epoch": 0.3253567443772388, + "grad_norm": 0.0, + "learning_rate": 1.5763255933101526e-05, + "loss": 1.3926, + "step": 11081 + }, + { + "epoch": 0.32538610605437784, + "grad_norm": 0.0, + "learning_rate": 1.5762478763051816e-05, + "loss": 1.4619, + "step": 11082 + }, + { + "epoch": 0.3254154677315168, + "grad_norm": 0.0, + "learning_rate": 1.5761701540890535e-05, + "loss": 1.4199, + "step": 11083 + }, + { + "epoch": 0.3254448294086558, + "grad_norm": 0.0, + "learning_rate": 1.576092426662471e-05, + "loss": 1.4785, + "step": 11084 + }, + { + "epoch": 0.32547419108579484, + "grad_norm": 0.0, + "learning_rate": 1.5760146940261375e-05, + "loss": 1.3271, + "step": 11085 + }, + { + "epoch": 0.3255035527629338, + "grad_norm": 0.0, + "learning_rate": 1.575936956180755e-05, + "loss": 1.3818, + "step": 11086 + }, + { + "epoch": 0.3255329144400728, + "grad_norm": 0.0, + "learning_rate": 1.5758592131270276e-05, + "loss": 1.5137, + "step": 11087 + }, + { + "epoch": 0.32556227611721184, + "grad_norm": 0.0, + "learning_rate": 1.575781464865658e-05, + "loss": 1.3906, + "step": 11088 + }, + { + "epoch": 0.3255916377943508, + "grad_norm": 0.0, + "learning_rate": 1.575703711397349e-05, + "loss": 1.2412, + "step": 11089 + }, + { + "epoch": 0.3256209994714898, + "grad_norm": 0.0, + "learning_rate": 1.5756259527228036e-05, + "loss": 1.3828, + "step": 11090 + }, + { + "epoch": 0.32565036114862883, + "grad_norm": 0.0, + "learning_rate": 1.5755481888427254e-05, + "loss": 1.5098, + "step": 11091 + }, + { + "epoch": 0.3256797228257678, + "grad_norm": 0.0, + "learning_rate": 1.5754704197578178e-05, + "loss": 1.3857, + "step": 11092 + }, + { + "epoch": 0.3257090845029068, + "grad_norm": 0.0, + "learning_rate": 1.5753926454687834e-05, + "loss": 1.252, + "step": 11093 + }, + { + "epoch": 0.3257384461800458, + "grad_norm": 0.0, + "learning_rate": 1.575314865976326e-05, + "loss": 1.2852, + "step": 11094 + }, + { + "epoch": 0.3257678078571848, + "grad_norm": 0.0, + "learning_rate": 1.575237081281149e-05, + "loss": 1.3906, + "step": 11095 + }, + { + "epoch": 0.3257971695343238, + "grad_norm": 0.0, + "learning_rate": 1.575159291383956e-05, + "loss": 1.4307, + "step": 11096 + }, + { + "epoch": 0.3258265312114628, + "grad_norm": 0.0, + "learning_rate": 1.57508149628545e-05, + "loss": 1.4277, + "step": 11097 + }, + { + "epoch": 0.3258558928886018, + "grad_norm": 0.0, + "learning_rate": 1.5750036959863352e-05, + "loss": 1.4551, + "step": 11098 + }, + { + "epoch": 0.3258852545657408, + "grad_norm": 0.0, + "learning_rate": 1.574925890487314e-05, + "loss": 1.3965, + "step": 11099 + }, + { + "epoch": 0.3259146162428798, + "grad_norm": 0.0, + "learning_rate": 1.5748480797890913e-05, + "loss": 1.3936, + "step": 11100 + }, + { + "epoch": 0.3259439779200188, + "grad_norm": 0.0, + "learning_rate": 1.57477026389237e-05, + "loss": 1.3389, + "step": 11101 + }, + { + "epoch": 0.3259733395971578, + "grad_norm": 0.0, + "learning_rate": 1.5746924427978537e-05, + "loss": 1.4229, + "step": 11102 + }, + { + "epoch": 0.3260027012742968, + "grad_norm": 0.0, + "learning_rate": 1.5746146165062467e-05, + "loss": 1.4404, + "step": 11103 + }, + { + "epoch": 0.3260320629514358, + "grad_norm": 0.0, + "learning_rate": 1.5745367850182527e-05, + "loss": 1.3799, + "step": 11104 + }, + { + "epoch": 0.3260614246285748, + "grad_norm": 0.0, + "learning_rate": 1.5744589483345753e-05, + "loss": 1.2144, + "step": 11105 + }, + { + "epoch": 0.3260907863057138, + "grad_norm": 0.0, + "learning_rate": 1.5743811064559184e-05, + "loss": 1.2471, + "step": 11106 + }, + { + "epoch": 0.32612014798285277, + "grad_norm": 0.0, + "learning_rate": 1.5743032593829862e-05, + "loss": 1.4688, + "step": 11107 + }, + { + "epoch": 0.3261495096599918, + "grad_norm": 0.0, + "learning_rate": 1.5742254071164826e-05, + "loss": 1.3936, + "step": 11108 + }, + { + "epoch": 0.3261788713371308, + "grad_norm": 0.0, + "learning_rate": 1.5741475496571114e-05, + "loss": 1.3613, + "step": 11109 + }, + { + "epoch": 0.32620823301426977, + "grad_norm": 0.0, + "learning_rate": 1.5740696870055773e-05, + "loss": 1.4375, + "step": 11110 + }, + { + "epoch": 0.3262375946914088, + "grad_norm": 0.0, + "learning_rate": 1.5739918191625833e-05, + "loss": 1.3584, + "step": 11111 + }, + { + "epoch": 0.32626695636854774, + "grad_norm": 0.0, + "learning_rate": 1.5739139461288346e-05, + "loss": 1.3389, + "step": 11112 + }, + { + "epoch": 0.32629631804568676, + "grad_norm": 0.0, + "learning_rate": 1.573836067905035e-05, + "loss": 1.5879, + "step": 11113 + }, + { + "epoch": 0.3263256797228258, + "grad_norm": 0.0, + "learning_rate": 1.5737581844918893e-05, + "loss": 1.2168, + "step": 11114 + }, + { + "epoch": 0.32635504139996474, + "grad_norm": 0.0, + "learning_rate": 1.573680295890101e-05, + "loss": 1.3984, + "step": 11115 + }, + { + "epoch": 0.32638440307710376, + "grad_norm": 0.0, + "learning_rate": 1.573602402100375e-05, + "loss": 1.4326, + "step": 11116 + }, + { + "epoch": 0.3264137647542428, + "grad_norm": 0.0, + "learning_rate": 1.5735245031234155e-05, + "loss": 1.3867, + "step": 11117 + }, + { + "epoch": 0.32644312643138174, + "grad_norm": 0.0, + "learning_rate": 1.5734465989599272e-05, + "loss": 1.3896, + "step": 11118 + }, + { + "epoch": 0.32647248810852075, + "grad_norm": 0.0, + "learning_rate": 1.5733686896106143e-05, + "loss": 1.3936, + "step": 11119 + }, + { + "epoch": 0.32650184978565977, + "grad_norm": 0.0, + "learning_rate": 1.5732907750761814e-05, + "loss": 1.3184, + "step": 11120 + }, + { + "epoch": 0.32653121146279873, + "grad_norm": 0.0, + "learning_rate": 1.5732128553573332e-05, + "loss": 1.4277, + "step": 11121 + }, + { + "epoch": 0.32656057313993775, + "grad_norm": 0.0, + "learning_rate": 1.573134930454775e-05, + "loss": 1.3438, + "step": 11122 + }, + { + "epoch": 0.32658993481707677, + "grad_norm": 0.0, + "learning_rate": 1.5730570003692097e-05, + "loss": 1.3428, + "step": 11123 + }, + { + "epoch": 0.3266192964942157, + "grad_norm": 0.0, + "learning_rate": 1.572979065101344e-05, + "loss": 1.4746, + "step": 11124 + }, + { + "epoch": 0.32664865817135474, + "grad_norm": 0.0, + "learning_rate": 1.5729011246518814e-05, + "loss": 1.4248, + "step": 11125 + }, + { + "epoch": 0.32667801984849376, + "grad_norm": 0.0, + "learning_rate": 1.5728231790215276e-05, + "loss": 1.4316, + "step": 11126 + }, + { + "epoch": 0.3267073815256327, + "grad_norm": 0.0, + "learning_rate": 1.572745228210987e-05, + "loss": 1.4111, + "step": 11127 + }, + { + "epoch": 0.32673674320277174, + "grad_norm": 0.0, + "learning_rate": 1.5726672722209645e-05, + "loss": 1.4355, + "step": 11128 + }, + { + "epoch": 0.32676610487991076, + "grad_norm": 0.0, + "learning_rate": 1.5725893110521654e-05, + "loss": 1.3252, + "step": 11129 + }, + { + "epoch": 0.3267954665570497, + "grad_norm": 0.0, + "learning_rate": 1.5725113447052943e-05, + "loss": 1.3789, + "step": 11130 + }, + { + "epoch": 0.32682482823418874, + "grad_norm": 0.0, + "learning_rate": 1.5724333731810566e-05, + "loss": 1.3281, + "step": 11131 + }, + { + "epoch": 0.32685418991132775, + "grad_norm": 0.0, + "learning_rate": 1.5723553964801574e-05, + "loss": 1.3574, + "step": 11132 + }, + { + "epoch": 0.3268835515884667, + "grad_norm": 0.0, + "learning_rate": 1.5722774146033014e-05, + "loss": 1.334, + "step": 11133 + }, + { + "epoch": 0.32691291326560573, + "grad_norm": 0.0, + "learning_rate": 1.5721994275511943e-05, + "loss": 1.3896, + "step": 11134 + }, + { + "epoch": 0.32694227494274475, + "grad_norm": 0.0, + "learning_rate": 1.5721214353245417e-05, + "loss": 1.4219, + "step": 11135 + }, + { + "epoch": 0.3269716366198837, + "grad_norm": 0.0, + "learning_rate": 1.5720434379240482e-05, + "loss": 1.2949, + "step": 11136 + }, + { + "epoch": 0.3270009982970227, + "grad_norm": 0.0, + "learning_rate": 1.571965435350419e-05, + "loss": 1.2998, + "step": 11137 + }, + { + "epoch": 0.32703035997416174, + "grad_norm": 0.0, + "learning_rate": 1.5718874276043604e-05, + "loss": 1.376, + "step": 11138 + }, + { + "epoch": 0.3270597216513007, + "grad_norm": 0.0, + "learning_rate": 1.571809414686577e-05, + "loss": 1.2627, + "step": 11139 + }, + { + "epoch": 0.3270890833284397, + "grad_norm": 0.0, + "learning_rate": 1.5717313965977747e-05, + "loss": 1.3486, + "step": 11140 + }, + { + "epoch": 0.32711844500557874, + "grad_norm": 0.0, + "learning_rate": 1.571653373338659e-05, + "loss": 1.3438, + "step": 11141 + }, + { + "epoch": 0.3271478066827177, + "grad_norm": 0.0, + "learning_rate": 1.5715753449099357e-05, + "loss": 1.335, + "step": 11142 + }, + { + "epoch": 0.3271771683598567, + "grad_norm": 0.0, + "learning_rate": 1.5714973113123094e-05, + "loss": 1.3174, + "step": 11143 + }, + { + "epoch": 0.32720653003699574, + "grad_norm": 0.0, + "learning_rate": 1.5714192725464875e-05, + "loss": 1.373, + "step": 11144 + }, + { + "epoch": 0.3272358917141347, + "grad_norm": 0.0, + "learning_rate": 1.5713412286131738e-05, + "loss": 1.3369, + "step": 11145 + }, + { + "epoch": 0.3272652533912737, + "grad_norm": 0.0, + "learning_rate": 1.5712631795130757e-05, + "loss": 1.4355, + "step": 11146 + }, + { + "epoch": 0.32729461506841273, + "grad_norm": 0.0, + "learning_rate": 1.5711851252468985e-05, + "loss": 1.3955, + "step": 11147 + }, + { + "epoch": 0.3273239767455517, + "grad_norm": 0.0, + "learning_rate": 1.5711070658153474e-05, + "loss": 1.3018, + "step": 11148 + }, + { + "epoch": 0.3273533384226907, + "grad_norm": 0.0, + "learning_rate": 1.571029001219129e-05, + "loss": 1.3711, + "step": 11149 + }, + { + "epoch": 0.3273827000998297, + "grad_norm": 0.0, + "learning_rate": 1.5709509314589494e-05, + "loss": 1.4541, + "step": 11150 + }, + { + "epoch": 0.3274120617769687, + "grad_norm": 0.0, + "learning_rate": 1.5708728565355143e-05, + "loss": 1.2275, + "step": 11151 + }, + { + "epoch": 0.3274414234541077, + "grad_norm": 0.0, + "learning_rate": 1.5707947764495293e-05, + "loss": 1.3994, + "step": 11152 + }, + { + "epoch": 0.3274707851312467, + "grad_norm": 0.0, + "learning_rate": 1.5707166912017015e-05, + "loss": 1.3828, + "step": 11153 + }, + { + "epoch": 0.3275001468083857, + "grad_norm": 0.0, + "learning_rate": 1.570638600792736e-05, + "loss": 1.4141, + "step": 11154 + }, + { + "epoch": 0.3275295084855247, + "grad_norm": 0.0, + "learning_rate": 1.5705605052233396e-05, + "loss": 1.3975, + "step": 11155 + }, + { + "epoch": 0.3275588701626637, + "grad_norm": 0.0, + "learning_rate": 1.5704824044942187e-05, + "loss": 1.3799, + "step": 11156 + }, + { + "epoch": 0.3275882318398027, + "grad_norm": 0.0, + "learning_rate": 1.5704042986060794e-05, + "loss": 1.5264, + "step": 11157 + }, + { + "epoch": 0.3276175935169417, + "grad_norm": 0.0, + "learning_rate": 1.5703261875596273e-05, + "loss": 1.3857, + "step": 11158 + }, + { + "epoch": 0.3276469551940807, + "grad_norm": 0.0, + "learning_rate": 1.57024807135557e-05, + "loss": 1.4053, + "step": 11159 + }, + { + "epoch": 0.3276763168712197, + "grad_norm": 0.0, + "learning_rate": 1.5701699499946133e-05, + "loss": 1.3818, + "step": 11160 + }, + { + "epoch": 0.3277056785483587, + "grad_norm": 0.0, + "learning_rate": 1.5700918234774633e-05, + "loss": 1.3887, + "step": 11161 + }, + { + "epoch": 0.32773504022549765, + "grad_norm": 0.0, + "learning_rate": 1.570013691804827e-05, + "loss": 1.4004, + "step": 11162 + }, + { + "epoch": 0.32776440190263667, + "grad_norm": 0.0, + "learning_rate": 1.5699355549774115e-05, + "loss": 1.4697, + "step": 11163 + }, + { + "epoch": 0.3277937635797757, + "grad_norm": 0.0, + "learning_rate": 1.5698574129959223e-05, + "loss": 1.4385, + "step": 11164 + }, + { + "epoch": 0.32782312525691465, + "grad_norm": 0.0, + "learning_rate": 1.5697792658610667e-05, + "loss": 1.3691, + "step": 11165 + }, + { + "epoch": 0.32785248693405367, + "grad_norm": 0.0, + "learning_rate": 1.5697011135735514e-05, + "loss": 1.4395, + "step": 11166 + }, + { + "epoch": 0.3278818486111927, + "grad_norm": 0.0, + "learning_rate": 1.569622956134083e-05, + "loss": 1.2979, + "step": 11167 + }, + { + "epoch": 0.32791121028833164, + "grad_norm": 0.0, + "learning_rate": 1.569544793543368e-05, + "loss": 1.4062, + "step": 11168 + }, + { + "epoch": 0.32794057196547066, + "grad_norm": 0.0, + "learning_rate": 1.569466625802114e-05, + "loss": 1.3447, + "step": 11169 + }, + { + "epoch": 0.3279699336426097, + "grad_norm": 0.0, + "learning_rate": 1.569388452911027e-05, + "loss": 1.2686, + "step": 11170 + }, + { + "epoch": 0.32799929531974864, + "grad_norm": 0.0, + "learning_rate": 1.569310274870815e-05, + "loss": 1.3418, + "step": 11171 + }, + { + "epoch": 0.32802865699688766, + "grad_norm": 0.0, + "learning_rate": 1.569232091682184e-05, + "loss": 1.3682, + "step": 11172 + }, + { + "epoch": 0.3280580186740267, + "grad_norm": 0.0, + "learning_rate": 1.5691539033458415e-05, + "loss": 1.4268, + "step": 11173 + }, + { + "epoch": 0.32808738035116564, + "grad_norm": 0.0, + "learning_rate": 1.5690757098624943e-05, + "loss": 1.2959, + "step": 11174 + }, + { + "epoch": 0.32811674202830465, + "grad_norm": 0.0, + "learning_rate": 1.56899751123285e-05, + "loss": 1.3115, + "step": 11175 + }, + { + "epoch": 0.32814610370544367, + "grad_norm": 0.0, + "learning_rate": 1.568919307457615e-05, + "loss": 1.4492, + "step": 11176 + }, + { + "epoch": 0.32817546538258263, + "grad_norm": 0.0, + "learning_rate": 1.5688410985374977e-05, + "loss": 1.4492, + "step": 11177 + }, + { + "epoch": 0.32820482705972165, + "grad_norm": 0.0, + "learning_rate": 1.5687628844732042e-05, + "loss": 1.4316, + "step": 11178 + }, + { + "epoch": 0.32823418873686067, + "grad_norm": 0.0, + "learning_rate": 1.5686846652654423e-05, + "loss": 1.3809, + "step": 11179 + }, + { + "epoch": 0.3282635504139996, + "grad_norm": 0.0, + "learning_rate": 1.5686064409149193e-05, + "loss": 1.3018, + "step": 11180 + }, + { + "epoch": 0.32829291209113864, + "grad_norm": 0.0, + "learning_rate": 1.568528211422343e-05, + "loss": 1.3369, + "step": 11181 + }, + { + "epoch": 0.32832227376827766, + "grad_norm": 0.0, + "learning_rate": 1.56844997678842e-05, + "loss": 1.3662, + "step": 11182 + }, + { + "epoch": 0.3283516354454166, + "grad_norm": 0.0, + "learning_rate": 1.5683717370138585e-05, + "loss": 1.2515, + "step": 11183 + }, + { + "epoch": 0.32838099712255564, + "grad_norm": 0.0, + "learning_rate": 1.568293492099366e-05, + "loss": 1.4414, + "step": 11184 + }, + { + "epoch": 0.32841035879969466, + "grad_norm": 0.0, + "learning_rate": 1.56821524204565e-05, + "loss": 1.3184, + "step": 11185 + }, + { + "epoch": 0.3284397204768336, + "grad_norm": 0.0, + "learning_rate": 1.5681369868534174e-05, + "loss": 1.418, + "step": 11186 + }, + { + "epoch": 0.32846908215397264, + "grad_norm": 0.0, + "learning_rate": 1.568058726523377e-05, + "loss": 1.3926, + "step": 11187 + }, + { + "epoch": 0.32849844383111165, + "grad_norm": 0.0, + "learning_rate": 1.5679804610562358e-05, + "loss": 1.3896, + "step": 11188 + }, + { + "epoch": 0.3285278055082506, + "grad_norm": 0.0, + "learning_rate": 1.567902190452702e-05, + "loss": 1.3828, + "step": 11189 + }, + { + "epoch": 0.32855716718538963, + "grad_norm": 0.0, + "learning_rate": 1.5678239147134836e-05, + "loss": 1.3066, + "step": 11190 + }, + { + "epoch": 0.32858652886252865, + "grad_norm": 0.0, + "learning_rate": 1.5677456338392874e-05, + "loss": 1.2529, + "step": 11191 + }, + { + "epoch": 0.3286158905396676, + "grad_norm": 0.0, + "learning_rate": 1.5676673478308224e-05, + "loss": 1.332, + "step": 11192 + }, + { + "epoch": 0.3286452522168066, + "grad_norm": 0.0, + "learning_rate": 1.5675890566887963e-05, + "loss": 1.3052, + "step": 11193 + }, + { + "epoch": 0.32867461389394564, + "grad_norm": 0.0, + "learning_rate": 1.567510760413917e-05, + "loss": 1.3828, + "step": 11194 + }, + { + "epoch": 0.3287039755710846, + "grad_norm": 0.0, + "learning_rate": 1.5674324590068924e-05, + "loss": 1.3955, + "step": 11195 + }, + { + "epoch": 0.3287333372482236, + "grad_norm": 0.0, + "learning_rate": 1.5673541524684307e-05, + "loss": 1.3076, + "step": 11196 + }, + { + "epoch": 0.32876269892536264, + "grad_norm": 0.0, + "learning_rate": 1.56727584079924e-05, + "loss": 1.3276, + "step": 11197 + }, + { + "epoch": 0.3287920606025016, + "grad_norm": 0.0, + "learning_rate": 1.567197524000029e-05, + "loss": 1.4531, + "step": 11198 + }, + { + "epoch": 0.3288214222796406, + "grad_norm": 0.0, + "learning_rate": 1.5671192020715053e-05, + "loss": 1.3271, + "step": 11199 + }, + { + "epoch": 0.32885078395677964, + "grad_norm": 0.0, + "learning_rate": 1.5670408750143773e-05, + "loss": 1.4727, + "step": 11200 + }, + { + "epoch": 0.3288801456339186, + "grad_norm": 0.0, + "learning_rate": 1.5669625428293534e-05, + "loss": 1.4268, + "step": 11201 + }, + { + "epoch": 0.3289095073110576, + "grad_norm": 0.0, + "learning_rate": 1.5668842055171423e-05, + "loss": 1.4365, + "step": 11202 + }, + { + "epoch": 0.32893886898819663, + "grad_norm": 0.0, + "learning_rate": 1.566805863078452e-05, + "loss": 1.4297, + "step": 11203 + }, + { + "epoch": 0.3289682306653356, + "grad_norm": 0.0, + "learning_rate": 1.566727515513991e-05, + "loss": 1.3555, + "step": 11204 + }, + { + "epoch": 0.3289975923424746, + "grad_norm": 0.0, + "learning_rate": 1.566649162824468e-05, + "loss": 1.2598, + "step": 11205 + }, + { + "epoch": 0.3290269540196136, + "grad_norm": 0.0, + "learning_rate": 1.566570805010592e-05, + "loss": 1.4141, + "step": 11206 + }, + { + "epoch": 0.3290563156967526, + "grad_norm": 0.0, + "learning_rate": 1.5664924420730706e-05, + "loss": 1.4473, + "step": 11207 + }, + { + "epoch": 0.3290856773738916, + "grad_norm": 0.0, + "learning_rate": 1.566414074012613e-05, + "loss": 1.3047, + "step": 11208 + }, + { + "epoch": 0.3291150390510306, + "grad_norm": 0.0, + "learning_rate": 1.5663357008299282e-05, + "loss": 1.418, + "step": 11209 + }, + { + "epoch": 0.3291444007281696, + "grad_norm": 0.0, + "learning_rate": 1.566257322525724e-05, + "loss": 1.4932, + "step": 11210 + }, + { + "epoch": 0.3291737624053086, + "grad_norm": 0.0, + "learning_rate": 1.56617893910071e-05, + "loss": 1.3018, + "step": 11211 + }, + { + "epoch": 0.32920312408244756, + "grad_norm": 0.0, + "learning_rate": 1.566100550555595e-05, + "loss": 1.4404, + "step": 11212 + }, + { + "epoch": 0.3292324857595866, + "grad_norm": 0.0, + "learning_rate": 1.566022156891088e-05, + "loss": 1.4531, + "step": 11213 + }, + { + "epoch": 0.3292618474367256, + "grad_norm": 0.0, + "learning_rate": 1.5659437581078973e-05, + "loss": 1.4307, + "step": 11214 + }, + { + "epoch": 0.32929120911386456, + "grad_norm": 0.0, + "learning_rate": 1.5658653542067325e-05, + "loss": 1.2441, + "step": 11215 + }, + { + "epoch": 0.3293205707910036, + "grad_norm": 0.0, + "learning_rate": 1.565786945188302e-05, + "loss": 1.3896, + "step": 11216 + }, + { + "epoch": 0.3293499324681426, + "grad_norm": 0.0, + "learning_rate": 1.5657085310533156e-05, + "loss": 1.4404, + "step": 11217 + }, + { + "epoch": 0.32937929414528155, + "grad_norm": 0.0, + "learning_rate": 1.5656301118024823e-05, + "loss": 1.377, + "step": 11218 + }, + { + "epoch": 0.32940865582242057, + "grad_norm": 0.0, + "learning_rate": 1.5655516874365108e-05, + "loss": 1.416, + "step": 11219 + }, + { + "epoch": 0.3294380174995596, + "grad_norm": 0.0, + "learning_rate": 1.5654732579561103e-05, + "loss": 1.2959, + "step": 11220 + }, + { + "epoch": 0.32946737917669855, + "grad_norm": 0.0, + "learning_rate": 1.5653948233619904e-05, + "loss": 1.498, + "step": 11221 + }, + { + "epoch": 0.32949674085383757, + "grad_norm": 0.0, + "learning_rate": 1.5653163836548604e-05, + "loss": 1.3906, + "step": 11222 + }, + { + "epoch": 0.3295261025309766, + "grad_norm": 0.0, + "learning_rate": 1.565237938835429e-05, + "loss": 1.3242, + "step": 11223 + }, + { + "epoch": 0.32955546420811554, + "grad_norm": 0.0, + "learning_rate": 1.5651594889044072e-05, + "loss": 1.5117, + "step": 11224 + }, + { + "epoch": 0.32958482588525456, + "grad_norm": 0.0, + "learning_rate": 1.565081033862503e-05, + "loss": 1.4678, + "step": 11225 + }, + { + "epoch": 0.3296141875623936, + "grad_norm": 0.0, + "learning_rate": 1.5650025737104262e-05, + "loss": 1.3408, + "step": 11226 + }, + { + "epoch": 0.32964354923953254, + "grad_norm": 0.0, + "learning_rate": 1.5649241084488865e-05, + "loss": 1.4863, + "step": 11227 + }, + { + "epoch": 0.32967291091667156, + "grad_norm": 0.0, + "learning_rate": 1.5648456380785932e-05, + "loss": 1.3975, + "step": 11228 + }, + { + "epoch": 0.3297022725938106, + "grad_norm": 0.0, + "learning_rate": 1.5647671626002563e-05, + "loss": 1.2852, + "step": 11229 + }, + { + "epoch": 0.32973163427094954, + "grad_norm": 0.0, + "learning_rate": 1.564688682014585e-05, + "loss": 1.4268, + "step": 11230 + }, + { + "epoch": 0.32976099594808855, + "grad_norm": 0.0, + "learning_rate": 1.5646101963222898e-05, + "loss": 1.3779, + "step": 11231 + }, + { + "epoch": 0.32979035762522757, + "grad_norm": 0.0, + "learning_rate": 1.56453170552408e-05, + "loss": 1.2988, + "step": 11232 + }, + { + "epoch": 0.32981971930236653, + "grad_norm": 0.0, + "learning_rate": 1.5644532096206653e-05, + "loss": 1.4082, + "step": 11233 + }, + { + "epoch": 0.32984908097950555, + "grad_norm": 0.0, + "learning_rate": 1.5643747086127553e-05, + "loss": 1.4219, + "step": 11234 + }, + { + "epoch": 0.32987844265664457, + "grad_norm": 0.0, + "learning_rate": 1.5642962025010605e-05, + "loss": 1.333, + "step": 11235 + }, + { + "epoch": 0.3299078043337835, + "grad_norm": 0.0, + "learning_rate": 1.564217691286291e-05, + "loss": 1.3506, + "step": 11236 + }, + { + "epoch": 0.32993716601092254, + "grad_norm": 0.0, + "learning_rate": 1.5641391749691563e-05, + "loss": 1.3857, + "step": 11237 + }, + { + "epoch": 0.32996652768806156, + "grad_norm": 0.0, + "learning_rate": 1.564060653550366e-05, + "loss": 1.2896, + "step": 11238 + }, + { + "epoch": 0.3299958893652005, + "grad_norm": 0.0, + "learning_rate": 1.5639821270306313e-05, + "loss": 1.375, + "step": 11239 + }, + { + "epoch": 0.33002525104233954, + "grad_norm": 0.0, + "learning_rate": 1.5639035954106615e-05, + "loss": 1.1899, + "step": 11240 + }, + { + "epoch": 0.33005461271947856, + "grad_norm": 0.0, + "learning_rate": 1.5638250586911673e-05, + "loss": 1.4131, + "step": 11241 + }, + { + "epoch": 0.3300839743966175, + "grad_norm": 0.0, + "learning_rate": 1.5637465168728585e-05, + "loss": 1.5342, + "step": 11242 + }, + { + "epoch": 0.33011333607375654, + "grad_norm": 0.0, + "learning_rate": 1.5636679699564455e-05, + "loss": 1.21, + "step": 11243 + }, + { + "epoch": 0.33014269775089555, + "grad_norm": 0.0, + "learning_rate": 1.563589417942639e-05, + "loss": 1.4912, + "step": 11244 + }, + { + "epoch": 0.3301720594280345, + "grad_norm": 0.0, + "learning_rate": 1.563510860832149e-05, + "loss": 1.459, + "step": 11245 + }, + { + "epoch": 0.33020142110517353, + "grad_norm": 0.0, + "learning_rate": 1.5634322986256857e-05, + "loss": 1.3672, + "step": 11246 + }, + { + "epoch": 0.33023078278231255, + "grad_norm": 0.0, + "learning_rate": 1.56335373132396e-05, + "loss": 1.4014, + "step": 11247 + }, + { + "epoch": 0.3302601444594515, + "grad_norm": 0.0, + "learning_rate": 1.5632751589276823e-05, + "loss": 1.4561, + "step": 11248 + }, + { + "epoch": 0.3302895061365905, + "grad_norm": 0.0, + "learning_rate": 1.563196581437563e-05, + "loss": 1.3564, + "step": 11249 + }, + { + "epoch": 0.33031886781372954, + "grad_norm": 0.0, + "learning_rate": 1.5631179988543126e-05, + "loss": 1.4199, + "step": 11250 + }, + { + "epoch": 0.3303482294908685, + "grad_norm": 0.0, + "learning_rate": 1.5630394111786423e-05, + "loss": 1.4316, + "step": 11251 + }, + { + "epoch": 0.3303775911680075, + "grad_norm": 0.0, + "learning_rate": 1.562960818411262e-05, + "loss": 1.3379, + "step": 11252 + }, + { + "epoch": 0.33040695284514654, + "grad_norm": 0.0, + "learning_rate": 1.562882220552883e-05, + "loss": 1.3096, + "step": 11253 + }, + { + "epoch": 0.3304363145222855, + "grad_norm": 0.0, + "learning_rate": 1.562803617604216e-05, + "loss": 1.3477, + "step": 11254 + }, + { + "epoch": 0.3304656761994245, + "grad_norm": 0.0, + "learning_rate": 1.5627250095659715e-05, + "loss": 1.4209, + "step": 11255 + }, + { + "epoch": 0.33049503787656354, + "grad_norm": 0.0, + "learning_rate": 1.5626463964388612e-05, + "loss": 1.4639, + "step": 11256 + }, + { + "epoch": 0.3305243995537025, + "grad_norm": 0.0, + "learning_rate": 1.5625677782235952e-05, + "loss": 1.3574, + "step": 11257 + }, + { + "epoch": 0.3305537612308415, + "grad_norm": 0.0, + "learning_rate": 1.5624891549208843e-05, + "loss": 1.5273, + "step": 11258 + }, + { + "epoch": 0.33058312290798053, + "grad_norm": 0.0, + "learning_rate": 1.56241052653144e-05, + "loss": 1.3535, + "step": 11259 + }, + { + "epoch": 0.3306124845851195, + "grad_norm": 0.0, + "learning_rate": 1.5623318930559736e-05, + "loss": 1.2305, + "step": 11260 + }, + { + "epoch": 0.3306418462622585, + "grad_norm": 0.0, + "learning_rate": 1.5622532544951958e-05, + "loss": 1.3691, + "step": 11261 + }, + { + "epoch": 0.33067120793939747, + "grad_norm": 0.0, + "learning_rate": 1.5621746108498177e-05, + "loss": 1.4102, + "step": 11262 + }, + { + "epoch": 0.3307005696165365, + "grad_norm": 0.0, + "learning_rate": 1.5620959621205506e-05, + "loss": 1.4043, + "step": 11263 + }, + { + "epoch": 0.3307299312936755, + "grad_norm": 0.0, + "learning_rate": 1.562017308308106e-05, + "loss": 1.4795, + "step": 11264 + }, + { + "epoch": 0.33075929297081447, + "grad_norm": 0.0, + "learning_rate": 1.561938649413195e-05, + "loss": 1.4009, + "step": 11265 + }, + { + "epoch": 0.3307886546479535, + "grad_norm": 0.0, + "learning_rate": 1.5618599854365285e-05, + "loss": 1.3955, + "step": 11266 + }, + { + "epoch": 0.3308180163250925, + "grad_norm": 0.0, + "learning_rate": 1.5617813163788183e-05, + "loss": 1.3467, + "step": 11267 + }, + { + "epoch": 0.33084737800223146, + "grad_norm": 0.0, + "learning_rate": 1.5617026422407763e-05, + "loss": 1.5127, + "step": 11268 + }, + { + "epoch": 0.3308767396793705, + "grad_norm": 0.0, + "learning_rate": 1.561623963023113e-05, + "loss": 1.4072, + "step": 11269 + }, + { + "epoch": 0.3309061013565095, + "grad_norm": 0.0, + "learning_rate": 1.5615452787265403e-05, + "loss": 1.2695, + "step": 11270 + }, + { + "epoch": 0.33093546303364846, + "grad_norm": 0.0, + "learning_rate": 1.56146658935177e-05, + "loss": 1.4434, + "step": 11271 + }, + { + "epoch": 0.3309648247107875, + "grad_norm": 0.0, + "learning_rate": 1.5613878948995137e-05, + "loss": 1.2192, + "step": 11272 + }, + { + "epoch": 0.3309941863879265, + "grad_norm": 0.0, + "learning_rate": 1.5613091953704828e-05, + "loss": 1.3818, + "step": 11273 + }, + { + "epoch": 0.33102354806506545, + "grad_norm": 0.0, + "learning_rate": 1.5612304907653886e-05, + "loss": 1.3037, + "step": 11274 + }, + { + "epoch": 0.33105290974220447, + "grad_norm": 0.0, + "learning_rate": 1.561151781084944e-05, + "loss": 1.4355, + "step": 11275 + }, + { + "epoch": 0.3310822714193435, + "grad_norm": 0.0, + "learning_rate": 1.56107306632986e-05, + "loss": 1.3906, + "step": 11276 + }, + { + "epoch": 0.33111163309648245, + "grad_norm": 0.0, + "learning_rate": 1.5609943465008483e-05, + "loss": 1.333, + "step": 11277 + }, + { + "epoch": 0.33114099477362147, + "grad_norm": 0.0, + "learning_rate": 1.560915621598621e-05, + "loss": 1.3643, + "step": 11278 + }, + { + "epoch": 0.3311703564507605, + "grad_norm": 0.0, + "learning_rate": 1.5608368916238904e-05, + "loss": 1.4238, + "step": 11279 + }, + { + "epoch": 0.33119971812789945, + "grad_norm": 0.0, + "learning_rate": 1.560758156577368e-05, + "loss": 1.4229, + "step": 11280 + }, + { + "epoch": 0.33122907980503846, + "grad_norm": 0.0, + "learning_rate": 1.560679416459766e-05, + "loss": 1.4619, + "step": 11281 + }, + { + "epoch": 0.3312584414821775, + "grad_norm": 0.0, + "learning_rate": 1.5606006712717963e-05, + "loss": 1.2217, + "step": 11282 + }, + { + "epoch": 0.33128780315931644, + "grad_norm": 0.0, + "learning_rate": 1.5605219210141715e-05, + "loss": 1.5439, + "step": 11283 + }, + { + "epoch": 0.33131716483645546, + "grad_norm": 0.0, + "learning_rate": 1.560443165687603e-05, + "loss": 1.3506, + "step": 11284 + }, + { + "epoch": 0.3313465265135945, + "grad_norm": 0.0, + "learning_rate": 1.5603644052928037e-05, + "loss": 1.4736, + "step": 11285 + }, + { + "epoch": 0.33137588819073344, + "grad_norm": 0.0, + "learning_rate": 1.5602856398304855e-05, + "loss": 1.3096, + "step": 11286 + }, + { + "epoch": 0.33140524986787245, + "grad_norm": 0.0, + "learning_rate": 1.560206869301361e-05, + "loss": 1.373, + "step": 11287 + }, + { + "epoch": 0.33143461154501147, + "grad_norm": 0.0, + "learning_rate": 1.5601280937061417e-05, + "loss": 1.3066, + "step": 11288 + }, + { + "epoch": 0.33146397322215043, + "grad_norm": 0.0, + "learning_rate": 1.560049313045541e-05, + "loss": 1.417, + "step": 11289 + }, + { + "epoch": 0.33149333489928945, + "grad_norm": 0.0, + "learning_rate": 1.559970527320271e-05, + "loss": 1.3945, + "step": 11290 + }, + { + "epoch": 0.33152269657642847, + "grad_norm": 0.0, + "learning_rate": 1.559891736531044e-05, + "loss": 1.3066, + "step": 11291 + }, + { + "epoch": 0.3315520582535674, + "grad_norm": 0.0, + "learning_rate": 1.5598129406785728e-05, + "loss": 1.2881, + "step": 11292 + }, + { + "epoch": 0.33158141993070644, + "grad_norm": 0.0, + "learning_rate": 1.5597341397635693e-05, + "loss": 1.335, + "step": 11293 + }, + { + "epoch": 0.33161078160784546, + "grad_norm": 0.0, + "learning_rate": 1.5596553337867474e-05, + "loss": 1.3984, + "step": 11294 + }, + { + "epoch": 0.3316401432849844, + "grad_norm": 0.0, + "learning_rate": 1.5595765227488186e-05, + "loss": 1.4336, + "step": 11295 + }, + { + "epoch": 0.33166950496212344, + "grad_norm": 0.0, + "learning_rate": 1.559497706650496e-05, + "loss": 1.3027, + "step": 11296 + }, + { + "epoch": 0.33169886663926246, + "grad_norm": 0.0, + "learning_rate": 1.5594188854924924e-05, + "loss": 1.457, + "step": 11297 + }, + { + "epoch": 0.3317282283164014, + "grad_norm": 0.0, + "learning_rate": 1.5593400592755204e-05, + "loss": 1.2637, + "step": 11298 + }, + { + "epoch": 0.33175758999354044, + "grad_norm": 0.0, + "learning_rate": 1.559261228000293e-05, + "loss": 1.3799, + "step": 11299 + }, + { + "epoch": 0.33178695167067945, + "grad_norm": 0.0, + "learning_rate": 1.5591823916675232e-05, + "loss": 1.4336, + "step": 11300 + }, + { + "epoch": 0.3318163133478184, + "grad_norm": 0.0, + "learning_rate": 1.5591035502779238e-05, + "loss": 1.4023, + "step": 11301 + }, + { + "epoch": 0.33184567502495743, + "grad_norm": 0.0, + "learning_rate": 1.559024703832208e-05, + "loss": 1.4414, + "step": 11302 + }, + { + "epoch": 0.33187503670209645, + "grad_norm": 0.0, + "learning_rate": 1.5589458523310882e-05, + "loss": 1.5078, + "step": 11303 + }, + { + "epoch": 0.3319043983792354, + "grad_norm": 0.0, + "learning_rate": 1.558866995775278e-05, + "loss": 1.3428, + "step": 11304 + }, + { + "epoch": 0.3319337600563744, + "grad_norm": 0.0, + "learning_rate": 1.558788134165491e-05, + "loss": 1.3438, + "step": 11305 + }, + { + "epoch": 0.33196312173351344, + "grad_norm": 0.0, + "learning_rate": 1.5587092675024388e-05, + "loss": 1.5342, + "step": 11306 + }, + { + "epoch": 0.3319924834106524, + "grad_norm": 0.0, + "learning_rate": 1.5586303957868362e-05, + "loss": 1.4189, + "step": 11307 + }, + { + "epoch": 0.3320218450877914, + "grad_norm": 0.0, + "learning_rate": 1.558551519019396e-05, + "loss": 1.2964, + "step": 11308 + }, + { + "epoch": 0.33205120676493044, + "grad_norm": 0.0, + "learning_rate": 1.558472637200831e-05, + "loss": 1.3301, + "step": 11309 + }, + { + "epoch": 0.3320805684420694, + "grad_norm": 0.0, + "learning_rate": 1.558393750331855e-05, + "loss": 1.4648, + "step": 11310 + }, + { + "epoch": 0.3321099301192084, + "grad_norm": 0.0, + "learning_rate": 1.5583148584131814e-05, + "loss": 1.3984, + "step": 11311 + }, + { + "epoch": 0.3321392917963474, + "grad_norm": 0.0, + "learning_rate": 1.5582359614455237e-05, + "loss": 1.3828, + "step": 11312 + }, + { + "epoch": 0.3321686534734864, + "grad_norm": 0.0, + "learning_rate": 1.5581570594295952e-05, + "loss": 1.4678, + "step": 11313 + }, + { + "epoch": 0.3321980151506254, + "grad_norm": 0.0, + "learning_rate": 1.5580781523661094e-05, + "loss": 1.3916, + "step": 11314 + }, + { + "epoch": 0.3322273768277644, + "grad_norm": 0.0, + "learning_rate": 1.55799924025578e-05, + "loss": 1.2637, + "step": 11315 + }, + { + "epoch": 0.3322567385049034, + "grad_norm": 0.0, + "learning_rate": 1.5579203230993203e-05, + "loss": 1.3379, + "step": 11316 + }, + { + "epoch": 0.3322861001820424, + "grad_norm": 0.0, + "learning_rate": 1.5578414008974444e-05, + "loss": 1.2754, + "step": 11317 + }, + { + "epoch": 0.33231546185918137, + "grad_norm": 0.0, + "learning_rate": 1.5577624736508657e-05, + "loss": 1.3643, + "step": 11318 + }, + { + "epoch": 0.3323448235363204, + "grad_norm": 0.0, + "learning_rate": 1.5576835413602985e-05, + "loss": 1.3008, + "step": 11319 + }, + { + "epoch": 0.3323741852134594, + "grad_norm": 0.0, + "learning_rate": 1.5576046040264555e-05, + "loss": 1.2959, + "step": 11320 + }, + { + "epoch": 0.33240354689059837, + "grad_norm": 0.0, + "learning_rate": 1.557525661650052e-05, + "loss": 1.501, + "step": 11321 + }, + { + "epoch": 0.3324329085677374, + "grad_norm": 0.0, + "learning_rate": 1.557446714231801e-05, + "loss": 1.4443, + "step": 11322 + }, + { + "epoch": 0.3324622702448764, + "grad_norm": 0.0, + "learning_rate": 1.5573677617724164e-05, + "loss": 1.3857, + "step": 11323 + }, + { + "epoch": 0.33249163192201536, + "grad_norm": 0.0, + "learning_rate": 1.5572888042726126e-05, + "loss": 1.3828, + "step": 11324 + }, + { + "epoch": 0.3325209935991544, + "grad_norm": 0.0, + "learning_rate": 1.5572098417331033e-05, + "loss": 1.4165, + "step": 11325 + }, + { + "epoch": 0.3325503552762934, + "grad_norm": 0.0, + "learning_rate": 1.5571308741546025e-05, + "loss": 1.3779, + "step": 11326 + }, + { + "epoch": 0.33257971695343236, + "grad_norm": 0.0, + "learning_rate": 1.557051901537825e-05, + "loss": 1.4531, + "step": 11327 + }, + { + "epoch": 0.3326090786305714, + "grad_norm": 0.0, + "learning_rate": 1.5569729238834838e-05, + "loss": 1.4385, + "step": 11328 + }, + { + "epoch": 0.3326384403077104, + "grad_norm": 0.0, + "learning_rate": 1.5568939411922943e-05, + "loss": 1.2119, + "step": 11329 + }, + { + "epoch": 0.33266780198484935, + "grad_norm": 0.0, + "learning_rate": 1.5568149534649702e-05, + "loss": 1.2935, + "step": 11330 + }, + { + "epoch": 0.33269716366198837, + "grad_norm": 0.0, + "learning_rate": 1.556735960702226e-05, + "loss": 1.3076, + "step": 11331 + }, + { + "epoch": 0.3327265253391274, + "grad_norm": 0.0, + "learning_rate": 1.5566569629047754e-05, + "loss": 1.4551, + "step": 11332 + }, + { + "epoch": 0.33275588701626635, + "grad_norm": 0.0, + "learning_rate": 1.556577960073334e-05, + "loss": 1.4502, + "step": 11333 + }, + { + "epoch": 0.33278524869340537, + "grad_norm": 0.0, + "learning_rate": 1.5564989522086153e-05, + "loss": 1.3135, + "step": 11334 + }, + { + "epoch": 0.3328146103705444, + "grad_norm": 0.0, + "learning_rate": 1.556419939311334e-05, + "loss": 1.4072, + "step": 11335 + }, + { + "epoch": 0.33284397204768335, + "grad_norm": 0.0, + "learning_rate": 1.5563409213822045e-05, + "loss": 1.3398, + "step": 11336 + }, + { + "epoch": 0.33287333372482236, + "grad_norm": 0.0, + "learning_rate": 1.5562618984219418e-05, + "loss": 1.3467, + "step": 11337 + }, + { + "epoch": 0.3329026954019614, + "grad_norm": 0.0, + "learning_rate": 1.5561828704312602e-05, + "loss": 1.3379, + "step": 11338 + }, + { + "epoch": 0.33293205707910034, + "grad_norm": 0.0, + "learning_rate": 1.5561038374108747e-05, + "loss": 1.3984, + "step": 11339 + }, + { + "epoch": 0.33296141875623936, + "grad_norm": 0.0, + "learning_rate": 1.5560247993614993e-05, + "loss": 1.3691, + "step": 11340 + }, + { + "epoch": 0.3329907804333784, + "grad_norm": 0.0, + "learning_rate": 1.5559457562838497e-05, + "loss": 1.4189, + "step": 11341 + }, + { + "epoch": 0.33302014211051734, + "grad_norm": 0.0, + "learning_rate": 1.55586670817864e-05, + "loss": 1.4453, + "step": 11342 + }, + { + "epoch": 0.33304950378765635, + "grad_norm": 0.0, + "learning_rate": 1.5557876550465855e-05, + "loss": 1.4922, + "step": 11343 + }, + { + "epoch": 0.33307886546479537, + "grad_norm": 0.0, + "learning_rate": 1.5557085968884006e-05, + "loss": 1.3193, + "step": 11344 + }, + { + "epoch": 0.33310822714193433, + "grad_norm": 0.0, + "learning_rate": 1.5556295337048008e-05, + "loss": 1.3867, + "step": 11345 + }, + { + "epoch": 0.33313758881907335, + "grad_norm": 0.0, + "learning_rate": 1.555550465496501e-05, + "loss": 1.3955, + "step": 11346 + }, + { + "epoch": 0.33316695049621237, + "grad_norm": 0.0, + "learning_rate": 1.555471392264216e-05, + "loss": 1.4717, + "step": 11347 + }, + { + "epoch": 0.33319631217335133, + "grad_norm": 0.0, + "learning_rate": 1.5553923140086605e-05, + "loss": 1.3584, + "step": 11348 + }, + { + "epoch": 0.33322567385049034, + "grad_norm": 0.0, + "learning_rate": 1.5553132307305504e-05, + "loss": 1.4473, + "step": 11349 + }, + { + "epoch": 0.33325503552762936, + "grad_norm": 0.0, + "learning_rate": 1.5552341424306002e-05, + "loss": 1.377, + "step": 11350 + }, + { + "epoch": 0.3332843972047683, + "grad_norm": 0.0, + "learning_rate": 1.5551550491095258e-05, + "loss": 1.3364, + "step": 11351 + }, + { + "epoch": 0.33331375888190734, + "grad_norm": 0.0, + "learning_rate": 1.5550759507680416e-05, + "loss": 1.4609, + "step": 11352 + }, + { + "epoch": 0.33334312055904636, + "grad_norm": 0.0, + "learning_rate": 1.554996847406864e-05, + "loss": 1.5254, + "step": 11353 + }, + { + "epoch": 0.3333724822361853, + "grad_norm": 0.0, + "learning_rate": 1.5549177390267077e-05, + "loss": 1.4531, + "step": 11354 + }, + { + "epoch": 0.33340184391332434, + "grad_norm": 0.0, + "learning_rate": 1.5548386256282883e-05, + "loss": 1.3721, + "step": 11355 + }, + { + "epoch": 0.33343120559046335, + "grad_norm": 0.0, + "learning_rate": 1.554759507212321e-05, + "loss": 1.4209, + "step": 11356 + }, + { + "epoch": 0.3334605672676023, + "grad_norm": 0.0, + "learning_rate": 1.554680383779521e-05, + "loss": 1.46, + "step": 11357 + }, + { + "epoch": 0.33348992894474133, + "grad_norm": 0.0, + "learning_rate": 1.5546012553306047e-05, + "loss": 1.4336, + "step": 11358 + }, + { + "epoch": 0.33351929062188035, + "grad_norm": 0.0, + "learning_rate": 1.554522121866287e-05, + "loss": 1.4463, + "step": 11359 + }, + { + "epoch": 0.3335486522990193, + "grad_norm": 0.0, + "learning_rate": 1.5544429833872836e-05, + "loss": 1.2471, + "step": 11360 + }, + { + "epoch": 0.3335780139761583, + "grad_norm": 0.0, + "learning_rate": 1.5543638398943105e-05, + "loss": 1.3877, + "step": 11361 + }, + { + "epoch": 0.3336073756532973, + "grad_norm": 0.0, + "learning_rate": 1.554284691388083e-05, + "loss": 1.3691, + "step": 11362 + }, + { + "epoch": 0.3336367373304363, + "grad_norm": 0.0, + "learning_rate": 1.5542055378693176e-05, + "loss": 1.3613, + "step": 11363 + }, + { + "epoch": 0.3336660990075753, + "grad_norm": 0.0, + "learning_rate": 1.554126379338729e-05, + "loss": 1.4941, + "step": 11364 + }, + { + "epoch": 0.3336954606847143, + "grad_norm": 0.0, + "learning_rate": 1.5540472157970342e-05, + "loss": 1.3701, + "step": 11365 + }, + { + "epoch": 0.3337248223618533, + "grad_norm": 0.0, + "learning_rate": 1.5539680472449484e-05, + "loss": 1.5205, + "step": 11366 + }, + { + "epoch": 0.3337541840389923, + "grad_norm": 0.0, + "learning_rate": 1.5538888736831872e-05, + "loss": 1.3643, + "step": 11367 + }, + { + "epoch": 0.3337835457161313, + "grad_norm": 0.0, + "learning_rate": 1.5538096951124676e-05, + "loss": 1.3086, + "step": 11368 + }, + { + "epoch": 0.3338129073932703, + "grad_norm": 0.0, + "learning_rate": 1.5537305115335047e-05, + "loss": 1.2344, + "step": 11369 + }, + { + "epoch": 0.3338422690704093, + "grad_norm": 0.0, + "learning_rate": 1.553651322947015e-05, + "loss": 1.4287, + "step": 11370 + }, + { + "epoch": 0.3338716307475483, + "grad_norm": 0.0, + "learning_rate": 1.553572129353715e-05, + "loss": 1.2783, + "step": 11371 + }, + { + "epoch": 0.3339009924246873, + "grad_norm": 0.0, + "learning_rate": 1.55349293075432e-05, + "loss": 1.3574, + "step": 11372 + }, + { + "epoch": 0.3339303541018263, + "grad_norm": 0.0, + "learning_rate": 1.5534137271495468e-05, + "loss": 1.4561, + "step": 11373 + }, + { + "epoch": 0.33395971577896527, + "grad_norm": 0.0, + "learning_rate": 1.5533345185401118e-05, + "loss": 1.4658, + "step": 11374 + }, + { + "epoch": 0.3339890774561043, + "grad_norm": 0.0, + "learning_rate": 1.5532553049267308e-05, + "loss": 1.3721, + "step": 11375 + }, + { + "epoch": 0.3340184391332433, + "grad_norm": 0.0, + "learning_rate": 1.5531760863101206e-05, + "loss": 1.2275, + "step": 11376 + }, + { + "epoch": 0.33404780081038227, + "grad_norm": 0.0, + "learning_rate": 1.553096862690997e-05, + "loss": 1.4482, + "step": 11377 + }, + { + "epoch": 0.3340771624875213, + "grad_norm": 0.0, + "learning_rate": 1.553017634070077e-05, + "loss": 1.3887, + "step": 11378 + }, + { + "epoch": 0.3341065241646603, + "grad_norm": 0.0, + "learning_rate": 1.552938400448077e-05, + "loss": 1.4766, + "step": 11379 + }, + { + "epoch": 0.33413588584179926, + "grad_norm": 0.0, + "learning_rate": 1.5528591618257135e-05, + "loss": 1.3516, + "step": 11380 + }, + { + "epoch": 0.3341652475189383, + "grad_norm": 0.0, + "learning_rate": 1.5527799182037028e-05, + "loss": 1.3271, + "step": 11381 + }, + { + "epoch": 0.3341946091960773, + "grad_norm": 0.0, + "learning_rate": 1.5527006695827617e-05, + "loss": 1.2764, + "step": 11382 + }, + { + "epoch": 0.33422397087321626, + "grad_norm": 0.0, + "learning_rate": 1.552621415963607e-05, + "loss": 1.3047, + "step": 11383 + }, + { + "epoch": 0.3342533325503553, + "grad_norm": 0.0, + "learning_rate": 1.5525421573469555e-05, + "loss": 1.2773, + "step": 11384 + }, + { + "epoch": 0.3342826942274943, + "grad_norm": 0.0, + "learning_rate": 1.5524628937335235e-05, + "loss": 1.3555, + "step": 11385 + }, + { + "epoch": 0.33431205590463325, + "grad_norm": 0.0, + "learning_rate": 1.552383625124028e-05, + "loss": 1.2705, + "step": 11386 + }, + { + "epoch": 0.33434141758177227, + "grad_norm": 0.0, + "learning_rate": 1.5523043515191864e-05, + "loss": 1.4849, + "step": 11387 + }, + { + "epoch": 0.3343707792589113, + "grad_norm": 0.0, + "learning_rate": 1.5522250729197144e-05, + "loss": 1.2578, + "step": 11388 + }, + { + "epoch": 0.33440014093605025, + "grad_norm": 0.0, + "learning_rate": 1.55214578932633e-05, + "loss": 1.2871, + "step": 11389 + }, + { + "epoch": 0.33442950261318927, + "grad_norm": 0.0, + "learning_rate": 1.55206650073975e-05, + "loss": 1.3867, + "step": 11390 + }, + { + "epoch": 0.3344588642903283, + "grad_norm": 0.0, + "learning_rate": 1.551987207160691e-05, + "loss": 1.4209, + "step": 11391 + }, + { + "epoch": 0.33448822596746725, + "grad_norm": 0.0, + "learning_rate": 1.5519079085898707e-05, + "loss": 1.3877, + "step": 11392 + }, + { + "epoch": 0.33451758764460626, + "grad_norm": 0.0, + "learning_rate": 1.5518286050280054e-05, + "loss": 1.3867, + "step": 11393 + }, + { + "epoch": 0.3345469493217453, + "grad_norm": 0.0, + "learning_rate": 1.5517492964758126e-05, + "loss": 1.3311, + "step": 11394 + }, + { + "epoch": 0.33457631099888424, + "grad_norm": 0.0, + "learning_rate": 1.55166998293401e-05, + "loss": 1.2969, + "step": 11395 + }, + { + "epoch": 0.33460567267602326, + "grad_norm": 0.0, + "learning_rate": 1.5515906644033144e-05, + "loss": 1.3066, + "step": 11396 + }, + { + "epoch": 0.3346350343531623, + "grad_norm": 0.0, + "learning_rate": 1.551511340884443e-05, + "loss": 1.4873, + "step": 11397 + }, + { + "epoch": 0.33466439603030124, + "grad_norm": 0.0, + "learning_rate": 1.5514320123781132e-05, + "loss": 1.3828, + "step": 11398 + }, + { + "epoch": 0.33469375770744025, + "grad_norm": 0.0, + "learning_rate": 1.5513526788850424e-05, + "loss": 1.415, + "step": 11399 + }, + { + "epoch": 0.33472311938457927, + "grad_norm": 0.0, + "learning_rate": 1.5512733404059485e-05, + "loss": 1.4561, + "step": 11400 + }, + { + "epoch": 0.33475248106171823, + "grad_norm": 0.0, + "learning_rate": 1.5511939969415486e-05, + "loss": 1.4766, + "step": 11401 + }, + { + "epoch": 0.33478184273885725, + "grad_norm": 0.0, + "learning_rate": 1.55111464849256e-05, + "loss": 1.3198, + "step": 11402 + }, + { + "epoch": 0.33481120441599627, + "grad_norm": 0.0, + "learning_rate": 1.5510352950597004e-05, + "loss": 1.3408, + "step": 11403 + }, + { + "epoch": 0.33484056609313523, + "grad_norm": 0.0, + "learning_rate": 1.5509559366436877e-05, + "loss": 1.4805, + "step": 11404 + }, + { + "epoch": 0.33486992777027424, + "grad_norm": 0.0, + "learning_rate": 1.5508765732452395e-05, + "loss": 1.2822, + "step": 11405 + }, + { + "epoch": 0.33489928944741326, + "grad_norm": 0.0, + "learning_rate": 1.550797204865073e-05, + "loss": 1.4404, + "step": 11406 + }, + { + "epoch": 0.3349286511245522, + "grad_norm": 0.0, + "learning_rate": 1.5507178315039067e-05, + "loss": 1.4795, + "step": 11407 + }, + { + "epoch": 0.33495801280169124, + "grad_norm": 0.0, + "learning_rate": 1.5506384531624577e-05, + "loss": 1.231, + "step": 11408 + }, + { + "epoch": 0.33498737447883026, + "grad_norm": 0.0, + "learning_rate": 1.5505590698414442e-05, + "loss": 1.3447, + "step": 11409 + }, + { + "epoch": 0.3350167361559692, + "grad_norm": 0.0, + "learning_rate": 1.5504796815415843e-05, + "loss": 1.1855, + "step": 11410 + }, + { + "epoch": 0.33504609783310824, + "grad_norm": 0.0, + "learning_rate": 1.5504002882635956e-05, + "loss": 1.3604, + "step": 11411 + }, + { + "epoch": 0.3350754595102472, + "grad_norm": 0.0, + "learning_rate": 1.550320890008196e-05, + "loss": 1.5342, + "step": 11412 + }, + { + "epoch": 0.3351048211873862, + "grad_norm": 0.0, + "learning_rate": 1.5502414867761037e-05, + "loss": 1.4717, + "step": 11413 + }, + { + "epoch": 0.33513418286452523, + "grad_norm": 0.0, + "learning_rate": 1.5501620785680364e-05, + "loss": 1.3613, + "step": 11414 + }, + { + "epoch": 0.3351635445416642, + "grad_norm": 0.0, + "learning_rate": 1.550082665384713e-05, + "loss": 1.2109, + "step": 11415 + }, + { + "epoch": 0.3351929062188032, + "grad_norm": 0.0, + "learning_rate": 1.550003247226851e-05, + "loss": 1.3467, + "step": 11416 + }, + { + "epoch": 0.3352222678959422, + "grad_norm": 0.0, + "learning_rate": 1.5499238240951688e-05, + "loss": 1.4971, + "step": 11417 + }, + { + "epoch": 0.3352516295730812, + "grad_norm": 0.0, + "learning_rate": 1.5498443959903843e-05, + "loss": 1.4316, + "step": 11418 + }, + { + "epoch": 0.3352809912502202, + "grad_norm": 0.0, + "learning_rate": 1.5497649629132166e-05, + "loss": 1.3555, + "step": 11419 + }, + { + "epoch": 0.3353103529273592, + "grad_norm": 0.0, + "learning_rate": 1.549685524864383e-05, + "loss": 1.3193, + "step": 11420 + }, + { + "epoch": 0.3353397146044982, + "grad_norm": 0.0, + "learning_rate": 1.549606081844603e-05, + "loss": 1.4746, + "step": 11421 + }, + { + "epoch": 0.3353690762816372, + "grad_norm": 0.0, + "learning_rate": 1.549526633854594e-05, + "loss": 1.3125, + "step": 11422 + }, + { + "epoch": 0.3353984379587762, + "grad_norm": 0.0, + "learning_rate": 1.5494471808950752e-05, + "loss": 1.4258, + "step": 11423 + }, + { + "epoch": 0.3354277996359152, + "grad_norm": 0.0, + "learning_rate": 1.5493677229667648e-05, + "loss": 1.334, + "step": 11424 + }, + { + "epoch": 0.3354571613130542, + "grad_norm": 0.0, + "learning_rate": 1.5492882600703812e-05, + "loss": 1.4131, + "step": 11425 + }, + { + "epoch": 0.3354865229901932, + "grad_norm": 0.0, + "learning_rate": 1.549208792206643e-05, + "loss": 1.3701, + "step": 11426 + }, + { + "epoch": 0.3355158846673322, + "grad_norm": 0.0, + "learning_rate": 1.5491293193762696e-05, + "loss": 1.4043, + "step": 11427 + }, + { + "epoch": 0.3355452463444712, + "grad_norm": 0.0, + "learning_rate": 1.5490498415799788e-05, + "loss": 1.3477, + "step": 11428 + }, + { + "epoch": 0.3355746080216102, + "grad_norm": 0.0, + "learning_rate": 1.54897035881849e-05, + "loss": 1.5049, + "step": 11429 + }, + { + "epoch": 0.33560396969874917, + "grad_norm": 0.0, + "learning_rate": 1.5488908710925214e-05, + "loss": 1.2627, + "step": 11430 + }, + { + "epoch": 0.3356333313758882, + "grad_norm": 0.0, + "learning_rate": 1.548811378402792e-05, + "loss": 1.3945, + "step": 11431 + }, + { + "epoch": 0.3356626930530272, + "grad_norm": 0.0, + "learning_rate": 1.5487318807500208e-05, + "loss": 1.3574, + "step": 11432 + }, + { + "epoch": 0.33569205473016617, + "grad_norm": 0.0, + "learning_rate": 1.5486523781349267e-05, + "loss": 1.4287, + "step": 11433 + }, + { + "epoch": 0.3357214164073052, + "grad_norm": 0.0, + "learning_rate": 1.5485728705582287e-05, + "loss": 1.376, + "step": 11434 + }, + { + "epoch": 0.3357507780844442, + "grad_norm": 0.0, + "learning_rate": 1.5484933580206458e-05, + "loss": 1.3984, + "step": 11435 + }, + { + "epoch": 0.33578013976158316, + "grad_norm": 0.0, + "learning_rate": 1.5484138405228968e-05, + "loss": 1.4336, + "step": 11436 + }, + { + "epoch": 0.3358095014387222, + "grad_norm": 0.0, + "learning_rate": 1.548334318065701e-05, + "loss": 1.4824, + "step": 11437 + }, + { + "epoch": 0.3358388631158612, + "grad_norm": 0.0, + "learning_rate": 1.5482547906497772e-05, + "loss": 1.4092, + "step": 11438 + }, + { + "epoch": 0.33586822479300016, + "grad_norm": 0.0, + "learning_rate": 1.5481752582758454e-05, + "loss": 1.3867, + "step": 11439 + }, + { + "epoch": 0.3358975864701392, + "grad_norm": 0.0, + "learning_rate": 1.5480957209446242e-05, + "loss": 1.5186, + "step": 11440 + }, + { + "epoch": 0.3359269481472782, + "grad_norm": 0.0, + "learning_rate": 1.5480161786568332e-05, + "loss": 1.4326, + "step": 11441 + }, + { + "epoch": 0.33595630982441715, + "grad_norm": 0.0, + "learning_rate": 1.5479366314131916e-05, + "loss": 1.3174, + "step": 11442 + }, + { + "epoch": 0.33598567150155617, + "grad_norm": 0.0, + "learning_rate": 1.5478570792144185e-05, + "loss": 1.25, + "step": 11443 + }, + { + "epoch": 0.3360150331786952, + "grad_norm": 0.0, + "learning_rate": 1.5477775220612334e-05, + "loss": 1.4131, + "step": 11444 + }, + { + "epoch": 0.33604439485583415, + "grad_norm": 0.0, + "learning_rate": 1.547697959954356e-05, + "loss": 1.2856, + "step": 11445 + }, + { + "epoch": 0.33607375653297317, + "grad_norm": 0.0, + "learning_rate": 1.5476183928945058e-05, + "loss": 1.3672, + "step": 11446 + }, + { + "epoch": 0.3361031182101122, + "grad_norm": 0.0, + "learning_rate": 1.547538820882402e-05, + "loss": 1.4346, + "step": 11447 + }, + { + "epoch": 0.33613247988725115, + "grad_norm": 0.0, + "learning_rate": 1.5474592439187648e-05, + "loss": 1.458, + "step": 11448 + }, + { + "epoch": 0.33616184156439016, + "grad_norm": 0.0, + "learning_rate": 1.547379662004313e-05, + "loss": 1.3428, + "step": 11449 + }, + { + "epoch": 0.3361912032415292, + "grad_norm": 0.0, + "learning_rate": 1.547300075139767e-05, + "loss": 1.3574, + "step": 11450 + }, + { + "epoch": 0.33622056491866814, + "grad_norm": 0.0, + "learning_rate": 1.5472204833258465e-05, + "loss": 1.3926, + "step": 11451 + }, + { + "epoch": 0.33624992659580716, + "grad_norm": 0.0, + "learning_rate": 1.5471408865632707e-05, + "loss": 1.3496, + "step": 11452 + }, + { + "epoch": 0.3362792882729462, + "grad_norm": 0.0, + "learning_rate": 1.5470612848527595e-05, + "loss": 1.4541, + "step": 11453 + }, + { + "epoch": 0.33630864995008514, + "grad_norm": 0.0, + "learning_rate": 1.5469816781950333e-05, + "loss": 1.2559, + "step": 11454 + }, + { + "epoch": 0.33633801162722415, + "grad_norm": 0.0, + "learning_rate": 1.546902066590812e-05, + "loss": 1.5439, + "step": 11455 + }, + { + "epoch": 0.33636737330436317, + "grad_norm": 0.0, + "learning_rate": 1.5468224500408147e-05, + "loss": 1.3857, + "step": 11456 + }, + { + "epoch": 0.33639673498150213, + "grad_norm": 0.0, + "learning_rate": 1.546742828545762e-05, + "loss": 1.458, + "step": 11457 + }, + { + "epoch": 0.33642609665864115, + "grad_norm": 0.0, + "learning_rate": 1.546663202106374e-05, + "loss": 1.3086, + "step": 11458 + }, + { + "epoch": 0.33645545833578017, + "grad_norm": 0.0, + "learning_rate": 1.5465835707233706e-05, + "loss": 1.3916, + "step": 11459 + }, + { + "epoch": 0.33648482001291913, + "grad_norm": 0.0, + "learning_rate": 1.5465039343974725e-05, + "loss": 1.4092, + "step": 11460 + }, + { + "epoch": 0.33651418169005815, + "grad_norm": 0.0, + "learning_rate": 1.5464242931293987e-05, + "loss": 1.415, + "step": 11461 + }, + { + "epoch": 0.3365435433671971, + "grad_norm": 0.0, + "learning_rate": 1.5463446469198703e-05, + "loss": 1.4209, + "step": 11462 + }, + { + "epoch": 0.3365729050443361, + "grad_norm": 0.0, + "learning_rate": 1.5462649957696075e-05, + "loss": 1.4316, + "step": 11463 + }, + { + "epoch": 0.33660226672147514, + "grad_norm": 0.0, + "learning_rate": 1.5461853396793302e-05, + "loss": 1.3535, + "step": 11464 + }, + { + "epoch": 0.3366316283986141, + "grad_norm": 0.0, + "learning_rate": 1.546105678649759e-05, + "loss": 1.3809, + "step": 11465 + }, + { + "epoch": 0.3366609900757531, + "grad_norm": 0.0, + "learning_rate": 1.5460260126816145e-05, + "loss": 1.4727, + "step": 11466 + }, + { + "epoch": 0.33669035175289214, + "grad_norm": 0.0, + "learning_rate": 1.5459463417756167e-05, + "loss": 1.5908, + "step": 11467 + }, + { + "epoch": 0.3367197134300311, + "grad_norm": 0.0, + "learning_rate": 1.5458666659324864e-05, + "loss": 1.3018, + "step": 11468 + }, + { + "epoch": 0.3367490751071701, + "grad_norm": 0.0, + "learning_rate": 1.545786985152944e-05, + "loss": 1.4092, + "step": 11469 + }, + { + "epoch": 0.33677843678430913, + "grad_norm": 0.0, + "learning_rate": 1.54570729943771e-05, + "loss": 1.2793, + "step": 11470 + }, + { + "epoch": 0.3368077984614481, + "grad_norm": 0.0, + "learning_rate": 1.5456276087875054e-05, + "loss": 1.5283, + "step": 11471 + }, + { + "epoch": 0.3368371601385871, + "grad_norm": 0.0, + "learning_rate": 1.5455479132030506e-05, + "loss": 1.4307, + "step": 11472 + }, + { + "epoch": 0.3368665218157261, + "grad_norm": 0.0, + "learning_rate": 1.5454682126850662e-05, + "loss": 1.2964, + "step": 11473 + }, + { + "epoch": 0.3368958834928651, + "grad_norm": 0.0, + "learning_rate": 1.5453885072342733e-05, + "loss": 1.4121, + "step": 11474 + }, + { + "epoch": 0.3369252451700041, + "grad_norm": 0.0, + "learning_rate": 1.545308796851392e-05, + "loss": 1.4678, + "step": 11475 + }, + { + "epoch": 0.3369546068471431, + "grad_norm": 0.0, + "learning_rate": 1.545229081537144e-05, + "loss": 1.3003, + "step": 11476 + }, + { + "epoch": 0.3369839685242821, + "grad_norm": 0.0, + "learning_rate": 1.5451493612922494e-05, + "loss": 1.4463, + "step": 11477 + }, + { + "epoch": 0.3370133302014211, + "grad_norm": 0.0, + "learning_rate": 1.5450696361174296e-05, + "loss": 1.292, + "step": 11478 + }, + { + "epoch": 0.3370426918785601, + "grad_norm": 0.0, + "learning_rate": 1.5449899060134054e-05, + "loss": 1.248, + "step": 11479 + }, + { + "epoch": 0.3370720535556991, + "grad_norm": 0.0, + "learning_rate": 1.5449101709808982e-05, + "loss": 1.4209, + "step": 11480 + }, + { + "epoch": 0.3371014152328381, + "grad_norm": 0.0, + "learning_rate": 1.5448304310206288e-05, + "loss": 1.4541, + "step": 11481 + }, + { + "epoch": 0.3371307769099771, + "grad_norm": 0.0, + "learning_rate": 1.5447506861333185e-05, + "loss": 1.3555, + "step": 11482 + }, + { + "epoch": 0.3371601385871161, + "grad_norm": 0.0, + "learning_rate": 1.5446709363196876e-05, + "loss": 1.3613, + "step": 11483 + }, + { + "epoch": 0.3371895002642551, + "grad_norm": 0.0, + "learning_rate": 1.544591181580458e-05, + "loss": 1.3809, + "step": 11484 + }, + { + "epoch": 0.3372188619413941, + "grad_norm": 0.0, + "learning_rate": 1.5445114219163514e-05, + "loss": 1.3096, + "step": 11485 + }, + { + "epoch": 0.33724822361853307, + "grad_norm": 0.0, + "learning_rate": 1.5444316573280878e-05, + "loss": 1.3926, + "step": 11486 + }, + { + "epoch": 0.3372775852956721, + "grad_norm": 0.0, + "learning_rate": 1.54435188781639e-05, + "loss": 1.5049, + "step": 11487 + }, + { + "epoch": 0.3373069469728111, + "grad_norm": 0.0, + "learning_rate": 1.5442721133819785e-05, + "loss": 1.3447, + "step": 11488 + }, + { + "epoch": 0.33733630864995007, + "grad_norm": 0.0, + "learning_rate": 1.5441923340255745e-05, + "loss": 1.4678, + "step": 11489 + }, + { + "epoch": 0.3373656703270891, + "grad_norm": 0.0, + "learning_rate": 1.5441125497479003e-05, + "loss": 1.3389, + "step": 11490 + }, + { + "epoch": 0.3373950320042281, + "grad_norm": 0.0, + "learning_rate": 1.5440327605496768e-05, + "loss": 1.292, + "step": 11491 + }, + { + "epoch": 0.33742439368136706, + "grad_norm": 0.0, + "learning_rate": 1.5439529664316258e-05, + "loss": 1.4141, + "step": 11492 + }, + { + "epoch": 0.3374537553585061, + "grad_norm": 0.0, + "learning_rate": 1.543873167394469e-05, + "loss": 1.4082, + "step": 11493 + }, + { + "epoch": 0.3374831170356451, + "grad_norm": 0.0, + "learning_rate": 1.5437933634389273e-05, + "loss": 1.2988, + "step": 11494 + }, + { + "epoch": 0.33751247871278406, + "grad_norm": 0.0, + "learning_rate": 1.5437135545657233e-05, + "loss": 1.374, + "step": 11495 + }, + { + "epoch": 0.3375418403899231, + "grad_norm": 0.0, + "learning_rate": 1.543633740775578e-05, + "loss": 1.3291, + "step": 11496 + }, + { + "epoch": 0.3375712020670621, + "grad_norm": 0.0, + "learning_rate": 1.5435539220692142e-05, + "loss": 1.3291, + "step": 11497 + }, + { + "epoch": 0.33760056374420105, + "grad_norm": 0.0, + "learning_rate": 1.5434740984473526e-05, + "loss": 1.4229, + "step": 11498 + }, + { + "epoch": 0.33762992542134007, + "grad_norm": 0.0, + "learning_rate": 1.5433942699107157e-05, + "loss": 1.4062, + "step": 11499 + }, + { + "epoch": 0.3376592870984791, + "grad_norm": 0.0, + "learning_rate": 1.5433144364600253e-05, + "loss": 1.2393, + "step": 11500 + }, + { + "epoch": 0.33768864877561805, + "grad_norm": 0.0, + "learning_rate": 1.543234598096003e-05, + "loss": 1.3232, + "step": 11501 + }, + { + "epoch": 0.33771801045275707, + "grad_norm": 0.0, + "learning_rate": 1.5431547548193714e-05, + "loss": 1.3779, + "step": 11502 + }, + { + "epoch": 0.3377473721298961, + "grad_norm": 0.0, + "learning_rate": 1.5430749066308523e-05, + "loss": 1.4189, + "step": 11503 + }, + { + "epoch": 0.33777673380703505, + "grad_norm": 0.0, + "learning_rate": 1.5429950535311676e-05, + "loss": 1.5273, + "step": 11504 + }, + { + "epoch": 0.33780609548417406, + "grad_norm": 0.0, + "learning_rate": 1.54291519552104e-05, + "loss": 1.2891, + "step": 11505 + }, + { + "epoch": 0.3378354571613131, + "grad_norm": 0.0, + "learning_rate": 1.5428353326011903e-05, + "loss": 1.3945, + "step": 11506 + }, + { + "epoch": 0.33786481883845204, + "grad_norm": 0.0, + "learning_rate": 1.5427554647723424e-05, + "loss": 1.3774, + "step": 11507 + }, + { + "epoch": 0.33789418051559106, + "grad_norm": 0.0, + "learning_rate": 1.5426755920352176e-05, + "loss": 1.3311, + "step": 11508 + }, + { + "epoch": 0.3379235421927301, + "grad_norm": 0.0, + "learning_rate": 1.5425957143905385e-05, + "loss": 1.415, + "step": 11509 + }, + { + "epoch": 0.33795290386986904, + "grad_norm": 0.0, + "learning_rate": 1.5425158318390274e-05, + "loss": 1.3721, + "step": 11510 + }, + { + "epoch": 0.33798226554700805, + "grad_norm": 0.0, + "learning_rate": 1.542435944381407e-05, + "loss": 1.4111, + "step": 11511 + }, + { + "epoch": 0.33801162722414707, + "grad_norm": 0.0, + "learning_rate": 1.542356052018399e-05, + "loss": 1.4248, + "step": 11512 + }, + { + "epoch": 0.33804098890128603, + "grad_norm": 0.0, + "learning_rate": 1.5422761547507265e-05, + "loss": 1.458, + "step": 11513 + }, + { + "epoch": 0.33807035057842505, + "grad_norm": 0.0, + "learning_rate": 1.5421962525791116e-05, + "loss": 1.3418, + "step": 11514 + }, + { + "epoch": 0.338099712255564, + "grad_norm": 0.0, + "learning_rate": 1.5421163455042775e-05, + "loss": 1.3984, + "step": 11515 + }, + { + "epoch": 0.33812907393270303, + "grad_norm": 0.0, + "learning_rate": 1.542036433526946e-05, + "loss": 1.3066, + "step": 11516 + }, + { + "epoch": 0.33815843560984205, + "grad_norm": 0.0, + "learning_rate": 1.5419565166478405e-05, + "loss": 1.3555, + "step": 11517 + }, + { + "epoch": 0.338187797286981, + "grad_norm": 0.0, + "learning_rate": 1.5418765948676836e-05, + "loss": 1.3066, + "step": 11518 + }, + { + "epoch": 0.33821715896412, + "grad_norm": 0.0, + "learning_rate": 1.5417966681871976e-05, + "loss": 1.4297, + "step": 11519 + }, + { + "epoch": 0.33824652064125904, + "grad_norm": 0.0, + "learning_rate": 1.5417167366071053e-05, + "loss": 1.4229, + "step": 11520 + }, + { + "epoch": 0.338275882318398, + "grad_norm": 0.0, + "learning_rate": 1.5416368001281306e-05, + "loss": 1.4932, + "step": 11521 + }, + { + "epoch": 0.338305243995537, + "grad_norm": 0.0, + "learning_rate": 1.541556858750995e-05, + "loss": 1.4131, + "step": 11522 + }, + { + "epoch": 0.33833460567267604, + "grad_norm": 0.0, + "learning_rate": 1.541476912476422e-05, + "loss": 1.3027, + "step": 11523 + }, + { + "epoch": 0.338363967349815, + "grad_norm": 0.0, + "learning_rate": 1.5413969613051348e-05, + "loss": 1.3428, + "step": 11524 + }, + { + "epoch": 0.338393329026954, + "grad_norm": 0.0, + "learning_rate": 1.5413170052378564e-05, + "loss": 1.4189, + "step": 11525 + }, + { + "epoch": 0.33842269070409303, + "grad_norm": 0.0, + "learning_rate": 1.5412370442753096e-05, + "loss": 1.3262, + "step": 11526 + }, + { + "epoch": 0.338452052381232, + "grad_norm": 0.0, + "learning_rate": 1.5411570784182175e-05, + "loss": 1.3535, + "step": 11527 + }, + { + "epoch": 0.338481414058371, + "grad_norm": 0.0, + "learning_rate": 1.541077107667303e-05, + "loss": 1.3506, + "step": 11528 + }, + { + "epoch": 0.33851077573551, + "grad_norm": 0.0, + "learning_rate": 1.5409971320232902e-05, + "loss": 1.2871, + "step": 11529 + }, + { + "epoch": 0.338540137412649, + "grad_norm": 0.0, + "learning_rate": 1.5409171514869016e-05, + "loss": 1.4111, + "step": 11530 + }, + { + "epoch": 0.338569499089788, + "grad_norm": 0.0, + "learning_rate": 1.540837166058861e-05, + "loss": 1.4854, + "step": 11531 + }, + { + "epoch": 0.338598860766927, + "grad_norm": 0.0, + "learning_rate": 1.540757175739891e-05, + "loss": 1.502, + "step": 11532 + }, + { + "epoch": 0.338628222444066, + "grad_norm": 0.0, + "learning_rate": 1.540677180530715e-05, + "loss": 1.2998, + "step": 11533 + }, + { + "epoch": 0.338657584121205, + "grad_norm": 0.0, + "learning_rate": 1.5405971804320574e-05, + "loss": 1.3711, + "step": 11534 + }, + { + "epoch": 0.338686945798344, + "grad_norm": 0.0, + "learning_rate": 1.5405171754446405e-05, + "loss": 1.3516, + "step": 11535 + }, + { + "epoch": 0.338716307475483, + "grad_norm": 0.0, + "learning_rate": 1.540437165569189e-05, + "loss": 1.3447, + "step": 11536 + }, + { + "epoch": 0.338745669152622, + "grad_norm": 0.0, + "learning_rate": 1.5403571508064253e-05, + "loss": 1.2192, + "step": 11537 + }, + { + "epoch": 0.338775030829761, + "grad_norm": 0.0, + "learning_rate": 1.5402771311570738e-05, + "loss": 1.499, + "step": 11538 + }, + { + "epoch": 0.3388043925069, + "grad_norm": 0.0, + "learning_rate": 1.5401971066218576e-05, + "loss": 1.3535, + "step": 11539 + }, + { + "epoch": 0.338833754184039, + "grad_norm": 0.0, + "learning_rate": 1.540117077201501e-05, + "loss": 1.3633, + "step": 11540 + }, + { + "epoch": 0.338863115861178, + "grad_norm": 0.0, + "learning_rate": 1.5400370428967273e-05, + "loss": 1.4209, + "step": 11541 + }, + { + "epoch": 0.33889247753831697, + "grad_norm": 0.0, + "learning_rate": 1.53995700370826e-05, + "loss": 1.4795, + "step": 11542 + }, + { + "epoch": 0.338921839215456, + "grad_norm": 0.0, + "learning_rate": 1.5398769596368235e-05, + "loss": 1.4111, + "step": 11543 + }, + { + "epoch": 0.338951200892595, + "grad_norm": 0.0, + "learning_rate": 1.539796910683141e-05, + "loss": 1.3545, + "step": 11544 + }, + { + "epoch": 0.33898056256973397, + "grad_norm": 0.0, + "learning_rate": 1.539716856847937e-05, + "loss": 1.1675, + "step": 11545 + }, + { + "epoch": 0.339009924246873, + "grad_norm": 0.0, + "learning_rate": 1.5396367981319355e-05, + "loss": 1.4727, + "step": 11546 + }, + { + "epoch": 0.339039285924012, + "grad_norm": 0.0, + "learning_rate": 1.53955673453586e-05, + "loss": 1.3291, + "step": 11547 + }, + { + "epoch": 0.33906864760115096, + "grad_norm": 0.0, + "learning_rate": 1.5394766660604347e-05, + "loss": 1.3711, + "step": 11548 + }, + { + "epoch": 0.33909800927829, + "grad_norm": 0.0, + "learning_rate": 1.539396592706384e-05, + "loss": 1.3164, + "step": 11549 + }, + { + "epoch": 0.339127370955429, + "grad_norm": 0.0, + "learning_rate": 1.539316514474431e-05, + "loss": 1.2607, + "step": 11550 + }, + { + "epoch": 0.33915673263256796, + "grad_norm": 0.0, + "learning_rate": 1.5392364313653017e-05, + "loss": 1.3838, + "step": 11551 + }, + { + "epoch": 0.339186094309707, + "grad_norm": 0.0, + "learning_rate": 1.5391563433797186e-05, + "loss": 1.3037, + "step": 11552 + }, + { + "epoch": 0.339215455986846, + "grad_norm": 0.0, + "learning_rate": 1.539076250518407e-05, + "loss": 1.416, + "step": 11553 + }, + { + "epoch": 0.33924481766398495, + "grad_norm": 0.0, + "learning_rate": 1.5389961527820907e-05, + "loss": 1.2988, + "step": 11554 + }, + { + "epoch": 0.33927417934112397, + "grad_norm": 0.0, + "learning_rate": 1.5389160501714937e-05, + "loss": 1.4541, + "step": 11555 + }, + { + "epoch": 0.339303541018263, + "grad_norm": 0.0, + "learning_rate": 1.5388359426873415e-05, + "loss": 1.3477, + "step": 11556 + }, + { + "epoch": 0.33933290269540195, + "grad_norm": 0.0, + "learning_rate": 1.5387558303303576e-05, + "loss": 1.3496, + "step": 11557 + }, + { + "epoch": 0.33936226437254097, + "grad_norm": 0.0, + "learning_rate": 1.5386757131012665e-05, + "loss": 1.3555, + "step": 11558 + }, + { + "epoch": 0.33939162604968, + "grad_norm": 0.0, + "learning_rate": 1.538595591000793e-05, + "loss": 1.3154, + "step": 11559 + }, + { + "epoch": 0.33942098772681895, + "grad_norm": 0.0, + "learning_rate": 1.538515464029662e-05, + "loss": 1.416, + "step": 11560 + }, + { + "epoch": 0.33945034940395796, + "grad_norm": 0.0, + "learning_rate": 1.5384353321885974e-05, + "loss": 1.4424, + "step": 11561 + }, + { + "epoch": 0.339479711081097, + "grad_norm": 0.0, + "learning_rate": 1.5383551954783245e-05, + "loss": 1.4082, + "step": 11562 + }, + { + "epoch": 0.33950907275823594, + "grad_norm": 0.0, + "learning_rate": 1.5382750538995678e-05, + "loss": 1.2773, + "step": 11563 + }, + { + "epoch": 0.33953843443537496, + "grad_norm": 0.0, + "learning_rate": 1.5381949074530514e-05, + "loss": 1.2256, + "step": 11564 + }, + { + "epoch": 0.3395677961125139, + "grad_norm": 0.0, + "learning_rate": 1.5381147561395012e-05, + "loss": 1.3857, + "step": 11565 + }, + { + "epoch": 0.33959715778965294, + "grad_norm": 0.0, + "learning_rate": 1.5380345999596407e-05, + "loss": 1.3018, + "step": 11566 + }, + { + "epoch": 0.33962651946679195, + "grad_norm": 0.0, + "learning_rate": 1.537954438914196e-05, + "loss": 1.4688, + "step": 11567 + }, + { + "epoch": 0.3396558811439309, + "grad_norm": 0.0, + "learning_rate": 1.5378742730038917e-05, + "loss": 1.3096, + "step": 11568 + }, + { + "epoch": 0.33968524282106993, + "grad_norm": 0.0, + "learning_rate": 1.5377941022294523e-05, + "loss": 1.2236, + "step": 11569 + }, + { + "epoch": 0.33971460449820895, + "grad_norm": 0.0, + "learning_rate": 1.537713926591603e-05, + "loss": 1.3369, + "step": 11570 + }, + { + "epoch": 0.3397439661753479, + "grad_norm": 0.0, + "learning_rate": 1.5376337460910696e-05, + "loss": 1.3413, + "step": 11571 + }, + { + "epoch": 0.33977332785248693, + "grad_norm": 0.0, + "learning_rate": 1.5375535607285758e-05, + "loss": 1.2998, + "step": 11572 + }, + { + "epoch": 0.33980268952962595, + "grad_norm": 0.0, + "learning_rate": 1.5374733705048478e-05, + "loss": 1.46, + "step": 11573 + }, + { + "epoch": 0.3398320512067649, + "grad_norm": 0.0, + "learning_rate": 1.5373931754206105e-05, + "loss": 1.373, + "step": 11574 + }, + { + "epoch": 0.3398614128839039, + "grad_norm": 0.0, + "learning_rate": 1.537312975476589e-05, + "loss": 1.2681, + "step": 11575 + }, + { + "epoch": 0.33989077456104294, + "grad_norm": 0.0, + "learning_rate": 1.5372327706735088e-05, + "loss": 1.3008, + "step": 11576 + }, + { + "epoch": 0.3399201362381819, + "grad_norm": 0.0, + "learning_rate": 1.5371525610120944e-05, + "loss": 1.4521, + "step": 11577 + }, + { + "epoch": 0.3399494979153209, + "grad_norm": 0.0, + "learning_rate": 1.5370723464930728e-05, + "loss": 1.376, + "step": 11578 + }, + { + "epoch": 0.33997885959245994, + "grad_norm": 0.0, + "learning_rate": 1.536992127117168e-05, + "loss": 1.3174, + "step": 11579 + }, + { + "epoch": 0.3400082212695989, + "grad_norm": 0.0, + "learning_rate": 1.5369119028851057e-05, + "loss": 1.3193, + "step": 11580 + }, + { + "epoch": 0.3400375829467379, + "grad_norm": 0.0, + "learning_rate": 1.5368316737976117e-05, + "loss": 1.3105, + "step": 11581 + }, + { + "epoch": 0.34006694462387693, + "grad_norm": 0.0, + "learning_rate": 1.5367514398554114e-05, + "loss": 1.2725, + "step": 11582 + }, + { + "epoch": 0.3400963063010159, + "grad_norm": 0.0, + "learning_rate": 1.5366712010592305e-05, + "loss": 1.4346, + "step": 11583 + }, + { + "epoch": 0.3401256679781549, + "grad_norm": 0.0, + "learning_rate": 1.5365909574097945e-05, + "loss": 1.1978, + "step": 11584 + }, + { + "epoch": 0.34015502965529393, + "grad_norm": 0.0, + "learning_rate": 1.536510708907829e-05, + "loss": 1.3076, + "step": 11585 + }, + { + "epoch": 0.3401843913324329, + "grad_norm": 0.0, + "learning_rate": 1.5364304555540594e-05, + "loss": 1.4443, + "step": 11586 + }, + { + "epoch": 0.3402137530095719, + "grad_norm": 0.0, + "learning_rate": 1.5363501973492123e-05, + "loss": 1.5127, + "step": 11587 + }, + { + "epoch": 0.3402431146867109, + "grad_norm": 0.0, + "learning_rate": 1.5362699342940125e-05, + "loss": 1.3711, + "step": 11588 + }, + { + "epoch": 0.3402724763638499, + "grad_norm": 0.0, + "learning_rate": 1.5361896663891863e-05, + "loss": 1.3994, + "step": 11589 + }, + { + "epoch": 0.3403018380409889, + "grad_norm": 0.0, + "learning_rate": 1.5361093936354606e-05, + "loss": 1.4141, + "step": 11590 + }, + { + "epoch": 0.3403311997181279, + "grad_norm": 0.0, + "learning_rate": 1.5360291160335593e-05, + "loss": 1.3398, + "step": 11591 + }, + { + "epoch": 0.3403605613952669, + "grad_norm": 0.0, + "learning_rate": 1.53594883358421e-05, + "loss": 1.3604, + "step": 11592 + }, + { + "epoch": 0.3403899230724059, + "grad_norm": 0.0, + "learning_rate": 1.535868546288138e-05, + "loss": 1.3623, + "step": 11593 + }, + { + "epoch": 0.3404192847495449, + "grad_norm": 0.0, + "learning_rate": 1.5357882541460694e-05, + "loss": 1.3369, + "step": 11594 + }, + { + "epoch": 0.3404486464266839, + "grad_norm": 0.0, + "learning_rate": 1.5357079571587304e-05, + "loss": 1.2646, + "step": 11595 + }, + { + "epoch": 0.3404780081038229, + "grad_norm": 0.0, + "learning_rate": 1.5356276553268472e-05, + "loss": 1.3511, + "step": 11596 + }, + { + "epoch": 0.3405073697809619, + "grad_norm": 0.0, + "learning_rate": 1.535547348651146e-05, + "loss": 1.4014, + "step": 11597 + }, + { + "epoch": 0.34053673145810087, + "grad_norm": 0.0, + "learning_rate": 1.5354670371323532e-05, + "loss": 1.4072, + "step": 11598 + }, + { + "epoch": 0.3405660931352399, + "grad_norm": 0.0, + "learning_rate": 1.5353867207711942e-05, + "loss": 1.3418, + "step": 11599 + }, + { + "epoch": 0.3405954548123789, + "grad_norm": 0.0, + "learning_rate": 1.5353063995683966e-05, + "loss": 1.3799, + "step": 11600 + }, + { + "epoch": 0.34062481648951787, + "grad_norm": 0.0, + "learning_rate": 1.5352260735246858e-05, + "loss": 1.293, + "step": 11601 + }, + { + "epoch": 0.3406541781666569, + "grad_norm": 0.0, + "learning_rate": 1.5351457426407884e-05, + "loss": 1.4297, + "step": 11602 + }, + { + "epoch": 0.3406835398437959, + "grad_norm": 0.0, + "learning_rate": 1.5350654069174314e-05, + "loss": 1.3789, + "step": 11603 + }, + { + "epoch": 0.34071290152093486, + "grad_norm": 0.0, + "learning_rate": 1.5349850663553407e-05, + "loss": 1.3848, + "step": 11604 + }, + { + "epoch": 0.3407422631980739, + "grad_norm": 0.0, + "learning_rate": 1.5349047209552428e-05, + "loss": 1.3584, + "step": 11605 + }, + { + "epoch": 0.3407716248752129, + "grad_norm": 0.0, + "learning_rate": 1.534824370717865e-05, + "loss": 1.2183, + "step": 11606 + }, + { + "epoch": 0.34080098655235186, + "grad_norm": 0.0, + "learning_rate": 1.5347440156439332e-05, + "loss": 1.3975, + "step": 11607 + }, + { + "epoch": 0.3408303482294909, + "grad_norm": 0.0, + "learning_rate": 1.534663655734174e-05, + "loss": 1.3369, + "step": 11608 + }, + { + "epoch": 0.3408597099066299, + "grad_norm": 0.0, + "learning_rate": 1.534583290989315e-05, + "loss": 1.459, + "step": 11609 + }, + { + "epoch": 0.34088907158376885, + "grad_norm": 0.0, + "learning_rate": 1.534502921410082e-05, + "loss": 1.3438, + "step": 11610 + }, + { + "epoch": 0.34091843326090787, + "grad_norm": 0.0, + "learning_rate": 1.5344225469972026e-05, + "loss": 1.4297, + "step": 11611 + }, + { + "epoch": 0.3409477949380469, + "grad_norm": 0.0, + "learning_rate": 1.5343421677514028e-05, + "loss": 1.3223, + "step": 11612 + }, + { + "epoch": 0.34097715661518585, + "grad_norm": 0.0, + "learning_rate": 1.53426178367341e-05, + "loss": 1.5059, + "step": 11613 + }, + { + "epoch": 0.34100651829232487, + "grad_norm": 0.0, + "learning_rate": 1.534181394763951e-05, + "loss": 1.4248, + "step": 11614 + }, + { + "epoch": 0.34103587996946383, + "grad_norm": 0.0, + "learning_rate": 1.5341010010237533e-05, + "loss": 1.5068, + "step": 11615 + }, + { + "epoch": 0.34106524164660285, + "grad_norm": 0.0, + "learning_rate": 1.534020602453543e-05, + "loss": 1.5176, + "step": 11616 + }, + { + "epoch": 0.34109460332374186, + "grad_norm": 0.0, + "learning_rate": 1.5339401990540478e-05, + "loss": 1.4688, + "step": 11617 + }, + { + "epoch": 0.3411239650008808, + "grad_norm": 0.0, + "learning_rate": 1.5338597908259946e-05, + "loss": 1.3672, + "step": 11618 + }, + { + "epoch": 0.34115332667801984, + "grad_norm": 0.0, + "learning_rate": 1.533779377770111e-05, + "loss": 1.4131, + "step": 11619 + }, + { + "epoch": 0.34118268835515886, + "grad_norm": 0.0, + "learning_rate": 1.5336989598871232e-05, + "loss": 1.4229, + "step": 11620 + }, + { + "epoch": 0.3412120500322978, + "grad_norm": 0.0, + "learning_rate": 1.5336185371777593e-05, + "loss": 1.4062, + "step": 11621 + }, + { + "epoch": 0.34124141170943684, + "grad_norm": 0.0, + "learning_rate": 1.533538109642746e-05, + "loss": 1.3662, + "step": 11622 + }, + { + "epoch": 0.34127077338657585, + "grad_norm": 0.0, + "learning_rate": 1.5334576772828112e-05, + "loss": 1.2988, + "step": 11623 + }, + { + "epoch": 0.3413001350637148, + "grad_norm": 0.0, + "learning_rate": 1.5333772400986822e-05, + "loss": 1.3389, + "step": 11624 + }, + { + "epoch": 0.34132949674085383, + "grad_norm": 0.0, + "learning_rate": 1.533296798091086e-05, + "loss": 1.4434, + "step": 11625 + }, + { + "epoch": 0.34135885841799285, + "grad_norm": 0.0, + "learning_rate": 1.5332163512607498e-05, + "loss": 1.4165, + "step": 11626 + }, + { + "epoch": 0.3413882200951318, + "grad_norm": 0.0, + "learning_rate": 1.5331358996084022e-05, + "loss": 1.335, + "step": 11627 + }, + { + "epoch": 0.34141758177227083, + "grad_norm": 0.0, + "learning_rate": 1.53305544313477e-05, + "loss": 1.3789, + "step": 11628 + }, + { + "epoch": 0.34144694344940985, + "grad_norm": 0.0, + "learning_rate": 1.532974981840581e-05, + "loss": 1.4277, + "step": 11629 + }, + { + "epoch": 0.3414763051265488, + "grad_norm": 0.0, + "learning_rate": 1.5328945157265627e-05, + "loss": 1.3955, + "step": 11630 + }, + { + "epoch": 0.3415056668036878, + "grad_norm": 0.0, + "learning_rate": 1.5328140447934427e-05, + "loss": 1.415, + "step": 11631 + }, + { + "epoch": 0.34153502848082684, + "grad_norm": 0.0, + "learning_rate": 1.532733569041949e-05, + "loss": 1.2773, + "step": 11632 + }, + { + "epoch": 0.3415643901579658, + "grad_norm": 0.0, + "learning_rate": 1.532653088472809e-05, + "loss": 1.3662, + "step": 11633 + }, + { + "epoch": 0.3415937518351048, + "grad_norm": 0.0, + "learning_rate": 1.5325726030867506e-05, + "loss": 1.4629, + "step": 11634 + }, + { + "epoch": 0.34162311351224384, + "grad_norm": 0.0, + "learning_rate": 1.532492112884502e-05, + "loss": 1.3887, + "step": 11635 + }, + { + "epoch": 0.3416524751893828, + "grad_norm": 0.0, + "learning_rate": 1.5324116178667907e-05, + "loss": 1.3154, + "step": 11636 + }, + { + "epoch": 0.3416818368665218, + "grad_norm": 0.0, + "learning_rate": 1.5323311180343448e-05, + "loss": 1.3193, + "step": 11637 + }, + { + "epoch": 0.34171119854366083, + "grad_norm": 0.0, + "learning_rate": 1.532250613387892e-05, + "loss": 1.3271, + "step": 11638 + }, + { + "epoch": 0.3417405602207998, + "grad_norm": 0.0, + "learning_rate": 1.5321701039281612e-05, + "loss": 1.3945, + "step": 11639 + }, + { + "epoch": 0.3417699218979388, + "grad_norm": 0.0, + "learning_rate": 1.5320895896558798e-05, + "loss": 1.3179, + "step": 11640 + }, + { + "epoch": 0.34179928357507783, + "grad_norm": 0.0, + "learning_rate": 1.5320090705717757e-05, + "loss": 1.377, + "step": 11641 + }, + { + "epoch": 0.3418286452522168, + "grad_norm": 0.0, + "learning_rate": 1.5319285466765776e-05, + "loss": 1.418, + "step": 11642 + }, + { + "epoch": 0.3418580069293558, + "grad_norm": 0.0, + "learning_rate": 1.531848017971013e-05, + "loss": 1.2793, + "step": 11643 + }, + { + "epoch": 0.3418873686064948, + "grad_norm": 0.0, + "learning_rate": 1.531767484455811e-05, + "loss": 1.3389, + "step": 11644 + }, + { + "epoch": 0.3419167302836338, + "grad_norm": 0.0, + "learning_rate": 1.531686946131699e-05, + "loss": 1.3271, + "step": 11645 + }, + { + "epoch": 0.3419460919607728, + "grad_norm": 0.0, + "learning_rate": 1.531606402999406e-05, + "loss": 1.3237, + "step": 11646 + }, + { + "epoch": 0.3419754536379118, + "grad_norm": 0.0, + "learning_rate": 1.5315258550596604e-05, + "loss": 1.2886, + "step": 11647 + }, + { + "epoch": 0.3420048153150508, + "grad_norm": 0.0, + "learning_rate": 1.53144530231319e-05, + "loss": 1.3154, + "step": 11648 + }, + { + "epoch": 0.3420341769921898, + "grad_norm": 0.0, + "learning_rate": 1.531364744760724e-05, + "loss": 1.4395, + "step": 11649 + }, + { + "epoch": 0.3420635386693288, + "grad_norm": 0.0, + "learning_rate": 1.53128418240299e-05, + "loss": 1.2832, + "step": 11650 + }, + { + "epoch": 0.3420929003464678, + "grad_norm": 0.0, + "learning_rate": 1.5312036152407174e-05, + "loss": 1.3643, + "step": 11651 + }, + { + "epoch": 0.3421222620236068, + "grad_norm": 0.0, + "learning_rate": 1.5311230432746343e-05, + "loss": 1.3306, + "step": 11652 + }, + { + "epoch": 0.3421516237007458, + "grad_norm": 0.0, + "learning_rate": 1.53104246650547e-05, + "loss": 1.2744, + "step": 11653 + }, + { + "epoch": 0.34218098537788477, + "grad_norm": 0.0, + "learning_rate": 1.530961884933952e-05, + "loss": 1.2998, + "step": 11654 + }, + { + "epoch": 0.3422103470550238, + "grad_norm": 0.0, + "learning_rate": 1.5308812985608098e-05, + "loss": 1.3516, + "step": 11655 + }, + { + "epoch": 0.3422397087321628, + "grad_norm": 0.0, + "learning_rate": 1.530800707386772e-05, + "loss": 1.1313, + "step": 11656 + }, + { + "epoch": 0.34226907040930177, + "grad_norm": 0.0, + "learning_rate": 1.530720111412568e-05, + "loss": 1.2236, + "step": 11657 + }, + { + "epoch": 0.3422984320864408, + "grad_norm": 0.0, + "learning_rate": 1.5306395106389255e-05, + "loss": 1.5098, + "step": 11658 + }, + { + "epoch": 0.3423277937635798, + "grad_norm": 0.0, + "learning_rate": 1.5305589050665745e-05, + "loss": 1.2661, + "step": 11659 + }, + { + "epoch": 0.34235715544071876, + "grad_norm": 0.0, + "learning_rate": 1.5304782946962435e-05, + "loss": 1.3945, + "step": 11660 + }, + { + "epoch": 0.3423865171178578, + "grad_norm": 0.0, + "learning_rate": 1.530397679528661e-05, + "loss": 1.29, + "step": 11661 + }, + { + "epoch": 0.3424158787949968, + "grad_norm": 0.0, + "learning_rate": 1.5303170595645567e-05, + "loss": 1.3496, + "step": 11662 + }, + { + "epoch": 0.34244524047213576, + "grad_norm": 0.0, + "learning_rate": 1.5302364348046593e-05, + "loss": 1.3818, + "step": 11663 + }, + { + "epoch": 0.3424746021492748, + "grad_norm": 0.0, + "learning_rate": 1.5301558052496982e-05, + "loss": 1.374, + "step": 11664 + }, + { + "epoch": 0.34250396382641374, + "grad_norm": 0.0, + "learning_rate": 1.530075170900402e-05, + "loss": 1.249, + "step": 11665 + }, + { + "epoch": 0.34253332550355275, + "grad_norm": 0.0, + "learning_rate": 1.5299945317575006e-05, + "loss": 1.4102, + "step": 11666 + }, + { + "epoch": 0.34256268718069177, + "grad_norm": 0.0, + "learning_rate": 1.5299138878217225e-05, + "loss": 1.3916, + "step": 11667 + }, + { + "epoch": 0.34259204885783073, + "grad_norm": 0.0, + "learning_rate": 1.5298332390937984e-05, + "loss": 1.3672, + "step": 11668 + }, + { + "epoch": 0.34262141053496975, + "grad_norm": 0.0, + "learning_rate": 1.5297525855744558e-05, + "loss": 1.144, + "step": 11669 + }, + { + "epoch": 0.34265077221210877, + "grad_norm": 0.0, + "learning_rate": 1.529671927264425e-05, + "loss": 1.373, + "step": 11670 + }, + { + "epoch": 0.34268013388924773, + "grad_norm": 0.0, + "learning_rate": 1.5295912641644354e-05, + "loss": 1.3486, + "step": 11671 + }, + { + "epoch": 0.34270949556638675, + "grad_norm": 0.0, + "learning_rate": 1.529510596275216e-05, + "loss": 1.4033, + "step": 11672 + }, + { + "epoch": 0.34273885724352576, + "grad_norm": 0.0, + "learning_rate": 1.529429923597497e-05, + "loss": 1.2861, + "step": 11673 + }, + { + "epoch": 0.3427682189206647, + "grad_norm": 0.0, + "learning_rate": 1.5293492461320076e-05, + "loss": 1.4307, + "step": 11674 + }, + { + "epoch": 0.34279758059780374, + "grad_norm": 0.0, + "learning_rate": 1.5292685638794775e-05, + "loss": 1.3994, + "step": 11675 + }, + { + "epoch": 0.34282694227494276, + "grad_norm": 0.0, + "learning_rate": 1.5291878768406363e-05, + "loss": 1.3008, + "step": 11676 + }, + { + "epoch": 0.3428563039520817, + "grad_norm": 0.0, + "learning_rate": 1.5291071850162135e-05, + "loss": 1.459, + "step": 11677 + }, + { + "epoch": 0.34288566562922074, + "grad_norm": 0.0, + "learning_rate": 1.529026488406939e-05, + "loss": 1.3154, + "step": 11678 + }, + { + "epoch": 0.34291502730635975, + "grad_norm": 0.0, + "learning_rate": 1.528945787013542e-05, + "loss": 1.3652, + "step": 11679 + }, + { + "epoch": 0.3429443889834987, + "grad_norm": 0.0, + "learning_rate": 1.5288650808367534e-05, + "loss": 1.3682, + "step": 11680 + }, + { + "epoch": 0.34297375066063773, + "grad_norm": 0.0, + "learning_rate": 1.528784369877302e-05, + "loss": 1.2969, + "step": 11681 + }, + { + "epoch": 0.34300311233777675, + "grad_norm": 0.0, + "learning_rate": 1.5287036541359183e-05, + "loss": 1.5322, + "step": 11682 + }, + { + "epoch": 0.3430324740149157, + "grad_norm": 0.0, + "learning_rate": 1.528622933613332e-05, + "loss": 1.375, + "step": 11683 + }, + { + "epoch": 0.34306183569205473, + "grad_norm": 0.0, + "learning_rate": 1.5285422083102732e-05, + "loss": 1.3945, + "step": 11684 + }, + { + "epoch": 0.34309119736919375, + "grad_norm": 0.0, + "learning_rate": 1.5284614782274718e-05, + "loss": 1.377, + "step": 11685 + }, + { + "epoch": 0.3431205590463327, + "grad_norm": 0.0, + "learning_rate": 1.528380743365658e-05, + "loss": 1.4277, + "step": 11686 + }, + { + "epoch": 0.3431499207234717, + "grad_norm": 0.0, + "learning_rate": 1.5283000037255615e-05, + "loss": 1.585, + "step": 11687 + }, + { + "epoch": 0.34317928240061074, + "grad_norm": 0.0, + "learning_rate": 1.528219259307913e-05, + "loss": 1.2539, + "step": 11688 + }, + { + "epoch": 0.3432086440777497, + "grad_norm": 0.0, + "learning_rate": 1.5281385101134427e-05, + "loss": 1.3232, + "step": 11689 + }, + { + "epoch": 0.3432380057548887, + "grad_norm": 0.0, + "learning_rate": 1.5280577561428804e-05, + "loss": 1.3809, + "step": 11690 + }, + { + "epoch": 0.34326736743202774, + "grad_norm": 0.0, + "learning_rate": 1.5279769973969566e-05, + "loss": 1.3838, + "step": 11691 + }, + { + "epoch": 0.3432967291091667, + "grad_norm": 0.0, + "learning_rate": 1.5278962338764017e-05, + "loss": 1.2744, + "step": 11692 + }, + { + "epoch": 0.3433260907863057, + "grad_norm": 0.0, + "learning_rate": 1.5278154655819454e-05, + "loss": 1.377, + "step": 11693 + }, + { + "epoch": 0.34335545246344473, + "grad_norm": 0.0, + "learning_rate": 1.527734692514319e-05, + "loss": 1.5098, + "step": 11694 + }, + { + "epoch": 0.3433848141405837, + "grad_norm": 0.0, + "learning_rate": 1.5276539146742527e-05, + "loss": 1.4316, + "step": 11695 + }, + { + "epoch": 0.3434141758177227, + "grad_norm": 0.0, + "learning_rate": 1.527573132062477e-05, + "loss": 1.208, + "step": 11696 + }, + { + "epoch": 0.34344353749486173, + "grad_norm": 0.0, + "learning_rate": 1.5274923446797223e-05, + "loss": 1.4502, + "step": 11697 + }, + { + "epoch": 0.3434728991720007, + "grad_norm": 0.0, + "learning_rate": 1.5274115525267192e-05, + "loss": 1.4346, + "step": 11698 + }, + { + "epoch": 0.3435022608491397, + "grad_norm": 0.0, + "learning_rate": 1.5273307556041982e-05, + "loss": 1.3271, + "step": 11699 + }, + { + "epoch": 0.3435316225262787, + "grad_norm": 0.0, + "learning_rate": 1.5272499539128902e-05, + "loss": 1.3564, + "step": 11700 + }, + { + "epoch": 0.3435609842034177, + "grad_norm": 0.0, + "learning_rate": 1.527169147453526e-05, + "loss": 1.1997, + "step": 11701 + }, + { + "epoch": 0.3435903458805567, + "grad_norm": 0.0, + "learning_rate": 1.527088336226836e-05, + "loss": 1.314, + "step": 11702 + }, + { + "epoch": 0.3436197075576957, + "grad_norm": 0.0, + "learning_rate": 1.5270075202335513e-05, + "loss": 1.3633, + "step": 11703 + }, + { + "epoch": 0.3436490692348347, + "grad_norm": 0.0, + "learning_rate": 1.5269266994744024e-05, + "loss": 1.334, + "step": 11704 + }, + { + "epoch": 0.3436784309119737, + "grad_norm": 0.0, + "learning_rate": 1.526845873950121e-05, + "loss": 1.4307, + "step": 11705 + }, + { + "epoch": 0.3437077925891127, + "grad_norm": 0.0, + "learning_rate": 1.526765043661437e-05, + "loss": 1.4395, + "step": 11706 + }, + { + "epoch": 0.3437371542662517, + "grad_norm": 0.0, + "learning_rate": 1.5266842086090815e-05, + "loss": 1.3818, + "step": 11707 + }, + { + "epoch": 0.3437665159433907, + "grad_norm": 0.0, + "learning_rate": 1.5266033687937865e-05, + "loss": 1.4404, + "step": 11708 + }, + { + "epoch": 0.3437958776205297, + "grad_norm": 0.0, + "learning_rate": 1.526522524216282e-05, + "loss": 1.3428, + "step": 11709 + }, + { + "epoch": 0.34382523929766867, + "grad_norm": 0.0, + "learning_rate": 1.5264416748772998e-05, + "loss": 1.4395, + "step": 11710 + }, + { + "epoch": 0.3438546009748077, + "grad_norm": 0.0, + "learning_rate": 1.52636082077757e-05, + "loss": 1.4473, + "step": 11711 + }, + { + "epoch": 0.3438839626519467, + "grad_norm": 0.0, + "learning_rate": 1.5262799619178248e-05, + "loss": 1.4375, + "step": 11712 + }, + { + "epoch": 0.34391332432908567, + "grad_norm": 0.0, + "learning_rate": 1.5261990982987956e-05, + "loss": 1.3633, + "step": 11713 + }, + { + "epoch": 0.3439426860062247, + "grad_norm": 0.0, + "learning_rate": 1.526118229921213e-05, + "loss": 1.457, + "step": 11714 + }, + { + "epoch": 0.34397204768336365, + "grad_norm": 0.0, + "learning_rate": 1.5260373567858077e-05, + "loss": 1.3838, + "step": 11715 + }, + { + "epoch": 0.34400140936050266, + "grad_norm": 0.0, + "learning_rate": 1.5259564788933125e-05, + "loss": 1.3442, + "step": 11716 + }, + { + "epoch": 0.3440307710376417, + "grad_norm": 0.0, + "learning_rate": 1.5258755962444585e-05, + "loss": 1.3726, + "step": 11717 + }, + { + "epoch": 0.34406013271478064, + "grad_norm": 0.0, + "learning_rate": 1.5257947088399765e-05, + "loss": 1.3555, + "step": 11718 + }, + { + "epoch": 0.34408949439191966, + "grad_norm": 0.0, + "learning_rate": 1.5257138166805982e-05, + "loss": 1.3604, + "step": 11719 + }, + { + "epoch": 0.3441188560690587, + "grad_norm": 0.0, + "learning_rate": 1.5256329197670552e-05, + "loss": 1.4004, + "step": 11720 + }, + { + "epoch": 0.34414821774619764, + "grad_norm": 0.0, + "learning_rate": 1.5255520181000791e-05, + "loss": 1.3418, + "step": 11721 + }, + { + "epoch": 0.34417757942333665, + "grad_norm": 0.0, + "learning_rate": 1.5254711116804017e-05, + "loss": 1.3174, + "step": 11722 + }, + { + "epoch": 0.34420694110047567, + "grad_norm": 0.0, + "learning_rate": 1.5253902005087543e-05, + "loss": 1.293, + "step": 11723 + }, + { + "epoch": 0.34423630277761463, + "grad_norm": 0.0, + "learning_rate": 1.5253092845858688e-05, + "loss": 1.4863, + "step": 11724 + }, + { + "epoch": 0.34426566445475365, + "grad_norm": 0.0, + "learning_rate": 1.525228363912477e-05, + "loss": 1.3193, + "step": 11725 + }, + { + "epoch": 0.34429502613189267, + "grad_norm": 0.0, + "learning_rate": 1.5251474384893103e-05, + "loss": 1.3516, + "step": 11726 + }, + { + "epoch": 0.34432438780903163, + "grad_norm": 0.0, + "learning_rate": 1.5250665083171008e-05, + "loss": 1.4355, + "step": 11727 + }, + { + "epoch": 0.34435374948617065, + "grad_norm": 0.0, + "learning_rate": 1.5249855733965809e-05, + "loss": 1.4453, + "step": 11728 + }, + { + "epoch": 0.34438311116330966, + "grad_norm": 0.0, + "learning_rate": 1.5249046337284816e-05, + "loss": 1.5176, + "step": 11729 + }, + { + "epoch": 0.3444124728404486, + "grad_norm": 0.0, + "learning_rate": 1.5248236893135356e-05, + "loss": 1.3672, + "step": 11730 + }, + { + "epoch": 0.34444183451758764, + "grad_norm": 0.0, + "learning_rate": 1.5247427401524742e-05, + "loss": 1.4658, + "step": 11731 + }, + { + "epoch": 0.34447119619472666, + "grad_norm": 0.0, + "learning_rate": 1.5246617862460298e-05, + "loss": 1.2539, + "step": 11732 + }, + { + "epoch": 0.3445005578718656, + "grad_norm": 0.0, + "learning_rate": 1.5245808275949347e-05, + "loss": 1.4785, + "step": 11733 + }, + { + "epoch": 0.34452991954900464, + "grad_norm": 0.0, + "learning_rate": 1.5244998641999209e-05, + "loss": 1.4629, + "step": 11734 + }, + { + "epoch": 0.34455928122614365, + "grad_norm": 0.0, + "learning_rate": 1.5244188960617204e-05, + "loss": 1.2744, + "step": 11735 + }, + { + "epoch": 0.3445886429032826, + "grad_norm": 0.0, + "learning_rate": 1.5243379231810654e-05, + "loss": 1.3623, + "step": 11736 + }, + { + "epoch": 0.34461800458042163, + "grad_norm": 0.0, + "learning_rate": 1.5242569455586885e-05, + "loss": 1.4375, + "step": 11737 + }, + { + "epoch": 0.34464736625756065, + "grad_norm": 0.0, + "learning_rate": 1.5241759631953216e-05, + "loss": 1.5029, + "step": 11738 + }, + { + "epoch": 0.3446767279346996, + "grad_norm": 0.0, + "learning_rate": 1.5240949760916972e-05, + "loss": 1.3604, + "step": 11739 + }, + { + "epoch": 0.34470608961183863, + "grad_norm": 0.0, + "learning_rate": 1.5240139842485478e-05, + "loss": 1.415, + "step": 11740 + }, + { + "epoch": 0.34473545128897765, + "grad_norm": 0.0, + "learning_rate": 1.5239329876666059e-05, + "loss": 1.4316, + "step": 11741 + }, + { + "epoch": 0.3447648129661166, + "grad_norm": 0.0, + "learning_rate": 1.5238519863466037e-05, + "loss": 1.3018, + "step": 11742 + }, + { + "epoch": 0.3447941746432556, + "grad_norm": 0.0, + "learning_rate": 1.5237709802892735e-05, + "loss": 1.4512, + "step": 11743 + }, + { + "epoch": 0.34482353632039464, + "grad_norm": 0.0, + "learning_rate": 1.5236899694953484e-05, + "loss": 1.4355, + "step": 11744 + }, + { + "epoch": 0.3448528979975336, + "grad_norm": 0.0, + "learning_rate": 1.523608953965561e-05, + "loss": 1.2812, + "step": 11745 + }, + { + "epoch": 0.3448822596746726, + "grad_norm": 0.0, + "learning_rate": 1.5235279337006434e-05, + "loss": 1.3945, + "step": 11746 + }, + { + "epoch": 0.34491162135181164, + "grad_norm": 0.0, + "learning_rate": 1.523446908701329e-05, + "loss": 1.4453, + "step": 11747 + }, + { + "epoch": 0.3449409830289506, + "grad_norm": 0.0, + "learning_rate": 1.5233658789683499e-05, + "loss": 1.4443, + "step": 11748 + }, + { + "epoch": 0.3449703447060896, + "grad_norm": 0.0, + "learning_rate": 1.523284844502439e-05, + "loss": 1.3926, + "step": 11749 + }, + { + "epoch": 0.34499970638322863, + "grad_norm": 0.0, + "learning_rate": 1.5232038053043297e-05, + "loss": 1.4395, + "step": 11750 + }, + { + "epoch": 0.3450290680603676, + "grad_norm": 0.0, + "learning_rate": 1.5231227613747539e-05, + "loss": 1.4307, + "step": 11751 + }, + { + "epoch": 0.3450584297375066, + "grad_norm": 0.0, + "learning_rate": 1.523041712714445e-05, + "loss": 1.4141, + "step": 11752 + }, + { + "epoch": 0.34508779141464563, + "grad_norm": 0.0, + "learning_rate": 1.5229606593241362e-05, + "loss": 1.3994, + "step": 11753 + }, + { + "epoch": 0.3451171530917846, + "grad_norm": 0.0, + "learning_rate": 1.5228796012045601e-05, + "loss": 1.4492, + "step": 11754 + }, + { + "epoch": 0.3451465147689236, + "grad_norm": 0.0, + "learning_rate": 1.52279853835645e-05, + "loss": 1.335, + "step": 11755 + }, + { + "epoch": 0.3451758764460626, + "grad_norm": 0.0, + "learning_rate": 1.5227174707805386e-05, + "loss": 1.4766, + "step": 11756 + }, + { + "epoch": 0.3452052381232016, + "grad_norm": 0.0, + "learning_rate": 1.5226363984775594e-05, + "loss": 1.3291, + "step": 11757 + }, + { + "epoch": 0.3452345998003406, + "grad_norm": 0.0, + "learning_rate": 1.5225553214482456e-05, + "loss": 1.3584, + "step": 11758 + }, + { + "epoch": 0.3452639614774796, + "grad_norm": 0.0, + "learning_rate": 1.5224742396933298e-05, + "loss": 1.3643, + "step": 11759 + }, + { + "epoch": 0.3452933231546186, + "grad_norm": 0.0, + "learning_rate": 1.5223931532135459e-05, + "loss": 1.3896, + "step": 11760 + }, + { + "epoch": 0.3453226848317576, + "grad_norm": 0.0, + "learning_rate": 1.5223120620096268e-05, + "loss": 1.2627, + "step": 11761 + }, + { + "epoch": 0.3453520465088966, + "grad_norm": 0.0, + "learning_rate": 1.522230966082306e-05, + "loss": 1.4336, + "step": 11762 + }, + { + "epoch": 0.3453814081860356, + "grad_norm": 0.0, + "learning_rate": 1.5221498654323168e-05, + "loss": 1.4023, + "step": 11763 + }, + { + "epoch": 0.3454107698631746, + "grad_norm": 0.0, + "learning_rate": 1.5220687600603925e-05, + "loss": 1.3975, + "step": 11764 + }, + { + "epoch": 0.34544013154031356, + "grad_norm": 0.0, + "learning_rate": 1.521987649967267e-05, + "loss": 1.459, + "step": 11765 + }, + { + "epoch": 0.3454694932174526, + "grad_norm": 0.0, + "learning_rate": 1.5219065351536734e-05, + "loss": 1.2651, + "step": 11766 + }, + { + "epoch": 0.3454988548945916, + "grad_norm": 0.0, + "learning_rate": 1.5218254156203454e-05, + "loss": 1.4033, + "step": 11767 + }, + { + "epoch": 0.34552821657173055, + "grad_norm": 0.0, + "learning_rate": 1.5217442913680162e-05, + "loss": 1.4277, + "step": 11768 + }, + { + "epoch": 0.34555757824886957, + "grad_norm": 0.0, + "learning_rate": 1.52166316239742e-05, + "loss": 1.3105, + "step": 11769 + }, + { + "epoch": 0.3455869399260086, + "grad_norm": 0.0, + "learning_rate": 1.5215820287092904e-05, + "loss": 1.3447, + "step": 11770 + }, + { + "epoch": 0.34561630160314755, + "grad_norm": 0.0, + "learning_rate": 1.5215008903043607e-05, + "loss": 1.3027, + "step": 11771 + }, + { + "epoch": 0.34564566328028656, + "grad_norm": 0.0, + "learning_rate": 1.521419747183365e-05, + "loss": 1.3301, + "step": 11772 + }, + { + "epoch": 0.3456750249574256, + "grad_norm": 0.0, + "learning_rate": 1.5213385993470366e-05, + "loss": 1.4893, + "step": 11773 + }, + { + "epoch": 0.34570438663456454, + "grad_norm": 0.0, + "learning_rate": 1.5212574467961103e-05, + "loss": 1.3867, + "step": 11774 + }, + { + "epoch": 0.34573374831170356, + "grad_norm": 0.0, + "learning_rate": 1.5211762895313192e-05, + "loss": 1.3301, + "step": 11775 + }, + { + "epoch": 0.3457631099888426, + "grad_norm": 0.0, + "learning_rate": 1.5210951275533971e-05, + "loss": 1.4824, + "step": 11776 + }, + { + "epoch": 0.34579247166598154, + "grad_norm": 0.0, + "learning_rate": 1.5210139608630788e-05, + "loss": 1.4023, + "step": 11777 + }, + { + "epoch": 0.34582183334312055, + "grad_norm": 0.0, + "learning_rate": 1.5209327894610975e-05, + "loss": 1.498, + "step": 11778 + }, + { + "epoch": 0.34585119502025957, + "grad_norm": 0.0, + "learning_rate": 1.5208516133481878e-05, + "loss": 1.1421, + "step": 11779 + }, + { + "epoch": 0.34588055669739853, + "grad_norm": 0.0, + "learning_rate": 1.5207704325250834e-05, + "loss": 1.4336, + "step": 11780 + }, + { + "epoch": 0.34590991837453755, + "grad_norm": 0.0, + "learning_rate": 1.5206892469925185e-05, + "loss": 1.5273, + "step": 11781 + }, + { + "epoch": 0.34593928005167657, + "grad_norm": 0.0, + "learning_rate": 1.5206080567512278e-05, + "loss": 1.5088, + "step": 11782 + }, + { + "epoch": 0.34596864172881553, + "grad_norm": 0.0, + "learning_rate": 1.5205268618019449e-05, + "loss": 1.5303, + "step": 11783 + }, + { + "epoch": 0.34599800340595455, + "grad_norm": 0.0, + "learning_rate": 1.5204456621454041e-05, + "loss": 1.3105, + "step": 11784 + }, + { + "epoch": 0.34602736508309356, + "grad_norm": 0.0, + "learning_rate": 1.52036445778234e-05, + "loss": 1.3066, + "step": 11785 + }, + { + "epoch": 0.3460567267602325, + "grad_norm": 0.0, + "learning_rate": 1.5202832487134869e-05, + "loss": 1.3623, + "step": 11786 + }, + { + "epoch": 0.34608608843737154, + "grad_norm": 0.0, + "learning_rate": 1.5202020349395793e-05, + "loss": 1.3877, + "step": 11787 + }, + { + "epoch": 0.34611545011451056, + "grad_norm": 0.0, + "learning_rate": 1.5201208164613513e-05, + "loss": 1.3135, + "step": 11788 + }, + { + "epoch": 0.3461448117916495, + "grad_norm": 0.0, + "learning_rate": 1.5200395932795373e-05, + "loss": 1.3516, + "step": 11789 + }, + { + "epoch": 0.34617417346878854, + "grad_norm": 0.0, + "learning_rate": 1.5199583653948722e-05, + "loss": 1.3457, + "step": 11790 + }, + { + "epoch": 0.34620353514592755, + "grad_norm": 0.0, + "learning_rate": 1.5198771328080907e-05, + "loss": 1.4111, + "step": 11791 + }, + { + "epoch": 0.3462328968230665, + "grad_norm": 0.0, + "learning_rate": 1.519795895519927e-05, + "loss": 1.4443, + "step": 11792 + }, + { + "epoch": 0.34626225850020553, + "grad_norm": 0.0, + "learning_rate": 1.519714653531116e-05, + "loss": 1.4482, + "step": 11793 + }, + { + "epoch": 0.34629162017734455, + "grad_norm": 0.0, + "learning_rate": 1.5196334068423921e-05, + "loss": 1.4727, + "step": 11794 + }, + { + "epoch": 0.3463209818544835, + "grad_norm": 0.0, + "learning_rate": 1.5195521554544903e-05, + "loss": 1.3643, + "step": 11795 + }, + { + "epoch": 0.34635034353162253, + "grad_norm": 0.0, + "learning_rate": 1.5194708993681453e-05, + "loss": 1.332, + "step": 11796 + }, + { + "epoch": 0.34637970520876155, + "grad_norm": 0.0, + "learning_rate": 1.519389638584092e-05, + "loss": 1.3369, + "step": 11797 + }, + { + "epoch": 0.3464090668859005, + "grad_norm": 0.0, + "learning_rate": 1.519308373103065e-05, + "loss": 1.3818, + "step": 11798 + }, + { + "epoch": 0.3464384285630395, + "grad_norm": 0.0, + "learning_rate": 1.5192271029257996e-05, + "loss": 1.4121, + "step": 11799 + }, + { + "epoch": 0.34646779024017854, + "grad_norm": 0.0, + "learning_rate": 1.5191458280530302e-05, + "loss": 1.4111, + "step": 11800 + }, + { + "epoch": 0.3464971519173175, + "grad_norm": 0.0, + "learning_rate": 1.5190645484854925e-05, + "loss": 1.4141, + "step": 11801 + }, + { + "epoch": 0.3465265135944565, + "grad_norm": 0.0, + "learning_rate": 1.518983264223921e-05, + "loss": 1.2812, + "step": 11802 + }, + { + "epoch": 0.34655587527159554, + "grad_norm": 0.0, + "learning_rate": 1.518901975269051e-05, + "loss": 1.3906, + "step": 11803 + }, + { + "epoch": 0.3465852369487345, + "grad_norm": 0.0, + "learning_rate": 1.5188206816216174e-05, + "loss": 1.3389, + "step": 11804 + }, + { + "epoch": 0.3466145986258735, + "grad_norm": 0.0, + "learning_rate": 1.5187393832823553e-05, + "loss": 1.2812, + "step": 11805 + }, + { + "epoch": 0.34664396030301253, + "grad_norm": 0.0, + "learning_rate": 1.5186580802520005e-05, + "loss": 1.4941, + "step": 11806 + }, + { + "epoch": 0.3466733219801515, + "grad_norm": 0.0, + "learning_rate": 1.5185767725312879e-05, + "loss": 1.3096, + "step": 11807 + }, + { + "epoch": 0.3467026836572905, + "grad_norm": 0.0, + "learning_rate": 1.5184954601209527e-05, + "loss": 1.416, + "step": 11808 + }, + { + "epoch": 0.34673204533442953, + "grad_norm": 0.0, + "learning_rate": 1.51841414302173e-05, + "loss": 1.4072, + "step": 11809 + }, + { + "epoch": 0.3467614070115685, + "grad_norm": 0.0, + "learning_rate": 1.5183328212343553e-05, + "loss": 1.3545, + "step": 11810 + }, + { + "epoch": 0.3467907686887075, + "grad_norm": 0.0, + "learning_rate": 1.5182514947595647e-05, + "loss": 1.4375, + "step": 11811 + }, + { + "epoch": 0.3468201303658465, + "grad_norm": 0.0, + "learning_rate": 1.5181701635980928e-05, + "loss": 1.3989, + "step": 11812 + }, + { + "epoch": 0.3468494920429855, + "grad_norm": 0.0, + "learning_rate": 1.5180888277506753e-05, + "loss": 1.335, + "step": 11813 + }, + { + "epoch": 0.3468788537201245, + "grad_norm": 0.0, + "learning_rate": 1.5180074872180479e-05, + "loss": 1.5234, + "step": 11814 + }, + { + "epoch": 0.34690821539726346, + "grad_norm": 0.0, + "learning_rate": 1.5179261420009463e-05, + "loss": 1.4219, + "step": 11815 + }, + { + "epoch": 0.3469375770744025, + "grad_norm": 0.0, + "learning_rate": 1.517844792100106e-05, + "loss": 1.3662, + "step": 11816 + }, + { + "epoch": 0.3469669387515415, + "grad_norm": 0.0, + "learning_rate": 1.5177634375162624e-05, + "loss": 1.5117, + "step": 11817 + }, + { + "epoch": 0.34699630042868046, + "grad_norm": 0.0, + "learning_rate": 1.5176820782501515e-05, + "loss": 1.4004, + "step": 11818 + }, + { + "epoch": 0.3470256621058195, + "grad_norm": 0.0, + "learning_rate": 1.5176007143025089e-05, + "loss": 1.4854, + "step": 11819 + }, + { + "epoch": 0.3470550237829585, + "grad_norm": 0.0, + "learning_rate": 1.5175193456740707e-05, + "loss": 1.1836, + "step": 11820 + }, + { + "epoch": 0.34708438546009746, + "grad_norm": 0.0, + "learning_rate": 1.5174379723655723e-05, + "loss": 1.5107, + "step": 11821 + }, + { + "epoch": 0.3471137471372365, + "grad_norm": 0.0, + "learning_rate": 1.5173565943777497e-05, + "loss": 1.4541, + "step": 11822 + }, + { + "epoch": 0.3471431088143755, + "grad_norm": 0.0, + "learning_rate": 1.5172752117113394e-05, + "loss": 1.3408, + "step": 11823 + }, + { + "epoch": 0.34717247049151445, + "grad_norm": 0.0, + "learning_rate": 1.5171938243670766e-05, + "loss": 1.5195, + "step": 11824 + }, + { + "epoch": 0.34720183216865347, + "grad_norm": 0.0, + "learning_rate": 1.5171124323456977e-05, + "loss": 1.4482, + "step": 11825 + }, + { + "epoch": 0.3472311938457925, + "grad_norm": 0.0, + "learning_rate": 1.5170310356479383e-05, + "loss": 1.3965, + "step": 11826 + }, + { + "epoch": 0.34726055552293145, + "grad_norm": 0.0, + "learning_rate": 1.5169496342745352e-05, + "loss": 1.4102, + "step": 11827 + }, + { + "epoch": 0.34728991720007046, + "grad_norm": 0.0, + "learning_rate": 1.5168682282262239e-05, + "loss": 1.4141, + "step": 11828 + }, + { + "epoch": 0.3473192788772095, + "grad_norm": 0.0, + "learning_rate": 1.516786817503741e-05, + "loss": 1.2773, + "step": 11829 + }, + { + "epoch": 0.34734864055434844, + "grad_norm": 0.0, + "learning_rate": 1.5167054021078227e-05, + "loss": 1.4561, + "step": 11830 + }, + { + "epoch": 0.34737800223148746, + "grad_norm": 0.0, + "learning_rate": 1.5166239820392047e-05, + "loss": 1.2705, + "step": 11831 + }, + { + "epoch": 0.3474073639086265, + "grad_norm": 0.0, + "learning_rate": 1.516542557298624e-05, + "loss": 1.4092, + "step": 11832 + }, + { + "epoch": 0.34743672558576544, + "grad_norm": 0.0, + "learning_rate": 1.5164611278868167e-05, + "loss": 1.2129, + "step": 11833 + }, + { + "epoch": 0.34746608726290446, + "grad_norm": 0.0, + "learning_rate": 1.516379693804519e-05, + "loss": 1.3916, + "step": 11834 + }, + { + "epoch": 0.34749544894004347, + "grad_norm": 0.0, + "learning_rate": 1.5162982550524676e-05, + "loss": 1.3242, + "step": 11835 + }, + { + "epoch": 0.34752481061718243, + "grad_norm": 0.0, + "learning_rate": 1.5162168116313991e-05, + "loss": 1.3906, + "step": 11836 + }, + { + "epoch": 0.34755417229432145, + "grad_norm": 0.0, + "learning_rate": 1.5161353635420493e-05, + "loss": 1.2471, + "step": 11837 + }, + { + "epoch": 0.34758353397146047, + "grad_norm": 0.0, + "learning_rate": 1.5160539107851553e-05, + "loss": 1.4541, + "step": 11838 + }, + { + "epoch": 0.34761289564859943, + "grad_norm": 0.0, + "learning_rate": 1.5159724533614537e-05, + "loss": 1.3643, + "step": 11839 + }, + { + "epoch": 0.34764225732573845, + "grad_norm": 0.0, + "learning_rate": 1.5158909912716813e-05, + "loss": 1.3496, + "step": 11840 + }, + { + "epoch": 0.34767161900287746, + "grad_norm": 0.0, + "learning_rate": 1.5158095245165743e-05, + "loss": 1.4043, + "step": 11841 + }, + { + "epoch": 0.3477009806800164, + "grad_norm": 0.0, + "learning_rate": 1.5157280530968695e-05, + "loss": 1.4316, + "step": 11842 + }, + { + "epoch": 0.34773034235715544, + "grad_norm": 0.0, + "learning_rate": 1.5156465770133042e-05, + "loss": 1.2979, + "step": 11843 + }, + { + "epoch": 0.34775970403429446, + "grad_norm": 0.0, + "learning_rate": 1.5155650962666146e-05, + "loss": 1.4424, + "step": 11844 + }, + { + "epoch": 0.3477890657114334, + "grad_norm": 0.0, + "learning_rate": 1.515483610857538e-05, + "loss": 1.2881, + "step": 11845 + }, + { + "epoch": 0.34781842738857244, + "grad_norm": 0.0, + "learning_rate": 1.5154021207868108e-05, + "loss": 1.2939, + "step": 11846 + }, + { + "epoch": 0.34784778906571145, + "grad_norm": 0.0, + "learning_rate": 1.5153206260551704e-05, + "loss": 1.2305, + "step": 11847 + }, + { + "epoch": 0.3478771507428504, + "grad_norm": 0.0, + "learning_rate": 1.5152391266633539e-05, + "loss": 1.2002, + "step": 11848 + }, + { + "epoch": 0.34790651241998943, + "grad_norm": 0.0, + "learning_rate": 1.5151576226120976e-05, + "loss": 1.3682, + "step": 11849 + }, + { + "epoch": 0.34793587409712845, + "grad_norm": 0.0, + "learning_rate": 1.515076113902139e-05, + "loss": 1.4404, + "step": 11850 + }, + { + "epoch": 0.3479652357742674, + "grad_norm": 0.0, + "learning_rate": 1.5149946005342153e-05, + "loss": 1.4424, + "step": 11851 + }, + { + "epoch": 0.34799459745140643, + "grad_norm": 0.0, + "learning_rate": 1.5149130825090636e-05, + "loss": 1.4043, + "step": 11852 + }, + { + "epoch": 0.34802395912854545, + "grad_norm": 0.0, + "learning_rate": 1.514831559827421e-05, + "loss": 1.3281, + "step": 11853 + }, + { + "epoch": 0.3480533208056844, + "grad_norm": 0.0, + "learning_rate": 1.5147500324900246e-05, + "loss": 1.4424, + "step": 11854 + }, + { + "epoch": 0.3480826824828234, + "grad_norm": 0.0, + "learning_rate": 1.5146685004976118e-05, + "loss": 1.4736, + "step": 11855 + }, + { + "epoch": 0.34811204415996244, + "grad_norm": 0.0, + "learning_rate": 1.5145869638509201e-05, + "loss": 1.3223, + "step": 11856 + }, + { + "epoch": 0.3481414058371014, + "grad_norm": 0.0, + "learning_rate": 1.5145054225506868e-05, + "loss": 1.4385, + "step": 11857 + }, + { + "epoch": 0.3481707675142404, + "grad_norm": 0.0, + "learning_rate": 1.514423876597649e-05, + "loss": 1.4531, + "step": 11858 + }, + { + "epoch": 0.34820012919137944, + "grad_norm": 0.0, + "learning_rate": 1.5143423259925446e-05, + "loss": 1.3291, + "step": 11859 + }, + { + "epoch": 0.3482294908685184, + "grad_norm": 0.0, + "learning_rate": 1.5142607707361108e-05, + "loss": 1.2451, + "step": 11860 + }, + { + "epoch": 0.3482588525456574, + "grad_norm": 0.0, + "learning_rate": 1.514179210829085e-05, + "loss": 1.3369, + "step": 11861 + }, + { + "epoch": 0.34828821422279643, + "grad_norm": 0.0, + "learning_rate": 1.5140976462722046e-05, + "loss": 1.4287, + "step": 11862 + }, + { + "epoch": 0.3483175758999354, + "grad_norm": 0.0, + "learning_rate": 1.5140160770662078e-05, + "loss": 1.4268, + "step": 11863 + }, + { + "epoch": 0.3483469375770744, + "grad_norm": 0.0, + "learning_rate": 1.5139345032118323e-05, + "loss": 1.3281, + "step": 11864 + }, + { + "epoch": 0.3483762992542134, + "grad_norm": 0.0, + "learning_rate": 1.5138529247098149e-05, + "loss": 1.3428, + "step": 11865 + }, + { + "epoch": 0.3484056609313524, + "grad_norm": 0.0, + "learning_rate": 1.5137713415608941e-05, + "loss": 1.292, + "step": 11866 + }, + { + "epoch": 0.3484350226084914, + "grad_norm": 0.0, + "learning_rate": 1.5136897537658076e-05, + "loss": 1.3252, + "step": 11867 + }, + { + "epoch": 0.34846438428563037, + "grad_norm": 0.0, + "learning_rate": 1.5136081613252928e-05, + "loss": 1.4355, + "step": 11868 + }, + { + "epoch": 0.3484937459627694, + "grad_norm": 0.0, + "learning_rate": 1.5135265642400883e-05, + "loss": 1.3413, + "step": 11869 + }, + { + "epoch": 0.3485231076399084, + "grad_norm": 0.0, + "learning_rate": 1.5134449625109313e-05, + "loss": 1.418, + "step": 11870 + }, + { + "epoch": 0.34855246931704736, + "grad_norm": 0.0, + "learning_rate": 1.5133633561385601e-05, + "loss": 1.501, + "step": 11871 + }, + { + "epoch": 0.3485818309941864, + "grad_norm": 0.0, + "learning_rate": 1.5132817451237128e-05, + "loss": 1.4678, + "step": 11872 + }, + { + "epoch": 0.3486111926713254, + "grad_norm": 0.0, + "learning_rate": 1.5132001294671268e-05, + "loss": 1.3662, + "step": 11873 + }, + { + "epoch": 0.34864055434846436, + "grad_norm": 0.0, + "learning_rate": 1.5131185091695408e-05, + "loss": 1.3984, + "step": 11874 + }, + { + "epoch": 0.3486699160256034, + "grad_norm": 0.0, + "learning_rate": 1.5130368842316928e-05, + "loss": 1.3496, + "step": 11875 + }, + { + "epoch": 0.3486992777027424, + "grad_norm": 0.0, + "learning_rate": 1.5129552546543207e-05, + "loss": 1.2617, + "step": 11876 + }, + { + "epoch": 0.34872863937988136, + "grad_norm": 0.0, + "learning_rate": 1.5128736204381632e-05, + "loss": 1.4277, + "step": 11877 + }, + { + "epoch": 0.3487580010570204, + "grad_norm": 0.0, + "learning_rate": 1.5127919815839578e-05, + "loss": 1.4082, + "step": 11878 + }, + { + "epoch": 0.3487873627341594, + "grad_norm": 0.0, + "learning_rate": 1.5127103380924431e-05, + "loss": 1.3779, + "step": 11879 + }, + { + "epoch": 0.34881672441129835, + "grad_norm": 0.0, + "learning_rate": 1.5126286899643576e-05, + "loss": 1.293, + "step": 11880 + }, + { + "epoch": 0.34884608608843737, + "grad_norm": 0.0, + "learning_rate": 1.5125470372004398e-05, + "loss": 1.4404, + "step": 11881 + }, + { + "epoch": 0.3488754477655764, + "grad_norm": 0.0, + "learning_rate": 1.5124653798014277e-05, + "loss": 1.332, + "step": 11882 + }, + { + "epoch": 0.34890480944271535, + "grad_norm": 0.0, + "learning_rate": 1.51238371776806e-05, + "loss": 1.3857, + "step": 11883 + }, + { + "epoch": 0.34893417111985436, + "grad_norm": 0.0, + "learning_rate": 1.512302051101075e-05, + "loss": 1.4648, + "step": 11884 + }, + { + "epoch": 0.3489635327969934, + "grad_norm": 0.0, + "learning_rate": 1.5122203798012115e-05, + "loss": 1.2749, + "step": 11885 + }, + { + "epoch": 0.34899289447413234, + "grad_norm": 0.0, + "learning_rate": 1.512138703869208e-05, + "loss": 1.3223, + "step": 11886 + }, + { + "epoch": 0.34902225615127136, + "grad_norm": 0.0, + "learning_rate": 1.5120570233058029e-05, + "loss": 1.4375, + "step": 11887 + }, + { + "epoch": 0.3490516178284104, + "grad_norm": 0.0, + "learning_rate": 1.511975338111735e-05, + "loss": 1.3008, + "step": 11888 + }, + { + "epoch": 0.34908097950554934, + "grad_norm": 0.0, + "learning_rate": 1.511893648287743e-05, + "loss": 1.2202, + "step": 11889 + }, + { + "epoch": 0.34911034118268836, + "grad_norm": 0.0, + "learning_rate": 1.5118119538345658e-05, + "loss": 1.3271, + "step": 11890 + }, + { + "epoch": 0.34913970285982737, + "grad_norm": 0.0, + "learning_rate": 1.5117302547529418e-05, + "loss": 1.458, + "step": 11891 + }, + { + "epoch": 0.34916906453696633, + "grad_norm": 0.0, + "learning_rate": 1.51164855104361e-05, + "loss": 1.4414, + "step": 11892 + }, + { + "epoch": 0.34919842621410535, + "grad_norm": 0.0, + "learning_rate": 1.5115668427073096e-05, + "loss": 1.4873, + "step": 11893 + }, + { + "epoch": 0.34922778789124437, + "grad_norm": 0.0, + "learning_rate": 1.511485129744779e-05, + "loss": 1.5439, + "step": 11894 + }, + { + "epoch": 0.34925714956838333, + "grad_norm": 0.0, + "learning_rate": 1.5114034121567576e-05, + "loss": 1.4131, + "step": 11895 + }, + { + "epoch": 0.34928651124552235, + "grad_norm": 0.0, + "learning_rate": 1.511321689943984e-05, + "loss": 1.418, + "step": 11896 + }, + { + "epoch": 0.34931587292266136, + "grad_norm": 0.0, + "learning_rate": 1.5112399631071975e-05, + "loss": 1.4033, + "step": 11897 + }, + { + "epoch": 0.3493452345998003, + "grad_norm": 0.0, + "learning_rate": 1.5111582316471369e-05, + "loss": 1.2539, + "step": 11898 + }, + { + "epoch": 0.34937459627693934, + "grad_norm": 0.0, + "learning_rate": 1.5110764955645417e-05, + "loss": 1.3516, + "step": 11899 + }, + { + "epoch": 0.34940395795407836, + "grad_norm": 0.0, + "learning_rate": 1.5109947548601506e-05, + "loss": 1.3745, + "step": 11900 + }, + { + "epoch": 0.3494333196312173, + "grad_norm": 0.0, + "learning_rate": 1.5109130095347034e-05, + "loss": 1.3799, + "step": 11901 + }, + { + "epoch": 0.34946268130835634, + "grad_norm": 0.0, + "learning_rate": 1.5108312595889388e-05, + "loss": 1.4229, + "step": 11902 + }, + { + "epoch": 0.34949204298549535, + "grad_norm": 0.0, + "learning_rate": 1.510749505023596e-05, + "loss": 1.2871, + "step": 11903 + }, + { + "epoch": 0.3495214046626343, + "grad_norm": 0.0, + "learning_rate": 1.510667745839415e-05, + "loss": 1.3242, + "step": 11904 + }, + { + "epoch": 0.34955076633977333, + "grad_norm": 0.0, + "learning_rate": 1.5105859820371352e-05, + "loss": 1.3232, + "step": 11905 + }, + { + "epoch": 0.34958012801691235, + "grad_norm": 0.0, + "learning_rate": 1.510504213617495e-05, + "loss": 1.3018, + "step": 11906 + }, + { + "epoch": 0.3496094896940513, + "grad_norm": 0.0, + "learning_rate": 1.5104224405812344e-05, + "loss": 1.458, + "step": 11907 + }, + { + "epoch": 0.34963885137119033, + "grad_norm": 0.0, + "learning_rate": 1.5103406629290932e-05, + "loss": 1.4648, + "step": 11908 + }, + { + "epoch": 0.34966821304832935, + "grad_norm": 0.0, + "learning_rate": 1.5102588806618106e-05, + "loss": 1.4824, + "step": 11909 + }, + { + "epoch": 0.3496975747254683, + "grad_norm": 0.0, + "learning_rate": 1.5101770937801265e-05, + "loss": 1.3213, + "step": 11910 + }, + { + "epoch": 0.3497269364026073, + "grad_norm": 0.0, + "learning_rate": 1.5100953022847802e-05, + "loss": 1.4775, + "step": 11911 + }, + { + "epoch": 0.34975629807974634, + "grad_norm": 0.0, + "learning_rate": 1.5100135061765111e-05, + "loss": 1.2266, + "step": 11912 + }, + { + "epoch": 0.3497856597568853, + "grad_norm": 0.0, + "learning_rate": 1.5099317054560597e-05, + "loss": 1.4443, + "step": 11913 + }, + { + "epoch": 0.3498150214340243, + "grad_norm": 0.0, + "learning_rate": 1.5098499001241649e-05, + "loss": 1.3037, + "step": 11914 + }, + { + "epoch": 0.34984438311116334, + "grad_norm": 0.0, + "learning_rate": 1.5097680901815672e-05, + "loss": 1.2754, + "step": 11915 + }, + { + "epoch": 0.3498737447883023, + "grad_norm": 0.0, + "learning_rate": 1.5096862756290059e-05, + "loss": 1.3188, + "step": 11916 + }, + { + "epoch": 0.3499031064654413, + "grad_norm": 0.0, + "learning_rate": 1.509604456467221e-05, + "loss": 1.3281, + "step": 11917 + }, + { + "epoch": 0.3499324681425803, + "grad_norm": 0.0, + "learning_rate": 1.5095226326969527e-05, + "loss": 1.415, + "step": 11918 + }, + { + "epoch": 0.3499618298197193, + "grad_norm": 0.0, + "learning_rate": 1.5094408043189406e-05, + "loss": 1.3545, + "step": 11919 + }, + { + "epoch": 0.3499911914968583, + "grad_norm": 0.0, + "learning_rate": 1.509358971333925e-05, + "loss": 1.3164, + "step": 11920 + }, + { + "epoch": 0.3500205531739973, + "grad_norm": 0.0, + "learning_rate": 1.5092771337426455e-05, + "loss": 1.3242, + "step": 11921 + }, + { + "epoch": 0.3500499148511363, + "grad_norm": 0.0, + "learning_rate": 1.5091952915458428e-05, + "loss": 1.29, + "step": 11922 + }, + { + "epoch": 0.3500792765282753, + "grad_norm": 0.0, + "learning_rate": 1.5091134447442564e-05, + "loss": 1.2441, + "step": 11923 + }, + { + "epoch": 0.35010863820541427, + "grad_norm": 0.0, + "learning_rate": 1.5090315933386268e-05, + "loss": 1.4121, + "step": 11924 + }, + { + "epoch": 0.3501379998825533, + "grad_norm": 0.0, + "learning_rate": 1.5089497373296942e-05, + "loss": 1.3838, + "step": 11925 + }, + { + "epoch": 0.3501673615596923, + "grad_norm": 0.0, + "learning_rate": 1.508867876718199e-05, + "loss": 1.4912, + "step": 11926 + }, + { + "epoch": 0.35019672323683126, + "grad_norm": 0.0, + "learning_rate": 1.508786011504881e-05, + "loss": 1.3027, + "step": 11927 + }, + { + "epoch": 0.3502260849139703, + "grad_norm": 0.0, + "learning_rate": 1.5087041416904806e-05, + "loss": 1.3516, + "step": 11928 + }, + { + "epoch": 0.3502554465911093, + "grad_norm": 0.0, + "learning_rate": 1.5086222672757385e-05, + "loss": 1.3457, + "step": 11929 + }, + { + "epoch": 0.35028480826824826, + "grad_norm": 0.0, + "learning_rate": 1.5085403882613951e-05, + "loss": 1.4258, + "step": 11930 + }, + { + "epoch": 0.3503141699453873, + "grad_norm": 0.0, + "learning_rate": 1.5084585046481907e-05, + "loss": 1.3672, + "step": 11931 + }, + { + "epoch": 0.3503435316225263, + "grad_norm": 0.0, + "learning_rate": 1.508376616436866e-05, + "loss": 1.0967, + "step": 11932 + }, + { + "epoch": 0.35037289329966526, + "grad_norm": 0.0, + "learning_rate": 1.5082947236281609e-05, + "loss": 1.4209, + "step": 11933 + }, + { + "epoch": 0.3504022549768043, + "grad_norm": 0.0, + "learning_rate": 1.5082128262228169e-05, + "loss": 1.3154, + "step": 11934 + }, + { + "epoch": 0.3504316166539433, + "grad_norm": 0.0, + "learning_rate": 1.508130924221574e-05, + "loss": 1.2578, + "step": 11935 + }, + { + "epoch": 0.35046097833108225, + "grad_norm": 0.0, + "learning_rate": 1.508049017625173e-05, + "loss": 1.2476, + "step": 11936 + }, + { + "epoch": 0.35049034000822127, + "grad_norm": 0.0, + "learning_rate": 1.5079671064343546e-05, + "loss": 1.3398, + "step": 11937 + }, + { + "epoch": 0.3505197016853603, + "grad_norm": 0.0, + "learning_rate": 1.5078851906498595e-05, + "loss": 1.4434, + "step": 11938 + }, + { + "epoch": 0.35054906336249925, + "grad_norm": 0.0, + "learning_rate": 1.5078032702724286e-05, + "loss": 1.4385, + "step": 11939 + }, + { + "epoch": 0.35057842503963826, + "grad_norm": 0.0, + "learning_rate": 1.5077213453028026e-05, + "loss": 1.291, + "step": 11940 + }, + { + "epoch": 0.3506077867167773, + "grad_norm": 0.0, + "learning_rate": 1.5076394157417226e-05, + "loss": 1.3418, + "step": 11941 + }, + { + "epoch": 0.35063714839391624, + "grad_norm": 0.0, + "learning_rate": 1.5075574815899295e-05, + "loss": 1.3389, + "step": 11942 + }, + { + "epoch": 0.35066651007105526, + "grad_norm": 0.0, + "learning_rate": 1.5074755428481637e-05, + "loss": 1.4609, + "step": 11943 + }, + { + "epoch": 0.3506958717481943, + "grad_norm": 0.0, + "learning_rate": 1.507393599517167e-05, + "loss": 1.3672, + "step": 11944 + }, + { + "epoch": 0.35072523342533324, + "grad_norm": 0.0, + "learning_rate": 1.5073116515976796e-05, + "loss": 1.3438, + "step": 11945 + }, + { + "epoch": 0.35075459510247226, + "grad_norm": 0.0, + "learning_rate": 1.5072296990904431e-05, + "loss": 1.4756, + "step": 11946 + }, + { + "epoch": 0.3507839567796113, + "grad_norm": 0.0, + "learning_rate": 1.5071477419961991e-05, + "loss": 1.2979, + "step": 11947 + }, + { + "epoch": 0.35081331845675023, + "grad_norm": 0.0, + "learning_rate": 1.5070657803156876e-05, + "loss": 1.4092, + "step": 11948 + }, + { + "epoch": 0.35084268013388925, + "grad_norm": 0.0, + "learning_rate": 1.5069838140496504e-05, + "loss": 1.3984, + "step": 11949 + }, + { + "epoch": 0.35087204181102827, + "grad_norm": 0.0, + "learning_rate": 1.5069018431988292e-05, + "loss": 1.3364, + "step": 11950 + }, + { + "epoch": 0.35090140348816723, + "grad_norm": 0.0, + "learning_rate": 1.5068198677639643e-05, + "loss": 1.3037, + "step": 11951 + }, + { + "epoch": 0.35093076516530625, + "grad_norm": 0.0, + "learning_rate": 1.5067378877457977e-05, + "loss": 1.2563, + "step": 11952 + }, + { + "epoch": 0.35096012684244526, + "grad_norm": 0.0, + "learning_rate": 1.5066559031450705e-05, + "loss": 1.3477, + "step": 11953 + }, + { + "epoch": 0.3509894885195842, + "grad_norm": 0.0, + "learning_rate": 1.5065739139625244e-05, + "loss": 1.4062, + "step": 11954 + }, + { + "epoch": 0.35101885019672324, + "grad_norm": 0.0, + "learning_rate": 1.5064919201989008e-05, + "loss": 1.3926, + "step": 11955 + }, + { + "epoch": 0.35104821187386226, + "grad_norm": 0.0, + "learning_rate": 1.5064099218549409e-05, + "loss": 1.3223, + "step": 11956 + }, + { + "epoch": 0.3510775735510012, + "grad_norm": 0.0, + "learning_rate": 1.5063279189313862e-05, + "loss": 1.2891, + "step": 11957 + }, + { + "epoch": 0.35110693522814024, + "grad_norm": 0.0, + "learning_rate": 1.5062459114289784e-05, + "loss": 1.4697, + "step": 11958 + }, + { + "epoch": 0.35113629690527925, + "grad_norm": 0.0, + "learning_rate": 1.5061638993484595e-05, + "loss": 1.3994, + "step": 11959 + }, + { + "epoch": 0.3511656585824182, + "grad_norm": 0.0, + "learning_rate": 1.5060818826905705e-05, + "loss": 1.5283, + "step": 11960 + }, + { + "epoch": 0.35119502025955723, + "grad_norm": 0.0, + "learning_rate": 1.5059998614560538e-05, + "loss": 1.2617, + "step": 11961 + }, + { + "epoch": 0.35122438193669625, + "grad_norm": 0.0, + "learning_rate": 1.5059178356456507e-05, + "loss": 1.4004, + "step": 11962 + }, + { + "epoch": 0.3512537436138352, + "grad_norm": 0.0, + "learning_rate": 1.505835805260103e-05, + "loss": 1.2803, + "step": 11963 + }, + { + "epoch": 0.35128310529097423, + "grad_norm": 0.0, + "learning_rate": 1.5057537703001526e-05, + "loss": 1.3027, + "step": 11964 + }, + { + "epoch": 0.35131246696811325, + "grad_norm": 0.0, + "learning_rate": 1.505671730766541e-05, + "loss": 1.5518, + "step": 11965 + }, + { + "epoch": 0.3513418286452522, + "grad_norm": 0.0, + "learning_rate": 1.5055896866600106e-05, + "loss": 1.3218, + "step": 11966 + }, + { + "epoch": 0.3513711903223912, + "grad_norm": 0.0, + "learning_rate": 1.5055076379813035e-05, + "loss": 1.3936, + "step": 11967 + }, + { + "epoch": 0.3514005519995302, + "grad_norm": 0.0, + "learning_rate": 1.5054255847311607e-05, + "loss": 1.3906, + "step": 11968 + }, + { + "epoch": 0.3514299136766692, + "grad_norm": 0.0, + "learning_rate": 1.5053435269103253e-05, + "loss": 1.4033, + "step": 11969 + }, + { + "epoch": 0.3514592753538082, + "grad_norm": 0.0, + "learning_rate": 1.505261464519539e-05, + "loss": 1.3408, + "step": 11970 + }, + { + "epoch": 0.3514886370309472, + "grad_norm": 0.0, + "learning_rate": 1.5051793975595442e-05, + "loss": 1.3906, + "step": 11971 + }, + { + "epoch": 0.3515179987080862, + "grad_norm": 0.0, + "learning_rate": 1.5050973260310819e-05, + "loss": 1.4609, + "step": 11972 + }, + { + "epoch": 0.3515473603852252, + "grad_norm": 0.0, + "learning_rate": 1.5050152499348957e-05, + "loss": 1.457, + "step": 11973 + }, + { + "epoch": 0.3515767220623642, + "grad_norm": 0.0, + "learning_rate": 1.5049331692717273e-05, + "loss": 1.3623, + "step": 11974 + }, + { + "epoch": 0.3516060837395032, + "grad_norm": 0.0, + "learning_rate": 1.5048510840423189e-05, + "loss": 1.4541, + "step": 11975 + }, + { + "epoch": 0.3516354454166422, + "grad_norm": 0.0, + "learning_rate": 1.5047689942474126e-05, + "loss": 1.2622, + "step": 11976 + }, + { + "epoch": 0.3516648070937812, + "grad_norm": 0.0, + "learning_rate": 1.504686899887751e-05, + "loss": 1.4268, + "step": 11977 + }, + { + "epoch": 0.3516941687709202, + "grad_norm": 0.0, + "learning_rate": 1.5046048009640767e-05, + "loss": 1.4297, + "step": 11978 + }, + { + "epoch": 0.3517235304480592, + "grad_norm": 0.0, + "learning_rate": 1.504522697477132e-05, + "loss": 1.4258, + "step": 11979 + }, + { + "epoch": 0.35175289212519817, + "grad_norm": 0.0, + "learning_rate": 1.5044405894276593e-05, + "loss": 1.2202, + "step": 11980 + }, + { + "epoch": 0.3517822538023372, + "grad_norm": 0.0, + "learning_rate": 1.504358476816401e-05, + "loss": 1.4385, + "step": 11981 + }, + { + "epoch": 0.3518116154794762, + "grad_norm": 0.0, + "learning_rate": 1.5042763596440999e-05, + "loss": 1.2544, + "step": 11982 + }, + { + "epoch": 0.35184097715661516, + "grad_norm": 0.0, + "learning_rate": 1.5041942379114987e-05, + "loss": 1.3516, + "step": 11983 + }, + { + "epoch": 0.3518703388337542, + "grad_norm": 0.0, + "learning_rate": 1.5041121116193398e-05, + "loss": 1.1943, + "step": 11984 + }, + { + "epoch": 0.3518997005108932, + "grad_norm": 0.0, + "learning_rate": 1.5040299807683659e-05, + "loss": 1.3164, + "step": 11985 + }, + { + "epoch": 0.35192906218803216, + "grad_norm": 0.0, + "learning_rate": 1.5039478453593198e-05, + "loss": 1.377, + "step": 11986 + }, + { + "epoch": 0.3519584238651712, + "grad_norm": 0.0, + "learning_rate": 1.5038657053929444e-05, + "loss": 1.3623, + "step": 11987 + }, + { + "epoch": 0.3519877855423102, + "grad_norm": 0.0, + "learning_rate": 1.5037835608699822e-05, + "loss": 1.4199, + "step": 11988 + }, + { + "epoch": 0.35201714721944916, + "grad_norm": 0.0, + "learning_rate": 1.5037014117911764e-05, + "loss": 1.4209, + "step": 11989 + }, + { + "epoch": 0.3520465088965882, + "grad_norm": 0.0, + "learning_rate": 1.5036192581572694e-05, + "loss": 1.3955, + "step": 11990 + }, + { + "epoch": 0.3520758705737272, + "grad_norm": 0.0, + "learning_rate": 1.503537099969005e-05, + "loss": 1.1855, + "step": 11991 + }, + { + "epoch": 0.35210523225086615, + "grad_norm": 0.0, + "learning_rate": 1.5034549372271252e-05, + "loss": 1.4482, + "step": 11992 + }, + { + "epoch": 0.35213459392800517, + "grad_norm": 0.0, + "learning_rate": 1.5033727699323737e-05, + "loss": 1.377, + "step": 11993 + }, + { + "epoch": 0.3521639556051442, + "grad_norm": 0.0, + "learning_rate": 1.5032905980854932e-05, + "loss": 1.4385, + "step": 11994 + }, + { + "epoch": 0.35219331728228315, + "grad_norm": 0.0, + "learning_rate": 1.5032084216872269e-05, + "loss": 1.4375, + "step": 11995 + }, + { + "epoch": 0.35222267895942216, + "grad_norm": 0.0, + "learning_rate": 1.5031262407383182e-05, + "loss": 1.5117, + "step": 11996 + }, + { + "epoch": 0.3522520406365612, + "grad_norm": 0.0, + "learning_rate": 1.50304405523951e-05, + "loss": 1.2925, + "step": 11997 + }, + { + "epoch": 0.35228140231370014, + "grad_norm": 0.0, + "learning_rate": 1.5029618651915452e-05, + "loss": 1.3018, + "step": 11998 + }, + { + "epoch": 0.35231076399083916, + "grad_norm": 0.0, + "learning_rate": 1.5028796705951678e-05, + "loss": 1.4531, + "step": 11999 + }, + { + "epoch": 0.3523401256679782, + "grad_norm": 0.0, + "learning_rate": 1.5027974714511207e-05, + "loss": 1.2188, + "step": 12000 + }, + { + "epoch": 0.35236948734511714, + "grad_norm": 0.0, + "learning_rate": 1.5027152677601473e-05, + "loss": 1.2129, + "step": 12001 + }, + { + "epoch": 0.35239884902225616, + "grad_norm": 0.0, + "learning_rate": 1.502633059522991e-05, + "loss": 1.4307, + "step": 12002 + }, + { + "epoch": 0.3524282106993952, + "grad_norm": 0.0, + "learning_rate": 1.5025508467403952e-05, + "loss": 1.5361, + "step": 12003 + }, + { + "epoch": 0.35245757237653413, + "grad_norm": 0.0, + "learning_rate": 1.5024686294131034e-05, + "loss": 1.4297, + "step": 12004 + }, + { + "epoch": 0.35248693405367315, + "grad_norm": 0.0, + "learning_rate": 1.502386407541859e-05, + "loss": 1.4092, + "step": 12005 + }, + { + "epoch": 0.35251629573081217, + "grad_norm": 0.0, + "learning_rate": 1.5023041811274058e-05, + "loss": 1.1982, + "step": 12006 + }, + { + "epoch": 0.35254565740795113, + "grad_norm": 0.0, + "learning_rate": 1.5022219501704869e-05, + "loss": 1.2012, + "step": 12007 + }, + { + "epoch": 0.35257501908509015, + "grad_norm": 0.0, + "learning_rate": 1.5021397146718468e-05, + "loss": 1.3291, + "step": 12008 + }, + { + "epoch": 0.35260438076222916, + "grad_norm": 0.0, + "learning_rate": 1.5020574746322285e-05, + "loss": 1.3467, + "step": 12009 + }, + { + "epoch": 0.3526337424393681, + "grad_norm": 0.0, + "learning_rate": 1.5019752300523756e-05, + "loss": 1.3525, + "step": 12010 + }, + { + "epoch": 0.35266310411650714, + "grad_norm": 0.0, + "learning_rate": 1.5018929809330322e-05, + "loss": 1.3623, + "step": 12011 + }, + { + "epoch": 0.35269246579364616, + "grad_norm": 0.0, + "learning_rate": 1.5018107272749425e-05, + "loss": 1.5127, + "step": 12012 + }, + { + "epoch": 0.3527218274707851, + "grad_norm": 0.0, + "learning_rate": 1.5017284690788497e-05, + "loss": 1.3564, + "step": 12013 + }, + { + "epoch": 0.35275118914792414, + "grad_norm": 0.0, + "learning_rate": 1.5016462063454972e-05, + "loss": 1.3984, + "step": 12014 + }, + { + "epoch": 0.35278055082506315, + "grad_norm": 0.0, + "learning_rate": 1.5015639390756303e-05, + "loss": 1.3857, + "step": 12015 + }, + { + "epoch": 0.3528099125022021, + "grad_norm": 0.0, + "learning_rate": 1.501481667269992e-05, + "loss": 1.2822, + "step": 12016 + }, + { + "epoch": 0.35283927417934113, + "grad_norm": 0.0, + "learning_rate": 1.5013993909293265e-05, + "loss": 1.3623, + "step": 12017 + }, + { + "epoch": 0.3528686358564801, + "grad_norm": 0.0, + "learning_rate": 1.501317110054378e-05, + "loss": 1.373, + "step": 12018 + }, + { + "epoch": 0.3528979975336191, + "grad_norm": 0.0, + "learning_rate": 1.5012348246458905e-05, + "loss": 1.3848, + "step": 12019 + }, + { + "epoch": 0.35292735921075813, + "grad_norm": 0.0, + "learning_rate": 1.501152534704608e-05, + "loss": 1.3984, + "step": 12020 + }, + { + "epoch": 0.3529567208878971, + "grad_norm": 0.0, + "learning_rate": 1.501070240231275e-05, + "loss": 1.3408, + "step": 12021 + }, + { + "epoch": 0.3529860825650361, + "grad_norm": 0.0, + "learning_rate": 1.5009879412266352e-05, + "loss": 1.3242, + "step": 12022 + }, + { + "epoch": 0.3530154442421751, + "grad_norm": 0.0, + "learning_rate": 1.5009056376914335e-05, + "loss": 1.3076, + "step": 12023 + }, + { + "epoch": 0.3530448059193141, + "grad_norm": 0.0, + "learning_rate": 1.5008233296264134e-05, + "loss": 1.3262, + "step": 12024 + }, + { + "epoch": 0.3530741675964531, + "grad_norm": 0.0, + "learning_rate": 1.5007410170323201e-05, + "loss": 1.5254, + "step": 12025 + }, + { + "epoch": 0.3531035292735921, + "grad_norm": 0.0, + "learning_rate": 1.5006586999098973e-05, + "loss": 1.3662, + "step": 12026 + }, + { + "epoch": 0.3531328909507311, + "grad_norm": 0.0, + "learning_rate": 1.5005763782598895e-05, + "loss": 1.3818, + "step": 12027 + }, + { + "epoch": 0.3531622526278701, + "grad_norm": 0.0, + "learning_rate": 1.5004940520830417e-05, + "loss": 1.4033, + "step": 12028 + }, + { + "epoch": 0.3531916143050091, + "grad_norm": 0.0, + "learning_rate": 1.5004117213800978e-05, + "loss": 1.3252, + "step": 12029 + }, + { + "epoch": 0.3532209759821481, + "grad_norm": 0.0, + "learning_rate": 1.5003293861518024e-05, + "loss": 1.356, + "step": 12030 + }, + { + "epoch": 0.3532503376592871, + "grad_norm": 0.0, + "learning_rate": 1.5002470463989002e-05, + "loss": 1.4004, + "step": 12031 + }, + { + "epoch": 0.3532796993364261, + "grad_norm": 0.0, + "learning_rate": 1.5001647021221364e-05, + "loss": 1.3447, + "step": 12032 + }, + { + "epoch": 0.3533090610135651, + "grad_norm": 0.0, + "learning_rate": 1.5000823533222546e-05, + "loss": 1.4785, + "step": 12033 + }, + { + "epoch": 0.3533384226907041, + "grad_norm": 0.0, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.2036, + "step": 12034 + }, + { + "epoch": 0.3533677843678431, + "grad_norm": 0.0, + "learning_rate": 1.4999176421561177e-05, + "loss": 1.541, + "step": 12035 + }, + { + "epoch": 0.35339714604498207, + "grad_norm": 0.0, + "learning_rate": 1.4998352797913517e-05, + "loss": 1.2275, + "step": 12036 + }, + { + "epoch": 0.3534265077221211, + "grad_norm": 0.0, + "learning_rate": 1.4997529129064476e-05, + "loss": 1.3594, + "step": 12037 + }, + { + "epoch": 0.3534558693992601, + "grad_norm": 0.0, + "learning_rate": 1.4996705415021497e-05, + "loss": 1.3057, + "step": 12038 + }, + { + "epoch": 0.35348523107639906, + "grad_norm": 0.0, + "learning_rate": 1.4995881655792033e-05, + "loss": 1.3545, + "step": 12039 + }, + { + "epoch": 0.3535145927535381, + "grad_norm": 0.0, + "learning_rate": 1.4995057851383533e-05, + "loss": 1.4932, + "step": 12040 + }, + { + "epoch": 0.3535439544306771, + "grad_norm": 0.0, + "learning_rate": 1.4994234001803442e-05, + "loss": 1.2607, + "step": 12041 + }, + { + "epoch": 0.35357331610781606, + "grad_norm": 0.0, + "learning_rate": 1.4993410107059213e-05, + "loss": 1.2168, + "step": 12042 + }, + { + "epoch": 0.3536026777849551, + "grad_norm": 0.0, + "learning_rate": 1.4992586167158303e-05, + "loss": 1.5117, + "step": 12043 + }, + { + "epoch": 0.3536320394620941, + "grad_norm": 0.0, + "learning_rate": 1.4991762182108154e-05, + "loss": 1.4082, + "step": 12044 + }, + { + "epoch": 0.35366140113923306, + "grad_norm": 0.0, + "learning_rate": 1.4990938151916221e-05, + "loss": 1.418, + "step": 12045 + }, + { + "epoch": 0.3536907628163721, + "grad_norm": 0.0, + "learning_rate": 1.4990114076589956e-05, + "loss": 1.3594, + "step": 12046 + }, + { + "epoch": 0.3537201244935111, + "grad_norm": 0.0, + "learning_rate": 1.4989289956136812e-05, + "loss": 1.3574, + "step": 12047 + }, + { + "epoch": 0.35374948617065005, + "grad_norm": 0.0, + "learning_rate": 1.4988465790564242e-05, + "loss": 1.5205, + "step": 12048 + }, + { + "epoch": 0.35377884784778907, + "grad_norm": 0.0, + "learning_rate": 1.49876415798797e-05, + "loss": 1.4404, + "step": 12049 + }, + { + "epoch": 0.3538082095249281, + "grad_norm": 0.0, + "learning_rate": 1.4986817324090635e-05, + "loss": 1.4287, + "step": 12050 + }, + { + "epoch": 0.35383757120206705, + "grad_norm": 0.0, + "learning_rate": 1.49859930232045e-05, + "loss": 1.2915, + "step": 12051 + }, + { + "epoch": 0.35386693287920606, + "grad_norm": 0.0, + "learning_rate": 1.4985168677228761e-05, + "loss": 1.2251, + "step": 12052 + }, + { + "epoch": 0.3538962945563451, + "grad_norm": 0.0, + "learning_rate": 1.4984344286170862e-05, + "loss": 1.3311, + "step": 12053 + }, + { + "epoch": 0.35392565623348404, + "grad_norm": 0.0, + "learning_rate": 1.4983519850038261e-05, + "loss": 1.4697, + "step": 12054 + }, + { + "epoch": 0.35395501791062306, + "grad_norm": 0.0, + "learning_rate": 1.4982695368838411e-05, + "loss": 1.4863, + "step": 12055 + }, + { + "epoch": 0.3539843795877621, + "grad_norm": 0.0, + "learning_rate": 1.4981870842578775e-05, + "loss": 1.2383, + "step": 12056 + }, + { + "epoch": 0.35401374126490104, + "grad_norm": 0.0, + "learning_rate": 1.4981046271266804e-05, + "loss": 1.3457, + "step": 12057 + }, + { + "epoch": 0.35404310294204006, + "grad_norm": 0.0, + "learning_rate": 1.4980221654909954e-05, + "loss": 1.4053, + "step": 12058 + }, + { + "epoch": 0.3540724646191791, + "grad_norm": 0.0, + "learning_rate": 1.4979396993515684e-05, + "loss": 1.4922, + "step": 12059 + }, + { + "epoch": 0.35410182629631803, + "grad_norm": 0.0, + "learning_rate": 1.4978572287091452e-05, + "loss": 1.4219, + "step": 12060 + }, + { + "epoch": 0.35413118797345705, + "grad_norm": 0.0, + "learning_rate": 1.4977747535644717e-05, + "loss": 1.3291, + "step": 12061 + }, + { + "epoch": 0.35416054965059607, + "grad_norm": 0.0, + "learning_rate": 1.4976922739182938e-05, + "loss": 1.415, + "step": 12062 + }, + { + "epoch": 0.35418991132773503, + "grad_norm": 0.0, + "learning_rate": 1.4976097897713568e-05, + "loss": 1.3232, + "step": 12063 + }, + { + "epoch": 0.35421927300487405, + "grad_norm": 0.0, + "learning_rate": 1.4975273011244071e-05, + "loss": 1.2754, + "step": 12064 + }, + { + "epoch": 0.35424863468201306, + "grad_norm": 0.0, + "learning_rate": 1.497444807978191e-05, + "loss": 1.3818, + "step": 12065 + }, + { + "epoch": 0.354277996359152, + "grad_norm": 0.0, + "learning_rate": 1.4973623103334538e-05, + "loss": 1.2583, + "step": 12066 + }, + { + "epoch": 0.35430735803629104, + "grad_norm": 0.0, + "learning_rate": 1.4972798081909417e-05, + "loss": 1.335, + "step": 12067 + }, + { + "epoch": 0.35433671971343, + "grad_norm": 0.0, + "learning_rate": 1.4971973015514013e-05, + "loss": 1.4443, + "step": 12068 + }, + { + "epoch": 0.354366081390569, + "grad_norm": 0.0, + "learning_rate": 1.497114790415578e-05, + "loss": 1.3486, + "step": 12069 + }, + { + "epoch": 0.35439544306770804, + "grad_norm": 0.0, + "learning_rate": 1.4970322747842185e-05, + "loss": 1.4697, + "step": 12070 + }, + { + "epoch": 0.354424804744847, + "grad_norm": 0.0, + "learning_rate": 1.4969497546580689e-05, + "loss": 1.3936, + "step": 12071 + }, + { + "epoch": 0.354454166421986, + "grad_norm": 0.0, + "learning_rate": 1.4968672300378755e-05, + "loss": 1.5059, + "step": 12072 + }, + { + "epoch": 0.35448352809912503, + "grad_norm": 0.0, + "learning_rate": 1.4967847009243841e-05, + "loss": 1.3525, + "step": 12073 + }, + { + "epoch": 0.354512889776264, + "grad_norm": 0.0, + "learning_rate": 1.496702167318342e-05, + "loss": 1.3799, + "step": 12074 + }, + { + "epoch": 0.354542251453403, + "grad_norm": 0.0, + "learning_rate": 1.4966196292204943e-05, + "loss": 1.2485, + "step": 12075 + }, + { + "epoch": 0.35457161313054203, + "grad_norm": 0.0, + "learning_rate": 1.4965370866315886e-05, + "loss": 1.3965, + "step": 12076 + }, + { + "epoch": 0.354600974807681, + "grad_norm": 0.0, + "learning_rate": 1.4964545395523709e-05, + "loss": 1.4062, + "step": 12077 + }, + { + "epoch": 0.35463033648482, + "grad_norm": 0.0, + "learning_rate": 1.4963719879835874e-05, + "loss": 1.5293, + "step": 12078 + }, + { + "epoch": 0.354659698161959, + "grad_norm": 0.0, + "learning_rate": 1.4962894319259849e-05, + "loss": 1.2793, + "step": 12079 + }, + { + "epoch": 0.354689059839098, + "grad_norm": 0.0, + "learning_rate": 1.4962068713803099e-05, + "loss": 1.3311, + "step": 12080 + }, + { + "epoch": 0.354718421516237, + "grad_norm": 0.0, + "learning_rate": 1.4961243063473094e-05, + "loss": 1.4229, + "step": 12081 + }, + { + "epoch": 0.354747783193376, + "grad_norm": 0.0, + "learning_rate": 1.4960417368277296e-05, + "loss": 1.5332, + "step": 12082 + }, + { + "epoch": 0.354777144870515, + "grad_norm": 0.0, + "learning_rate": 1.4959591628223173e-05, + "loss": 1.3916, + "step": 12083 + }, + { + "epoch": 0.354806506547654, + "grad_norm": 0.0, + "learning_rate": 1.4958765843318192e-05, + "loss": 1.543, + "step": 12084 + }, + { + "epoch": 0.354835868224793, + "grad_norm": 0.0, + "learning_rate": 1.4957940013569823e-05, + "loss": 1.4727, + "step": 12085 + }, + { + "epoch": 0.354865229901932, + "grad_norm": 0.0, + "learning_rate": 1.4957114138985531e-05, + "loss": 1.3516, + "step": 12086 + }, + { + "epoch": 0.354894591579071, + "grad_norm": 0.0, + "learning_rate": 1.4956288219572788e-05, + "loss": 1.4951, + "step": 12087 + }, + { + "epoch": 0.35492395325621, + "grad_norm": 0.0, + "learning_rate": 1.495546225533906e-05, + "loss": 1.3799, + "step": 12088 + }, + { + "epoch": 0.354953314933349, + "grad_norm": 0.0, + "learning_rate": 1.4954636246291817e-05, + "loss": 1.291, + "step": 12089 + }, + { + "epoch": 0.354982676610488, + "grad_norm": 0.0, + "learning_rate": 1.4953810192438531e-05, + "loss": 1.4619, + "step": 12090 + }, + { + "epoch": 0.355012038287627, + "grad_norm": 0.0, + "learning_rate": 1.4952984093786671e-05, + "loss": 1.3154, + "step": 12091 + }, + { + "epoch": 0.35504139996476597, + "grad_norm": 0.0, + "learning_rate": 1.4952157950343709e-05, + "loss": 1.3369, + "step": 12092 + }, + { + "epoch": 0.355070761641905, + "grad_norm": 0.0, + "learning_rate": 1.4951331762117112e-05, + "loss": 1.4395, + "step": 12093 + }, + { + "epoch": 0.355100123319044, + "grad_norm": 0.0, + "learning_rate": 1.4950505529114354e-05, + "loss": 1.4023, + "step": 12094 + }, + { + "epoch": 0.35512948499618296, + "grad_norm": 0.0, + "learning_rate": 1.4949679251342906e-05, + "loss": 1.4053, + "step": 12095 + }, + { + "epoch": 0.355158846673322, + "grad_norm": 0.0, + "learning_rate": 1.494885292881024e-05, + "loss": 1.2598, + "step": 12096 + }, + { + "epoch": 0.355188208350461, + "grad_norm": 0.0, + "learning_rate": 1.4948026561523832e-05, + "loss": 1.4482, + "step": 12097 + }, + { + "epoch": 0.35521757002759996, + "grad_norm": 0.0, + "learning_rate": 1.4947200149491152e-05, + "loss": 1.3076, + "step": 12098 + }, + { + "epoch": 0.355246931704739, + "grad_norm": 0.0, + "learning_rate": 1.4946373692719673e-05, + "loss": 1.4395, + "step": 12099 + }, + { + "epoch": 0.355276293381878, + "grad_norm": 0.0, + "learning_rate": 1.4945547191216871e-05, + "loss": 1.4219, + "step": 12100 + }, + { + "epoch": 0.35530565505901696, + "grad_norm": 0.0, + "learning_rate": 1.4944720644990218e-05, + "loss": 1.4062, + "step": 12101 + }, + { + "epoch": 0.355335016736156, + "grad_norm": 0.0, + "learning_rate": 1.4943894054047195e-05, + "loss": 1.3994, + "step": 12102 + }, + { + "epoch": 0.355364378413295, + "grad_norm": 0.0, + "learning_rate": 1.4943067418395266e-05, + "loss": 1.5244, + "step": 12103 + }, + { + "epoch": 0.35539374009043395, + "grad_norm": 0.0, + "learning_rate": 1.4942240738041913e-05, + "loss": 1.1709, + "step": 12104 + }, + { + "epoch": 0.35542310176757297, + "grad_norm": 0.0, + "learning_rate": 1.4941414012994613e-05, + "loss": 1.2969, + "step": 12105 + }, + { + "epoch": 0.355452463444712, + "grad_norm": 0.0, + "learning_rate": 1.494058724326084e-05, + "loss": 1.3262, + "step": 12106 + }, + { + "epoch": 0.35548182512185095, + "grad_norm": 0.0, + "learning_rate": 1.4939760428848071e-05, + "loss": 1.4199, + "step": 12107 + }, + { + "epoch": 0.35551118679898996, + "grad_norm": 0.0, + "learning_rate": 1.4938933569763784e-05, + "loss": 1.2715, + "step": 12108 + }, + { + "epoch": 0.355540548476129, + "grad_norm": 0.0, + "learning_rate": 1.4938106666015452e-05, + "loss": 1.2676, + "step": 12109 + }, + { + "epoch": 0.35556991015326794, + "grad_norm": 0.0, + "learning_rate": 1.4937279717610563e-05, + "loss": 1.3623, + "step": 12110 + }, + { + "epoch": 0.35559927183040696, + "grad_norm": 0.0, + "learning_rate": 1.4936452724556585e-05, + "loss": 1.3633, + "step": 12111 + }, + { + "epoch": 0.355628633507546, + "grad_norm": 0.0, + "learning_rate": 1.4935625686861004e-05, + "loss": 1.4102, + "step": 12112 + }, + { + "epoch": 0.35565799518468494, + "grad_norm": 0.0, + "learning_rate": 1.4934798604531293e-05, + "loss": 1.353, + "step": 12113 + }, + { + "epoch": 0.35568735686182396, + "grad_norm": 0.0, + "learning_rate": 1.4933971477574933e-05, + "loss": 1.3574, + "step": 12114 + }, + { + "epoch": 0.355716718538963, + "grad_norm": 0.0, + "learning_rate": 1.493314430599941e-05, + "loss": 1.4414, + "step": 12115 + }, + { + "epoch": 0.35574608021610193, + "grad_norm": 0.0, + "learning_rate": 1.4932317089812197e-05, + "loss": 1.3232, + "step": 12116 + }, + { + "epoch": 0.35577544189324095, + "grad_norm": 0.0, + "learning_rate": 1.4931489829020776e-05, + "loss": 1.3486, + "step": 12117 + }, + { + "epoch": 0.3558048035703799, + "grad_norm": 0.0, + "learning_rate": 1.4930662523632633e-05, + "loss": 1.2803, + "step": 12118 + }, + { + "epoch": 0.35583416524751893, + "grad_norm": 0.0, + "learning_rate": 1.4929835173655242e-05, + "loss": 1.2969, + "step": 12119 + }, + { + "epoch": 0.35586352692465795, + "grad_norm": 0.0, + "learning_rate": 1.4929007779096092e-05, + "loss": 1.4326, + "step": 12120 + }, + { + "epoch": 0.3558928886017969, + "grad_norm": 0.0, + "learning_rate": 1.4928180339962661e-05, + "loss": 1.4854, + "step": 12121 + }, + { + "epoch": 0.3559222502789359, + "grad_norm": 0.0, + "learning_rate": 1.4927352856262434e-05, + "loss": 1.2979, + "step": 12122 + }, + { + "epoch": 0.35595161195607494, + "grad_norm": 0.0, + "learning_rate": 1.4926525328002894e-05, + "loss": 1.4443, + "step": 12123 + }, + { + "epoch": 0.3559809736332139, + "grad_norm": 0.0, + "learning_rate": 1.4925697755191521e-05, + "loss": 1.3496, + "step": 12124 + }, + { + "epoch": 0.3560103353103529, + "grad_norm": 0.0, + "learning_rate": 1.4924870137835803e-05, + "loss": 1.4678, + "step": 12125 + }, + { + "epoch": 0.35603969698749194, + "grad_norm": 0.0, + "learning_rate": 1.4924042475943223e-05, + "loss": 1.3457, + "step": 12126 + }, + { + "epoch": 0.3560690586646309, + "grad_norm": 0.0, + "learning_rate": 1.4923214769521269e-05, + "loss": 1.3232, + "step": 12127 + }, + { + "epoch": 0.3560984203417699, + "grad_norm": 0.0, + "learning_rate": 1.4922387018577417e-05, + "loss": 1.3965, + "step": 12128 + }, + { + "epoch": 0.35612778201890893, + "grad_norm": 0.0, + "learning_rate": 1.4921559223119162e-05, + "loss": 1.2783, + "step": 12129 + }, + { + "epoch": 0.3561571436960479, + "grad_norm": 0.0, + "learning_rate": 1.4920731383153986e-05, + "loss": 1.4658, + "step": 12130 + }, + { + "epoch": 0.3561865053731869, + "grad_norm": 0.0, + "learning_rate": 1.4919903498689378e-05, + "loss": 1.4824, + "step": 12131 + }, + { + "epoch": 0.35621586705032593, + "grad_norm": 0.0, + "learning_rate": 1.491907556973282e-05, + "loss": 1.3682, + "step": 12132 + }, + { + "epoch": 0.3562452287274649, + "grad_norm": 0.0, + "learning_rate": 1.4918247596291802e-05, + "loss": 1.3672, + "step": 12133 + }, + { + "epoch": 0.3562745904046039, + "grad_norm": 0.0, + "learning_rate": 1.4917419578373811e-05, + "loss": 1.2891, + "step": 12134 + }, + { + "epoch": 0.3563039520817429, + "grad_norm": 0.0, + "learning_rate": 1.4916591515986336e-05, + "loss": 1.0894, + "step": 12135 + }, + { + "epoch": 0.3563333137588819, + "grad_norm": 0.0, + "learning_rate": 1.4915763409136865e-05, + "loss": 1.2642, + "step": 12136 + }, + { + "epoch": 0.3563626754360209, + "grad_norm": 0.0, + "learning_rate": 1.4914935257832886e-05, + "loss": 1.3311, + "step": 12137 + }, + { + "epoch": 0.3563920371131599, + "grad_norm": 0.0, + "learning_rate": 1.4914107062081888e-05, + "loss": 1.1733, + "step": 12138 + }, + { + "epoch": 0.3564213987902989, + "grad_norm": 0.0, + "learning_rate": 1.4913278821891366e-05, + "loss": 1.4238, + "step": 12139 + }, + { + "epoch": 0.3564507604674379, + "grad_norm": 0.0, + "learning_rate": 1.49124505372688e-05, + "loss": 1.3408, + "step": 12140 + }, + { + "epoch": 0.3564801221445769, + "grad_norm": 0.0, + "learning_rate": 1.4911622208221688e-05, + "loss": 1.3359, + "step": 12141 + }, + { + "epoch": 0.3565094838217159, + "grad_norm": 0.0, + "learning_rate": 1.4910793834757518e-05, + "loss": 1.4004, + "step": 12142 + }, + { + "epoch": 0.3565388454988549, + "grad_norm": 0.0, + "learning_rate": 1.4909965416883783e-05, + "loss": 1.2783, + "step": 12143 + }, + { + "epoch": 0.3565682071759939, + "grad_norm": 0.0, + "learning_rate": 1.4909136954607972e-05, + "loss": 1.4639, + "step": 12144 + }, + { + "epoch": 0.3565975688531329, + "grad_norm": 0.0, + "learning_rate": 1.4908308447937576e-05, + "loss": 1.3018, + "step": 12145 + }, + { + "epoch": 0.3566269305302719, + "grad_norm": 0.0, + "learning_rate": 1.4907479896880093e-05, + "loss": 1.3672, + "step": 12146 + }, + { + "epoch": 0.3566562922074109, + "grad_norm": 0.0, + "learning_rate": 1.4906651301443012e-05, + "loss": 1.3916, + "step": 12147 + }, + { + "epoch": 0.35668565388454987, + "grad_norm": 0.0, + "learning_rate": 1.4905822661633824e-05, + "loss": 1.3867, + "step": 12148 + }, + { + "epoch": 0.3567150155616889, + "grad_norm": 0.0, + "learning_rate": 1.4904993977460027e-05, + "loss": 1.3701, + "step": 12149 + }, + { + "epoch": 0.3567443772388279, + "grad_norm": 0.0, + "learning_rate": 1.4904165248929115e-05, + "loss": 1.4277, + "step": 12150 + }, + { + "epoch": 0.35677373891596686, + "grad_norm": 0.0, + "learning_rate": 1.490333647604858e-05, + "loss": 1.3496, + "step": 12151 + }, + { + "epoch": 0.3568031005931059, + "grad_norm": 0.0, + "learning_rate": 1.4902507658825917e-05, + "loss": 1.2031, + "step": 12152 + }, + { + "epoch": 0.3568324622702449, + "grad_norm": 0.0, + "learning_rate": 1.4901678797268621e-05, + "loss": 1.2861, + "step": 12153 + }, + { + "epoch": 0.35686182394738386, + "grad_norm": 0.0, + "learning_rate": 1.4900849891384187e-05, + "loss": 1.3369, + "step": 12154 + }, + { + "epoch": 0.3568911856245229, + "grad_norm": 0.0, + "learning_rate": 1.4900020941180118e-05, + "loss": 1.3604, + "step": 12155 + }, + { + "epoch": 0.3569205473016619, + "grad_norm": 0.0, + "learning_rate": 1.4899191946663898e-05, + "loss": 1.4355, + "step": 12156 + }, + { + "epoch": 0.35694990897880086, + "grad_norm": 0.0, + "learning_rate": 1.4898362907843034e-05, + "loss": 1.2148, + "step": 12157 + }, + { + "epoch": 0.3569792706559399, + "grad_norm": 0.0, + "learning_rate": 1.489753382472502e-05, + "loss": 1.2773, + "step": 12158 + }, + { + "epoch": 0.3570086323330789, + "grad_norm": 0.0, + "learning_rate": 1.4896704697317353e-05, + "loss": 1.3135, + "step": 12159 + }, + { + "epoch": 0.35703799401021785, + "grad_norm": 0.0, + "learning_rate": 1.4895875525627528e-05, + "loss": 1.3975, + "step": 12160 + }, + { + "epoch": 0.35706735568735687, + "grad_norm": 0.0, + "learning_rate": 1.4895046309663053e-05, + "loss": 1.3506, + "step": 12161 + }, + { + "epoch": 0.3570967173644959, + "grad_norm": 0.0, + "learning_rate": 1.4894217049431415e-05, + "loss": 1.4551, + "step": 12162 + }, + { + "epoch": 0.35712607904163485, + "grad_norm": 0.0, + "learning_rate": 1.489338774494012e-05, + "loss": 1.3438, + "step": 12163 + }, + { + "epoch": 0.35715544071877386, + "grad_norm": 0.0, + "learning_rate": 1.4892558396196668e-05, + "loss": 1.4404, + "step": 12164 + }, + { + "epoch": 0.3571848023959129, + "grad_norm": 0.0, + "learning_rate": 1.4891729003208557e-05, + "loss": 1.3662, + "step": 12165 + }, + { + "epoch": 0.35721416407305184, + "grad_norm": 0.0, + "learning_rate": 1.4890899565983288e-05, + "loss": 1.3213, + "step": 12166 + }, + { + "epoch": 0.35724352575019086, + "grad_norm": 0.0, + "learning_rate": 1.489007008452836e-05, + "loss": 1.3574, + "step": 12167 + }, + { + "epoch": 0.3572728874273298, + "grad_norm": 0.0, + "learning_rate": 1.4889240558851277e-05, + "loss": 1.3223, + "step": 12168 + }, + { + "epoch": 0.35730224910446884, + "grad_norm": 0.0, + "learning_rate": 1.488841098895954e-05, + "loss": 1.2822, + "step": 12169 + }, + { + "epoch": 0.35733161078160786, + "grad_norm": 0.0, + "learning_rate": 1.4887581374860649e-05, + "loss": 1.4062, + "step": 12170 + }, + { + "epoch": 0.3573609724587468, + "grad_norm": 0.0, + "learning_rate": 1.4886751716562112e-05, + "loss": 1.1997, + "step": 12171 + }, + { + "epoch": 0.35739033413588583, + "grad_norm": 0.0, + "learning_rate": 1.4885922014071424e-05, + "loss": 1.3965, + "step": 12172 + }, + { + "epoch": 0.35741969581302485, + "grad_norm": 0.0, + "learning_rate": 1.4885092267396093e-05, + "loss": 1.376, + "step": 12173 + }, + { + "epoch": 0.3574490574901638, + "grad_norm": 0.0, + "learning_rate": 1.4884262476543621e-05, + "loss": 1.2842, + "step": 12174 + }, + { + "epoch": 0.35747841916730283, + "grad_norm": 0.0, + "learning_rate": 1.4883432641521512e-05, + "loss": 1.4111, + "step": 12175 + }, + { + "epoch": 0.35750778084444185, + "grad_norm": 0.0, + "learning_rate": 1.488260276233727e-05, + "loss": 1.3252, + "step": 12176 + }, + { + "epoch": 0.3575371425215808, + "grad_norm": 0.0, + "learning_rate": 1.4881772838998403e-05, + "loss": 1.3608, + "step": 12177 + }, + { + "epoch": 0.3575665041987198, + "grad_norm": 0.0, + "learning_rate": 1.4880942871512415e-05, + "loss": 1.335, + "step": 12178 + }, + { + "epoch": 0.35759586587585884, + "grad_norm": 0.0, + "learning_rate": 1.4880112859886806e-05, + "loss": 1.1992, + "step": 12179 + }, + { + "epoch": 0.3576252275529978, + "grad_norm": 0.0, + "learning_rate": 1.4879282804129091e-05, + "loss": 1.3828, + "step": 12180 + }, + { + "epoch": 0.3576545892301368, + "grad_norm": 0.0, + "learning_rate": 1.487845270424677e-05, + "loss": 1.4141, + "step": 12181 + }, + { + "epoch": 0.35768395090727584, + "grad_norm": 0.0, + "learning_rate": 1.4877622560247354e-05, + "loss": 1.3154, + "step": 12182 + }, + { + "epoch": 0.3577133125844148, + "grad_norm": 0.0, + "learning_rate": 1.4876792372138344e-05, + "loss": 1.3887, + "step": 12183 + }, + { + "epoch": 0.3577426742615538, + "grad_norm": 0.0, + "learning_rate": 1.4875962139927255e-05, + "loss": 1.4736, + "step": 12184 + }, + { + "epoch": 0.35777203593869283, + "grad_norm": 0.0, + "learning_rate": 1.487513186362159e-05, + "loss": 1.3643, + "step": 12185 + }, + { + "epoch": 0.3578013976158318, + "grad_norm": 0.0, + "learning_rate": 1.487430154322886e-05, + "loss": 1.2715, + "step": 12186 + }, + { + "epoch": 0.3578307592929708, + "grad_norm": 0.0, + "learning_rate": 1.4873471178756571e-05, + "loss": 1.3623, + "step": 12187 + }, + { + "epoch": 0.35786012097010983, + "grad_norm": 0.0, + "learning_rate": 1.487264077021224e-05, + "loss": 1.3252, + "step": 12188 + }, + { + "epoch": 0.3578894826472488, + "grad_norm": 0.0, + "learning_rate": 1.4871810317603363e-05, + "loss": 1.3389, + "step": 12189 + }, + { + "epoch": 0.3579188443243878, + "grad_norm": 0.0, + "learning_rate": 1.4870979820937461e-05, + "loss": 1.3857, + "step": 12190 + }, + { + "epoch": 0.3579482060015268, + "grad_norm": 0.0, + "learning_rate": 1.4870149280222042e-05, + "loss": 1.3691, + "step": 12191 + }, + { + "epoch": 0.3579775676786658, + "grad_norm": 0.0, + "learning_rate": 1.4869318695464613e-05, + "loss": 1.4766, + "step": 12192 + }, + { + "epoch": 0.3580069293558048, + "grad_norm": 0.0, + "learning_rate": 1.4868488066672691e-05, + "loss": 1.3359, + "step": 12193 + }, + { + "epoch": 0.3580362910329438, + "grad_norm": 0.0, + "learning_rate": 1.486765739385378e-05, + "loss": 1.3613, + "step": 12194 + }, + { + "epoch": 0.3580656527100828, + "grad_norm": 0.0, + "learning_rate": 1.48668266770154e-05, + "loss": 1.374, + "step": 12195 + }, + { + "epoch": 0.3580950143872218, + "grad_norm": 0.0, + "learning_rate": 1.4865995916165062e-05, + "loss": 1.3369, + "step": 12196 + }, + { + "epoch": 0.3581243760643608, + "grad_norm": 0.0, + "learning_rate": 1.4865165111310274e-05, + "loss": 1.4062, + "step": 12197 + }, + { + "epoch": 0.3581537377414998, + "grad_norm": 0.0, + "learning_rate": 1.486433426245855e-05, + "loss": 1.3154, + "step": 12198 + }, + { + "epoch": 0.3581830994186388, + "grad_norm": 0.0, + "learning_rate": 1.4863503369617406e-05, + "loss": 1.2998, + "step": 12199 + }, + { + "epoch": 0.3582124610957778, + "grad_norm": 0.0, + "learning_rate": 1.486267243279436e-05, + "loss": 1.3838, + "step": 12200 + }, + { + "epoch": 0.3582418227729168, + "grad_norm": 0.0, + "learning_rate": 1.486184145199692e-05, + "loss": 1.3457, + "step": 12201 + }, + { + "epoch": 0.3582711844500558, + "grad_norm": 0.0, + "learning_rate": 1.48610104272326e-05, + "loss": 1.4355, + "step": 12202 + }, + { + "epoch": 0.3583005461271948, + "grad_norm": 0.0, + "learning_rate": 1.4860179358508919e-05, + "loss": 1.3145, + "step": 12203 + }, + { + "epoch": 0.35832990780433377, + "grad_norm": 0.0, + "learning_rate": 1.4859348245833391e-05, + "loss": 1.2959, + "step": 12204 + }, + { + "epoch": 0.3583592694814728, + "grad_norm": 0.0, + "learning_rate": 1.4858517089213535e-05, + "loss": 1.251, + "step": 12205 + }, + { + "epoch": 0.3583886311586118, + "grad_norm": 0.0, + "learning_rate": 1.4857685888656864e-05, + "loss": 1.3877, + "step": 12206 + }, + { + "epoch": 0.35841799283575076, + "grad_norm": 0.0, + "learning_rate": 1.485685464417089e-05, + "loss": 1.4883, + "step": 12207 + }, + { + "epoch": 0.3584473545128898, + "grad_norm": 0.0, + "learning_rate": 1.4856023355763143e-05, + "loss": 1.3223, + "step": 12208 + }, + { + "epoch": 0.3584767161900288, + "grad_norm": 0.0, + "learning_rate": 1.4855192023441128e-05, + "loss": 1.2612, + "step": 12209 + }, + { + "epoch": 0.35850607786716776, + "grad_norm": 0.0, + "learning_rate": 1.4854360647212371e-05, + "loss": 1.2578, + "step": 12210 + }, + { + "epoch": 0.3585354395443068, + "grad_norm": 0.0, + "learning_rate": 1.4853529227084387e-05, + "loss": 1.4971, + "step": 12211 + }, + { + "epoch": 0.3585648012214458, + "grad_norm": 0.0, + "learning_rate": 1.4852697763064694e-05, + "loss": 1.3911, + "step": 12212 + }, + { + "epoch": 0.35859416289858476, + "grad_norm": 0.0, + "learning_rate": 1.4851866255160814e-05, + "loss": 1.4072, + "step": 12213 + }, + { + "epoch": 0.3586235245757238, + "grad_norm": 0.0, + "learning_rate": 1.4851034703380263e-05, + "loss": 1.2891, + "step": 12214 + }, + { + "epoch": 0.3586528862528628, + "grad_norm": 0.0, + "learning_rate": 1.4850203107730563e-05, + "loss": 1.3418, + "step": 12215 + }, + { + "epoch": 0.35868224793000175, + "grad_norm": 0.0, + "learning_rate": 1.4849371468219233e-05, + "loss": 1.2773, + "step": 12216 + }, + { + "epoch": 0.35871160960714077, + "grad_norm": 0.0, + "learning_rate": 1.4848539784853799e-05, + "loss": 1.3008, + "step": 12217 + }, + { + "epoch": 0.35874097128427973, + "grad_norm": 0.0, + "learning_rate": 1.4847708057641775e-05, + "loss": 1.4531, + "step": 12218 + }, + { + "epoch": 0.35877033296141875, + "grad_norm": 0.0, + "learning_rate": 1.4846876286590685e-05, + "loss": 1.373, + "step": 12219 + }, + { + "epoch": 0.35879969463855776, + "grad_norm": 0.0, + "learning_rate": 1.4846044471708054e-05, + "loss": 1.4248, + "step": 12220 + }, + { + "epoch": 0.3588290563156967, + "grad_norm": 0.0, + "learning_rate": 1.4845212613001402e-05, + "loss": 1.3555, + "step": 12221 + }, + { + "epoch": 0.35885841799283574, + "grad_norm": 0.0, + "learning_rate": 1.484438071047825e-05, + "loss": 1.3281, + "step": 12222 + }, + { + "epoch": 0.35888777966997476, + "grad_norm": 0.0, + "learning_rate": 1.484354876414612e-05, + "loss": 1.2441, + "step": 12223 + }, + { + "epoch": 0.3589171413471137, + "grad_norm": 0.0, + "learning_rate": 1.484271677401254e-05, + "loss": 1.4224, + "step": 12224 + }, + { + "epoch": 0.35894650302425274, + "grad_norm": 0.0, + "learning_rate": 1.4841884740085033e-05, + "loss": 1.3213, + "step": 12225 + }, + { + "epoch": 0.35897586470139176, + "grad_norm": 0.0, + "learning_rate": 1.4841052662371123e-05, + "loss": 1.333, + "step": 12226 + }, + { + "epoch": 0.3590052263785307, + "grad_norm": 0.0, + "learning_rate": 1.484022054087833e-05, + "loss": 1.3223, + "step": 12227 + }, + { + "epoch": 0.35903458805566973, + "grad_norm": 0.0, + "learning_rate": 1.4839388375614185e-05, + "loss": 1.3809, + "step": 12228 + }, + { + "epoch": 0.35906394973280875, + "grad_norm": 0.0, + "learning_rate": 1.4838556166586214e-05, + "loss": 1.3027, + "step": 12229 + }, + { + "epoch": 0.3590933114099477, + "grad_norm": 0.0, + "learning_rate": 1.483772391380194e-05, + "loss": 1.4404, + "step": 12230 + }, + { + "epoch": 0.35912267308708673, + "grad_norm": 0.0, + "learning_rate": 1.483689161726889e-05, + "loss": 1.459, + "step": 12231 + }, + { + "epoch": 0.35915203476422575, + "grad_norm": 0.0, + "learning_rate": 1.4836059276994586e-05, + "loss": 1.373, + "step": 12232 + }, + { + "epoch": 0.3591813964413647, + "grad_norm": 0.0, + "learning_rate": 1.4835226892986563e-05, + "loss": 1.2373, + "step": 12233 + }, + { + "epoch": 0.3592107581185037, + "grad_norm": 0.0, + "learning_rate": 1.4834394465252343e-05, + "loss": 1.1797, + "step": 12234 + }, + { + "epoch": 0.35924011979564274, + "grad_norm": 0.0, + "learning_rate": 1.4833561993799457e-05, + "loss": 1.2227, + "step": 12235 + }, + { + "epoch": 0.3592694814727817, + "grad_norm": 0.0, + "learning_rate": 1.483272947863543e-05, + "loss": 1.2959, + "step": 12236 + }, + { + "epoch": 0.3592988431499207, + "grad_norm": 0.0, + "learning_rate": 1.4831896919767797e-05, + "loss": 1.4346, + "step": 12237 + }, + { + "epoch": 0.35932820482705974, + "grad_norm": 0.0, + "learning_rate": 1.4831064317204078e-05, + "loss": 1.4414, + "step": 12238 + }, + { + "epoch": 0.3593575665041987, + "grad_norm": 0.0, + "learning_rate": 1.483023167095181e-05, + "loss": 1.2812, + "step": 12239 + }, + { + "epoch": 0.3593869281813377, + "grad_norm": 0.0, + "learning_rate": 1.482939898101852e-05, + "loss": 1.3994, + "step": 12240 + }, + { + "epoch": 0.35941628985847673, + "grad_norm": 0.0, + "learning_rate": 1.4828566247411735e-05, + "loss": 1.2988, + "step": 12241 + }, + { + "epoch": 0.3594456515356157, + "grad_norm": 0.0, + "learning_rate": 1.4827733470138994e-05, + "loss": 1.3906, + "step": 12242 + }, + { + "epoch": 0.3594750132127547, + "grad_norm": 0.0, + "learning_rate": 1.482690064920782e-05, + "loss": 1.3486, + "step": 12243 + }, + { + "epoch": 0.35950437488989373, + "grad_norm": 0.0, + "learning_rate": 1.4826067784625747e-05, + "loss": 1.2144, + "step": 12244 + }, + { + "epoch": 0.3595337365670327, + "grad_norm": 0.0, + "learning_rate": 1.4825234876400308e-05, + "loss": 1.4482, + "step": 12245 + }, + { + "epoch": 0.3595630982441717, + "grad_norm": 0.0, + "learning_rate": 1.4824401924539031e-05, + "loss": 1.4443, + "step": 12246 + }, + { + "epoch": 0.3595924599213107, + "grad_norm": 0.0, + "learning_rate": 1.4823568929049454e-05, + "loss": 1.4258, + "step": 12247 + }, + { + "epoch": 0.3596218215984497, + "grad_norm": 0.0, + "learning_rate": 1.4822735889939108e-05, + "loss": 1.3252, + "step": 12248 + }, + { + "epoch": 0.3596511832755887, + "grad_norm": 0.0, + "learning_rate": 1.482190280721553e-05, + "loss": 1.3828, + "step": 12249 + }, + { + "epoch": 0.3596805449527277, + "grad_norm": 0.0, + "learning_rate": 1.4821069680886247e-05, + "loss": 1.3428, + "step": 12250 + }, + { + "epoch": 0.3597099066298667, + "grad_norm": 0.0, + "learning_rate": 1.4820236510958794e-05, + "loss": 1.29, + "step": 12251 + }, + { + "epoch": 0.3597392683070057, + "grad_norm": 0.0, + "learning_rate": 1.4819403297440709e-05, + "loss": 1.4004, + "step": 12252 + }, + { + "epoch": 0.3597686299841447, + "grad_norm": 0.0, + "learning_rate": 1.4818570040339526e-05, + "loss": 1.3887, + "step": 12253 + }, + { + "epoch": 0.3597979916612837, + "grad_norm": 0.0, + "learning_rate": 1.481773673966278e-05, + "loss": 1.3232, + "step": 12254 + }, + { + "epoch": 0.3598273533384227, + "grad_norm": 0.0, + "learning_rate": 1.4816903395418007e-05, + "loss": 1.3564, + "step": 12255 + }, + { + "epoch": 0.3598567150155617, + "grad_norm": 0.0, + "learning_rate": 1.4816070007612742e-05, + "loss": 1.2637, + "step": 12256 + }, + { + "epoch": 0.3598860766927007, + "grad_norm": 0.0, + "learning_rate": 1.4815236576254526e-05, + "loss": 1.2441, + "step": 12257 + }, + { + "epoch": 0.3599154383698397, + "grad_norm": 0.0, + "learning_rate": 1.4814403101350889e-05, + "loss": 1.2031, + "step": 12258 + }, + { + "epoch": 0.3599448000469787, + "grad_norm": 0.0, + "learning_rate": 1.4813569582909374e-05, + "loss": 1.3535, + "step": 12259 + }, + { + "epoch": 0.35997416172411767, + "grad_norm": 0.0, + "learning_rate": 1.4812736020937512e-05, + "loss": 1.3936, + "step": 12260 + }, + { + "epoch": 0.3600035234012567, + "grad_norm": 0.0, + "learning_rate": 1.4811902415442849e-05, + "loss": 1.2852, + "step": 12261 + }, + { + "epoch": 0.3600328850783957, + "grad_norm": 0.0, + "learning_rate": 1.4811068766432921e-05, + "loss": 1.3955, + "step": 12262 + }, + { + "epoch": 0.36006224675553467, + "grad_norm": 0.0, + "learning_rate": 1.4810235073915265e-05, + "loss": 1.4639, + "step": 12263 + }, + { + "epoch": 0.3600916084326737, + "grad_norm": 0.0, + "learning_rate": 1.480940133789742e-05, + "loss": 1.3545, + "step": 12264 + }, + { + "epoch": 0.3601209701098127, + "grad_norm": 0.0, + "learning_rate": 1.4808567558386928e-05, + "loss": 1.2939, + "step": 12265 + }, + { + "epoch": 0.36015033178695166, + "grad_norm": 0.0, + "learning_rate": 1.4807733735391329e-05, + "loss": 1.3145, + "step": 12266 + }, + { + "epoch": 0.3601796934640907, + "grad_norm": 0.0, + "learning_rate": 1.480689986891816e-05, + "loss": 1.1782, + "step": 12267 + }, + { + "epoch": 0.36020905514122964, + "grad_norm": 0.0, + "learning_rate": 1.4806065958974966e-05, + "loss": 1.4395, + "step": 12268 + }, + { + "epoch": 0.36023841681836866, + "grad_norm": 0.0, + "learning_rate": 1.480523200556929e-05, + "loss": 1.3857, + "step": 12269 + }, + { + "epoch": 0.3602677784955077, + "grad_norm": 0.0, + "learning_rate": 1.4804398008708667e-05, + "loss": 1.5381, + "step": 12270 + }, + { + "epoch": 0.36029714017264663, + "grad_norm": 0.0, + "learning_rate": 1.4803563968400643e-05, + "loss": 1.4922, + "step": 12271 + }, + { + "epoch": 0.36032650184978565, + "grad_norm": 0.0, + "learning_rate": 1.4802729884652758e-05, + "loss": 1.3477, + "step": 12272 + }, + { + "epoch": 0.36035586352692467, + "grad_norm": 0.0, + "learning_rate": 1.4801895757472558e-05, + "loss": 1.3857, + "step": 12273 + }, + { + "epoch": 0.36038522520406363, + "grad_norm": 0.0, + "learning_rate": 1.4801061586867587e-05, + "loss": 1.5029, + "step": 12274 + }, + { + "epoch": 0.36041458688120265, + "grad_norm": 0.0, + "learning_rate": 1.4800227372845384e-05, + "loss": 1.1831, + "step": 12275 + }, + { + "epoch": 0.36044394855834166, + "grad_norm": 0.0, + "learning_rate": 1.4799393115413494e-05, + "loss": 1.3975, + "step": 12276 + }, + { + "epoch": 0.3604733102354806, + "grad_norm": 0.0, + "learning_rate": 1.4798558814579465e-05, + "loss": 1.4033, + "step": 12277 + }, + { + "epoch": 0.36050267191261964, + "grad_norm": 0.0, + "learning_rate": 1.4797724470350841e-05, + "loss": 1.3145, + "step": 12278 + }, + { + "epoch": 0.36053203358975866, + "grad_norm": 0.0, + "learning_rate": 1.4796890082735164e-05, + "loss": 1.3809, + "step": 12279 + }, + { + "epoch": 0.3605613952668976, + "grad_norm": 0.0, + "learning_rate": 1.4796055651739983e-05, + "loss": 1.3457, + "step": 12280 + }, + { + "epoch": 0.36059075694403664, + "grad_norm": 0.0, + "learning_rate": 1.479522117737284e-05, + "loss": 1.4238, + "step": 12281 + }, + { + "epoch": 0.36062011862117566, + "grad_norm": 0.0, + "learning_rate": 1.4794386659641287e-05, + "loss": 1.3984, + "step": 12282 + }, + { + "epoch": 0.3606494802983146, + "grad_norm": 0.0, + "learning_rate": 1.4793552098552865e-05, + "loss": 1.3838, + "step": 12283 + }, + { + "epoch": 0.36067884197545363, + "grad_norm": 0.0, + "learning_rate": 1.4792717494115123e-05, + "loss": 1.3125, + "step": 12284 + }, + { + "epoch": 0.36070820365259265, + "grad_norm": 0.0, + "learning_rate": 1.479188284633561e-05, + "loss": 1.3574, + "step": 12285 + }, + { + "epoch": 0.3607375653297316, + "grad_norm": 0.0, + "learning_rate": 1.4791048155221877e-05, + "loss": 1.5527, + "step": 12286 + }, + { + "epoch": 0.36076692700687063, + "grad_norm": 0.0, + "learning_rate": 1.4790213420781465e-05, + "loss": 1.4697, + "step": 12287 + }, + { + "epoch": 0.36079628868400965, + "grad_norm": 0.0, + "learning_rate": 1.4789378643021925e-05, + "loss": 1.3906, + "step": 12288 + }, + { + "epoch": 0.3608256503611486, + "grad_norm": 0.0, + "learning_rate": 1.4788543821950811e-05, + "loss": 1.2676, + "step": 12289 + }, + { + "epoch": 0.3608550120382876, + "grad_norm": 0.0, + "learning_rate": 1.4787708957575666e-05, + "loss": 1.1821, + "step": 12290 + }, + { + "epoch": 0.36088437371542664, + "grad_norm": 0.0, + "learning_rate": 1.4786874049904045e-05, + "loss": 1.2754, + "step": 12291 + }, + { + "epoch": 0.3609137353925656, + "grad_norm": 0.0, + "learning_rate": 1.4786039098943494e-05, + "loss": 1.4395, + "step": 12292 + }, + { + "epoch": 0.3609430970697046, + "grad_norm": 0.0, + "learning_rate": 1.4785204104701568e-05, + "loss": 1.2891, + "step": 12293 + }, + { + "epoch": 0.36097245874684364, + "grad_norm": 0.0, + "learning_rate": 1.478436906718581e-05, + "loss": 1.2598, + "step": 12294 + }, + { + "epoch": 0.3610018204239826, + "grad_norm": 0.0, + "learning_rate": 1.4783533986403784e-05, + "loss": 1.2852, + "step": 12295 + }, + { + "epoch": 0.3610311821011216, + "grad_norm": 0.0, + "learning_rate": 1.4782698862363032e-05, + "loss": 1.3398, + "step": 12296 + }, + { + "epoch": 0.36106054377826063, + "grad_norm": 0.0, + "learning_rate": 1.478186369507111e-05, + "loss": 1.3379, + "step": 12297 + }, + { + "epoch": 0.3610899054553996, + "grad_norm": 0.0, + "learning_rate": 1.4781028484535571e-05, + "loss": 1.4062, + "step": 12298 + }, + { + "epoch": 0.3611192671325386, + "grad_norm": 0.0, + "learning_rate": 1.4780193230763966e-05, + "loss": 1.4199, + "step": 12299 + }, + { + "epoch": 0.36114862880967763, + "grad_norm": 0.0, + "learning_rate": 1.477935793376385e-05, + "loss": 1.4824, + "step": 12300 + }, + { + "epoch": 0.3611779904868166, + "grad_norm": 0.0, + "learning_rate": 1.4778522593542774e-05, + "loss": 1.3457, + "step": 12301 + }, + { + "epoch": 0.3612073521639556, + "grad_norm": 0.0, + "learning_rate": 1.4777687210108297e-05, + "loss": 1.4248, + "step": 12302 + }, + { + "epoch": 0.3612367138410946, + "grad_norm": 0.0, + "learning_rate": 1.477685178346797e-05, + "loss": 1.416, + "step": 12303 + }, + { + "epoch": 0.3612660755182336, + "grad_norm": 0.0, + "learning_rate": 1.4776016313629349e-05, + "loss": 1.3438, + "step": 12304 + }, + { + "epoch": 0.3612954371953726, + "grad_norm": 0.0, + "learning_rate": 1.4775180800599986e-05, + "loss": 1.5234, + "step": 12305 + }, + { + "epoch": 0.3613247988725116, + "grad_norm": 0.0, + "learning_rate": 1.4774345244387442e-05, + "loss": 1.501, + "step": 12306 + }, + { + "epoch": 0.3613541605496506, + "grad_norm": 0.0, + "learning_rate": 1.4773509644999275e-05, + "loss": 1.3564, + "step": 12307 + }, + { + "epoch": 0.3613835222267896, + "grad_norm": 0.0, + "learning_rate": 1.4772674002443037e-05, + "loss": 1.5537, + "step": 12308 + }, + { + "epoch": 0.3614128839039286, + "grad_norm": 0.0, + "learning_rate": 1.4771838316726284e-05, + "loss": 1.4141, + "step": 12309 + }, + { + "epoch": 0.3614422455810676, + "grad_norm": 0.0, + "learning_rate": 1.4771002587856573e-05, + "loss": 1.3984, + "step": 12310 + }, + { + "epoch": 0.3614716072582066, + "grad_norm": 0.0, + "learning_rate": 1.4770166815841467e-05, + "loss": 1.3271, + "step": 12311 + }, + { + "epoch": 0.3615009689353456, + "grad_norm": 0.0, + "learning_rate": 1.4769331000688518e-05, + "loss": 1.2773, + "step": 12312 + }, + { + "epoch": 0.3615303306124846, + "grad_norm": 0.0, + "learning_rate": 1.4768495142405289e-05, + "loss": 1.3584, + "step": 12313 + }, + { + "epoch": 0.3615596922896236, + "grad_norm": 0.0, + "learning_rate": 1.4767659240999334e-05, + "loss": 1.3682, + "step": 12314 + }, + { + "epoch": 0.3615890539667626, + "grad_norm": 0.0, + "learning_rate": 1.4766823296478222e-05, + "loss": 1.3369, + "step": 12315 + }, + { + "epoch": 0.36161841564390157, + "grad_norm": 0.0, + "learning_rate": 1.47659873088495e-05, + "loss": 1.3232, + "step": 12316 + }, + { + "epoch": 0.3616477773210406, + "grad_norm": 0.0, + "learning_rate": 1.4765151278120737e-05, + "loss": 1.3867, + "step": 12317 + }, + { + "epoch": 0.3616771389981796, + "grad_norm": 0.0, + "learning_rate": 1.4764315204299491e-05, + "loss": 1.2695, + "step": 12318 + }, + { + "epoch": 0.36170650067531857, + "grad_norm": 0.0, + "learning_rate": 1.4763479087393322e-05, + "loss": 1.4404, + "step": 12319 + }, + { + "epoch": 0.3617358623524576, + "grad_norm": 0.0, + "learning_rate": 1.4762642927409792e-05, + "loss": 1.4014, + "step": 12320 + }, + { + "epoch": 0.36176522402959654, + "grad_norm": 0.0, + "learning_rate": 1.4761806724356462e-05, + "loss": 1.374, + "step": 12321 + }, + { + "epoch": 0.36179458570673556, + "grad_norm": 0.0, + "learning_rate": 1.4760970478240893e-05, + "loss": 1.335, + "step": 12322 + }, + { + "epoch": 0.3618239473838746, + "grad_norm": 0.0, + "learning_rate": 1.476013418907065e-05, + "loss": 1.2744, + "step": 12323 + }, + { + "epoch": 0.36185330906101354, + "grad_norm": 0.0, + "learning_rate": 1.4759297856853294e-05, + "loss": 1.1611, + "step": 12324 + }, + { + "epoch": 0.36188267073815256, + "grad_norm": 0.0, + "learning_rate": 1.4758461481596387e-05, + "loss": 1.3994, + "step": 12325 + }, + { + "epoch": 0.3619120324152916, + "grad_norm": 0.0, + "learning_rate": 1.4757625063307494e-05, + "loss": 1.1968, + "step": 12326 + }, + { + "epoch": 0.36194139409243054, + "grad_norm": 0.0, + "learning_rate": 1.4756788601994182e-05, + "loss": 1.4805, + "step": 12327 + }, + { + "epoch": 0.36197075576956955, + "grad_norm": 0.0, + "learning_rate": 1.4755952097664015e-05, + "loss": 1.3555, + "step": 12328 + }, + { + "epoch": 0.36200011744670857, + "grad_norm": 0.0, + "learning_rate": 1.475511555032455e-05, + "loss": 1.4355, + "step": 12329 + }, + { + "epoch": 0.36202947912384753, + "grad_norm": 0.0, + "learning_rate": 1.475427895998336e-05, + "loss": 1.2598, + "step": 12330 + }, + { + "epoch": 0.36205884080098655, + "grad_norm": 0.0, + "learning_rate": 1.4753442326648004e-05, + "loss": 1.3579, + "step": 12331 + }, + { + "epoch": 0.36208820247812556, + "grad_norm": 0.0, + "learning_rate": 1.4752605650326058e-05, + "loss": 1.3398, + "step": 12332 + }, + { + "epoch": 0.3621175641552645, + "grad_norm": 0.0, + "learning_rate": 1.4751768931025074e-05, + "loss": 1.3271, + "step": 12333 + }, + { + "epoch": 0.36214692583240354, + "grad_norm": 0.0, + "learning_rate": 1.4750932168752631e-05, + "loss": 1.3262, + "step": 12334 + }, + { + "epoch": 0.36217628750954256, + "grad_norm": 0.0, + "learning_rate": 1.4750095363516294e-05, + "loss": 1.417, + "step": 12335 + }, + { + "epoch": 0.3622056491866815, + "grad_norm": 0.0, + "learning_rate": 1.4749258515323623e-05, + "loss": 1.3799, + "step": 12336 + }, + { + "epoch": 0.36223501086382054, + "grad_norm": 0.0, + "learning_rate": 1.4748421624182194e-05, + "loss": 1.3857, + "step": 12337 + }, + { + "epoch": 0.36226437254095956, + "grad_norm": 0.0, + "learning_rate": 1.4747584690099572e-05, + "loss": 1.29, + "step": 12338 + }, + { + "epoch": 0.3622937342180985, + "grad_norm": 0.0, + "learning_rate": 1.4746747713083327e-05, + "loss": 1.3203, + "step": 12339 + }, + { + "epoch": 0.36232309589523753, + "grad_norm": 0.0, + "learning_rate": 1.4745910693141026e-05, + "loss": 1.3037, + "step": 12340 + }, + { + "epoch": 0.36235245757237655, + "grad_norm": 0.0, + "learning_rate": 1.4745073630280238e-05, + "loss": 1.4209, + "step": 12341 + }, + { + "epoch": 0.3623818192495155, + "grad_norm": 0.0, + "learning_rate": 1.4744236524508532e-05, + "loss": 1.4668, + "step": 12342 + }, + { + "epoch": 0.36241118092665453, + "grad_norm": 0.0, + "learning_rate": 1.4743399375833483e-05, + "loss": 1.2881, + "step": 12343 + }, + { + "epoch": 0.36244054260379355, + "grad_norm": 0.0, + "learning_rate": 1.4742562184262659e-05, + "loss": 1.3867, + "step": 12344 + }, + { + "epoch": 0.3624699042809325, + "grad_norm": 0.0, + "learning_rate": 1.4741724949803628e-05, + "loss": 1.3135, + "step": 12345 + }, + { + "epoch": 0.3624992659580715, + "grad_norm": 0.0, + "learning_rate": 1.4740887672463966e-05, + "loss": 1.2686, + "step": 12346 + }, + { + "epoch": 0.36252862763521054, + "grad_norm": 0.0, + "learning_rate": 1.4740050352251243e-05, + "loss": 1.3721, + "step": 12347 + }, + { + "epoch": 0.3625579893123495, + "grad_norm": 0.0, + "learning_rate": 1.473921298917303e-05, + "loss": 1.0903, + "step": 12348 + }, + { + "epoch": 0.3625873509894885, + "grad_norm": 0.0, + "learning_rate": 1.4738375583236902e-05, + "loss": 1.3887, + "step": 12349 + }, + { + "epoch": 0.36261671266662754, + "grad_norm": 0.0, + "learning_rate": 1.4737538134450427e-05, + "loss": 1.4014, + "step": 12350 + }, + { + "epoch": 0.3626460743437665, + "grad_norm": 0.0, + "learning_rate": 1.4736700642821182e-05, + "loss": 1.3701, + "step": 12351 + }, + { + "epoch": 0.3626754360209055, + "grad_norm": 0.0, + "learning_rate": 1.473586310835674e-05, + "loss": 1.4688, + "step": 12352 + }, + { + "epoch": 0.36270479769804453, + "grad_norm": 0.0, + "learning_rate": 1.4735025531064674e-05, + "loss": 1.3398, + "step": 12353 + }, + { + "epoch": 0.3627341593751835, + "grad_norm": 0.0, + "learning_rate": 1.4734187910952562e-05, + "loss": 1.4199, + "step": 12354 + }, + { + "epoch": 0.3627635210523225, + "grad_norm": 0.0, + "learning_rate": 1.4733350248027973e-05, + "loss": 1.4795, + "step": 12355 + }, + { + "epoch": 0.36279288272946153, + "grad_norm": 0.0, + "learning_rate": 1.4732512542298492e-05, + "loss": 1.418, + "step": 12356 + }, + { + "epoch": 0.3628222444066005, + "grad_norm": 0.0, + "learning_rate": 1.4731674793771682e-05, + "loss": 1.4668, + "step": 12357 + }, + { + "epoch": 0.3628516060837395, + "grad_norm": 0.0, + "learning_rate": 1.4730837002455128e-05, + "loss": 1.2998, + "step": 12358 + }, + { + "epoch": 0.3628809677608785, + "grad_norm": 0.0, + "learning_rate": 1.4729999168356401e-05, + "loss": 1.3594, + "step": 12359 + }, + { + "epoch": 0.3629103294380175, + "grad_norm": 0.0, + "learning_rate": 1.4729161291483083e-05, + "loss": 1.4004, + "step": 12360 + }, + { + "epoch": 0.3629396911151565, + "grad_norm": 0.0, + "learning_rate": 1.4728323371842748e-05, + "loss": 1.3438, + "step": 12361 + }, + { + "epoch": 0.3629690527922955, + "grad_norm": 0.0, + "learning_rate": 1.472748540944297e-05, + "loss": 1.4082, + "step": 12362 + }, + { + "epoch": 0.3629984144694345, + "grad_norm": 0.0, + "learning_rate": 1.4726647404291332e-05, + "loss": 1.3799, + "step": 12363 + }, + { + "epoch": 0.3630277761465735, + "grad_norm": 0.0, + "learning_rate": 1.4725809356395413e-05, + "loss": 1.292, + "step": 12364 + }, + { + "epoch": 0.3630571378237125, + "grad_norm": 0.0, + "learning_rate": 1.472497126576279e-05, + "loss": 1.3262, + "step": 12365 + }, + { + "epoch": 0.3630864995008515, + "grad_norm": 0.0, + "learning_rate": 1.472413313240104e-05, + "loss": 1.2979, + "step": 12366 + }, + { + "epoch": 0.3631158611779905, + "grad_norm": 0.0, + "learning_rate": 1.4723294956317743e-05, + "loss": 1.293, + "step": 12367 + }, + { + "epoch": 0.3631452228551295, + "grad_norm": 0.0, + "learning_rate": 1.4722456737520486e-05, + "loss": 1.376, + "step": 12368 + }, + { + "epoch": 0.3631745845322685, + "grad_norm": 0.0, + "learning_rate": 1.4721618476016838e-05, + "loss": 1.3105, + "step": 12369 + }, + { + "epoch": 0.3632039462094075, + "grad_norm": 0.0, + "learning_rate": 1.4720780171814386e-05, + "loss": 1.3848, + "step": 12370 + }, + { + "epoch": 0.36323330788654645, + "grad_norm": 0.0, + "learning_rate": 1.471994182492071e-05, + "loss": 1.417, + "step": 12371 + }, + { + "epoch": 0.36326266956368547, + "grad_norm": 0.0, + "learning_rate": 1.471910343534339e-05, + "loss": 1.2437, + "step": 12372 + }, + { + "epoch": 0.3632920312408245, + "grad_norm": 0.0, + "learning_rate": 1.471826500309001e-05, + "loss": 1.3779, + "step": 12373 + }, + { + "epoch": 0.36332139291796345, + "grad_norm": 0.0, + "learning_rate": 1.4717426528168153e-05, + "loss": 1.373, + "step": 12374 + }, + { + "epoch": 0.36335075459510247, + "grad_norm": 0.0, + "learning_rate": 1.4716588010585397e-05, + "loss": 1.3018, + "step": 12375 + }, + { + "epoch": 0.3633801162722415, + "grad_norm": 0.0, + "learning_rate": 1.471574945034933e-05, + "loss": 1.3887, + "step": 12376 + }, + { + "epoch": 0.36340947794938044, + "grad_norm": 0.0, + "learning_rate": 1.4714910847467534e-05, + "loss": 1.3535, + "step": 12377 + }, + { + "epoch": 0.36343883962651946, + "grad_norm": 0.0, + "learning_rate": 1.4714072201947588e-05, + "loss": 1.4258, + "step": 12378 + }, + { + "epoch": 0.3634682013036585, + "grad_norm": 0.0, + "learning_rate": 1.471323351379708e-05, + "loss": 1.2471, + "step": 12379 + }, + { + "epoch": 0.36349756298079744, + "grad_norm": 0.0, + "learning_rate": 1.4712394783023598e-05, + "loss": 1.3101, + "step": 12380 + }, + { + "epoch": 0.36352692465793646, + "grad_norm": 0.0, + "learning_rate": 1.471155600963472e-05, + "loss": 1.3398, + "step": 12381 + }, + { + "epoch": 0.3635562863350755, + "grad_norm": 0.0, + "learning_rate": 1.4710717193638035e-05, + "loss": 1.291, + "step": 12382 + }, + { + "epoch": 0.36358564801221444, + "grad_norm": 0.0, + "learning_rate": 1.4709878335041127e-05, + "loss": 1.415, + "step": 12383 + }, + { + "epoch": 0.36361500968935345, + "grad_norm": 0.0, + "learning_rate": 1.4709039433851584e-05, + "loss": 1.4795, + "step": 12384 + }, + { + "epoch": 0.36364437136649247, + "grad_norm": 0.0, + "learning_rate": 1.4708200490076994e-05, + "loss": 1.249, + "step": 12385 + }, + { + "epoch": 0.36367373304363143, + "grad_norm": 0.0, + "learning_rate": 1.4707361503724938e-05, + "loss": 1.4043, + "step": 12386 + }, + { + "epoch": 0.36370309472077045, + "grad_norm": 0.0, + "learning_rate": 1.4706522474803008e-05, + "loss": 1.3379, + "step": 12387 + }, + { + "epoch": 0.36373245639790946, + "grad_norm": 0.0, + "learning_rate": 1.4705683403318789e-05, + "loss": 1.2686, + "step": 12388 + }, + { + "epoch": 0.3637618180750484, + "grad_norm": 0.0, + "learning_rate": 1.470484428927987e-05, + "loss": 1.2021, + "step": 12389 + }, + { + "epoch": 0.36379117975218744, + "grad_norm": 0.0, + "learning_rate": 1.4704005132693839e-05, + "loss": 1.3457, + "step": 12390 + }, + { + "epoch": 0.36382054142932646, + "grad_norm": 0.0, + "learning_rate": 1.4703165933568284e-05, + "loss": 1.4336, + "step": 12391 + }, + { + "epoch": 0.3638499031064654, + "grad_norm": 0.0, + "learning_rate": 1.4702326691910795e-05, + "loss": 1.4336, + "step": 12392 + }, + { + "epoch": 0.36387926478360444, + "grad_norm": 0.0, + "learning_rate": 1.4701487407728965e-05, + "loss": 1.4912, + "step": 12393 + }, + { + "epoch": 0.36390862646074346, + "grad_norm": 0.0, + "learning_rate": 1.4700648081030376e-05, + "loss": 1.4258, + "step": 12394 + }, + { + "epoch": 0.3639379881378824, + "grad_norm": 0.0, + "learning_rate": 1.4699808711822623e-05, + "loss": 1.4814, + "step": 12395 + }, + { + "epoch": 0.36396734981502143, + "grad_norm": 0.0, + "learning_rate": 1.4698969300113297e-05, + "loss": 1.3496, + "step": 12396 + }, + { + "epoch": 0.36399671149216045, + "grad_norm": 0.0, + "learning_rate": 1.4698129845909992e-05, + "loss": 1.3608, + "step": 12397 + }, + { + "epoch": 0.3640260731692994, + "grad_norm": 0.0, + "learning_rate": 1.469729034922029e-05, + "loss": 1.3711, + "step": 12398 + }, + { + "epoch": 0.36405543484643843, + "grad_norm": 0.0, + "learning_rate": 1.469645081005179e-05, + "loss": 1.5166, + "step": 12399 + }, + { + "epoch": 0.36408479652357745, + "grad_norm": 0.0, + "learning_rate": 1.4695611228412084e-05, + "loss": 1.5371, + "step": 12400 + }, + { + "epoch": 0.3641141582007164, + "grad_norm": 0.0, + "learning_rate": 1.4694771604308762e-05, + "loss": 1.4141, + "step": 12401 + }, + { + "epoch": 0.3641435198778554, + "grad_norm": 0.0, + "learning_rate": 1.469393193774942e-05, + "loss": 1.457, + "step": 12402 + }, + { + "epoch": 0.36417288155499444, + "grad_norm": 0.0, + "learning_rate": 1.4693092228741645e-05, + "loss": 1.4238, + "step": 12403 + }, + { + "epoch": 0.3642022432321334, + "grad_norm": 0.0, + "learning_rate": 1.4692252477293037e-05, + "loss": 1.4502, + "step": 12404 + }, + { + "epoch": 0.3642316049092724, + "grad_norm": 0.0, + "learning_rate": 1.469141268341119e-05, + "loss": 1.417, + "step": 12405 + }, + { + "epoch": 0.36426096658641144, + "grad_norm": 0.0, + "learning_rate": 1.4690572847103694e-05, + "loss": 1.4404, + "step": 12406 + }, + { + "epoch": 0.3642903282635504, + "grad_norm": 0.0, + "learning_rate": 1.468973296837815e-05, + "loss": 1.377, + "step": 12407 + }, + { + "epoch": 0.3643196899406894, + "grad_norm": 0.0, + "learning_rate": 1.4688893047242148e-05, + "loss": 1.4697, + "step": 12408 + }, + { + "epoch": 0.36434905161782843, + "grad_norm": 0.0, + "learning_rate": 1.4688053083703285e-05, + "loss": 1.4482, + "step": 12409 + }, + { + "epoch": 0.3643784132949674, + "grad_norm": 0.0, + "learning_rate": 1.4687213077769156e-05, + "loss": 1.188, + "step": 12410 + }, + { + "epoch": 0.3644077749721064, + "grad_norm": 0.0, + "learning_rate": 1.4686373029447361e-05, + "loss": 1.3462, + "step": 12411 + }, + { + "epoch": 0.36443713664924543, + "grad_norm": 0.0, + "learning_rate": 1.4685532938745493e-05, + "loss": 1.4746, + "step": 12412 + }, + { + "epoch": 0.3644664983263844, + "grad_norm": 0.0, + "learning_rate": 1.4684692805671153e-05, + "loss": 1.3809, + "step": 12413 + }, + { + "epoch": 0.3644958600035234, + "grad_norm": 0.0, + "learning_rate": 1.4683852630231932e-05, + "loss": 1.2793, + "step": 12414 + }, + { + "epoch": 0.3645252216806624, + "grad_norm": 0.0, + "learning_rate": 1.4683012412435434e-05, + "loss": 1.3418, + "step": 12415 + }, + { + "epoch": 0.3645545833578014, + "grad_norm": 0.0, + "learning_rate": 1.4682172152289256e-05, + "loss": 1.2866, + "step": 12416 + }, + { + "epoch": 0.3645839450349404, + "grad_norm": 0.0, + "learning_rate": 1.4681331849800998e-05, + "loss": 1.3369, + "step": 12417 + }, + { + "epoch": 0.3646133067120794, + "grad_norm": 0.0, + "learning_rate": 1.4680491504978254e-05, + "loss": 1.3232, + "step": 12418 + }, + { + "epoch": 0.3646426683892184, + "grad_norm": 0.0, + "learning_rate": 1.4679651117828626e-05, + "loss": 1.2852, + "step": 12419 + }, + { + "epoch": 0.3646720300663574, + "grad_norm": 0.0, + "learning_rate": 1.4678810688359717e-05, + "loss": 1.4346, + "step": 12420 + }, + { + "epoch": 0.36470139174349636, + "grad_norm": 0.0, + "learning_rate": 1.4677970216579123e-05, + "loss": 1.4316, + "step": 12421 + }, + { + "epoch": 0.3647307534206354, + "grad_norm": 0.0, + "learning_rate": 1.4677129702494449e-05, + "loss": 1.3662, + "step": 12422 + }, + { + "epoch": 0.3647601150977744, + "grad_norm": 0.0, + "learning_rate": 1.4676289146113289e-05, + "loss": 1.4062, + "step": 12423 + }, + { + "epoch": 0.36478947677491336, + "grad_norm": 0.0, + "learning_rate": 1.4675448547443252e-05, + "loss": 1.2344, + "step": 12424 + }, + { + "epoch": 0.3648188384520524, + "grad_norm": 0.0, + "learning_rate": 1.4674607906491935e-05, + "loss": 1.4053, + "step": 12425 + }, + { + "epoch": 0.3648482001291914, + "grad_norm": 0.0, + "learning_rate": 1.4673767223266944e-05, + "loss": 1.4824, + "step": 12426 + }, + { + "epoch": 0.36487756180633035, + "grad_norm": 0.0, + "learning_rate": 1.4672926497775877e-05, + "loss": 1.3242, + "step": 12427 + }, + { + "epoch": 0.36490692348346937, + "grad_norm": 0.0, + "learning_rate": 1.4672085730026337e-05, + "loss": 1.4395, + "step": 12428 + }, + { + "epoch": 0.3649362851606084, + "grad_norm": 0.0, + "learning_rate": 1.4671244920025932e-05, + "loss": 1.2979, + "step": 12429 + }, + { + "epoch": 0.36496564683774735, + "grad_norm": 0.0, + "learning_rate": 1.4670404067782262e-05, + "loss": 1.3271, + "step": 12430 + }, + { + "epoch": 0.36499500851488637, + "grad_norm": 0.0, + "learning_rate": 1.4669563173302932e-05, + "loss": 1.4229, + "step": 12431 + }, + { + "epoch": 0.3650243701920254, + "grad_norm": 0.0, + "learning_rate": 1.4668722236595544e-05, + "loss": 1.4434, + "step": 12432 + }, + { + "epoch": 0.36505373186916434, + "grad_norm": 0.0, + "learning_rate": 1.466788125766771e-05, + "loss": 1.3506, + "step": 12433 + }, + { + "epoch": 0.36508309354630336, + "grad_norm": 0.0, + "learning_rate": 1.4667040236527027e-05, + "loss": 1.3633, + "step": 12434 + }, + { + "epoch": 0.3651124552234424, + "grad_norm": 0.0, + "learning_rate": 1.4666199173181103e-05, + "loss": 1.311, + "step": 12435 + }, + { + "epoch": 0.36514181690058134, + "grad_norm": 0.0, + "learning_rate": 1.466535806763755e-05, + "loss": 1.4609, + "step": 12436 + }, + { + "epoch": 0.36517117857772036, + "grad_norm": 0.0, + "learning_rate": 1.4664516919903965e-05, + "loss": 1.3672, + "step": 12437 + }, + { + "epoch": 0.3652005402548594, + "grad_norm": 0.0, + "learning_rate": 1.4663675729987961e-05, + "loss": 1.3711, + "step": 12438 + }, + { + "epoch": 0.36522990193199834, + "grad_norm": 0.0, + "learning_rate": 1.4662834497897138e-05, + "loss": 1.3682, + "step": 12439 + }, + { + "epoch": 0.36525926360913735, + "grad_norm": 0.0, + "learning_rate": 1.4661993223639113e-05, + "loss": 1.4775, + "step": 12440 + }, + { + "epoch": 0.36528862528627637, + "grad_norm": 0.0, + "learning_rate": 1.4661151907221488e-05, + "loss": 1.3447, + "step": 12441 + }, + { + "epoch": 0.36531798696341533, + "grad_norm": 0.0, + "learning_rate": 1.4660310548651873e-05, + "loss": 1.335, + "step": 12442 + }, + { + "epoch": 0.36534734864055435, + "grad_norm": 0.0, + "learning_rate": 1.4659469147937876e-05, + "loss": 1.4795, + "step": 12443 + }, + { + "epoch": 0.36537671031769337, + "grad_norm": 0.0, + "learning_rate": 1.4658627705087104e-05, + "loss": 1.2432, + "step": 12444 + }, + { + "epoch": 0.3654060719948323, + "grad_norm": 0.0, + "learning_rate": 1.465778622010717e-05, + "loss": 1.3604, + "step": 12445 + }, + { + "epoch": 0.36543543367197134, + "grad_norm": 0.0, + "learning_rate": 1.4656944693005687e-05, + "loss": 1.374, + "step": 12446 + }, + { + "epoch": 0.36546479534911036, + "grad_norm": 0.0, + "learning_rate": 1.4656103123790257e-05, + "loss": 1.3701, + "step": 12447 + }, + { + "epoch": 0.3654941570262493, + "grad_norm": 0.0, + "learning_rate": 1.4655261512468492e-05, + "loss": 1.4297, + "step": 12448 + }, + { + "epoch": 0.36552351870338834, + "grad_norm": 0.0, + "learning_rate": 1.4654419859048006e-05, + "loss": 1.3984, + "step": 12449 + }, + { + "epoch": 0.36555288038052736, + "grad_norm": 0.0, + "learning_rate": 1.465357816353641e-05, + "loss": 1.4434, + "step": 12450 + }, + { + "epoch": 0.3655822420576663, + "grad_norm": 0.0, + "learning_rate": 1.4652736425941317e-05, + "loss": 1.4023, + "step": 12451 + }, + { + "epoch": 0.36561160373480533, + "grad_norm": 0.0, + "learning_rate": 1.4651894646270333e-05, + "loss": 1.3359, + "step": 12452 + }, + { + "epoch": 0.36564096541194435, + "grad_norm": 0.0, + "learning_rate": 1.4651052824531073e-05, + "loss": 1.374, + "step": 12453 + }, + { + "epoch": 0.3656703270890833, + "grad_norm": 0.0, + "learning_rate": 1.4650210960731157e-05, + "loss": 1.2393, + "step": 12454 + }, + { + "epoch": 0.36569968876622233, + "grad_norm": 0.0, + "learning_rate": 1.4649369054878188e-05, + "loss": 1.3691, + "step": 12455 + }, + { + "epoch": 0.36572905044336135, + "grad_norm": 0.0, + "learning_rate": 1.4648527106979786e-05, + "loss": 1.3994, + "step": 12456 + }, + { + "epoch": 0.3657584121205003, + "grad_norm": 0.0, + "learning_rate": 1.4647685117043563e-05, + "loss": 1.2915, + "step": 12457 + }, + { + "epoch": 0.3657877737976393, + "grad_norm": 0.0, + "learning_rate": 1.4646843085077132e-05, + "loss": 1.5098, + "step": 12458 + }, + { + "epoch": 0.36581713547477834, + "grad_norm": 0.0, + "learning_rate": 1.464600101108811e-05, + "loss": 1.2949, + "step": 12459 + }, + { + "epoch": 0.3658464971519173, + "grad_norm": 0.0, + "learning_rate": 1.4645158895084108e-05, + "loss": 1.46, + "step": 12460 + }, + { + "epoch": 0.3658758588290563, + "grad_norm": 0.0, + "learning_rate": 1.4644316737072748e-05, + "loss": 1.4307, + "step": 12461 + }, + { + "epoch": 0.36590522050619534, + "grad_norm": 0.0, + "learning_rate": 1.4643474537061642e-05, + "loss": 1.3477, + "step": 12462 + }, + { + "epoch": 0.3659345821833343, + "grad_norm": 0.0, + "learning_rate": 1.4642632295058403e-05, + "loss": 1.3574, + "step": 12463 + }, + { + "epoch": 0.3659639438604733, + "grad_norm": 0.0, + "learning_rate": 1.4641790011070653e-05, + "loss": 1.3047, + "step": 12464 + }, + { + "epoch": 0.36599330553761233, + "grad_norm": 0.0, + "learning_rate": 1.4640947685106009e-05, + "loss": 1.3066, + "step": 12465 + }, + { + "epoch": 0.3660226672147513, + "grad_norm": 0.0, + "learning_rate": 1.4640105317172086e-05, + "loss": 1.418, + "step": 12466 + }, + { + "epoch": 0.3660520288918903, + "grad_norm": 0.0, + "learning_rate": 1.4639262907276504e-05, + "loss": 1.3779, + "step": 12467 + }, + { + "epoch": 0.36608139056902933, + "grad_norm": 0.0, + "learning_rate": 1.4638420455426876e-05, + "loss": 1.3867, + "step": 12468 + }, + { + "epoch": 0.3661107522461683, + "grad_norm": 0.0, + "learning_rate": 1.4637577961630824e-05, + "loss": 1.3281, + "step": 12469 + }, + { + "epoch": 0.3661401139233073, + "grad_norm": 0.0, + "learning_rate": 1.4636735425895968e-05, + "loss": 1.4307, + "step": 12470 + }, + { + "epoch": 0.36616947560044627, + "grad_norm": 0.0, + "learning_rate": 1.4635892848229926e-05, + "loss": 1.2236, + "step": 12471 + }, + { + "epoch": 0.3661988372775853, + "grad_norm": 0.0, + "learning_rate": 1.4635050228640318e-05, + "loss": 1.4697, + "step": 12472 + }, + { + "epoch": 0.3662281989547243, + "grad_norm": 0.0, + "learning_rate": 1.4634207567134761e-05, + "loss": 1.3359, + "step": 12473 + }, + { + "epoch": 0.36625756063186327, + "grad_norm": 0.0, + "learning_rate": 1.463336486372088e-05, + "loss": 1.3838, + "step": 12474 + }, + { + "epoch": 0.3662869223090023, + "grad_norm": 0.0, + "learning_rate": 1.4632522118406296e-05, + "loss": 1.4766, + "step": 12475 + }, + { + "epoch": 0.3663162839861413, + "grad_norm": 0.0, + "learning_rate": 1.4631679331198627e-05, + "loss": 1.2886, + "step": 12476 + }, + { + "epoch": 0.36634564566328026, + "grad_norm": 0.0, + "learning_rate": 1.4630836502105494e-05, + "loss": 1.2822, + "step": 12477 + }, + { + "epoch": 0.3663750073404193, + "grad_norm": 0.0, + "learning_rate": 1.462999363113452e-05, + "loss": 1.4326, + "step": 12478 + }, + { + "epoch": 0.3664043690175583, + "grad_norm": 0.0, + "learning_rate": 1.4629150718293332e-05, + "loss": 1.4844, + "step": 12479 + }, + { + "epoch": 0.36643373069469726, + "grad_norm": 0.0, + "learning_rate": 1.4628307763589544e-05, + "loss": 1.3955, + "step": 12480 + }, + { + "epoch": 0.3664630923718363, + "grad_norm": 0.0, + "learning_rate": 1.4627464767030785e-05, + "loss": 1.4219, + "step": 12481 + }, + { + "epoch": 0.3664924540489753, + "grad_norm": 0.0, + "learning_rate": 1.4626621728624677e-05, + "loss": 1.375, + "step": 12482 + }, + { + "epoch": 0.36652181572611425, + "grad_norm": 0.0, + "learning_rate": 1.4625778648378845e-05, + "loss": 1.4463, + "step": 12483 + }, + { + "epoch": 0.36655117740325327, + "grad_norm": 0.0, + "learning_rate": 1.4624935526300912e-05, + "loss": 1.3018, + "step": 12484 + }, + { + "epoch": 0.3665805390803923, + "grad_norm": 0.0, + "learning_rate": 1.46240923623985e-05, + "loss": 1.4756, + "step": 12485 + }, + { + "epoch": 0.36660990075753125, + "grad_norm": 0.0, + "learning_rate": 1.462324915667924e-05, + "loss": 1.4307, + "step": 12486 + }, + { + "epoch": 0.36663926243467027, + "grad_norm": 0.0, + "learning_rate": 1.4622405909150755e-05, + "loss": 1.5361, + "step": 12487 + }, + { + "epoch": 0.3666686241118093, + "grad_norm": 0.0, + "learning_rate": 1.4621562619820666e-05, + "loss": 1.3164, + "step": 12488 + }, + { + "epoch": 0.36669798578894824, + "grad_norm": 0.0, + "learning_rate": 1.4620719288696602e-05, + "loss": 1.3535, + "step": 12489 + }, + { + "epoch": 0.36672734746608726, + "grad_norm": 0.0, + "learning_rate": 1.461987591578619e-05, + "loss": 1.3008, + "step": 12490 + }, + { + "epoch": 0.3667567091432263, + "grad_norm": 0.0, + "learning_rate": 1.4619032501097059e-05, + "loss": 1.3809, + "step": 12491 + }, + { + "epoch": 0.36678607082036524, + "grad_norm": 0.0, + "learning_rate": 1.4618189044636832e-05, + "loss": 1.2295, + "step": 12492 + }, + { + "epoch": 0.36681543249750426, + "grad_norm": 0.0, + "learning_rate": 1.4617345546413142e-05, + "loss": 1.2344, + "step": 12493 + }, + { + "epoch": 0.3668447941746433, + "grad_norm": 0.0, + "learning_rate": 1.461650200643361e-05, + "loss": 1.3643, + "step": 12494 + }, + { + "epoch": 0.36687415585178224, + "grad_norm": 0.0, + "learning_rate": 1.4615658424705872e-05, + "loss": 1.3232, + "step": 12495 + }, + { + "epoch": 0.36690351752892125, + "grad_norm": 0.0, + "learning_rate": 1.4614814801237551e-05, + "loss": 1.4902, + "step": 12496 + }, + { + "epoch": 0.36693287920606027, + "grad_norm": 0.0, + "learning_rate": 1.4613971136036278e-05, + "loss": 1.375, + "step": 12497 + }, + { + "epoch": 0.36696224088319923, + "grad_norm": 0.0, + "learning_rate": 1.461312742910968e-05, + "loss": 1.3525, + "step": 12498 + }, + { + "epoch": 0.36699160256033825, + "grad_norm": 0.0, + "learning_rate": 1.4612283680465391e-05, + "loss": 1.3525, + "step": 12499 + }, + { + "epoch": 0.36702096423747727, + "grad_norm": 0.0, + "learning_rate": 1.4611439890111041e-05, + "loss": 1.415, + "step": 12500 + }, + { + "epoch": 0.3670503259146162, + "grad_norm": 0.0, + "learning_rate": 1.4610596058054259e-05, + "loss": 1.3818, + "step": 12501 + }, + { + "epoch": 0.36707968759175524, + "grad_norm": 0.0, + "learning_rate": 1.4609752184302672e-05, + "loss": 1.3481, + "step": 12502 + }, + { + "epoch": 0.36710904926889426, + "grad_norm": 0.0, + "learning_rate": 1.460890826886392e-05, + "loss": 1.4971, + "step": 12503 + }, + { + "epoch": 0.3671384109460332, + "grad_norm": 0.0, + "learning_rate": 1.460806431174563e-05, + "loss": 1.3125, + "step": 12504 + }, + { + "epoch": 0.36716777262317224, + "grad_norm": 0.0, + "learning_rate": 1.4607220312955432e-05, + "loss": 1.293, + "step": 12505 + }, + { + "epoch": 0.36719713430031126, + "grad_norm": 0.0, + "learning_rate": 1.4606376272500962e-05, + "loss": 1.4014, + "step": 12506 + }, + { + "epoch": 0.3672264959774502, + "grad_norm": 0.0, + "learning_rate": 1.4605532190389849e-05, + "loss": 1.3848, + "step": 12507 + }, + { + "epoch": 0.36725585765458924, + "grad_norm": 0.0, + "learning_rate": 1.4604688066629732e-05, + "loss": 1.2271, + "step": 12508 + }, + { + "epoch": 0.36728521933172825, + "grad_norm": 0.0, + "learning_rate": 1.4603843901228241e-05, + "loss": 1.3125, + "step": 12509 + }, + { + "epoch": 0.3673145810088672, + "grad_norm": 0.0, + "learning_rate": 1.4602999694193009e-05, + "loss": 1.2388, + "step": 12510 + }, + { + "epoch": 0.36734394268600623, + "grad_norm": 0.0, + "learning_rate": 1.4602155445531672e-05, + "loss": 1.332, + "step": 12511 + }, + { + "epoch": 0.36737330436314525, + "grad_norm": 0.0, + "learning_rate": 1.4601311155251868e-05, + "loss": 1.4961, + "step": 12512 + }, + { + "epoch": 0.3674026660402842, + "grad_norm": 0.0, + "learning_rate": 1.4600466823361226e-05, + "loss": 1.2676, + "step": 12513 + }, + { + "epoch": 0.3674320277174232, + "grad_norm": 0.0, + "learning_rate": 1.4599622449867384e-05, + "loss": 1.4023, + "step": 12514 + }, + { + "epoch": 0.36746138939456224, + "grad_norm": 0.0, + "learning_rate": 1.4598778034777978e-05, + "loss": 1.4824, + "step": 12515 + }, + { + "epoch": 0.3674907510717012, + "grad_norm": 0.0, + "learning_rate": 1.4597933578100649e-05, + "loss": 1.3633, + "step": 12516 + }, + { + "epoch": 0.3675201127488402, + "grad_norm": 0.0, + "learning_rate": 1.4597089079843026e-05, + "loss": 1.3701, + "step": 12517 + }, + { + "epoch": 0.36754947442597924, + "grad_norm": 0.0, + "learning_rate": 1.4596244540012746e-05, + "loss": 1.2344, + "step": 12518 + }, + { + "epoch": 0.3675788361031182, + "grad_norm": 0.0, + "learning_rate": 1.459539995861745e-05, + "loss": 1.1475, + "step": 12519 + }, + { + "epoch": 0.3676081977802572, + "grad_norm": 0.0, + "learning_rate": 1.4594555335664779e-05, + "loss": 1.3691, + "step": 12520 + }, + { + "epoch": 0.3676375594573962, + "grad_norm": 0.0, + "learning_rate": 1.4593710671162366e-05, + "loss": 1.2354, + "step": 12521 + }, + { + "epoch": 0.3676669211345352, + "grad_norm": 0.0, + "learning_rate": 1.459286596511785e-05, + "loss": 1.3623, + "step": 12522 + }, + { + "epoch": 0.3676962828116742, + "grad_norm": 0.0, + "learning_rate": 1.4592021217538872e-05, + "loss": 1.4551, + "step": 12523 + }, + { + "epoch": 0.3677256444888132, + "grad_norm": 0.0, + "learning_rate": 1.459117642843307e-05, + "loss": 1.2241, + "step": 12524 + }, + { + "epoch": 0.3677550061659522, + "grad_norm": 0.0, + "learning_rate": 1.4590331597808085e-05, + "loss": 1.4043, + "step": 12525 + }, + { + "epoch": 0.3677843678430912, + "grad_norm": 0.0, + "learning_rate": 1.4589486725671556e-05, + "loss": 1.2275, + "step": 12526 + }, + { + "epoch": 0.36781372952023017, + "grad_norm": 0.0, + "learning_rate": 1.4588641812031118e-05, + "loss": 1.4473, + "step": 12527 + }, + { + "epoch": 0.3678430911973692, + "grad_norm": 0.0, + "learning_rate": 1.4587796856894424e-05, + "loss": 1.3242, + "step": 12528 + }, + { + "epoch": 0.3678724528745082, + "grad_norm": 0.0, + "learning_rate": 1.4586951860269103e-05, + "loss": 1.3887, + "step": 12529 + }, + { + "epoch": 0.36790181455164717, + "grad_norm": 0.0, + "learning_rate": 1.4586106822162802e-05, + "loss": 1.3633, + "step": 12530 + }, + { + "epoch": 0.3679311762287862, + "grad_norm": 0.0, + "learning_rate": 1.4585261742583164e-05, + "loss": 1.3428, + "step": 12531 + }, + { + "epoch": 0.3679605379059252, + "grad_norm": 0.0, + "learning_rate": 1.458441662153783e-05, + "loss": 1.4287, + "step": 12532 + }, + { + "epoch": 0.36798989958306416, + "grad_norm": 0.0, + "learning_rate": 1.458357145903444e-05, + "loss": 1.3896, + "step": 12533 + }, + { + "epoch": 0.3680192612602032, + "grad_norm": 0.0, + "learning_rate": 1.4582726255080641e-05, + "loss": 1.3545, + "step": 12534 + }, + { + "epoch": 0.3680486229373422, + "grad_norm": 0.0, + "learning_rate": 1.4581881009684075e-05, + "loss": 1.374, + "step": 12535 + }, + { + "epoch": 0.36807798461448116, + "grad_norm": 0.0, + "learning_rate": 1.4581035722852385e-05, + "loss": 1.3213, + "step": 12536 + }, + { + "epoch": 0.3681073462916202, + "grad_norm": 0.0, + "learning_rate": 1.4580190394593218e-05, + "loss": 1.2769, + "step": 12537 + }, + { + "epoch": 0.3681367079687592, + "grad_norm": 0.0, + "learning_rate": 1.4579345024914213e-05, + "loss": 1.3711, + "step": 12538 + }, + { + "epoch": 0.36816606964589815, + "grad_norm": 0.0, + "learning_rate": 1.457849961382302e-05, + "loss": 1.3833, + "step": 12539 + }, + { + "epoch": 0.36819543132303717, + "grad_norm": 0.0, + "learning_rate": 1.4577654161327284e-05, + "loss": 1.4385, + "step": 12540 + }, + { + "epoch": 0.3682247930001762, + "grad_norm": 0.0, + "learning_rate": 1.4576808667434645e-05, + "loss": 1.3789, + "step": 12541 + }, + { + "epoch": 0.36825415467731515, + "grad_norm": 0.0, + "learning_rate": 1.4575963132152755e-05, + "loss": 1.2393, + "step": 12542 + }, + { + "epoch": 0.36828351635445417, + "grad_norm": 0.0, + "learning_rate": 1.457511755548926e-05, + "loss": 1.415, + "step": 12543 + }, + { + "epoch": 0.3683128780315932, + "grad_norm": 0.0, + "learning_rate": 1.4574271937451805e-05, + "loss": 1.4316, + "step": 12544 + }, + { + "epoch": 0.36834223970873214, + "grad_norm": 0.0, + "learning_rate": 1.4573426278048038e-05, + "loss": 1.4463, + "step": 12545 + }, + { + "epoch": 0.36837160138587116, + "grad_norm": 0.0, + "learning_rate": 1.4572580577285602e-05, + "loss": 1.3486, + "step": 12546 + }, + { + "epoch": 0.3684009630630102, + "grad_norm": 0.0, + "learning_rate": 1.4571734835172151e-05, + "loss": 1.5078, + "step": 12547 + }, + { + "epoch": 0.36843032474014914, + "grad_norm": 0.0, + "learning_rate": 1.4570889051715332e-05, + "loss": 1.4883, + "step": 12548 + }, + { + "epoch": 0.36845968641728816, + "grad_norm": 0.0, + "learning_rate": 1.4570043226922792e-05, + "loss": 1.4473, + "step": 12549 + }, + { + "epoch": 0.3684890480944272, + "grad_norm": 0.0, + "learning_rate": 1.4569197360802179e-05, + "loss": 1.2754, + "step": 12550 + }, + { + "epoch": 0.36851840977156614, + "grad_norm": 0.0, + "learning_rate": 1.4568351453361144e-05, + "loss": 1.3682, + "step": 12551 + }, + { + "epoch": 0.36854777144870515, + "grad_norm": 0.0, + "learning_rate": 1.456750550460734e-05, + "loss": 1.4072, + "step": 12552 + }, + { + "epoch": 0.36857713312584417, + "grad_norm": 0.0, + "learning_rate": 1.456665951454841e-05, + "loss": 1.5352, + "step": 12553 + }, + { + "epoch": 0.36860649480298313, + "grad_norm": 0.0, + "learning_rate": 1.456581348319201e-05, + "loss": 1.2437, + "step": 12554 + }, + { + "epoch": 0.36863585648012215, + "grad_norm": 0.0, + "learning_rate": 1.4564967410545794e-05, + "loss": 1.4316, + "step": 12555 + }, + { + "epoch": 0.36866521815726117, + "grad_norm": 0.0, + "learning_rate": 1.4564121296617403e-05, + "loss": 1.3496, + "step": 12556 + }, + { + "epoch": 0.3686945798344001, + "grad_norm": 0.0, + "learning_rate": 1.4563275141414497e-05, + "loss": 1.272, + "step": 12557 + }, + { + "epoch": 0.36872394151153914, + "grad_norm": 0.0, + "learning_rate": 1.4562428944944724e-05, + "loss": 1.3086, + "step": 12558 + }, + { + "epoch": 0.36875330318867816, + "grad_norm": 0.0, + "learning_rate": 1.4561582707215738e-05, + "loss": 1.3545, + "step": 12559 + }, + { + "epoch": 0.3687826648658171, + "grad_norm": 0.0, + "learning_rate": 1.4560736428235189e-05, + "loss": 1.2988, + "step": 12560 + }, + { + "epoch": 0.36881202654295614, + "grad_norm": 0.0, + "learning_rate": 1.4559890108010736e-05, + "loss": 1.416, + "step": 12561 + }, + { + "epoch": 0.36884138822009516, + "grad_norm": 0.0, + "learning_rate": 1.4559043746550028e-05, + "loss": 1.3604, + "step": 12562 + }, + { + "epoch": 0.3688707498972341, + "grad_norm": 0.0, + "learning_rate": 1.4558197343860718e-05, + "loss": 1.4141, + "step": 12563 + }, + { + "epoch": 0.36890011157437314, + "grad_norm": 0.0, + "learning_rate": 1.4557350899950464e-05, + "loss": 1.3682, + "step": 12564 + }, + { + "epoch": 0.36892947325151215, + "grad_norm": 0.0, + "learning_rate": 1.455650441482692e-05, + "loss": 1.3535, + "step": 12565 + }, + { + "epoch": 0.3689588349286511, + "grad_norm": 0.0, + "learning_rate": 1.4555657888497737e-05, + "loss": 1.4453, + "step": 12566 + }, + { + "epoch": 0.36898819660579013, + "grad_norm": 0.0, + "learning_rate": 1.4554811320970572e-05, + "loss": 1.2373, + "step": 12567 + }, + { + "epoch": 0.36901755828292915, + "grad_norm": 0.0, + "learning_rate": 1.4553964712253083e-05, + "loss": 1.4404, + "step": 12568 + }, + { + "epoch": 0.3690469199600681, + "grad_norm": 0.0, + "learning_rate": 1.4553118062352927e-05, + "loss": 1.3691, + "step": 12569 + }, + { + "epoch": 0.3690762816372071, + "grad_norm": 0.0, + "learning_rate": 1.4552271371277755e-05, + "loss": 1.2227, + "step": 12570 + }, + { + "epoch": 0.3691056433143461, + "grad_norm": 0.0, + "learning_rate": 1.4551424639035228e-05, + "loss": 1.1963, + "step": 12571 + }, + { + "epoch": 0.3691350049914851, + "grad_norm": 0.0, + "learning_rate": 1.4550577865633004e-05, + "loss": 1.3809, + "step": 12572 + }, + { + "epoch": 0.3691643666686241, + "grad_norm": 0.0, + "learning_rate": 1.454973105107874e-05, + "loss": 1.415, + "step": 12573 + }, + { + "epoch": 0.3691937283457631, + "grad_norm": 0.0, + "learning_rate": 1.4548884195380091e-05, + "loss": 1.3975, + "step": 12574 + }, + { + "epoch": 0.3692230900229021, + "grad_norm": 0.0, + "learning_rate": 1.4548037298544717e-05, + "loss": 1.3916, + "step": 12575 + }, + { + "epoch": 0.3692524517000411, + "grad_norm": 0.0, + "learning_rate": 1.4547190360580275e-05, + "loss": 1.4033, + "step": 12576 + }, + { + "epoch": 0.3692818133771801, + "grad_norm": 0.0, + "learning_rate": 1.454634338149443e-05, + "loss": 1.3252, + "step": 12577 + }, + { + "epoch": 0.3693111750543191, + "grad_norm": 0.0, + "learning_rate": 1.4545496361294835e-05, + "loss": 1.4199, + "step": 12578 + }, + { + "epoch": 0.3693405367314581, + "grad_norm": 0.0, + "learning_rate": 1.4544649299989151e-05, + "loss": 1.4404, + "step": 12579 + }, + { + "epoch": 0.3693698984085971, + "grad_norm": 0.0, + "learning_rate": 1.454380219758504e-05, + "loss": 1.4697, + "step": 12580 + }, + { + "epoch": 0.3693992600857361, + "grad_norm": 0.0, + "learning_rate": 1.4542955054090167e-05, + "loss": 1.5679, + "step": 12581 + }, + { + "epoch": 0.3694286217628751, + "grad_norm": 0.0, + "learning_rate": 1.4542107869512184e-05, + "loss": 1.3438, + "step": 12582 + }, + { + "epoch": 0.36945798344001407, + "grad_norm": 0.0, + "learning_rate": 1.4541260643858754e-05, + "loss": 1.334, + "step": 12583 + }, + { + "epoch": 0.3694873451171531, + "grad_norm": 0.0, + "learning_rate": 1.4540413377137545e-05, + "loss": 1.3291, + "step": 12584 + }, + { + "epoch": 0.3695167067942921, + "grad_norm": 0.0, + "learning_rate": 1.4539566069356212e-05, + "loss": 1.4121, + "step": 12585 + }, + { + "epoch": 0.36954606847143107, + "grad_norm": 0.0, + "learning_rate": 1.4538718720522425e-05, + "loss": 1.3799, + "step": 12586 + }, + { + "epoch": 0.3695754301485701, + "grad_norm": 0.0, + "learning_rate": 1.4537871330643838e-05, + "loss": 1.2842, + "step": 12587 + }, + { + "epoch": 0.3696047918257091, + "grad_norm": 0.0, + "learning_rate": 1.4537023899728118e-05, + "loss": 1.3135, + "step": 12588 + }, + { + "epoch": 0.36963415350284806, + "grad_norm": 0.0, + "learning_rate": 1.4536176427782929e-05, + "loss": 1.3799, + "step": 12589 + }, + { + "epoch": 0.3696635151799871, + "grad_norm": 0.0, + "learning_rate": 1.4535328914815938e-05, + "loss": 1.2568, + "step": 12590 + }, + { + "epoch": 0.3696928768571261, + "grad_norm": 0.0, + "learning_rate": 1.4534481360834805e-05, + "loss": 1.3584, + "step": 12591 + }, + { + "epoch": 0.36972223853426506, + "grad_norm": 0.0, + "learning_rate": 1.4533633765847195e-05, + "loss": 1.3447, + "step": 12592 + }, + { + "epoch": 0.3697516002114041, + "grad_norm": 0.0, + "learning_rate": 1.4532786129860773e-05, + "loss": 1.4668, + "step": 12593 + }, + { + "epoch": 0.3697809618885431, + "grad_norm": 0.0, + "learning_rate": 1.453193845288321e-05, + "loss": 1.3691, + "step": 12594 + }, + { + "epoch": 0.36981032356568205, + "grad_norm": 0.0, + "learning_rate": 1.453109073492216e-05, + "loss": 1.4062, + "step": 12595 + }, + { + "epoch": 0.36983968524282107, + "grad_norm": 0.0, + "learning_rate": 1.4530242975985301e-05, + "loss": 1.3604, + "step": 12596 + }, + { + "epoch": 0.3698690469199601, + "grad_norm": 0.0, + "learning_rate": 1.4529395176080289e-05, + "loss": 1.4258, + "step": 12597 + }, + { + "epoch": 0.36989840859709905, + "grad_norm": 0.0, + "learning_rate": 1.4528547335214802e-05, + "loss": 1.4121, + "step": 12598 + }, + { + "epoch": 0.36992777027423807, + "grad_norm": 0.0, + "learning_rate": 1.45276994533965e-05, + "loss": 1.335, + "step": 12599 + }, + { + "epoch": 0.3699571319513771, + "grad_norm": 0.0, + "learning_rate": 1.452685153063305e-05, + "loss": 1.3096, + "step": 12600 + }, + { + "epoch": 0.36998649362851604, + "grad_norm": 0.0, + "learning_rate": 1.4526003566932126e-05, + "loss": 1.4004, + "step": 12601 + }, + { + "epoch": 0.37001585530565506, + "grad_norm": 0.0, + "learning_rate": 1.4525155562301392e-05, + "loss": 1.2627, + "step": 12602 + }, + { + "epoch": 0.3700452169827941, + "grad_norm": 0.0, + "learning_rate": 1.4524307516748514e-05, + "loss": 1.3848, + "step": 12603 + }, + { + "epoch": 0.37007457865993304, + "grad_norm": 0.0, + "learning_rate": 1.4523459430281169e-05, + "loss": 1.3486, + "step": 12604 + }, + { + "epoch": 0.37010394033707206, + "grad_norm": 0.0, + "learning_rate": 1.452261130290702e-05, + "loss": 1.3525, + "step": 12605 + }, + { + "epoch": 0.3701333020142111, + "grad_norm": 0.0, + "learning_rate": 1.4521763134633738e-05, + "loss": 1.1982, + "step": 12606 + }, + { + "epoch": 0.37016266369135004, + "grad_norm": 0.0, + "learning_rate": 1.4520914925468995e-05, + "loss": 1.3672, + "step": 12607 + }, + { + "epoch": 0.37019202536848905, + "grad_norm": 0.0, + "learning_rate": 1.4520066675420457e-05, + "loss": 1.6348, + "step": 12608 + }, + { + "epoch": 0.37022138704562807, + "grad_norm": 0.0, + "learning_rate": 1.4519218384495801e-05, + "loss": 1.4121, + "step": 12609 + }, + { + "epoch": 0.37025074872276703, + "grad_norm": 0.0, + "learning_rate": 1.4518370052702698e-05, + "loss": 1.4014, + "step": 12610 + }, + { + "epoch": 0.37028011039990605, + "grad_norm": 0.0, + "learning_rate": 1.4517521680048815e-05, + "loss": 1.334, + "step": 12611 + }, + { + "epoch": 0.37030947207704507, + "grad_norm": 0.0, + "learning_rate": 1.4516673266541825e-05, + "loss": 1.2949, + "step": 12612 + }, + { + "epoch": 0.370338833754184, + "grad_norm": 0.0, + "learning_rate": 1.4515824812189403e-05, + "loss": 1.4414, + "step": 12613 + }, + { + "epoch": 0.37036819543132304, + "grad_norm": 0.0, + "learning_rate": 1.4514976316999223e-05, + "loss": 1.2866, + "step": 12614 + }, + { + "epoch": 0.37039755710846206, + "grad_norm": 0.0, + "learning_rate": 1.4514127780978954e-05, + "loss": 1.4102, + "step": 12615 + }, + { + "epoch": 0.370426918785601, + "grad_norm": 0.0, + "learning_rate": 1.4513279204136273e-05, + "loss": 1.3506, + "step": 12616 + }, + { + "epoch": 0.37045628046274004, + "grad_norm": 0.0, + "learning_rate": 1.4512430586478852e-05, + "loss": 1.3936, + "step": 12617 + }, + { + "epoch": 0.37048564213987906, + "grad_norm": 0.0, + "learning_rate": 1.4511581928014364e-05, + "loss": 1.415, + "step": 12618 + }, + { + "epoch": 0.370515003817018, + "grad_norm": 0.0, + "learning_rate": 1.4510733228750487e-05, + "loss": 1.2725, + "step": 12619 + }, + { + "epoch": 0.37054436549415704, + "grad_norm": 0.0, + "learning_rate": 1.4509884488694889e-05, + "loss": 1.3286, + "step": 12620 + }, + { + "epoch": 0.370573727171296, + "grad_norm": 0.0, + "learning_rate": 1.4509035707855255e-05, + "loss": 1.2754, + "step": 12621 + }, + { + "epoch": 0.370603088848435, + "grad_norm": 0.0, + "learning_rate": 1.450818688623926e-05, + "loss": 1.4395, + "step": 12622 + }, + { + "epoch": 0.37063245052557403, + "grad_norm": 0.0, + "learning_rate": 1.4507338023854572e-05, + "loss": 1.4424, + "step": 12623 + }, + { + "epoch": 0.370661812202713, + "grad_norm": 0.0, + "learning_rate": 1.450648912070887e-05, + "loss": 1.2939, + "step": 12624 + }, + { + "epoch": 0.370691173879852, + "grad_norm": 0.0, + "learning_rate": 1.4505640176809834e-05, + "loss": 1.4072, + "step": 12625 + }, + { + "epoch": 0.370720535556991, + "grad_norm": 0.0, + "learning_rate": 1.4504791192165142e-05, + "loss": 1.2324, + "step": 12626 + }, + { + "epoch": 0.37074989723413, + "grad_norm": 0.0, + "learning_rate": 1.450394216678247e-05, + "loss": 1.3994, + "step": 12627 + }, + { + "epoch": 0.370779258911269, + "grad_norm": 0.0, + "learning_rate": 1.4503093100669494e-05, + "loss": 1.4287, + "step": 12628 + }, + { + "epoch": 0.370808620588408, + "grad_norm": 0.0, + "learning_rate": 1.4502243993833892e-05, + "loss": 1.3643, + "step": 12629 + }, + { + "epoch": 0.370837982265547, + "grad_norm": 0.0, + "learning_rate": 1.4501394846283347e-05, + "loss": 1.2271, + "step": 12630 + }, + { + "epoch": 0.370867343942686, + "grad_norm": 0.0, + "learning_rate": 1.4500545658025535e-05, + "loss": 1.4248, + "step": 12631 + }, + { + "epoch": 0.370896705619825, + "grad_norm": 0.0, + "learning_rate": 1.4499696429068134e-05, + "loss": 1.3018, + "step": 12632 + }, + { + "epoch": 0.370926067296964, + "grad_norm": 0.0, + "learning_rate": 1.449884715941883e-05, + "loss": 1.3193, + "step": 12633 + }, + { + "epoch": 0.370955428974103, + "grad_norm": 0.0, + "learning_rate": 1.4497997849085297e-05, + "loss": 1.2646, + "step": 12634 + }, + { + "epoch": 0.370984790651242, + "grad_norm": 0.0, + "learning_rate": 1.4497148498075217e-05, + "loss": 1.4775, + "step": 12635 + }, + { + "epoch": 0.371014152328381, + "grad_norm": 0.0, + "learning_rate": 1.4496299106396272e-05, + "loss": 1.4688, + "step": 12636 + }, + { + "epoch": 0.37104351400552, + "grad_norm": 0.0, + "learning_rate": 1.4495449674056142e-05, + "loss": 1.373, + "step": 12637 + }, + { + "epoch": 0.371072875682659, + "grad_norm": 0.0, + "learning_rate": 1.4494600201062508e-05, + "loss": 1.335, + "step": 12638 + }, + { + "epoch": 0.37110223735979797, + "grad_norm": 0.0, + "learning_rate": 1.4493750687423056e-05, + "loss": 1.3105, + "step": 12639 + }, + { + "epoch": 0.371131599036937, + "grad_norm": 0.0, + "learning_rate": 1.4492901133145462e-05, + "loss": 1.4219, + "step": 12640 + }, + { + "epoch": 0.371160960714076, + "grad_norm": 0.0, + "learning_rate": 1.4492051538237416e-05, + "loss": 1.4238, + "step": 12641 + }, + { + "epoch": 0.37119032239121497, + "grad_norm": 0.0, + "learning_rate": 1.4491201902706597e-05, + "loss": 1.3887, + "step": 12642 + }, + { + "epoch": 0.371219684068354, + "grad_norm": 0.0, + "learning_rate": 1.4490352226560691e-05, + "loss": 1.3389, + "step": 12643 + }, + { + "epoch": 0.371249045745493, + "grad_norm": 0.0, + "learning_rate": 1.4489502509807375e-05, + "loss": 1.4902, + "step": 12644 + }, + { + "epoch": 0.37127840742263196, + "grad_norm": 0.0, + "learning_rate": 1.4488652752454342e-05, + "loss": 1.251, + "step": 12645 + }, + { + "epoch": 0.371307769099771, + "grad_norm": 0.0, + "learning_rate": 1.448780295450927e-05, + "loss": 1.3701, + "step": 12646 + }, + { + "epoch": 0.37133713077691, + "grad_norm": 0.0, + "learning_rate": 1.4486953115979848e-05, + "loss": 1.4639, + "step": 12647 + }, + { + "epoch": 0.37136649245404896, + "grad_norm": 0.0, + "learning_rate": 1.4486103236873758e-05, + "loss": 1.4131, + "step": 12648 + }, + { + "epoch": 0.371395854131188, + "grad_norm": 0.0, + "learning_rate": 1.4485253317198689e-05, + "loss": 1.3252, + "step": 12649 + }, + { + "epoch": 0.371425215808327, + "grad_norm": 0.0, + "learning_rate": 1.4484403356962325e-05, + "loss": 1.2754, + "step": 12650 + }, + { + "epoch": 0.37145457748546595, + "grad_norm": 0.0, + "learning_rate": 1.4483553356172353e-05, + "loss": 1.4229, + "step": 12651 + }, + { + "epoch": 0.37148393916260497, + "grad_norm": 0.0, + "learning_rate": 1.4482703314836459e-05, + "loss": 1.4717, + "step": 12652 + }, + { + "epoch": 0.371513300839744, + "grad_norm": 0.0, + "learning_rate": 1.4481853232962335e-05, + "loss": 1.4258, + "step": 12653 + }, + { + "epoch": 0.37154266251688295, + "grad_norm": 0.0, + "learning_rate": 1.4481003110557658e-05, + "loss": 1.2451, + "step": 12654 + }, + { + "epoch": 0.37157202419402197, + "grad_norm": 0.0, + "learning_rate": 1.4480152947630129e-05, + "loss": 1.4102, + "step": 12655 + }, + { + "epoch": 0.371601385871161, + "grad_norm": 0.0, + "learning_rate": 1.4479302744187424e-05, + "loss": 1.5029, + "step": 12656 + }, + { + "epoch": 0.37163074754829994, + "grad_norm": 0.0, + "learning_rate": 1.4478452500237239e-05, + "loss": 1.1851, + "step": 12657 + }, + { + "epoch": 0.37166010922543896, + "grad_norm": 0.0, + "learning_rate": 1.4477602215787261e-05, + "loss": 1.2969, + "step": 12658 + }, + { + "epoch": 0.371689470902578, + "grad_norm": 0.0, + "learning_rate": 1.4476751890845182e-05, + "loss": 1.417, + "step": 12659 + }, + { + "epoch": 0.37171883257971694, + "grad_norm": 0.0, + "learning_rate": 1.4475901525418685e-05, + "loss": 1.4863, + "step": 12660 + }, + { + "epoch": 0.37174819425685596, + "grad_norm": 0.0, + "learning_rate": 1.4475051119515467e-05, + "loss": 1.2344, + "step": 12661 + }, + { + "epoch": 0.371777555933995, + "grad_norm": 0.0, + "learning_rate": 1.4474200673143213e-05, + "loss": 1.416, + "step": 12662 + }, + { + "epoch": 0.37180691761113394, + "grad_norm": 0.0, + "learning_rate": 1.447335018630962e-05, + "loss": 1.2456, + "step": 12663 + }, + { + "epoch": 0.37183627928827295, + "grad_norm": 0.0, + "learning_rate": 1.4472499659022375e-05, + "loss": 1.4189, + "step": 12664 + }, + { + "epoch": 0.37186564096541197, + "grad_norm": 0.0, + "learning_rate": 1.4471649091289169e-05, + "loss": 1.4258, + "step": 12665 + }, + { + "epoch": 0.37189500264255093, + "grad_norm": 0.0, + "learning_rate": 1.4470798483117694e-05, + "loss": 1.2646, + "step": 12666 + }, + { + "epoch": 0.37192436431968995, + "grad_norm": 0.0, + "learning_rate": 1.4469947834515646e-05, + "loss": 1.4365, + "step": 12667 + }, + { + "epoch": 0.37195372599682897, + "grad_norm": 0.0, + "learning_rate": 1.4469097145490711e-05, + "loss": 1.3936, + "step": 12668 + }, + { + "epoch": 0.3719830876739679, + "grad_norm": 0.0, + "learning_rate": 1.4468246416050589e-05, + "loss": 1.4033, + "step": 12669 + }, + { + "epoch": 0.37201244935110694, + "grad_norm": 0.0, + "learning_rate": 1.4467395646202968e-05, + "loss": 1.3867, + "step": 12670 + }, + { + "epoch": 0.3720418110282459, + "grad_norm": 0.0, + "learning_rate": 1.4466544835955547e-05, + "loss": 1.2822, + "step": 12671 + }, + { + "epoch": 0.3720711727053849, + "grad_norm": 0.0, + "learning_rate": 1.4465693985316014e-05, + "loss": 1.4307, + "step": 12672 + }, + { + "epoch": 0.37210053438252394, + "grad_norm": 0.0, + "learning_rate": 1.4464843094292072e-05, + "loss": 1.3672, + "step": 12673 + }, + { + "epoch": 0.3721298960596629, + "grad_norm": 0.0, + "learning_rate": 1.4463992162891404e-05, + "loss": 1.3467, + "step": 12674 + }, + { + "epoch": 0.3721592577368019, + "grad_norm": 0.0, + "learning_rate": 1.4463141191121715e-05, + "loss": 1.3926, + "step": 12675 + }, + { + "epoch": 0.37218861941394094, + "grad_norm": 0.0, + "learning_rate": 1.4462290178990696e-05, + "loss": 1.4795, + "step": 12676 + }, + { + "epoch": 0.3722179810910799, + "grad_norm": 0.0, + "learning_rate": 1.4461439126506048e-05, + "loss": 1.3779, + "step": 12677 + }, + { + "epoch": 0.3722473427682189, + "grad_norm": 0.0, + "learning_rate": 1.4460588033675457e-05, + "loss": 1.5234, + "step": 12678 + }, + { + "epoch": 0.37227670444535793, + "grad_norm": 0.0, + "learning_rate": 1.4459736900506628e-05, + "loss": 1.459, + "step": 12679 + }, + { + "epoch": 0.3723060661224969, + "grad_norm": 0.0, + "learning_rate": 1.445888572700726e-05, + "loss": 1.4111, + "step": 12680 + }, + { + "epoch": 0.3723354277996359, + "grad_norm": 0.0, + "learning_rate": 1.4458034513185042e-05, + "loss": 1.2441, + "step": 12681 + }, + { + "epoch": 0.3723647894767749, + "grad_norm": 0.0, + "learning_rate": 1.4457183259047678e-05, + "loss": 1.3691, + "step": 12682 + }, + { + "epoch": 0.3723941511539139, + "grad_norm": 0.0, + "learning_rate": 1.4456331964602866e-05, + "loss": 1.2979, + "step": 12683 + }, + { + "epoch": 0.3724235128310529, + "grad_norm": 0.0, + "learning_rate": 1.44554806298583e-05, + "loss": 1.4229, + "step": 12684 + }, + { + "epoch": 0.3724528745081919, + "grad_norm": 0.0, + "learning_rate": 1.4454629254821683e-05, + "loss": 1.2637, + "step": 12685 + }, + { + "epoch": 0.3724822361853309, + "grad_norm": 0.0, + "learning_rate": 1.4453777839500711e-05, + "loss": 1.3027, + "step": 12686 + }, + { + "epoch": 0.3725115978624699, + "grad_norm": 0.0, + "learning_rate": 1.4452926383903085e-05, + "loss": 1.4199, + "step": 12687 + }, + { + "epoch": 0.3725409595396089, + "grad_norm": 0.0, + "learning_rate": 1.4452074888036508e-05, + "loss": 1.2031, + "step": 12688 + }, + { + "epoch": 0.3725703212167479, + "grad_norm": 0.0, + "learning_rate": 1.4451223351908677e-05, + "loss": 1.4941, + "step": 12689 + }, + { + "epoch": 0.3725996828938869, + "grad_norm": 0.0, + "learning_rate": 1.4450371775527292e-05, + "loss": 1.3467, + "step": 12690 + }, + { + "epoch": 0.3726290445710259, + "grad_norm": 0.0, + "learning_rate": 1.4449520158900056e-05, + "loss": 1.4326, + "step": 12691 + }, + { + "epoch": 0.3726584062481649, + "grad_norm": 0.0, + "learning_rate": 1.4448668502034673e-05, + "loss": 1.4111, + "step": 12692 + }, + { + "epoch": 0.3726877679253039, + "grad_norm": 0.0, + "learning_rate": 1.4447816804938838e-05, + "loss": 1.3711, + "step": 12693 + }, + { + "epoch": 0.3727171296024429, + "grad_norm": 0.0, + "learning_rate": 1.4446965067620256e-05, + "loss": 1.3301, + "step": 12694 + }, + { + "epoch": 0.37274649127958187, + "grad_norm": 0.0, + "learning_rate": 1.444611329008663e-05, + "loss": 1.3047, + "step": 12695 + }, + { + "epoch": 0.3727758529567209, + "grad_norm": 0.0, + "learning_rate": 1.4445261472345664e-05, + "loss": 1.2578, + "step": 12696 + }, + { + "epoch": 0.3728052146338599, + "grad_norm": 0.0, + "learning_rate": 1.444440961440506e-05, + "loss": 1.3848, + "step": 12697 + }, + { + "epoch": 0.37283457631099887, + "grad_norm": 0.0, + "learning_rate": 1.444355771627252e-05, + "loss": 1.3887, + "step": 12698 + }, + { + "epoch": 0.3728639379881379, + "grad_norm": 0.0, + "learning_rate": 1.444270577795575e-05, + "loss": 1.2095, + "step": 12699 + }, + { + "epoch": 0.3728932996652769, + "grad_norm": 0.0, + "learning_rate": 1.4441853799462457e-05, + "loss": 1.333, + "step": 12700 + }, + { + "epoch": 0.37292266134241586, + "grad_norm": 0.0, + "learning_rate": 1.4441001780800338e-05, + "loss": 1.293, + "step": 12701 + }, + { + "epoch": 0.3729520230195549, + "grad_norm": 0.0, + "learning_rate": 1.4440149721977106e-05, + "loss": 1.2734, + "step": 12702 + }, + { + "epoch": 0.3729813846966939, + "grad_norm": 0.0, + "learning_rate": 1.443929762300046e-05, + "loss": 1.3379, + "step": 12703 + }, + { + "epoch": 0.37301074637383286, + "grad_norm": 0.0, + "learning_rate": 1.4438445483878112e-05, + "loss": 1.2441, + "step": 12704 + }, + { + "epoch": 0.3730401080509719, + "grad_norm": 0.0, + "learning_rate": 1.4437593304617762e-05, + "loss": 1.2676, + "step": 12705 + }, + { + "epoch": 0.3730694697281109, + "grad_norm": 0.0, + "learning_rate": 1.4436741085227121e-05, + "loss": 1.3892, + "step": 12706 + }, + { + "epoch": 0.37309883140524985, + "grad_norm": 0.0, + "learning_rate": 1.4435888825713893e-05, + "loss": 1.4551, + "step": 12707 + }, + { + "epoch": 0.37312819308238887, + "grad_norm": 0.0, + "learning_rate": 1.4435036526085789e-05, + "loss": 1.2197, + "step": 12708 + }, + { + "epoch": 0.3731575547595279, + "grad_norm": 0.0, + "learning_rate": 1.443418418635051e-05, + "loss": 1.458, + "step": 12709 + }, + { + "epoch": 0.37318691643666685, + "grad_norm": 0.0, + "learning_rate": 1.443333180651577e-05, + "loss": 1.2563, + "step": 12710 + }, + { + "epoch": 0.37321627811380587, + "grad_norm": 0.0, + "learning_rate": 1.4432479386589273e-05, + "loss": 1.4033, + "step": 12711 + }, + { + "epoch": 0.3732456397909449, + "grad_norm": 0.0, + "learning_rate": 1.4431626926578732e-05, + "loss": 1.3584, + "step": 12712 + }, + { + "epoch": 0.37327500146808384, + "grad_norm": 0.0, + "learning_rate": 1.4430774426491854e-05, + "loss": 1.3936, + "step": 12713 + }, + { + "epoch": 0.37330436314522286, + "grad_norm": 0.0, + "learning_rate": 1.4429921886336346e-05, + "loss": 1.2827, + "step": 12714 + }, + { + "epoch": 0.3733337248223619, + "grad_norm": 0.0, + "learning_rate": 1.442906930611992e-05, + "loss": 1.4814, + "step": 12715 + }, + { + "epoch": 0.37336308649950084, + "grad_norm": 0.0, + "learning_rate": 1.4428216685850288e-05, + "loss": 1.3633, + "step": 12716 + }, + { + "epoch": 0.37339244817663986, + "grad_norm": 0.0, + "learning_rate": 1.4427364025535158e-05, + "loss": 1.4131, + "step": 12717 + }, + { + "epoch": 0.3734218098537789, + "grad_norm": 0.0, + "learning_rate": 1.442651132518224e-05, + "loss": 1.291, + "step": 12718 + }, + { + "epoch": 0.37345117153091784, + "grad_norm": 0.0, + "learning_rate": 1.4425658584799246e-05, + "loss": 1.4971, + "step": 12719 + }, + { + "epoch": 0.37348053320805685, + "grad_norm": 0.0, + "learning_rate": 1.4424805804393891e-05, + "loss": 1.4414, + "step": 12720 + }, + { + "epoch": 0.37350989488519587, + "grad_norm": 0.0, + "learning_rate": 1.4423952983973882e-05, + "loss": 1.4111, + "step": 12721 + }, + { + "epoch": 0.37353925656233483, + "grad_norm": 0.0, + "learning_rate": 1.4423100123546934e-05, + "loss": 1.374, + "step": 12722 + }, + { + "epoch": 0.37356861823947385, + "grad_norm": 0.0, + "learning_rate": 1.4422247223120756e-05, + "loss": 1.0996, + "step": 12723 + }, + { + "epoch": 0.3735979799166128, + "grad_norm": 0.0, + "learning_rate": 1.4421394282703064e-05, + "loss": 1.3887, + "step": 12724 + }, + { + "epoch": 0.3736273415937518, + "grad_norm": 0.0, + "learning_rate": 1.4420541302301575e-05, + "loss": 1.2549, + "step": 12725 + }, + { + "epoch": 0.37365670327089084, + "grad_norm": 0.0, + "learning_rate": 1.4419688281923997e-05, + "loss": 1.5391, + "step": 12726 + }, + { + "epoch": 0.3736860649480298, + "grad_norm": 0.0, + "learning_rate": 1.4418835221578045e-05, + "loss": 1.3701, + "step": 12727 + }, + { + "epoch": 0.3737154266251688, + "grad_norm": 0.0, + "learning_rate": 1.4417982121271434e-05, + "loss": 1.2559, + "step": 12728 + }, + { + "epoch": 0.37374478830230784, + "grad_norm": 0.0, + "learning_rate": 1.441712898101188e-05, + "loss": 1.3076, + "step": 12729 + }, + { + "epoch": 0.3737741499794468, + "grad_norm": 0.0, + "learning_rate": 1.4416275800807098e-05, + "loss": 1.3906, + "step": 12730 + }, + { + "epoch": 0.3738035116565858, + "grad_norm": 0.0, + "learning_rate": 1.4415422580664803e-05, + "loss": 1.3428, + "step": 12731 + }, + { + "epoch": 0.37383287333372484, + "grad_norm": 0.0, + "learning_rate": 1.4414569320592714e-05, + "loss": 1.3506, + "step": 12732 + }, + { + "epoch": 0.3738622350108638, + "grad_norm": 0.0, + "learning_rate": 1.4413716020598538e-05, + "loss": 1.4746, + "step": 12733 + }, + { + "epoch": 0.3738915966880028, + "grad_norm": 0.0, + "learning_rate": 1.4412862680690001e-05, + "loss": 1.3438, + "step": 12734 + }, + { + "epoch": 0.37392095836514183, + "grad_norm": 0.0, + "learning_rate": 1.4412009300874817e-05, + "loss": 1.4268, + "step": 12735 + }, + { + "epoch": 0.3739503200422808, + "grad_norm": 0.0, + "learning_rate": 1.44111558811607e-05, + "loss": 1.3232, + "step": 12736 + }, + { + "epoch": 0.3739796817194198, + "grad_norm": 0.0, + "learning_rate": 1.4410302421555373e-05, + "loss": 1.4053, + "step": 12737 + }, + { + "epoch": 0.3740090433965588, + "grad_norm": 0.0, + "learning_rate": 1.4409448922066549e-05, + "loss": 1.3975, + "step": 12738 + }, + { + "epoch": 0.3740384050736978, + "grad_norm": 0.0, + "learning_rate": 1.4408595382701951e-05, + "loss": 1.4717, + "step": 12739 + }, + { + "epoch": 0.3740677667508368, + "grad_norm": 0.0, + "learning_rate": 1.4407741803469297e-05, + "loss": 1.3887, + "step": 12740 + }, + { + "epoch": 0.3740971284279758, + "grad_norm": 0.0, + "learning_rate": 1.4406888184376304e-05, + "loss": 1.4385, + "step": 12741 + }, + { + "epoch": 0.3741264901051148, + "grad_norm": 0.0, + "learning_rate": 1.4406034525430694e-05, + "loss": 1.3447, + "step": 12742 + }, + { + "epoch": 0.3741558517822538, + "grad_norm": 0.0, + "learning_rate": 1.4405180826640184e-05, + "loss": 1.4102, + "step": 12743 + }, + { + "epoch": 0.3741852134593928, + "grad_norm": 0.0, + "learning_rate": 1.4404327088012496e-05, + "loss": 1.334, + "step": 12744 + }, + { + "epoch": 0.3742145751365318, + "grad_norm": 0.0, + "learning_rate": 1.440347330955535e-05, + "loss": 1.418, + "step": 12745 + }, + { + "epoch": 0.3742439368136708, + "grad_norm": 0.0, + "learning_rate": 1.4402619491276465e-05, + "loss": 1.4229, + "step": 12746 + }, + { + "epoch": 0.3742732984908098, + "grad_norm": 0.0, + "learning_rate": 1.4401765633183567e-05, + "loss": 1.4229, + "step": 12747 + }, + { + "epoch": 0.3743026601679488, + "grad_norm": 0.0, + "learning_rate": 1.4400911735284373e-05, + "loss": 1.416, + "step": 12748 + }, + { + "epoch": 0.3743320218450878, + "grad_norm": 0.0, + "learning_rate": 1.4400057797586612e-05, + "loss": 1.3008, + "step": 12749 + }, + { + "epoch": 0.3743613835222268, + "grad_norm": 0.0, + "learning_rate": 1.4399203820097995e-05, + "loss": 1.3662, + "step": 12750 + }, + { + "epoch": 0.37439074519936577, + "grad_norm": 0.0, + "learning_rate": 1.4398349802826257e-05, + "loss": 1.3545, + "step": 12751 + }, + { + "epoch": 0.3744201068765048, + "grad_norm": 0.0, + "learning_rate": 1.439749574577911e-05, + "loss": 1.3408, + "step": 12752 + }, + { + "epoch": 0.3744494685536438, + "grad_norm": 0.0, + "learning_rate": 1.4396641648964287e-05, + "loss": 1.3896, + "step": 12753 + }, + { + "epoch": 0.37447883023078277, + "grad_norm": 0.0, + "learning_rate": 1.4395787512389505e-05, + "loss": 1.3779, + "step": 12754 + }, + { + "epoch": 0.3745081919079218, + "grad_norm": 0.0, + "learning_rate": 1.439493333606249e-05, + "loss": 1.2285, + "step": 12755 + }, + { + "epoch": 0.3745375535850608, + "grad_norm": 0.0, + "learning_rate": 1.439407911999097e-05, + "loss": 1.293, + "step": 12756 + }, + { + "epoch": 0.37456691526219976, + "grad_norm": 0.0, + "learning_rate": 1.4393224864182666e-05, + "loss": 1.2437, + "step": 12757 + }, + { + "epoch": 0.3745962769393388, + "grad_norm": 0.0, + "learning_rate": 1.4392370568645304e-05, + "loss": 1.3447, + "step": 12758 + }, + { + "epoch": 0.3746256386164778, + "grad_norm": 0.0, + "learning_rate": 1.4391516233386611e-05, + "loss": 1.3135, + "step": 12759 + }, + { + "epoch": 0.37465500029361676, + "grad_norm": 0.0, + "learning_rate": 1.439066185841431e-05, + "loss": 1.2598, + "step": 12760 + }, + { + "epoch": 0.3746843619707558, + "grad_norm": 0.0, + "learning_rate": 1.4389807443736132e-05, + "loss": 1.3604, + "step": 12761 + }, + { + "epoch": 0.3747137236478948, + "grad_norm": 0.0, + "learning_rate": 1.4388952989359801e-05, + "loss": 1.376, + "step": 12762 + }, + { + "epoch": 0.37474308532503375, + "grad_norm": 0.0, + "learning_rate": 1.4388098495293044e-05, + "loss": 1.2793, + "step": 12763 + }, + { + "epoch": 0.37477244700217277, + "grad_norm": 0.0, + "learning_rate": 1.4387243961543587e-05, + "loss": 1.3525, + "step": 12764 + }, + { + "epoch": 0.3748018086793118, + "grad_norm": 0.0, + "learning_rate": 1.4386389388119158e-05, + "loss": 1.4492, + "step": 12765 + }, + { + "epoch": 0.37483117035645075, + "grad_norm": 0.0, + "learning_rate": 1.438553477502749e-05, + "loss": 1.2773, + "step": 12766 + }, + { + "epoch": 0.37486053203358977, + "grad_norm": 0.0, + "learning_rate": 1.4384680122276304e-05, + "loss": 1.3613, + "step": 12767 + }, + { + "epoch": 0.3748898937107288, + "grad_norm": 0.0, + "learning_rate": 1.4383825429873335e-05, + "loss": 1.3213, + "step": 12768 + }, + { + "epoch": 0.37491925538786774, + "grad_norm": 0.0, + "learning_rate": 1.4382970697826309e-05, + "loss": 1.3789, + "step": 12769 + }, + { + "epoch": 0.37494861706500676, + "grad_norm": 0.0, + "learning_rate": 1.4382115926142958e-05, + "loss": 1.4219, + "step": 12770 + }, + { + "epoch": 0.3749779787421458, + "grad_norm": 0.0, + "learning_rate": 1.438126111483101e-05, + "loss": 1.2388, + "step": 12771 + }, + { + "epoch": 0.37500734041928474, + "grad_norm": 0.0, + "learning_rate": 1.4380406263898197e-05, + "loss": 1.3105, + "step": 12772 + }, + { + "epoch": 0.37503670209642376, + "grad_norm": 0.0, + "learning_rate": 1.4379551373352245e-05, + "loss": 1.3301, + "step": 12773 + }, + { + "epoch": 0.3750660637735627, + "grad_norm": 0.0, + "learning_rate": 1.437869644320089e-05, + "loss": 1.3496, + "step": 12774 + }, + { + "epoch": 0.37509542545070174, + "grad_norm": 0.0, + "learning_rate": 1.4377841473451861e-05, + "loss": 1.4072, + "step": 12775 + }, + { + "epoch": 0.37512478712784075, + "grad_norm": 0.0, + "learning_rate": 1.4376986464112892e-05, + "loss": 1.3188, + "step": 12776 + }, + { + "epoch": 0.3751541488049797, + "grad_norm": 0.0, + "learning_rate": 1.437613141519171e-05, + "loss": 1.2598, + "step": 12777 + }, + { + "epoch": 0.37518351048211873, + "grad_norm": 0.0, + "learning_rate": 1.4375276326696055e-05, + "loss": 1.3721, + "step": 12778 + }, + { + "epoch": 0.37521287215925775, + "grad_norm": 0.0, + "learning_rate": 1.4374421198633653e-05, + "loss": 1.2085, + "step": 12779 + }, + { + "epoch": 0.3752422338363967, + "grad_norm": 0.0, + "learning_rate": 1.437356603101224e-05, + "loss": 1.249, + "step": 12780 + }, + { + "epoch": 0.3752715955135357, + "grad_norm": 0.0, + "learning_rate": 1.4372710823839552e-05, + "loss": 1.1631, + "step": 12781 + }, + { + "epoch": 0.37530095719067474, + "grad_norm": 0.0, + "learning_rate": 1.437185557712332e-05, + "loss": 1.416, + "step": 12782 + }, + { + "epoch": 0.3753303188678137, + "grad_norm": 0.0, + "learning_rate": 1.4371000290871275e-05, + "loss": 1.2686, + "step": 12783 + }, + { + "epoch": 0.3753596805449527, + "grad_norm": 0.0, + "learning_rate": 1.4370144965091155e-05, + "loss": 1.4141, + "step": 12784 + }, + { + "epoch": 0.37538904222209174, + "grad_norm": 0.0, + "learning_rate": 1.4369289599790697e-05, + "loss": 1.3887, + "step": 12785 + }, + { + "epoch": 0.3754184038992307, + "grad_norm": 0.0, + "learning_rate": 1.4368434194977634e-05, + "loss": 1.4199, + "step": 12786 + }, + { + "epoch": 0.3754477655763697, + "grad_norm": 0.0, + "learning_rate": 1.4367578750659701e-05, + "loss": 1.3252, + "step": 12787 + }, + { + "epoch": 0.37547712725350874, + "grad_norm": 0.0, + "learning_rate": 1.4366723266844634e-05, + "loss": 1.4043, + "step": 12788 + }, + { + "epoch": 0.3755064889306477, + "grad_norm": 0.0, + "learning_rate": 1.436586774354017e-05, + "loss": 1.46, + "step": 12789 + }, + { + "epoch": 0.3755358506077867, + "grad_norm": 0.0, + "learning_rate": 1.4365012180754048e-05, + "loss": 1.4336, + "step": 12790 + }, + { + "epoch": 0.37556521228492573, + "grad_norm": 0.0, + "learning_rate": 1.4364156578494004e-05, + "loss": 1.3076, + "step": 12791 + }, + { + "epoch": 0.3755945739620647, + "grad_norm": 0.0, + "learning_rate": 1.4363300936767772e-05, + "loss": 1.5293, + "step": 12792 + }, + { + "epoch": 0.3756239356392037, + "grad_norm": 0.0, + "learning_rate": 1.4362445255583091e-05, + "loss": 1.335, + "step": 12793 + }, + { + "epoch": 0.3756532973163427, + "grad_norm": 0.0, + "learning_rate": 1.4361589534947701e-05, + "loss": 1.374, + "step": 12794 + }, + { + "epoch": 0.3756826589934817, + "grad_norm": 0.0, + "learning_rate": 1.4360733774869342e-05, + "loss": 1.3535, + "step": 12795 + }, + { + "epoch": 0.3757120206706207, + "grad_norm": 0.0, + "learning_rate": 1.4359877975355749e-05, + "loss": 1.3906, + "step": 12796 + }, + { + "epoch": 0.3757413823477597, + "grad_norm": 0.0, + "learning_rate": 1.4359022136414664e-05, + "loss": 1.4395, + "step": 12797 + }, + { + "epoch": 0.3757707440248987, + "grad_norm": 0.0, + "learning_rate": 1.4358166258053825e-05, + "loss": 1.3984, + "step": 12798 + }, + { + "epoch": 0.3758001057020377, + "grad_norm": 0.0, + "learning_rate": 1.4357310340280971e-05, + "loss": 1.3379, + "step": 12799 + }, + { + "epoch": 0.3758294673791767, + "grad_norm": 0.0, + "learning_rate": 1.4356454383103844e-05, + "loss": 1.4814, + "step": 12800 + }, + { + "epoch": 0.3758588290563157, + "grad_norm": 0.0, + "learning_rate": 1.4355598386530187e-05, + "loss": 1.3218, + "step": 12801 + }, + { + "epoch": 0.3758881907334547, + "grad_norm": 0.0, + "learning_rate": 1.4354742350567736e-05, + "loss": 1.3955, + "step": 12802 + }, + { + "epoch": 0.3759175524105937, + "grad_norm": 0.0, + "learning_rate": 1.4353886275224235e-05, + "loss": 1.1401, + "step": 12803 + }, + { + "epoch": 0.3759469140877327, + "grad_norm": 0.0, + "learning_rate": 1.4353030160507426e-05, + "loss": 1.3203, + "step": 12804 + }, + { + "epoch": 0.3759762757648717, + "grad_norm": 0.0, + "learning_rate": 1.4352174006425048e-05, + "loss": 1.3271, + "step": 12805 + }, + { + "epoch": 0.3760056374420107, + "grad_norm": 0.0, + "learning_rate": 1.4351317812984848e-05, + "loss": 1.3096, + "step": 12806 + }, + { + "epoch": 0.37603499911914967, + "grad_norm": 0.0, + "learning_rate": 1.4350461580194568e-05, + "loss": 1.3965, + "step": 12807 + }, + { + "epoch": 0.3760643607962887, + "grad_norm": 0.0, + "learning_rate": 1.4349605308061947e-05, + "loss": 1.3262, + "step": 12808 + }, + { + "epoch": 0.3760937224734277, + "grad_norm": 0.0, + "learning_rate": 1.4348748996594732e-05, + "loss": 1.3779, + "step": 12809 + }, + { + "epoch": 0.37612308415056667, + "grad_norm": 0.0, + "learning_rate": 1.4347892645800667e-05, + "loss": 1.3486, + "step": 12810 + }, + { + "epoch": 0.3761524458277057, + "grad_norm": 0.0, + "learning_rate": 1.4347036255687497e-05, + "loss": 1.4551, + "step": 12811 + }, + { + "epoch": 0.3761818075048447, + "grad_norm": 0.0, + "learning_rate": 1.4346179826262963e-05, + "loss": 1.3086, + "step": 12812 + }, + { + "epoch": 0.37621116918198366, + "grad_norm": 0.0, + "learning_rate": 1.434532335753481e-05, + "loss": 1.5459, + "step": 12813 + }, + { + "epoch": 0.3762405308591227, + "grad_norm": 0.0, + "learning_rate": 1.4344466849510785e-05, + "loss": 1.4268, + "step": 12814 + }, + { + "epoch": 0.3762698925362617, + "grad_norm": 0.0, + "learning_rate": 1.4343610302198637e-05, + "loss": 1.3438, + "step": 12815 + }, + { + "epoch": 0.37629925421340066, + "grad_norm": 0.0, + "learning_rate": 1.4342753715606106e-05, + "loss": 1.5137, + "step": 12816 + }, + { + "epoch": 0.3763286158905397, + "grad_norm": 0.0, + "learning_rate": 1.434189708974094e-05, + "loss": 1.3027, + "step": 12817 + }, + { + "epoch": 0.3763579775676787, + "grad_norm": 0.0, + "learning_rate": 1.4341040424610887e-05, + "loss": 1.3086, + "step": 12818 + }, + { + "epoch": 0.37638733924481765, + "grad_norm": 0.0, + "learning_rate": 1.4340183720223698e-05, + "loss": 1.4355, + "step": 12819 + }, + { + "epoch": 0.37641670092195667, + "grad_norm": 0.0, + "learning_rate": 1.4339326976587114e-05, + "loss": 1.5488, + "step": 12820 + }, + { + "epoch": 0.3764460625990957, + "grad_norm": 0.0, + "learning_rate": 1.4338470193708883e-05, + "loss": 1.3623, + "step": 12821 + }, + { + "epoch": 0.37647542427623465, + "grad_norm": 0.0, + "learning_rate": 1.4337613371596754e-05, + "loss": 1.3779, + "step": 12822 + }, + { + "epoch": 0.37650478595337367, + "grad_norm": 0.0, + "learning_rate": 1.4336756510258481e-05, + "loss": 1.4766, + "step": 12823 + }, + { + "epoch": 0.37653414763051263, + "grad_norm": 0.0, + "learning_rate": 1.4335899609701805e-05, + "loss": 1.3394, + "step": 12824 + }, + { + "epoch": 0.37656350930765164, + "grad_norm": 0.0, + "learning_rate": 1.4335042669934476e-05, + "loss": 1.2979, + "step": 12825 + }, + { + "epoch": 0.37659287098479066, + "grad_norm": 0.0, + "learning_rate": 1.4334185690964248e-05, + "loss": 1.4219, + "step": 12826 + }, + { + "epoch": 0.3766222326619296, + "grad_norm": 0.0, + "learning_rate": 1.4333328672798872e-05, + "loss": 1.3477, + "step": 12827 + }, + { + "epoch": 0.37665159433906864, + "grad_norm": 0.0, + "learning_rate": 1.433247161544609e-05, + "loss": 1.2954, + "step": 12828 + }, + { + "epoch": 0.37668095601620766, + "grad_norm": 0.0, + "learning_rate": 1.4331614518913661e-05, + "loss": 1.2793, + "step": 12829 + }, + { + "epoch": 0.3767103176933466, + "grad_norm": 0.0, + "learning_rate": 1.4330757383209333e-05, + "loss": 1.3203, + "step": 12830 + }, + { + "epoch": 0.37673967937048564, + "grad_norm": 0.0, + "learning_rate": 1.4329900208340855e-05, + "loss": 1.5176, + "step": 12831 + }, + { + "epoch": 0.37676904104762465, + "grad_norm": 0.0, + "learning_rate": 1.4329042994315983e-05, + "loss": 1.2148, + "step": 12832 + }, + { + "epoch": 0.3767984027247636, + "grad_norm": 0.0, + "learning_rate": 1.4328185741142462e-05, + "loss": 1.4316, + "step": 12833 + }, + { + "epoch": 0.37682776440190263, + "grad_norm": 0.0, + "learning_rate": 1.4327328448828053e-05, + "loss": 1.333, + "step": 12834 + }, + { + "epoch": 0.37685712607904165, + "grad_norm": 0.0, + "learning_rate": 1.4326471117380503e-05, + "loss": 1.418, + "step": 12835 + }, + { + "epoch": 0.3768864877561806, + "grad_norm": 0.0, + "learning_rate": 1.4325613746807565e-05, + "loss": 1.3008, + "step": 12836 + }, + { + "epoch": 0.3769158494333196, + "grad_norm": 0.0, + "learning_rate": 1.4324756337116996e-05, + "loss": 1.2925, + "step": 12837 + }, + { + "epoch": 0.37694521111045864, + "grad_norm": 0.0, + "learning_rate": 1.4323898888316547e-05, + "loss": 1.3486, + "step": 12838 + }, + { + "epoch": 0.3769745727875976, + "grad_norm": 0.0, + "learning_rate": 1.4323041400413976e-05, + "loss": 1.4043, + "step": 12839 + }, + { + "epoch": 0.3770039344647366, + "grad_norm": 0.0, + "learning_rate": 1.4322183873417033e-05, + "loss": 1.3545, + "step": 12840 + }, + { + "epoch": 0.37703329614187564, + "grad_norm": 0.0, + "learning_rate": 1.4321326307333471e-05, + "loss": 1.2886, + "step": 12841 + }, + { + "epoch": 0.3770626578190146, + "grad_norm": 0.0, + "learning_rate": 1.432046870217105e-05, + "loss": 1.459, + "step": 12842 + }, + { + "epoch": 0.3770920194961536, + "grad_norm": 0.0, + "learning_rate": 1.4319611057937526e-05, + "loss": 1.377, + "step": 12843 + }, + { + "epoch": 0.37712138117329264, + "grad_norm": 0.0, + "learning_rate": 1.4318753374640652e-05, + "loss": 1.5498, + "step": 12844 + }, + { + "epoch": 0.3771507428504316, + "grad_norm": 0.0, + "learning_rate": 1.4317895652288186e-05, + "loss": 1.4219, + "step": 12845 + }, + { + "epoch": 0.3771801045275706, + "grad_norm": 0.0, + "learning_rate": 1.4317037890887881e-05, + "loss": 1.293, + "step": 12846 + }, + { + "epoch": 0.37720946620470963, + "grad_norm": 0.0, + "learning_rate": 1.43161800904475e-05, + "loss": 1.4053, + "step": 12847 + }, + { + "epoch": 0.3772388278818486, + "grad_norm": 0.0, + "learning_rate": 1.4315322250974795e-05, + "loss": 1.459, + "step": 12848 + }, + { + "epoch": 0.3772681895589876, + "grad_norm": 0.0, + "learning_rate": 1.4314464372477526e-05, + "loss": 1.4277, + "step": 12849 + }, + { + "epoch": 0.3772975512361266, + "grad_norm": 0.0, + "learning_rate": 1.4313606454963452e-05, + "loss": 1.332, + "step": 12850 + }, + { + "epoch": 0.3773269129132656, + "grad_norm": 0.0, + "learning_rate": 1.4312748498440328e-05, + "loss": 1.3193, + "step": 12851 + }, + { + "epoch": 0.3773562745904046, + "grad_norm": 0.0, + "learning_rate": 1.4311890502915918e-05, + "loss": 1.3408, + "step": 12852 + }, + { + "epoch": 0.3773856362675436, + "grad_norm": 0.0, + "learning_rate": 1.4311032468397975e-05, + "loss": 1.3906, + "step": 12853 + }, + { + "epoch": 0.3774149979446826, + "grad_norm": 0.0, + "learning_rate": 1.431017439489426e-05, + "loss": 1.2998, + "step": 12854 + }, + { + "epoch": 0.3774443596218216, + "grad_norm": 0.0, + "learning_rate": 1.4309316282412537e-05, + "loss": 1.3125, + "step": 12855 + }, + { + "epoch": 0.3774737212989606, + "grad_norm": 0.0, + "learning_rate": 1.4308458130960566e-05, + "loss": 1.2207, + "step": 12856 + }, + { + "epoch": 0.3775030829760996, + "grad_norm": 0.0, + "learning_rate": 1.43075999405461e-05, + "loss": 1.3242, + "step": 12857 + }, + { + "epoch": 0.3775324446532386, + "grad_norm": 0.0, + "learning_rate": 1.4306741711176903e-05, + "loss": 1.4512, + "step": 12858 + }, + { + "epoch": 0.3775618063303776, + "grad_norm": 0.0, + "learning_rate": 1.4305883442860743e-05, + "loss": 1.3311, + "step": 12859 + }, + { + "epoch": 0.3775911680075166, + "grad_norm": 0.0, + "learning_rate": 1.4305025135605375e-05, + "loss": 1.4512, + "step": 12860 + }, + { + "epoch": 0.3776205296846556, + "grad_norm": 0.0, + "learning_rate": 1.4304166789418559e-05, + "loss": 1.2876, + "step": 12861 + }, + { + "epoch": 0.3776498913617946, + "grad_norm": 0.0, + "learning_rate": 1.4303308404308063e-05, + "loss": 1.2344, + "step": 12862 + }, + { + "epoch": 0.37767925303893357, + "grad_norm": 0.0, + "learning_rate": 1.4302449980281644e-05, + "loss": 1.2773, + "step": 12863 + }, + { + "epoch": 0.3777086147160726, + "grad_norm": 0.0, + "learning_rate": 1.4301591517347073e-05, + "loss": 1.2656, + "step": 12864 + }, + { + "epoch": 0.3777379763932116, + "grad_norm": 0.0, + "learning_rate": 1.4300733015512104e-05, + "loss": 1.3945, + "step": 12865 + }, + { + "epoch": 0.37776733807035057, + "grad_norm": 0.0, + "learning_rate": 1.4299874474784505e-05, + "loss": 1.418, + "step": 12866 + }, + { + "epoch": 0.3777966997474896, + "grad_norm": 0.0, + "learning_rate": 1.4299015895172041e-05, + "loss": 1.2979, + "step": 12867 + }, + { + "epoch": 0.3778260614246286, + "grad_norm": 0.0, + "learning_rate": 1.4298157276682478e-05, + "loss": 1.3291, + "step": 12868 + }, + { + "epoch": 0.37785542310176756, + "grad_norm": 0.0, + "learning_rate": 1.4297298619323576e-05, + "loss": 1.415, + "step": 12869 + }, + { + "epoch": 0.3778847847789066, + "grad_norm": 0.0, + "learning_rate": 1.4296439923103102e-05, + "loss": 1.3799, + "step": 12870 + }, + { + "epoch": 0.3779141464560456, + "grad_norm": 0.0, + "learning_rate": 1.429558118802882e-05, + "loss": 1.3711, + "step": 12871 + }, + { + "epoch": 0.37794350813318456, + "grad_norm": 0.0, + "learning_rate": 1.42947224141085e-05, + "loss": 1.3945, + "step": 12872 + }, + { + "epoch": 0.3779728698103236, + "grad_norm": 0.0, + "learning_rate": 1.4293863601349905e-05, + "loss": 1.291, + "step": 12873 + }, + { + "epoch": 0.37800223148746254, + "grad_norm": 0.0, + "learning_rate": 1.4293004749760798e-05, + "loss": 1.416, + "step": 12874 + }, + { + "epoch": 0.37803159316460155, + "grad_norm": 0.0, + "learning_rate": 1.4292145859348952e-05, + "loss": 1.3271, + "step": 12875 + }, + { + "epoch": 0.37806095484174057, + "grad_norm": 0.0, + "learning_rate": 1.4291286930122133e-05, + "loss": 1.4502, + "step": 12876 + }, + { + "epoch": 0.37809031651887953, + "grad_norm": 0.0, + "learning_rate": 1.4290427962088104e-05, + "loss": 1.3154, + "step": 12877 + }, + { + "epoch": 0.37811967819601855, + "grad_norm": 0.0, + "learning_rate": 1.4289568955254637e-05, + "loss": 1.2822, + "step": 12878 + }, + { + "epoch": 0.37814903987315757, + "grad_norm": 0.0, + "learning_rate": 1.4288709909629504e-05, + "loss": 1.4053, + "step": 12879 + }, + { + "epoch": 0.37817840155029653, + "grad_norm": 0.0, + "learning_rate": 1.4287850825220464e-05, + "loss": 1.3828, + "step": 12880 + }, + { + "epoch": 0.37820776322743554, + "grad_norm": 0.0, + "learning_rate": 1.4286991702035294e-05, + "loss": 1.3828, + "step": 12881 + }, + { + "epoch": 0.37823712490457456, + "grad_norm": 0.0, + "learning_rate": 1.4286132540081756e-05, + "loss": 1.4258, + "step": 12882 + }, + { + "epoch": 0.3782664865817135, + "grad_norm": 0.0, + "learning_rate": 1.4285273339367626e-05, + "loss": 1.2524, + "step": 12883 + }, + { + "epoch": 0.37829584825885254, + "grad_norm": 0.0, + "learning_rate": 1.4284414099900672e-05, + "loss": 1.3428, + "step": 12884 + }, + { + "epoch": 0.37832520993599156, + "grad_norm": 0.0, + "learning_rate": 1.4283554821688665e-05, + "loss": 1.4941, + "step": 12885 + }, + { + "epoch": 0.3783545716131305, + "grad_norm": 0.0, + "learning_rate": 1.4282695504739372e-05, + "loss": 1.3467, + "step": 12886 + }, + { + "epoch": 0.37838393329026954, + "grad_norm": 0.0, + "learning_rate": 1.4281836149060568e-05, + "loss": 1.4229, + "step": 12887 + }, + { + "epoch": 0.37841329496740855, + "grad_norm": 0.0, + "learning_rate": 1.4280976754660025e-05, + "loss": 1.3232, + "step": 12888 + }, + { + "epoch": 0.3784426566445475, + "grad_norm": 0.0, + "learning_rate": 1.4280117321545513e-05, + "loss": 1.1211, + "step": 12889 + }, + { + "epoch": 0.37847201832168653, + "grad_norm": 0.0, + "learning_rate": 1.42792578497248e-05, + "loss": 1.2773, + "step": 12890 + }, + { + "epoch": 0.37850137999882555, + "grad_norm": 0.0, + "learning_rate": 1.4278398339205665e-05, + "loss": 1.2588, + "step": 12891 + }, + { + "epoch": 0.3785307416759645, + "grad_norm": 0.0, + "learning_rate": 1.4277538789995877e-05, + "loss": 1.3076, + "step": 12892 + }, + { + "epoch": 0.3785601033531035, + "grad_norm": 0.0, + "learning_rate": 1.4276679202103212e-05, + "loss": 1.374, + "step": 12893 + }, + { + "epoch": 0.37858946503024254, + "grad_norm": 0.0, + "learning_rate": 1.4275819575535442e-05, + "loss": 1.4111, + "step": 12894 + }, + { + "epoch": 0.3786188267073815, + "grad_norm": 0.0, + "learning_rate": 1.4274959910300336e-05, + "loss": 1.2998, + "step": 12895 + }, + { + "epoch": 0.3786481883845205, + "grad_norm": 0.0, + "learning_rate": 1.4274100206405676e-05, + "loss": 1.333, + "step": 12896 + }, + { + "epoch": 0.37867755006165954, + "grad_norm": 0.0, + "learning_rate": 1.4273240463859235e-05, + "loss": 1.4014, + "step": 12897 + }, + { + "epoch": 0.3787069117387985, + "grad_norm": 0.0, + "learning_rate": 1.4272380682668783e-05, + "loss": 1.3467, + "step": 12898 + }, + { + "epoch": 0.3787362734159375, + "grad_norm": 0.0, + "learning_rate": 1.42715208628421e-05, + "loss": 1.3013, + "step": 12899 + }, + { + "epoch": 0.37876563509307654, + "grad_norm": 0.0, + "learning_rate": 1.4270661004386959e-05, + "loss": 1.3633, + "step": 12900 + }, + { + "epoch": 0.3787949967702155, + "grad_norm": 0.0, + "learning_rate": 1.4269801107311139e-05, + "loss": 1.4111, + "step": 12901 + }, + { + "epoch": 0.3788243584473545, + "grad_norm": 0.0, + "learning_rate": 1.4268941171622413e-05, + "loss": 1.4395, + "step": 12902 + }, + { + "epoch": 0.37885372012449353, + "grad_norm": 0.0, + "learning_rate": 1.4268081197328556e-05, + "loss": 1.5015, + "step": 12903 + }, + { + "epoch": 0.3788830818016325, + "grad_norm": 0.0, + "learning_rate": 1.4267221184437347e-05, + "loss": 1.3799, + "step": 12904 + }, + { + "epoch": 0.3789124434787715, + "grad_norm": 0.0, + "learning_rate": 1.4266361132956567e-05, + "loss": 1.4316, + "step": 12905 + }, + { + "epoch": 0.3789418051559105, + "grad_norm": 0.0, + "learning_rate": 1.4265501042893993e-05, + "loss": 1.3652, + "step": 12906 + }, + { + "epoch": 0.3789711668330495, + "grad_norm": 0.0, + "learning_rate": 1.4264640914257395e-05, + "loss": 1.3662, + "step": 12907 + }, + { + "epoch": 0.3790005285101885, + "grad_norm": 0.0, + "learning_rate": 1.426378074705456e-05, + "loss": 1.3975, + "step": 12908 + }, + { + "epoch": 0.3790298901873275, + "grad_norm": 0.0, + "learning_rate": 1.4262920541293266e-05, + "loss": 1.4229, + "step": 12909 + }, + { + "epoch": 0.3790592518644665, + "grad_norm": 0.0, + "learning_rate": 1.4262060296981285e-05, + "loss": 1.4326, + "step": 12910 + }, + { + "epoch": 0.3790886135416055, + "grad_norm": 0.0, + "learning_rate": 1.4261200014126404e-05, + "loss": 1.4092, + "step": 12911 + }, + { + "epoch": 0.3791179752187445, + "grad_norm": 0.0, + "learning_rate": 1.42603396927364e-05, + "loss": 1.4678, + "step": 12912 + }, + { + "epoch": 0.3791473368958835, + "grad_norm": 0.0, + "learning_rate": 1.4259479332819056e-05, + "loss": 1.2798, + "step": 12913 + }, + { + "epoch": 0.3791766985730225, + "grad_norm": 0.0, + "learning_rate": 1.4258618934382144e-05, + "loss": 1.4404, + "step": 12914 + }, + { + "epoch": 0.3792060602501615, + "grad_norm": 0.0, + "learning_rate": 1.4257758497433452e-05, + "loss": 1.4355, + "step": 12915 + }, + { + "epoch": 0.3792354219273005, + "grad_norm": 0.0, + "learning_rate": 1.425689802198076e-05, + "loss": 1.4658, + "step": 12916 + }, + { + "epoch": 0.3792647836044395, + "grad_norm": 0.0, + "learning_rate": 1.4256037508031847e-05, + "loss": 1.3447, + "step": 12917 + }, + { + "epoch": 0.3792941452815785, + "grad_norm": 0.0, + "learning_rate": 1.42551769555945e-05, + "loss": 1.208, + "step": 12918 + }, + { + "epoch": 0.37932350695871747, + "grad_norm": 0.0, + "learning_rate": 1.4254316364676499e-05, + "loss": 1.2695, + "step": 12919 + }, + { + "epoch": 0.3793528686358565, + "grad_norm": 0.0, + "learning_rate": 1.4253455735285622e-05, + "loss": 1.228, + "step": 12920 + }, + { + "epoch": 0.3793822303129955, + "grad_norm": 0.0, + "learning_rate": 1.4252595067429655e-05, + "loss": 1.4258, + "step": 12921 + }, + { + "epoch": 0.37941159199013447, + "grad_norm": 0.0, + "learning_rate": 1.4251734361116386e-05, + "loss": 1.3301, + "step": 12922 + }, + { + "epoch": 0.3794409536672735, + "grad_norm": 0.0, + "learning_rate": 1.425087361635359e-05, + "loss": 1.4561, + "step": 12923 + }, + { + "epoch": 0.37947031534441245, + "grad_norm": 0.0, + "learning_rate": 1.4250012833149058e-05, + "loss": 1.3896, + "step": 12924 + }, + { + "epoch": 0.37949967702155146, + "grad_norm": 0.0, + "learning_rate": 1.4249152011510572e-05, + "loss": 1.4062, + "step": 12925 + }, + { + "epoch": 0.3795290386986905, + "grad_norm": 0.0, + "learning_rate": 1.4248291151445912e-05, + "loss": 1.292, + "step": 12926 + }, + { + "epoch": 0.37955840037582944, + "grad_norm": 0.0, + "learning_rate": 1.424743025296287e-05, + "loss": 1.3623, + "step": 12927 + }, + { + "epoch": 0.37958776205296846, + "grad_norm": 0.0, + "learning_rate": 1.4246569316069228e-05, + "loss": 1.3223, + "step": 12928 + }, + { + "epoch": 0.3796171237301075, + "grad_norm": 0.0, + "learning_rate": 1.4245708340772773e-05, + "loss": 1.3955, + "step": 12929 + }, + { + "epoch": 0.37964648540724644, + "grad_norm": 0.0, + "learning_rate": 1.424484732708129e-05, + "loss": 1.3682, + "step": 12930 + }, + { + "epoch": 0.37967584708438545, + "grad_norm": 0.0, + "learning_rate": 1.4243986275002566e-05, + "loss": 1.3799, + "step": 12931 + }, + { + "epoch": 0.37970520876152447, + "grad_norm": 0.0, + "learning_rate": 1.4243125184544384e-05, + "loss": 1.377, + "step": 12932 + }, + { + "epoch": 0.37973457043866343, + "grad_norm": 0.0, + "learning_rate": 1.4242264055714536e-05, + "loss": 1.4033, + "step": 12933 + }, + { + "epoch": 0.37976393211580245, + "grad_norm": 0.0, + "learning_rate": 1.424140288852081e-05, + "loss": 1.3066, + "step": 12934 + }, + { + "epoch": 0.37979329379294147, + "grad_norm": 0.0, + "learning_rate": 1.4240541682970989e-05, + "loss": 1.0698, + "step": 12935 + }, + { + "epoch": 0.37982265547008043, + "grad_norm": 0.0, + "learning_rate": 1.4239680439072863e-05, + "loss": 1.2212, + "step": 12936 + }, + { + "epoch": 0.37985201714721945, + "grad_norm": 0.0, + "learning_rate": 1.4238819156834222e-05, + "loss": 1.4355, + "step": 12937 + }, + { + "epoch": 0.37988137882435846, + "grad_norm": 0.0, + "learning_rate": 1.4237957836262856e-05, + "loss": 1.5117, + "step": 12938 + }, + { + "epoch": 0.3799107405014974, + "grad_norm": 0.0, + "learning_rate": 1.423709647736655e-05, + "loss": 1.4834, + "step": 12939 + }, + { + "epoch": 0.37994010217863644, + "grad_norm": 0.0, + "learning_rate": 1.4236235080153096e-05, + "loss": 1.4395, + "step": 12940 + }, + { + "epoch": 0.37996946385577546, + "grad_norm": 0.0, + "learning_rate": 1.423537364463028e-05, + "loss": 1.2261, + "step": 12941 + }, + { + "epoch": 0.3799988255329144, + "grad_norm": 0.0, + "learning_rate": 1.4234512170805902e-05, + "loss": 1.4863, + "step": 12942 + }, + { + "epoch": 0.38002818721005344, + "grad_norm": 0.0, + "learning_rate": 1.4233650658687738e-05, + "loss": 1.3311, + "step": 12943 + }, + { + "epoch": 0.38005754888719245, + "grad_norm": 0.0, + "learning_rate": 1.4232789108283591e-05, + "loss": 1.3223, + "step": 12944 + }, + { + "epoch": 0.3800869105643314, + "grad_norm": 0.0, + "learning_rate": 1.4231927519601247e-05, + "loss": 1.418, + "step": 12945 + }, + { + "epoch": 0.38011627224147043, + "grad_norm": 0.0, + "learning_rate": 1.4231065892648501e-05, + "loss": 1.3496, + "step": 12946 + }, + { + "epoch": 0.38014563391860945, + "grad_norm": 0.0, + "learning_rate": 1.423020422743314e-05, + "loss": 1.4482, + "step": 12947 + }, + { + "epoch": 0.3801749955957484, + "grad_norm": 0.0, + "learning_rate": 1.4229342523962961e-05, + "loss": 1.5078, + "step": 12948 + }, + { + "epoch": 0.3802043572728874, + "grad_norm": 0.0, + "learning_rate": 1.4228480782245753e-05, + "loss": 1.3789, + "step": 12949 + }, + { + "epoch": 0.38023371895002644, + "grad_norm": 0.0, + "learning_rate": 1.4227619002289311e-05, + "loss": 1.4092, + "step": 12950 + }, + { + "epoch": 0.3802630806271654, + "grad_norm": 0.0, + "learning_rate": 1.4226757184101427e-05, + "loss": 1.46, + "step": 12951 + }, + { + "epoch": 0.3802924423043044, + "grad_norm": 0.0, + "learning_rate": 1.4225895327689893e-05, + "loss": 1.2793, + "step": 12952 + }, + { + "epoch": 0.38032180398144344, + "grad_norm": 0.0, + "learning_rate": 1.422503343306251e-05, + "loss": 1.3594, + "step": 12953 + }, + { + "epoch": 0.3803511656585824, + "grad_norm": 0.0, + "learning_rate": 1.4224171500227066e-05, + "loss": 1.2236, + "step": 12954 + }, + { + "epoch": 0.3803805273357214, + "grad_norm": 0.0, + "learning_rate": 1.4223309529191357e-05, + "loss": 1.3975, + "step": 12955 + }, + { + "epoch": 0.38040988901286044, + "grad_norm": 0.0, + "learning_rate": 1.4222447519963177e-05, + "loss": 1.3633, + "step": 12956 + }, + { + "epoch": 0.3804392506899994, + "grad_norm": 0.0, + "learning_rate": 1.4221585472550322e-05, + "loss": 1.2568, + "step": 12957 + }, + { + "epoch": 0.3804686123671384, + "grad_norm": 0.0, + "learning_rate": 1.422072338696059e-05, + "loss": 1.3018, + "step": 12958 + }, + { + "epoch": 0.38049797404427743, + "grad_norm": 0.0, + "learning_rate": 1.4219861263201777e-05, + "loss": 1.3945, + "step": 12959 + }, + { + "epoch": 0.3805273357214164, + "grad_norm": 0.0, + "learning_rate": 1.4218999101281677e-05, + "loss": 1.3662, + "step": 12960 + }, + { + "epoch": 0.3805566973985554, + "grad_norm": 0.0, + "learning_rate": 1.4218136901208086e-05, + "loss": 1.3525, + "step": 12961 + }, + { + "epoch": 0.3805860590756944, + "grad_norm": 0.0, + "learning_rate": 1.4217274662988804e-05, + "loss": 1.4619, + "step": 12962 + }, + { + "epoch": 0.3806154207528334, + "grad_norm": 0.0, + "learning_rate": 1.4216412386631629e-05, + "loss": 1.4619, + "step": 12963 + }, + { + "epoch": 0.3806447824299724, + "grad_norm": 0.0, + "learning_rate": 1.4215550072144352e-05, + "loss": 1.4062, + "step": 12964 + }, + { + "epoch": 0.3806741441071114, + "grad_norm": 0.0, + "learning_rate": 1.4214687719534779e-05, + "loss": 1.2793, + "step": 12965 + }, + { + "epoch": 0.3807035057842504, + "grad_norm": 0.0, + "learning_rate": 1.4213825328810707e-05, + "loss": 1.5127, + "step": 12966 + }, + { + "epoch": 0.3807328674613894, + "grad_norm": 0.0, + "learning_rate": 1.4212962899979932e-05, + "loss": 1.3643, + "step": 12967 + }, + { + "epoch": 0.3807622291385284, + "grad_norm": 0.0, + "learning_rate": 1.4212100433050254e-05, + "loss": 1.4121, + "step": 12968 + }, + { + "epoch": 0.3807915908156674, + "grad_norm": 0.0, + "learning_rate": 1.4211237928029473e-05, + "loss": 1.3516, + "step": 12969 + }, + { + "epoch": 0.3808209524928064, + "grad_norm": 0.0, + "learning_rate": 1.4210375384925389e-05, + "loss": 1.4512, + "step": 12970 + }, + { + "epoch": 0.3808503141699454, + "grad_norm": 0.0, + "learning_rate": 1.4209512803745804e-05, + "loss": 1.4844, + "step": 12971 + }, + { + "epoch": 0.3808796758470844, + "grad_norm": 0.0, + "learning_rate": 1.4208650184498514e-05, + "loss": 1.4482, + "step": 12972 + }, + { + "epoch": 0.3809090375242234, + "grad_norm": 0.0, + "learning_rate": 1.4207787527191323e-05, + "loss": 1.252, + "step": 12973 + }, + { + "epoch": 0.38093839920136235, + "grad_norm": 0.0, + "learning_rate": 1.4206924831832031e-05, + "loss": 1.3994, + "step": 12974 + }, + { + "epoch": 0.38096776087850137, + "grad_norm": 0.0, + "learning_rate": 1.4206062098428444e-05, + "loss": 1.3838, + "step": 12975 + }, + { + "epoch": 0.3809971225556404, + "grad_norm": 0.0, + "learning_rate": 1.4205199326988356e-05, + "loss": 1.2773, + "step": 12976 + }, + { + "epoch": 0.38102648423277935, + "grad_norm": 0.0, + "learning_rate": 1.4204336517519578e-05, + "loss": 1.416, + "step": 12977 + }, + { + "epoch": 0.38105584590991837, + "grad_norm": 0.0, + "learning_rate": 1.4203473670029905e-05, + "loss": 1.3447, + "step": 12978 + }, + { + "epoch": 0.3810852075870574, + "grad_norm": 0.0, + "learning_rate": 1.4202610784527146e-05, + "loss": 1.4492, + "step": 12979 + }, + { + "epoch": 0.38111456926419635, + "grad_norm": 0.0, + "learning_rate": 1.4201747861019098e-05, + "loss": 1.3115, + "step": 12980 + }, + { + "epoch": 0.38114393094133536, + "grad_norm": 0.0, + "learning_rate": 1.4200884899513566e-05, + "loss": 1.4502, + "step": 12981 + }, + { + "epoch": 0.3811732926184744, + "grad_norm": 0.0, + "learning_rate": 1.4200021900018358e-05, + "loss": 1.3896, + "step": 12982 + }, + { + "epoch": 0.38120265429561334, + "grad_norm": 0.0, + "learning_rate": 1.419915886254128e-05, + "loss": 1.4473, + "step": 12983 + }, + { + "epoch": 0.38123201597275236, + "grad_norm": 0.0, + "learning_rate": 1.419829578709013e-05, + "loss": 1.2598, + "step": 12984 + }, + { + "epoch": 0.3812613776498914, + "grad_norm": 0.0, + "learning_rate": 1.4197432673672718e-05, + "loss": 1.2744, + "step": 12985 + }, + { + "epoch": 0.38129073932703034, + "grad_norm": 0.0, + "learning_rate": 1.4196569522296843e-05, + "loss": 1.3418, + "step": 12986 + }, + { + "epoch": 0.38132010100416935, + "grad_norm": 0.0, + "learning_rate": 1.419570633297032e-05, + "loss": 1.3926, + "step": 12987 + }, + { + "epoch": 0.38134946268130837, + "grad_norm": 0.0, + "learning_rate": 1.4194843105700948e-05, + "loss": 1.3662, + "step": 12988 + }, + { + "epoch": 0.38137882435844733, + "grad_norm": 0.0, + "learning_rate": 1.4193979840496534e-05, + "loss": 1.4619, + "step": 12989 + }, + { + "epoch": 0.38140818603558635, + "grad_norm": 0.0, + "learning_rate": 1.4193116537364888e-05, + "loss": 1.3623, + "step": 12990 + }, + { + "epoch": 0.38143754771272537, + "grad_norm": 0.0, + "learning_rate": 1.4192253196313817e-05, + "loss": 1.2812, + "step": 12991 + }, + { + "epoch": 0.38146690938986433, + "grad_norm": 0.0, + "learning_rate": 1.4191389817351125e-05, + "loss": 1.2495, + "step": 12992 + }, + { + "epoch": 0.38149627106700335, + "grad_norm": 0.0, + "learning_rate": 1.4190526400484618e-05, + "loss": 1.4346, + "step": 12993 + }, + { + "epoch": 0.38152563274414236, + "grad_norm": 0.0, + "learning_rate": 1.4189662945722111e-05, + "loss": 1.2656, + "step": 12994 + }, + { + "epoch": 0.3815549944212813, + "grad_norm": 0.0, + "learning_rate": 1.4188799453071409e-05, + "loss": 1.4004, + "step": 12995 + }, + { + "epoch": 0.38158435609842034, + "grad_norm": 0.0, + "learning_rate": 1.4187935922540321e-05, + "loss": 1.3115, + "step": 12996 + }, + { + "epoch": 0.38161371777555936, + "grad_norm": 0.0, + "learning_rate": 1.4187072354136656e-05, + "loss": 1.3828, + "step": 12997 + }, + { + "epoch": 0.3816430794526983, + "grad_norm": 0.0, + "learning_rate": 1.4186208747868222e-05, + "loss": 1.4326, + "step": 12998 + }, + { + "epoch": 0.38167244112983734, + "grad_norm": 0.0, + "learning_rate": 1.4185345103742831e-05, + "loss": 1.4531, + "step": 12999 + }, + { + "epoch": 0.38170180280697635, + "grad_norm": 0.0, + "learning_rate": 1.4184481421768292e-05, + "loss": 1.3672, + "step": 13000 + }, + { + "epoch": 0.3817311644841153, + "grad_norm": 0.0, + "learning_rate": 1.4183617701952415e-05, + "loss": 1.2617, + "step": 13001 + }, + { + "epoch": 0.38176052616125433, + "grad_norm": 0.0, + "learning_rate": 1.4182753944303013e-05, + "loss": 1.4004, + "step": 13002 + }, + { + "epoch": 0.38178988783839335, + "grad_norm": 0.0, + "learning_rate": 1.4181890148827897e-05, + "loss": 1.4219, + "step": 13003 + }, + { + "epoch": 0.3818192495155323, + "grad_norm": 0.0, + "learning_rate": 1.4181026315534875e-05, + "loss": 1.249, + "step": 13004 + }, + { + "epoch": 0.38184861119267133, + "grad_norm": 0.0, + "learning_rate": 1.4180162444431761e-05, + "loss": 1.3369, + "step": 13005 + }, + { + "epoch": 0.38187797286981034, + "grad_norm": 0.0, + "learning_rate": 1.4179298535526369e-05, + "loss": 1.4541, + "step": 13006 + }, + { + "epoch": 0.3819073345469493, + "grad_norm": 0.0, + "learning_rate": 1.4178434588826511e-05, + "loss": 1.377, + "step": 13007 + }, + { + "epoch": 0.3819366962240883, + "grad_norm": 0.0, + "learning_rate": 1.4177570604339999e-05, + "loss": 1.3242, + "step": 13008 + }, + { + "epoch": 0.38196605790122734, + "grad_norm": 0.0, + "learning_rate": 1.4176706582074644e-05, + "loss": 1.5088, + "step": 13009 + }, + { + "epoch": 0.3819954195783663, + "grad_norm": 0.0, + "learning_rate": 1.4175842522038261e-05, + "loss": 1.4326, + "step": 13010 + }, + { + "epoch": 0.3820247812555053, + "grad_norm": 0.0, + "learning_rate": 1.4174978424238666e-05, + "loss": 1.208, + "step": 13011 + }, + { + "epoch": 0.38205414293264434, + "grad_norm": 0.0, + "learning_rate": 1.4174114288683672e-05, + "loss": 1.3232, + "step": 13012 + }, + { + "epoch": 0.3820835046097833, + "grad_norm": 0.0, + "learning_rate": 1.4173250115381092e-05, + "loss": 1.3174, + "step": 13013 + }, + { + "epoch": 0.3821128662869223, + "grad_norm": 0.0, + "learning_rate": 1.4172385904338743e-05, + "loss": 1.4404, + "step": 13014 + }, + { + "epoch": 0.38214222796406133, + "grad_norm": 0.0, + "learning_rate": 1.4171521655564443e-05, + "loss": 1.2734, + "step": 13015 + }, + { + "epoch": 0.3821715896412003, + "grad_norm": 0.0, + "learning_rate": 1.4170657369066001e-05, + "loss": 1.3564, + "step": 13016 + }, + { + "epoch": 0.3822009513183393, + "grad_norm": 0.0, + "learning_rate": 1.4169793044851238e-05, + "loss": 1.3525, + "step": 13017 + }, + { + "epoch": 0.3822303129954783, + "grad_norm": 0.0, + "learning_rate": 1.4168928682927967e-05, + "loss": 1.249, + "step": 13018 + }, + { + "epoch": 0.3822596746726173, + "grad_norm": 0.0, + "learning_rate": 1.4168064283304005e-05, + "loss": 1.3643, + "step": 13019 + }, + { + "epoch": 0.3822890363497563, + "grad_norm": 0.0, + "learning_rate": 1.4167199845987174e-05, + "loss": 1.3164, + "step": 13020 + }, + { + "epoch": 0.3823183980268953, + "grad_norm": 0.0, + "learning_rate": 1.4166335370985287e-05, + "loss": 1.3623, + "step": 13021 + }, + { + "epoch": 0.3823477597040343, + "grad_norm": 0.0, + "learning_rate": 1.4165470858306161e-05, + "loss": 1.374, + "step": 13022 + }, + { + "epoch": 0.3823771213811733, + "grad_norm": 0.0, + "learning_rate": 1.4164606307957613e-05, + "loss": 1.4551, + "step": 13023 + }, + { + "epoch": 0.38240648305831226, + "grad_norm": 0.0, + "learning_rate": 1.4163741719947468e-05, + "loss": 1.333, + "step": 13024 + }, + { + "epoch": 0.3824358447354513, + "grad_norm": 0.0, + "learning_rate": 1.4162877094283535e-05, + "loss": 1.3643, + "step": 13025 + }, + { + "epoch": 0.3824652064125903, + "grad_norm": 0.0, + "learning_rate": 1.4162012430973642e-05, + "loss": 1.3721, + "step": 13026 + }, + { + "epoch": 0.38249456808972926, + "grad_norm": 0.0, + "learning_rate": 1.4161147730025606e-05, + "loss": 1.3418, + "step": 13027 + }, + { + "epoch": 0.3825239297668683, + "grad_norm": 0.0, + "learning_rate": 1.4160282991447245e-05, + "loss": 1.2847, + "step": 13028 + }, + { + "epoch": 0.3825532914440073, + "grad_norm": 0.0, + "learning_rate": 1.4159418215246377e-05, + "loss": 1.4014, + "step": 13029 + }, + { + "epoch": 0.38258265312114625, + "grad_norm": 0.0, + "learning_rate": 1.4158553401430823e-05, + "loss": 1.2324, + "step": 13030 + }, + { + "epoch": 0.38261201479828527, + "grad_norm": 0.0, + "learning_rate": 1.4157688550008409e-05, + "loss": 1.3643, + "step": 13031 + }, + { + "epoch": 0.3826413764754243, + "grad_norm": 0.0, + "learning_rate": 1.4156823660986954e-05, + "loss": 1.332, + "step": 13032 + }, + { + "epoch": 0.38267073815256325, + "grad_norm": 0.0, + "learning_rate": 1.4155958734374275e-05, + "loss": 1.3906, + "step": 13033 + }, + { + "epoch": 0.38270009982970227, + "grad_norm": 0.0, + "learning_rate": 1.4155093770178196e-05, + "loss": 1.2559, + "step": 13034 + }, + { + "epoch": 0.3827294615068413, + "grad_norm": 0.0, + "learning_rate": 1.4154228768406542e-05, + "loss": 1.4058, + "step": 13035 + }, + { + "epoch": 0.38275882318398025, + "grad_norm": 0.0, + "learning_rate": 1.4153363729067134e-05, + "loss": 1.2197, + "step": 13036 + }, + { + "epoch": 0.38278818486111926, + "grad_norm": 0.0, + "learning_rate": 1.4152498652167795e-05, + "loss": 1.3086, + "step": 13037 + }, + { + "epoch": 0.3828175465382583, + "grad_norm": 0.0, + "learning_rate": 1.4151633537716344e-05, + "loss": 1.3145, + "step": 13038 + }, + { + "epoch": 0.38284690821539724, + "grad_norm": 0.0, + "learning_rate": 1.4150768385720607e-05, + "loss": 1.2202, + "step": 13039 + }, + { + "epoch": 0.38287626989253626, + "grad_norm": 0.0, + "learning_rate": 1.4149903196188412e-05, + "loss": 1.3301, + "step": 13040 + }, + { + "epoch": 0.3829056315696753, + "grad_norm": 0.0, + "learning_rate": 1.4149037969127579e-05, + "loss": 1.4189, + "step": 13041 + }, + { + "epoch": 0.38293499324681424, + "grad_norm": 0.0, + "learning_rate": 1.414817270454593e-05, + "loss": 1.3174, + "step": 13042 + }, + { + "epoch": 0.38296435492395325, + "grad_norm": 0.0, + "learning_rate": 1.4147307402451296e-05, + "loss": 1.4062, + "step": 13043 + }, + { + "epoch": 0.38299371660109227, + "grad_norm": 0.0, + "learning_rate": 1.41464420628515e-05, + "loss": 1.3545, + "step": 13044 + }, + { + "epoch": 0.38302307827823123, + "grad_norm": 0.0, + "learning_rate": 1.4145576685754362e-05, + "loss": 1.3438, + "step": 13045 + }, + { + "epoch": 0.38305243995537025, + "grad_norm": 0.0, + "learning_rate": 1.4144711271167715e-05, + "loss": 1.3896, + "step": 13046 + }, + { + "epoch": 0.38308180163250927, + "grad_norm": 0.0, + "learning_rate": 1.4143845819099383e-05, + "loss": 1.3525, + "step": 13047 + }, + { + "epoch": 0.38311116330964823, + "grad_norm": 0.0, + "learning_rate": 1.4142980329557193e-05, + "loss": 1.2651, + "step": 13048 + }, + { + "epoch": 0.38314052498678725, + "grad_norm": 0.0, + "learning_rate": 1.4142114802548967e-05, + "loss": 1.3711, + "step": 13049 + }, + { + "epoch": 0.38316988666392626, + "grad_norm": 0.0, + "learning_rate": 1.414124923808254e-05, + "loss": 1.3535, + "step": 13050 + }, + { + "epoch": 0.3831992483410652, + "grad_norm": 0.0, + "learning_rate": 1.4140383636165731e-05, + "loss": 1.3369, + "step": 13051 + }, + { + "epoch": 0.38322861001820424, + "grad_norm": 0.0, + "learning_rate": 1.4139517996806377e-05, + "loss": 1.3057, + "step": 13052 + }, + { + "epoch": 0.38325797169534326, + "grad_norm": 0.0, + "learning_rate": 1.4138652320012301e-05, + "loss": 1.499, + "step": 13053 + }, + { + "epoch": 0.3832873333724822, + "grad_norm": 0.0, + "learning_rate": 1.4137786605791329e-05, + "loss": 1.4385, + "step": 13054 + }, + { + "epoch": 0.38331669504962124, + "grad_norm": 0.0, + "learning_rate": 1.4136920854151295e-05, + "loss": 1.3662, + "step": 13055 + }, + { + "epoch": 0.38334605672676025, + "grad_norm": 0.0, + "learning_rate": 1.4136055065100027e-05, + "loss": 1.2461, + "step": 13056 + }, + { + "epoch": 0.3833754184038992, + "grad_norm": 0.0, + "learning_rate": 1.4135189238645354e-05, + "loss": 1.376, + "step": 13057 + }, + { + "epoch": 0.38340478008103823, + "grad_norm": 0.0, + "learning_rate": 1.4134323374795105e-05, + "loss": 1.335, + "step": 13058 + }, + { + "epoch": 0.38343414175817725, + "grad_norm": 0.0, + "learning_rate": 1.4133457473557108e-05, + "loss": 1.2842, + "step": 13059 + }, + { + "epoch": 0.3834635034353162, + "grad_norm": 0.0, + "learning_rate": 1.41325915349392e-05, + "loss": 1.4248, + "step": 13060 + }, + { + "epoch": 0.38349286511245523, + "grad_norm": 0.0, + "learning_rate": 1.4131725558949207e-05, + "loss": 1.4131, + "step": 13061 + }, + { + "epoch": 0.38352222678959424, + "grad_norm": 0.0, + "learning_rate": 1.4130859545594962e-05, + "loss": 1.3345, + "step": 13062 + }, + { + "epoch": 0.3835515884667332, + "grad_norm": 0.0, + "learning_rate": 1.4129993494884296e-05, + "loss": 1.375, + "step": 13063 + }, + { + "epoch": 0.3835809501438722, + "grad_norm": 0.0, + "learning_rate": 1.4129127406825041e-05, + "loss": 1.4033, + "step": 13064 + }, + { + "epoch": 0.38361031182101124, + "grad_norm": 0.0, + "learning_rate": 1.4128261281425031e-05, + "loss": 1.3789, + "step": 13065 + }, + { + "epoch": 0.3836396734981502, + "grad_norm": 0.0, + "learning_rate": 1.4127395118692095e-05, + "loss": 1.3789, + "step": 13066 + }, + { + "epoch": 0.3836690351752892, + "grad_norm": 0.0, + "learning_rate": 1.4126528918634068e-05, + "loss": 1.2246, + "step": 13067 + }, + { + "epoch": 0.38369839685242824, + "grad_norm": 0.0, + "learning_rate": 1.4125662681258784e-05, + "loss": 1.4082, + "step": 13068 + }, + { + "epoch": 0.3837277585295672, + "grad_norm": 0.0, + "learning_rate": 1.4124796406574078e-05, + "loss": 1.3438, + "step": 13069 + }, + { + "epoch": 0.3837571202067062, + "grad_norm": 0.0, + "learning_rate": 1.4123930094587779e-05, + "loss": 1.2959, + "step": 13070 + }, + { + "epoch": 0.38378648188384523, + "grad_norm": 0.0, + "learning_rate": 1.4123063745307723e-05, + "loss": 1.3062, + "step": 13071 + }, + { + "epoch": 0.3838158435609842, + "grad_norm": 0.0, + "learning_rate": 1.4122197358741745e-05, + "loss": 1.3828, + "step": 13072 + }, + { + "epoch": 0.3838452052381232, + "grad_norm": 0.0, + "learning_rate": 1.4121330934897684e-05, + "loss": 1.375, + "step": 13073 + }, + { + "epoch": 0.3838745669152622, + "grad_norm": 0.0, + "learning_rate": 1.412046447378337e-05, + "loss": 1.3584, + "step": 13074 + }, + { + "epoch": 0.3839039285924012, + "grad_norm": 0.0, + "learning_rate": 1.411959797540664e-05, + "loss": 1.3193, + "step": 13075 + }, + { + "epoch": 0.3839332902695402, + "grad_norm": 0.0, + "learning_rate": 1.4118731439775333e-05, + "loss": 1.4756, + "step": 13076 + }, + { + "epoch": 0.38396265194667917, + "grad_norm": 0.0, + "learning_rate": 1.4117864866897282e-05, + "loss": 1.3398, + "step": 13077 + }, + { + "epoch": 0.3839920136238182, + "grad_norm": 0.0, + "learning_rate": 1.4116998256780323e-05, + "loss": 1.2715, + "step": 13078 + }, + { + "epoch": 0.3840213753009572, + "grad_norm": 0.0, + "learning_rate": 1.4116131609432296e-05, + "loss": 1.4043, + "step": 13079 + }, + { + "epoch": 0.38405073697809616, + "grad_norm": 0.0, + "learning_rate": 1.4115264924861035e-05, + "loss": 1.4902, + "step": 13080 + }, + { + "epoch": 0.3840800986552352, + "grad_norm": 0.0, + "learning_rate": 1.411439820307438e-05, + "loss": 1.4268, + "step": 13081 + }, + { + "epoch": 0.3841094603323742, + "grad_norm": 0.0, + "learning_rate": 1.4113531444080166e-05, + "loss": 1.4238, + "step": 13082 + }, + { + "epoch": 0.38413882200951316, + "grad_norm": 0.0, + "learning_rate": 1.4112664647886236e-05, + "loss": 1.4482, + "step": 13083 + }, + { + "epoch": 0.3841681836866522, + "grad_norm": 0.0, + "learning_rate": 1.4111797814500425e-05, + "loss": 1.3438, + "step": 13084 + }, + { + "epoch": 0.3841975453637912, + "grad_norm": 0.0, + "learning_rate": 1.4110930943930577e-05, + "loss": 1.3584, + "step": 13085 + }, + { + "epoch": 0.38422690704093015, + "grad_norm": 0.0, + "learning_rate": 1.4110064036184525e-05, + "loss": 1.3525, + "step": 13086 + }, + { + "epoch": 0.38425626871806917, + "grad_norm": 0.0, + "learning_rate": 1.4109197091270113e-05, + "loss": 1.4209, + "step": 13087 + }, + { + "epoch": 0.3842856303952082, + "grad_norm": 0.0, + "learning_rate": 1.4108330109195177e-05, + "loss": 1.4395, + "step": 13088 + }, + { + "epoch": 0.38431499207234715, + "grad_norm": 0.0, + "learning_rate": 1.410746308996756e-05, + "loss": 1.4326, + "step": 13089 + }, + { + "epoch": 0.38434435374948617, + "grad_norm": 0.0, + "learning_rate": 1.4106596033595104e-05, + "loss": 1.1411, + "step": 13090 + }, + { + "epoch": 0.3843737154266252, + "grad_norm": 0.0, + "learning_rate": 1.4105728940085645e-05, + "loss": 1.3291, + "step": 13091 + }, + { + "epoch": 0.38440307710376415, + "grad_norm": 0.0, + "learning_rate": 1.410486180944703e-05, + "loss": 1.3271, + "step": 13092 + }, + { + "epoch": 0.38443243878090316, + "grad_norm": 0.0, + "learning_rate": 1.4103994641687098e-05, + "loss": 1.3896, + "step": 13093 + }, + { + "epoch": 0.3844618004580422, + "grad_norm": 0.0, + "learning_rate": 1.4103127436813693e-05, + "loss": 1.3164, + "step": 13094 + }, + { + "epoch": 0.38449116213518114, + "grad_norm": 0.0, + "learning_rate": 1.4102260194834653e-05, + "loss": 1.3652, + "step": 13095 + }, + { + "epoch": 0.38452052381232016, + "grad_norm": 0.0, + "learning_rate": 1.4101392915757826e-05, + "loss": 1.3809, + "step": 13096 + }, + { + "epoch": 0.3845498854894592, + "grad_norm": 0.0, + "learning_rate": 1.410052559959105e-05, + "loss": 1.3428, + "step": 13097 + }, + { + "epoch": 0.38457924716659814, + "grad_norm": 0.0, + "learning_rate": 1.4099658246342174e-05, + "loss": 1.4404, + "step": 13098 + }, + { + "epoch": 0.38460860884373715, + "grad_norm": 0.0, + "learning_rate": 1.4098790856019036e-05, + "loss": 1.4023, + "step": 13099 + }, + { + "epoch": 0.38463797052087617, + "grad_norm": 0.0, + "learning_rate": 1.4097923428629484e-05, + "loss": 1.4043, + "step": 13100 + }, + { + "epoch": 0.38466733219801513, + "grad_norm": 0.0, + "learning_rate": 1.4097055964181359e-05, + "loss": 1.3008, + "step": 13101 + }, + { + "epoch": 0.38469669387515415, + "grad_norm": 0.0, + "learning_rate": 1.409618846268251e-05, + "loss": 1.251, + "step": 13102 + }, + { + "epoch": 0.38472605555229317, + "grad_norm": 0.0, + "learning_rate": 1.4095320924140779e-05, + "loss": 1.4717, + "step": 13103 + }, + { + "epoch": 0.38475541722943213, + "grad_norm": 0.0, + "learning_rate": 1.409445334856401e-05, + "loss": 1.4399, + "step": 13104 + }, + { + "epoch": 0.38478477890657115, + "grad_norm": 0.0, + "learning_rate": 1.4093585735960054e-05, + "loss": 1.3447, + "step": 13105 + }, + { + "epoch": 0.38481414058371016, + "grad_norm": 0.0, + "learning_rate": 1.4092718086336753e-05, + "loss": 1.3438, + "step": 13106 + }, + { + "epoch": 0.3848435022608491, + "grad_norm": 0.0, + "learning_rate": 1.4091850399701952e-05, + "loss": 1.3389, + "step": 13107 + }, + { + "epoch": 0.38487286393798814, + "grad_norm": 0.0, + "learning_rate": 1.4090982676063501e-05, + "loss": 1.248, + "step": 13108 + }, + { + "epoch": 0.38490222561512716, + "grad_norm": 0.0, + "learning_rate": 1.4090114915429244e-05, + "loss": 1.4053, + "step": 13109 + }, + { + "epoch": 0.3849315872922661, + "grad_norm": 0.0, + "learning_rate": 1.4089247117807034e-05, + "loss": 1.377, + "step": 13110 + }, + { + "epoch": 0.38496094896940514, + "grad_norm": 0.0, + "learning_rate": 1.4088379283204713e-05, + "loss": 1.4307, + "step": 13111 + }, + { + "epoch": 0.38499031064654415, + "grad_norm": 0.0, + "learning_rate": 1.4087511411630132e-05, + "loss": 1.4229, + "step": 13112 + }, + { + "epoch": 0.3850196723236831, + "grad_norm": 0.0, + "learning_rate": 1.4086643503091137e-05, + "loss": 1.4067, + "step": 13113 + }, + { + "epoch": 0.38504903400082213, + "grad_norm": 0.0, + "learning_rate": 1.4085775557595582e-05, + "loss": 1.4307, + "step": 13114 + }, + { + "epoch": 0.38507839567796115, + "grad_norm": 0.0, + "learning_rate": 1.4084907575151308e-05, + "loss": 1.4189, + "step": 13115 + }, + { + "epoch": 0.3851077573551001, + "grad_norm": 0.0, + "learning_rate": 1.4084039555766172e-05, + "loss": 1.3818, + "step": 13116 + }, + { + "epoch": 0.38513711903223913, + "grad_norm": 0.0, + "learning_rate": 1.4083171499448019e-05, + "loss": 1.3545, + "step": 13117 + }, + { + "epoch": 0.38516648070937815, + "grad_norm": 0.0, + "learning_rate": 1.40823034062047e-05, + "loss": 1.3203, + "step": 13118 + }, + { + "epoch": 0.3851958423865171, + "grad_norm": 0.0, + "learning_rate": 1.4081435276044064e-05, + "loss": 1.2998, + "step": 13119 + }, + { + "epoch": 0.3852252040636561, + "grad_norm": 0.0, + "learning_rate": 1.4080567108973965e-05, + "loss": 1.4404, + "step": 13120 + }, + { + "epoch": 0.38525456574079514, + "grad_norm": 0.0, + "learning_rate": 1.4079698905002252e-05, + "loss": 1.3057, + "step": 13121 + }, + { + "epoch": 0.3852839274179341, + "grad_norm": 0.0, + "learning_rate": 1.407883066413678e-05, + "loss": 1.4121, + "step": 13122 + }, + { + "epoch": 0.3853132890950731, + "grad_norm": 0.0, + "learning_rate": 1.4077962386385396e-05, + "loss": 1.3135, + "step": 13123 + }, + { + "epoch": 0.38534265077221214, + "grad_norm": 0.0, + "learning_rate": 1.4077094071755952e-05, + "loss": 1.4072, + "step": 13124 + }, + { + "epoch": 0.3853720124493511, + "grad_norm": 0.0, + "learning_rate": 1.4076225720256302e-05, + "loss": 1.4502, + "step": 13125 + }, + { + "epoch": 0.3854013741264901, + "grad_norm": 0.0, + "learning_rate": 1.4075357331894304e-05, + "loss": 1.4443, + "step": 13126 + }, + { + "epoch": 0.3854307358036291, + "grad_norm": 0.0, + "learning_rate": 1.4074488906677802e-05, + "loss": 1.3076, + "step": 13127 + }, + { + "epoch": 0.3854600974807681, + "grad_norm": 0.0, + "learning_rate": 1.4073620444614653e-05, + "loss": 1.4707, + "step": 13128 + }, + { + "epoch": 0.3854894591579071, + "grad_norm": 0.0, + "learning_rate": 1.407275194571271e-05, + "loss": 1.3149, + "step": 13129 + }, + { + "epoch": 0.38551882083504607, + "grad_norm": 0.0, + "learning_rate": 1.4071883409979832e-05, + "loss": 1.4453, + "step": 13130 + }, + { + "epoch": 0.3855481825121851, + "grad_norm": 0.0, + "learning_rate": 1.4071014837423866e-05, + "loss": 1.3691, + "step": 13131 + }, + { + "epoch": 0.3855775441893241, + "grad_norm": 0.0, + "learning_rate": 1.4070146228052672e-05, + "loss": 1.4277, + "step": 13132 + }, + { + "epoch": 0.38560690586646307, + "grad_norm": 0.0, + "learning_rate": 1.40692775818741e-05, + "loss": 1.2754, + "step": 13133 + }, + { + "epoch": 0.3856362675436021, + "grad_norm": 0.0, + "learning_rate": 1.4068408898896015e-05, + "loss": 1.3447, + "step": 13134 + }, + { + "epoch": 0.3856656292207411, + "grad_norm": 0.0, + "learning_rate": 1.4067540179126265e-05, + "loss": 1.3604, + "step": 13135 + }, + { + "epoch": 0.38569499089788006, + "grad_norm": 0.0, + "learning_rate": 1.4066671422572704e-05, + "loss": 1.292, + "step": 13136 + }, + { + "epoch": 0.3857243525750191, + "grad_norm": 0.0, + "learning_rate": 1.4065802629243193e-05, + "loss": 1.3398, + "step": 13137 + }, + { + "epoch": 0.3857537142521581, + "grad_norm": 0.0, + "learning_rate": 1.4064933799145586e-05, + "loss": 1.3828, + "step": 13138 + }, + { + "epoch": 0.38578307592929706, + "grad_norm": 0.0, + "learning_rate": 1.4064064932287745e-05, + "loss": 1.4062, + "step": 13139 + }, + { + "epoch": 0.3858124376064361, + "grad_norm": 0.0, + "learning_rate": 1.4063196028677522e-05, + "loss": 1.3428, + "step": 13140 + }, + { + "epoch": 0.3858417992835751, + "grad_norm": 0.0, + "learning_rate": 1.4062327088322776e-05, + "loss": 1.3115, + "step": 13141 + }, + { + "epoch": 0.38587116096071405, + "grad_norm": 0.0, + "learning_rate": 1.4061458111231368e-05, + "loss": 1.3594, + "step": 13142 + }, + { + "epoch": 0.38590052263785307, + "grad_norm": 0.0, + "learning_rate": 1.4060589097411153e-05, + "loss": 1.3027, + "step": 13143 + }, + { + "epoch": 0.3859298843149921, + "grad_norm": 0.0, + "learning_rate": 1.4059720046869989e-05, + "loss": 1.3057, + "step": 13144 + }, + { + "epoch": 0.38595924599213105, + "grad_norm": 0.0, + "learning_rate": 1.405885095961574e-05, + "loss": 1.4521, + "step": 13145 + }, + { + "epoch": 0.38598860766927007, + "grad_norm": 0.0, + "learning_rate": 1.4057981835656261e-05, + "loss": 1.3877, + "step": 13146 + }, + { + "epoch": 0.3860179693464091, + "grad_norm": 0.0, + "learning_rate": 1.4057112674999413e-05, + "loss": 1.3193, + "step": 13147 + }, + { + "epoch": 0.38604733102354805, + "grad_norm": 0.0, + "learning_rate": 1.4056243477653055e-05, + "loss": 1.2881, + "step": 13148 + }, + { + "epoch": 0.38607669270068706, + "grad_norm": 0.0, + "learning_rate": 1.4055374243625051e-05, + "loss": 1.3252, + "step": 13149 + }, + { + "epoch": 0.3861060543778261, + "grad_norm": 0.0, + "learning_rate": 1.4054504972923255e-05, + "loss": 1.3164, + "step": 13150 + }, + { + "epoch": 0.38613541605496504, + "grad_norm": 0.0, + "learning_rate": 1.4053635665555536e-05, + "loss": 1.3516, + "step": 13151 + }, + { + "epoch": 0.38616477773210406, + "grad_norm": 0.0, + "learning_rate": 1.405276632152975e-05, + "loss": 1.416, + "step": 13152 + }, + { + "epoch": 0.3861941394092431, + "grad_norm": 0.0, + "learning_rate": 1.405189694085376e-05, + "loss": 1.3584, + "step": 13153 + }, + { + "epoch": 0.38622350108638204, + "grad_norm": 0.0, + "learning_rate": 1.4051027523535427e-05, + "loss": 1.2871, + "step": 13154 + }, + { + "epoch": 0.38625286276352105, + "grad_norm": 0.0, + "learning_rate": 1.405015806958262e-05, + "loss": 1.3701, + "step": 13155 + }, + { + "epoch": 0.38628222444066007, + "grad_norm": 0.0, + "learning_rate": 1.4049288579003193e-05, + "loss": 1.4062, + "step": 13156 + }, + { + "epoch": 0.38631158611779903, + "grad_norm": 0.0, + "learning_rate": 1.404841905180501e-05, + "loss": 1.3818, + "step": 13157 + }, + { + "epoch": 0.38634094779493805, + "grad_norm": 0.0, + "learning_rate": 1.404754948799594e-05, + "loss": 1.3486, + "step": 13158 + }, + { + "epoch": 0.38637030947207707, + "grad_norm": 0.0, + "learning_rate": 1.4046679887583843e-05, + "loss": 1.3794, + "step": 13159 + }, + { + "epoch": 0.38639967114921603, + "grad_norm": 0.0, + "learning_rate": 1.4045810250576582e-05, + "loss": 1.1914, + "step": 13160 + }, + { + "epoch": 0.38642903282635505, + "grad_norm": 0.0, + "learning_rate": 1.4044940576982023e-05, + "loss": 1.4043, + "step": 13161 + }, + { + "epoch": 0.38645839450349406, + "grad_norm": 0.0, + "learning_rate": 1.4044070866808029e-05, + "loss": 1.3037, + "step": 13162 + }, + { + "epoch": 0.386487756180633, + "grad_norm": 0.0, + "learning_rate": 1.4043201120062469e-05, + "loss": 1.418, + "step": 13163 + }, + { + "epoch": 0.38651711785777204, + "grad_norm": 0.0, + "learning_rate": 1.4042331336753204e-05, + "loss": 1.3008, + "step": 13164 + }, + { + "epoch": 0.38654647953491106, + "grad_norm": 0.0, + "learning_rate": 1.4041461516888105e-05, + "loss": 1.2939, + "step": 13165 + }, + { + "epoch": 0.38657584121205, + "grad_norm": 0.0, + "learning_rate": 1.4040591660475031e-05, + "loss": 1.4297, + "step": 13166 + }, + { + "epoch": 0.38660520288918904, + "grad_norm": 0.0, + "learning_rate": 1.4039721767521854e-05, + "loss": 1.1533, + "step": 13167 + }, + { + "epoch": 0.38663456456632805, + "grad_norm": 0.0, + "learning_rate": 1.4038851838036435e-05, + "loss": 1.4053, + "step": 13168 + }, + { + "epoch": 0.386663926243467, + "grad_norm": 0.0, + "learning_rate": 1.4037981872026646e-05, + "loss": 1.376, + "step": 13169 + }, + { + "epoch": 0.38669328792060603, + "grad_norm": 0.0, + "learning_rate": 1.4037111869500352e-05, + "loss": 1.2192, + "step": 13170 + }, + { + "epoch": 0.38672264959774505, + "grad_norm": 0.0, + "learning_rate": 1.4036241830465425e-05, + "loss": 1.4326, + "step": 13171 + }, + { + "epoch": 0.386752011274884, + "grad_norm": 0.0, + "learning_rate": 1.4035371754929727e-05, + "loss": 1.3154, + "step": 13172 + }, + { + "epoch": 0.38678137295202303, + "grad_norm": 0.0, + "learning_rate": 1.4034501642901128e-05, + "loss": 1.4307, + "step": 13173 + }, + { + "epoch": 0.38681073462916205, + "grad_norm": 0.0, + "learning_rate": 1.4033631494387499e-05, + "loss": 1.3379, + "step": 13174 + }, + { + "epoch": 0.386840096306301, + "grad_norm": 0.0, + "learning_rate": 1.4032761309396708e-05, + "loss": 1.4727, + "step": 13175 + }, + { + "epoch": 0.38686945798344, + "grad_norm": 0.0, + "learning_rate": 1.4031891087936622e-05, + "loss": 1.2563, + "step": 13176 + }, + { + "epoch": 0.386898819660579, + "grad_norm": 0.0, + "learning_rate": 1.4031020830015111e-05, + "loss": 1.4307, + "step": 13177 + }, + { + "epoch": 0.386928181337718, + "grad_norm": 0.0, + "learning_rate": 1.4030150535640048e-05, + "loss": 1.3828, + "step": 13178 + }, + { + "epoch": 0.386957543014857, + "grad_norm": 0.0, + "learning_rate": 1.4029280204819302e-05, + "loss": 1.4355, + "step": 13179 + }, + { + "epoch": 0.386986904691996, + "grad_norm": 0.0, + "learning_rate": 1.4028409837560743e-05, + "loss": 1.2734, + "step": 13180 + }, + { + "epoch": 0.387016266369135, + "grad_norm": 0.0, + "learning_rate": 1.4027539433872242e-05, + "loss": 1.3789, + "step": 13181 + }, + { + "epoch": 0.387045628046274, + "grad_norm": 0.0, + "learning_rate": 1.402666899376167e-05, + "loss": 1.377, + "step": 13182 + }, + { + "epoch": 0.387074989723413, + "grad_norm": 0.0, + "learning_rate": 1.4025798517236902e-05, + "loss": 1.3457, + "step": 13183 + }, + { + "epoch": 0.387104351400552, + "grad_norm": 0.0, + "learning_rate": 1.4024928004305802e-05, + "loss": 1.291, + "step": 13184 + }, + { + "epoch": 0.387133713077691, + "grad_norm": 0.0, + "learning_rate": 1.4024057454976252e-05, + "loss": 1.4053, + "step": 13185 + }, + { + "epoch": 0.38716307475483, + "grad_norm": 0.0, + "learning_rate": 1.4023186869256115e-05, + "loss": 1.2969, + "step": 13186 + }, + { + "epoch": 0.387192436431969, + "grad_norm": 0.0, + "learning_rate": 1.4022316247153272e-05, + "loss": 1.3857, + "step": 13187 + }, + { + "epoch": 0.387221798109108, + "grad_norm": 0.0, + "learning_rate": 1.4021445588675593e-05, + "loss": 1.3711, + "step": 13188 + }, + { + "epoch": 0.38725115978624697, + "grad_norm": 0.0, + "learning_rate": 1.4020574893830952e-05, + "loss": 1.3262, + "step": 13189 + }, + { + "epoch": 0.387280521463386, + "grad_norm": 0.0, + "learning_rate": 1.4019704162627219e-05, + "loss": 1.4385, + "step": 13190 + }, + { + "epoch": 0.387309883140525, + "grad_norm": 0.0, + "learning_rate": 1.4018833395072276e-05, + "loss": 1.3633, + "step": 13191 + }, + { + "epoch": 0.38733924481766396, + "grad_norm": 0.0, + "learning_rate": 1.4017962591173992e-05, + "loss": 1.4287, + "step": 13192 + }, + { + "epoch": 0.387368606494803, + "grad_norm": 0.0, + "learning_rate": 1.4017091750940241e-05, + "loss": 1.2559, + "step": 13193 + }, + { + "epoch": 0.387397968171942, + "grad_norm": 0.0, + "learning_rate": 1.4016220874378906e-05, + "loss": 1.5322, + "step": 13194 + }, + { + "epoch": 0.38742732984908096, + "grad_norm": 0.0, + "learning_rate": 1.4015349961497851e-05, + "loss": 1.4277, + "step": 13195 + }, + { + "epoch": 0.38745669152622, + "grad_norm": 0.0, + "learning_rate": 1.4014479012304963e-05, + "loss": 1.3828, + "step": 13196 + }, + { + "epoch": 0.387486053203359, + "grad_norm": 0.0, + "learning_rate": 1.4013608026808108e-05, + "loss": 1.208, + "step": 13197 + }, + { + "epoch": 0.38751541488049795, + "grad_norm": 0.0, + "learning_rate": 1.4012737005015172e-05, + "loss": 1.4033, + "step": 13198 + }, + { + "epoch": 0.38754477655763697, + "grad_norm": 0.0, + "learning_rate": 1.4011865946934025e-05, + "loss": 1.2832, + "step": 13199 + }, + { + "epoch": 0.387574138234776, + "grad_norm": 0.0, + "learning_rate": 1.4010994852572548e-05, + "loss": 1.3799, + "step": 13200 + }, + { + "epoch": 0.38760349991191495, + "grad_norm": 0.0, + "learning_rate": 1.4010123721938615e-05, + "loss": 1.3203, + "step": 13201 + }, + { + "epoch": 0.38763286158905397, + "grad_norm": 0.0, + "learning_rate": 1.400925255504011e-05, + "loss": 1.3086, + "step": 13202 + }, + { + "epoch": 0.387662223266193, + "grad_norm": 0.0, + "learning_rate": 1.4008381351884904e-05, + "loss": 1.2637, + "step": 13203 + }, + { + "epoch": 0.38769158494333195, + "grad_norm": 0.0, + "learning_rate": 1.4007510112480881e-05, + "loss": 1.4873, + "step": 13204 + }, + { + "epoch": 0.38772094662047096, + "grad_norm": 0.0, + "learning_rate": 1.400663883683592e-05, + "loss": 1.417, + "step": 13205 + }, + { + "epoch": 0.38775030829761, + "grad_norm": 0.0, + "learning_rate": 1.4005767524957894e-05, + "loss": 1.4189, + "step": 13206 + }, + { + "epoch": 0.38777966997474894, + "grad_norm": 0.0, + "learning_rate": 1.4004896176854685e-05, + "loss": 1.3281, + "step": 13207 + }, + { + "epoch": 0.38780903165188796, + "grad_norm": 0.0, + "learning_rate": 1.4004024792534179e-05, + "loss": 1.3486, + "step": 13208 + }, + { + "epoch": 0.387838393329027, + "grad_norm": 0.0, + "learning_rate": 1.4003153372004248e-05, + "loss": 1.3174, + "step": 13209 + }, + { + "epoch": 0.38786775500616594, + "grad_norm": 0.0, + "learning_rate": 1.4002281915272775e-05, + "loss": 1.3838, + "step": 13210 + }, + { + "epoch": 0.38789711668330495, + "grad_norm": 0.0, + "learning_rate": 1.4001410422347643e-05, + "loss": 1.2119, + "step": 13211 + }, + { + "epoch": 0.38792647836044397, + "grad_norm": 0.0, + "learning_rate": 1.4000538893236734e-05, + "loss": 1.3496, + "step": 13212 + }, + { + "epoch": 0.38795584003758293, + "grad_norm": 0.0, + "learning_rate": 1.3999667327947925e-05, + "loss": 1.3486, + "step": 13213 + }, + { + "epoch": 0.38798520171472195, + "grad_norm": 0.0, + "learning_rate": 1.3998795726489104e-05, + "loss": 1.25, + "step": 13214 + }, + { + "epoch": 0.38801456339186097, + "grad_norm": 0.0, + "learning_rate": 1.3997924088868145e-05, + "loss": 1.4473, + "step": 13215 + }, + { + "epoch": 0.38804392506899993, + "grad_norm": 0.0, + "learning_rate": 1.3997052415092933e-05, + "loss": 1.3584, + "step": 13216 + }, + { + "epoch": 0.38807328674613895, + "grad_norm": 0.0, + "learning_rate": 1.3996180705171356e-05, + "loss": 1.3633, + "step": 13217 + }, + { + "epoch": 0.38810264842327796, + "grad_norm": 0.0, + "learning_rate": 1.3995308959111291e-05, + "loss": 1.5186, + "step": 13218 + }, + { + "epoch": 0.3881320101004169, + "grad_norm": 0.0, + "learning_rate": 1.3994437176920626e-05, + "loss": 1.3516, + "step": 13219 + }, + { + "epoch": 0.38816137177755594, + "grad_norm": 0.0, + "learning_rate": 1.3993565358607241e-05, + "loss": 1.3545, + "step": 13220 + }, + { + "epoch": 0.38819073345469496, + "grad_norm": 0.0, + "learning_rate": 1.3992693504179023e-05, + "loss": 1.4619, + "step": 13221 + }, + { + "epoch": 0.3882200951318339, + "grad_norm": 0.0, + "learning_rate": 1.3991821613643852e-05, + "loss": 1.334, + "step": 13222 + }, + { + "epoch": 0.38824945680897294, + "grad_norm": 0.0, + "learning_rate": 1.3990949687009617e-05, + "loss": 1.1367, + "step": 13223 + }, + { + "epoch": 0.38827881848611195, + "grad_norm": 0.0, + "learning_rate": 1.3990077724284206e-05, + "loss": 1.3252, + "step": 13224 + }, + { + "epoch": 0.3883081801632509, + "grad_norm": 0.0, + "learning_rate": 1.3989205725475498e-05, + "loss": 1.4512, + "step": 13225 + }, + { + "epoch": 0.38833754184038993, + "grad_norm": 0.0, + "learning_rate": 1.3988333690591378e-05, + "loss": 1.3364, + "step": 13226 + }, + { + "epoch": 0.3883669035175289, + "grad_norm": 0.0, + "learning_rate": 1.3987461619639737e-05, + "loss": 1.335, + "step": 13227 + }, + { + "epoch": 0.3883962651946679, + "grad_norm": 0.0, + "learning_rate": 1.3986589512628459e-05, + "loss": 1.2842, + "step": 13228 + }, + { + "epoch": 0.38842562687180693, + "grad_norm": 0.0, + "learning_rate": 1.3985717369565434e-05, + "loss": 1.3096, + "step": 13229 + }, + { + "epoch": 0.3884549885489459, + "grad_norm": 0.0, + "learning_rate": 1.3984845190458543e-05, + "loss": 1.3789, + "step": 13230 + }, + { + "epoch": 0.3884843502260849, + "grad_norm": 0.0, + "learning_rate": 1.3983972975315676e-05, + "loss": 1.3867, + "step": 13231 + }, + { + "epoch": 0.3885137119032239, + "grad_norm": 0.0, + "learning_rate": 1.3983100724144723e-05, + "loss": 1.3994, + "step": 13232 + }, + { + "epoch": 0.3885430735803629, + "grad_norm": 0.0, + "learning_rate": 1.3982228436953568e-05, + "loss": 1.3682, + "step": 13233 + }, + { + "epoch": 0.3885724352575019, + "grad_norm": 0.0, + "learning_rate": 1.3981356113750107e-05, + "loss": 1.3877, + "step": 13234 + }, + { + "epoch": 0.3886017969346409, + "grad_norm": 0.0, + "learning_rate": 1.3980483754542216e-05, + "loss": 1.1729, + "step": 13235 + }, + { + "epoch": 0.3886311586117799, + "grad_norm": 0.0, + "learning_rate": 1.3979611359337793e-05, + "loss": 1.3711, + "step": 13236 + }, + { + "epoch": 0.3886605202889189, + "grad_norm": 0.0, + "learning_rate": 1.3978738928144729e-05, + "loss": 1.582, + "step": 13237 + }, + { + "epoch": 0.3886898819660579, + "grad_norm": 0.0, + "learning_rate": 1.3977866460970906e-05, + "loss": 1.377, + "step": 13238 + }, + { + "epoch": 0.3887192436431969, + "grad_norm": 0.0, + "learning_rate": 1.3976993957824216e-05, + "loss": 1.4873, + "step": 13239 + }, + { + "epoch": 0.3887486053203359, + "grad_norm": 0.0, + "learning_rate": 1.3976121418712554e-05, + "loss": 1.397, + "step": 13240 + }, + { + "epoch": 0.3887779669974749, + "grad_norm": 0.0, + "learning_rate": 1.397524884364381e-05, + "loss": 1.3418, + "step": 13241 + }, + { + "epoch": 0.3888073286746139, + "grad_norm": 0.0, + "learning_rate": 1.3974376232625868e-05, + "loss": 1.334, + "step": 13242 + }, + { + "epoch": 0.3888366903517529, + "grad_norm": 0.0, + "learning_rate": 1.3973503585666626e-05, + "loss": 1.4258, + "step": 13243 + }, + { + "epoch": 0.3888660520288919, + "grad_norm": 0.0, + "learning_rate": 1.3972630902773974e-05, + "loss": 1.3955, + "step": 13244 + }, + { + "epoch": 0.38889541370603087, + "grad_norm": 0.0, + "learning_rate": 1.3971758183955804e-05, + "loss": 1.3013, + "step": 13245 + }, + { + "epoch": 0.3889247753831699, + "grad_norm": 0.0, + "learning_rate": 1.3970885429220007e-05, + "loss": 1.2861, + "step": 13246 + }, + { + "epoch": 0.3889541370603089, + "grad_norm": 0.0, + "learning_rate": 1.3970012638574474e-05, + "loss": 1.3574, + "step": 13247 + }, + { + "epoch": 0.38898349873744786, + "grad_norm": 0.0, + "learning_rate": 1.39691398120271e-05, + "loss": 1.4033, + "step": 13248 + }, + { + "epoch": 0.3890128604145869, + "grad_norm": 0.0, + "learning_rate": 1.3968266949585783e-05, + "loss": 1.3945, + "step": 13249 + }, + { + "epoch": 0.3890422220917259, + "grad_norm": 0.0, + "learning_rate": 1.3967394051258407e-05, + "loss": 1.3286, + "step": 13250 + }, + { + "epoch": 0.38907158376886486, + "grad_norm": 0.0, + "learning_rate": 1.3966521117052869e-05, + "loss": 1.3779, + "step": 13251 + }, + { + "epoch": 0.3891009454460039, + "grad_norm": 0.0, + "learning_rate": 1.3965648146977069e-05, + "loss": 1.3389, + "step": 13252 + }, + { + "epoch": 0.3891303071231429, + "grad_norm": 0.0, + "learning_rate": 1.3964775141038897e-05, + "loss": 1.3071, + "step": 13253 + }, + { + "epoch": 0.38915966880028185, + "grad_norm": 0.0, + "learning_rate": 1.3963902099246246e-05, + "loss": 1.3506, + "step": 13254 + }, + { + "epoch": 0.38918903047742087, + "grad_norm": 0.0, + "learning_rate": 1.3963029021607015e-05, + "loss": 1.3916, + "step": 13255 + }, + { + "epoch": 0.3892183921545599, + "grad_norm": 0.0, + "learning_rate": 1.3962155908129095e-05, + "loss": 1.2305, + "step": 13256 + }, + { + "epoch": 0.38924775383169885, + "grad_norm": 0.0, + "learning_rate": 1.3961282758820387e-05, + "loss": 1.3545, + "step": 13257 + }, + { + "epoch": 0.38927711550883787, + "grad_norm": 0.0, + "learning_rate": 1.3960409573688782e-05, + "loss": 1.3604, + "step": 13258 + }, + { + "epoch": 0.3893064771859769, + "grad_norm": 0.0, + "learning_rate": 1.3959536352742183e-05, + "loss": 1.1782, + "step": 13259 + }, + { + "epoch": 0.38933583886311585, + "grad_norm": 0.0, + "learning_rate": 1.3958663095988478e-05, + "loss": 1.3701, + "step": 13260 + }, + { + "epoch": 0.38936520054025486, + "grad_norm": 0.0, + "learning_rate": 1.395778980343557e-05, + "loss": 1.457, + "step": 13261 + }, + { + "epoch": 0.3893945622173939, + "grad_norm": 0.0, + "learning_rate": 1.3956916475091358e-05, + "loss": 1.3877, + "step": 13262 + }, + { + "epoch": 0.38942392389453284, + "grad_norm": 0.0, + "learning_rate": 1.3956043110963734e-05, + "loss": 1.4229, + "step": 13263 + }, + { + "epoch": 0.38945328557167186, + "grad_norm": 0.0, + "learning_rate": 1.3955169711060602e-05, + "loss": 1.3994, + "step": 13264 + }, + { + "epoch": 0.3894826472488109, + "grad_norm": 0.0, + "learning_rate": 1.3954296275389854e-05, + "loss": 1.4111, + "step": 13265 + }, + { + "epoch": 0.38951200892594984, + "grad_norm": 0.0, + "learning_rate": 1.3953422803959393e-05, + "loss": 1.374, + "step": 13266 + }, + { + "epoch": 0.38954137060308885, + "grad_norm": 0.0, + "learning_rate": 1.3952549296777119e-05, + "loss": 1.3516, + "step": 13267 + }, + { + "epoch": 0.38957073228022787, + "grad_norm": 0.0, + "learning_rate": 1.3951675753850926e-05, + "loss": 1.3008, + "step": 13268 + }, + { + "epoch": 0.38960009395736683, + "grad_norm": 0.0, + "learning_rate": 1.3950802175188718e-05, + "loss": 1.4385, + "step": 13269 + }, + { + "epoch": 0.38962945563450585, + "grad_norm": 0.0, + "learning_rate": 1.3949928560798398e-05, + "loss": 1.4561, + "step": 13270 + }, + { + "epoch": 0.38965881731164487, + "grad_norm": 0.0, + "learning_rate": 1.3949054910687859e-05, + "loss": 1.3301, + "step": 13271 + }, + { + "epoch": 0.38968817898878383, + "grad_norm": 0.0, + "learning_rate": 1.3948181224865006e-05, + "loss": 1.4346, + "step": 13272 + }, + { + "epoch": 0.38971754066592285, + "grad_norm": 0.0, + "learning_rate": 1.3947307503337738e-05, + "loss": 1.4092, + "step": 13273 + }, + { + "epoch": 0.38974690234306186, + "grad_norm": 0.0, + "learning_rate": 1.3946433746113962e-05, + "loss": 1.4336, + "step": 13274 + }, + { + "epoch": 0.3897762640202008, + "grad_norm": 0.0, + "learning_rate": 1.3945559953201568e-05, + "loss": 1.292, + "step": 13275 + }, + { + "epoch": 0.38980562569733984, + "grad_norm": 0.0, + "learning_rate": 1.3944686124608466e-05, + "loss": 1.4531, + "step": 13276 + }, + { + "epoch": 0.3898349873744788, + "grad_norm": 0.0, + "learning_rate": 1.3943812260342559e-05, + "loss": 1.4121, + "step": 13277 + }, + { + "epoch": 0.3898643490516178, + "grad_norm": 0.0, + "learning_rate": 1.3942938360411749e-05, + "loss": 1.3643, + "step": 13278 + }, + { + "epoch": 0.38989371072875684, + "grad_norm": 0.0, + "learning_rate": 1.3942064424823935e-05, + "loss": 1.2725, + "step": 13279 + }, + { + "epoch": 0.3899230724058958, + "grad_norm": 0.0, + "learning_rate": 1.3941190453587026e-05, + "loss": 1.2476, + "step": 13280 + }, + { + "epoch": 0.3899524340830348, + "grad_norm": 0.0, + "learning_rate": 1.394031644670892e-05, + "loss": 1.3545, + "step": 13281 + }, + { + "epoch": 0.38998179576017383, + "grad_norm": 0.0, + "learning_rate": 1.3939442404197523e-05, + "loss": 1.3535, + "step": 13282 + }, + { + "epoch": 0.3900111574373128, + "grad_norm": 0.0, + "learning_rate": 1.3938568326060742e-05, + "loss": 1.4531, + "step": 13283 + }, + { + "epoch": 0.3900405191144518, + "grad_norm": 0.0, + "learning_rate": 1.3937694212306476e-05, + "loss": 1.3594, + "step": 13284 + }, + { + "epoch": 0.39006988079159083, + "grad_norm": 0.0, + "learning_rate": 1.3936820062942634e-05, + "loss": 1.3428, + "step": 13285 + }, + { + "epoch": 0.3900992424687298, + "grad_norm": 0.0, + "learning_rate": 1.3935945877977122e-05, + "loss": 1.3452, + "step": 13286 + }, + { + "epoch": 0.3901286041458688, + "grad_norm": 0.0, + "learning_rate": 1.393507165741784e-05, + "loss": 1.375, + "step": 13287 + }, + { + "epoch": 0.3901579658230078, + "grad_norm": 0.0, + "learning_rate": 1.3934197401272697e-05, + "loss": 1.3779, + "step": 13288 + }, + { + "epoch": 0.3901873275001468, + "grad_norm": 0.0, + "learning_rate": 1.39333231095496e-05, + "loss": 1.3037, + "step": 13289 + }, + { + "epoch": 0.3902166891772858, + "grad_norm": 0.0, + "learning_rate": 1.3932448782256459e-05, + "loss": 1.3164, + "step": 13290 + }, + { + "epoch": 0.3902460508544248, + "grad_norm": 0.0, + "learning_rate": 1.393157441940117e-05, + "loss": 1.334, + "step": 13291 + }, + { + "epoch": 0.3902754125315638, + "grad_norm": 0.0, + "learning_rate": 1.393070002099165e-05, + "loss": 1.3135, + "step": 13292 + }, + { + "epoch": 0.3903047742087028, + "grad_norm": 0.0, + "learning_rate": 1.3929825587035803e-05, + "loss": 1.4307, + "step": 13293 + }, + { + "epoch": 0.3903341358858418, + "grad_norm": 0.0, + "learning_rate": 1.3928951117541538e-05, + "loss": 1.25, + "step": 13294 + }, + { + "epoch": 0.3903634975629808, + "grad_norm": 0.0, + "learning_rate": 1.392807661251676e-05, + "loss": 1.3545, + "step": 13295 + }, + { + "epoch": 0.3903928592401198, + "grad_norm": 0.0, + "learning_rate": 1.3927202071969379e-05, + "loss": 1.2891, + "step": 13296 + }, + { + "epoch": 0.3904222209172588, + "grad_norm": 0.0, + "learning_rate": 1.3926327495907304e-05, + "loss": 1.3936, + "step": 13297 + }, + { + "epoch": 0.3904515825943978, + "grad_norm": 0.0, + "learning_rate": 1.3925452884338446e-05, + "loss": 1.3828, + "step": 13298 + }, + { + "epoch": 0.3904809442715368, + "grad_norm": 0.0, + "learning_rate": 1.392457823727071e-05, + "loss": 1.4502, + "step": 13299 + }, + { + "epoch": 0.3905103059486758, + "grad_norm": 0.0, + "learning_rate": 1.3923703554712008e-05, + "loss": 1.291, + "step": 13300 + }, + { + "epoch": 0.39053966762581477, + "grad_norm": 0.0, + "learning_rate": 1.392282883667025e-05, + "loss": 1.2939, + "step": 13301 + }, + { + "epoch": 0.3905690293029538, + "grad_norm": 0.0, + "learning_rate": 1.3921954083153349e-05, + "loss": 1.3047, + "step": 13302 + }, + { + "epoch": 0.3905983909800928, + "grad_norm": 0.0, + "learning_rate": 1.392107929416921e-05, + "loss": 1.4639, + "step": 13303 + }, + { + "epoch": 0.39062775265723176, + "grad_norm": 0.0, + "learning_rate": 1.3920204469725746e-05, + "loss": 1.4814, + "step": 13304 + }, + { + "epoch": 0.3906571143343708, + "grad_norm": 0.0, + "learning_rate": 1.3919329609830872e-05, + "loss": 1.2949, + "step": 13305 + }, + { + "epoch": 0.3906864760115098, + "grad_norm": 0.0, + "learning_rate": 1.3918454714492493e-05, + "loss": 1.4775, + "step": 13306 + }, + { + "epoch": 0.39071583768864876, + "grad_norm": 0.0, + "learning_rate": 1.3917579783718527e-05, + "loss": 1.4248, + "step": 13307 + }, + { + "epoch": 0.3907451993657878, + "grad_norm": 0.0, + "learning_rate": 1.3916704817516882e-05, + "loss": 1.3945, + "step": 13308 + }, + { + "epoch": 0.3907745610429268, + "grad_norm": 0.0, + "learning_rate": 1.3915829815895471e-05, + "loss": 1.3828, + "step": 13309 + }, + { + "epoch": 0.39080392272006576, + "grad_norm": 0.0, + "learning_rate": 1.3914954778862213e-05, + "loss": 1.4648, + "step": 13310 + }, + { + "epoch": 0.39083328439720477, + "grad_norm": 0.0, + "learning_rate": 1.3914079706425014e-05, + "loss": 1.3584, + "step": 13311 + }, + { + "epoch": 0.3908626460743438, + "grad_norm": 0.0, + "learning_rate": 1.3913204598591787e-05, + "loss": 1.4375, + "step": 13312 + }, + { + "epoch": 0.39089200775148275, + "grad_norm": 0.0, + "learning_rate": 1.3912329455370452e-05, + "loss": 1.207, + "step": 13313 + }, + { + "epoch": 0.39092136942862177, + "grad_norm": 0.0, + "learning_rate": 1.3911454276768922e-05, + "loss": 1.2207, + "step": 13314 + }, + { + "epoch": 0.3909507311057608, + "grad_norm": 0.0, + "learning_rate": 1.3910579062795107e-05, + "loss": 1.4365, + "step": 13315 + }, + { + "epoch": 0.39098009278289975, + "grad_norm": 0.0, + "learning_rate": 1.390970381345692e-05, + "loss": 1.5127, + "step": 13316 + }, + { + "epoch": 0.39100945446003876, + "grad_norm": 0.0, + "learning_rate": 1.3908828528762284e-05, + "loss": 1.3643, + "step": 13317 + }, + { + "epoch": 0.3910388161371778, + "grad_norm": 0.0, + "learning_rate": 1.3907953208719107e-05, + "loss": 1.4082, + "step": 13318 + }, + { + "epoch": 0.39106817781431674, + "grad_norm": 0.0, + "learning_rate": 1.3907077853335313e-05, + "loss": 1.4795, + "step": 13319 + }, + { + "epoch": 0.39109753949145576, + "grad_norm": 0.0, + "learning_rate": 1.3906202462618812e-05, + "loss": 1.3037, + "step": 13320 + }, + { + "epoch": 0.3911269011685948, + "grad_norm": 0.0, + "learning_rate": 1.3905327036577521e-05, + "loss": 1.2549, + "step": 13321 + }, + { + "epoch": 0.39115626284573374, + "grad_norm": 0.0, + "learning_rate": 1.3904451575219358e-05, + "loss": 1.3037, + "step": 13322 + }, + { + "epoch": 0.39118562452287275, + "grad_norm": 0.0, + "learning_rate": 1.390357607855224e-05, + "loss": 1.4082, + "step": 13323 + }, + { + "epoch": 0.39121498620001177, + "grad_norm": 0.0, + "learning_rate": 1.3902700546584083e-05, + "loss": 1.2183, + "step": 13324 + }, + { + "epoch": 0.39124434787715073, + "grad_norm": 0.0, + "learning_rate": 1.3901824979322804e-05, + "loss": 1.3223, + "step": 13325 + }, + { + "epoch": 0.39127370955428975, + "grad_norm": 0.0, + "learning_rate": 1.3900949376776323e-05, + "loss": 1.3838, + "step": 13326 + }, + { + "epoch": 0.3913030712314287, + "grad_norm": 0.0, + "learning_rate": 1.390007373895256e-05, + "loss": 1.4326, + "step": 13327 + }, + { + "epoch": 0.39133243290856773, + "grad_norm": 0.0, + "learning_rate": 1.389919806585943e-05, + "loss": 1.4707, + "step": 13328 + }, + { + "epoch": 0.39136179458570675, + "grad_norm": 0.0, + "learning_rate": 1.3898322357504852e-05, + "loss": 1.2637, + "step": 13329 + }, + { + "epoch": 0.3913911562628457, + "grad_norm": 0.0, + "learning_rate": 1.3897446613896745e-05, + "loss": 1.46, + "step": 13330 + }, + { + "epoch": 0.3914205179399847, + "grad_norm": 0.0, + "learning_rate": 1.3896570835043035e-05, + "loss": 1.4717, + "step": 13331 + }, + { + "epoch": 0.39144987961712374, + "grad_norm": 0.0, + "learning_rate": 1.3895695020951636e-05, + "loss": 1.1567, + "step": 13332 + }, + { + "epoch": 0.3914792412942627, + "grad_norm": 0.0, + "learning_rate": 1.3894819171630468e-05, + "loss": 1.3975, + "step": 13333 + }, + { + "epoch": 0.3915086029714017, + "grad_norm": 0.0, + "learning_rate": 1.3893943287087452e-05, + "loss": 1.4219, + "step": 13334 + }, + { + "epoch": 0.39153796464854074, + "grad_norm": 0.0, + "learning_rate": 1.389306736733051e-05, + "loss": 1.459, + "step": 13335 + }, + { + "epoch": 0.3915673263256797, + "grad_norm": 0.0, + "learning_rate": 1.3892191412367564e-05, + "loss": 1.2046, + "step": 13336 + }, + { + "epoch": 0.3915966880028187, + "grad_norm": 0.0, + "learning_rate": 1.3891315422206533e-05, + "loss": 1.4609, + "step": 13337 + }, + { + "epoch": 0.39162604967995773, + "grad_norm": 0.0, + "learning_rate": 1.389043939685534e-05, + "loss": 1.3096, + "step": 13338 + }, + { + "epoch": 0.3916554113570967, + "grad_norm": 0.0, + "learning_rate": 1.3889563336321908e-05, + "loss": 1.4375, + "step": 13339 + }, + { + "epoch": 0.3916847730342357, + "grad_norm": 0.0, + "learning_rate": 1.3888687240614158e-05, + "loss": 1.2617, + "step": 13340 + }, + { + "epoch": 0.39171413471137473, + "grad_norm": 0.0, + "learning_rate": 1.3887811109740013e-05, + "loss": 1.3115, + "step": 13341 + }, + { + "epoch": 0.3917434963885137, + "grad_norm": 0.0, + "learning_rate": 1.3886934943707398e-05, + "loss": 1.4014, + "step": 13342 + }, + { + "epoch": 0.3917728580656527, + "grad_norm": 0.0, + "learning_rate": 1.3886058742524232e-05, + "loss": 1.3047, + "step": 13343 + }, + { + "epoch": 0.3918022197427917, + "grad_norm": 0.0, + "learning_rate": 1.3885182506198447e-05, + "loss": 1.2915, + "step": 13344 + }, + { + "epoch": 0.3918315814199307, + "grad_norm": 0.0, + "learning_rate": 1.3884306234737956e-05, + "loss": 1.3994, + "step": 13345 + }, + { + "epoch": 0.3918609430970697, + "grad_norm": 0.0, + "learning_rate": 1.3883429928150691e-05, + "loss": 1.3833, + "step": 13346 + }, + { + "epoch": 0.3918903047742087, + "grad_norm": 0.0, + "learning_rate": 1.3882553586444575e-05, + "loss": 1.3574, + "step": 13347 + }, + { + "epoch": 0.3919196664513477, + "grad_norm": 0.0, + "learning_rate": 1.3881677209627534e-05, + "loss": 1.2246, + "step": 13348 + }, + { + "epoch": 0.3919490281284867, + "grad_norm": 0.0, + "learning_rate": 1.3880800797707487e-05, + "loss": 1.3975, + "step": 13349 + }, + { + "epoch": 0.3919783898056257, + "grad_norm": 0.0, + "learning_rate": 1.3879924350692368e-05, + "loss": 1.335, + "step": 13350 + }, + { + "epoch": 0.3920077514827647, + "grad_norm": 0.0, + "learning_rate": 1.3879047868590099e-05, + "loss": 1.3984, + "step": 13351 + }, + { + "epoch": 0.3920371131599037, + "grad_norm": 0.0, + "learning_rate": 1.3878171351408609e-05, + "loss": 1.4043, + "step": 13352 + }, + { + "epoch": 0.3920664748370427, + "grad_norm": 0.0, + "learning_rate": 1.3877294799155818e-05, + "loss": 1.3164, + "step": 13353 + }, + { + "epoch": 0.3920958365141817, + "grad_norm": 0.0, + "learning_rate": 1.387641821183966e-05, + "loss": 1.4609, + "step": 13354 + }, + { + "epoch": 0.3921251981913207, + "grad_norm": 0.0, + "learning_rate": 1.3875541589468058e-05, + "loss": 1.2783, + "step": 13355 + }, + { + "epoch": 0.3921545598684597, + "grad_norm": 0.0, + "learning_rate": 1.3874664932048942e-05, + "loss": 1.2266, + "step": 13356 + }, + { + "epoch": 0.39218392154559867, + "grad_norm": 0.0, + "learning_rate": 1.3873788239590238e-05, + "loss": 1.4521, + "step": 13357 + }, + { + "epoch": 0.3922132832227377, + "grad_norm": 0.0, + "learning_rate": 1.3872911512099873e-05, + "loss": 1.3369, + "step": 13358 + }, + { + "epoch": 0.3922426448998767, + "grad_norm": 0.0, + "learning_rate": 1.387203474958578e-05, + "loss": 1.377, + "step": 13359 + }, + { + "epoch": 0.39227200657701566, + "grad_norm": 0.0, + "learning_rate": 1.3871157952055884e-05, + "loss": 1.3301, + "step": 13360 + }, + { + "epoch": 0.3923013682541547, + "grad_norm": 0.0, + "learning_rate": 1.3870281119518114e-05, + "loss": 1.4219, + "step": 13361 + }, + { + "epoch": 0.3923307299312937, + "grad_norm": 0.0, + "learning_rate": 1.3869404251980405e-05, + "loss": 1.4131, + "step": 13362 + }, + { + "epoch": 0.39236009160843266, + "grad_norm": 0.0, + "learning_rate": 1.3868527349450678e-05, + "loss": 1.4033, + "step": 13363 + }, + { + "epoch": 0.3923894532855717, + "grad_norm": 0.0, + "learning_rate": 1.386765041193687e-05, + "loss": 1.2529, + "step": 13364 + }, + { + "epoch": 0.3924188149627107, + "grad_norm": 0.0, + "learning_rate": 1.3866773439446907e-05, + "loss": 1.3359, + "step": 13365 + }, + { + "epoch": 0.39244817663984966, + "grad_norm": 0.0, + "learning_rate": 1.3865896431988722e-05, + "loss": 1.3096, + "step": 13366 + }, + { + "epoch": 0.39247753831698867, + "grad_norm": 0.0, + "learning_rate": 1.3865019389570245e-05, + "loss": 1.3838, + "step": 13367 + }, + { + "epoch": 0.3925068999941277, + "grad_norm": 0.0, + "learning_rate": 1.3864142312199409e-05, + "loss": 1.3232, + "step": 13368 + }, + { + "epoch": 0.39253626167126665, + "grad_norm": 0.0, + "learning_rate": 1.3863265199884144e-05, + "loss": 1.4004, + "step": 13369 + }, + { + "epoch": 0.39256562334840567, + "grad_norm": 0.0, + "learning_rate": 1.3862388052632381e-05, + "loss": 1.5127, + "step": 13370 + }, + { + "epoch": 0.3925949850255447, + "grad_norm": 0.0, + "learning_rate": 1.3861510870452055e-05, + "loss": 1.3203, + "step": 13371 + }, + { + "epoch": 0.39262434670268365, + "grad_norm": 0.0, + "learning_rate": 1.3860633653351101e-05, + "loss": 1.3623, + "step": 13372 + }, + { + "epoch": 0.39265370837982266, + "grad_norm": 0.0, + "learning_rate": 1.3859756401337445e-05, + "loss": 1.3398, + "step": 13373 + }, + { + "epoch": 0.3926830700569617, + "grad_norm": 0.0, + "learning_rate": 1.3858879114419021e-05, + "loss": 1.3223, + "step": 13374 + }, + { + "epoch": 0.39271243173410064, + "grad_norm": 0.0, + "learning_rate": 1.3858001792603765e-05, + "loss": 1.2256, + "step": 13375 + }, + { + "epoch": 0.39274179341123966, + "grad_norm": 0.0, + "learning_rate": 1.3857124435899616e-05, + "loss": 1.3574, + "step": 13376 + }, + { + "epoch": 0.3927711550883786, + "grad_norm": 0.0, + "learning_rate": 1.3856247044314497e-05, + "loss": 1.5068, + "step": 13377 + }, + { + "epoch": 0.39280051676551764, + "grad_norm": 0.0, + "learning_rate": 1.3855369617856352e-05, + "loss": 1.4199, + "step": 13378 + }, + { + "epoch": 0.39282987844265665, + "grad_norm": 0.0, + "learning_rate": 1.3854492156533107e-05, + "loss": 1.2812, + "step": 13379 + }, + { + "epoch": 0.3928592401197956, + "grad_norm": 0.0, + "learning_rate": 1.385361466035271e-05, + "loss": 1.3691, + "step": 13380 + }, + { + "epoch": 0.39288860179693463, + "grad_norm": 0.0, + "learning_rate": 1.3852737129323086e-05, + "loss": 1.2466, + "step": 13381 + }, + { + "epoch": 0.39291796347407365, + "grad_norm": 0.0, + "learning_rate": 1.3851859563452173e-05, + "loss": 1.3818, + "step": 13382 + }, + { + "epoch": 0.3929473251512126, + "grad_norm": 0.0, + "learning_rate": 1.3850981962747905e-05, + "loss": 1.4482, + "step": 13383 + }, + { + "epoch": 0.39297668682835163, + "grad_norm": 0.0, + "learning_rate": 1.3850104327218222e-05, + "loss": 1.1714, + "step": 13384 + }, + { + "epoch": 0.39300604850549065, + "grad_norm": 0.0, + "learning_rate": 1.384922665687106e-05, + "loss": 1.3125, + "step": 13385 + }, + { + "epoch": 0.3930354101826296, + "grad_norm": 0.0, + "learning_rate": 1.3848348951714357e-05, + "loss": 1.2563, + "step": 13386 + }, + { + "epoch": 0.3930647718597686, + "grad_norm": 0.0, + "learning_rate": 1.3847471211756048e-05, + "loss": 1.2734, + "step": 13387 + }, + { + "epoch": 0.39309413353690764, + "grad_norm": 0.0, + "learning_rate": 1.384659343700407e-05, + "loss": 1.3867, + "step": 13388 + }, + { + "epoch": 0.3931234952140466, + "grad_norm": 0.0, + "learning_rate": 1.3845715627466362e-05, + "loss": 1.3105, + "step": 13389 + }, + { + "epoch": 0.3931528568911856, + "grad_norm": 0.0, + "learning_rate": 1.3844837783150864e-05, + "loss": 1.3906, + "step": 13390 + }, + { + "epoch": 0.39318221856832464, + "grad_norm": 0.0, + "learning_rate": 1.3843959904065516e-05, + "loss": 1.3926, + "step": 13391 + }, + { + "epoch": 0.3932115802454636, + "grad_norm": 0.0, + "learning_rate": 1.384308199021825e-05, + "loss": 1.3594, + "step": 13392 + }, + { + "epoch": 0.3932409419226026, + "grad_norm": 0.0, + "learning_rate": 1.3842204041617013e-05, + "loss": 1.4238, + "step": 13393 + }, + { + "epoch": 0.39327030359974163, + "grad_norm": 0.0, + "learning_rate": 1.3841326058269738e-05, + "loss": 1.3789, + "step": 13394 + }, + { + "epoch": 0.3932996652768806, + "grad_norm": 0.0, + "learning_rate": 1.3840448040184366e-05, + "loss": 1.4248, + "step": 13395 + }, + { + "epoch": 0.3933290269540196, + "grad_norm": 0.0, + "learning_rate": 1.3839569987368841e-05, + "loss": 1.5693, + "step": 13396 + }, + { + "epoch": 0.39335838863115863, + "grad_norm": 0.0, + "learning_rate": 1.3838691899831105e-05, + "loss": 1.4131, + "step": 13397 + }, + { + "epoch": 0.3933877503082976, + "grad_norm": 0.0, + "learning_rate": 1.383781377757909e-05, + "loss": 1.1851, + "step": 13398 + }, + { + "epoch": 0.3934171119854366, + "grad_norm": 0.0, + "learning_rate": 1.3836935620620744e-05, + "loss": 1.3828, + "step": 13399 + }, + { + "epoch": 0.3934464736625756, + "grad_norm": 0.0, + "learning_rate": 1.3836057428964007e-05, + "loss": 1.2256, + "step": 13400 + }, + { + "epoch": 0.3934758353397146, + "grad_norm": 0.0, + "learning_rate": 1.383517920261682e-05, + "loss": 1.3745, + "step": 13401 + }, + { + "epoch": 0.3935051970168536, + "grad_norm": 0.0, + "learning_rate": 1.3834300941587124e-05, + "loss": 1.3574, + "step": 13402 + }, + { + "epoch": 0.3935345586939926, + "grad_norm": 0.0, + "learning_rate": 1.3833422645882864e-05, + "loss": 1.3125, + "step": 13403 + }, + { + "epoch": 0.3935639203711316, + "grad_norm": 0.0, + "learning_rate": 1.383254431551198e-05, + "loss": 1.3896, + "step": 13404 + }, + { + "epoch": 0.3935932820482706, + "grad_norm": 0.0, + "learning_rate": 1.3831665950482419e-05, + "loss": 1.2178, + "step": 13405 + }, + { + "epoch": 0.3936226437254096, + "grad_norm": 0.0, + "learning_rate": 1.3830787550802116e-05, + "loss": 1.3281, + "step": 13406 + }, + { + "epoch": 0.3936520054025486, + "grad_norm": 0.0, + "learning_rate": 1.3829909116479023e-05, + "loss": 1.3457, + "step": 13407 + }, + { + "epoch": 0.3936813670796876, + "grad_norm": 0.0, + "learning_rate": 1.3829030647521082e-05, + "loss": 1.4775, + "step": 13408 + }, + { + "epoch": 0.3937107287568266, + "grad_norm": 0.0, + "learning_rate": 1.3828152143936237e-05, + "loss": 1.4268, + "step": 13409 + }, + { + "epoch": 0.3937400904339656, + "grad_norm": 0.0, + "learning_rate": 1.3827273605732429e-05, + "loss": 1.3477, + "step": 13410 + }, + { + "epoch": 0.3937694521111046, + "grad_norm": 0.0, + "learning_rate": 1.382639503291761e-05, + "loss": 1.332, + "step": 13411 + }, + { + "epoch": 0.3937988137882436, + "grad_norm": 0.0, + "learning_rate": 1.3825516425499717e-05, + "loss": 1.3936, + "step": 13412 + }, + { + "epoch": 0.39382817546538257, + "grad_norm": 0.0, + "learning_rate": 1.38246377834867e-05, + "loss": 1.4121, + "step": 13413 + }, + { + "epoch": 0.3938575371425216, + "grad_norm": 0.0, + "learning_rate": 1.3823759106886502e-05, + "loss": 1.4434, + "step": 13414 + }, + { + "epoch": 0.3938868988196606, + "grad_norm": 0.0, + "learning_rate": 1.3822880395707074e-05, + "loss": 1.4531, + "step": 13415 + }, + { + "epoch": 0.39391626049679956, + "grad_norm": 0.0, + "learning_rate": 1.3822001649956358e-05, + "loss": 1.1484, + "step": 13416 + }, + { + "epoch": 0.3939456221739386, + "grad_norm": 0.0, + "learning_rate": 1.3821122869642305e-05, + "loss": 1.4805, + "step": 13417 + }, + { + "epoch": 0.3939749838510776, + "grad_norm": 0.0, + "learning_rate": 1.3820244054772856e-05, + "loss": 1.417, + "step": 13418 + }, + { + "epoch": 0.39400434552821656, + "grad_norm": 0.0, + "learning_rate": 1.3819365205355961e-05, + "loss": 1.417, + "step": 13419 + }, + { + "epoch": 0.3940337072053556, + "grad_norm": 0.0, + "learning_rate": 1.381848632139957e-05, + "loss": 1.25, + "step": 13420 + }, + { + "epoch": 0.3940630688824946, + "grad_norm": 0.0, + "learning_rate": 1.381760740291163e-05, + "loss": 1.3081, + "step": 13421 + }, + { + "epoch": 0.39409243055963356, + "grad_norm": 0.0, + "learning_rate": 1.3816728449900089e-05, + "loss": 1.2969, + "step": 13422 + }, + { + "epoch": 0.3941217922367726, + "grad_norm": 0.0, + "learning_rate": 1.3815849462372895e-05, + "loss": 1.3428, + "step": 13423 + }, + { + "epoch": 0.3941511539139116, + "grad_norm": 0.0, + "learning_rate": 1.3814970440337996e-05, + "loss": 1.4082, + "step": 13424 + }, + { + "epoch": 0.39418051559105055, + "grad_norm": 0.0, + "learning_rate": 1.3814091383803343e-05, + "loss": 1.416, + "step": 13425 + }, + { + "epoch": 0.39420987726818957, + "grad_norm": 0.0, + "learning_rate": 1.3813212292776884e-05, + "loss": 1.4336, + "step": 13426 + }, + { + "epoch": 0.39423923894532853, + "grad_norm": 0.0, + "learning_rate": 1.381233316726657e-05, + "loss": 1.4316, + "step": 13427 + }, + { + "epoch": 0.39426860062246755, + "grad_norm": 0.0, + "learning_rate": 1.3811454007280349e-05, + "loss": 1.3682, + "step": 13428 + }, + { + "epoch": 0.39429796229960656, + "grad_norm": 0.0, + "learning_rate": 1.3810574812826176e-05, + "loss": 1.25, + "step": 13429 + }, + { + "epoch": 0.3943273239767455, + "grad_norm": 0.0, + "learning_rate": 1.3809695583911997e-05, + "loss": 1.2437, + "step": 13430 + }, + { + "epoch": 0.39435668565388454, + "grad_norm": 0.0, + "learning_rate": 1.3808816320545768e-05, + "loss": 1.4805, + "step": 13431 + }, + { + "epoch": 0.39438604733102356, + "grad_norm": 0.0, + "learning_rate": 1.3807937022735437e-05, + "loss": 1.3633, + "step": 13432 + }, + { + "epoch": 0.3944154090081625, + "grad_norm": 0.0, + "learning_rate": 1.3807057690488952e-05, + "loss": 1.3057, + "step": 13433 + }, + { + "epoch": 0.39444477068530154, + "grad_norm": 0.0, + "learning_rate": 1.3806178323814275e-05, + "loss": 1.2783, + "step": 13434 + }, + { + "epoch": 0.39447413236244055, + "grad_norm": 0.0, + "learning_rate": 1.3805298922719349e-05, + "loss": 1.3428, + "step": 13435 + }, + { + "epoch": 0.3945034940395795, + "grad_norm": 0.0, + "learning_rate": 1.3804419487212131e-05, + "loss": 1.3809, + "step": 13436 + }, + { + "epoch": 0.39453285571671853, + "grad_norm": 0.0, + "learning_rate": 1.3803540017300575e-05, + "loss": 1.3105, + "step": 13437 + }, + { + "epoch": 0.39456221739385755, + "grad_norm": 0.0, + "learning_rate": 1.380266051299263e-05, + "loss": 1.3281, + "step": 13438 + }, + { + "epoch": 0.3945915790709965, + "grad_norm": 0.0, + "learning_rate": 1.3801780974296254e-05, + "loss": 1.3779, + "step": 13439 + }, + { + "epoch": 0.39462094074813553, + "grad_norm": 0.0, + "learning_rate": 1.3800901401219395e-05, + "loss": 1.3291, + "step": 13440 + }, + { + "epoch": 0.39465030242527455, + "grad_norm": 0.0, + "learning_rate": 1.3800021793770017e-05, + "loss": 1.4414, + "step": 13441 + }, + { + "epoch": 0.3946796641024135, + "grad_norm": 0.0, + "learning_rate": 1.3799142151956067e-05, + "loss": 1.4238, + "step": 13442 + }, + { + "epoch": 0.3947090257795525, + "grad_norm": 0.0, + "learning_rate": 1.37982624757855e-05, + "loss": 1.3135, + "step": 13443 + }, + { + "epoch": 0.39473838745669154, + "grad_norm": 0.0, + "learning_rate": 1.3797382765266274e-05, + "loss": 1.25, + "step": 13444 + }, + { + "epoch": 0.3947677491338305, + "grad_norm": 0.0, + "learning_rate": 1.3796503020406342e-05, + "loss": 1.4307, + "step": 13445 + }, + { + "epoch": 0.3947971108109695, + "grad_norm": 0.0, + "learning_rate": 1.3795623241213662e-05, + "loss": 1.3154, + "step": 13446 + }, + { + "epoch": 0.39482647248810854, + "grad_norm": 0.0, + "learning_rate": 1.3794743427696186e-05, + "loss": 1.3838, + "step": 13447 + }, + { + "epoch": 0.3948558341652475, + "grad_norm": 0.0, + "learning_rate": 1.3793863579861877e-05, + "loss": 1.3682, + "step": 13448 + }, + { + "epoch": 0.3948851958423865, + "grad_norm": 0.0, + "learning_rate": 1.3792983697718684e-05, + "loss": 1.3828, + "step": 13449 + }, + { + "epoch": 0.39491455751952553, + "grad_norm": 0.0, + "learning_rate": 1.3792103781274576e-05, + "loss": 1.2998, + "step": 13450 + }, + { + "epoch": 0.3949439191966645, + "grad_norm": 0.0, + "learning_rate": 1.3791223830537495e-05, + "loss": 1.3936, + "step": 13451 + }, + { + "epoch": 0.3949732808738035, + "grad_norm": 0.0, + "learning_rate": 1.3790343845515408e-05, + "loss": 1.4258, + "step": 13452 + }, + { + "epoch": 0.39500264255094253, + "grad_norm": 0.0, + "learning_rate": 1.378946382621627e-05, + "loss": 1.25, + "step": 13453 + }, + { + "epoch": 0.3950320042280815, + "grad_norm": 0.0, + "learning_rate": 1.3788583772648042e-05, + "loss": 1.3535, + "step": 13454 + }, + { + "epoch": 0.3950613659052205, + "grad_norm": 0.0, + "learning_rate": 1.378770368481868e-05, + "loss": 1.2451, + "step": 13455 + }, + { + "epoch": 0.3950907275823595, + "grad_norm": 0.0, + "learning_rate": 1.3786823562736143e-05, + "loss": 1.3213, + "step": 13456 + }, + { + "epoch": 0.3951200892594985, + "grad_norm": 0.0, + "learning_rate": 1.3785943406408389e-05, + "loss": 1.3545, + "step": 13457 + }, + { + "epoch": 0.3951494509366375, + "grad_norm": 0.0, + "learning_rate": 1.3785063215843382e-05, + "loss": 1.3037, + "step": 13458 + }, + { + "epoch": 0.3951788126137765, + "grad_norm": 0.0, + "learning_rate": 1.3784182991049077e-05, + "loss": 1.3975, + "step": 13459 + }, + { + "epoch": 0.3952081742909155, + "grad_norm": 0.0, + "learning_rate": 1.3783302732033438e-05, + "loss": 1.3818, + "step": 13460 + }, + { + "epoch": 0.3952375359680545, + "grad_norm": 0.0, + "learning_rate": 1.3782422438804424e-05, + "loss": 1.4297, + "step": 13461 + }, + { + "epoch": 0.3952668976451935, + "grad_norm": 0.0, + "learning_rate": 1.3781542111369994e-05, + "loss": 1.4785, + "step": 13462 + }, + { + "epoch": 0.3952962593223325, + "grad_norm": 0.0, + "learning_rate": 1.3780661749738111e-05, + "loss": 1.3613, + "step": 13463 + }, + { + "epoch": 0.3953256209994715, + "grad_norm": 0.0, + "learning_rate": 1.3779781353916733e-05, + "loss": 1.3721, + "step": 13464 + }, + { + "epoch": 0.3953549826766105, + "grad_norm": 0.0, + "learning_rate": 1.3778900923913824e-05, + "loss": 1.416, + "step": 13465 + }, + { + "epoch": 0.3953843443537495, + "grad_norm": 0.0, + "learning_rate": 1.3778020459737351e-05, + "loss": 1.2949, + "step": 13466 + }, + { + "epoch": 0.3954137060308885, + "grad_norm": 0.0, + "learning_rate": 1.3777139961395267e-05, + "loss": 1.249, + "step": 13467 + }, + { + "epoch": 0.3954430677080275, + "grad_norm": 0.0, + "learning_rate": 1.377625942889554e-05, + "loss": 1.2661, + "step": 13468 + }, + { + "epoch": 0.39547242938516647, + "grad_norm": 0.0, + "learning_rate": 1.3775378862246132e-05, + "loss": 1.3926, + "step": 13469 + }, + { + "epoch": 0.3955017910623055, + "grad_norm": 0.0, + "learning_rate": 1.3774498261455008e-05, + "loss": 1.3057, + "step": 13470 + }, + { + "epoch": 0.3955311527394445, + "grad_norm": 0.0, + "learning_rate": 1.377361762653013e-05, + "loss": 1.1689, + "step": 13471 + }, + { + "epoch": 0.39556051441658346, + "grad_norm": 0.0, + "learning_rate": 1.3772736957479455e-05, + "loss": 1.4668, + "step": 13472 + }, + { + "epoch": 0.3955898760937225, + "grad_norm": 0.0, + "learning_rate": 1.3771856254310959e-05, + "loss": 1.4189, + "step": 13473 + }, + { + "epoch": 0.3956192377708615, + "grad_norm": 0.0, + "learning_rate": 1.3770975517032598e-05, + "loss": 1.3203, + "step": 13474 + }, + { + "epoch": 0.39564859944800046, + "grad_norm": 0.0, + "learning_rate": 1.3770094745652342e-05, + "loss": 1.2227, + "step": 13475 + }, + { + "epoch": 0.3956779611251395, + "grad_norm": 0.0, + "learning_rate": 1.3769213940178153e-05, + "loss": 1.3486, + "step": 13476 + }, + { + "epoch": 0.3957073228022785, + "grad_norm": 0.0, + "learning_rate": 1.3768333100617997e-05, + "loss": 1.2334, + "step": 13477 + }, + { + "epoch": 0.39573668447941746, + "grad_norm": 0.0, + "learning_rate": 1.3767452226979841e-05, + "loss": 1.0439, + "step": 13478 + }, + { + "epoch": 0.3957660461565565, + "grad_norm": 0.0, + "learning_rate": 1.3766571319271646e-05, + "loss": 1.4473, + "step": 13479 + }, + { + "epoch": 0.39579540783369543, + "grad_norm": 0.0, + "learning_rate": 1.3765690377501385e-05, + "loss": 1.3457, + "step": 13480 + }, + { + "epoch": 0.39582476951083445, + "grad_norm": 0.0, + "learning_rate": 1.3764809401677022e-05, + "loss": 1.2656, + "step": 13481 + }, + { + "epoch": 0.39585413118797347, + "grad_norm": 0.0, + "learning_rate": 1.376392839180652e-05, + "loss": 1.2437, + "step": 13482 + }, + { + "epoch": 0.39588349286511243, + "grad_norm": 0.0, + "learning_rate": 1.3763047347897853e-05, + "loss": 1.3037, + "step": 13483 + }, + { + "epoch": 0.39591285454225145, + "grad_norm": 0.0, + "learning_rate": 1.3762166269958982e-05, + "loss": 1.2793, + "step": 13484 + }, + { + "epoch": 0.39594221621939046, + "grad_norm": 0.0, + "learning_rate": 1.3761285157997881e-05, + "loss": 1.3438, + "step": 13485 + }, + { + "epoch": 0.3959715778965294, + "grad_norm": 0.0, + "learning_rate": 1.3760404012022512e-05, + "loss": 1.46, + "step": 13486 + }, + { + "epoch": 0.39600093957366844, + "grad_norm": 0.0, + "learning_rate": 1.375952283204085e-05, + "loss": 1.4043, + "step": 13487 + }, + { + "epoch": 0.39603030125080746, + "grad_norm": 0.0, + "learning_rate": 1.375864161806086e-05, + "loss": 1.3057, + "step": 13488 + }, + { + "epoch": 0.3960596629279464, + "grad_norm": 0.0, + "learning_rate": 1.3757760370090507e-05, + "loss": 1.2891, + "step": 13489 + }, + { + "epoch": 0.39608902460508544, + "grad_norm": 0.0, + "learning_rate": 1.3756879088137769e-05, + "loss": 1.3682, + "step": 13490 + }, + { + "epoch": 0.39611838628222446, + "grad_norm": 0.0, + "learning_rate": 1.3755997772210612e-05, + "loss": 1.3125, + "step": 13491 + }, + { + "epoch": 0.3961477479593634, + "grad_norm": 0.0, + "learning_rate": 1.3755116422317e-05, + "loss": 1.3115, + "step": 13492 + }, + { + "epoch": 0.39617710963650243, + "grad_norm": 0.0, + "learning_rate": 1.3754235038464912e-05, + "loss": 1.3135, + "step": 13493 + }, + { + "epoch": 0.39620647131364145, + "grad_norm": 0.0, + "learning_rate": 1.3753353620662312e-05, + "loss": 1.3291, + "step": 13494 + }, + { + "epoch": 0.3962358329907804, + "grad_norm": 0.0, + "learning_rate": 1.375247216891718e-05, + "loss": 1.3955, + "step": 13495 + }, + { + "epoch": 0.39626519466791943, + "grad_norm": 0.0, + "learning_rate": 1.3751590683237475e-05, + "loss": 1.4277, + "step": 13496 + }, + { + "epoch": 0.39629455634505845, + "grad_norm": 0.0, + "learning_rate": 1.3750709163631177e-05, + "loss": 1.3389, + "step": 13497 + }, + { + "epoch": 0.3963239180221974, + "grad_norm": 0.0, + "learning_rate": 1.3749827610106255e-05, + "loss": 1.4346, + "step": 13498 + }, + { + "epoch": 0.3963532796993364, + "grad_norm": 0.0, + "learning_rate": 1.3748946022670682e-05, + "loss": 1.3662, + "step": 13499 + }, + { + "epoch": 0.39638264137647544, + "grad_norm": 0.0, + "learning_rate": 1.3748064401332433e-05, + "loss": 1.3857, + "step": 13500 + }, + { + "epoch": 0.3964120030536144, + "grad_norm": 0.0, + "learning_rate": 1.3747182746099475e-05, + "loss": 1.3223, + "step": 13501 + }, + { + "epoch": 0.3964413647307534, + "grad_norm": 0.0, + "learning_rate": 1.3746301056979781e-05, + "loss": 1.168, + "step": 13502 + }, + { + "epoch": 0.39647072640789244, + "grad_norm": 0.0, + "learning_rate": 1.3745419333981331e-05, + "loss": 1.3242, + "step": 13503 + }, + { + "epoch": 0.3965000880850314, + "grad_norm": 0.0, + "learning_rate": 1.3744537577112093e-05, + "loss": 1.4043, + "step": 13504 + }, + { + "epoch": 0.3965294497621704, + "grad_norm": 0.0, + "learning_rate": 1.3743655786380044e-05, + "loss": 1.3252, + "step": 13505 + }, + { + "epoch": 0.39655881143930943, + "grad_norm": 0.0, + "learning_rate": 1.3742773961793155e-05, + "loss": 1.3672, + "step": 13506 + }, + { + "epoch": 0.3965881731164484, + "grad_norm": 0.0, + "learning_rate": 1.3741892103359405e-05, + "loss": 1.417, + "step": 13507 + }, + { + "epoch": 0.3966175347935874, + "grad_norm": 0.0, + "learning_rate": 1.3741010211086763e-05, + "loss": 1.3652, + "step": 13508 + }, + { + "epoch": 0.39664689647072643, + "grad_norm": 0.0, + "learning_rate": 1.3740128284983213e-05, + "loss": 1.4062, + "step": 13509 + }, + { + "epoch": 0.3966762581478654, + "grad_norm": 0.0, + "learning_rate": 1.3739246325056718e-05, + "loss": 1.335, + "step": 13510 + }, + { + "epoch": 0.3967056198250044, + "grad_norm": 0.0, + "learning_rate": 1.3738364331315266e-05, + "loss": 1.3345, + "step": 13511 + }, + { + "epoch": 0.3967349815021434, + "grad_norm": 0.0, + "learning_rate": 1.3737482303766827e-05, + "loss": 1.3779, + "step": 13512 + }, + { + "epoch": 0.3967643431792824, + "grad_norm": 0.0, + "learning_rate": 1.3736600242419376e-05, + "loss": 1.3789, + "step": 13513 + }, + { + "epoch": 0.3967937048564214, + "grad_norm": 0.0, + "learning_rate": 1.3735718147280895e-05, + "loss": 1.3018, + "step": 13514 + }, + { + "epoch": 0.3968230665335604, + "grad_norm": 0.0, + "learning_rate": 1.3734836018359357e-05, + "loss": 1.3032, + "step": 13515 + }, + { + "epoch": 0.3968524282106994, + "grad_norm": 0.0, + "learning_rate": 1.373395385566274e-05, + "loss": 1.3389, + "step": 13516 + }, + { + "epoch": 0.3968817898878384, + "grad_norm": 0.0, + "learning_rate": 1.3733071659199022e-05, + "loss": 1.376, + "step": 13517 + }, + { + "epoch": 0.3969111515649774, + "grad_norm": 0.0, + "learning_rate": 1.3732189428976179e-05, + "loss": 1.1367, + "step": 13518 + }, + { + "epoch": 0.3969405132421164, + "grad_norm": 0.0, + "learning_rate": 1.3731307165002195e-05, + "loss": 1.3652, + "step": 13519 + }, + { + "epoch": 0.3969698749192554, + "grad_norm": 0.0, + "learning_rate": 1.3730424867285046e-05, + "loss": 1.4023, + "step": 13520 + }, + { + "epoch": 0.3969992365963944, + "grad_norm": 0.0, + "learning_rate": 1.3729542535832705e-05, + "loss": 1.3691, + "step": 13521 + }, + { + "epoch": 0.3970285982735334, + "grad_norm": 0.0, + "learning_rate": 1.3728660170653156e-05, + "loss": 1.3857, + "step": 13522 + }, + { + "epoch": 0.3970579599506724, + "grad_norm": 0.0, + "learning_rate": 1.3727777771754381e-05, + "loss": 1.4443, + "step": 13523 + }, + { + "epoch": 0.3970873216278114, + "grad_norm": 0.0, + "learning_rate": 1.3726895339144356e-05, + "loss": 1.416, + "step": 13524 + }, + { + "epoch": 0.39711668330495037, + "grad_norm": 0.0, + "learning_rate": 1.372601287283106e-05, + "loss": 1.418, + "step": 13525 + }, + { + "epoch": 0.3971460449820894, + "grad_norm": 0.0, + "learning_rate": 1.3725130372822476e-05, + "loss": 1.4746, + "step": 13526 + }, + { + "epoch": 0.3971754066592284, + "grad_norm": 0.0, + "learning_rate": 1.3724247839126586e-05, + "loss": 1.2686, + "step": 13527 + }, + { + "epoch": 0.39720476833636736, + "grad_norm": 0.0, + "learning_rate": 1.3723365271751367e-05, + "loss": 1.2002, + "step": 13528 + }, + { + "epoch": 0.3972341300135064, + "grad_norm": 0.0, + "learning_rate": 1.3722482670704805e-05, + "loss": 1.2769, + "step": 13529 + }, + { + "epoch": 0.39726349169064534, + "grad_norm": 0.0, + "learning_rate": 1.3721600035994876e-05, + "loss": 1.2812, + "step": 13530 + }, + { + "epoch": 0.39729285336778436, + "grad_norm": 0.0, + "learning_rate": 1.3720717367629564e-05, + "loss": 1.3652, + "step": 13531 + }, + { + "epoch": 0.3973222150449234, + "grad_norm": 0.0, + "learning_rate": 1.3719834665616855e-05, + "loss": 1.4482, + "step": 13532 + }, + { + "epoch": 0.39735157672206234, + "grad_norm": 0.0, + "learning_rate": 1.3718951929964726e-05, + "loss": 1.4902, + "step": 13533 + }, + { + "epoch": 0.39738093839920136, + "grad_norm": 0.0, + "learning_rate": 1.3718069160681164e-05, + "loss": 1.4404, + "step": 13534 + }, + { + "epoch": 0.3974103000763404, + "grad_norm": 0.0, + "learning_rate": 1.3717186357774148e-05, + "loss": 1.3975, + "step": 13535 + }, + { + "epoch": 0.39743966175347933, + "grad_norm": 0.0, + "learning_rate": 1.3716303521251666e-05, + "loss": 1.3984, + "step": 13536 + }, + { + "epoch": 0.39746902343061835, + "grad_norm": 0.0, + "learning_rate": 1.3715420651121698e-05, + "loss": 1.2397, + "step": 13537 + }, + { + "epoch": 0.39749838510775737, + "grad_norm": 0.0, + "learning_rate": 1.3714537747392232e-05, + "loss": 1.3594, + "step": 13538 + }, + { + "epoch": 0.39752774678489633, + "grad_norm": 0.0, + "learning_rate": 1.3713654810071248e-05, + "loss": 1.3828, + "step": 13539 + }, + { + "epoch": 0.39755710846203535, + "grad_norm": 0.0, + "learning_rate": 1.3712771839166734e-05, + "loss": 1.207, + "step": 13540 + }, + { + "epoch": 0.39758647013917436, + "grad_norm": 0.0, + "learning_rate": 1.3711888834686673e-05, + "loss": 1.3057, + "step": 13541 + }, + { + "epoch": 0.3976158318163133, + "grad_norm": 0.0, + "learning_rate": 1.3711005796639047e-05, + "loss": 1.2852, + "step": 13542 + }, + { + "epoch": 0.39764519349345234, + "grad_norm": 0.0, + "learning_rate": 1.3710122725031849e-05, + "loss": 1.3174, + "step": 13543 + }, + { + "epoch": 0.39767455517059136, + "grad_norm": 0.0, + "learning_rate": 1.370923961987306e-05, + "loss": 1.3486, + "step": 13544 + }, + { + "epoch": 0.3977039168477303, + "grad_norm": 0.0, + "learning_rate": 1.3708356481170668e-05, + "loss": 1.4453, + "step": 13545 + }, + { + "epoch": 0.39773327852486934, + "grad_norm": 0.0, + "learning_rate": 1.3707473308932657e-05, + "loss": 1.416, + "step": 13546 + }, + { + "epoch": 0.39776264020200836, + "grad_norm": 0.0, + "learning_rate": 1.3706590103167015e-05, + "loss": 1.3193, + "step": 13547 + }, + { + "epoch": 0.3977920018791473, + "grad_norm": 0.0, + "learning_rate": 1.3705706863881732e-05, + "loss": 1.4043, + "step": 13548 + }, + { + "epoch": 0.39782136355628633, + "grad_norm": 0.0, + "learning_rate": 1.3704823591084792e-05, + "loss": 1.3247, + "step": 13549 + }, + { + "epoch": 0.39785072523342535, + "grad_norm": 0.0, + "learning_rate": 1.3703940284784183e-05, + "loss": 1.2949, + "step": 13550 + }, + { + "epoch": 0.3978800869105643, + "grad_norm": 0.0, + "learning_rate": 1.3703056944987893e-05, + "loss": 1.4512, + "step": 13551 + }, + { + "epoch": 0.39790944858770333, + "grad_norm": 0.0, + "learning_rate": 1.3702173571703912e-05, + "loss": 1.4551, + "step": 13552 + }, + { + "epoch": 0.39793881026484235, + "grad_norm": 0.0, + "learning_rate": 1.3701290164940227e-05, + "loss": 1.333, + "step": 13553 + }, + { + "epoch": 0.3979681719419813, + "grad_norm": 0.0, + "learning_rate": 1.3700406724704825e-05, + "loss": 1.46, + "step": 13554 + }, + { + "epoch": 0.3979975336191203, + "grad_norm": 0.0, + "learning_rate": 1.3699523251005698e-05, + "loss": 1.332, + "step": 13555 + }, + { + "epoch": 0.39802689529625934, + "grad_norm": 0.0, + "learning_rate": 1.3698639743850834e-05, + "loss": 1.4121, + "step": 13556 + }, + { + "epoch": 0.3980562569733983, + "grad_norm": 0.0, + "learning_rate": 1.3697756203248225e-05, + "loss": 1.3838, + "step": 13557 + }, + { + "epoch": 0.3980856186505373, + "grad_norm": 0.0, + "learning_rate": 1.369687262920586e-05, + "loss": 1.3389, + "step": 13558 + }, + { + "epoch": 0.39811498032767634, + "grad_norm": 0.0, + "learning_rate": 1.369598902173173e-05, + "loss": 1.3047, + "step": 13559 + }, + { + "epoch": 0.3981443420048153, + "grad_norm": 0.0, + "learning_rate": 1.3695105380833822e-05, + "loss": 1.3223, + "step": 13560 + }, + { + "epoch": 0.3981737036819543, + "grad_norm": 0.0, + "learning_rate": 1.3694221706520131e-05, + "loss": 1.3652, + "step": 13561 + }, + { + "epoch": 0.39820306535909333, + "grad_norm": 0.0, + "learning_rate": 1.3693337998798649e-05, + "loss": 1.2773, + "step": 13562 + }, + { + "epoch": 0.3982324270362323, + "grad_norm": 0.0, + "learning_rate": 1.369245425767736e-05, + "loss": 1.3691, + "step": 13563 + }, + { + "epoch": 0.3982617887133713, + "grad_norm": 0.0, + "learning_rate": 1.3691570483164265e-05, + "loss": 1.3311, + "step": 13564 + }, + { + "epoch": 0.39829115039051033, + "grad_norm": 0.0, + "learning_rate": 1.3690686675267355e-05, + "loss": 1.2837, + "step": 13565 + }, + { + "epoch": 0.3983205120676493, + "grad_norm": 0.0, + "learning_rate": 1.368980283399462e-05, + "loss": 1.4443, + "step": 13566 + }, + { + "epoch": 0.3983498737447883, + "grad_norm": 0.0, + "learning_rate": 1.3688918959354047e-05, + "loss": 1.4287, + "step": 13567 + }, + { + "epoch": 0.3983792354219273, + "grad_norm": 0.0, + "learning_rate": 1.3688035051353642e-05, + "loss": 1.3477, + "step": 13568 + }, + { + "epoch": 0.3984085970990663, + "grad_norm": 0.0, + "learning_rate": 1.3687151110001388e-05, + "loss": 1.3076, + "step": 13569 + }, + { + "epoch": 0.3984379587762053, + "grad_norm": 0.0, + "learning_rate": 1.3686267135305285e-05, + "loss": 1.5195, + "step": 13570 + }, + { + "epoch": 0.3984673204533443, + "grad_norm": 0.0, + "learning_rate": 1.368538312727332e-05, + "loss": 1.3613, + "step": 13571 + }, + { + "epoch": 0.3984966821304833, + "grad_norm": 0.0, + "learning_rate": 1.3684499085913495e-05, + "loss": 1.3062, + "step": 13572 + }, + { + "epoch": 0.3985260438076223, + "grad_norm": 0.0, + "learning_rate": 1.36836150112338e-05, + "loss": 1.3281, + "step": 13573 + }, + { + "epoch": 0.3985554054847613, + "grad_norm": 0.0, + "learning_rate": 1.3682730903242229e-05, + "loss": 1.2217, + "step": 13574 + }, + { + "epoch": 0.3985847671619003, + "grad_norm": 0.0, + "learning_rate": 1.368184676194678e-05, + "loss": 1.3867, + "step": 13575 + }, + { + "epoch": 0.3986141288390393, + "grad_norm": 0.0, + "learning_rate": 1.3680962587355449e-05, + "loss": 1.3262, + "step": 13576 + }, + { + "epoch": 0.3986434905161783, + "grad_norm": 0.0, + "learning_rate": 1.3680078379476232e-05, + "loss": 1.3613, + "step": 13577 + }, + { + "epoch": 0.3986728521933173, + "grad_norm": 0.0, + "learning_rate": 1.3679194138317122e-05, + "loss": 1.375, + "step": 13578 + }, + { + "epoch": 0.3987022138704563, + "grad_norm": 0.0, + "learning_rate": 1.3678309863886118e-05, + "loss": 1.4541, + "step": 13579 + }, + { + "epoch": 0.39873157554759525, + "grad_norm": 0.0, + "learning_rate": 1.3677425556191216e-05, + "loss": 1.3506, + "step": 13580 + }, + { + "epoch": 0.39876093722473427, + "grad_norm": 0.0, + "learning_rate": 1.367654121524041e-05, + "loss": 1.4209, + "step": 13581 + }, + { + "epoch": 0.3987902989018733, + "grad_norm": 0.0, + "learning_rate": 1.3675656841041705e-05, + "loss": 1.5605, + "step": 13582 + }, + { + "epoch": 0.39881966057901225, + "grad_norm": 0.0, + "learning_rate": 1.367477243360309e-05, + "loss": 1.417, + "step": 13583 + }, + { + "epoch": 0.39884902225615126, + "grad_norm": 0.0, + "learning_rate": 1.3673887992932566e-05, + "loss": 1.313, + "step": 13584 + }, + { + "epoch": 0.3988783839332903, + "grad_norm": 0.0, + "learning_rate": 1.3673003519038135e-05, + "loss": 1.2866, + "step": 13585 + }, + { + "epoch": 0.39890774561042924, + "grad_norm": 0.0, + "learning_rate": 1.3672119011927792e-05, + "loss": 1.2568, + "step": 13586 + }, + { + "epoch": 0.39893710728756826, + "grad_norm": 0.0, + "learning_rate": 1.3671234471609536e-05, + "loss": 1.2524, + "step": 13587 + }, + { + "epoch": 0.3989664689647073, + "grad_norm": 0.0, + "learning_rate": 1.3670349898091366e-05, + "loss": 1.3516, + "step": 13588 + }, + { + "epoch": 0.39899583064184624, + "grad_norm": 0.0, + "learning_rate": 1.3669465291381281e-05, + "loss": 1.3633, + "step": 13589 + }, + { + "epoch": 0.39902519231898526, + "grad_norm": 0.0, + "learning_rate": 1.3668580651487284e-05, + "loss": 1.3115, + "step": 13590 + }, + { + "epoch": 0.3990545539961243, + "grad_norm": 0.0, + "learning_rate": 1.3667695978417372e-05, + "loss": 1.3984, + "step": 13591 + }, + { + "epoch": 0.39908391567326323, + "grad_norm": 0.0, + "learning_rate": 1.3666811272179543e-05, + "loss": 1.3511, + "step": 13592 + }, + { + "epoch": 0.39911327735040225, + "grad_norm": 0.0, + "learning_rate": 1.3665926532781806e-05, + "loss": 1.3506, + "step": 13593 + }, + { + "epoch": 0.39914263902754127, + "grad_norm": 0.0, + "learning_rate": 1.3665041760232151e-05, + "loss": 1.2607, + "step": 13594 + }, + { + "epoch": 0.39917200070468023, + "grad_norm": 0.0, + "learning_rate": 1.3664156954538587e-05, + "loss": 1.3311, + "step": 13595 + }, + { + "epoch": 0.39920136238181925, + "grad_norm": 0.0, + "learning_rate": 1.3663272115709114e-05, + "loss": 1.3887, + "step": 13596 + }, + { + "epoch": 0.39923072405895826, + "grad_norm": 0.0, + "learning_rate": 1.3662387243751734e-05, + "loss": 1.2598, + "step": 13597 + }, + { + "epoch": 0.3992600857360972, + "grad_norm": 0.0, + "learning_rate": 1.3661502338674448e-05, + "loss": 1.4053, + "step": 13598 + }, + { + "epoch": 0.39928944741323624, + "grad_norm": 0.0, + "learning_rate": 1.3660617400485257e-05, + "loss": 1.376, + "step": 13599 + }, + { + "epoch": 0.39931880909037526, + "grad_norm": 0.0, + "learning_rate": 1.3659732429192163e-05, + "loss": 1.2075, + "step": 13600 + }, + { + "epoch": 0.3993481707675142, + "grad_norm": 0.0, + "learning_rate": 1.3658847424803175e-05, + "loss": 1.2324, + "step": 13601 + }, + { + "epoch": 0.39937753244465324, + "grad_norm": 0.0, + "learning_rate": 1.3657962387326293e-05, + "loss": 1.3535, + "step": 13602 + }, + { + "epoch": 0.39940689412179226, + "grad_norm": 0.0, + "learning_rate": 1.3657077316769518e-05, + "loss": 1.3877, + "step": 13603 + }, + { + "epoch": 0.3994362557989312, + "grad_norm": 0.0, + "learning_rate": 1.3656192213140855e-05, + "loss": 1.4043, + "step": 13604 + }, + { + "epoch": 0.39946561747607023, + "grad_norm": 0.0, + "learning_rate": 1.3655307076448314e-05, + "loss": 1.3789, + "step": 13605 + }, + { + "epoch": 0.39949497915320925, + "grad_norm": 0.0, + "learning_rate": 1.3654421906699893e-05, + "loss": 1.2529, + "step": 13606 + }, + { + "epoch": 0.3995243408303482, + "grad_norm": 0.0, + "learning_rate": 1.3653536703903597e-05, + "loss": 1.3281, + "step": 13607 + }, + { + "epoch": 0.39955370250748723, + "grad_norm": 0.0, + "learning_rate": 1.3652651468067437e-05, + "loss": 1.4736, + "step": 13608 + }, + { + "epoch": 0.39958306418462625, + "grad_norm": 0.0, + "learning_rate": 1.365176619919941e-05, + "loss": 1.3203, + "step": 13609 + }, + { + "epoch": 0.3996124258617652, + "grad_norm": 0.0, + "learning_rate": 1.3650880897307528e-05, + "loss": 1.373, + "step": 13610 + }, + { + "epoch": 0.3996417875389042, + "grad_norm": 0.0, + "learning_rate": 1.3649995562399797e-05, + "loss": 1.4521, + "step": 13611 + }, + { + "epoch": 0.39967114921604324, + "grad_norm": 0.0, + "learning_rate": 1.3649110194484216e-05, + "loss": 1.2412, + "step": 13612 + }, + { + "epoch": 0.3997005108931822, + "grad_norm": 0.0, + "learning_rate": 1.36482247935688e-05, + "loss": 1.2695, + "step": 13613 + }, + { + "epoch": 0.3997298725703212, + "grad_norm": 0.0, + "learning_rate": 1.3647339359661556e-05, + "loss": 1.2847, + "step": 13614 + }, + { + "epoch": 0.39975923424746024, + "grad_norm": 0.0, + "learning_rate": 1.3646453892770484e-05, + "loss": 1.3711, + "step": 13615 + }, + { + "epoch": 0.3997885959245992, + "grad_norm": 0.0, + "learning_rate": 1.3645568392903594e-05, + "loss": 1.3467, + "step": 13616 + }, + { + "epoch": 0.3998179576017382, + "grad_norm": 0.0, + "learning_rate": 1.36446828600689e-05, + "loss": 1.3643, + "step": 13617 + }, + { + "epoch": 0.39984731927887723, + "grad_norm": 0.0, + "learning_rate": 1.3643797294274406e-05, + "loss": 1.2515, + "step": 13618 + }, + { + "epoch": 0.3998766809560162, + "grad_norm": 0.0, + "learning_rate": 1.3642911695528116e-05, + "loss": 1.3926, + "step": 13619 + }, + { + "epoch": 0.3999060426331552, + "grad_norm": 0.0, + "learning_rate": 1.3642026063838043e-05, + "loss": 1.4209, + "step": 13620 + }, + { + "epoch": 0.39993540431029423, + "grad_norm": 0.0, + "learning_rate": 1.3641140399212196e-05, + "loss": 1.3633, + "step": 13621 + }, + { + "epoch": 0.3999647659874332, + "grad_norm": 0.0, + "learning_rate": 1.3640254701658586e-05, + "loss": 1.4395, + "step": 13622 + }, + { + "epoch": 0.3999941276645722, + "grad_norm": 0.0, + "learning_rate": 1.3639368971185217e-05, + "loss": 1.3447, + "step": 13623 + }, + { + "epoch": 0.4000234893417112, + "grad_norm": 0.0, + "learning_rate": 1.3638483207800104e-05, + "loss": 1.2949, + "step": 13624 + }, + { + "epoch": 0.4000528510188502, + "grad_norm": 0.0, + "learning_rate": 1.3637597411511254e-05, + "loss": 1.3232, + "step": 13625 + }, + { + "epoch": 0.4000822126959892, + "grad_norm": 0.0, + "learning_rate": 1.3636711582326682e-05, + "loss": 1.3604, + "step": 13626 + }, + { + "epoch": 0.4001115743731282, + "grad_norm": 0.0, + "learning_rate": 1.3635825720254395e-05, + "loss": 1.2627, + "step": 13627 + }, + { + "epoch": 0.4001409360502672, + "grad_norm": 0.0, + "learning_rate": 1.3634939825302404e-05, + "loss": 1.4189, + "step": 13628 + }, + { + "epoch": 0.4001702977274062, + "grad_norm": 0.0, + "learning_rate": 1.3634053897478721e-05, + "loss": 1.2793, + "step": 13629 + }, + { + "epoch": 0.40019965940454516, + "grad_norm": 0.0, + "learning_rate": 1.363316793679136e-05, + "loss": 1.4131, + "step": 13630 + }, + { + "epoch": 0.4002290210816842, + "grad_norm": 0.0, + "learning_rate": 1.3632281943248329e-05, + "loss": 1.3457, + "step": 13631 + }, + { + "epoch": 0.4002583827588232, + "grad_norm": 0.0, + "learning_rate": 1.3631395916857642e-05, + "loss": 1.2612, + "step": 13632 + }, + { + "epoch": 0.40028774443596216, + "grad_norm": 0.0, + "learning_rate": 1.3630509857627312e-05, + "loss": 1.29, + "step": 13633 + }, + { + "epoch": 0.4003171061131012, + "grad_norm": 0.0, + "learning_rate": 1.3629623765565354e-05, + "loss": 1.3535, + "step": 13634 + }, + { + "epoch": 0.4003464677902402, + "grad_norm": 0.0, + "learning_rate": 1.3628737640679775e-05, + "loss": 1.3965, + "step": 13635 + }, + { + "epoch": 0.40037582946737915, + "grad_norm": 0.0, + "learning_rate": 1.3627851482978591e-05, + "loss": 1.3242, + "step": 13636 + }, + { + "epoch": 0.40040519114451817, + "grad_norm": 0.0, + "learning_rate": 1.3626965292469822e-05, + "loss": 1.3013, + "step": 13637 + }, + { + "epoch": 0.4004345528216572, + "grad_norm": 0.0, + "learning_rate": 1.3626079069161472e-05, + "loss": 1.3838, + "step": 13638 + }, + { + "epoch": 0.40046391449879615, + "grad_norm": 0.0, + "learning_rate": 1.3625192813061567e-05, + "loss": 1.2422, + "step": 13639 + }, + { + "epoch": 0.40049327617593516, + "grad_norm": 0.0, + "learning_rate": 1.362430652417811e-05, + "loss": 1.2808, + "step": 13640 + }, + { + "epoch": 0.4005226378530742, + "grad_norm": 0.0, + "learning_rate": 1.3623420202519122e-05, + "loss": 1.3672, + "step": 13641 + }, + { + "epoch": 0.40055199953021314, + "grad_norm": 0.0, + "learning_rate": 1.3622533848092618e-05, + "loss": 1.4609, + "step": 13642 + }, + { + "epoch": 0.40058136120735216, + "grad_norm": 0.0, + "learning_rate": 1.362164746090661e-05, + "loss": 1.373, + "step": 13643 + }, + { + "epoch": 0.4006107228844912, + "grad_norm": 0.0, + "learning_rate": 1.3620761040969117e-05, + "loss": 1.3174, + "step": 13644 + }, + { + "epoch": 0.40064008456163014, + "grad_norm": 0.0, + "learning_rate": 1.3619874588288155e-05, + "loss": 1.3242, + "step": 13645 + }, + { + "epoch": 0.40066944623876916, + "grad_norm": 0.0, + "learning_rate": 1.3618988102871742e-05, + "loss": 1.373, + "step": 13646 + }, + { + "epoch": 0.4006988079159082, + "grad_norm": 0.0, + "learning_rate": 1.3618101584727894e-05, + "loss": 1.3242, + "step": 13647 + }, + { + "epoch": 0.40072816959304713, + "grad_norm": 0.0, + "learning_rate": 1.3617215033864623e-05, + "loss": 1.5088, + "step": 13648 + }, + { + "epoch": 0.40075753127018615, + "grad_norm": 0.0, + "learning_rate": 1.361632845028995e-05, + "loss": 1.2568, + "step": 13649 + }, + { + "epoch": 0.40078689294732517, + "grad_norm": 0.0, + "learning_rate": 1.3615441834011893e-05, + "loss": 1.208, + "step": 13650 + }, + { + "epoch": 0.40081625462446413, + "grad_norm": 0.0, + "learning_rate": 1.3614555185038474e-05, + "loss": 1.3633, + "step": 13651 + }, + { + "epoch": 0.40084561630160315, + "grad_norm": 0.0, + "learning_rate": 1.3613668503377702e-05, + "loss": 1.2358, + "step": 13652 + }, + { + "epoch": 0.40087497797874216, + "grad_norm": 0.0, + "learning_rate": 1.3612781789037598e-05, + "loss": 1.2852, + "step": 13653 + }, + { + "epoch": 0.4009043396558811, + "grad_norm": 0.0, + "learning_rate": 1.3611895042026189e-05, + "loss": 1.3047, + "step": 13654 + }, + { + "epoch": 0.40093370133302014, + "grad_norm": 0.0, + "learning_rate": 1.3611008262351487e-05, + "loss": 1.4463, + "step": 13655 + }, + { + "epoch": 0.40096306301015916, + "grad_norm": 0.0, + "learning_rate": 1.361012145002151e-05, + "loss": 1.335, + "step": 13656 + }, + { + "epoch": 0.4009924246872981, + "grad_norm": 0.0, + "learning_rate": 1.3609234605044282e-05, + "loss": 1.3877, + "step": 13657 + }, + { + "epoch": 0.40102178636443714, + "grad_norm": 0.0, + "learning_rate": 1.360834772742782e-05, + "loss": 1.4346, + "step": 13658 + }, + { + "epoch": 0.40105114804157616, + "grad_norm": 0.0, + "learning_rate": 1.3607460817180147e-05, + "loss": 1.2979, + "step": 13659 + }, + { + "epoch": 0.4010805097187151, + "grad_norm": 0.0, + "learning_rate": 1.360657387430928e-05, + "loss": 1.2852, + "step": 13660 + }, + { + "epoch": 0.40110987139585413, + "grad_norm": 0.0, + "learning_rate": 1.3605686898823243e-05, + "loss": 1.2666, + "step": 13661 + }, + { + "epoch": 0.40113923307299315, + "grad_norm": 0.0, + "learning_rate": 1.3604799890730053e-05, + "loss": 1.3379, + "step": 13662 + }, + { + "epoch": 0.4011685947501321, + "grad_norm": 0.0, + "learning_rate": 1.3603912850037739e-05, + "loss": 1.2988, + "step": 13663 + }, + { + "epoch": 0.40119795642727113, + "grad_norm": 0.0, + "learning_rate": 1.3603025776754315e-05, + "loss": 1.3311, + "step": 13664 + }, + { + "epoch": 0.40122731810441015, + "grad_norm": 0.0, + "learning_rate": 1.3602138670887808e-05, + "loss": 1.416, + "step": 13665 + }, + { + "epoch": 0.4012566797815491, + "grad_norm": 0.0, + "learning_rate": 1.3601251532446236e-05, + "loss": 1.1055, + "step": 13666 + }, + { + "epoch": 0.4012860414586881, + "grad_norm": 0.0, + "learning_rate": 1.360036436143763e-05, + "loss": 1.2061, + "step": 13667 + }, + { + "epoch": 0.40131540313582714, + "grad_norm": 0.0, + "learning_rate": 1.359947715787e-05, + "loss": 1.4082, + "step": 13668 + }, + { + "epoch": 0.4013447648129661, + "grad_norm": 0.0, + "learning_rate": 1.3598589921751379e-05, + "loss": 1.2979, + "step": 13669 + }, + { + "epoch": 0.4013741264901051, + "grad_norm": 0.0, + "learning_rate": 1.3597702653089788e-05, + "loss": 1.3135, + "step": 13670 + }, + { + "epoch": 0.40140348816724414, + "grad_norm": 0.0, + "learning_rate": 1.359681535189325e-05, + "loss": 1.4395, + "step": 13671 + }, + { + "epoch": 0.4014328498443831, + "grad_norm": 0.0, + "learning_rate": 1.359592801816979e-05, + "loss": 1.3672, + "step": 13672 + }, + { + "epoch": 0.4014622115215221, + "grad_norm": 0.0, + "learning_rate": 1.3595040651927431e-05, + "loss": 1.3359, + "step": 13673 + }, + { + "epoch": 0.40149157319866113, + "grad_norm": 0.0, + "learning_rate": 1.35941532531742e-05, + "loss": 1.3477, + "step": 13674 + }, + { + "epoch": 0.4015209348758001, + "grad_norm": 0.0, + "learning_rate": 1.359326582191812e-05, + "loss": 1.3564, + "step": 13675 + }, + { + "epoch": 0.4015502965529391, + "grad_norm": 0.0, + "learning_rate": 1.3592378358167218e-05, + "loss": 1.2988, + "step": 13676 + }, + { + "epoch": 0.40157965823007813, + "grad_norm": 0.0, + "learning_rate": 1.3591490861929519e-05, + "loss": 1.5146, + "step": 13677 + }, + { + "epoch": 0.4016090199072171, + "grad_norm": 0.0, + "learning_rate": 1.3590603333213046e-05, + "loss": 1.3242, + "step": 13678 + }, + { + "epoch": 0.4016383815843561, + "grad_norm": 0.0, + "learning_rate": 1.3589715772025827e-05, + "loss": 1.1396, + "step": 13679 + }, + { + "epoch": 0.40166774326149507, + "grad_norm": 0.0, + "learning_rate": 1.3588828178375892e-05, + "loss": 1.4629, + "step": 13680 + }, + { + "epoch": 0.4016971049386341, + "grad_norm": 0.0, + "learning_rate": 1.3587940552271262e-05, + "loss": 1.3877, + "step": 13681 + }, + { + "epoch": 0.4017264666157731, + "grad_norm": 0.0, + "learning_rate": 1.3587052893719968e-05, + "loss": 1.3506, + "step": 13682 + }, + { + "epoch": 0.40175582829291206, + "grad_norm": 0.0, + "learning_rate": 1.3586165202730038e-05, + "loss": 1.2119, + "step": 13683 + }, + { + "epoch": 0.4017851899700511, + "grad_norm": 0.0, + "learning_rate": 1.3585277479309493e-05, + "loss": 1.377, + "step": 13684 + }, + { + "epoch": 0.4018145516471901, + "grad_norm": 0.0, + "learning_rate": 1.3584389723466366e-05, + "loss": 1.2188, + "step": 13685 + }, + { + "epoch": 0.40184391332432906, + "grad_norm": 0.0, + "learning_rate": 1.3583501935208686e-05, + "loss": 1.4531, + "step": 13686 + }, + { + "epoch": 0.4018732750014681, + "grad_norm": 0.0, + "learning_rate": 1.3582614114544485e-05, + "loss": 1.4854, + "step": 13687 + }, + { + "epoch": 0.4019026366786071, + "grad_norm": 0.0, + "learning_rate": 1.3581726261481785e-05, + "loss": 1.3945, + "step": 13688 + }, + { + "epoch": 0.40193199835574606, + "grad_norm": 0.0, + "learning_rate": 1.3580838376028613e-05, + "loss": 1.3877, + "step": 13689 + }, + { + "epoch": 0.4019613600328851, + "grad_norm": 0.0, + "learning_rate": 1.3579950458193003e-05, + "loss": 1.3906, + "step": 13690 + }, + { + "epoch": 0.4019907217100241, + "grad_norm": 0.0, + "learning_rate": 1.3579062507982985e-05, + "loss": 1.3936, + "step": 13691 + }, + { + "epoch": 0.40202008338716305, + "grad_norm": 0.0, + "learning_rate": 1.357817452540659e-05, + "loss": 1.248, + "step": 13692 + }, + { + "epoch": 0.40204944506430207, + "grad_norm": 0.0, + "learning_rate": 1.3577286510471846e-05, + "loss": 1.3291, + "step": 13693 + }, + { + "epoch": 0.4020788067414411, + "grad_norm": 0.0, + "learning_rate": 1.3576398463186781e-05, + "loss": 1.377, + "step": 13694 + }, + { + "epoch": 0.40210816841858005, + "grad_norm": 0.0, + "learning_rate": 1.3575510383559434e-05, + "loss": 1.3643, + "step": 13695 + }, + { + "epoch": 0.40213753009571906, + "grad_norm": 0.0, + "learning_rate": 1.357462227159783e-05, + "loss": 1.3223, + "step": 13696 + }, + { + "epoch": 0.4021668917728581, + "grad_norm": 0.0, + "learning_rate": 1.3573734127309997e-05, + "loss": 1.3447, + "step": 13697 + }, + { + "epoch": 0.40219625344999704, + "grad_norm": 0.0, + "learning_rate": 1.3572845950703974e-05, + "loss": 1.4355, + "step": 13698 + }, + { + "epoch": 0.40222561512713606, + "grad_norm": 0.0, + "learning_rate": 1.357195774178779e-05, + "loss": 1.3911, + "step": 13699 + }, + { + "epoch": 0.4022549768042751, + "grad_norm": 0.0, + "learning_rate": 1.3571069500569477e-05, + "loss": 1.291, + "step": 13700 + }, + { + "epoch": 0.40228433848141404, + "grad_norm": 0.0, + "learning_rate": 1.3570181227057067e-05, + "loss": 1.3535, + "step": 13701 + }, + { + "epoch": 0.40231370015855306, + "grad_norm": 0.0, + "learning_rate": 1.3569292921258592e-05, + "loss": 1.3564, + "step": 13702 + }, + { + "epoch": 0.4023430618356921, + "grad_norm": 0.0, + "learning_rate": 1.3568404583182089e-05, + "loss": 1.3525, + "step": 13703 + }, + { + "epoch": 0.40237242351283103, + "grad_norm": 0.0, + "learning_rate": 1.3567516212835592e-05, + "loss": 1.3887, + "step": 13704 + }, + { + "epoch": 0.40240178518997005, + "grad_norm": 0.0, + "learning_rate": 1.3566627810227128e-05, + "loss": 1.2842, + "step": 13705 + }, + { + "epoch": 0.40243114686710907, + "grad_norm": 0.0, + "learning_rate": 1.3565739375364739e-05, + "loss": 1.3467, + "step": 13706 + }, + { + "epoch": 0.40246050854424803, + "grad_norm": 0.0, + "learning_rate": 1.3564850908256454e-05, + "loss": 1.374, + "step": 13707 + }, + { + "epoch": 0.40248987022138705, + "grad_norm": 0.0, + "learning_rate": 1.3563962408910312e-05, + "loss": 1.2915, + "step": 13708 + }, + { + "epoch": 0.40251923189852606, + "grad_norm": 0.0, + "learning_rate": 1.3563073877334341e-05, + "loss": 1.4248, + "step": 13709 + }, + { + "epoch": 0.402548593575665, + "grad_norm": 0.0, + "learning_rate": 1.3562185313536582e-05, + "loss": 1.3032, + "step": 13710 + }, + { + "epoch": 0.40257795525280404, + "grad_norm": 0.0, + "learning_rate": 1.3561296717525068e-05, + "loss": 1.1787, + "step": 13711 + }, + { + "epoch": 0.40260731692994306, + "grad_norm": 0.0, + "learning_rate": 1.3560408089307837e-05, + "loss": 1.3418, + "step": 13712 + }, + { + "epoch": 0.402636678607082, + "grad_norm": 0.0, + "learning_rate": 1.3559519428892922e-05, + "loss": 1.4092, + "step": 13713 + }, + { + "epoch": 0.40266604028422104, + "grad_norm": 0.0, + "learning_rate": 1.3558630736288363e-05, + "loss": 1.3936, + "step": 13714 + }, + { + "epoch": 0.40269540196136006, + "grad_norm": 0.0, + "learning_rate": 1.3557742011502193e-05, + "loss": 1.3711, + "step": 13715 + }, + { + "epoch": 0.402724763638499, + "grad_norm": 0.0, + "learning_rate": 1.3556853254542454e-05, + "loss": 1.2822, + "step": 13716 + }, + { + "epoch": 0.40275412531563803, + "grad_norm": 0.0, + "learning_rate": 1.355596446541718e-05, + "loss": 1.3721, + "step": 13717 + }, + { + "epoch": 0.40278348699277705, + "grad_norm": 0.0, + "learning_rate": 1.3555075644134406e-05, + "loss": 1.2627, + "step": 13718 + }, + { + "epoch": 0.402812848669916, + "grad_norm": 0.0, + "learning_rate": 1.3554186790702173e-05, + "loss": 1.501, + "step": 13719 + }, + { + "epoch": 0.40284221034705503, + "grad_norm": 0.0, + "learning_rate": 1.355329790512852e-05, + "loss": 1.4912, + "step": 13720 + }, + { + "epoch": 0.40287157202419405, + "grad_norm": 0.0, + "learning_rate": 1.3552408987421483e-05, + "loss": 1.3633, + "step": 13721 + }, + { + "epoch": 0.402900933701333, + "grad_norm": 0.0, + "learning_rate": 1.3551520037589102e-05, + "loss": 1.3877, + "step": 13722 + }, + { + "epoch": 0.402930295378472, + "grad_norm": 0.0, + "learning_rate": 1.3550631055639414e-05, + "loss": 1.3564, + "step": 13723 + }, + { + "epoch": 0.40295965705561104, + "grad_norm": 0.0, + "learning_rate": 1.3549742041580463e-05, + "loss": 1.4043, + "step": 13724 + }, + { + "epoch": 0.40298901873275, + "grad_norm": 0.0, + "learning_rate": 1.3548852995420286e-05, + "loss": 1.2568, + "step": 13725 + }, + { + "epoch": 0.403018380409889, + "grad_norm": 0.0, + "learning_rate": 1.354796391716692e-05, + "loss": 1.3408, + "step": 13726 + }, + { + "epoch": 0.40304774208702804, + "grad_norm": 0.0, + "learning_rate": 1.3547074806828406e-05, + "loss": 1.3955, + "step": 13727 + }, + { + "epoch": 0.403077103764167, + "grad_norm": 0.0, + "learning_rate": 1.354618566441279e-05, + "loss": 1.3955, + "step": 13728 + }, + { + "epoch": 0.403106465441306, + "grad_norm": 0.0, + "learning_rate": 1.3545296489928108e-05, + "loss": 1.3818, + "step": 13729 + }, + { + "epoch": 0.403135827118445, + "grad_norm": 0.0, + "learning_rate": 1.3544407283382402e-05, + "loss": 1.3809, + "step": 13730 + }, + { + "epoch": 0.403165188795584, + "grad_norm": 0.0, + "learning_rate": 1.354351804478371e-05, + "loss": 1.3252, + "step": 13731 + }, + { + "epoch": 0.403194550472723, + "grad_norm": 0.0, + "learning_rate": 1.3542628774140082e-05, + "loss": 1.4209, + "step": 13732 + }, + { + "epoch": 0.403223912149862, + "grad_norm": 0.0, + "learning_rate": 1.354173947145955e-05, + "loss": 1.3086, + "step": 13733 + }, + { + "epoch": 0.403253273827001, + "grad_norm": 0.0, + "learning_rate": 1.3540850136750163e-05, + "loss": 1.2314, + "step": 13734 + }, + { + "epoch": 0.40328263550414, + "grad_norm": 0.0, + "learning_rate": 1.353996077001996e-05, + "loss": 1.3135, + "step": 13735 + }, + { + "epoch": 0.40331199718127897, + "grad_norm": 0.0, + "learning_rate": 1.3539071371276988e-05, + "loss": 1.2861, + "step": 13736 + }, + { + "epoch": 0.403341358858418, + "grad_norm": 0.0, + "learning_rate": 1.3538181940529288e-05, + "loss": 1.3867, + "step": 13737 + }, + { + "epoch": 0.403370720535557, + "grad_norm": 0.0, + "learning_rate": 1.3537292477784899e-05, + "loss": 1.3945, + "step": 13738 + }, + { + "epoch": 0.40340008221269597, + "grad_norm": 0.0, + "learning_rate": 1.3536402983051868e-05, + "loss": 1.4365, + "step": 13739 + }, + { + "epoch": 0.403429443889835, + "grad_norm": 0.0, + "learning_rate": 1.3535513456338241e-05, + "loss": 1.3936, + "step": 13740 + }, + { + "epoch": 0.403458805566974, + "grad_norm": 0.0, + "learning_rate": 1.353462389765206e-05, + "loss": 1.3965, + "step": 13741 + }, + { + "epoch": 0.40348816724411296, + "grad_norm": 0.0, + "learning_rate": 1.353373430700137e-05, + "loss": 1.2969, + "step": 13742 + }, + { + "epoch": 0.403517528921252, + "grad_norm": 0.0, + "learning_rate": 1.3532844684394214e-05, + "loss": 1.3291, + "step": 13743 + }, + { + "epoch": 0.403546890598391, + "grad_norm": 0.0, + "learning_rate": 1.3531955029838638e-05, + "loss": 1.1899, + "step": 13744 + }, + { + "epoch": 0.40357625227552996, + "grad_norm": 0.0, + "learning_rate": 1.3531065343342694e-05, + "loss": 1.4561, + "step": 13745 + }, + { + "epoch": 0.403605613952669, + "grad_norm": 0.0, + "learning_rate": 1.353017562491442e-05, + "loss": 1.2808, + "step": 13746 + }, + { + "epoch": 0.403634975629808, + "grad_norm": 0.0, + "learning_rate": 1.3529285874561859e-05, + "loss": 1.4141, + "step": 13747 + }, + { + "epoch": 0.40366433730694695, + "grad_norm": 0.0, + "learning_rate": 1.3528396092293066e-05, + "loss": 1.3809, + "step": 13748 + }, + { + "epoch": 0.40369369898408597, + "grad_norm": 0.0, + "learning_rate": 1.3527506278116082e-05, + "loss": 1.3633, + "step": 13749 + }, + { + "epoch": 0.403723060661225, + "grad_norm": 0.0, + "learning_rate": 1.3526616432038956e-05, + "loss": 1.1768, + "step": 13750 + }, + { + "epoch": 0.40375242233836395, + "grad_norm": 0.0, + "learning_rate": 1.3525726554069734e-05, + "loss": 1.2515, + "step": 13751 + }, + { + "epoch": 0.40378178401550296, + "grad_norm": 0.0, + "learning_rate": 1.3524836644216461e-05, + "loss": 1.4062, + "step": 13752 + }, + { + "epoch": 0.403811145692642, + "grad_norm": 0.0, + "learning_rate": 1.3523946702487192e-05, + "loss": 1.4062, + "step": 13753 + }, + { + "epoch": 0.40384050736978094, + "grad_norm": 0.0, + "learning_rate": 1.352305672888997e-05, + "loss": 1.4004, + "step": 13754 + }, + { + "epoch": 0.40386986904691996, + "grad_norm": 0.0, + "learning_rate": 1.3522166723432842e-05, + "loss": 1.4766, + "step": 13755 + }, + { + "epoch": 0.403899230724059, + "grad_norm": 0.0, + "learning_rate": 1.3521276686123858e-05, + "loss": 1.1919, + "step": 13756 + }, + { + "epoch": 0.40392859240119794, + "grad_norm": 0.0, + "learning_rate": 1.352038661697107e-05, + "loss": 1.4199, + "step": 13757 + }, + { + "epoch": 0.40395795407833696, + "grad_norm": 0.0, + "learning_rate": 1.3519496515982523e-05, + "loss": 1.4365, + "step": 13758 + }, + { + "epoch": 0.403987315755476, + "grad_norm": 0.0, + "learning_rate": 1.3518606383166266e-05, + "loss": 1.4814, + "step": 13759 + }, + { + "epoch": 0.40401667743261493, + "grad_norm": 0.0, + "learning_rate": 1.351771621853035e-05, + "loss": 1.501, + "step": 13760 + }, + { + "epoch": 0.40404603910975395, + "grad_norm": 0.0, + "learning_rate": 1.3516826022082827e-05, + "loss": 1.5088, + "step": 13761 + }, + { + "epoch": 0.40407540078689297, + "grad_norm": 0.0, + "learning_rate": 1.3515935793831744e-05, + "loss": 1.3447, + "step": 13762 + }, + { + "epoch": 0.40410476246403193, + "grad_norm": 0.0, + "learning_rate": 1.3515045533785154e-05, + "loss": 1.4336, + "step": 13763 + }, + { + "epoch": 0.40413412414117095, + "grad_norm": 0.0, + "learning_rate": 1.3514155241951108e-05, + "loss": 1.4443, + "step": 13764 + }, + { + "epoch": 0.40416348581830996, + "grad_norm": 0.0, + "learning_rate": 1.3513264918337656e-05, + "loss": 1.4473, + "step": 13765 + }, + { + "epoch": 0.4041928474954489, + "grad_norm": 0.0, + "learning_rate": 1.351237456295285e-05, + "loss": 1.3877, + "step": 13766 + }, + { + "epoch": 0.40422220917258794, + "grad_norm": 0.0, + "learning_rate": 1.3511484175804741e-05, + "loss": 1.5156, + "step": 13767 + }, + { + "epoch": 0.40425157084972696, + "grad_norm": 0.0, + "learning_rate": 1.351059375690138e-05, + "loss": 1.3159, + "step": 13768 + }, + { + "epoch": 0.4042809325268659, + "grad_norm": 0.0, + "learning_rate": 1.3509703306250819e-05, + "loss": 1.2466, + "step": 13769 + }, + { + "epoch": 0.40431029420400494, + "grad_norm": 0.0, + "learning_rate": 1.3508812823861117e-05, + "loss": 1.3193, + "step": 13770 + }, + { + "epoch": 0.40433965588114396, + "grad_norm": 0.0, + "learning_rate": 1.3507922309740318e-05, + "loss": 1.1948, + "step": 13771 + }, + { + "epoch": 0.4043690175582829, + "grad_norm": 0.0, + "learning_rate": 1.3507031763896482e-05, + "loss": 1.4316, + "step": 13772 + }, + { + "epoch": 0.40439837923542193, + "grad_norm": 0.0, + "learning_rate": 1.3506141186337658e-05, + "loss": 1.2539, + "step": 13773 + }, + { + "epoch": 0.40442774091256095, + "grad_norm": 0.0, + "learning_rate": 1.35052505770719e-05, + "loss": 1.2998, + "step": 13774 + }, + { + "epoch": 0.4044571025896999, + "grad_norm": 0.0, + "learning_rate": 1.3504359936107268e-05, + "loss": 1.3818, + "step": 13775 + }, + { + "epoch": 0.40448646426683893, + "grad_norm": 0.0, + "learning_rate": 1.3503469263451808e-05, + "loss": 1.4316, + "step": 13776 + }, + { + "epoch": 0.40451582594397795, + "grad_norm": 0.0, + "learning_rate": 1.3502578559113575e-05, + "loss": 1.2881, + "step": 13777 + }, + { + "epoch": 0.4045451876211169, + "grad_norm": 0.0, + "learning_rate": 1.3501687823100634e-05, + "loss": 1.3232, + "step": 13778 + }, + { + "epoch": 0.4045745492982559, + "grad_norm": 0.0, + "learning_rate": 1.3500797055421028e-05, + "loss": 1.458, + "step": 13779 + }, + { + "epoch": 0.4046039109753949, + "grad_norm": 0.0, + "learning_rate": 1.349990625608282e-05, + "loss": 1.375, + "step": 13780 + }, + { + "epoch": 0.4046332726525339, + "grad_norm": 0.0, + "learning_rate": 1.3499015425094062e-05, + "loss": 1.3369, + "step": 13781 + }, + { + "epoch": 0.4046626343296729, + "grad_norm": 0.0, + "learning_rate": 1.3498124562462812e-05, + "loss": 1.3555, + "step": 13782 + }, + { + "epoch": 0.4046919960068119, + "grad_norm": 0.0, + "learning_rate": 1.3497233668197126e-05, + "loss": 1.3115, + "step": 13783 + }, + { + "epoch": 0.4047213576839509, + "grad_norm": 0.0, + "learning_rate": 1.349634274230506e-05, + "loss": 1.3594, + "step": 13784 + }, + { + "epoch": 0.4047507193610899, + "grad_norm": 0.0, + "learning_rate": 1.3495451784794673e-05, + "loss": 1.4453, + "step": 13785 + }, + { + "epoch": 0.4047800810382289, + "grad_norm": 0.0, + "learning_rate": 1.3494560795674021e-05, + "loss": 1.3115, + "step": 13786 + }, + { + "epoch": 0.4048094427153679, + "grad_norm": 0.0, + "learning_rate": 1.3493669774951158e-05, + "loss": 1.3906, + "step": 13787 + }, + { + "epoch": 0.4048388043925069, + "grad_norm": 0.0, + "learning_rate": 1.3492778722634143e-05, + "loss": 1.4209, + "step": 13788 + }, + { + "epoch": 0.4048681660696459, + "grad_norm": 0.0, + "learning_rate": 1.3491887638731038e-05, + "loss": 1.3125, + "step": 13789 + }, + { + "epoch": 0.4048975277467849, + "grad_norm": 0.0, + "learning_rate": 1.34909965232499e-05, + "loss": 1.2988, + "step": 13790 + }, + { + "epoch": 0.4049268894239239, + "grad_norm": 0.0, + "learning_rate": 1.3490105376198784e-05, + "loss": 1.417, + "step": 13791 + }, + { + "epoch": 0.40495625110106287, + "grad_norm": 0.0, + "learning_rate": 1.3489214197585749e-05, + "loss": 1.2959, + "step": 13792 + }, + { + "epoch": 0.4049856127782019, + "grad_norm": 0.0, + "learning_rate": 1.3488322987418861e-05, + "loss": 1.3711, + "step": 13793 + }, + { + "epoch": 0.4050149744553409, + "grad_norm": 0.0, + "learning_rate": 1.3487431745706174e-05, + "loss": 1.2061, + "step": 13794 + }, + { + "epoch": 0.40504433613247987, + "grad_norm": 0.0, + "learning_rate": 1.3486540472455748e-05, + "loss": 1.4658, + "step": 13795 + }, + { + "epoch": 0.4050736978096189, + "grad_norm": 0.0, + "learning_rate": 1.3485649167675643e-05, + "loss": 1.3096, + "step": 13796 + }, + { + "epoch": 0.4051030594867579, + "grad_norm": 0.0, + "learning_rate": 1.3484757831373921e-05, + "loss": 1.3613, + "step": 13797 + }, + { + "epoch": 0.40513242116389686, + "grad_norm": 0.0, + "learning_rate": 1.3483866463558642e-05, + "loss": 1.2676, + "step": 13798 + }, + { + "epoch": 0.4051617828410359, + "grad_norm": 0.0, + "learning_rate": 1.3482975064237863e-05, + "loss": 1.3545, + "step": 13799 + }, + { + "epoch": 0.4051911445181749, + "grad_norm": 0.0, + "learning_rate": 1.3482083633419652e-05, + "loss": 1.3584, + "step": 13800 + }, + { + "epoch": 0.40522050619531386, + "grad_norm": 0.0, + "learning_rate": 1.3481192171112064e-05, + "loss": 1.3691, + "step": 13801 + }, + { + "epoch": 0.4052498678724529, + "grad_norm": 0.0, + "learning_rate": 1.3480300677323166e-05, + "loss": 1.4629, + "step": 13802 + }, + { + "epoch": 0.4052792295495919, + "grad_norm": 0.0, + "learning_rate": 1.3479409152061016e-05, + "loss": 1.4058, + "step": 13803 + }, + { + "epoch": 0.40530859122673085, + "grad_norm": 0.0, + "learning_rate": 1.3478517595333678e-05, + "loss": 1.2637, + "step": 13804 + }, + { + "epoch": 0.40533795290386987, + "grad_norm": 0.0, + "learning_rate": 1.3477626007149216e-05, + "loss": 1.3457, + "step": 13805 + }, + { + "epoch": 0.4053673145810089, + "grad_norm": 0.0, + "learning_rate": 1.347673438751569e-05, + "loss": 1.3784, + "step": 13806 + }, + { + "epoch": 0.40539667625814785, + "grad_norm": 0.0, + "learning_rate": 1.3475842736441167e-05, + "loss": 1.3906, + "step": 13807 + }, + { + "epoch": 0.40542603793528686, + "grad_norm": 0.0, + "learning_rate": 1.3474951053933705e-05, + "loss": 1.3154, + "step": 13808 + }, + { + "epoch": 0.4054553996124259, + "grad_norm": 0.0, + "learning_rate": 1.3474059340001371e-05, + "loss": 1.3613, + "step": 13809 + }, + { + "epoch": 0.40548476128956484, + "grad_norm": 0.0, + "learning_rate": 1.3473167594652231e-05, + "loss": 1.3389, + "step": 13810 + }, + { + "epoch": 0.40551412296670386, + "grad_norm": 0.0, + "learning_rate": 1.3472275817894346e-05, + "loss": 1.3457, + "step": 13811 + }, + { + "epoch": 0.4055434846438429, + "grad_norm": 0.0, + "learning_rate": 1.3471384009735779e-05, + "loss": 1.2402, + "step": 13812 + }, + { + "epoch": 0.40557284632098184, + "grad_norm": 0.0, + "learning_rate": 1.34704921701846e-05, + "loss": 1.2568, + "step": 13813 + }, + { + "epoch": 0.40560220799812086, + "grad_norm": 0.0, + "learning_rate": 1.3469600299248874e-05, + "loss": 1.3311, + "step": 13814 + }, + { + "epoch": 0.4056315696752599, + "grad_norm": 0.0, + "learning_rate": 1.346870839693666e-05, + "loss": 1.3066, + "step": 13815 + }, + { + "epoch": 0.40566093135239883, + "grad_norm": 0.0, + "learning_rate": 1.3467816463256028e-05, + "loss": 1.271, + "step": 13816 + }, + { + "epoch": 0.40569029302953785, + "grad_norm": 0.0, + "learning_rate": 1.346692449821504e-05, + "loss": 1.3848, + "step": 13817 + }, + { + "epoch": 0.40571965470667687, + "grad_norm": 0.0, + "learning_rate": 1.3466032501821773e-05, + "loss": 1.2578, + "step": 13818 + }, + { + "epoch": 0.40574901638381583, + "grad_norm": 0.0, + "learning_rate": 1.3465140474084283e-05, + "loss": 1.2026, + "step": 13819 + }, + { + "epoch": 0.40577837806095485, + "grad_norm": 0.0, + "learning_rate": 1.3464248415010638e-05, + "loss": 1.4531, + "step": 13820 + }, + { + "epoch": 0.40580773973809386, + "grad_norm": 0.0, + "learning_rate": 1.3463356324608911e-05, + "loss": 1.4795, + "step": 13821 + }, + { + "epoch": 0.4058371014152328, + "grad_norm": 0.0, + "learning_rate": 1.3462464202887166e-05, + "loss": 1.3809, + "step": 13822 + }, + { + "epoch": 0.40586646309237184, + "grad_norm": 0.0, + "learning_rate": 1.346157204985347e-05, + "loss": 1.3721, + "step": 13823 + }, + { + "epoch": 0.40589582476951086, + "grad_norm": 0.0, + "learning_rate": 1.3460679865515891e-05, + "loss": 1.3486, + "step": 13824 + }, + { + "epoch": 0.4059251864466498, + "grad_norm": 0.0, + "learning_rate": 1.3459787649882497e-05, + "loss": 1.2529, + "step": 13825 + }, + { + "epoch": 0.40595454812378884, + "grad_norm": 0.0, + "learning_rate": 1.3458895402961356e-05, + "loss": 1.2773, + "step": 13826 + }, + { + "epoch": 0.40598390980092786, + "grad_norm": 0.0, + "learning_rate": 1.3458003124760542e-05, + "loss": 1.2593, + "step": 13827 + }, + { + "epoch": 0.4060132714780668, + "grad_norm": 0.0, + "learning_rate": 1.3457110815288115e-05, + "loss": 1.3389, + "step": 13828 + }, + { + "epoch": 0.40604263315520583, + "grad_norm": 0.0, + "learning_rate": 1.3456218474552151e-05, + "loss": 1.2627, + "step": 13829 + }, + { + "epoch": 0.4060719948323448, + "grad_norm": 0.0, + "learning_rate": 1.345532610256072e-05, + "loss": 1.3623, + "step": 13830 + }, + { + "epoch": 0.4061013565094838, + "grad_norm": 0.0, + "learning_rate": 1.3454433699321892e-05, + "loss": 1.3857, + "step": 13831 + }, + { + "epoch": 0.40613071818662283, + "grad_norm": 0.0, + "learning_rate": 1.3453541264843731e-05, + "loss": 1.3164, + "step": 13832 + }, + { + "epoch": 0.4061600798637618, + "grad_norm": 0.0, + "learning_rate": 1.3452648799134313e-05, + "loss": 1.2383, + "step": 13833 + }, + { + "epoch": 0.4061894415409008, + "grad_norm": 0.0, + "learning_rate": 1.3451756302201711e-05, + "loss": 1.4355, + "step": 13834 + }, + { + "epoch": 0.4062188032180398, + "grad_norm": 0.0, + "learning_rate": 1.345086377405399e-05, + "loss": 1.5244, + "step": 13835 + }, + { + "epoch": 0.4062481648951788, + "grad_norm": 0.0, + "learning_rate": 1.3449971214699224e-05, + "loss": 1.332, + "step": 13836 + }, + { + "epoch": 0.4062775265723178, + "grad_norm": 0.0, + "learning_rate": 1.3449078624145485e-05, + "loss": 1.416, + "step": 13837 + }, + { + "epoch": 0.4063068882494568, + "grad_norm": 0.0, + "learning_rate": 1.3448186002400843e-05, + "loss": 1.2549, + "step": 13838 + }, + { + "epoch": 0.4063362499265958, + "grad_norm": 0.0, + "learning_rate": 1.3447293349473377e-05, + "loss": 1.3682, + "step": 13839 + }, + { + "epoch": 0.4063656116037348, + "grad_norm": 0.0, + "learning_rate": 1.3446400665371149e-05, + "loss": 1.2695, + "step": 13840 + }, + { + "epoch": 0.4063949732808738, + "grad_norm": 0.0, + "learning_rate": 1.3445507950102238e-05, + "loss": 1.4326, + "step": 13841 + }, + { + "epoch": 0.4064243349580128, + "grad_norm": 0.0, + "learning_rate": 1.3444615203674716e-05, + "loss": 1.3867, + "step": 13842 + }, + { + "epoch": 0.4064536966351518, + "grad_norm": 0.0, + "learning_rate": 1.344372242609666e-05, + "loss": 1.293, + "step": 13843 + }, + { + "epoch": 0.4064830583122908, + "grad_norm": 0.0, + "learning_rate": 1.3442829617376139e-05, + "loss": 1.3594, + "step": 13844 + }, + { + "epoch": 0.4065124199894298, + "grad_norm": 0.0, + "learning_rate": 1.3441936777521227e-05, + "loss": 1.3926, + "step": 13845 + }, + { + "epoch": 0.4065417816665688, + "grad_norm": 0.0, + "learning_rate": 1.3441043906539997e-05, + "loss": 1.3467, + "step": 13846 + }, + { + "epoch": 0.4065711433437078, + "grad_norm": 0.0, + "learning_rate": 1.344015100444053e-05, + "loss": 1.3442, + "step": 13847 + }, + { + "epoch": 0.40660050502084677, + "grad_norm": 0.0, + "learning_rate": 1.3439258071230894e-05, + "loss": 1.3857, + "step": 13848 + }, + { + "epoch": 0.4066298666979858, + "grad_norm": 0.0, + "learning_rate": 1.3438365106919164e-05, + "loss": 1.458, + "step": 13849 + }, + { + "epoch": 0.4066592283751248, + "grad_norm": 0.0, + "learning_rate": 1.343747211151342e-05, + "loss": 1.2012, + "step": 13850 + }, + { + "epoch": 0.40668859005226377, + "grad_norm": 0.0, + "learning_rate": 1.3436579085021739e-05, + "loss": 1.3496, + "step": 13851 + }, + { + "epoch": 0.4067179517294028, + "grad_norm": 0.0, + "learning_rate": 1.3435686027452191e-05, + "loss": 1.3135, + "step": 13852 + }, + { + "epoch": 0.4067473134065418, + "grad_norm": 0.0, + "learning_rate": 1.343479293881285e-05, + "loss": 1.373, + "step": 13853 + }, + { + "epoch": 0.40677667508368076, + "grad_norm": 0.0, + "learning_rate": 1.3433899819111801e-05, + "loss": 1.3506, + "step": 13854 + }, + { + "epoch": 0.4068060367608198, + "grad_norm": 0.0, + "learning_rate": 1.3433006668357118e-05, + "loss": 1.4951, + "step": 13855 + }, + { + "epoch": 0.4068353984379588, + "grad_norm": 0.0, + "learning_rate": 1.3432113486556876e-05, + "loss": 1.5049, + "step": 13856 + }, + { + "epoch": 0.40686476011509776, + "grad_norm": 0.0, + "learning_rate": 1.3431220273719148e-05, + "loss": 1.4189, + "step": 13857 + }, + { + "epoch": 0.4068941217922368, + "grad_norm": 0.0, + "learning_rate": 1.3430327029852019e-05, + "loss": 1.3608, + "step": 13858 + }, + { + "epoch": 0.4069234834693758, + "grad_norm": 0.0, + "learning_rate": 1.3429433754963563e-05, + "loss": 1.2275, + "step": 13859 + }, + { + "epoch": 0.40695284514651475, + "grad_norm": 0.0, + "learning_rate": 1.3428540449061864e-05, + "loss": 1.3506, + "step": 13860 + }, + { + "epoch": 0.40698220682365377, + "grad_norm": 0.0, + "learning_rate": 1.342764711215499e-05, + "loss": 1.3652, + "step": 13861 + }, + { + "epoch": 0.4070115685007928, + "grad_norm": 0.0, + "learning_rate": 1.3426753744251028e-05, + "loss": 1.2271, + "step": 13862 + }, + { + "epoch": 0.40704093017793175, + "grad_norm": 0.0, + "learning_rate": 1.3425860345358057e-05, + "loss": 1.3896, + "step": 13863 + }, + { + "epoch": 0.40707029185507076, + "grad_norm": 0.0, + "learning_rate": 1.342496691548415e-05, + "loss": 1.2476, + "step": 13864 + }, + { + "epoch": 0.4070996535322098, + "grad_norm": 0.0, + "learning_rate": 1.3424073454637393e-05, + "loss": 1.3818, + "step": 13865 + }, + { + "epoch": 0.40712901520934874, + "grad_norm": 0.0, + "learning_rate": 1.342317996282586e-05, + "loss": 1.4365, + "step": 13866 + }, + { + "epoch": 0.40715837688648776, + "grad_norm": 0.0, + "learning_rate": 1.342228644005763e-05, + "loss": 1.4014, + "step": 13867 + }, + { + "epoch": 0.4071877385636268, + "grad_norm": 0.0, + "learning_rate": 1.3421392886340793e-05, + "loss": 1.2842, + "step": 13868 + }, + { + "epoch": 0.40721710024076574, + "grad_norm": 0.0, + "learning_rate": 1.3420499301683422e-05, + "loss": 1.5059, + "step": 13869 + }, + { + "epoch": 0.40724646191790476, + "grad_norm": 0.0, + "learning_rate": 1.34196056860936e-05, + "loss": 1.3613, + "step": 13870 + }, + { + "epoch": 0.4072758235950438, + "grad_norm": 0.0, + "learning_rate": 1.3418712039579407e-05, + "loss": 1.3887, + "step": 13871 + }, + { + "epoch": 0.40730518527218273, + "grad_norm": 0.0, + "learning_rate": 1.341781836214893e-05, + "loss": 1.3975, + "step": 13872 + }, + { + "epoch": 0.40733454694932175, + "grad_norm": 0.0, + "learning_rate": 1.3416924653810242e-05, + "loss": 1.3877, + "step": 13873 + }, + { + "epoch": 0.40736390862646077, + "grad_norm": 0.0, + "learning_rate": 1.3416030914571431e-05, + "loss": 1.3604, + "step": 13874 + }, + { + "epoch": 0.40739327030359973, + "grad_norm": 0.0, + "learning_rate": 1.3415137144440577e-05, + "loss": 1.4141, + "step": 13875 + }, + { + "epoch": 0.40742263198073875, + "grad_norm": 0.0, + "learning_rate": 1.3414243343425763e-05, + "loss": 1.2661, + "step": 13876 + }, + { + "epoch": 0.40745199365787776, + "grad_norm": 0.0, + "learning_rate": 1.3413349511535072e-05, + "loss": 1.3848, + "step": 13877 + }, + { + "epoch": 0.4074813553350167, + "grad_norm": 0.0, + "learning_rate": 1.3412455648776588e-05, + "loss": 1.3809, + "step": 13878 + }, + { + "epoch": 0.40751071701215574, + "grad_norm": 0.0, + "learning_rate": 1.341156175515839e-05, + "loss": 1.3086, + "step": 13879 + }, + { + "epoch": 0.40754007868929476, + "grad_norm": 0.0, + "learning_rate": 1.341066783068857e-05, + "loss": 1.3301, + "step": 13880 + }, + { + "epoch": 0.4075694403664337, + "grad_norm": 0.0, + "learning_rate": 1.3409773875375204e-05, + "loss": 1.2632, + "step": 13881 + }, + { + "epoch": 0.40759880204357274, + "grad_norm": 0.0, + "learning_rate": 1.3408879889226383e-05, + "loss": 1.373, + "step": 13882 + }, + { + "epoch": 0.4076281637207117, + "grad_norm": 0.0, + "learning_rate": 1.3407985872250184e-05, + "loss": 1.3208, + "step": 13883 + }, + { + "epoch": 0.4076575253978507, + "grad_norm": 0.0, + "learning_rate": 1.34070918244547e-05, + "loss": 1.4121, + "step": 13884 + }, + { + "epoch": 0.40768688707498973, + "grad_norm": 0.0, + "learning_rate": 1.3406197745848012e-05, + "loss": 1.2363, + "step": 13885 + }, + { + "epoch": 0.4077162487521287, + "grad_norm": 0.0, + "learning_rate": 1.3405303636438203e-05, + "loss": 1.3691, + "step": 13886 + }, + { + "epoch": 0.4077456104292677, + "grad_norm": 0.0, + "learning_rate": 1.3404409496233362e-05, + "loss": 1.3096, + "step": 13887 + }, + { + "epoch": 0.40777497210640673, + "grad_norm": 0.0, + "learning_rate": 1.3403515325241574e-05, + "loss": 1.459, + "step": 13888 + }, + { + "epoch": 0.4078043337835457, + "grad_norm": 0.0, + "learning_rate": 1.3402621123470924e-05, + "loss": 1.3311, + "step": 13889 + }, + { + "epoch": 0.4078336954606847, + "grad_norm": 0.0, + "learning_rate": 1.34017268909295e-05, + "loss": 1.4932, + "step": 13890 + }, + { + "epoch": 0.4078630571378237, + "grad_norm": 0.0, + "learning_rate": 1.340083262762539e-05, + "loss": 1.3535, + "step": 13891 + }, + { + "epoch": 0.4078924188149627, + "grad_norm": 0.0, + "learning_rate": 1.339993833356668e-05, + "loss": 1.2803, + "step": 13892 + }, + { + "epoch": 0.4079217804921017, + "grad_norm": 0.0, + "learning_rate": 1.3399044008761457e-05, + "loss": 1.3467, + "step": 13893 + }, + { + "epoch": 0.4079511421692407, + "grad_norm": 0.0, + "learning_rate": 1.3398149653217806e-05, + "loss": 1.3398, + "step": 13894 + }, + { + "epoch": 0.4079805038463797, + "grad_norm": 0.0, + "learning_rate": 1.339725526694382e-05, + "loss": 1.4131, + "step": 13895 + }, + { + "epoch": 0.4080098655235187, + "grad_norm": 0.0, + "learning_rate": 1.3396360849947582e-05, + "loss": 1.2998, + "step": 13896 + }, + { + "epoch": 0.4080392272006577, + "grad_norm": 0.0, + "learning_rate": 1.3395466402237184e-05, + "loss": 1.4609, + "step": 13897 + }, + { + "epoch": 0.4080685888777967, + "grad_norm": 0.0, + "learning_rate": 1.3394571923820717e-05, + "loss": 1.4307, + "step": 13898 + }, + { + "epoch": 0.4080979505549357, + "grad_norm": 0.0, + "learning_rate": 1.3393677414706261e-05, + "loss": 1.2695, + "step": 13899 + }, + { + "epoch": 0.4081273122320747, + "grad_norm": 0.0, + "learning_rate": 1.3392782874901916e-05, + "loss": 1.2646, + "step": 13900 + }, + { + "epoch": 0.4081566739092137, + "grad_norm": 0.0, + "learning_rate": 1.3391888304415766e-05, + "loss": 1.3193, + "step": 13901 + }, + { + "epoch": 0.4081860355863527, + "grad_norm": 0.0, + "learning_rate": 1.3390993703255896e-05, + "loss": 1.4775, + "step": 13902 + }, + { + "epoch": 0.4082153972634917, + "grad_norm": 0.0, + "learning_rate": 1.3390099071430407e-05, + "loss": 1.3428, + "step": 13903 + }, + { + "epoch": 0.40824475894063067, + "grad_norm": 0.0, + "learning_rate": 1.3389204408947383e-05, + "loss": 1.1392, + "step": 13904 + }, + { + "epoch": 0.4082741206177697, + "grad_norm": 0.0, + "learning_rate": 1.3388309715814916e-05, + "loss": 1.4092, + "step": 13905 + }, + { + "epoch": 0.4083034822949087, + "grad_norm": 0.0, + "learning_rate": 1.3387414992041095e-05, + "loss": 1.2437, + "step": 13906 + }, + { + "epoch": 0.40833284397204767, + "grad_norm": 0.0, + "learning_rate": 1.338652023763401e-05, + "loss": 1.2754, + "step": 13907 + }, + { + "epoch": 0.4083622056491867, + "grad_norm": 0.0, + "learning_rate": 1.3385625452601758e-05, + "loss": 1.4717, + "step": 13908 + }, + { + "epoch": 0.4083915673263257, + "grad_norm": 0.0, + "learning_rate": 1.338473063695243e-05, + "loss": 1.3154, + "step": 13909 + }, + { + "epoch": 0.40842092900346466, + "grad_norm": 0.0, + "learning_rate": 1.3383835790694116e-05, + "loss": 1.3721, + "step": 13910 + }, + { + "epoch": 0.4084502906806037, + "grad_norm": 0.0, + "learning_rate": 1.3382940913834903e-05, + "loss": 1.333, + "step": 13911 + }, + { + "epoch": 0.4084796523577427, + "grad_norm": 0.0, + "learning_rate": 1.3382046006382894e-05, + "loss": 1.2231, + "step": 13912 + }, + { + "epoch": 0.40850901403488166, + "grad_norm": 0.0, + "learning_rate": 1.3381151068346176e-05, + "loss": 1.3154, + "step": 13913 + }, + { + "epoch": 0.4085383757120207, + "grad_norm": 0.0, + "learning_rate": 1.3380256099732842e-05, + "loss": 1.2534, + "step": 13914 + }, + { + "epoch": 0.4085677373891597, + "grad_norm": 0.0, + "learning_rate": 1.3379361100550987e-05, + "loss": 1.2764, + "step": 13915 + }, + { + "epoch": 0.40859709906629865, + "grad_norm": 0.0, + "learning_rate": 1.3378466070808703e-05, + "loss": 1.2969, + "step": 13916 + }, + { + "epoch": 0.40862646074343767, + "grad_norm": 0.0, + "learning_rate": 1.3377571010514086e-05, + "loss": 1.3857, + "step": 13917 + }, + { + "epoch": 0.4086558224205767, + "grad_norm": 0.0, + "learning_rate": 1.3376675919675228e-05, + "loss": 1.3672, + "step": 13918 + }, + { + "epoch": 0.40868518409771565, + "grad_norm": 0.0, + "learning_rate": 1.3375780798300227e-05, + "loss": 1.1919, + "step": 13919 + }, + { + "epoch": 0.40871454577485467, + "grad_norm": 0.0, + "learning_rate": 1.3374885646397173e-05, + "loss": 1.2651, + "step": 13920 + }, + { + "epoch": 0.4087439074519937, + "grad_norm": 0.0, + "learning_rate": 1.3373990463974168e-05, + "loss": 1.334, + "step": 13921 + }, + { + "epoch": 0.40877326912913264, + "grad_norm": 0.0, + "learning_rate": 1.3373095251039299e-05, + "loss": 1.2969, + "step": 13922 + }, + { + "epoch": 0.40880263080627166, + "grad_norm": 0.0, + "learning_rate": 1.337220000760067e-05, + "loss": 1.4004, + "step": 13923 + }, + { + "epoch": 0.4088319924834107, + "grad_norm": 0.0, + "learning_rate": 1.3371304733666369e-05, + "loss": 1.415, + "step": 13924 + }, + { + "epoch": 0.40886135416054964, + "grad_norm": 0.0, + "learning_rate": 1.3370409429244498e-05, + "loss": 1.3516, + "step": 13925 + }, + { + "epoch": 0.40889071583768866, + "grad_norm": 0.0, + "learning_rate": 1.336951409434315e-05, + "loss": 1.46, + "step": 13926 + }, + { + "epoch": 0.4089200775148277, + "grad_norm": 0.0, + "learning_rate": 1.3368618728970422e-05, + "loss": 1.2393, + "step": 13927 + }, + { + "epoch": 0.40894943919196663, + "grad_norm": 0.0, + "learning_rate": 1.3367723333134413e-05, + "loss": 1.3721, + "step": 13928 + }, + { + "epoch": 0.40897880086910565, + "grad_norm": 0.0, + "learning_rate": 1.336682790684322e-05, + "loss": 1.3467, + "step": 13929 + }, + { + "epoch": 0.40900816254624467, + "grad_norm": 0.0, + "learning_rate": 1.3365932450104939e-05, + "loss": 1.3154, + "step": 13930 + }, + { + "epoch": 0.40903752422338363, + "grad_norm": 0.0, + "learning_rate": 1.3365036962927671e-05, + "loss": 1.3691, + "step": 13931 + }, + { + "epoch": 0.40906688590052265, + "grad_norm": 0.0, + "learning_rate": 1.336414144531951e-05, + "loss": 1.3613, + "step": 13932 + }, + { + "epoch": 0.4090962475776616, + "grad_norm": 0.0, + "learning_rate": 1.3363245897288556e-05, + "loss": 1.2715, + "step": 13933 + }, + { + "epoch": 0.4091256092548006, + "grad_norm": 0.0, + "learning_rate": 1.3362350318842911e-05, + "loss": 1.3535, + "step": 13934 + }, + { + "epoch": 0.40915497093193964, + "grad_norm": 0.0, + "learning_rate": 1.3361454709990668e-05, + "loss": 1.3291, + "step": 13935 + }, + { + "epoch": 0.4091843326090786, + "grad_norm": 0.0, + "learning_rate": 1.336055907073993e-05, + "loss": 1.3252, + "step": 13936 + }, + { + "epoch": 0.4092136942862176, + "grad_norm": 0.0, + "learning_rate": 1.3359663401098797e-05, + "loss": 1.3721, + "step": 13937 + }, + { + "epoch": 0.40924305596335664, + "grad_norm": 0.0, + "learning_rate": 1.3358767701075365e-05, + "loss": 1.377, + "step": 13938 + }, + { + "epoch": 0.4092724176404956, + "grad_norm": 0.0, + "learning_rate": 1.3357871970677738e-05, + "loss": 1.2393, + "step": 13939 + }, + { + "epoch": 0.4093017793176346, + "grad_norm": 0.0, + "learning_rate": 1.3356976209914014e-05, + "loss": 1.4287, + "step": 13940 + }, + { + "epoch": 0.40933114099477363, + "grad_norm": 0.0, + "learning_rate": 1.3356080418792296e-05, + "loss": 1.3594, + "step": 13941 + }, + { + "epoch": 0.4093605026719126, + "grad_norm": 0.0, + "learning_rate": 1.3355184597320684e-05, + "loss": 1.417, + "step": 13942 + }, + { + "epoch": 0.4093898643490516, + "grad_norm": 0.0, + "learning_rate": 1.3354288745507276e-05, + "loss": 1.2544, + "step": 13943 + }, + { + "epoch": 0.40941922602619063, + "grad_norm": 0.0, + "learning_rate": 1.3353392863360174e-05, + "loss": 1.374, + "step": 13944 + }, + { + "epoch": 0.4094485877033296, + "grad_norm": 0.0, + "learning_rate": 1.3352496950887486e-05, + "loss": 1.3662, + "step": 13945 + }, + { + "epoch": 0.4094779493804686, + "grad_norm": 0.0, + "learning_rate": 1.3351601008097308e-05, + "loss": 1.3291, + "step": 13946 + }, + { + "epoch": 0.4095073110576076, + "grad_norm": 0.0, + "learning_rate": 1.3350705034997742e-05, + "loss": 1.334, + "step": 13947 + }, + { + "epoch": 0.4095366727347466, + "grad_norm": 0.0, + "learning_rate": 1.3349809031596892e-05, + "loss": 1.4004, + "step": 13948 + }, + { + "epoch": 0.4095660344118856, + "grad_norm": 0.0, + "learning_rate": 1.3348912997902862e-05, + "loss": 1.3545, + "step": 13949 + }, + { + "epoch": 0.4095953960890246, + "grad_norm": 0.0, + "learning_rate": 1.3348016933923757e-05, + "loss": 1.3271, + "step": 13950 + }, + { + "epoch": 0.4096247577661636, + "grad_norm": 0.0, + "learning_rate": 1.3347120839667675e-05, + "loss": 1.3955, + "step": 13951 + }, + { + "epoch": 0.4096541194433026, + "grad_norm": 0.0, + "learning_rate": 1.3346224715142721e-05, + "loss": 1.4668, + "step": 13952 + }, + { + "epoch": 0.4096834811204416, + "grad_norm": 0.0, + "learning_rate": 1.3345328560357003e-05, + "loss": 1.3896, + "step": 13953 + }, + { + "epoch": 0.4097128427975806, + "grad_norm": 0.0, + "learning_rate": 1.3344432375318622e-05, + "loss": 1.4004, + "step": 13954 + }, + { + "epoch": 0.4097422044747196, + "grad_norm": 0.0, + "learning_rate": 1.334353616003568e-05, + "loss": 1.3867, + "step": 13955 + }, + { + "epoch": 0.4097715661518586, + "grad_norm": 0.0, + "learning_rate": 1.3342639914516282e-05, + "loss": 1.4316, + "step": 13956 + }, + { + "epoch": 0.4098009278289976, + "grad_norm": 0.0, + "learning_rate": 1.3341743638768536e-05, + "loss": 1.4111, + "step": 13957 + }, + { + "epoch": 0.4098302895061366, + "grad_norm": 0.0, + "learning_rate": 1.334084733280055e-05, + "loss": 1.3975, + "step": 13958 + }, + { + "epoch": 0.4098596511832756, + "grad_norm": 0.0, + "learning_rate": 1.3339950996620425e-05, + "loss": 1.3887, + "step": 13959 + }, + { + "epoch": 0.40988901286041457, + "grad_norm": 0.0, + "learning_rate": 1.3339054630236265e-05, + "loss": 1.2646, + "step": 13960 + }, + { + "epoch": 0.4099183745375536, + "grad_norm": 0.0, + "learning_rate": 1.3338158233656182e-05, + "loss": 1.4238, + "step": 13961 + }, + { + "epoch": 0.4099477362146926, + "grad_norm": 0.0, + "learning_rate": 1.333726180688828e-05, + "loss": 1.3809, + "step": 13962 + }, + { + "epoch": 0.40997709789183157, + "grad_norm": 0.0, + "learning_rate": 1.3336365349940663e-05, + "loss": 1.4102, + "step": 13963 + }, + { + "epoch": 0.4100064595689706, + "grad_norm": 0.0, + "learning_rate": 1.3335468862821439e-05, + "loss": 1.3525, + "step": 13964 + }, + { + "epoch": 0.4100358212461096, + "grad_norm": 0.0, + "learning_rate": 1.3334572345538716e-05, + "loss": 1.1958, + "step": 13965 + }, + { + "epoch": 0.41006518292324856, + "grad_norm": 0.0, + "learning_rate": 1.3333675798100603e-05, + "loss": 1.4141, + "step": 13966 + }, + { + "epoch": 0.4100945446003876, + "grad_norm": 0.0, + "learning_rate": 1.3332779220515204e-05, + "loss": 1.3428, + "step": 13967 + }, + { + "epoch": 0.4101239062775266, + "grad_norm": 0.0, + "learning_rate": 1.3331882612790628e-05, + "loss": 1.3965, + "step": 13968 + }, + { + "epoch": 0.41015326795466556, + "grad_norm": 0.0, + "learning_rate": 1.3330985974934987e-05, + "loss": 1.3096, + "step": 13969 + }, + { + "epoch": 0.4101826296318046, + "grad_norm": 0.0, + "learning_rate": 1.3330089306956386e-05, + "loss": 1.25, + "step": 13970 + }, + { + "epoch": 0.4102119913089436, + "grad_norm": 0.0, + "learning_rate": 1.3329192608862933e-05, + "loss": 1.3428, + "step": 13971 + }, + { + "epoch": 0.41024135298608255, + "grad_norm": 0.0, + "learning_rate": 1.3328295880662743e-05, + "loss": 1.2163, + "step": 13972 + }, + { + "epoch": 0.41027071466322157, + "grad_norm": 0.0, + "learning_rate": 1.3327399122363915e-05, + "loss": 1.3848, + "step": 13973 + }, + { + "epoch": 0.4103000763403606, + "grad_norm": 0.0, + "learning_rate": 1.3326502333974567e-05, + "loss": 1.4316, + "step": 13974 + }, + { + "epoch": 0.41032943801749955, + "grad_norm": 0.0, + "learning_rate": 1.3325605515502809e-05, + "loss": 1.3867, + "step": 13975 + }, + { + "epoch": 0.41035879969463857, + "grad_norm": 0.0, + "learning_rate": 1.3324708666956744e-05, + "loss": 1.3691, + "step": 13976 + }, + { + "epoch": 0.4103881613717776, + "grad_norm": 0.0, + "learning_rate": 1.332381178834449e-05, + "loss": 1.4346, + "step": 13977 + }, + { + "epoch": 0.41041752304891654, + "grad_norm": 0.0, + "learning_rate": 1.3322914879674155e-05, + "loss": 1.2432, + "step": 13978 + }, + { + "epoch": 0.41044688472605556, + "grad_norm": 0.0, + "learning_rate": 1.3322017940953848e-05, + "loss": 1.3809, + "step": 13979 + }, + { + "epoch": 0.4104762464031946, + "grad_norm": 0.0, + "learning_rate": 1.3321120972191684e-05, + "loss": 1.3926, + "step": 13980 + }, + { + "epoch": 0.41050560808033354, + "grad_norm": 0.0, + "learning_rate": 1.3320223973395771e-05, + "loss": 1.3418, + "step": 13981 + }, + { + "epoch": 0.41053496975747256, + "grad_norm": 0.0, + "learning_rate": 1.3319326944574225e-05, + "loss": 1.2881, + "step": 13982 + }, + { + "epoch": 0.4105643314346115, + "grad_norm": 0.0, + "learning_rate": 1.3318429885735154e-05, + "loss": 1.3203, + "step": 13983 + }, + { + "epoch": 0.41059369311175054, + "grad_norm": 0.0, + "learning_rate": 1.3317532796886669e-05, + "loss": 1.3711, + "step": 13984 + }, + { + "epoch": 0.41062305478888955, + "grad_norm": 0.0, + "learning_rate": 1.3316635678036887e-05, + "loss": 1.4326, + "step": 13985 + }, + { + "epoch": 0.4106524164660285, + "grad_norm": 0.0, + "learning_rate": 1.3315738529193918e-05, + "loss": 1.3887, + "step": 13986 + }, + { + "epoch": 0.41068177814316753, + "grad_norm": 0.0, + "learning_rate": 1.3314841350365879e-05, + "loss": 1.2671, + "step": 13987 + }, + { + "epoch": 0.41071113982030655, + "grad_norm": 0.0, + "learning_rate": 1.3313944141560878e-05, + "loss": 1.3008, + "step": 13988 + }, + { + "epoch": 0.4107405014974455, + "grad_norm": 0.0, + "learning_rate": 1.3313046902787033e-05, + "loss": 1.3721, + "step": 13989 + }, + { + "epoch": 0.4107698631745845, + "grad_norm": 0.0, + "learning_rate": 1.3312149634052455e-05, + "loss": 1.2754, + "step": 13990 + }, + { + "epoch": 0.41079922485172354, + "grad_norm": 0.0, + "learning_rate": 1.3311252335365261e-05, + "loss": 1.4438, + "step": 13991 + }, + { + "epoch": 0.4108285865288625, + "grad_norm": 0.0, + "learning_rate": 1.3310355006733567e-05, + "loss": 1.3457, + "step": 13992 + }, + { + "epoch": 0.4108579482060015, + "grad_norm": 0.0, + "learning_rate": 1.330945764816548e-05, + "loss": 1.4355, + "step": 13993 + }, + { + "epoch": 0.41088730988314054, + "grad_norm": 0.0, + "learning_rate": 1.3308560259669121e-05, + "loss": 1.4434, + "step": 13994 + }, + { + "epoch": 0.4109166715602795, + "grad_norm": 0.0, + "learning_rate": 1.3307662841252607e-05, + "loss": 1.4326, + "step": 13995 + }, + { + "epoch": 0.4109460332374185, + "grad_norm": 0.0, + "learning_rate": 1.3306765392924046e-05, + "loss": 1.5459, + "step": 13996 + }, + { + "epoch": 0.41097539491455753, + "grad_norm": 0.0, + "learning_rate": 1.3305867914691561e-05, + "loss": 1.3789, + "step": 13997 + }, + { + "epoch": 0.4110047565916965, + "grad_norm": 0.0, + "learning_rate": 1.3304970406563267e-05, + "loss": 1.459, + "step": 13998 + }, + { + "epoch": 0.4110341182688355, + "grad_norm": 0.0, + "learning_rate": 1.3304072868547278e-05, + "loss": 1.4307, + "step": 13999 + }, + { + "epoch": 0.41106347994597453, + "grad_norm": 0.0, + "learning_rate": 1.3303175300651712e-05, + "loss": 1.1841, + "step": 14000 + }, + { + "epoch": 0.4110928416231135, + "grad_norm": 0.0, + "learning_rate": 1.3302277702884686e-05, + "loss": 1.3906, + "step": 14001 + }, + { + "epoch": 0.4111222033002525, + "grad_norm": 0.0, + "learning_rate": 1.3301380075254318e-05, + "loss": 1.1846, + "step": 14002 + }, + { + "epoch": 0.4111515649773915, + "grad_norm": 0.0, + "learning_rate": 1.3300482417768725e-05, + "loss": 1.21, + "step": 14003 + }, + { + "epoch": 0.4111809266545305, + "grad_norm": 0.0, + "learning_rate": 1.3299584730436021e-05, + "loss": 1.5234, + "step": 14004 + }, + { + "epoch": 0.4112102883316695, + "grad_norm": 0.0, + "learning_rate": 1.3298687013264329e-05, + "loss": 1.3115, + "step": 14005 + }, + { + "epoch": 0.4112396500088085, + "grad_norm": 0.0, + "learning_rate": 1.3297789266261764e-05, + "loss": 1.3223, + "step": 14006 + }, + { + "epoch": 0.4112690116859475, + "grad_norm": 0.0, + "learning_rate": 1.329689148943645e-05, + "loss": 1.4189, + "step": 14007 + }, + { + "epoch": 0.4112983733630865, + "grad_norm": 0.0, + "learning_rate": 1.3295993682796498e-05, + "loss": 1.4297, + "step": 14008 + }, + { + "epoch": 0.4113277350402255, + "grad_norm": 0.0, + "learning_rate": 1.3295095846350033e-05, + "loss": 1.3418, + "step": 14009 + }, + { + "epoch": 0.4113570967173645, + "grad_norm": 0.0, + "learning_rate": 1.329419798010517e-05, + "loss": 1.3701, + "step": 14010 + }, + { + "epoch": 0.4113864583945035, + "grad_norm": 0.0, + "learning_rate": 1.3293300084070033e-05, + "loss": 1.3662, + "step": 14011 + }, + { + "epoch": 0.4114158200716425, + "grad_norm": 0.0, + "learning_rate": 1.3292402158252741e-05, + "loss": 1.1548, + "step": 14012 + }, + { + "epoch": 0.4114451817487815, + "grad_norm": 0.0, + "learning_rate": 1.3291504202661414e-05, + "loss": 1.4736, + "step": 14013 + }, + { + "epoch": 0.4114745434259205, + "grad_norm": 0.0, + "learning_rate": 1.3290606217304167e-05, + "loss": 1.4092, + "step": 14014 + }, + { + "epoch": 0.4115039051030595, + "grad_norm": 0.0, + "learning_rate": 1.3289708202189129e-05, + "loss": 1.3896, + "step": 14015 + }, + { + "epoch": 0.41153326678019847, + "grad_norm": 0.0, + "learning_rate": 1.3288810157324416e-05, + "loss": 1.3936, + "step": 14016 + }, + { + "epoch": 0.4115626284573375, + "grad_norm": 0.0, + "learning_rate": 1.3287912082718149e-05, + "loss": 1.3535, + "step": 14017 + }, + { + "epoch": 0.4115919901344765, + "grad_norm": 0.0, + "learning_rate": 1.328701397837845e-05, + "loss": 1.271, + "step": 14018 + }, + { + "epoch": 0.41162135181161547, + "grad_norm": 0.0, + "learning_rate": 1.3286115844313447e-05, + "loss": 1.3047, + "step": 14019 + }, + { + "epoch": 0.4116507134887545, + "grad_norm": 0.0, + "learning_rate": 1.3285217680531253e-05, + "loss": 1.3096, + "step": 14020 + }, + { + "epoch": 0.4116800751658935, + "grad_norm": 0.0, + "learning_rate": 1.3284319487039995e-05, + "loss": 1.4111, + "step": 14021 + }, + { + "epoch": 0.41170943684303246, + "grad_norm": 0.0, + "learning_rate": 1.3283421263847795e-05, + "loss": 1.2588, + "step": 14022 + }, + { + "epoch": 0.4117387985201715, + "grad_norm": 0.0, + "learning_rate": 1.3282523010962776e-05, + "loss": 1.3555, + "step": 14023 + }, + { + "epoch": 0.4117681601973105, + "grad_norm": 0.0, + "learning_rate": 1.3281624728393061e-05, + "loss": 1.3867, + "step": 14024 + }, + { + "epoch": 0.41179752187444946, + "grad_norm": 0.0, + "learning_rate": 1.3280726416146771e-05, + "loss": 1.3354, + "step": 14025 + }, + { + "epoch": 0.4118268835515885, + "grad_norm": 0.0, + "learning_rate": 1.3279828074232032e-05, + "loss": 1.4307, + "step": 14026 + }, + { + "epoch": 0.4118562452287275, + "grad_norm": 0.0, + "learning_rate": 1.3278929702656971e-05, + "loss": 1.418, + "step": 14027 + }, + { + "epoch": 0.41188560690586645, + "grad_norm": 0.0, + "learning_rate": 1.3278031301429705e-05, + "loss": 1.3574, + "step": 14028 + }, + { + "epoch": 0.41191496858300547, + "grad_norm": 0.0, + "learning_rate": 1.3277132870558365e-05, + "loss": 1.3594, + "step": 14029 + }, + { + "epoch": 0.4119443302601445, + "grad_norm": 0.0, + "learning_rate": 1.3276234410051072e-05, + "loss": 1.3633, + "step": 14030 + }, + { + "epoch": 0.41197369193728345, + "grad_norm": 0.0, + "learning_rate": 1.3275335919915955e-05, + "loss": 1.2925, + "step": 14031 + }, + { + "epoch": 0.41200305361442247, + "grad_norm": 0.0, + "learning_rate": 1.3274437400161135e-05, + "loss": 1.3965, + "step": 14032 + }, + { + "epoch": 0.4120324152915614, + "grad_norm": 0.0, + "learning_rate": 1.3273538850794739e-05, + "loss": 1.3516, + "step": 14033 + }, + { + "epoch": 0.41206177696870044, + "grad_norm": 0.0, + "learning_rate": 1.3272640271824889e-05, + "loss": 1.4434, + "step": 14034 + }, + { + "epoch": 0.41209113864583946, + "grad_norm": 0.0, + "learning_rate": 1.3271741663259716e-05, + "loss": 1.3311, + "step": 14035 + }, + { + "epoch": 0.4121205003229784, + "grad_norm": 0.0, + "learning_rate": 1.327084302510735e-05, + "loss": 1.3154, + "step": 14036 + }, + { + "epoch": 0.41214986200011744, + "grad_norm": 0.0, + "learning_rate": 1.3269944357375908e-05, + "loss": 1.3857, + "step": 14037 + }, + { + "epoch": 0.41217922367725646, + "grad_norm": 0.0, + "learning_rate": 1.3269045660073524e-05, + "loss": 1.2354, + "step": 14038 + }, + { + "epoch": 0.4122085853543954, + "grad_norm": 0.0, + "learning_rate": 1.3268146933208321e-05, + "loss": 1.2124, + "step": 14039 + }, + { + "epoch": 0.41223794703153444, + "grad_norm": 0.0, + "learning_rate": 1.3267248176788433e-05, + "loss": 1.2666, + "step": 14040 + }, + { + "epoch": 0.41226730870867345, + "grad_norm": 0.0, + "learning_rate": 1.326634939082198e-05, + "loss": 1.332, + "step": 14041 + }, + { + "epoch": 0.4122966703858124, + "grad_norm": 0.0, + "learning_rate": 1.3265450575317092e-05, + "loss": 1.4043, + "step": 14042 + }, + { + "epoch": 0.41232603206295143, + "grad_norm": 0.0, + "learning_rate": 1.3264551730281898e-05, + "loss": 1.3037, + "step": 14043 + }, + { + "epoch": 0.41235539374009045, + "grad_norm": 0.0, + "learning_rate": 1.3263652855724529e-05, + "loss": 1.4023, + "step": 14044 + }, + { + "epoch": 0.4123847554172294, + "grad_norm": 0.0, + "learning_rate": 1.326275395165311e-05, + "loss": 1.3208, + "step": 14045 + }, + { + "epoch": 0.4124141170943684, + "grad_norm": 0.0, + "learning_rate": 1.3261855018075769e-05, + "loss": 1.3789, + "step": 14046 + }, + { + "epoch": 0.41244347877150744, + "grad_norm": 0.0, + "learning_rate": 1.326095605500064e-05, + "loss": 1.3813, + "step": 14047 + }, + { + "epoch": 0.4124728404486464, + "grad_norm": 0.0, + "learning_rate": 1.3260057062435852e-05, + "loss": 1.189, + "step": 14048 + }, + { + "epoch": 0.4125022021257854, + "grad_norm": 0.0, + "learning_rate": 1.325915804038953e-05, + "loss": 1.3857, + "step": 14049 + }, + { + "epoch": 0.41253156380292444, + "grad_norm": 0.0, + "learning_rate": 1.3258258988869808e-05, + "loss": 1.4502, + "step": 14050 + }, + { + "epoch": 0.4125609254800634, + "grad_norm": 0.0, + "learning_rate": 1.3257359907884816e-05, + "loss": 1.334, + "step": 14051 + }, + { + "epoch": 0.4125902871572024, + "grad_norm": 0.0, + "learning_rate": 1.3256460797442685e-05, + "loss": 1.3389, + "step": 14052 + }, + { + "epoch": 0.41261964883434143, + "grad_norm": 0.0, + "learning_rate": 1.3255561657551544e-05, + "loss": 1.4248, + "step": 14053 + }, + { + "epoch": 0.4126490105114804, + "grad_norm": 0.0, + "learning_rate": 1.3254662488219524e-05, + "loss": 1.2871, + "step": 14054 + }, + { + "epoch": 0.4126783721886194, + "grad_norm": 0.0, + "learning_rate": 1.325376328945476e-05, + "loss": 1.4062, + "step": 14055 + }, + { + "epoch": 0.41270773386575843, + "grad_norm": 0.0, + "learning_rate": 1.3252864061265381e-05, + "loss": 1.29, + "step": 14056 + }, + { + "epoch": 0.4127370955428974, + "grad_norm": 0.0, + "learning_rate": 1.3251964803659517e-05, + "loss": 1.2466, + "step": 14057 + }, + { + "epoch": 0.4127664572200364, + "grad_norm": 0.0, + "learning_rate": 1.3251065516645303e-05, + "loss": 1.3496, + "step": 14058 + }, + { + "epoch": 0.4127958188971754, + "grad_norm": 0.0, + "learning_rate": 1.3250166200230872e-05, + "loss": 1.3506, + "step": 14059 + }, + { + "epoch": 0.4128251805743144, + "grad_norm": 0.0, + "learning_rate": 1.3249266854424356e-05, + "loss": 1.4297, + "step": 14060 + }, + { + "epoch": 0.4128545422514534, + "grad_norm": 0.0, + "learning_rate": 1.3248367479233887e-05, + "loss": 1.3584, + "step": 14061 + }, + { + "epoch": 0.4128839039285924, + "grad_norm": 0.0, + "learning_rate": 1.32474680746676e-05, + "loss": 1.4629, + "step": 14062 + }, + { + "epoch": 0.4129132656057314, + "grad_norm": 0.0, + "learning_rate": 1.3246568640733626e-05, + "loss": 1.3711, + "step": 14063 + }, + { + "epoch": 0.4129426272828704, + "grad_norm": 0.0, + "learning_rate": 1.3245669177440098e-05, + "loss": 1.3623, + "step": 14064 + }, + { + "epoch": 0.4129719889600094, + "grad_norm": 0.0, + "learning_rate": 1.3244769684795158e-05, + "loss": 1.4053, + "step": 14065 + }, + { + "epoch": 0.4130013506371484, + "grad_norm": 0.0, + "learning_rate": 1.324387016280693e-05, + "loss": 1.2646, + "step": 14066 + }, + { + "epoch": 0.4130307123142874, + "grad_norm": 0.0, + "learning_rate": 1.3242970611483557e-05, + "loss": 1.3467, + "step": 14067 + }, + { + "epoch": 0.4130600739914264, + "grad_norm": 0.0, + "learning_rate": 1.324207103083317e-05, + "loss": 1.3379, + "step": 14068 + }, + { + "epoch": 0.4130894356685654, + "grad_norm": 0.0, + "learning_rate": 1.32411714208639e-05, + "loss": 1.2437, + "step": 14069 + }, + { + "epoch": 0.4131187973457044, + "grad_norm": 0.0, + "learning_rate": 1.324027178158389e-05, + "loss": 1.2715, + "step": 14070 + }, + { + "epoch": 0.4131481590228434, + "grad_norm": 0.0, + "learning_rate": 1.3239372113001274e-05, + "loss": 1.4736, + "step": 14071 + }, + { + "epoch": 0.41317752069998237, + "grad_norm": 0.0, + "learning_rate": 1.3238472415124183e-05, + "loss": 1.3662, + "step": 14072 + }, + { + "epoch": 0.4132068823771214, + "grad_norm": 0.0, + "learning_rate": 1.323757268796076e-05, + "loss": 1.3447, + "step": 14073 + }, + { + "epoch": 0.4132362440542604, + "grad_norm": 0.0, + "learning_rate": 1.3236672931519134e-05, + "loss": 1.2764, + "step": 14074 + }, + { + "epoch": 0.41326560573139937, + "grad_norm": 0.0, + "learning_rate": 1.323577314580745e-05, + "loss": 1.251, + "step": 14075 + }, + { + "epoch": 0.4132949674085384, + "grad_norm": 0.0, + "learning_rate": 1.3234873330833838e-05, + "loss": 1.3535, + "step": 14076 + }, + { + "epoch": 0.4133243290856774, + "grad_norm": 0.0, + "learning_rate": 1.3233973486606439e-05, + "loss": 1.3232, + "step": 14077 + }, + { + "epoch": 0.41335369076281636, + "grad_norm": 0.0, + "learning_rate": 1.3233073613133388e-05, + "loss": 1.437, + "step": 14078 + }, + { + "epoch": 0.4133830524399554, + "grad_norm": 0.0, + "learning_rate": 1.3232173710422824e-05, + "loss": 1.3652, + "step": 14079 + }, + { + "epoch": 0.4134124141170944, + "grad_norm": 0.0, + "learning_rate": 1.3231273778482889e-05, + "loss": 1.3252, + "step": 14080 + }, + { + "epoch": 0.41344177579423336, + "grad_norm": 0.0, + "learning_rate": 1.3230373817321716e-05, + "loss": 1.3789, + "step": 14081 + }, + { + "epoch": 0.4134711374713724, + "grad_norm": 0.0, + "learning_rate": 1.3229473826947445e-05, + "loss": 1.3838, + "step": 14082 + }, + { + "epoch": 0.41350049914851134, + "grad_norm": 0.0, + "learning_rate": 1.3228573807368214e-05, + "loss": 1.4209, + "step": 14083 + }, + { + "epoch": 0.41352986082565035, + "grad_norm": 0.0, + "learning_rate": 1.3227673758592164e-05, + "loss": 1.4209, + "step": 14084 + }, + { + "epoch": 0.41355922250278937, + "grad_norm": 0.0, + "learning_rate": 1.3226773680627434e-05, + "loss": 1.2671, + "step": 14085 + }, + { + "epoch": 0.41358858417992833, + "grad_norm": 0.0, + "learning_rate": 1.3225873573482162e-05, + "loss": 1.1958, + "step": 14086 + }, + { + "epoch": 0.41361794585706735, + "grad_norm": 0.0, + "learning_rate": 1.322497343716449e-05, + "loss": 1.2847, + "step": 14087 + }, + { + "epoch": 0.41364730753420637, + "grad_norm": 0.0, + "learning_rate": 1.3224073271682556e-05, + "loss": 1.3823, + "step": 14088 + }, + { + "epoch": 0.4136766692113453, + "grad_norm": 0.0, + "learning_rate": 1.3223173077044504e-05, + "loss": 1.4561, + "step": 14089 + }, + { + "epoch": 0.41370603088848434, + "grad_norm": 0.0, + "learning_rate": 1.3222272853258474e-05, + "loss": 1.3213, + "step": 14090 + }, + { + "epoch": 0.41373539256562336, + "grad_norm": 0.0, + "learning_rate": 1.3221372600332601e-05, + "loss": 1.4053, + "step": 14091 + }, + { + "epoch": 0.4137647542427623, + "grad_norm": 0.0, + "learning_rate": 1.3220472318275032e-05, + "loss": 1.3662, + "step": 14092 + }, + { + "epoch": 0.41379411591990134, + "grad_norm": 0.0, + "learning_rate": 1.3219572007093907e-05, + "loss": 1.2842, + "step": 14093 + }, + { + "epoch": 0.41382347759704036, + "grad_norm": 0.0, + "learning_rate": 1.3218671666797369e-05, + "loss": 1.373, + "step": 14094 + }, + { + "epoch": 0.4138528392741793, + "grad_norm": 0.0, + "learning_rate": 1.3217771297393557e-05, + "loss": 1.5312, + "step": 14095 + }, + { + "epoch": 0.41388220095131834, + "grad_norm": 0.0, + "learning_rate": 1.3216870898890615e-05, + "loss": 1.457, + "step": 14096 + }, + { + "epoch": 0.41391156262845735, + "grad_norm": 0.0, + "learning_rate": 1.3215970471296689e-05, + "loss": 1.2988, + "step": 14097 + }, + { + "epoch": 0.4139409243055963, + "grad_norm": 0.0, + "learning_rate": 1.3215070014619915e-05, + "loss": 1.4023, + "step": 14098 + }, + { + "epoch": 0.41397028598273533, + "grad_norm": 0.0, + "learning_rate": 1.321416952886844e-05, + "loss": 1.4619, + "step": 14099 + }, + { + "epoch": 0.41399964765987435, + "grad_norm": 0.0, + "learning_rate": 1.321326901405041e-05, + "loss": 1.2163, + "step": 14100 + }, + { + "epoch": 0.4140290093370133, + "grad_norm": 0.0, + "learning_rate": 1.321236847017396e-05, + "loss": 1.4434, + "step": 14101 + }, + { + "epoch": 0.4140583710141523, + "grad_norm": 0.0, + "learning_rate": 1.3211467897247243e-05, + "loss": 1.335, + "step": 14102 + }, + { + "epoch": 0.41408773269129134, + "grad_norm": 0.0, + "learning_rate": 1.3210567295278398e-05, + "loss": 1.2705, + "step": 14103 + }, + { + "epoch": 0.4141170943684303, + "grad_norm": 0.0, + "learning_rate": 1.320966666427557e-05, + "loss": 1.3916, + "step": 14104 + }, + { + "epoch": 0.4141464560455693, + "grad_norm": 0.0, + "learning_rate": 1.3208766004246906e-05, + "loss": 1.3525, + "step": 14105 + }, + { + "epoch": 0.41417581772270834, + "grad_norm": 0.0, + "learning_rate": 1.3207865315200547e-05, + "loss": 1.2227, + "step": 14106 + }, + { + "epoch": 0.4142051793998473, + "grad_norm": 0.0, + "learning_rate": 1.3206964597144642e-05, + "loss": 1.3643, + "step": 14107 + }, + { + "epoch": 0.4142345410769863, + "grad_norm": 0.0, + "learning_rate": 1.3206063850087335e-05, + "loss": 1.334, + "step": 14108 + }, + { + "epoch": 0.41426390275412533, + "grad_norm": 0.0, + "learning_rate": 1.3205163074036774e-05, + "loss": 1.4678, + "step": 14109 + }, + { + "epoch": 0.4142932644312643, + "grad_norm": 0.0, + "learning_rate": 1.32042622690011e-05, + "loss": 1.6289, + "step": 14110 + }, + { + "epoch": 0.4143226261084033, + "grad_norm": 0.0, + "learning_rate": 1.320336143498846e-05, + "loss": 1.4424, + "step": 14111 + }, + { + "epoch": 0.41435198778554233, + "grad_norm": 0.0, + "learning_rate": 1.3202460572007002e-05, + "loss": 1.2891, + "step": 14112 + }, + { + "epoch": 0.4143813494626813, + "grad_norm": 0.0, + "learning_rate": 1.3201559680064874e-05, + "loss": 1.5684, + "step": 14113 + }, + { + "epoch": 0.4144107111398203, + "grad_norm": 0.0, + "learning_rate": 1.3200658759170224e-05, + "loss": 1.3779, + "step": 14114 + }, + { + "epoch": 0.4144400728169593, + "grad_norm": 0.0, + "learning_rate": 1.3199757809331195e-05, + "loss": 1.3867, + "step": 14115 + }, + { + "epoch": 0.4144694344940983, + "grad_norm": 0.0, + "learning_rate": 1.3198856830555937e-05, + "loss": 1.3672, + "step": 14116 + }, + { + "epoch": 0.4144987961712373, + "grad_norm": 0.0, + "learning_rate": 1.31979558228526e-05, + "loss": 1.3076, + "step": 14117 + }, + { + "epoch": 0.4145281578483763, + "grad_norm": 0.0, + "learning_rate": 1.3197054786229324e-05, + "loss": 1.3633, + "step": 14118 + }, + { + "epoch": 0.4145575195255153, + "grad_norm": 0.0, + "learning_rate": 1.3196153720694267e-05, + "loss": 1.4326, + "step": 14119 + }, + { + "epoch": 0.4145868812026543, + "grad_norm": 0.0, + "learning_rate": 1.3195252626255572e-05, + "loss": 1.4385, + "step": 14120 + }, + { + "epoch": 0.4146162428797933, + "grad_norm": 0.0, + "learning_rate": 1.319435150292139e-05, + "loss": 1.2822, + "step": 14121 + }, + { + "epoch": 0.4146456045569323, + "grad_norm": 0.0, + "learning_rate": 1.319345035069987e-05, + "loss": 1.2026, + "step": 14122 + }, + { + "epoch": 0.4146749662340713, + "grad_norm": 0.0, + "learning_rate": 1.3192549169599158e-05, + "loss": 1.3682, + "step": 14123 + }, + { + "epoch": 0.4147043279112103, + "grad_norm": 0.0, + "learning_rate": 1.3191647959627407e-05, + "loss": 1.3584, + "step": 14124 + }, + { + "epoch": 0.4147336895883493, + "grad_norm": 0.0, + "learning_rate": 1.3190746720792766e-05, + "loss": 1.4326, + "step": 14125 + }, + { + "epoch": 0.4147630512654883, + "grad_norm": 0.0, + "learning_rate": 1.3189845453103387e-05, + "loss": 1.4346, + "step": 14126 + }, + { + "epoch": 0.4147924129426273, + "grad_norm": 0.0, + "learning_rate": 1.3188944156567419e-05, + "loss": 1.2969, + "step": 14127 + }, + { + "epoch": 0.41482177461976627, + "grad_norm": 0.0, + "learning_rate": 1.3188042831193009e-05, + "loss": 1.4072, + "step": 14128 + }, + { + "epoch": 0.4148511362969053, + "grad_norm": 0.0, + "learning_rate": 1.3187141476988312e-05, + "loss": 1.3857, + "step": 14129 + }, + { + "epoch": 0.4148804979740443, + "grad_norm": 0.0, + "learning_rate": 1.3186240093961482e-05, + "loss": 1.4629, + "step": 14130 + }, + { + "epoch": 0.41490985965118327, + "grad_norm": 0.0, + "learning_rate": 1.3185338682120663e-05, + "loss": 1.3438, + "step": 14131 + }, + { + "epoch": 0.4149392213283223, + "grad_norm": 0.0, + "learning_rate": 1.3184437241474011e-05, + "loss": 1.2969, + "step": 14132 + }, + { + "epoch": 0.41496858300546124, + "grad_norm": 0.0, + "learning_rate": 1.3183535772029678e-05, + "loss": 1.4043, + "step": 14133 + }, + { + "epoch": 0.41499794468260026, + "grad_norm": 0.0, + "learning_rate": 1.3182634273795819e-05, + "loss": 1.4238, + "step": 14134 + }, + { + "epoch": 0.4150273063597393, + "grad_norm": 0.0, + "learning_rate": 1.3181732746780578e-05, + "loss": 1.3369, + "step": 14135 + }, + { + "epoch": 0.41505666803687824, + "grad_norm": 0.0, + "learning_rate": 1.3180831190992114e-05, + "loss": 1.457, + "step": 14136 + }, + { + "epoch": 0.41508602971401726, + "grad_norm": 0.0, + "learning_rate": 1.317992960643858e-05, + "loss": 1.4199, + "step": 14137 + }, + { + "epoch": 0.4151153913911563, + "grad_norm": 0.0, + "learning_rate": 1.3179027993128129e-05, + "loss": 1.2529, + "step": 14138 + }, + { + "epoch": 0.41514475306829524, + "grad_norm": 0.0, + "learning_rate": 1.3178126351068913e-05, + "loss": 1.4014, + "step": 14139 + }, + { + "epoch": 0.41517411474543425, + "grad_norm": 0.0, + "learning_rate": 1.3177224680269088e-05, + "loss": 1.332, + "step": 14140 + }, + { + "epoch": 0.41520347642257327, + "grad_norm": 0.0, + "learning_rate": 1.3176322980736801e-05, + "loss": 1.2588, + "step": 14141 + }, + { + "epoch": 0.41523283809971223, + "grad_norm": 0.0, + "learning_rate": 1.3175421252480218e-05, + "loss": 1.3398, + "step": 14142 + }, + { + "epoch": 0.41526219977685125, + "grad_norm": 0.0, + "learning_rate": 1.3174519495507485e-05, + "loss": 1.3848, + "step": 14143 + }, + { + "epoch": 0.41529156145399027, + "grad_norm": 0.0, + "learning_rate": 1.3173617709826758e-05, + "loss": 1.3789, + "step": 14144 + }, + { + "epoch": 0.4153209231311292, + "grad_norm": 0.0, + "learning_rate": 1.3172715895446194e-05, + "loss": 1.2207, + "step": 14145 + }, + { + "epoch": 0.41535028480826824, + "grad_norm": 0.0, + "learning_rate": 1.317181405237395e-05, + "loss": 1.3545, + "step": 14146 + }, + { + "epoch": 0.41537964648540726, + "grad_norm": 0.0, + "learning_rate": 1.3170912180618176e-05, + "loss": 1.3984, + "step": 14147 + }, + { + "epoch": 0.4154090081625462, + "grad_norm": 0.0, + "learning_rate": 1.3170010280187032e-05, + "loss": 1.3994, + "step": 14148 + }, + { + "epoch": 0.41543836983968524, + "grad_norm": 0.0, + "learning_rate": 1.3169108351088676e-05, + "loss": 1.3311, + "step": 14149 + }, + { + "epoch": 0.41546773151682426, + "grad_norm": 0.0, + "learning_rate": 1.3168206393331257e-05, + "loss": 1.3369, + "step": 14150 + }, + { + "epoch": 0.4154970931939632, + "grad_norm": 0.0, + "learning_rate": 1.316730440692294e-05, + "loss": 1.2959, + "step": 14151 + }, + { + "epoch": 0.41552645487110224, + "grad_norm": 0.0, + "learning_rate": 1.3166402391871875e-05, + "loss": 1.4189, + "step": 14152 + }, + { + "epoch": 0.41555581654824125, + "grad_norm": 0.0, + "learning_rate": 1.3165500348186221e-05, + "loss": 1.3799, + "step": 14153 + }, + { + "epoch": 0.4155851782253802, + "grad_norm": 0.0, + "learning_rate": 1.316459827587414e-05, + "loss": 1.2646, + "step": 14154 + }, + { + "epoch": 0.41561453990251923, + "grad_norm": 0.0, + "learning_rate": 1.3163696174943788e-05, + "loss": 1.3091, + "step": 14155 + }, + { + "epoch": 0.41564390157965825, + "grad_norm": 0.0, + "learning_rate": 1.3162794045403318e-05, + "loss": 1.3477, + "step": 14156 + }, + { + "epoch": 0.4156732632567972, + "grad_norm": 0.0, + "learning_rate": 1.3161891887260892e-05, + "loss": 1.3184, + "step": 14157 + }, + { + "epoch": 0.4157026249339362, + "grad_norm": 0.0, + "learning_rate": 1.3160989700524668e-05, + "loss": 1.3789, + "step": 14158 + }, + { + "epoch": 0.41573198661107524, + "grad_norm": 0.0, + "learning_rate": 1.3160087485202806e-05, + "loss": 1.4385, + "step": 14159 + }, + { + "epoch": 0.4157613482882142, + "grad_norm": 0.0, + "learning_rate": 1.3159185241303463e-05, + "loss": 1.2065, + "step": 14160 + }, + { + "epoch": 0.4157907099653532, + "grad_norm": 0.0, + "learning_rate": 1.3158282968834797e-05, + "loss": 1.4863, + "step": 14161 + }, + { + "epoch": 0.41582007164249224, + "grad_norm": 0.0, + "learning_rate": 1.315738066780497e-05, + "loss": 1.3467, + "step": 14162 + }, + { + "epoch": 0.4158494333196312, + "grad_norm": 0.0, + "learning_rate": 1.3156478338222143e-05, + "loss": 1.2402, + "step": 14163 + }, + { + "epoch": 0.4158787949967702, + "grad_norm": 0.0, + "learning_rate": 1.315557598009447e-05, + "loss": 1.4541, + "step": 14164 + }, + { + "epoch": 0.41590815667390924, + "grad_norm": 0.0, + "learning_rate": 1.3154673593430118e-05, + "loss": 1.2285, + "step": 14165 + }, + { + "epoch": 0.4159375183510482, + "grad_norm": 0.0, + "learning_rate": 1.3153771178237245e-05, + "loss": 1.3818, + "step": 14166 + }, + { + "epoch": 0.4159668800281872, + "grad_norm": 0.0, + "learning_rate": 1.3152868734524012e-05, + "loss": 1.3262, + "step": 14167 + }, + { + "epoch": 0.41599624170532623, + "grad_norm": 0.0, + "learning_rate": 1.3151966262298576e-05, + "loss": 1.417, + "step": 14168 + }, + { + "epoch": 0.4160256033824652, + "grad_norm": 0.0, + "learning_rate": 1.3151063761569107e-05, + "loss": 1.3047, + "step": 14169 + }, + { + "epoch": 0.4160549650596042, + "grad_norm": 0.0, + "learning_rate": 1.3150161232343758e-05, + "loss": 1.2646, + "step": 14170 + }, + { + "epoch": 0.4160843267367432, + "grad_norm": 0.0, + "learning_rate": 1.3149258674630697e-05, + "loss": 1.3193, + "step": 14171 + }, + { + "epoch": 0.4161136884138822, + "grad_norm": 0.0, + "learning_rate": 1.3148356088438079e-05, + "loss": 1.3623, + "step": 14172 + }, + { + "epoch": 0.4161430500910212, + "grad_norm": 0.0, + "learning_rate": 1.3147453473774073e-05, + "loss": 1.4541, + "step": 14173 + }, + { + "epoch": 0.4161724117681602, + "grad_norm": 0.0, + "learning_rate": 1.3146550830646838e-05, + "loss": 1.3691, + "step": 14174 + }, + { + "epoch": 0.4162017734452992, + "grad_norm": 0.0, + "learning_rate": 1.3145648159064542e-05, + "loss": 1.3779, + "step": 14175 + }, + { + "epoch": 0.4162311351224382, + "grad_norm": 0.0, + "learning_rate": 1.314474545903534e-05, + "loss": 1.4492, + "step": 14176 + }, + { + "epoch": 0.4162604967995772, + "grad_norm": 0.0, + "learning_rate": 1.31438427305674e-05, + "loss": 1.3682, + "step": 14177 + }, + { + "epoch": 0.4162898584767162, + "grad_norm": 0.0, + "learning_rate": 1.3142939973668885e-05, + "loss": 1.29, + "step": 14178 + }, + { + "epoch": 0.4163192201538552, + "grad_norm": 0.0, + "learning_rate": 1.314203718834796e-05, + "loss": 1.1821, + "step": 14179 + }, + { + "epoch": 0.4163485818309942, + "grad_norm": 0.0, + "learning_rate": 1.314113437461279e-05, + "loss": 1.2959, + "step": 14180 + }, + { + "epoch": 0.4163779435081332, + "grad_norm": 0.0, + "learning_rate": 1.3140231532471533e-05, + "loss": 1.4346, + "step": 14181 + }, + { + "epoch": 0.4164073051852722, + "grad_norm": 0.0, + "learning_rate": 1.3139328661932358e-05, + "loss": 1.2617, + "step": 14182 + }, + { + "epoch": 0.41643666686241115, + "grad_norm": 0.0, + "learning_rate": 1.3138425763003435e-05, + "loss": 1.2217, + "step": 14183 + }, + { + "epoch": 0.41646602853955017, + "grad_norm": 0.0, + "learning_rate": 1.313752283569292e-05, + "loss": 1.3828, + "step": 14184 + }, + { + "epoch": 0.4164953902166892, + "grad_norm": 0.0, + "learning_rate": 1.3136619880008983e-05, + "loss": 1.3965, + "step": 14185 + }, + { + "epoch": 0.41652475189382815, + "grad_norm": 0.0, + "learning_rate": 1.3135716895959787e-05, + "loss": 1.4619, + "step": 14186 + }, + { + "epoch": 0.41655411357096717, + "grad_norm": 0.0, + "learning_rate": 1.3134813883553505e-05, + "loss": 1.3701, + "step": 14187 + }, + { + "epoch": 0.4165834752481062, + "grad_norm": 0.0, + "learning_rate": 1.3133910842798295e-05, + "loss": 1.1582, + "step": 14188 + }, + { + "epoch": 0.41661283692524514, + "grad_norm": 0.0, + "learning_rate": 1.3133007773702329e-05, + "loss": 1.25, + "step": 14189 + }, + { + "epoch": 0.41664219860238416, + "grad_norm": 0.0, + "learning_rate": 1.313210467627377e-05, + "loss": 1.4238, + "step": 14190 + }, + { + "epoch": 0.4166715602795232, + "grad_norm": 0.0, + "learning_rate": 1.3131201550520785e-05, + "loss": 1.2773, + "step": 14191 + }, + { + "epoch": 0.41670092195666214, + "grad_norm": 0.0, + "learning_rate": 1.3130298396451545e-05, + "loss": 1.4346, + "step": 14192 + }, + { + "epoch": 0.41673028363380116, + "grad_norm": 0.0, + "learning_rate": 1.3129395214074213e-05, + "loss": 1.2842, + "step": 14193 + }, + { + "epoch": 0.4167596453109402, + "grad_norm": 0.0, + "learning_rate": 1.3128492003396956e-05, + "loss": 1.2773, + "step": 14194 + }, + { + "epoch": 0.41678900698807914, + "grad_norm": 0.0, + "learning_rate": 1.312758876442795e-05, + "loss": 1.332, + "step": 14195 + }, + { + "epoch": 0.41681836866521815, + "grad_norm": 0.0, + "learning_rate": 1.3126685497175355e-05, + "loss": 1.4043, + "step": 14196 + }, + { + "epoch": 0.41684773034235717, + "grad_norm": 0.0, + "learning_rate": 1.3125782201647341e-05, + "loss": 1.457, + "step": 14197 + }, + { + "epoch": 0.41687709201949613, + "grad_norm": 0.0, + "learning_rate": 1.3124878877852079e-05, + "loss": 1.4717, + "step": 14198 + }, + { + "epoch": 0.41690645369663515, + "grad_norm": 0.0, + "learning_rate": 1.3123975525797738e-05, + "loss": 1.3193, + "step": 14199 + }, + { + "epoch": 0.41693581537377417, + "grad_norm": 0.0, + "learning_rate": 1.3123072145492488e-05, + "loss": 1.3564, + "step": 14200 + }, + { + "epoch": 0.4169651770509131, + "grad_norm": 0.0, + "learning_rate": 1.3122168736944494e-05, + "loss": 1.2349, + "step": 14201 + }, + { + "epoch": 0.41699453872805214, + "grad_norm": 0.0, + "learning_rate": 1.3121265300161929e-05, + "loss": 1.4004, + "step": 14202 + }, + { + "epoch": 0.41702390040519116, + "grad_norm": 0.0, + "learning_rate": 1.3120361835152962e-05, + "loss": 1.2974, + "step": 14203 + }, + { + "epoch": 0.4170532620823301, + "grad_norm": 0.0, + "learning_rate": 1.3119458341925766e-05, + "loss": 1.3828, + "step": 14204 + }, + { + "epoch": 0.41708262375946914, + "grad_norm": 0.0, + "learning_rate": 1.3118554820488508e-05, + "loss": 1.291, + "step": 14205 + }, + { + "epoch": 0.41711198543660816, + "grad_norm": 0.0, + "learning_rate": 1.3117651270849358e-05, + "loss": 1.3853, + "step": 14206 + }, + { + "epoch": 0.4171413471137471, + "grad_norm": 0.0, + "learning_rate": 1.3116747693016494e-05, + "loss": 1.4404, + "step": 14207 + }, + { + "epoch": 0.41717070879088614, + "grad_norm": 0.0, + "learning_rate": 1.3115844086998079e-05, + "loss": 1.3945, + "step": 14208 + }, + { + "epoch": 0.41720007046802515, + "grad_norm": 0.0, + "learning_rate": 1.3114940452802289e-05, + "loss": 1.3086, + "step": 14209 + }, + { + "epoch": 0.4172294321451641, + "grad_norm": 0.0, + "learning_rate": 1.3114036790437293e-05, + "loss": 1.3535, + "step": 14210 + }, + { + "epoch": 0.41725879382230313, + "grad_norm": 0.0, + "learning_rate": 1.3113133099911267e-05, + "loss": 1.4619, + "step": 14211 + }, + { + "epoch": 0.41728815549944215, + "grad_norm": 0.0, + "learning_rate": 1.311222938123238e-05, + "loss": 1.3125, + "step": 14212 + }, + { + "epoch": 0.4173175171765811, + "grad_norm": 0.0, + "learning_rate": 1.3111325634408805e-05, + "loss": 1.4014, + "step": 14213 + }, + { + "epoch": 0.4173468788537201, + "grad_norm": 0.0, + "learning_rate": 1.3110421859448717e-05, + "loss": 1.3496, + "step": 14214 + }, + { + "epoch": 0.41737624053085914, + "grad_norm": 0.0, + "learning_rate": 1.3109518056360284e-05, + "loss": 1.3164, + "step": 14215 + }, + { + "epoch": 0.4174056022079981, + "grad_norm": 0.0, + "learning_rate": 1.3108614225151688e-05, + "loss": 1.2529, + "step": 14216 + }, + { + "epoch": 0.4174349638851371, + "grad_norm": 0.0, + "learning_rate": 1.3107710365831094e-05, + "loss": 1.2461, + "step": 14217 + }, + { + "epoch": 0.41746432556227614, + "grad_norm": 0.0, + "learning_rate": 1.3106806478406681e-05, + "loss": 1.2676, + "step": 14218 + }, + { + "epoch": 0.4174936872394151, + "grad_norm": 0.0, + "learning_rate": 1.3105902562886619e-05, + "loss": 1.1885, + "step": 14219 + }, + { + "epoch": 0.4175230489165541, + "grad_norm": 0.0, + "learning_rate": 1.3104998619279088e-05, + "loss": 1.3662, + "step": 14220 + }, + { + "epoch": 0.41755241059369314, + "grad_norm": 0.0, + "learning_rate": 1.3104094647592257e-05, + "loss": 1.4043, + "step": 14221 + }, + { + "epoch": 0.4175817722708321, + "grad_norm": 0.0, + "learning_rate": 1.3103190647834303e-05, + "loss": 1.2119, + "step": 14222 + }, + { + "epoch": 0.4176111339479711, + "grad_norm": 0.0, + "learning_rate": 1.3102286620013401e-05, + "loss": 1.3027, + "step": 14223 + }, + { + "epoch": 0.41764049562511013, + "grad_norm": 0.0, + "learning_rate": 1.310138256413773e-05, + "loss": 1.2715, + "step": 14224 + }, + { + "epoch": 0.4176698573022491, + "grad_norm": 0.0, + "learning_rate": 1.3100478480215459e-05, + "loss": 1.2451, + "step": 14225 + }, + { + "epoch": 0.4176992189793881, + "grad_norm": 0.0, + "learning_rate": 1.3099574368254766e-05, + "loss": 1.3516, + "step": 14226 + }, + { + "epoch": 0.4177285806565271, + "grad_norm": 0.0, + "learning_rate": 1.309867022826383e-05, + "loss": 1.457, + "step": 14227 + }, + { + "epoch": 0.4177579423336661, + "grad_norm": 0.0, + "learning_rate": 1.3097766060250828e-05, + "loss": 1.3457, + "step": 14228 + }, + { + "epoch": 0.4177873040108051, + "grad_norm": 0.0, + "learning_rate": 1.3096861864223931e-05, + "loss": 1.3252, + "step": 14229 + }, + { + "epoch": 0.4178166656879441, + "grad_norm": 0.0, + "learning_rate": 1.3095957640191319e-05, + "loss": 1.3301, + "step": 14230 + }, + { + "epoch": 0.4178460273650831, + "grad_norm": 0.0, + "learning_rate": 1.309505338816117e-05, + "loss": 1.335, + "step": 14231 + }, + { + "epoch": 0.4178753890422221, + "grad_norm": 0.0, + "learning_rate": 1.3094149108141663e-05, + "loss": 1.3086, + "step": 14232 + }, + { + "epoch": 0.41790475071936106, + "grad_norm": 0.0, + "learning_rate": 1.3093244800140967e-05, + "loss": 1.457, + "step": 14233 + }, + { + "epoch": 0.4179341123965001, + "grad_norm": 0.0, + "learning_rate": 1.309234046416727e-05, + "loss": 1.2891, + "step": 14234 + }, + { + "epoch": 0.4179634740736391, + "grad_norm": 0.0, + "learning_rate": 1.3091436100228744e-05, + "loss": 1.4268, + "step": 14235 + }, + { + "epoch": 0.41799283575077806, + "grad_norm": 0.0, + "learning_rate": 1.3090531708333573e-05, + "loss": 1.2998, + "step": 14236 + }, + { + "epoch": 0.4180221974279171, + "grad_norm": 0.0, + "learning_rate": 1.3089627288489933e-05, + "loss": 1.4043, + "step": 14237 + }, + { + "epoch": 0.4180515591050561, + "grad_norm": 0.0, + "learning_rate": 1.3088722840706002e-05, + "loss": 1.4131, + "step": 14238 + }, + { + "epoch": 0.41808092078219505, + "grad_norm": 0.0, + "learning_rate": 1.3087818364989954e-05, + "loss": 1.4697, + "step": 14239 + }, + { + "epoch": 0.41811028245933407, + "grad_norm": 0.0, + "learning_rate": 1.3086913861349976e-05, + "loss": 1.2988, + "step": 14240 + }, + { + "epoch": 0.4181396441364731, + "grad_norm": 0.0, + "learning_rate": 1.308600932979425e-05, + "loss": 1.458, + "step": 14241 + }, + { + "epoch": 0.41816900581361205, + "grad_norm": 0.0, + "learning_rate": 1.3085104770330945e-05, + "loss": 1.2617, + "step": 14242 + }, + { + "epoch": 0.41819836749075107, + "grad_norm": 0.0, + "learning_rate": 1.308420018296825e-05, + "loss": 1.2881, + "step": 14243 + }, + { + "epoch": 0.4182277291678901, + "grad_norm": 0.0, + "learning_rate": 1.3083295567714343e-05, + "loss": 1.4014, + "step": 14244 + }, + { + "epoch": 0.41825709084502904, + "grad_norm": 0.0, + "learning_rate": 1.3082390924577405e-05, + "loss": 1.3096, + "step": 14245 + }, + { + "epoch": 0.41828645252216806, + "grad_norm": 0.0, + "learning_rate": 1.3081486253565616e-05, + "loss": 1.1919, + "step": 14246 + }, + { + "epoch": 0.4183158141993071, + "grad_norm": 0.0, + "learning_rate": 1.3080581554687157e-05, + "loss": 1.2734, + "step": 14247 + }, + { + "epoch": 0.41834517587644604, + "grad_norm": 0.0, + "learning_rate": 1.3079676827950212e-05, + "loss": 1.2686, + "step": 14248 + }, + { + "epoch": 0.41837453755358506, + "grad_norm": 0.0, + "learning_rate": 1.307877207336296e-05, + "loss": 1.3652, + "step": 14249 + }, + { + "epoch": 0.4184038992307241, + "grad_norm": 0.0, + "learning_rate": 1.3077867290933583e-05, + "loss": 1.3926, + "step": 14250 + }, + { + "epoch": 0.41843326090786304, + "grad_norm": 0.0, + "learning_rate": 1.3076962480670264e-05, + "loss": 1.4531, + "step": 14251 + }, + { + "epoch": 0.41846262258500205, + "grad_norm": 0.0, + "learning_rate": 1.3076057642581184e-05, + "loss": 1.3125, + "step": 14252 + }, + { + "epoch": 0.41849198426214107, + "grad_norm": 0.0, + "learning_rate": 1.3075152776674528e-05, + "loss": 1.3467, + "step": 14253 + }, + { + "epoch": 0.41852134593928003, + "grad_norm": 0.0, + "learning_rate": 1.3074247882958477e-05, + "loss": 1.2979, + "step": 14254 + }, + { + "epoch": 0.41855070761641905, + "grad_norm": 0.0, + "learning_rate": 1.3073342961441217e-05, + "loss": 1.2295, + "step": 14255 + }, + { + "epoch": 0.41858006929355807, + "grad_norm": 0.0, + "learning_rate": 1.3072438012130927e-05, + "loss": 1.4307, + "step": 14256 + }, + { + "epoch": 0.418609430970697, + "grad_norm": 0.0, + "learning_rate": 1.3071533035035797e-05, + "loss": 1.2476, + "step": 14257 + }, + { + "epoch": 0.41863879264783604, + "grad_norm": 0.0, + "learning_rate": 1.3070628030164005e-05, + "loss": 1.3926, + "step": 14258 + }, + { + "epoch": 0.41866815432497506, + "grad_norm": 0.0, + "learning_rate": 1.3069722997523737e-05, + "loss": 1.2939, + "step": 14259 + }, + { + "epoch": 0.418697516002114, + "grad_norm": 0.0, + "learning_rate": 1.3068817937123178e-05, + "loss": 1.333, + "step": 14260 + }, + { + "epoch": 0.41872687767925304, + "grad_norm": 0.0, + "learning_rate": 1.3067912848970515e-05, + "loss": 1.4473, + "step": 14261 + }, + { + "epoch": 0.41875623935639206, + "grad_norm": 0.0, + "learning_rate": 1.3067007733073927e-05, + "loss": 1.4023, + "step": 14262 + }, + { + "epoch": 0.418785601033531, + "grad_norm": 0.0, + "learning_rate": 1.30661025894416e-05, + "loss": 1.2705, + "step": 14263 + }, + { + "epoch": 0.41881496271067004, + "grad_norm": 0.0, + "learning_rate": 1.3065197418081726e-05, + "loss": 1.4199, + "step": 14264 + }, + { + "epoch": 0.41884432438780905, + "grad_norm": 0.0, + "learning_rate": 1.3064292219002487e-05, + "loss": 1.2207, + "step": 14265 + }, + { + "epoch": 0.418873686064948, + "grad_norm": 0.0, + "learning_rate": 1.3063386992212068e-05, + "loss": 1.3027, + "step": 14266 + }, + { + "epoch": 0.41890304774208703, + "grad_norm": 0.0, + "learning_rate": 1.3062481737718657e-05, + "loss": 1.4102, + "step": 14267 + }, + { + "epoch": 0.41893240941922605, + "grad_norm": 0.0, + "learning_rate": 1.3061576455530438e-05, + "loss": 1.3008, + "step": 14268 + }, + { + "epoch": 0.418961771096365, + "grad_norm": 0.0, + "learning_rate": 1.3060671145655598e-05, + "loss": 1.3149, + "step": 14269 + }, + { + "epoch": 0.418991132773504, + "grad_norm": 0.0, + "learning_rate": 1.3059765808102328e-05, + "loss": 1.3477, + "step": 14270 + }, + { + "epoch": 0.41902049445064304, + "grad_norm": 0.0, + "learning_rate": 1.3058860442878808e-05, + "loss": 1.4092, + "step": 14271 + }, + { + "epoch": 0.419049856127782, + "grad_norm": 0.0, + "learning_rate": 1.3057955049993231e-05, + "loss": 1.2012, + "step": 14272 + }, + { + "epoch": 0.419079217804921, + "grad_norm": 0.0, + "learning_rate": 1.3057049629453786e-05, + "loss": 1.2793, + "step": 14273 + }, + { + "epoch": 0.41910857948206004, + "grad_norm": 0.0, + "learning_rate": 1.3056144181268658e-05, + "loss": 1.3828, + "step": 14274 + }, + { + "epoch": 0.419137941159199, + "grad_norm": 0.0, + "learning_rate": 1.3055238705446032e-05, + "loss": 1.4297, + "step": 14275 + }, + { + "epoch": 0.419167302836338, + "grad_norm": 0.0, + "learning_rate": 1.30543332019941e-05, + "loss": 1.2686, + "step": 14276 + }, + { + "epoch": 0.41919666451347704, + "grad_norm": 0.0, + "learning_rate": 1.3053427670921055e-05, + "loss": 1.3252, + "step": 14277 + }, + { + "epoch": 0.419226026190616, + "grad_norm": 0.0, + "learning_rate": 1.305252211223508e-05, + "loss": 1.3643, + "step": 14278 + }, + { + "epoch": 0.419255387867755, + "grad_norm": 0.0, + "learning_rate": 1.3051616525944364e-05, + "loss": 1.3447, + "step": 14279 + }, + { + "epoch": 0.41928474954489403, + "grad_norm": 0.0, + "learning_rate": 1.3050710912057101e-05, + "loss": 1.2979, + "step": 14280 + }, + { + "epoch": 0.419314111222033, + "grad_norm": 0.0, + "learning_rate": 1.3049805270581475e-05, + "loss": 1.3984, + "step": 14281 + }, + { + "epoch": 0.419343472899172, + "grad_norm": 0.0, + "learning_rate": 1.3048899601525679e-05, + "loss": 1.2891, + "step": 14282 + }, + { + "epoch": 0.419372834576311, + "grad_norm": 0.0, + "learning_rate": 1.3047993904897905e-05, + "loss": 1.3408, + "step": 14283 + }, + { + "epoch": 0.41940219625345, + "grad_norm": 0.0, + "learning_rate": 1.304708818070634e-05, + "loss": 1.3623, + "step": 14284 + }, + { + "epoch": 0.419431557930589, + "grad_norm": 0.0, + "learning_rate": 1.3046182428959178e-05, + "loss": 1.3291, + "step": 14285 + }, + { + "epoch": 0.41946091960772797, + "grad_norm": 0.0, + "learning_rate": 1.3045276649664604e-05, + "loss": 1.4209, + "step": 14286 + }, + { + "epoch": 0.419490281284867, + "grad_norm": 0.0, + "learning_rate": 1.304437084283082e-05, + "loss": 1.3975, + "step": 14287 + }, + { + "epoch": 0.419519642962006, + "grad_norm": 0.0, + "learning_rate": 1.3043465008466005e-05, + "loss": 1.2202, + "step": 14288 + }, + { + "epoch": 0.41954900463914496, + "grad_norm": 0.0, + "learning_rate": 1.3042559146578358e-05, + "loss": 1.4453, + "step": 14289 + }, + { + "epoch": 0.419578366316284, + "grad_norm": 0.0, + "learning_rate": 1.3041653257176073e-05, + "loss": 1.3271, + "step": 14290 + }, + { + "epoch": 0.419607727993423, + "grad_norm": 0.0, + "learning_rate": 1.3040747340267332e-05, + "loss": 1.3984, + "step": 14291 + }, + { + "epoch": 0.41963708967056196, + "grad_norm": 0.0, + "learning_rate": 1.3039841395860337e-05, + "loss": 1.3486, + "step": 14292 + }, + { + "epoch": 0.419666451347701, + "grad_norm": 0.0, + "learning_rate": 1.3038935423963279e-05, + "loss": 1.3936, + "step": 14293 + }, + { + "epoch": 0.41969581302484, + "grad_norm": 0.0, + "learning_rate": 1.303802942458435e-05, + "loss": 1.293, + "step": 14294 + }, + { + "epoch": 0.41972517470197895, + "grad_norm": 0.0, + "learning_rate": 1.3037123397731741e-05, + "loss": 1.2959, + "step": 14295 + }, + { + "epoch": 0.41975453637911797, + "grad_norm": 0.0, + "learning_rate": 1.3036217343413649e-05, + "loss": 1.3604, + "step": 14296 + }, + { + "epoch": 0.419783898056257, + "grad_norm": 0.0, + "learning_rate": 1.3035311261638266e-05, + "loss": 1.2461, + "step": 14297 + }, + { + "epoch": 0.41981325973339595, + "grad_norm": 0.0, + "learning_rate": 1.3034405152413788e-05, + "loss": 1.3271, + "step": 14298 + }, + { + "epoch": 0.41984262141053497, + "grad_norm": 0.0, + "learning_rate": 1.3033499015748401e-05, + "loss": 1.4434, + "step": 14299 + }, + { + "epoch": 0.419871983087674, + "grad_norm": 0.0, + "learning_rate": 1.303259285165031e-05, + "loss": 1.3291, + "step": 14300 + }, + { + "epoch": 0.41990134476481294, + "grad_norm": 0.0, + "learning_rate": 1.3031686660127703e-05, + "loss": 1.3867, + "step": 14301 + }, + { + "epoch": 0.41993070644195196, + "grad_norm": 0.0, + "learning_rate": 1.3030780441188777e-05, + "loss": 1.3994, + "step": 14302 + }, + { + "epoch": 0.419960068119091, + "grad_norm": 0.0, + "learning_rate": 1.302987419484173e-05, + "loss": 1.3574, + "step": 14303 + }, + { + "epoch": 0.41998942979622994, + "grad_norm": 0.0, + "learning_rate": 1.302896792109475e-05, + "loss": 1.3027, + "step": 14304 + }, + { + "epoch": 0.42001879147336896, + "grad_norm": 0.0, + "learning_rate": 1.302806161995604e-05, + "loss": 1.4072, + "step": 14305 + }, + { + "epoch": 0.420048153150508, + "grad_norm": 0.0, + "learning_rate": 1.3027155291433794e-05, + "loss": 1.332, + "step": 14306 + }, + { + "epoch": 0.42007751482764694, + "grad_norm": 0.0, + "learning_rate": 1.302624893553621e-05, + "loss": 1.2129, + "step": 14307 + }, + { + "epoch": 0.42010687650478595, + "grad_norm": 0.0, + "learning_rate": 1.3025342552271479e-05, + "loss": 1.2979, + "step": 14308 + }, + { + "epoch": 0.42013623818192497, + "grad_norm": 0.0, + "learning_rate": 1.3024436141647799e-05, + "loss": 1.2549, + "step": 14309 + }, + { + "epoch": 0.42016559985906393, + "grad_norm": 0.0, + "learning_rate": 1.3023529703673372e-05, + "loss": 1.3203, + "step": 14310 + }, + { + "epoch": 0.42019496153620295, + "grad_norm": 0.0, + "learning_rate": 1.3022623238356387e-05, + "loss": 1.5508, + "step": 14311 + }, + { + "epoch": 0.42022432321334197, + "grad_norm": 0.0, + "learning_rate": 1.3021716745705048e-05, + "loss": 1.3408, + "step": 14312 + }, + { + "epoch": 0.4202536848904809, + "grad_norm": 0.0, + "learning_rate": 1.3020810225727551e-05, + "loss": 1.333, + "step": 14313 + }, + { + "epoch": 0.42028304656761994, + "grad_norm": 0.0, + "learning_rate": 1.3019903678432093e-05, + "loss": 1.4521, + "step": 14314 + }, + { + "epoch": 0.42031240824475896, + "grad_norm": 0.0, + "learning_rate": 1.3018997103826873e-05, + "loss": 1.4453, + "step": 14315 + }, + { + "epoch": 0.4203417699218979, + "grad_norm": 0.0, + "learning_rate": 1.3018090501920089e-05, + "loss": 1.3086, + "step": 14316 + }, + { + "epoch": 0.42037113159903694, + "grad_norm": 0.0, + "learning_rate": 1.3017183872719941e-05, + "loss": 1.3955, + "step": 14317 + }, + { + "epoch": 0.42040049327617596, + "grad_norm": 0.0, + "learning_rate": 1.3016277216234623e-05, + "loss": 1.2178, + "step": 14318 + }, + { + "epoch": 0.4204298549533149, + "grad_norm": 0.0, + "learning_rate": 1.3015370532472343e-05, + "loss": 1.3506, + "step": 14319 + }, + { + "epoch": 0.42045921663045394, + "grad_norm": 0.0, + "learning_rate": 1.301446382144129e-05, + "loss": 1.417, + "step": 14320 + }, + { + "epoch": 0.42048857830759295, + "grad_norm": 0.0, + "learning_rate": 1.3013557083149671e-05, + "loss": 1.2793, + "step": 14321 + }, + { + "epoch": 0.4205179399847319, + "grad_norm": 0.0, + "learning_rate": 1.3012650317605684e-05, + "loss": 1.4062, + "step": 14322 + }, + { + "epoch": 0.42054730166187093, + "grad_norm": 0.0, + "learning_rate": 1.3011743524817527e-05, + "loss": 1.4697, + "step": 14323 + }, + { + "epoch": 0.42057666333900995, + "grad_norm": 0.0, + "learning_rate": 1.30108367047934e-05, + "loss": 1.2705, + "step": 14324 + }, + { + "epoch": 0.4206060250161489, + "grad_norm": 0.0, + "learning_rate": 1.3009929857541507e-05, + "loss": 1.3906, + "step": 14325 + }, + { + "epoch": 0.4206353866932879, + "grad_norm": 0.0, + "learning_rate": 1.3009022983070053e-05, + "loss": 1.3984, + "step": 14326 + }, + { + "epoch": 0.42066474837042694, + "grad_norm": 0.0, + "learning_rate": 1.300811608138723e-05, + "loss": 1.2153, + "step": 14327 + }, + { + "epoch": 0.4206941100475659, + "grad_norm": 0.0, + "learning_rate": 1.3007209152501241e-05, + "loss": 1.3936, + "step": 14328 + }, + { + "epoch": 0.4207234717247049, + "grad_norm": 0.0, + "learning_rate": 1.3006302196420291e-05, + "loss": 1.3301, + "step": 14329 + }, + { + "epoch": 0.42075283340184394, + "grad_norm": 0.0, + "learning_rate": 1.3005395213152579e-05, + "loss": 1.335, + "step": 14330 + }, + { + "epoch": 0.4207821950789829, + "grad_norm": 0.0, + "learning_rate": 1.3004488202706309e-05, + "loss": 1.2754, + "step": 14331 + }, + { + "epoch": 0.4208115567561219, + "grad_norm": 0.0, + "learning_rate": 1.3003581165089683e-05, + "loss": 1.3936, + "step": 14332 + }, + { + "epoch": 0.42084091843326094, + "grad_norm": 0.0, + "learning_rate": 1.3002674100310904e-05, + "loss": 1.3389, + "step": 14333 + }, + { + "epoch": 0.4208702801103999, + "grad_norm": 0.0, + "learning_rate": 1.3001767008378172e-05, + "loss": 1.3701, + "step": 14334 + }, + { + "epoch": 0.4208996417875389, + "grad_norm": 0.0, + "learning_rate": 1.3000859889299692e-05, + "loss": 1.2617, + "step": 14335 + }, + { + "epoch": 0.4209290034646779, + "grad_norm": 0.0, + "learning_rate": 1.2999952743083673e-05, + "loss": 1.3652, + "step": 14336 + }, + { + "epoch": 0.4209583651418169, + "grad_norm": 0.0, + "learning_rate": 1.299904556973831e-05, + "loss": 1.3335, + "step": 14337 + }, + { + "epoch": 0.4209877268189559, + "grad_norm": 0.0, + "learning_rate": 1.2998138369271807e-05, + "loss": 1.3525, + "step": 14338 + }, + { + "epoch": 0.42101708849609487, + "grad_norm": 0.0, + "learning_rate": 1.2997231141692374e-05, + "loss": 1.4316, + "step": 14339 + }, + { + "epoch": 0.4210464501732339, + "grad_norm": 0.0, + "learning_rate": 1.2996323887008209e-05, + "loss": 1.2139, + "step": 14340 + }, + { + "epoch": 0.4210758118503729, + "grad_norm": 0.0, + "learning_rate": 1.2995416605227522e-05, + "loss": 1.3359, + "step": 14341 + }, + { + "epoch": 0.42110517352751187, + "grad_norm": 0.0, + "learning_rate": 1.2994509296358512e-05, + "loss": 1.3965, + "step": 14342 + }, + { + "epoch": 0.4211345352046509, + "grad_norm": 0.0, + "learning_rate": 1.2993601960409395e-05, + "loss": 1.3076, + "step": 14343 + }, + { + "epoch": 0.4211638968817899, + "grad_norm": 0.0, + "learning_rate": 1.2992694597388362e-05, + "loss": 1.3975, + "step": 14344 + }, + { + "epoch": 0.42119325855892886, + "grad_norm": 0.0, + "learning_rate": 1.2991787207303627e-05, + "loss": 1.3115, + "step": 14345 + }, + { + "epoch": 0.4212226202360679, + "grad_norm": 0.0, + "learning_rate": 1.2990879790163398e-05, + "loss": 1.2373, + "step": 14346 + }, + { + "epoch": 0.4212519819132069, + "grad_norm": 0.0, + "learning_rate": 1.2989972345975877e-05, + "loss": 1.3457, + "step": 14347 + }, + { + "epoch": 0.42128134359034586, + "grad_norm": 0.0, + "learning_rate": 1.2989064874749263e-05, + "loss": 1.3623, + "step": 14348 + }, + { + "epoch": 0.4213107052674849, + "grad_norm": 0.0, + "learning_rate": 1.2988157376491774e-05, + "loss": 1.2656, + "step": 14349 + }, + { + "epoch": 0.4213400669446239, + "grad_norm": 0.0, + "learning_rate": 1.2987249851211614e-05, + "loss": 1.3711, + "step": 14350 + }, + { + "epoch": 0.42136942862176285, + "grad_norm": 0.0, + "learning_rate": 1.2986342298916988e-05, + "loss": 1.3262, + "step": 14351 + }, + { + "epoch": 0.42139879029890187, + "grad_norm": 0.0, + "learning_rate": 1.2985434719616104e-05, + "loss": 1.3896, + "step": 14352 + }, + { + "epoch": 0.4214281519760409, + "grad_norm": 0.0, + "learning_rate": 1.2984527113317167e-05, + "loss": 1.2959, + "step": 14353 + }, + { + "epoch": 0.42145751365317985, + "grad_norm": 0.0, + "learning_rate": 1.2983619480028387e-05, + "loss": 1.4629, + "step": 14354 + }, + { + "epoch": 0.42148687533031887, + "grad_norm": 0.0, + "learning_rate": 1.2982711819757973e-05, + "loss": 1.3301, + "step": 14355 + }, + { + "epoch": 0.4215162370074579, + "grad_norm": 0.0, + "learning_rate": 1.2981804132514135e-05, + "loss": 1.3955, + "step": 14356 + }, + { + "epoch": 0.42154559868459685, + "grad_norm": 0.0, + "learning_rate": 1.2980896418305075e-05, + "loss": 1.334, + "step": 14357 + }, + { + "epoch": 0.42157496036173586, + "grad_norm": 0.0, + "learning_rate": 1.2979988677139005e-05, + "loss": 1.2119, + "step": 14358 + }, + { + "epoch": 0.4216043220388749, + "grad_norm": 0.0, + "learning_rate": 1.2979080909024135e-05, + "loss": 1.2188, + "step": 14359 + }, + { + "epoch": 0.42163368371601384, + "grad_norm": 0.0, + "learning_rate": 1.2978173113968674e-05, + "loss": 1.3667, + "step": 14360 + }, + { + "epoch": 0.42166304539315286, + "grad_norm": 0.0, + "learning_rate": 1.2977265291980832e-05, + "loss": 1.2305, + "step": 14361 + }, + { + "epoch": 0.4216924070702919, + "grad_norm": 0.0, + "learning_rate": 1.2976357443068814e-05, + "loss": 1.2598, + "step": 14362 + }, + { + "epoch": 0.42172176874743084, + "grad_norm": 0.0, + "learning_rate": 1.2975449567240838e-05, + "loss": 1.3994, + "step": 14363 + }, + { + "epoch": 0.42175113042456985, + "grad_norm": 0.0, + "learning_rate": 1.2974541664505104e-05, + "loss": 1.2822, + "step": 14364 + }, + { + "epoch": 0.42178049210170887, + "grad_norm": 0.0, + "learning_rate": 1.2973633734869833e-05, + "loss": 1.333, + "step": 14365 + }, + { + "epoch": 0.42180985377884783, + "grad_norm": 0.0, + "learning_rate": 1.297272577834323e-05, + "loss": 1.502, + "step": 14366 + }, + { + "epoch": 0.42183921545598685, + "grad_norm": 0.0, + "learning_rate": 1.2971817794933504e-05, + "loss": 1.4756, + "step": 14367 + }, + { + "epoch": 0.42186857713312587, + "grad_norm": 0.0, + "learning_rate": 1.297090978464887e-05, + "loss": 1.2065, + "step": 14368 + }, + { + "epoch": 0.4218979388102648, + "grad_norm": 0.0, + "learning_rate": 1.2970001747497535e-05, + "loss": 1.2603, + "step": 14369 + }, + { + "epoch": 0.42192730048740384, + "grad_norm": 0.0, + "learning_rate": 1.2969093683487719e-05, + "loss": 1.2686, + "step": 14370 + }, + { + "epoch": 0.42195666216454286, + "grad_norm": 0.0, + "learning_rate": 1.2968185592627624e-05, + "loss": 1.3994, + "step": 14371 + }, + { + "epoch": 0.4219860238416818, + "grad_norm": 0.0, + "learning_rate": 1.2967277474925469e-05, + "loss": 1.3164, + "step": 14372 + }, + { + "epoch": 0.42201538551882084, + "grad_norm": 0.0, + "learning_rate": 1.2966369330389462e-05, + "loss": 1.3525, + "step": 14373 + }, + { + "epoch": 0.42204474719595986, + "grad_norm": 0.0, + "learning_rate": 1.2965461159027818e-05, + "loss": 1.1143, + "step": 14374 + }, + { + "epoch": 0.4220741088730988, + "grad_norm": 0.0, + "learning_rate": 1.296455296084875e-05, + "loss": 1.3623, + "step": 14375 + }, + { + "epoch": 0.42210347055023784, + "grad_norm": 0.0, + "learning_rate": 1.2963644735860472e-05, + "loss": 1.3477, + "step": 14376 + }, + { + "epoch": 0.42213283222737685, + "grad_norm": 0.0, + "learning_rate": 1.2962736484071192e-05, + "loss": 1.2637, + "step": 14377 + }, + { + "epoch": 0.4221621939045158, + "grad_norm": 0.0, + "learning_rate": 1.296182820548913e-05, + "loss": 1.2998, + "step": 14378 + }, + { + "epoch": 0.42219155558165483, + "grad_norm": 0.0, + "learning_rate": 1.2960919900122496e-05, + "loss": 1.1812, + "step": 14379 + }, + { + "epoch": 0.42222091725879385, + "grad_norm": 0.0, + "learning_rate": 1.2960011567979504e-05, + "loss": 1.4619, + "step": 14380 + }, + { + "epoch": 0.4222502789359328, + "grad_norm": 0.0, + "learning_rate": 1.295910320906837e-05, + "loss": 1.3037, + "step": 14381 + }, + { + "epoch": 0.4222796406130718, + "grad_norm": 0.0, + "learning_rate": 1.2958194823397307e-05, + "loss": 1.2754, + "step": 14382 + }, + { + "epoch": 0.42230900229021084, + "grad_norm": 0.0, + "learning_rate": 1.295728641097453e-05, + "loss": 1.4619, + "step": 14383 + }, + { + "epoch": 0.4223383639673498, + "grad_norm": 0.0, + "learning_rate": 1.2956377971808258e-05, + "loss": 1.2041, + "step": 14384 + }, + { + "epoch": 0.4223677256444888, + "grad_norm": 0.0, + "learning_rate": 1.2955469505906701e-05, + "loss": 1.3975, + "step": 14385 + }, + { + "epoch": 0.4223970873216278, + "grad_norm": 0.0, + "learning_rate": 1.2954561013278078e-05, + "loss": 1.3555, + "step": 14386 + }, + { + "epoch": 0.4224264489987668, + "grad_norm": 0.0, + "learning_rate": 1.29536524939306e-05, + "loss": 1.416, + "step": 14387 + }, + { + "epoch": 0.4224558106759058, + "grad_norm": 0.0, + "learning_rate": 1.295274394787249e-05, + "loss": 1.3711, + "step": 14388 + }, + { + "epoch": 0.4224851723530448, + "grad_norm": 0.0, + "learning_rate": 1.2951835375111957e-05, + "loss": 1.3584, + "step": 14389 + }, + { + "epoch": 0.4225145340301838, + "grad_norm": 0.0, + "learning_rate": 1.2950926775657222e-05, + "loss": 1.4111, + "step": 14390 + }, + { + "epoch": 0.4225438957073228, + "grad_norm": 0.0, + "learning_rate": 1.29500181495165e-05, + "loss": 1.3975, + "step": 14391 + }, + { + "epoch": 0.4225732573844618, + "grad_norm": 0.0, + "learning_rate": 1.2949109496698008e-05, + "loss": 1.3467, + "step": 14392 + }, + { + "epoch": 0.4226026190616008, + "grad_norm": 0.0, + "learning_rate": 1.2948200817209963e-05, + "loss": 1.2969, + "step": 14393 + }, + { + "epoch": 0.4226319807387398, + "grad_norm": 0.0, + "learning_rate": 1.2947292111060584e-05, + "loss": 1.251, + "step": 14394 + }, + { + "epoch": 0.42266134241587877, + "grad_norm": 0.0, + "learning_rate": 1.2946383378258087e-05, + "loss": 1.3252, + "step": 14395 + }, + { + "epoch": 0.4226907040930178, + "grad_norm": 0.0, + "learning_rate": 1.2945474618810691e-05, + "loss": 1.2666, + "step": 14396 + }, + { + "epoch": 0.4227200657701568, + "grad_norm": 0.0, + "learning_rate": 1.2944565832726617e-05, + "loss": 1.2422, + "step": 14397 + }, + { + "epoch": 0.42274942744729577, + "grad_norm": 0.0, + "learning_rate": 1.2943657020014076e-05, + "loss": 1.3516, + "step": 14398 + }, + { + "epoch": 0.4227787891244348, + "grad_norm": 0.0, + "learning_rate": 1.2942748180681291e-05, + "loss": 1.2627, + "step": 14399 + }, + { + "epoch": 0.4228081508015738, + "grad_norm": 0.0, + "learning_rate": 1.2941839314736481e-05, + "loss": 1.3633, + "step": 14400 + }, + { + "epoch": 0.42283751247871276, + "grad_norm": 0.0, + "learning_rate": 1.2940930422187866e-05, + "loss": 1.2666, + "step": 14401 + }, + { + "epoch": 0.4228668741558518, + "grad_norm": 0.0, + "learning_rate": 1.294002150304366e-05, + "loss": 1.229, + "step": 14402 + }, + { + "epoch": 0.4228962358329908, + "grad_norm": 0.0, + "learning_rate": 1.2939112557312088e-05, + "loss": 1.3643, + "step": 14403 + }, + { + "epoch": 0.42292559751012976, + "grad_norm": 0.0, + "learning_rate": 1.2938203585001373e-05, + "loss": 1.4619, + "step": 14404 + }, + { + "epoch": 0.4229549591872688, + "grad_norm": 0.0, + "learning_rate": 1.293729458611973e-05, + "loss": 1.4209, + "step": 14405 + }, + { + "epoch": 0.4229843208644078, + "grad_norm": 0.0, + "learning_rate": 1.2936385560675373e-05, + "loss": 1.3828, + "step": 14406 + }, + { + "epoch": 0.42301368254154675, + "grad_norm": 0.0, + "learning_rate": 1.2935476508676534e-05, + "loss": 1.333, + "step": 14407 + }, + { + "epoch": 0.42304304421868577, + "grad_norm": 0.0, + "learning_rate": 1.2934567430131426e-05, + "loss": 1.2734, + "step": 14408 + }, + { + "epoch": 0.4230724058958248, + "grad_norm": 0.0, + "learning_rate": 1.293365832504828e-05, + "loss": 1.3379, + "step": 14409 + }, + { + "epoch": 0.42310176757296375, + "grad_norm": 0.0, + "learning_rate": 1.2932749193435303e-05, + "loss": 1.3311, + "step": 14410 + }, + { + "epoch": 0.42313112925010277, + "grad_norm": 0.0, + "learning_rate": 1.2931840035300726e-05, + "loss": 1.2393, + "step": 14411 + }, + { + "epoch": 0.4231604909272418, + "grad_norm": 0.0, + "learning_rate": 1.2930930850652772e-05, + "loss": 1.3867, + "step": 14412 + }, + { + "epoch": 0.42318985260438075, + "grad_norm": 0.0, + "learning_rate": 1.2930021639499657e-05, + "loss": 1.333, + "step": 14413 + }, + { + "epoch": 0.42321921428151976, + "grad_norm": 0.0, + "learning_rate": 1.2929112401849605e-05, + "loss": 1.5713, + "step": 14414 + }, + { + "epoch": 0.4232485759586588, + "grad_norm": 0.0, + "learning_rate": 1.2928203137710841e-05, + "loss": 1.292, + "step": 14415 + }, + { + "epoch": 0.42327793763579774, + "grad_norm": 0.0, + "learning_rate": 1.2927293847091587e-05, + "loss": 1.4668, + "step": 14416 + }, + { + "epoch": 0.42330729931293676, + "grad_norm": 0.0, + "learning_rate": 1.2926384530000065e-05, + "loss": 1.4072, + "step": 14417 + }, + { + "epoch": 0.4233366609900758, + "grad_norm": 0.0, + "learning_rate": 1.2925475186444496e-05, + "loss": 1.418, + "step": 14418 + }, + { + "epoch": 0.42336602266721474, + "grad_norm": 0.0, + "learning_rate": 1.2924565816433105e-05, + "loss": 1.2949, + "step": 14419 + }, + { + "epoch": 0.42339538434435375, + "grad_norm": 0.0, + "learning_rate": 1.292365641997412e-05, + "loss": 1.4453, + "step": 14420 + }, + { + "epoch": 0.42342474602149277, + "grad_norm": 0.0, + "learning_rate": 1.2922746997075759e-05, + "loss": 1.3838, + "step": 14421 + }, + { + "epoch": 0.42345410769863173, + "grad_norm": 0.0, + "learning_rate": 1.292183754774625e-05, + "loss": 1.2559, + "step": 14422 + }, + { + "epoch": 0.42348346937577075, + "grad_norm": 0.0, + "learning_rate": 1.2920928071993813e-05, + "loss": 1.3652, + "step": 14423 + }, + { + "epoch": 0.42351283105290977, + "grad_norm": 0.0, + "learning_rate": 1.2920018569826676e-05, + "loss": 1.3066, + "step": 14424 + }, + { + "epoch": 0.4235421927300487, + "grad_norm": 0.0, + "learning_rate": 1.2919109041253069e-05, + "loss": 1.46, + "step": 14425 + }, + { + "epoch": 0.42357155440718774, + "grad_norm": 0.0, + "learning_rate": 1.2918199486281205e-05, + "loss": 1.3545, + "step": 14426 + }, + { + "epoch": 0.42360091608432676, + "grad_norm": 0.0, + "learning_rate": 1.2917289904919317e-05, + "loss": 1.4336, + "step": 14427 + }, + { + "epoch": 0.4236302777614657, + "grad_norm": 0.0, + "learning_rate": 1.2916380297175629e-05, + "loss": 1.2759, + "step": 14428 + }, + { + "epoch": 0.42365963943860474, + "grad_norm": 0.0, + "learning_rate": 1.2915470663058368e-05, + "loss": 1.2598, + "step": 14429 + }, + { + "epoch": 0.42368900111574376, + "grad_norm": 0.0, + "learning_rate": 1.2914561002575758e-05, + "loss": 1.4951, + "step": 14430 + }, + { + "epoch": 0.4237183627928827, + "grad_norm": 0.0, + "learning_rate": 1.2913651315736026e-05, + "loss": 1.5117, + "step": 14431 + }, + { + "epoch": 0.42374772447002174, + "grad_norm": 0.0, + "learning_rate": 1.29127416025474e-05, + "loss": 1.2217, + "step": 14432 + }, + { + "epoch": 0.42377708614716075, + "grad_norm": 0.0, + "learning_rate": 1.2911831863018108e-05, + "loss": 1.3994, + "step": 14433 + }, + { + "epoch": 0.4238064478242997, + "grad_norm": 0.0, + "learning_rate": 1.291092209715637e-05, + "loss": 1.3232, + "step": 14434 + }, + { + "epoch": 0.42383580950143873, + "grad_norm": 0.0, + "learning_rate": 1.2910012304970423e-05, + "loss": 1.3242, + "step": 14435 + }, + { + "epoch": 0.4238651711785777, + "grad_norm": 0.0, + "learning_rate": 1.2909102486468486e-05, + "loss": 1.3154, + "step": 14436 + }, + { + "epoch": 0.4238945328557167, + "grad_norm": 0.0, + "learning_rate": 1.2908192641658792e-05, + "loss": 1.2998, + "step": 14437 + }, + { + "epoch": 0.4239238945328557, + "grad_norm": 0.0, + "learning_rate": 1.2907282770549565e-05, + "loss": 1.2607, + "step": 14438 + }, + { + "epoch": 0.4239532562099947, + "grad_norm": 0.0, + "learning_rate": 1.2906372873149037e-05, + "loss": 1.3711, + "step": 14439 + }, + { + "epoch": 0.4239826178871337, + "grad_norm": 0.0, + "learning_rate": 1.2905462949465432e-05, + "loss": 1.2539, + "step": 14440 + }, + { + "epoch": 0.4240119795642727, + "grad_norm": 0.0, + "learning_rate": 1.2904552999506988e-05, + "loss": 1.4111, + "step": 14441 + }, + { + "epoch": 0.4240413412414117, + "grad_norm": 0.0, + "learning_rate": 1.2903643023281923e-05, + "loss": 1.2998, + "step": 14442 + }, + { + "epoch": 0.4240707029185507, + "grad_norm": 0.0, + "learning_rate": 1.2902733020798468e-05, + "loss": 1.2764, + "step": 14443 + }, + { + "epoch": 0.4241000645956897, + "grad_norm": 0.0, + "learning_rate": 1.2901822992064857e-05, + "loss": 1.3125, + "step": 14444 + }, + { + "epoch": 0.4241294262728287, + "grad_norm": 0.0, + "learning_rate": 1.290091293708932e-05, + "loss": 1.3721, + "step": 14445 + }, + { + "epoch": 0.4241587879499677, + "grad_norm": 0.0, + "learning_rate": 1.2900002855880085e-05, + "loss": 1.4912, + "step": 14446 + }, + { + "epoch": 0.4241881496271067, + "grad_norm": 0.0, + "learning_rate": 1.2899092748445376e-05, + "loss": 1.3555, + "step": 14447 + }, + { + "epoch": 0.4242175113042457, + "grad_norm": 0.0, + "learning_rate": 1.289818261479343e-05, + "loss": 1.3594, + "step": 14448 + }, + { + "epoch": 0.4242468729813847, + "grad_norm": 0.0, + "learning_rate": 1.289727245493248e-05, + "loss": 1.3379, + "step": 14449 + }, + { + "epoch": 0.4242762346585237, + "grad_norm": 0.0, + "learning_rate": 1.2896362268870753e-05, + "loss": 1.2134, + "step": 14450 + }, + { + "epoch": 0.42430559633566267, + "grad_norm": 0.0, + "learning_rate": 1.2895452056616478e-05, + "loss": 1.3193, + "step": 14451 + }, + { + "epoch": 0.4243349580128017, + "grad_norm": 0.0, + "learning_rate": 1.2894541818177888e-05, + "loss": 1.3828, + "step": 14452 + }, + { + "epoch": 0.4243643196899407, + "grad_norm": 0.0, + "learning_rate": 1.2893631553563217e-05, + "loss": 1.3276, + "step": 14453 + }, + { + "epoch": 0.42439368136707967, + "grad_norm": 0.0, + "learning_rate": 1.2892721262780695e-05, + "loss": 1.4141, + "step": 14454 + }, + { + "epoch": 0.4244230430442187, + "grad_norm": 0.0, + "learning_rate": 1.2891810945838553e-05, + "loss": 1.2964, + "step": 14455 + }, + { + "epoch": 0.4244524047213577, + "grad_norm": 0.0, + "learning_rate": 1.2890900602745022e-05, + "loss": 1.3799, + "step": 14456 + }, + { + "epoch": 0.42448176639849666, + "grad_norm": 0.0, + "learning_rate": 1.2889990233508338e-05, + "loss": 1.3887, + "step": 14457 + }, + { + "epoch": 0.4245111280756357, + "grad_norm": 0.0, + "learning_rate": 1.2889079838136734e-05, + "loss": 1.4287, + "step": 14458 + }, + { + "epoch": 0.4245404897527747, + "grad_norm": 0.0, + "learning_rate": 1.2888169416638439e-05, + "loss": 1.4609, + "step": 14459 + }, + { + "epoch": 0.42456985142991366, + "grad_norm": 0.0, + "learning_rate": 1.2887258969021688e-05, + "loss": 1.3027, + "step": 14460 + }, + { + "epoch": 0.4245992131070527, + "grad_norm": 0.0, + "learning_rate": 1.2886348495294713e-05, + "loss": 1.3018, + "step": 14461 + }, + { + "epoch": 0.4246285747841917, + "grad_norm": 0.0, + "learning_rate": 1.2885437995465756e-05, + "loss": 1.314, + "step": 14462 + }, + { + "epoch": 0.42465793646133065, + "grad_norm": 0.0, + "learning_rate": 1.2884527469543038e-05, + "loss": 1.3711, + "step": 14463 + }, + { + "epoch": 0.42468729813846967, + "grad_norm": 0.0, + "learning_rate": 1.2883616917534803e-05, + "loss": 1.292, + "step": 14464 + }, + { + "epoch": 0.4247166598156087, + "grad_norm": 0.0, + "learning_rate": 1.288270633944928e-05, + "loss": 1.2861, + "step": 14465 + }, + { + "epoch": 0.42474602149274765, + "grad_norm": 0.0, + "learning_rate": 1.2881795735294706e-05, + "loss": 1.3486, + "step": 14466 + }, + { + "epoch": 0.42477538316988667, + "grad_norm": 0.0, + "learning_rate": 1.2880885105079312e-05, + "loss": 1.272, + "step": 14467 + }, + { + "epoch": 0.4248047448470257, + "grad_norm": 0.0, + "learning_rate": 1.2879974448811342e-05, + "loss": 1.4199, + "step": 14468 + }, + { + "epoch": 0.42483410652416465, + "grad_norm": 0.0, + "learning_rate": 1.287906376649902e-05, + "loss": 1.2314, + "step": 14469 + }, + { + "epoch": 0.42486346820130366, + "grad_norm": 0.0, + "learning_rate": 1.2878153058150588e-05, + "loss": 1.3037, + "step": 14470 + }, + { + "epoch": 0.4248928298784427, + "grad_norm": 0.0, + "learning_rate": 1.2877242323774284e-05, + "loss": 1.373, + "step": 14471 + }, + { + "epoch": 0.42492219155558164, + "grad_norm": 0.0, + "learning_rate": 1.2876331563378338e-05, + "loss": 1.4297, + "step": 14472 + }, + { + "epoch": 0.42495155323272066, + "grad_norm": 0.0, + "learning_rate": 1.2875420776970988e-05, + "loss": 1.2764, + "step": 14473 + }, + { + "epoch": 0.4249809149098597, + "grad_norm": 0.0, + "learning_rate": 1.2874509964560476e-05, + "loss": 1.4375, + "step": 14474 + }, + { + "epoch": 0.42501027658699864, + "grad_norm": 0.0, + "learning_rate": 1.2873599126155033e-05, + "loss": 1.2676, + "step": 14475 + }, + { + "epoch": 0.42503963826413765, + "grad_norm": 0.0, + "learning_rate": 1.2872688261762893e-05, + "loss": 1.4424, + "step": 14476 + }, + { + "epoch": 0.42506899994127667, + "grad_norm": 0.0, + "learning_rate": 1.28717773713923e-05, + "loss": 1.4072, + "step": 14477 + }, + { + "epoch": 0.42509836161841563, + "grad_norm": 0.0, + "learning_rate": 1.2870866455051491e-05, + "loss": 1.4873, + "step": 14478 + }, + { + "epoch": 0.42512772329555465, + "grad_norm": 0.0, + "learning_rate": 1.28699555127487e-05, + "loss": 1.4619, + "step": 14479 + }, + { + "epoch": 0.42515708497269367, + "grad_norm": 0.0, + "learning_rate": 1.2869044544492164e-05, + "loss": 1.3359, + "step": 14480 + }, + { + "epoch": 0.42518644664983263, + "grad_norm": 0.0, + "learning_rate": 1.2868133550290124e-05, + "loss": 1.2285, + "step": 14481 + }, + { + "epoch": 0.42521580832697164, + "grad_norm": 0.0, + "learning_rate": 1.286722253015082e-05, + "loss": 1.4092, + "step": 14482 + }, + { + "epoch": 0.42524517000411066, + "grad_norm": 0.0, + "learning_rate": 1.2866311484082485e-05, + "loss": 1.4316, + "step": 14483 + }, + { + "epoch": 0.4252745316812496, + "grad_norm": 0.0, + "learning_rate": 1.2865400412093366e-05, + "loss": 1.3047, + "step": 14484 + }, + { + "epoch": 0.42530389335838864, + "grad_norm": 0.0, + "learning_rate": 1.2864489314191695e-05, + "loss": 1.2515, + "step": 14485 + }, + { + "epoch": 0.4253332550355276, + "grad_norm": 0.0, + "learning_rate": 1.286357819038571e-05, + "loss": 1.3438, + "step": 14486 + }, + { + "epoch": 0.4253626167126666, + "grad_norm": 0.0, + "learning_rate": 1.2862667040683658e-05, + "loss": 1.4385, + "step": 14487 + }, + { + "epoch": 0.42539197838980564, + "grad_norm": 0.0, + "learning_rate": 1.2861755865093775e-05, + "loss": 1.3701, + "step": 14488 + }, + { + "epoch": 0.4254213400669446, + "grad_norm": 0.0, + "learning_rate": 1.28608446636243e-05, + "loss": 1.3281, + "step": 14489 + }, + { + "epoch": 0.4254507017440836, + "grad_norm": 0.0, + "learning_rate": 1.2859933436283473e-05, + "loss": 1.3564, + "step": 14490 + }, + { + "epoch": 0.42548006342122263, + "grad_norm": 0.0, + "learning_rate": 1.2859022183079538e-05, + "loss": 1.3584, + "step": 14491 + }, + { + "epoch": 0.4255094250983616, + "grad_norm": 0.0, + "learning_rate": 1.2858110904020728e-05, + "loss": 1.2803, + "step": 14492 + }, + { + "epoch": 0.4255387867755006, + "grad_norm": 0.0, + "learning_rate": 1.2857199599115292e-05, + "loss": 1.2485, + "step": 14493 + }, + { + "epoch": 0.4255681484526396, + "grad_norm": 0.0, + "learning_rate": 1.285628826837147e-05, + "loss": 1.3447, + "step": 14494 + }, + { + "epoch": 0.4255975101297786, + "grad_norm": 0.0, + "learning_rate": 1.2855376911797503e-05, + "loss": 1.4043, + "step": 14495 + }, + { + "epoch": 0.4256268718069176, + "grad_norm": 0.0, + "learning_rate": 1.2854465529401628e-05, + "loss": 1.2642, + "step": 14496 + }, + { + "epoch": 0.4256562334840566, + "grad_norm": 0.0, + "learning_rate": 1.2853554121192091e-05, + "loss": 1.3857, + "step": 14497 + }, + { + "epoch": 0.4256855951611956, + "grad_norm": 0.0, + "learning_rate": 1.2852642687177133e-05, + "loss": 1.4292, + "step": 14498 + }, + { + "epoch": 0.4257149568383346, + "grad_norm": 0.0, + "learning_rate": 1.2851731227364996e-05, + "loss": 1.417, + "step": 14499 + }, + { + "epoch": 0.4257443185154736, + "grad_norm": 0.0, + "learning_rate": 1.2850819741763924e-05, + "loss": 1.3071, + "step": 14500 + }, + { + "epoch": 0.4257736801926126, + "grad_norm": 0.0, + "learning_rate": 1.284990823038216e-05, + "loss": 1.4004, + "step": 14501 + }, + { + "epoch": 0.4258030418697516, + "grad_norm": 0.0, + "learning_rate": 1.2848996693227945e-05, + "loss": 1.2988, + "step": 14502 + }, + { + "epoch": 0.4258324035468906, + "grad_norm": 0.0, + "learning_rate": 1.2848085130309523e-05, + "loss": 1.2529, + "step": 14503 + }, + { + "epoch": 0.4258617652240296, + "grad_norm": 0.0, + "learning_rate": 1.284717354163514e-05, + "loss": 1.3379, + "step": 14504 + }, + { + "epoch": 0.4258911269011686, + "grad_norm": 0.0, + "learning_rate": 1.2846261927213033e-05, + "loss": 1.4717, + "step": 14505 + }, + { + "epoch": 0.4259204885783076, + "grad_norm": 0.0, + "learning_rate": 1.2845350287051455e-05, + "loss": 1.3643, + "step": 14506 + }, + { + "epoch": 0.42594985025544657, + "grad_norm": 0.0, + "learning_rate": 1.2844438621158643e-05, + "loss": 1.3721, + "step": 14507 + }, + { + "epoch": 0.4259792119325856, + "grad_norm": 0.0, + "learning_rate": 1.2843526929542844e-05, + "loss": 1.2803, + "step": 14508 + }, + { + "epoch": 0.4260085736097246, + "grad_norm": 0.0, + "learning_rate": 1.2842615212212303e-05, + "loss": 1.4131, + "step": 14509 + }, + { + "epoch": 0.42603793528686357, + "grad_norm": 0.0, + "learning_rate": 1.2841703469175264e-05, + "loss": 1.1934, + "step": 14510 + }, + { + "epoch": 0.4260672969640026, + "grad_norm": 0.0, + "learning_rate": 1.2840791700439977e-05, + "loss": 1.1963, + "step": 14511 + }, + { + "epoch": 0.4260966586411416, + "grad_norm": 0.0, + "learning_rate": 1.2839879906014678e-05, + "loss": 1.2178, + "step": 14512 + }, + { + "epoch": 0.42612602031828056, + "grad_norm": 0.0, + "learning_rate": 1.2838968085907619e-05, + "loss": 1.3647, + "step": 14513 + }, + { + "epoch": 0.4261553819954196, + "grad_norm": 0.0, + "learning_rate": 1.2838056240127045e-05, + "loss": 1.377, + "step": 14514 + }, + { + "epoch": 0.4261847436725586, + "grad_norm": 0.0, + "learning_rate": 1.2837144368681205e-05, + "loss": 1.251, + "step": 14515 + }, + { + "epoch": 0.42621410534969756, + "grad_norm": 0.0, + "learning_rate": 1.2836232471578338e-05, + "loss": 1.3398, + "step": 14516 + }, + { + "epoch": 0.4262434670268366, + "grad_norm": 0.0, + "learning_rate": 1.2835320548826692e-05, + "loss": 1.252, + "step": 14517 + }, + { + "epoch": 0.4262728287039756, + "grad_norm": 0.0, + "learning_rate": 1.283440860043452e-05, + "loss": 1.2334, + "step": 14518 + }, + { + "epoch": 0.42630219038111455, + "grad_norm": 0.0, + "learning_rate": 1.2833496626410064e-05, + "loss": 1.4014, + "step": 14519 + }, + { + "epoch": 0.42633155205825357, + "grad_norm": 0.0, + "learning_rate": 1.2832584626761575e-05, + "loss": 1.2607, + "step": 14520 + }, + { + "epoch": 0.4263609137353926, + "grad_norm": 0.0, + "learning_rate": 1.2831672601497294e-05, + "loss": 1.4648, + "step": 14521 + }, + { + "epoch": 0.42639027541253155, + "grad_norm": 0.0, + "learning_rate": 1.2830760550625473e-05, + "loss": 1.3438, + "step": 14522 + }, + { + "epoch": 0.42641963708967057, + "grad_norm": 0.0, + "learning_rate": 1.282984847415436e-05, + "loss": 1.2891, + "step": 14523 + }, + { + "epoch": 0.4264489987668096, + "grad_norm": 0.0, + "learning_rate": 1.2828936372092207e-05, + "loss": 1.2363, + "step": 14524 + }, + { + "epoch": 0.42647836044394855, + "grad_norm": 0.0, + "learning_rate": 1.282802424444725e-05, + "loss": 1.4424, + "step": 14525 + }, + { + "epoch": 0.42650772212108756, + "grad_norm": 0.0, + "learning_rate": 1.2827112091227751e-05, + "loss": 1.3848, + "step": 14526 + }, + { + "epoch": 0.4265370837982266, + "grad_norm": 0.0, + "learning_rate": 1.2826199912441953e-05, + "loss": 1.4521, + "step": 14527 + }, + { + "epoch": 0.42656644547536554, + "grad_norm": 0.0, + "learning_rate": 1.2825287708098102e-05, + "loss": 1.2256, + "step": 14528 + }, + { + "epoch": 0.42659580715250456, + "grad_norm": 0.0, + "learning_rate": 1.2824375478204451e-05, + "loss": 1.2227, + "step": 14529 + }, + { + "epoch": 0.4266251688296436, + "grad_norm": 0.0, + "learning_rate": 1.282346322276925e-05, + "loss": 1.3467, + "step": 14530 + }, + { + "epoch": 0.42665453050678254, + "grad_norm": 0.0, + "learning_rate": 1.2822550941800752e-05, + "loss": 1.4668, + "step": 14531 + }, + { + "epoch": 0.42668389218392155, + "grad_norm": 0.0, + "learning_rate": 1.28216386353072e-05, + "loss": 1.4092, + "step": 14532 + }, + { + "epoch": 0.42671325386106057, + "grad_norm": 0.0, + "learning_rate": 1.2820726303296847e-05, + "loss": 1.2881, + "step": 14533 + }, + { + "epoch": 0.42674261553819953, + "grad_norm": 0.0, + "learning_rate": 1.2819813945777942e-05, + "loss": 1.293, + "step": 14534 + }, + { + "epoch": 0.42677197721533855, + "grad_norm": 0.0, + "learning_rate": 1.2818901562758739e-05, + "loss": 1.4238, + "step": 14535 + }, + { + "epoch": 0.4268013388924775, + "grad_norm": 0.0, + "learning_rate": 1.2817989154247488e-05, + "loss": 1.4375, + "step": 14536 + }, + { + "epoch": 0.42683070056961653, + "grad_norm": 0.0, + "learning_rate": 1.2817076720252438e-05, + "loss": 1.1143, + "step": 14537 + }, + { + "epoch": 0.42686006224675554, + "grad_norm": 0.0, + "learning_rate": 1.281616426078184e-05, + "loss": 1.2979, + "step": 14538 + }, + { + "epoch": 0.4268894239238945, + "grad_norm": 0.0, + "learning_rate": 1.2815251775843951e-05, + "loss": 1.4336, + "step": 14539 + }, + { + "epoch": 0.4269187856010335, + "grad_norm": 0.0, + "learning_rate": 1.2814339265447017e-05, + "loss": 1.2163, + "step": 14540 + }, + { + "epoch": 0.42694814727817254, + "grad_norm": 0.0, + "learning_rate": 1.2813426729599294e-05, + "loss": 1.1855, + "step": 14541 + }, + { + "epoch": 0.4269775089553115, + "grad_norm": 0.0, + "learning_rate": 1.281251416830903e-05, + "loss": 1.333, + "step": 14542 + }, + { + "epoch": 0.4270068706324505, + "grad_norm": 0.0, + "learning_rate": 1.2811601581584482e-05, + "loss": 1.3369, + "step": 14543 + }, + { + "epoch": 0.42703623230958954, + "grad_norm": 0.0, + "learning_rate": 1.2810688969433902e-05, + "loss": 1.2666, + "step": 14544 + }, + { + "epoch": 0.4270655939867285, + "grad_norm": 0.0, + "learning_rate": 1.280977633186554e-05, + "loss": 1.3271, + "step": 14545 + }, + { + "epoch": 0.4270949556638675, + "grad_norm": 0.0, + "learning_rate": 1.2808863668887652e-05, + "loss": 1.3438, + "step": 14546 + }, + { + "epoch": 0.42712431734100653, + "grad_norm": 0.0, + "learning_rate": 1.2807950980508487e-05, + "loss": 1.3154, + "step": 14547 + }, + { + "epoch": 0.4271536790181455, + "grad_norm": 0.0, + "learning_rate": 1.280703826673631e-05, + "loss": 1.2607, + "step": 14548 + }, + { + "epoch": 0.4271830406952845, + "grad_norm": 0.0, + "learning_rate": 1.2806125527579361e-05, + "loss": 1.4014, + "step": 14549 + }, + { + "epoch": 0.4272124023724235, + "grad_norm": 0.0, + "learning_rate": 1.2805212763045897e-05, + "loss": 1.335, + "step": 14550 + }, + { + "epoch": 0.4272417640495625, + "grad_norm": 0.0, + "learning_rate": 1.280429997314418e-05, + "loss": 1.2988, + "step": 14551 + }, + { + "epoch": 0.4272711257267015, + "grad_norm": 0.0, + "learning_rate": 1.2803387157882463e-05, + "loss": 1.335, + "step": 14552 + }, + { + "epoch": 0.4273004874038405, + "grad_norm": 0.0, + "learning_rate": 1.2802474317268996e-05, + "loss": 1.3906, + "step": 14553 + }, + { + "epoch": 0.4273298490809795, + "grad_norm": 0.0, + "learning_rate": 1.2801561451312034e-05, + "loss": 1.3965, + "step": 14554 + }, + { + "epoch": 0.4273592107581185, + "grad_norm": 0.0, + "learning_rate": 1.2800648560019835e-05, + "loss": 1.2344, + "step": 14555 + }, + { + "epoch": 0.4273885724352575, + "grad_norm": 0.0, + "learning_rate": 1.2799735643400654e-05, + "loss": 1.3682, + "step": 14556 + }, + { + "epoch": 0.4274179341123965, + "grad_norm": 0.0, + "learning_rate": 1.2798822701462748e-05, + "loss": 1.3516, + "step": 14557 + }, + { + "epoch": 0.4274472957895355, + "grad_norm": 0.0, + "learning_rate": 1.2797909734214369e-05, + "loss": 1.4023, + "step": 14558 + }, + { + "epoch": 0.4274766574666745, + "grad_norm": 0.0, + "learning_rate": 1.2796996741663777e-05, + "loss": 1.3867, + "step": 14559 + }, + { + "epoch": 0.4275060191438135, + "grad_norm": 0.0, + "learning_rate": 1.2796083723819228e-05, + "loss": 1.2998, + "step": 14560 + }, + { + "epoch": 0.4275353808209525, + "grad_norm": 0.0, + "learning_rate": 1.2795170680688975e-05, + "loss": 1.3379, + "step": 14561 + }, + { + "epoch": 0.4275647424980915, + "grad_norm": 0.0, + "learning_rate": 1.279425761228128e-05, + "loss": 1.3945, + "step": 14562 + }, + { + "epoch": 0.42759410417523047, + "grad_norm": 0.0, + "learning_rate": 1.2793344518604398e-05, + "loss": 1.4307, + "step": 14563 + }, + { + "epoch": 0.4276234658523695, + "grad_norm": 0.0, + "learning_rate": 1.2792431399666585e-05, + "loss": 1.4326, + "step": 14564 + }, + { + "epoch": 0.4276528275295085, + "grad_norm": 0.0, + "learning_rate": 1.2791518255476101e-05, + "loss": 1.2881, + "step": 14565 + }, + { + "epoch": 0.42768218920664747, + "grad_norm": 0.0, + "learning_rate": 1.2790605086041201e-05, + "loss": 1.3887, + "step": 14566 + }, + { + "epoch": 0.4277115508837865, + "grad_norm": 0.0, + "learning_rate": 1.2789691891370144e-05, + "loss": 1.3193, + "step": 14567 + }, + { + "epoch": 0.4277409125609255, + "grad_norm": 0.0, + "learning_rate": 1.2788778671471191e-05, + "loss": 1.2773, + "step": 14568 + }, + { + "epoch": 0.42777027423806446, + "grad_norm": 0.0, + "learning_rate": 1.2787865426352597e-05, + "loss": 1.3374, + "step": 14569 + }, + { + "epoch": 0.4277996359152035, + "grad_norm": 0.0, + "learning_rate": 1.2786952156022624e-05, + "loss": 1.313, + "step": 14570 + }, + { + "epoch": 0.4278289975923425, + "grad_norm": 0.0, + "learning_rate": 1.2786038860489524e-05, + "loss": 1.1904, + "step": 14571 + }, + { + "epoch": 0.42785835926948146, + "grad_norm": 0.0, + "learning_rate": 1.2785125539761565e-05, + "loss": 1.2646, + "step": 14572 + }, + { + "epoch": 0.4278877209466205, + "grad_norm": 0.0, + "learning_rate": 1.2784212193847003e-05, + "loss": 1.373, + "step": 14573 + }, + { + "epoch": 0.4279170826237595, + "grad_norm": 0.0, + "learning_rate": 1.2783298822754096e-05, + "loss": 1.3887, + "step": 14574 + }, + { + "epoch": 0.42794644430089845, + "grad_norm": 0.0, + "learning_rate": 1.2782385426491105e-05, + "loss": 1.2891, + "step": 14575 + }, + { + "epoch": 0.42797580597803747, + "grad_norm": 0.0, + "learning_rate": 1.2781472005066287e-05, + "loss": 1.4648, + "step": 14576 + }, + { + "epoch": 0.4280051676551765, + "grad_norm": 0.0, + "learning_rate": 1.278055855848791e-05, + "loss": 1.3975, + "step": 14577 + }, + { + "epoch": 0.42803452933231545, + "grad_norm": 0.0, + "learning_rate": 1.2779645086764228e-05, + "loss": 1.1362, + "step": 14578 + }, + { + "epoch": 0.42806389100945447, + "grad_norm": 0.0, + "learning_rate": 1.27787315899035e-05, + "loss": 1.3848, + "step": 14579 + }, + { + "epoch": 0.4280932526865935, + "grad_norm": 0.0, + "learning_rate": 1.2777818067913996e-05, + "loss": 1.3486, + "step": 14580 + }, + { + "epoch": 0.42812261436373245, + "grad_norm": 0.0, + "learning_rate": 1.277690452080397e-05, + "loss": 1.2896, + "step": 14581 + }, + { + "epoch": 0.42815197604087146, + "grad_norm": 0.0, + "learning_rate": 1.2775990948581686e-05, + "loss": 1.2168, + "step": 14582 + }, + { + "epoch": 0.4281813377180105, + "grad_norm": 0.0, + "learning_rate": 1.2775077351255403e-05, + "loss": 1.4229, + "step": 14583 + }, + { + "epoch": 0.42821069939514944, + "grad_norm": 0.0, + "learning_rate": 1.2774163728833387e-05, + "loss": 1.5098, + "step": 14584 + }, + { + "epoch": 0.42824006107228846, + "grad_norm": 0.0, + "learning_rate": 1.2773250081323896e-05, + "loss": 1.3496, + "step": 14585 + }, + { + "epoch": 0.4282694227494274, + "grad_norm": 0.0, + "learning_rate": 1.2772336408735195e-05, + "loss": 1.2559, + "step": 14586 + }, + { + "epoch": 0.42829878442656644, + "grad_norm": 0.0, + "learning_rate": 1.2771422711075544e-05, + "loss": 1.4502, + "step": 14587 + }, + { + "epoch": 0.42832814610370545, + "grad_norm": 0.0, + "learning_rate": 1.277050898835321e-05, + "loss": 1.3057, + "step": 14588 + }, + { + "epoch": 0.4283575077808444, + "grad_norm": 0.0, + "learning_rate": 1.2769595240576453e-05, + "loss": 1.2344, + "step": 14589 + }, + { + "epoch": 0.42838686945798343, + "grad_norm": 0.0, + "learning_rate": 1.2768681467753537e-05, + "loss": 1.4053, + "step": 14590 + }, + { + "epoch": 0.42841623113512245, + "grad_norm": 0.0, + "learning_rate": 1.2767767669892725e-05, + "loss": 1.3154, + "step": 14591 + }, + { + "epoch": 0.4284455928122614, + "grad_norm": 0.0, + "learning_rate": 1.2766853847002282e-05, + "loss": 1.3906, + "step": 14592 + }, + { + "epoch": 0.42847495448940043, + "grad_norm": 0.0, + "learning_rate": 1.2765939999090473e-05, + "loss": 1.2832, + "step": 14593 + }, + { + "epoch": 0.42850431616653945, + "grad_norm": 0.0, + "learning_rate": 1.2765026126165556e-05, + "loss": 1.5068, + "step": 14594 + }, + { + "epoch": 0.4285336778436784, + "grad_norm": 0.0, + "learning_rate": 1.27641122282358e-05, + "loss": 1.3364, + "step": 14595 + }, + { + "epoch": 0.4285630395208174, + "grad_norm": 0.0, + "learning_rate": 1.2763198305309473e-05, + "loss": 1.4326, + "step": 14596 + }, + { + "epoch": 0.42859240119795644, + "grad_norm": 0.0, + "learning_rate": 1.2762284357394835e-05, + "loss": 1.4023, + "step": 14597 + }, + { + "epoch": 0.4286217628750954, + "grad_norm": 0.0, + "learning_rate": 1.276137038450015e-05, + "loss": 1.251, + "step": 14598 + }, + { + "epoch": 0.4286511245522344, + "grad_norm": 0.0, + "learning_rate": 1.2760456386633683e-05, + "loss": 1.207, + "step": 14599 + }, + { + "epoch": 0.42868048622937344, + "grad_norm": 0.0, + "learning_rate": 1.2759542363803706e-05, + "loss": 1.3652, + "step": 14600 + }, + { + "epoch": 0.4287098479065124, + "grad_norm": 0.0, + "learning_rate": 1.2758628316018482e-05, + "loss": 1.3916, + "step": 14601 + }, + { + "epoch": 0.4287392095836514, + "grad_norm": 0.0, + "learning_rate": 1.2757714243286275e-05, + "loss": 1.2539, + "step": 14602 + }, + { + "epoch": 0.42876857126079043, + "grad_norm": 0.0, + "learning_rate": 1.275680014561535e-05, + "loss": 1.3506, + "step": 14603 + }, + { + "epoch": 0.4287979329379294, + "grad_norm": 0.0, + "learning_rate": 1.2755886023013973e-05, + "loss": 1.3013, + "step": 14604 + }, + { + "epoch": 0.4288272946150684, + "grad_norm": 0.0, + "learning_rate": 1.2754971875490418e-05, + "loss": 1.3066, + "step": 14605 + }, + { + "epoch": 0.4288566562922074, + "grad_norm": 0.0, + "learning_rate": 1.2754057703052945e-05, + "loss": 1.2344, + "step": 14606 + }, + { + "epoch": 0.4288860179693464, + "grad_norm": 0.0, + "learning_rate": 1.275314350570982e-05, + "loss": 1.2271, + "step": 14607 + }, + { + "epoch": 0.4289153796464854, + "grad_norm": 0.0, + "learning_rate": 1.2752229283469316e-05, + "loss": 1.2393, + "step": 14608 + }, + { + "epoch": 0.4289447413236244, + "grad_norm": 0.0, + "learning_rate": 1.2751315036339698e-05, + "loss": 1.3594, + "step": 14609 + }, + { + "epoch": 0.4289741030007634, + "grad_norm": 0.0, + "learning_rate": 1.2750400764329232e-05, + "loss": 1.4365, + "step": 14610 + }, + { + "epoch": 0.4290034646779024, + "grad_norm": 0.0, + "learning_rate": 1.2749486467446186e-05, + "loss": 1.3457, + "step": 14611 + }, + { + "epoch": 0.4290328263550414, + "grad_norm": 0.0, + "learning_rate": 1.2748572145698832e-05, + "loss": 1.291, + "step": 14612 + }, + { + "epoch": 0.4290621880321804, + "grad_norm": 0.0, + "learning_rate": 1.2747657799095437e-05, + "loss": 1.3164, + "step": 14613 + }, + { + "epoch": 0.4290915497093194, + "grad_norm": 0.0, + "learning_rate": 1.2746743427644269e-05, + "loss": 1.4238, + "step": 14614 + }, + { + "epoch": 0.4291209113864584, + "grad_norm": 0.0, + "learning_rate": 1.2745829031353596e-05, + "loss": 1.4209, + "step": 14615 + }, + { + "epoch": 0.4291502730635974, + "grad_norm": 0.0, + "learning_rate": 1.2744914610231684e-05, + "loss": 1.3584, + "step": 14616 + }, + { + "epoch": 0.4291796347407364, + "grad_norm": 0.0, + "learning_rate": 1.2744000164286811e-05, + "loss": 1.4043, + "step": 14617 + }, + { + "epoch": 0.4292089964178754, + "grad_norm": 0.0, + "learning_rate": 1.2743085693527242e-05, + "loss": 1.333, + "step": 14618 + }, + { + "epoch": 0.42923835809501437, + "grad_norm": 0.0, + "learning_rate": 1.2742171197961245e-05, + "loss": 1.3281, + "step": 14619 + }, + { + "epoch": 0.4292677197721534, + "grad_norm": 0.0, + "learning_rate": 1.2741256677597089e-05, + "loss": 1.3555, + "step": 14620 + }, + { + "epoch": 0.4292970814492924, + "grad_norm": 0.0, + "learning_rate": 1.2740342132443051e-05, + "loss": 1.4062, + "step": 14621 + }, + { + "epoch": 0.42932644312643137, + "grad_norm": 0.0, + "learning_rate": 1.2739427562507397e-05, + "loss": 1.3457, + "step": 14622 + }, + { + "epoch": 0.4293558048035704, + "grad_norm": 0.0, + "learning_rate": 1.2738512967798395e-05, + "loss": 1.3896, + "step": 14623 + }, + { + "epoch": 0.4293851664807094, + "grad_norm": 0.0, + "learning_rate": 1.273759834832432e-05, + "loss": 1.3394, + "step": 14624 + }, + { + "epoch": 0.42941452815784836, + "grad_norm": 0.0, + "learning_rate": 1.2736683704093442e-05, + "loss": 1.2837, + "step": 14625 + }, + { + "epoch": 0.4294438898349874, + "grad_norm": 0.0, + "learning_rate": 1.2735769035114033e-05, + "loss": 1.2539, + "step": 14626 + }, + { + "epoch": 0.4294732515121264, + "grad_norm": 0.0, + "learning_rate": 1.2734854341394364e-05, + "loss": 1.1445, + "step": 14627 + }, + { + "epoch": 0.42950261318926536, + "grad_norm": 0.0, + "learning_rate": 1.2733939622942705e-05, + "loss": 1.3008, + "step": 14628 + }, + { + "epoch": 0.4295319748664044, + "grad_norm": 0.0, + "learning_rate": 1.2733024879767332e-05, + "loss": 1.4121, + "step": 14629 + }, + { + "epoch": 0.4295613365435434, + "grad_norm": 0.0, + "learning_rate": 1.2732110111876517e-05, + "loss": 1.1826, + "step": 14630 + }, + { + "epoch": 0.42959069822068235, + "grad_norm": 0.0, + "learning_rate": 1.2731195319278526e-05, + "loss": 1.3945, + "step": 14631 + }, + { + "epoch": 0.42962005989782137, + "grad_norm": 0.0, + "learning_rate": 1.273028050198164e-05, + "loss": 1.2241, + "step": 14632 + }, + { + "epoch": 0.4296494215749604, + "grad_norm": 0.0, + "learning_rate": 1.2729365659994127e-05, + "loss": 1.3574, + "step": 14633 + }, + { + "epoch": 0.42967878325209935, + "grad_norm": 0.0, + "learning_rate": 1.272845079332426e-05, + "loss": 1.3975, + "step": 14634 + }, + { + "epoch": 0.42970814492923837, + "grad_norm": 0.0, + "learning_rate": 1.2727535901980314e-05, + "loss": 1.3418, + "step": 14635 + }, + { + "epoch": 0.42973750660637733, + "grad_norm": 0.0, + "learning_rate": 1.2726620985970563e-05, + "loss": 1.3301, + "step": 14636 + }, + { + "epoch": 0.42976686828351635, + "grad_norm": 0.0, + "learning_rate": 1.2725706045303278e-05, + "loss": 1.2939, + "step": 14637 + }, + { + "epoch": 0.42979622996065536, + "grad_norm": 0.0, + "learning_rate": 1.272479107998674e-05, + "loss": 1.333, + "step": 14638 + }, + { + "epoch": 0.4298255916377943, + "grad_norm": 0.0, + "learning_rate": 1.2723876090029214e-05, + "loss": 1.2676, + "step": 14639 + }, + { + "epoch": 0.42985495331493334, + "grad_norm": 0.0, + "learning_rate": 1.2722961075438982e-05, + "loss": 1.4121, + "step": 14640 + }, + { + "epoch": 0.42988431499207236, + "grad_norm": 0.0, + "learning_rate": 1.2722046036224313e-05, + "loss": 1.3936, + "step": 14641 + }, + { + "epoch": 0.4299136766692113, + "grad_norm": 0.0, + "learning_rate": 1.2721130972393488e-05, + "loss": 1.459, + "step": 14642 + }, + { + "epoch": 0.42994303834635034, + "grad_norm": 0.0, + "learning_rate": 1.2720215883954776e-05, + "loss": 1.3242, + "step": 14643 + }, + { + "epoch": 0.42997240002348935, + "grad_norm": 0.0, + "learning_rate": 1.2719300770916454e-05, + "loss": 1.1494, + "step": 14644 + }, + { + "epoch": 0.4300017617006283, + "grad_norm": 0.0, + "learning_rate": 1.2718385633286801e-05, + "loss": 1.3477, + "step": 14645 + }, + { + "epoch": 0.43003112337776733, + "grad_norm": 0.0, + "learning_rate": 1.2717470471074092e-05, + "loss": 1.2617, + "step": 14646 + }, + { + "epoch": 0.43006048505490635, + "grad_norm": 0.0, + "learning_rate": 1.2716555284286598e-05, + "loss": 1.4365, + "step": 14647 + }, + { + "epoch": 0.4300898467320453, + "grad_norm": 0.0, + "learning_rate": 1.27156400729326e-05, + "loss": 1.3535, + "step": 14648 + }, + { + "epoch": 0.43011920840918433, + "grad_norm": 0.0, + "learning_rate": 1.2714724837020372e-05, + "loss": 1.3682, + "step": 14649 + }, + { + "epoch": 0.43014857008632335, + "grad_norm": 0.0, + "learning_rate": 1.2713809576558193e-05, + "loss": 1.334, + "step": 14650 + }, + { + "epoch": 0.4301779317634623, + "grad_norm": 0.0, + "learning_rate": 1.2712894291554343e-05, + "loss": 1.2842, + "step": 14651 + }, + { + "epoch": 0.4302072934406013, + "grad_norm": 0.0, + "learning_rate": 1.271197898201709e-05, + "loss": 1.3242, + "step": 14652 + }, + { + "epoch": 0.43023665511774034, + "grad_norm": 0.0, + "learning_rate": 1.2711063647954717e-05, + "loss": 1.4531, + "step": 14653 + }, + { + "epoch": 0.4302660167948793, + "grad_norm": 0.0, + "learning_rate": 1.2710148289375503e-05, + "loss": 1.3779, + "step": 14654 + }, + { + "epoch": 0.4302953784720183, + "grad_norm": 0.0, + "learning_rate": 1.2709232906287724e-05, + "loss": 1.3809, + "step": 14655 + }, + { + "epoch": 0.43032474014915734, + "grad_norm": 0.0, + "learning_rate": 1.2708317498699657e-05, + "loss": 1.4785, + "step": 14656 + }, + { + "epoch": 0.4303541018262963, + "grad_norm": 0.0, + "learning_rate": 1.270740206661958e-05, + "loss": 1.3721, + "step": 14657 + }, + { + "epoch": 0.4303834635034353, + "grad_norm": 0.0, + "learning_rate": 1.2706486610055778e-05, + "loss": 1.3252, + "step": 14658 + }, + { + "epoch": 0.43041282518057433, + "grad_norm": 0.0, + "learning_rate": 1.2705571129016518e-05, + "loss": 1.375, + "step": 14659 + }, + { + "epoch": 0.4304421868577133, + "grad_norm": 0.0, + "learning_rate": 1.2704655623510088e-05, + "loss": 1.3037, + "step": 14660 + }, + { + "epoch": 0.4304715485348523, + "grad_norm": 0.0, + "learning_rate": 1.2703740093544765e-05, + "loss": 1.3232, + "step": 14661 + }, + { + "epoch": 0.43050091021199133, + "grad_norm": 0.0, + "learning_rate": 1.2702824539128828e-05, + "loss": 1.2441, + "step": 14662 + }, + { + "epoch": 0.4305302718891303, + "grad_norm": 0.0, + "learning_rate": 1.2701908960270557e-05, + "loss": 1.376, + "step": 14663 + }, + { + "epoch": 0.4305596335662693, + "grad_norm": 0.0, + "learning_rate": 1.270099335697823e-05, + "loss": 1.4521, + "step": 14664 + }, + { + "epoch": 0.4305889952434083, + "grad_norm": 0.0, + "learning_rate": 1.2700077729260127e-05, + "loss": 1.4453, + "step": 14665 + }, + { + "epoch": 0.4306183569205473, + "grad_norm": 0.0, + "learning_rate": 1.2699162077124531e-05, + "loss": 1.3945, + "step": 14666 + }, + { + "epoch": 0.4306477185976863, + "grad_norm": 0.0, + "learning_rate": 1.2698246400579723e-05, + "loss": 1.4941, + "step": 14667 + }, + { + "epoch": 0.4306770802748253, + "grad_norm": 0.0, + "learning_rate": 1.2697330699633978e-05, + "loss": 1.3711, + "step": 14668 + }, + { + "epoch": 0.4307064419519643, + "grad_norm": 0.0, + "learning_rate": 1.2696414974295583e-05, + "loss": 1.4307, + "step": 14669 + }, + { + "epoch": 0.4307358036291033, + "grad_norm": 0.0, + "learning_rate": 1.2695499224572818e-05, + "loss": 1.3115, + "step": 14670 + }, + { + "epoch": 0.4307651653062423, + "grad_norm": 0.0, + "learning_rate": 1.2694583450473964e-05, + "loss": 1.4492, + "step": 14671 + }, + { + "epoch": 0.4307945269833813, + "grad_norm": 0.0, + "learning_rate": 1.2693667652007297e-05, + "loss": 1.291, + "step": 14672 + }, + { + "epoch": 0.4308238886605203, + "grad_norm": 0.0, + "learning_rate": 1.2692751829181107e-05, + "loss": 1.2378, + "step": 14673 + }, + { + "epoch": 0.4308532503376593, + "grad_norm": 0.0, + "learning_rate": 1.2691835982003672e-05, + "loss": 1.3984, + "step": 14674 + }, + { + "epoch": 0.43088261201479827, + "grad_norm": 0.0, + "learning_rate": 1.2690920110483273e-05, + "loss": 1.5029, + "step": 14675 + }, + { + "epoch": 0.4309119736919373, + "grad_norm": 0.0, + "learning_rate": 1.2690004214628196e-05, + "loss": 1.249, + "step": 14676 + }, + { + "epoch": 0.4309413353690763, + "grad_norm": 0.0, + "learning_rate": 1.268908829444672e-05, + "loss": 1.4092, + "step": 14677 + }, + { + "epoch": 0.43097069704621527, + "grad_norm": 0.0, + "learning_rate": 1.2688172349947132e-05, + "loss": 1.4189, + "step": 14678 + }, + { + "epoch": 0.4310000587233543, + "grad_norm": 0.0, + "learning_rate": 1.2687256381137714e-05, + "loss": 1.4805, + "step": 14679 + }, + { + "epoch": 0.4310294204004933, + "grad_norm": 0.0, + "learning_rate": 1.2686340388026745e-05, + "loss": 1.3174, + "step": 14680 + }, + { + "epoch": 0.43105878207763226, + "grad_norm": 0.0, + "learning_rate": 1.2685424370622517e-05, + "loss": 1.3887, + "step": 14681 + }, + { + "epoch": 0.4310881437547713, + "grad_norm": 0.0, + "learning_rate": 1.2684508328933304e-05, + "loss": 1.3857, + "step": 14682 + }, + { + "epoch": 0.4311175054319103, + "grad_norm": 0.0, + "learning_rate": 1.2683592262967399e-05, + "loss": 1.3779, + "step": 14683 + }, + { + "epoch": 0.43114686710904926, + "grad_norm": 0.0, + "learning_rate": 1.2682676172733077e-05, + "loss": 1.3379, + "step": 14684 + }, + { + "epoch": 0.4311762287861883, + "grad_norm": 0.0, + "learning_rate": 1.2681760058238628e-05, + "loss": 1.1689, + "step": 14685 + }, + { + "epoch": 0.4312055904633273, + "grad_norm": 0.0, + "learning_rate": 1.2680843919492338e-05, + "loss": 1.418, + "step": 14686 + }, + { + "epoch": 0.43123495214046625, + "grad_norm": 0.0, + "learning_rate": 1.267992775650249e-05, + "loss": 1.3115, + "step": 14687 + }, + { + "epoch": 0.43126431381760527, + "grad_norm": 0.0, + "learning_rate": 1.2679011569277368e-05, + "loss": 1.3408, + "step": 14688 + }, + { + "epoch": 0.43129367549474423, + "grad_norm": 0.0, + "learning_rate": 1.2678095357825258e-05, + "loss": 1.3125, + "step": 14689 + }, + { + "epoch": 0.43132303717188325, + "grad_norm": 0.0, + "learning_rate": 1.2677179122154444e-05, + "loss": 1.2656, + "step": 14690 + }, + { + "epoch": 0.43135239884902227, + "grad_norm": 0.0, + "learning_rate": 1.2676262862273218e-05, + "loss": 1.3281, + "step": 14691 + }, + { + "epoch": 0.43138176052616123, + "grad_norm": 0.0, + "learning_rate": 1.267534657818986e-05, + "loss": 1.3662, + "step": 14692 + }, + { + "epoch": 0.43141112220330025, + "grad_norm": 0.0, + "learning_rate": 1.2674430269912657e-05, + "loss": 1.3838, + "step": 14693 + }, + { + "epoch": 0.43144048388043926, + "grad_norm": 0.0, + "learning_rate": 1.2673513937449895e-05, + "loss": 1.2754, + "step": 14694 + }, + { + "epoch": 0.4314698455575782, + "grad_norm": 0.0, + "learning_rate": 1.2672597580809865e-05, + "loss": 1.3867, + "step": 14695 + }, + { + "epoch": 0.43149920723471724, + "grad_norm": 0.0, + "learning_rate": 1.267168120000085e-05, + "loss": 1.416, + "step": 14696 + }, + { + "epoch": 0.43152856891185626, + "grad_norm": 0.0, + "learning_rate": 1.2670764795031134e-05, + "loss": 1.3477, + "step": 14697 + }, + { + "epoch": 0.4315579305889952, + "grad_norm": 0.0, + "learning_rate": 1.266984836590901e-05, + "loss": 1.3564, + "step": 14698 + }, + { + "epoch": 0.43158729226613424, + "grad_norm": 0.0, + "learning_rate": 1.2668931912642764e-05, + "loss": 1.3359, + "step": 14699 + }, + { + "epoch": 0.43161665394327325, + "grad_norm": 0.0, + "learning_rate": 1.2668015435240681e-05, + "loss": 1.3887, + "step": 14700 + }, + { + "epoch": 0.4316460156204122, + "grad_norm": 0.0, + "learning_rate": 1.2667098933711056e-05, + "loss": 1.335, + "step": 14701 + }, + { + "epoch": 0.43167537729755123, + "grad_norm": 0.0, + "learning_rate": 1.266618240806217e-05, + "loss": 1.3232, + "step": 14702 + }, + { + "epoch": 0.43170473897469025, + "grad_norm": 0.0, + "learning_rate": 1.2665265858302312e-05, + "loss": 1.377, + "step": 14703 + }, + { + "epoch": 0.4317341006518292, + "grad_norm": 0.0, + "learning_rate": 1.2664349284439772e-05, + "loss": 1.3008, + "step": 14704 + }, + { + "epoch": 0.43176346232896823, + "grad_norm": 0.0, + "learning_rate": 1.2663432686482842e-05, + "loss": 1.25, + "step": 14705 + }, + { + "epoch": 0.43179282400610725, + "grad_norm": 0.0, + "learning_rate": 1.2662516064439805e-05, + "loss": 1.2612, + "step": 14706 + }, + { + "epoch": 0.4318221856832462, + "grad_norm": 0.0, + "learning_rate": 1.2661599418318956e-05, + "loss": 1.3701, + "step": 14707 + }, + { + "epoch": 0.4318515473603852, + "grad_norm": 0.0, + "learning_rate": 1.2660682748128579e-05, + "loss": 1.3481, + "step": 14708 + }, + { + "epoch": 0.43188090903752424, + "grad_norm": 0.0, + "learning_rate": 1.2659766053876968e-05, + "loss": 1.144, + "step": 14709 + }, + { + "epoch": 0.4319102707146632, + "grad_norm": 0.0, + "learning_rate": 1.2658849335572411e-05, + "loss": 1.2466, + "step": 14710 + }, + { + "epoch": 0.4319396323918022, + "grad_norm": 0.0, + "learning_rate": 1.2657932593223201e-05, + "loss": 1.4219, + "step": 14711 + }, + { + "epoch": 0.43196899406894124, + "grad_norm": 0.0, + "learning_rate": 1.2657015826837624e-05, + "loss": 1.2998, + "step": 14712 + }, + { + "epoch": 0.4319983557460802, + "grad_norm": 0.0, + "learning_rate": 1.2656099036423971e-05, + "loss": 1.3882, + "step": 14713 + }, + { + "epoch": 0.4320277174232192, + "grad_norm": 0.0, + "learning_rate": 1.2655182221990537e-05, + "loss": 1.3789, + "step": 14714 + }, + { + "epoch": 0.43205707910035823, + "grad_norm": 0.0, + "learning_rate": 1.2654265383545608e-05, + "loss": 1.2612, + "step": 14715 + }, + { + "epoch": 0.4320864407774972, + "grad_norm": 0.0, + "learning_rate": 1.2653348521097477e-05, + "loss": 1.3438, + "step": 14716 + }, + { + "epoch": 0.4321158024546362, + "grad_norm": 0.0, + "learning_rate": 1.2652431634654436e-05, + "loss": 1.4229, + "step": 14717 + }, + { + "epoch": 0.43214516413177523, + "grad_norm": 0.0, + "learning_rate": 1.2651514724224777e-05, + "loss": 1.1895, + "step": 14718 + }, + { + "epoch": 0.4321745258089142, + "grad_norm": 0.0, + "learning_rate": 1.2650597789816792e-05, + "loss": 1.3477, + "step": 14719 + }, + { + "epoch": 0.4322038874860532, + "grad_norm": 0.0, + "learning_rate": 1.2649680831438773e-05, + "loss": 1.3203, + "step": 14720 + }, + { + "epoch": 0.4322332491631922, + "grad_norm": 0.0, + "learning_rate": 1.264876384909901e-05, + "loss": 1.4805, + "step": 14721 + }, + { + "epoch": 0.4322626108403312, + "grad_norm": 0.0, + "learning_rate": 1.2647846842805797e-05, + "loss": 1.2959, + "step": 14722 + }, + { + "epoch": 0.4322919725174702, + "grad_norm": 0.0, + "learning_rate": 1.2646929812567428e-05, + "loss": 1.3516, + "step": 14723 + }, + { + "epoch": 0.4323213341946092, + "grad_norm": 0.0, + "learning_rate": 1.2646012758392195e-05, + "loss": 1.4443, + "step": 14724 + }, + { + "epoch": 0.4323506958717482, + "grad_norm": 0.0, + "learning_rate": 1.2645095680288391e-05, + "loss": 1.3086, + "step": 14725 + }, + { + "epoch": 0.4323800575488872, + "grad_norm": 0.0, + "learning_rate": 1.2644178578264307e-05, + "loss": 1.3018, + "step": 14726 + }, + { + "epoch": 0.4324094192260262, + "grad_norm": 0.0, + "learning_rate": 1.264326145232824e-05, + "loss": 1.3828, + "step": 14727 + }, + { + "epoch": 0.4324387809031652, + "grad_norm": 0.0, + "learning_rate": 1.2642344302488486e-05, + "loss": 1.4219, + "step": 14728 + }, + { + "epoch": 0.4324681425803042, + "grad_norm": 0.0, + "learning_rate": 1.2641427128753332e-05, + "loss": 1.2256, + "step": 14729 + }, + { + "epoch": 0.4324975042574432, + "grad_norm": 0.0, + "learning_rate": 1.2640509931131078e-05, + "loss": 1.3887, + "step": 14730 + }, + { + "epoch": 0.43252686593458217, + "grad_norm": 0.0, + "learning_rate": 1.2639592709630014e-05, + "loss": 1.3223, + "step": 14731 + }, + { + "epoch": 0.4325562276117212, + "grad_norm": 0.0, + "learning_rate": 1.2638675464258441e-05, + "loss": 1.334, + "step": 14732 + }, + { + "epoch": 0.4325855892888602, + "grad_norm": 0.0, + "learning_rate": 1.2637758195024645e-05, + "loss": 1.229, + "step": 14733 + }, + { + "epoch": 0.43261495096599917, + "grad_norm": 0.0, + "learning_rate": 1.2636840901936928e-05, + "loss": 1.3721, + "step": 14734 + }, + { + "epoch": 0.4326443126431382, + "grad_norm": 0.0, + "learning_rate": 1.2635923585003585e-05, + "loss": 1.4434, + "step": 14735 + }, + { + "epoch": 0.4326736743202772, + "grad_norm": 0.0, + "learning_rate": 1.263500624423291e-05, + "loss": 1.3252, + "step": 14736 + }, + { + "epoch": 0.43270303599741616, + "grad_norm": 0.0, + "learning_rate": 1.2634088879633198e-05, + "loss": 1.3818, + "step": 14737 + }, + { + "epoch": 0.4327323976745552, + "grad_norm": 0.0, + "learning_rate": 1.2633171491212743e-05, + "loss": 1.3223, + "step": 14738 + }, + { + "epoch": 0.43276175935169414, + "grad_norm": 0.0, + "learning_rate": 1.2632254078979846e-05, + "loss": 1.2012, + "step": 14739 + }, + { + "epoch": 0.43279112102883316, + "grad_norm": 0.0, + "learning_rate": 1.2631336642942804e-05, + "loss": 1.4727, + "step": 14740 + }, + { + "epoch": 0.4328204827059722, + "grad_norm": 0.0, + "learning_rate": 1.263041918310991e-05, + "loss": 1.3184, + "step": 14741 + }, + { + "epoch": 0.43284984438311114, + "grad_norm": 0.0, + "learning_rate": 1.262950169948946e-05, + "loss": 1.3428, + "step": 14742 + }, + { + "epoch": 0.43287920606025015, + "grad_norm": 0.0, + "learning_rate": 1.262858419208975e-05, + "loss": 1.3477, + "step": 14743 + }, + { + "epoch": 0.43290856773738917, + "grad_norm": 0.0, + "learning_rate": 1.2627666660919082e-05, + "loss": 1.3389, + "step": 14744 + }, + { + "epoch": 0.43293792941452813, + "grad_norm": 0.0, + "learning_rate": 1.2626749105985754e-05, + "loss": 1.4111, + "step": 14745 + }, + { + "epoch": 0.43296729109166715, + "grad_norm": 0.0, + "learning_rate": 1.2625831527298057e-05, + "loss": 1.4072, + "step": 14746 + }, + { + "epoch": 0.43299665276880617, + "grad_norm": 0.0, + "learning_rate": 1.2624913924864295e-05, + "loss": 1.2563, + "step": 14747 + }, + { + "epoch": 0.43302601444594513, + "grad_norm": 0.0, + "learning_rate": 1.2623996298692766e-05, + "loss": 1.4541, + "step": 14748 + }, + { + "epoch": 0.43305537612308415, + "grad_norm": 0.0, + "learning_rate": 1.2623078648791765e-05, + "loss": 1.3047, + "step": 14749 + }, + { + "epoch": 0.43308473780022316, + "grad_norm": 0.0, + "learning_rate": 1.2622160975169593e-05, + "loss": 1.4307, + "step": 14750 + }, + { + "epoch": 0.4331140994773621, + "grad_norm": 0.0, + "learning_rate": 1.2621243277834547e-05, + "loss": 1.2549, + "step": 14751 + }, + { + "epoch": 0.43314346115450114, + "grad_norm": 0.0, + "learning_rate": 1.2620325556794923e-05, + "loss": 1.3809, + "step": 14752 + }, + { + "epoch": 0.43317282283164016, + "grad_norm": 0.0, + "learning_rate": 1.261940781205903e-05, + "loss": 1.2588, + "step": 14753 + }, + { + "epoch": 0.4332021845087791, + "grad_norm": 0.0, + "learning_rate": 1.2618490043635159e-05, + "loss": 1.3574, + "step": 14754 + }, + { + "epoch": 0.43323154618591814, + "grad_norm": 0.0, + "learning_rate": 1.2617572251531608e-05, + "loss": 1.4473, + "step": 14755 + }, + { + "epoch": 0.43326090786305715, + "grad_norm": 0.0, + "learning_rate": 1.2616654435756686e-05, + "loss": 1.3335, + "step": 14756 + }, + { + "epoch": 0.4332902695401961, + "grad_norm": 0.0, + "learning_rate": 1.2615736596318686e-05, + "loss": 1.3799, + "step": 14757 + }, + { + "epoch": 0.43331963121733513, + "grad_norm": 0.0, + "learning_rate": 1.2614818733225911e-05, + "loss": 1.3242, + "step": 14758 + }, + { + "epoch": 0.43334899289447415, + "grad_norm": 0.0, + "learning_rate": 1.2613900846486658e-05, + "loss": 1.3926, + "step": 14759 + }, + { + "epoch": 0.4333783545716131, + "grad_norm": 0.0, + "learning_rate": 1.2612982936109233e-05, + "loss": 1.3662, + "step": 14760 + }, + { + "epoch": 0.43340771624875213, + "grad_norm": 0.0, + "learning_rate": 1.2612065002101934e-05, + "loss": 1.3467, + "step": 14761 + }, + { + "epoch": 0.43343707792589115, + "grad_norm": 0.0, + "learning_rate": 1.261114704447306e-05, + "loss": 1.3359, + "step": 14762 + }, + { + "epoch": 0.4334664396030301, + "grad_norm": 0.0, + "learning_rate": 1.2610229063230917e-05, + "loss": 1.3867, + "step": 14763 + }, + { + "epoch": 0.4334958012801691, + "grad_norm": 0.0, + "learning_rate": 1.2609311058383802e-05, + "loss": 1.3027, + "step": 14764 + }, + { + "epoch": 0.43352516295730814, + "grad_norm": 0.0, + "learning_rate": 1.260839302994002e-05, + "loss": 1.376, + "step": 14765 + }, + { + "epoch": 0.4335545246344471, + "grad_norm": 0.0, + "learning_rate": 1.2607474977907872e-05, + "loss": 1.3545, + "step": 14766 + }, + { + "epoch": 0.4335838863115861, + "grad_norm": 0.0, + "learning_rate": 1.2606556902295659e-05, + "loss": 1.1865, + "step": 14767 + }, + { + "epoch": 0.43361324798872514, + "grad_norm": 0.0, + "learning_rate": 1.2605638803111684e-05, + "loss": 1.2422, + "step": 14768 + }, + { + "epoch": 0.4336426096658641, + "grad_norm": 0.0, + "learning_rate": 1.260472068036425e-05, + "loss": 1.4717, + "step": 14769 + }, + { + "epoch": 0.4336719713430031, + "grad_norm": 0.0, + "learning_rate": 1.2603802534061664e-05, + "loss": 1.1025, + "step": 14770 + }, + { + "epoch": 0.43370133302014213, + "grad_norm": 0.0, + "learning_rate": 1.260288436421222e-05, + "loss": 1.25, + "step": 14771 + }, + { + "epoch": 0.4337306946972811, + "grad_norm": 0.0, + "learning_rate": 1.2601966170824227e-05, + "loss": 1.4199, + "step": 14772 + }, + { + "epoch": 0.4337600563744201, + "grad_norm": 0.0, + "learning_rate": 1.260104795390599e-05, + "loss": 1.2539, + "step": 14773 + }, + { + "epoch": 0.43378941805155913, + "grad_norm": 0.0, + "learning_rate": 1.2600129713465807e-05, + "loss": 1.335, + "step": 14774 + }, + { + "epoch": 0.4338187797286981, + "grad_norm": 0.0, + "learning_rate": 1.2599211449511986e-05, + "loss": 1.3223, + "step": 14775 + }, + { + "epoch": 0.4338481414058371, + "grad_norm": 0.0, + "learning_rate": 1.259829316205283e-05, + "loss": 1.2729, + "step": 14776 + }, + { + "epoch": 0.4338775030829761, + "grad_norm": 0.0, + "learning_rate": 1.2597374851096645e-05, + "loss": 1.3086, + "step": 14777 + }, + { + "epoch": 0.4339068647601151, + "grad_norm": 0.0, + "learning_rate": 1.2596456516651734e-05, + "loss": 1.4043, + "step": 14778 + }, + { + "epoch": 0.4339362264372541, + "grad_norm": 0.0, + "learning_rate": 1.25955381587264e-05, + "loss": 1.3184, + "step": 14779 + }, + { + "epoch": 0.4339655881143931, + "grad_norm": 0.0, + "learning_rate": 1.2594619777328951e-05, + "loss": 1.3662, + "step": 14780 + }, + { + "epoch": 0.4339949497915321, + "grad_norm": 0.0, + "learning_rate": 1.259370137246769e-05, + "loss": 1.4014, + "step": 14781 + }, + { + "epoch": 0.4340243114686711, + "grad_norm": 0.0, + "learning_rate": 1.2592782944150926e-05, + "loss": 1.3584, + "step": 14782 + }, + { + "epoch": 0.4340536731458101, + "grad_norm": 0.0, + "learning_rate": 1.2591864492386958e-05, + "loss": 1.2588, + "step": 14783 + }, + { + "epoch": 0.4340830348229491, + "grad_norm": 0.0, + "learning_rate": 1.2590946017184095e-05, + "loss": 1.4355, + "step": 14784 + }, + { + "epoch": 0.4341123965000881, + "grad_norm": 0.0, + "learning_rate": 1.2590027518550647e-05, + "loss": 1.3672, + "step": 14785 + }, + { + "epoch": 0.4341417581772271, + "grad_norm": 0.0, + "learning_rate": 1.2589108996494916e-05, + "loss": 1.2334, + "step": 14786 + }, + { + "epoch": 0.43417111985436607, + "grad_norm": 0.0, + "learning_rate": 1.2588190451025209e-05, + "loss": 1.3389, + "step": 14787 + }, + { + "epoch": 0.4342004815315051, + "grad_norm": 0.0, + "learning_rate": 1.2587271882149832e-05, + "loss": 1.1597, + "step": 14788 + }, + { + "epoch": 0.43422984320864405, + "grad_norm": 0.0, + "learning_rate": 1.2586353289877096e-05, + "loss": 1.4482, + "step": 14789 + }, + { + "epoch": 0.43425920488578307, + "grad_norm": 0.0, + "learning_rate": 1.2585434674215301e-05, + "loss": 1.3818, + "step": 14790 + }, + { + "epoch": 0.4342885665629221, + "grad_norm": 0.0, + "learning_rate": 1.258451603517276e-05, + "loss": 1.3438, + "step": 14791 + }, + { + "epoch": 0.43431792824006105, + "grad_norm": 0.0, + "learning_rate": 1.2583597372757776e-05, + "loss": 1.3037, + "step": 14792 + }, + { + "epoch": 0.43434728991720006, + "grad_norm": 0.0, + "learning_rate": 1.2582678686978663e-05, + "loss": 1.4502, + "step": 14793 + }, + { + "epoch": 0.4343766515943391, + "grad_norm": 0.0, + "learning_rate": 1.2581759977843723e-05, + "loss": 1.3008, + "step": 14794 + }, + { + "epoch": 0.43440601327147804, + "grad_norm": 0.0, + "learning_rate": 1.2580841245361266e-05, + "loss": 1.2842, + "step": 14795 + }, + { + "epoch": 0.43443537494861706, + "grad_norm": 0.0, + "learning_rate": 1.2579922489539602e-05, + "loss": 1.3477, + "step": 14796 + }, + { + "epoch": 0.4344647366257561, + "grad_norm": 0.0, + "learning_rate": 1.2579003710387038e-05, + "loss": 1.2725, + "step": 14797 + }, + { + "epoch": 0.43449409830289504, + "grad_norm": 0.0, + "learning_rate": 1.2578084907911882e-05, + "loss": 1.3877, + "step": 14798 + }, + { + "epoch": 0.43452345998003405, + "grad_norm": 0.0, + "learning_rate": 1.2577166082122447e-05, + "loss": 1.2773, + "step": 14799 + }, + { + "epoch": 0.43455282165717307, + "grad_norm": 0.0, + "learning_rate": 1.2576247233027034e-05, + "loss": 1.3301, + "step": 14800 + }, + { + "epoch": 0.43458218333431203, + "grad_norm": 0.0, + "learning_rate": 1.257532836063396e-05, + "loss": 1.3076, + "step": 14801 + }, + { + "epoch": 0.43461154501145105, + "grad_norm": 0.0, + "learning_rate": 1.2574409464951534e-05, + "loss": 1.2598, + "step": 14802 + }, + { + "epoch": 0.43464090668859007, + "grad_norm": 0.0, + "learning_rate": 1.2573490545988061e-05, + "loss": 1.4121, + "step": 14803 + }, + { + "epoch": 0.43467026836572903, + "grad_norm": 0.0, + "learning_rate": 1.2572571603751855e-05, + "loss": 1.459, + "step": 14804 + }, + { + "epoch": 0.43469963004286805, + "grad_norm": 0.0, + "learning_rate": 1.2571652638251224e-05, + "loss": 1.3828, + "step": 14805 + }, + { + "epoch": 0.43472899172000706, + "grad_norm": 0.0, + "learning_rate": 1.2570733649494483e-05, + "loss": 1.4824, + "step": 14806 + }, + { + "epoch": 0.434758353397146, + "grad_norm": 0.0, + "learning_rate": 1.2569814637489937e-05, + "loss": 1.2715, + "step": 14807 + }, + { + "epoch": 0.43478771507428504, + "grad_norm": 0.0, + "learning_rate": 1.2568895602245896e-05, + "loss": 1.4375, + "step": 14808 + }, + { + "epoch": 0.43481707675142406, + "grad_norm": 0.0, + "learning_rate": 1.256797654377068e-05, + "loss": 1.2852, + "step": 14809 + }, + { + "epoch": 0.434846438428563, + "grad_norm": 0.0, + "learning_rate": 1.2567057462072595e-05, + "loss": 1.3271, + "step": 14810 + }, + { + "epoch": 0.43487580010570204, + "grad_norm": 0.0, + "learning_rate": 1.2566138357159947e-05, + "loss": 1.2949, + "step": 14811 + }, + { + "epoch": 0.43490516178284105, + "grad_norm": 0.0, + "learning_rate": 1.2565219229041053e-05, + "loss": 1.3398, + "step": 14812 + }, + { + "epoch": 0.43493452345998, + "grad_norm": 0.0, + "learning_rate": 1.2564300077724225e-05, + "loss": 1.4531, + "step": 14813 + }, + { + "epoch": 0.43496388513711903, + "grad_norm": 0.0, + "learning_rate": 1.2563380903217777e-05, + "loss": 1.3027, + "step": 14814 + }, + { + "epoch": 0.43499324681425805, + "grad_norm": 0.0, + "learning_rate": 1.2562461705530016e-05, + "loss": 1.4121, + "step": 14815 + }, + { + "epoch": 0.435022608491397, + "grad_norm": 0.0, + "learning_rate": 1.2561542484669257e-05, + "loss": 1.4375, + "step": 14816 + }, + { + "epoch": 0.43505197016853603, + "grad_norm": 0.0, + "learning_rate": 1.2560623240643816e-05, + "loss": 1.2842, + "step": 14817 + }, + { + "epoch": 0.43508133184567505, + "grad_norm": 0.0, + "learning_rate": 1.2559703973462006e-05, + "loss": 1.4404, + "step": 14818 + }, + { + "epoch": 0.435110693522814, + "grad_norm": 0.0, + "learning_rate": 1.2558784683132135e-05, + "loss": 1.3159, + "step": 14819 + }, + { + "epoch": 0.435140055199953, + "grad_norm": 0.0, + "learning_rate": 1.2557865369662517e-05, + "loss": 1.4082, + "step": 14820 + }, + { + "epoch": 0.43516941687709204, + "grad_norm": 0.0, + "learning_rate": 1.2556946033061465e-05, + "loss": 1.3828, + "step": 14821 + }, + { + "epoch": 0.435198778554231, + "grad_norm": 0.0, + "learning_rate": 1.2556026673337302e-05, + "loss": 1.3799, + "step": 14822 + }, + { + "epoch": 0.43522814023137, + "grad_norm": 0.0, + "learning_rate": 1.255510729049833e-05, + "loss": 1.3887, + "step": 14823 + }, + { + "epoch": 0.43525750190850904, + "grad_norm": 0.0, + "learning_rate": 1.2554187884552868e-05, + "loss": 1.2153, + "step": 14824 + }, + { + "epoch": 0.435286863585648, + "grad_norm": 0.0, + "learning_rate": 1.2553268455509233e-05, + "loss": 1.3447, + "step": 14825 + }, + { + "epoch": 0.435316225262787, + "grad_norm": 0.0, + "learning_rate": 1.2552349003375739e-05, + "loss": 1.3203, + "step": 14826 + }, + { + "epoch": 0.43534558693992603, + "grad_norm": 0.0, + "learning_rate": 1.2551429528160697e-05, + "loss": 1.2871, + "step": 14827 + }, + { + "epoch": 0.435374948617065, + "grad_norm": 0.0, + "learning_rate": 1.2550510029872425e-05, + "loss": 1.2617, + "step": 14828 + }, + { + "epoch": 0.435404310294204, + "grad_norm": 0.0, + "learning_rate": 1.2549590508519239e-05, + "loss": 1.3604, + "step": 14829 + }, + { + "epoch": 0.43543367197134303, + "grad_norm": 0.0, + "learning_rate": 1.254867096410945e-05, + "loss": 1.3584, + "step": 14830 + }, + { + "epoch": 0.435463033648482, + "grad_norm": 0.0, + "learning_rate": 1.254775139665138e-05, + "loss": 1.4502, + "step": 14831 + }, + { + "epoch": 0.435492395325621, + "grad_norm": 0.0, + "learning_rate": 1.254683180615334e-05, + "loss": 1.2056, + "step": 14832 + }, + { + "epoch": 0.43552175700276, + "grad_norm": 0.0, + "learning_rate": 1.2545912192623646e-05, + "loss": 1.3008, + "step": 14833 + }, + { + "epoch": 0.435551118679899, + "grad_norm": 0.0, + "learning_rate": 1.2544992556070617e-05, + "loss": 1.2344, + "step": 14834 + }, + { + "epoch": 0.435580480357038, + "grad_norm": 0.0, + "learning_rate": 1.254407289650257e-05, + "loss": 1.3721, + "step": 14835 + }, + { + "epoch": 0.435609842034177, + "grad_norm": 0.0, + "learning_rate": 1.2543153213927819e-05, + "loss": 1.3867, + "step": 14836 + }, + { + "epoch": 0.435639203711316, + "grad_norm": 0.0, + "learning_rate": 1.2542233508354682e-05, + "loss": 1.4092, + "step": 14837 + }, + { + "epoch": 0.435668565388455, + "grad_norm": 0.0, + "learning_rate": 1.2541313779791478e-05, + "loss": 1.2202, + "step": 14838 + }, + { + "epoch": 0.43569792706559396, + "grad_norm": 0.0, + "learning_rate": 1.2540394028246522e-05, + "loss": 1.3428, + "step": 14839 + }, + { + "epoch": 0.435727288742733, + "grad_norm": 0.0, + "learning_rate": 1.2539474253728133e-05, + "loss": 1.5215, + "step": 14840 + }, + { + "epoch": 0.435756650419872, + "grad_norm": 0.0, + "learning_rate": 1.2538554456244625e-05, + "loss": 1.4297, + "step": 14841 + }, + { + "epoch": 0.43578601209701096, + "grad_norm": 0.0, + "learning_rate": 1.2537634635804321e-05, + "loss": 1.4043, + "step": 14842 + }, + { + "epoch": 0.43581537377415, + "grad_norm": 0.0, + "learning_rate": 1.2536714792415536e-05, + "loss": 1.3662, + "step": 14843 + }, + { + "epoch": 0.435844735451289, + "grad_norm": 0.0, + "learning_rate": 1.2535794926086591e-05, + "loss": 1.3105, + "step": 14844 + }, + { + "epoch": 0.43587409712842795, + "grad_norm": 0.0, + "learning_rate": 1.25348750368258e-05, + "loss": 1.1821, + "step": 14845 + }, + { + "epoch": 0.43590345880556697, + "grad_norm": 0.0, + "learning_rate": 1.2533955124641489e-05, + "loss": 1.2686, + "step": 14846 + }, + { + "epoch": 0.435932820482706, + "grad_norm": 0.0, + "learning_rate": 1.2533035189541971e-05, + "loss": 1.4473, + "step": 14847 + }, + { + "epoch": 0.43596218215984495, + "grad_norm": 0.0, + "learning_rate": 1.253211523153557e-05, + "loss": 1.3008, + "step": 14848 + }, + { + "epoch": 0.43599154383698396, + "grad_norm": 0.0, + "learning_rate": 1.2531195250630597e-05, + "loss": 1.2969, + "step": 14849 + }, + { + "epoch": 0.436020905514123, + "grad_norm": 0.0, + "learning_rate": 1.253027524683538e-05, + "loss": 1.4639, + "step": 14850 + }, + { + "epoch": 0.43605026719126194, + "grad_norm": 0.0, + "learning_rate": 1.2529355220158235e-05, + "loss": 1.4951, + "step": 14851 + }, + { + "epoch": 0.43607962886840096, + "grad_norm": 0.0, + "learning_rate": 1.2528435170607485e-05, + "loss": 1.4531, + "step": 14852 + }, + { + "epoch": 0.43610899054554, + "grad_norm": 0.0, + "learning_rate": 1.2527515098191445e-05, + "loss": 1.4072, + "step": 14853 + }, + { + "epoch": 0.43613835222267894, + "grad_norm": 0.0, + "learning_rate": 1.252659500291844e-05, + "loss": 1.2666, + "step": 14854 + }, + { + "epoch": 0.43616771389981795, + "grad_norm": 0.0, + "learning_rate": 1.252567488479679e-05, + "loss": 1.2969, + "step": 14855 + }, + { + "epoch": 0.43619707557695697, + "grad_norm": 0.0, + "learning_rate": 1.2524754743834815e-05, + "loss": 1.3457, + "step": 14856 + }, + { + "epoch": 0.43622643725409593, + "grad_norm": 0.0, + "learning_rate": 1.2523834580040834e-05, + "loss": 1.376, + "step": 14857 + }, + { + "epoch": 0.43625579893123495, + "grad_norm": 0.0, + "learning_rate": 1.2522914393423175e-05, + "loss": 1.2808, + "step": 14858 + }, + { + "epoch": 0.43628516060837397, + "grad_norm": 0.0, + "learning_rate": 1.2521994183990152e-05, + "loss": 1.3701, + "step": 14859 + }, + { + "epoch": 0.43631452228551293, + "grad_norm": 0.0, + "learning_rate": 1.2521073951750091e-05, + "loss": 1.3389, + "step": 14860 + }, + { + "epoch": 0.43634388396265195, + "grad_norm": 0.0, + "learning_rate": 1.2520153696711312e-05, + "loss": 1.4531, + "step": 14861 + }, + { + "epoch": 0.43637324563979096, + "grad_norm": 0.0, + "learning_rate": 1.2519233418882137e-05, + "loss": 1.3369, + "step": 14862 + }, + { + "epoch": 0.4364026073169299, + "grad_norm": 0.0, + "learning_rate": 1.2518313118270891e-05, + "loss": 1.2822, + "step": 14863 + }, + { + "epoch": 0.43643196899406894, + "grad_norm": 0.0, + "learning_rate": 1.2517392794885892e-05, + "loss": 1.2793, + "step": 14864 + }, + { + "epoch": 0.43646133067120796, + "grad_norm": 0.0, + "learning_rate": 1.2516472448735466e-05, + "loss": 1.3555, + "step": 14865 + }, + { + "epoch": 0.4364906923483469, + "grad_norm": 0.0, + "learning_rate": 1.2515552079827935e-05, + "loss": 1.291, + "step": 14866 + }, + { + "epoch": 0.43652005402548594, + "grad_norm": 0.0, + "learning_rate": 1.2514631688171623e-05, + "loss": 1.4707, + "step": 14867 + }, + { + "epoch": 0.43654941570262495, + "grad_norm": 0.0, + "learning_rate": 1.2513711273774855e-05, + "loss": 1.3896, + "step": 14868 + }, + { + "epoch": 0.4365787773797639, + "grad_norm": 0.0, + "learning_rate": 1.2512790836645949e-05, + "loss": 1.3867, + "step": 14869 + }, + { + "epoch": 0.43660813905690293, + "grad_norm": 0.0, + "learning_rate": 1.251187037679323e-05, + "loss": 1.2695, + "step": 14870 + }, + { + "epoch": 0.43663750073404195, + "grad_norm": 0.0, + "learning_rate": 1.2510949894225029e-05, + "loss": 1.3613, + "step": 14871 + }, + { + "epoch": 0.4366668624111809, + "grad_norm": 0.0, + "learning_rate": 1.2510029388949662e-05, + "loss": 1.1885, + "step": 14872 + }, + { + "epoch": 0.43669622408831993, + "grad_norm": 0.0, + "learning_rate": 1.2509108860975459e-05, + "loss": 1.3818, + "step": 14873 + }, + { + "epoch": 0.43672558576545895, + "grad_norm": 0.0, + "learning_rate": 1.2508188310310738e-05, + "loss": 1.3271, + "step": 14874 + }, + { + "epoch": 0.4367549474425979, + "grad_norm": 0.0, + "learning_rate": 1.250726773696383e-05, + "loss": 1.415, + "step": 14875 + }, + { + "epoch": 0.4367843091197369, + "grad_norm": 0.0, + "learning_rate": 1.2506347140943058e-05, + "loss": 1.2163, + "step": 14876 + }, + { + "epoch": 0.43681367079687594, + "grad_norm": 0.0, + "learning_rate": 1.2505426522256746e-05, + "loss": 1.2427, + "step": 14877 + }, + { + "epoch": 0.4368430324740149, + "grad_norm": 0.0, + "learning_rate": 1.250450588091322e-05, + "loss": 1.2461, + "step": 14878 + }, + { + "epoch": 0.4368723941511539, + "grad_norm": 0.0, + "learning_rate": 1.2503585216920807e-05, + "loss": 1.4219, + "step": 14879 + }, + { + "epoch": 0.43690175582829294, + "grad_norm": 0.0, + "learning_rate": 1.2502664530287834e-05, + "loss": 1.4414, + "step": 14880 + }, + { + "epoch": 0.4369311175054319, + "grad_norm": 0.0, + "learning_rate": 1.250174382102262e-05, + "loss": 1.4033, + "step": 14881 + }, + { + "epoch": 0.4369604791825709, + "grad_norm": 0.0, + "learning_rate": 1.2500823089133496e-05, + "loss": 1.3613, + "step": 14882 + }, + { + "epoch": 0.43698984085970993, + "grad_norm": 0.0, + "learning_rate": 1.2499902334628791e-05, + "loss": 1.3467, + "step": 14883 + }, + { + "epoch": 0.4370192025368489, + "grad_norm": 0.0, + "learning_rate": 1.249898155751683e-05, + "loss": 1.373, + "step": 14884 + }, + { + "epoch": 0.4370485642139879, + "grad_norm": 0.0, + "learning_rate": 1.2498060757805935e-05, + "loss": 1.3867, + "step": 14885 + }, + { + "epoch": 0.43707792589112693, + "grad_norm": 0.0, + "learning_rate": 1.2497139935504438e-05, + "loss": 1.2363, + "step": 14886 + }, + { + "epoch": 0.4371072875682659, + "grad_norm": 0.0, + "learning_rate": 1.2496219090620666e-05, + "loss": 1.3145, + "step": 14887 + }, + { + "epoch": 0.4371366492454049, + "grad_norm": 0.0, + "learning_rate": 1.2495298223162948e-05, + "loss": 1.3662, + "step": 14888 + }, + { + "epoch": 0.43716601092254387, + "grad_norm": 0.0, + "learning_rate": 1.2494377333139607e-05, + "loss": 1.3296, + "step": 14889 + }, + { + "epoch": 0.4371953725996829, + "grad_norm": 0.0, + "learning_rate": 1.2493456420558971e-05, + "loss": 1.3936, + "step": 14890 + }, + { + "epoch": 0.4372247342768219, + "grad_norm": 0.0, + "learning_rate": 1.2492535485429371e-05, + "loss": 1.4639, + "step": 14891 + }, + { + "epoch": 0.43725409595396086, + "grad_norm": 0.0, + "learning_rate": 1.2491614527759135e-05, + "loss": 1.4629, + "step": 14892 + }, + { + "epoch": 0.4372834576310999, + "grad_norm": 0.0, + "learning_rate": 1.249069354755659e-05, + "loss": 1.3232, + "step": 14893 + }, + { + "epoch": 0.4373128193082389, + "grad_norm": 0.0, + "learning_rate": 1.2489772544830066e-05, + "loss": 1.4219, + "step": 14894 + }, + { + "epoch": 0.43734218098537786, + "grad_norm": 0.0, + "learning_rate": 1.248885151958789e-05, + "loss": 1.416, + "step": 14895 + }, + { + "epoch": 0.4373715426625169, + "grad_norm": 0.0, + "learning_rate": 1.2487930471838394e-05, + "loss": 1.2407, + "step": 14896 + }, + { + "epoch": 0.4374009043396559, + "grad_norm": 0.0, + "learning_rate": 1.2487009401589907e-05, + "loss": 1.4053, + "step": 14897 + }, + { + "epoch": 0.43743026601679486, + "grad_norm": 0.0, + "learning_rate": 1.2486088308850755e-05, + "loss": 1.3555, + "step": 14898 + }, + { + "epoch": 0.4374596276939339, + "grad_norm": 0.0, + "learning_rate": 1.2485167193629267e-05, + "loss": 1.3037, + "step": 14899 + }, + { + "epoch": 0.4374889893710729, + "grad_norm": 0.0, + "learning_rate": 1.2484246055933779e-05, + "loss": 1.2012, + "step": 14900 + }, + { + "epoch": 0.43751835104821185, + "grad_norm": 0.0, + "learning_rate": 1.2483324895772617e-05, + "loss": 1.3154, + "step": 14901 + }, + { + "epoch": 0.43754771272535087, + "grad_norm": 0.0, + "learning_rate": 1.2482403713154112e-05, + "loss": 1.333, + "step": 14902 + }, + { + "epoch": 0.4375770744024899, + "grad_norm": 0.0, + "learning_rate": 1.2481482508086593e-05, + "loss": 1.2881, + "step": 14903 + }, + { + "epoch": 0.43760643607962885, + "grad_norm": 0.0, + "learning_rate": 1.2480561280578394e-05, + "loss": 1.4424, + "step": 14904 + }, + { + "epoch": 0.43763579775676786, + "grad_norm": 0.0, + "learning_rate": 1.2479640030637843e-05, + "loss": 1.2969, + "step": 14905 + }, + { + "epoch": 0.4376651594339069, + "grad_norm": 0.0, + "learning_rate": 1.2478718758273272e-05, + "loss": 1.4014, + "step": 14906 + }, + { + "epoch": 0.43769452111104584, + "grad_norm": 0.0, + "learning_rate": 1.2477797463493015e-05, + "loss": 1.3145, + "step": 14907 + }, + { + "epoch": 0.43772388278818486, + "grad_norm": 0.0, + "learning_rate": 1.2476876146305399e-05, + "loss": 1.2998, + "step": 14908 + }, + { + "epoch": 0.4377532444653239, + "grad_norm": 0.0, + "learning_rate": 1.247595480671876e-05, + "loss": 1.3936, + "step": 14909 + }, + { + "epoch": 0.43778260614246284, + "grad_norm": 0.0, + "learning_rate": 1.2475033444741422e-05, + "loss": 1.334, + "step": 14910 + }, + { + "epoch": 0.43781196781960185, + "grad_norm": 0.0, + "learning_rate": 1.2474112060381727e-05, + "loss": 1.3867, + "step": 14911 + }, + { + "epoch": 0.43784132949674087, + "grad_norm": 0.0, + "learning_rate": 1.2473190653648004e-05, + "loss": 1.3906, + "step": 14912 + }, + { + "epoch": 0.43787069117387983, + "grad_norm": 0.0, + "learning_rate": 1.2472269224548581e-05, + "loss": 1.29, + "step": 14913 + }, + { + "epoch": 0.43790005285101885, + "grad_norm": 0.0, + "learning_rate": 1.2471347773091795e-05, + "loss": 1.2412, + "step": 14914 + }, + { + "epoch": 0.43792941452815787, + "grad_norm": 0.0, + "learning_rate": 1.2470426299285978e-05, + "loss": 1.3877, + "step": 14915 + }, + { + "epoch": 0.43795877620529683, + "grad_norm": 0.0, + "learning_rate": 1.2469504803139467e-05, + "loss": 1.3188, + "step": 14916 + }, + { + "epoch": 0.43798813788243585, + "grad_norm": 0.0, + "learning_rate": 1.2468583284660589e-05, + "loss": 1.3271, + "step": 14917 + }, + { + "epoch": 0.43801749955957486, + "grad_norm": 0.0, + "learning_rate": 1.246766174385768e-05, + "loss": 1.3506, + "step": 14918 + }, + { + "epoch": 0.4380468612367138, + "grad_norm": 0.0, + "learning_rate": 1.2466740180739074e-05, + "loss": 1.3037, + "step": 14919 + }, + { + "epoch": 0.43807622291385284, + "grad_norm": 0.0, + "learning_rate": 1.2465818595313103e-05, + "loss": 1.3096, + "step": 14920 + }, + { + "epoch": 0.43810558459099186, + "grad_norm": 0.0, + "learning_rate": 1.2464896987588106e-05, + "loss": 1.3477, + "step": 14921 + }, + { + "epoch": 0.4381349462681308, + "grad_norm": 0.0, + "learning_rate": 1.2463975357572414e-05, + "loss": 1.3105, + "step": 14922 + }, + { + "epoch": 0.43816430794526984, + "grad_norm": 0.0, + "learning_rate": 1.246305370527436e-05, + "loss": 1.3184, + "step": 14923 + }, + { + "epoch": 0.43819366962240885, + "grad_norm": 0.0, + "learning_rate": 1.246213203070228e-05, + "loss": 1.2285, + "step": 14924 + }, + { + "epoch": 0.4382230312995478, + "grad_norm": 0.0, + "learning_rate": 1.246121033386451e-05, + "loss": 1.4668, + "step": 14925 + }, + { + "epoch": 0.43825239297668683, + "grad_norm": 0.0, + "learning_rate": 1.2460288614769387e-05, + "loss": 1.2539, + "step": 14926 + }, + { + "epoch": 0.43828175465382585, + "grad_norm": 0.0, + "learning_rate": 1.2459366873425242e-05, + "loss": 1.2959, + "step": 14927 + }, + { + "epoch": 0.4383111163309648, + "grad_norm": 0.0, + "learning_rate": 1.2458445109840414e-05, + "loss": 1.4502, + "step": 14928 + }, + { + "epoch": 0.43834047800810383, + "grad_norm": 0.0, + "learning_rate": 1.2457523324023237e-05, + "loss": 1.2505, + "step": 14929 + }, + { + "epoch": 0.43836983968524285, + "grad_norm": 0.0, + "learning_rate": 1.2456601515982046e-05, + "loss": 1.2998, + "step": 14930 + }, + { + "epoch": 0.4383992013623818, + "grad_norm": 0.0, + "learning_rate": 1.2455679685725178e-05, + "loss": 1.3047, + "step": 14931 + }, + { + "epoch": 0.4384285630395208, + "grad_norm": 0.0, + "learning_rate": 1.2454757833260968e-05, + "loss": 1.2754, + "step": 14932 + }, + { + "epoch": 0.43845792471665984, + "grad_norm": 0.0, + "learning_rate": 1.2453835958597759e-05, + "loss": 1.3936, + "step": 14933 + }, + { + "epoch": 0.4384872863937988, + "grad_norm": 0.0, + "learning_rate": 1.245291406174388e-05, + "loss": 1.3516, + "step": 14934 + }, + { + "epoch": 0.4385166480709378, + "grad_norm": 0.0, + "learning_rate": 1.2451992142707672e-05, + "loss": 1.3496, + "step": 14935 + }, + { + "epoch": 0.43854600974807684, + "grad_norm": 0.0, + "learning_rate": 1.2451070201497467e-05, + "loss": 1.3613, + "step": 14936 + }, + { + "epoch": 0.4385753714252158, + "grad_norm": 0.0, + "learning_rate": 1.2450148238121615e-05, + "loss": 1.4365, + "step": 14937 + }, + { + "epoch": 0.4386047331023548, + "grad_norm": 0.0, + "learning_rate": 1.244922625258844e-05, + "loss": 1.3584, + "step": 14938 + }, + { + "epoch": 0.4386340947794938, + "grad_norm": 0.0, + "learning_rate": 1.2448304244906283e-05, + "loss": 1.2803, + "step": 14939 + }, + { + "epoch": 0.4386634564566328, + "grad_norm": 0.0, + "learning_rate": 1.2447382215083486e-05, + "loss": 1.4023, + "step": 14940 + }, + { + "epoch": 0.4386928181337718, + "grad_norm": 0.0, + "learning_rate": 1.2446460163128387e-05, + "loss": 1.3916, + "step": 14941 + }, + { + "epoch": 0.4387221798109108, + "grad_norm": 0.0, + "learning_rate": 1.2445538089049318e-05, + "loss": 1.4619, + "step": 14942 + }, + { + "epoch": 0.4387515414880498, + "grad_norm": 0.0, + "learning_rate": 1.2444615992854623e-05, + "loss": 1.2319, + "step": 14943 + }, + { + "epoch": 0.4387809031651888, + "grad_norm": 0.0, + "learning_rate": 1.2443693874552638e-05, + "loss": 1.3271, + "step": 14944 + }, + { + "epoch": 0.43881026484232777, + "grad_norm": 0.0, + "learning_rate": 1.2442771734151707e-05, + "loss": 1.2295, + "step": 14945 + }, + { + "epoch": 0.4388396265194668, + "grad_norm": 0.0, + "learning_rate": 1.2441849571660163e-05, + "loss": 1.3135, + "step": 14946 + }, + { + "epoch": 0.4388689881966058, + "grad_norm": 0.0, + "learning_rate": 1.2440927387086351e-05, + "loss": 1.2832, + "step": 14947 + }, + { + "epoch": 0.43889834987374476, + "grad_norm": 0.0, + "learning_rate": 1.2440005180438604e-05, + "loss": 1.3408, + "step": 14948 + }, + { + "epoch": 0.4389277115508838, + "grad_norm": 0.0, + "learning_rate": 1.2439082951725266e-05, + "loss": 1.3652, + "step": 14949 + }, + { + "epoch": 0.4389570732280228, + "grad_norm": 0.0, + "learning_rate": 1.2438160700954678e-05, + "loss": 1.2568, + "step": 14950 + }, + { + "epoch": 0.43898643490516176, + "grad_norm": 0.0, + "learning_rate": 1.2437238428135177e-05, + "loss": 1.3828, + "step": 14951 + }, + { + "epoch": 0.4390157965823008, + "grad_norm": 0.0, + "learning_rate": 1.2436316133275103e-05, + "loss": 1.2734, + "step": 14952 + }, + { + "epoch": 0.4390451582594398, + "grad_norm": 0.0, + "learning_rate": 1.2435393816382803e-05, + "loss": 1.1797, + "step": 14953 + }, + { + "epoch": 0.43907451993657876, + "grad_norm": 0.0, + "learning_rate": 1.243447147746661e-05, + "loss": 1.3828, + "step": 14954 + }, + { + "epoch": 0.4391038816137178, + "grad_norm": 0.0, + "learning_rate": 1.2433549116534867e-05, + "loss": 1.2402, + "step": 14955 + }, + { + "epoch": 0.4391332432908568, + "grad_norm": 0.0, + "learning_rate": 1.2432626733595918e-05, + "loss": 1.2842, + "step": 14956 + }, + { + "epoch": 0.43916260496799575, + "grad_norm": 0.0, + "learning_rate": 1.24317043286581e-05, + "loss": 1.4531, + "step": 14957 + }, + { + "epoch": 0.43919196664513477, + "grad_norm": 0.0, + "learning_rate": 1.243078190172976e-05, + "loss": 1.3975, + "step": 14958 + }, + { + "epoch": 0.4392213283222738, + "grad_norm": 0.0, + "learning_rate": 1.2429859452819235e-05, + "loss": 1.4004, + "step": 14959 + }, + { + "epoch": 0.43925068999941275, + "grad_norm": 0.0, + "learning_rate": 1.242893698193487e-05, + "loss": 1.3721, + "step": 14960 + }, + { + "epoch": 0.43928005167655176, + "grad_norm": 0.0, + "learning_rate": 1.2428014489085003e-05, + "loss": 1.4424, + "step": 14961 + }, + { + "epoch": 0.4393094133536908, + "grad_norm": 0.0, + "learning_rate": 1.2427091974277983e-05, + "loss": 1.1885, + "step": 14962 + }, + { + "epoch": 0.43933877503082974, + "grad_norm": 0.0, + "learning_rate": 1.2426169437522145e-05, + "loss": 1.2632, + "step": 14963 + }, + { + "epoch": 0.43936813670796876, + "grad_norm": 0.0, + "learning_rate": 1.2425246878825836e-05, + "loss": 1.2627, + "step": 14964 + }, + { + "epoch": 0.4393974983851078, + "grad_norm": 0.0, + "learning_rate": 1.2424324298197401e-05, + "loss": 1.4258, + "step": 14965 + }, + { + "epoch": 0.43942686006224674, + "grad_norm": 0.0, + "learning_rate": 1.2423401695645181e-05, + "loss": 1.3213, + "step": 14966 + }, + { + "epoch": 0.43945622173938576, + "grad_norm": 0.0, + "learning_rate": 1.2422479071177516e-05, + "loss": 1.293, + "step": 14967 + }, + { + "epoch": 0.43948558341652477, + "grad_norm": 0.0, + "learning_rate": 1.2421556424802752e-05, + "loss": 1.4307, + "step": 14968 + }, + { + "epoch": 0.43951494509366373, + "grad_norm": 0.0, + "learning_rate": 1.2420633756529235e-05, + "loss": 1.3525, + "step": 14969 + }, + { + "epoch": 0.43954430677080275, + "grad_norm": 0.0, + "learning_rate": 1.2419711066365308e-05, + "loss": 1.4424, + "step": 14970 + }, + { + "epoch": 0.43957366844794177, + "grad_norm": 0.0, + "learning_rate": 1.241878835431931e-05, + "loss": 1.4092, + "step": 14971 + }, + { + "epoch": 0.43960303012508073, + "grad_norm": 0.0, + "learning_rate": 1.2417865620399592e-05, + "loss": 1.4844, + "step": 14972 + }, + { + "epoch": 0.43963239180221975, + "grad_norm": 0.0, + "learning_rate": 1.2416942864614495e-05, + "loss": 1.415, + "step": 14973 + }, + { + "epoch": 0.43966175347935876, + "grad_norm": 0.0, + "learning_rate": 1.2416020086972366e-05, + "loss": 1.2202, + "step": 14974 + }, + { + "epoch": 0.4396911151564977, + "grad_norm": 0.0, + "learning_rate": 1.2415097287481548e-05, + "loss": 1.2686, + "step": 14975 + }, + { + "epoch": 0.43972047683363674, + "grad_norm": 0.0, + "learning_rate": 1.2414174466150389e-05, + "loss": 1.3613, + "step": 14976 + }, + { + "epoch": 0.43974983851077576, + "grad_norm": 0.0, + "learning_rate": 1.2413251622987228e-05, + "loss": 1.3779, + "step": 14977 + }, + { + "epoch": 0.4397792001879147, + "grad_norm": 0.0, + "learning_rate": 1.2412328758000418e-05, + "loss": 1.3896, + "step": 14978 + }, + { + "epoch": 0.43980856186505374, + "grad_norm": 0.0, + "learning_rate": 1.2411405871198299e-05, + "loss": 1.3359, + "step": 14979 + }, + { + "epoch": 0.43983792354219275, + "grad_norm": 0.0, + "learning_rate": 1.2410482962589219e-05, + "loss": 1.335, + "step": 14980 + }, + { + "epoch": 0.4398672852193317, + "grad_norm": 0.0, + "learning_rate": 1.2409560032181522e-05, + "loss": 1.29, + "step": 14981 + }, + { + "epoch": 0.43989664689647073, + "grad_norm": 0.0, + "learning_rate": 1.2408637079983562e-05, + "loss": 1.3262, + "step": 14982 + }, + { + "epoch": 0.43992600857360975, + "grad_norm": 0.0, + "learning_rate": 1.2407714106003676e-05, + "loss": 1.3438, + "step": 14983 + }, + { + "epoch": 0.4399553702507487, + "grad_norm": 0.0, + "learning_rate": 1.2406791110250217e-05, + "loss": 1.2822, + "step": 14984 + }, + { + "epoch": 0.43998473192788773, + "grad_norm": 0.0, + "learning_rate": 1.2405868092731527e-05, + "loss": 1.3926, + "step": 14985 + }, + { + "epoch": 0.44001409360502675, + "grad_norm": 0.0, + "learning_rate": 1.240494505345596e-05, + "loss": 1.4404, + "step": 14986 + }, + { + "epoch": 0.4400434552821657, + "grad_norm": 0.0, + "learning_rate": 1.2404021992431856e-05, + "loss": 1.2764, + "step": 14987 + }, + { + "epoch": 0.4400728169593047, + "grad_norm": 0.0, + "learning_rate": 1.2403098909667565e-05, + "loss": 1.2554, + "step": 14988 + }, + { + "epoch": 0.4401021786364437, + "grad_norm": 0.0, + "learning_rate": 1.2402175805171437e-05, + "loss": 1.2568, + "step": 14989 + }, + { + "epoch": 0.4401315403135827, + "grad_norm": 0.0, + "learning_rate": 1.2401252678951818e-05, + "loss": 1.2754, + "step": 14990 + }, + { + "epoch": 0.4401609019907217, + "grad_norm": 0.0, + "learning_rate": 1.2400329531017055e-05, + "loss": 1.2979, + "step": 14991 + }, + { + "epoch": 0.4401902636678607, + "grad_norm": 0.0, + "learning_rate": 1.2399406361375497e-05, + "loss": 1.3535, + "step": 14992 + }, + { + "epoch": 0.4402196253449997, + "grad_norm": 0.0, + "learning_rate": 1.2398483170035491e-05, + "loss": 1.3379, + "step": 14993 + }, + { + "epoch": 0.4402489870221387, + "grad_norm": 0.0, + "learning_rate": 1.2397559957005392e-05, + "loss": 1.2607, + "step": 14994 + }, + { + "epoch": 0.4402783486992777, + "grad_norm": 0.0, + "learning_rate": 1.239663672229354e-05, + "loss": 1.2529, + "step": 14995 + }, + { + "epoch": 0.4403077103764167, + "grad_norm": 0.0, + "learning_rate": 1.2395713465908292e-05, + "loss": 1.3545, + "step": 14996 + }, + { + "epoch": 0.4403370720535557, + "grad_norm": 0.0, + "learning_rate": 1.239479018785799e-05, + "loss": 1.3584, + "step": 14997 + }, + { + "epoch": 0.4403664337306947, + "grad_norm": 0.0, + "learning_rate": 1.239386688815099e-05, + "loss": 1.3623, + "step": 14998 + }, + { + "epoch": 0.4403957954078337, + "grad_norm": 0.0, + "learning_rate": 1.239294356679564e-05, + "loss": 1.3623, + "step": 14999 + }, + { + "epoch": 0.4404251570849727, + "grad_norm": 0.0, + "learning_rate": 1.2392020223800283e-05, + "loss": 1.3076, + "step": 15000 + }, + { + "epoch": 0.44045451876211167, + "grad_norm": 0.0, + "learning_rate": 1.2391096859173277e-05, + "loss": 1.4121, + "step": 15001 + }, + { + "epoch": 0.4404838804392507, + "grad_norm": 0.0, + "learning_rate": 1.2390173472922974e-05, + "loss": 1.3643, + "step": 15002 + }, + { + "epoch": 0.4405132421163897, + "grad_norm": 0.0, + "learning_rate": 1.2389250065057714e-05, + "loss": 1.2666, + "step": 15003 + }, + { + "epoch": 0.44054260379352866, + "grad_norm": 0.0, + "learning_rate": 1.2388326635585855e-05, + "loss": 1.3672, + "step": 15004 + }, + { + "epoch": 0.4405719654706677, + "grad_norm": 0.0, + "learning_rate": 1.2387403184515747e-05, + "loss": 1.4414, + "step": 15005 + }, + { + "epoch": 0.4406013271478067, + "grad_norm": 0.0, + "learning_rate": 1.2386479711855742e-05, + "loss": 1.0869, + "step": 15006 + }, + { + "epoch": 0.44063068882494566, + "grad_norm": 0.0, + "learning_rate": 1.2385556217614192e-05, + "loss": 1.458, + "step": 15007 + }, + { + "epoch": 0.4406600505020847, + "grad_norm": 0.0, + "learning_rate": 1.2384632701799442e-05, + "loss": 1.2393, + "step": 15008 + }, + { + "epoch": 0.4406894121792237, + "grad_norm": 0.0, + "learning_rate": 1.2383709164419848e-05, + "loss": 1.416, + "step": 15009 + }, + { + "epoch": 0.44071877385636266, + "grad_norm": 0.0, + "learning_rate": 1.238278560548376e-05, + "loss": 1.3906, + "step": 15010 + }, + { + "epoch": 0.4407481355335017, + "grad_norm": 0.0, + "learning_rate": 1.2381862024999536e-05, + "loss": 1.2764, + "step": 15011 + }, + { + "epoch": 0.4407774972106407, + "grad_norm": 0.0, + "learning_rate": 1.2380938422975522e-05, + "loss": 1.1147, + "step": 15012 + }, + { + "epoch": 0.44080685888777965, + "grad_norm": 0.0, + "learning_rate": 1.238001479942007e-05, + "loss": 1.3447, + "step": 15013 + }, + { + "epoch": 0.44083622056491867, + "grad_norm": 0.0, + "learning_rate": 1.2379091154341536e-05, + "loss": 1.3564, + "step": 15014 + }, + { + "epoch": 0.4408655822420577, + "grad_norm": 0.0, + "learning_rate": 1.2378167487748274e-05, + "loss": 1.4521, + "step": 15015 + }, + { + "epoch": 0.44089494391919665, + "grad_norm": 0.0, + "learning_rate": 1.2377243799648633e-05, + "loss": 1.3438, + "step": 15016 + }, + { + "epoch": 0.44092430559633566, + "grad_norm": 0.0, + "learning_rate": 1.2376320090050966e-05, + "loss": 1.3936, + "step": 15017 + }, + { + "epoch": 0.4409536672734747, + "grad_norm": 0.0, + "learning_rate": 1.2375396358963628e-05, + "loss": 1.3018, + "step": 15018 + }, + { + "epoch": 0.44098302895061364, + "grad_norm": 0.0, + "learning_rate": 1.2374472606394975e-05, + "loss": 1.3672, + "step": 15019 + }, + { + "epoch": 0.44101239062775266, + "grad_norm": 0.0, + "learning_rate": 1.2373548832353355e-05, + "loss": 1.377, + "step": 15020 + }, + { + "epoch": 0.4410417523048917, + "grad_norm": 0.0, + "learning_rate": 1.2372625036847125e-05, + "loss": 1.4199, + "step": 15021 + }, + { + "epoch": 0.44107111398203064, + "grad_norm": 0.0, + "learning_rate": 1.237170121988464e-05, + "loss": 1.2915, + "step": 15022 + }, + { + "epoch": 0.44110047565916966, + "grad_norm": 0.0, + "learning_rate": 1.2370777381474255e-05, + "loss": 1.4307, + "step": 15023 + }, + { + "epoch": 0.44112983733630867, + "grad_norm": 0.0, + "learning_rate": 1.236985352162432e-05, + "loss": 1.3936, + "step": 15024 + }, + { + "epoch": 0.44115919901344763, + "grad_norm": 0.0, + "learning_rate": 1.2368929640343197e-05, + "loss": 1.3994, + "step": 15025 + }, + { + "epoch": 0.44118856069058665, + "grad_norm": 0.0, + "learning_rate": 1.2368005737639233e-05, + "loss": 1.3301, + "step": 15026 + }, + { + "epoch": 0.44121792236772567, + "grad_norm": 0.0, + "learning_rate": 1.2367081813520789e-05, + "loss": 1.3418, + "step": 15027 + }, + { + "epoch": 0.44124728404486463, + "grad_norm": 0.0, + "learning_rate": 1.2366157867996216e-05, + "loss": 1.3525, + "step": 15028 + }, + { + "epoch": 0.44127664572200365, + "grad_norm": 0.0, + "learning_rate": 1.236523390107387e-05, + "loss": 1.3916, + "step": 15029 + }, + { + "epoch": 0.44130600739914266, + "grad_norm": 0.0, + "learning_rate": 1.2364309912762108e-05, + "loss": 1.416, + "step": 15030 + }, + { + "epoch": 0.4413353690762816, + "grad_norm": 0.0, + "learning_rate": 1.2363385903069291e-05, + "loss": 1.2998, + "step": 15031 + }, + { + "epoch": 0.44136473075342064, + "grad_norm": 0.0, + "learning_rate": 1.2362461872003765e-05, + "loss": 1.3408, + "step": 15032 + }, + { + "epoch": 0.44139409243055966, + "grad_norm": 0.0, + "learning_rate": 1.2361537819573894e-05, + "loss": 1.3877, + "step": 15033 + }, + { + "epoch": 0.4414234541076986, + "grad_norm": 0.0, + "learning_rate": 1.2360613745788031e-05, + "loss": 1.3721, + "step": 15034 + }, + { + "epoch": 0.44145281578483764, + "grad_norm": 0.0, + "learning_rate": 1.2359689650654534e-05, + "loss": 1.4619, + "step": 15035 + }, + { + "epoch": 0.44148217746197665, + "grad_norm": 0.0, + "learning_rate": 1.235876553418176e-05, + "loss": 1.2676, + "step": 15036 + }, + { + "epoch": 0.4415115391391156, + "grad_norm": 0.0, + "learning_rate": 1.2357841396378062e-05, + "loss": 1.2417, + "step": 15037 + }, + { + "epoch": 0.44154090081625463, + "grad_norm": 0.0, + "learning_rate": 1.2356917237251803e-05, + "loss": 1.3506, + "step": 15038 + }, + { + "epoch": 0.4415702624933936, + "grad_norm": 0.0, + "learning_rate": 1.2355993056811336e-05, + "loss": 1.3154, + "step": 15039 + }, + { + "epoch": 0.4415996241705326, + "grad_norm": 0.0, + "learning_rate": 1.2355068855065022e-05, + "loss": 1.2803, + "step": 15040 + }, + { + "epoch": 0.44162898584767163, + "grad_norm": 0.0, + "learning_rate": 1.2354144632021214e-05, + "loss": 1.3555, + "step": 15041 + }, + { + "epoch": 0.4416583475248106, + "grad_norm": 0.0, + "learning_rate": 1.2353220387688276e-05, + "loss": 1.1743, + "step": 15042 + }, + { + "epoch": 0.4416877092019496, + "grad_norm": 0.0, + "learning_rate": 1.2352296122074566e-05, + "loss": 1.4268, + "step": 15043 + }, + { + "epoch": 0.4417170708790886, + "grad_norm": 0.0, + "learning_rate": 1.2351371835188436e-05, + "loss": 1.3828, + "step": 15044 + }, + { + "epoch": 0.4417464325562276, + "grad_norm": 0.0, + "learning_rate": 1.235044752703825e-05, + "loss": 1.2354, + "step": 15045 + }, + { + "epoch": 0.4417757942333666, + "grad_norm": 0.0, + "learning_rate": 1.2349523197632362e-05, + "loss": 1.2969, + "step": 15046 + }, + { + "epoch": 0.4418051559105056, + "grad_norm": 0.0, + "learning_rate": 1.2348598846979135e-05, + "loss": 1.2412, + "step": 15047 + }, + { + "epoch": 0.4418345175876446, + "grad_norm": 0.0, + "learning_rate": 1.234767447508693e-05, + "loss": 1.4619, + "step": 15048 + }, + { + "epoch": 0.4418638792647836, + "grad_norm": 0.0, + "learning_rate": 1.23467500819641e-05, + "loss": 1.334, + "step": 15049 + }, + { + "epoch": 0.4418932409419226, + "grad_norm": 0.0, + "learning_rate": 1.2345825667619009e-05, + "loss": 1.4082, + "step": 15050 + }, + { + "epoch": 0.4419226026190616, + "grad_norm": 0.0, + "learning_rate": 1.2344901232060016e-05, + "loss": 1.3926, + "step": 15051 + }, + { + "epoch": 0.4419519642962006, + "grad_norm": 0.0, + "learning_rate": 1.2343976775295482e-05, + "loss": 1.2236, + "step": 15052 + }, + { + "epoch": 0.4419813259733396, + "grad_norm": 0.0, + "learning_rate": 1.2343052297333765e-05, + "loss": 1.2168, + "step": 15053 + }, + { + "epoch": 0.4420106876504786, + "grad_norm": 0.0, + "learning_rate": 1.2342127798183226e-05, + "loss": 1.3682, + "step": 15054 + }, + { + "epoch": 0.4420400493276176, + "grad_norm": 0.0, + "learning_rate": 1.2341203277852226e-05, + "loss": 1.3613, + "step": 15055 + }, + { + "epoch": 0.4420694110047566, + "grad_norm": 0.0, + "learning_rate": 1.2340278736349125e-05, + "loss": 1.2383, + "step": 15056 + }, + { + "epoch": 0.44209877268189557, + "grad_norm": 0.0, + "learning_rate": 1.2339354173682283e-05, + "loss": 1.3164, + "step": 15057 + }, + { + "epoch": 0.4421281343590346, + "grad_norm": 0.0, + "learning_rate": 1.2338429589860061e-05, + "loss": 1.3604, + "step": 15058 + }, + { + "epoch": 0.4421574960361736, + "grad_norm": 0.0, + "learning_rate": 1.2337504984890823e-05, + "loss": 1.4102, + "step": 15059 + }, + { + "epoch": 0.44218685771331256, + "grad_norm": 0.0, + "learning_rate": 1.2336580358782931e-05, + "loss": 1.3057, + "step": 15060 + }, + { + "epoch": 0.4422162193904516, + "grad_norm": 0.0, + "learning_rate": 1.233565571154474e-05, + "loss": 1.3213, + "step": 15061 + }, + { + "epoch": 0.4422455810675906, + "grad_norm": 0.0, + "learning_rate": 1.2334731043184618e-05, + "loss": 1.2598, + "step": 15062 + }, + { + "epoch": 0.44227494274472956, + "grad_norm": 0.0, + "learning_rate": 1.2333806353710925e-05, + "loss": 1.3115, + "step": 15063 + }, + { + "epoch": 0.4423043044218686, + "grad_norm": 0.0, + "learning_rate": 1.2332881643132027e-05, + "loss": 1.2959, + "step": 15064 + }, + { + "epoch": 0.4423336660990076, + "grad_norm": 0.0, + "learning_rate": 1.233195691145628e-05, + "loss": 1.1099, + "step": 15065 + }, + { + "epoch": 0.44236302777614656, + "grad_norm": 0.0, + "learning_rate": 1.2331032158692046e-05, + "loss": 1.207, + "step": 15066 + }, + { + "epoch": 0.4423923894532856, + "grad_norm": 0.0, + "learning_rate": 1.2330107384847697e-05, + "loss": 1.4131, + "step": 15067 + }, + { + "epoch": 0.4424217511304246, + "grad_norm": 0.0, + "learning_rate": 1.2329182589931588e-05, + "loss": 1.3281, + "step": 15068 + }, + { + "epoch": 0.44245111280756355, + "grad_norm": 0.0, + "learning_rate": 1.2328257773952084e-05, + "loss": 1.2734, + "step": 15069 + }, + { + "epoch": 0.44248047448470257, + "grad_norm": 0.0, + "learning_rate": 1.2327332936917546e-05, + "loss": 1.3311, + "step": 15070 + }, + { + "epoch": 0.4425098361618416, + "grad_norm": 0.0, + "learning_rate": 1.2326408078836343e-05, + "loss": 1.3271, + "step": 15071 + }, + { + "epoch": 0.44253919783898055, + "grad_norm": 0.0, + "learning_rate": 1.2325483199716836e-05, + "loss": 1.3096, + "step": 15072 + }, + { + "epoch": 0.44256855951611956, + "grad_norm": 0.0, + "learning_rate": 1.2324558299567389e-05, + "loss": 1.2705, + "step": 15073 + }, + { + "epoch": 0.4425979211932586, + "grad_norm": 0.0, + "learning_rate": 1.2323633378396365e-05, + "loss": 1.2881, + "step": 15074 + }, + { + "epoch": 0.44262728287039754, + "grad_norm": 0.0, + "learning_rate": 1.2322708436212132e-05, + "loss": 1.355, + "step": 15075 + }, + { + "epoch": 0.44265664454753656, + "grad_norm": 0.0, + "learning_rate": 1.2321783473023049e-05, + "loss": 1.3672, + "step": 15076 + }, + { + "epoch": 0.4426860062246756, + "grad_norm": 0.0, + "learning_rate": 1.2320858488837484e-05, + "loss": 1.3477, + "step": 15077 + }, + { + "epoch": 0.44271536790181454, + "grad_norm": 0.0, + "learning_rate": 1.2319933483663801e-05, + "loss": 1.3271, + "step": 15078 + }, + { + "epoch": 0.44274472957895356, + "grad_norm": 0.0, + "learning_rate": 1.2319008457510364e-05, + "loss": 1.335, + "step": 15079 + }, + { + "epoch": 0.4427740912560926, + "grad_norm": 0.0, + "learning_rate": 1.2318083410385544e-05, + "loss": 1.3076, + "step": 15080 + }, + { + "epoch": 0.44280345293323153, + "grad_norm": 0.0, + "learning_rate": 1.23171583422977e-05, + "loss": 1.2422, + "step": 15081 + }, + { + "epoch": 0.44283281461037055, + "grad_norm": 0.0, + "learning_rate": 1.2316233253255197e-05, + "loss": 1.3633, + "step": 15082 + }, + { + "epoch": 0.44286217628750957, + "grad_norm": 0.0, + "learning_rate": 1.2315308143266405e-05, + "loss": 1.4072, + "step": 15083 + }, + { + "epoch": 0.44289153796464853, + "grad_norm": 0.0, + "learning_rate": 1.231438301233969e-05, + "loss": 1.2871, + "step": 15084 + }, + { + "epoch": 0.44292089964178755, + "grad_norm": 0.0, + "learning_rate": 1.2313457860483418e-05, + "loss": 1.2666, + "step": 15085 + }, + { + "epoch": 0.44295026131892656, + "grad_norm": 0.0, + "learning_rate": 1.2312532687705949e-05, + "loss": 1.3506, + "step": 15086 + }, + { + "epoch": 0.4429796229960655, + "grad_norm": 0.0, + "learning_rate": 1.2311607494015658e-05, + "loss": 1.2861, + "step": 15087 + }, + { + "epoch": 0.44300898467320454, + "grad_norm": 0.0, + "learning_rate": 1.2310682279420907e-05, + "loss": 1.416, + "step": 15088 + }, + { + "epoch": 0.44303834635034356, + "grad_norm": 0.0, + "learning_rate": 1.2309757043930065e-05, + "loss": 1.2515, + "step": 15089 + }, + { + "epoch": 0.4430677080274825, + "grad_norm": 0.0, + "learning_rate": 1.2308831787551501e-05, + "loss": 1.3672, + "step": 15090 + }, + { + "epoch": 0.44309706970462154, + "grad_norm": 0.0, + "learning_rate": 1.2307906510293575e-05, + "loss": 1.3008, + "step": 15091 + }, + { + "epoch": 0.4431264313817605, + "grad_norm": 0.0, + "learning_rate": 1.2306981212164664e-05, + "loss": 1.3555, + "step": 15092 + }, + { + "epoch": 0.4431557930588995, + "grad_norm": 0.0, + "learning_rate": 1.2306055893173128e-05, + "loss": 1.3916, + "step": 15093 + }, + { + "epoch": 0.44318515473603853, + "grad_norm": 0.0, + "learning_rate": 1.230513055332734e-05, + "loss": 1.4697, + "step": 15094 + }, + { + "epoch": 0.4432145164131775, + "grad_norm": 0.0, + "learning_rate": 1.2304205192635665e-05, + "loss": 1.3662, + "step": 15095 + }, + { + "epoch": 0.4432438780903165, + "grad_norm": 0.0, + "learning_rate": 1.2303279811106472e-05, + "loss": 1.417, + "step": 15096 + }, + { + "epoch": 0.44327323976745553, + "grad_norm": 0.0, + "learning_rate": 1.2302354408748132e-05, + "loss": 1.2695, + "step": 15097 + }, + { + "epoch": 0.4433026014445945, + "grad_norm": 0.0, + "learning_rate": 1.2301428985569008e-05, + "loss": 1.4072, + "step": 15098 + }, + { + "epoch": 0.4433319631217335, + "grad_norm": 0.0, + "learning_rate": 1.2300503541577473e-05, + "loss": 1.3164, + "step": 15099 + }, + { + "epoch": 0.4433613247988725, + "grad_norm": 0.0, + "learning_rate": 1.2299578076781897e-05, + "loss": 1.4736, + "step": 15100 + }, + { + "epoch": 0.4433906864760115, + "grad_norm": 0.0, + "learning_rate": 1.2298652591190647e-05, + "loss": 1.2139, + "step": 15101 + }, + { + "epoch": 0.4434200481531505, + "grad_norm": 0.0, + "learning_rate": 1.2297727084812092e-05, + "loss": 1.4395, + "step": 15102 + }, + { + "epoch": 0.4434494098302895, + "grad_norm": 0.0, + "learning_rate": 1.2296801557654603e-05, + "loss": 1.502, + "step": 15103 + }, + { + "epoch": 0.4434787715074285, + "grad_norm": 0.0, + "learning_rate": 1.229587600972655e-05, + "loss": 1.3789, + "step": 15104 + }, + { + "epoch": 0.4435081331845675, + "grad_norm": 0.0, + "learning_rate": 1.2294950441036305e-05, + "loss": 1.3296, + "step": 15105 + }, + { + "epoch": 0.4435374948617065, + "grad_norm": 0.0, + "learning_rate": 1.229402485159223e-05, + "loss": 1.269, + "step": 15106 + }, + { + "epoch": 0.4435668565388455, + "grad_norm": 0.0, + "learning_rate": 1.2293099241402704e-05, + "loss": 1.291, + "step": 15107 + }, + { + "epoch": 0.4435962182159845, + "grad_norm": 0.0, + "learning_rate": 1.229217361047609e-05, + "loss": 1.2979, + "step": 15108 + }, + { + "epoch": 0.4436255798931235, + "grad_norm": 0.0, + "learning_rate": 1.229124795882077e-05, + "loss": 1.3867, + "step": 15109 + }, + { + "epoch": 0.4436549415702625, + "grad_norm": 0.0, + "learning_rate": 1.2290322286445103e-05, + "loss": 1.3408, + "step": 15110 + }, + { + "epoch": 0.4436843032474015, + "grad_norm": 0.0, + "learning_rate": 1.2289396593357466e-05, + "loss": 1.4443, + "step": 15111 + }, + { + "epoch": 0.4437136649245405, + "grad_norm": 0.0, + "learning_rate": 1.2288470879566228e-05, + "loss": 1.3291, + "step": 15112 + }, + { + "epoch": 0.44374302660167947, + "grad_norm": 0.0, + "learning_rate": 1.2287545145079765e-05, + "loss": 1.2725, + "step": 15113 + }, + { + "epoch": 0.4437723882788185, + "grad_norm": 0.0, + "learning_rate": 1.2286619389906444e-05, + "loss": 1.4004, + "step": 15114 + }, + { + "epoch": 0.4438017499559575, + "grad_norm": 0.0, + "learning_rate": 1.2285693614054637e-05, + "loss": 1.2285, + "step": 15115 + }, + { + "epoch": 0.44383111163309646, + "grad_norm": 0.0, + "learning_rate": 1.2284767817532717e-05, + "loss": 1.3315, + "step": 15116 + }, + { + "epoch": 0.4438604733102355, + "grad_norm": 0.0, + "learning_rate": 1.228384200034906e-05, + "loss": 1.293, + "step": 15117 + }, + { + "epoch": 0.4438898349873745, + "grad_norm": 0.0, + "learning_rate": 1.228291616251203e-05, + "loss": 1.3926, + "step": 15118 + }, + { + "epoch": 0.44391919666451346, + "grad_norm": 0.0, + "learning_rate": 1.2281990304030008e-05, + "loss": 1.1528, + "step": 15119 + }, + { + "epoch": 0.4439485583416525, + "grad_norm": 0.0, + "learning_rate": 1.2281064424911362e-05, + "loss": 1.2939, + "step": 15120 + }, + { + "epoch": 0.4439779200187915, + "grad_norm": 0.0, + "learning_rate": 1.2280138525164466e-05, + "loss": 1.3936, + "step": 15121 + }, + { + "epoch": 0.44400728169593046, + "grad_norm": 0.0, + "learning_rate": 1.2279212604797695e-05, + "loss": 1.2471, + "step": 15122 + }, + { + "epoch": 0.4440366433730695, + "grad_norm": 0.0, + "learning_rate": 1.2278286663819417e-05, + "loss": 1.3574, + "step": 15123 + }, + { + "epoch": 0.4440660050502085, + "grad_norm": 0.0, + "learning_rate": 1.2277360702238012e-05, + "loss": 1.4756, + "step": 15124 + }, + { + "epoch": 0.44409536672734745, + "grad_norm": 0.0, + "learning_rate": 1.227643472006185e-05, + "loss": 1.373, + "step": 15125 + }, + { + "epoch": 0.44412472840448647, + "grad_norm": 0.0, + "learning_rate": 1.2275508717299309e-05, + "loss": 1.4775, + "step": 15126 + }, + { + "epoch": 0.4441540900816255, + "grad_norm": 0.0, + "learning_rate": 1.2274582693958756e-05, + "loss": 1.2646, + "step": 15127 + }, + { + "epoch": 0.44418345175876445, + "grad_norm": 0.0, + "learning_rate": 1.2273656650048571e-05, + "loss": 1.3672, + "step": 15128 + }, + { + "epoch": 0.44421281343590346, + "grad_norm": 0.0, + "learning_rate": 1.2272730585577125e-05, + "loss": 1.3701, + "step": 15129 + }, + { + "epoch": 0.4442421751130425, + "grad_norm": 0.0, + "learning_rate": 1.2271804500552798e-05, + "loss": 1.3125, + "step": 15130 + }, + { + "epoch": 0.44427153679018144, + "grad_norm": 0.0, + "learning_rate": 1.2270878394983957e-05, + "loss": 1.293, + "step": 15131 + }, + { + "epoch": 0.44430089846732046, + "grad_norm": 0.0, + "learning_rate": 1.2269952268878983e-05, + "loss": 1.3145, + "step": 15132 + }, + { + "epoch": 0.4443302601444595, + "grad_norm": 0.0, + "learning_rate": 1.2269026122246248e-05, + "loss": 1.3403, + "step": 15133 + }, + { + "epoch": 0.44435962182159844, + "grad_norm": 0.0, + "learning_rate": 1.2268099955094132e-05, + "loss": 1.3418, + "step": 15134 + }, + { + "epoch": 0.44438898349873746, + "grad_norm": 0.0, + "learning_rate": 1.2267173767431005e-05, + "loss": 1.3398, + "step": 15135 + }, + { + "epoch": 0.4444183451758765, + "grad_norm": 0.0, + "learning_rate": 1.2266247559265243e-05, + "loss": 1.416, + "step": 15136 + }, + { + "epoch": 0.44444770685301543, + "grad_norm": 0.0, + "learning_rate": 1.2265321330605225e-05, + "loss": 1.2334, + "step": 15137 + }, + { + "epoch": 0.44447706853015445, + "grad_norm": 0.0, + "learning_rate": 1.2264395081459328e-05, + "loss": 1.4961, + "step": 15138 + }, + { + "epoch": 0.44450643020729347, + "grad_norm": 0.0, + "learning_rate": 1.2263468811835924e-05, + "loss": 1.4043, + "step": 15139 + }, + { + "epoch": 0.44453579188443243, + "grad_norm": 0.0, + "learning_rate": 1.2262542521743393e-05, + "loss": 1.3428, + "step": 15140 + }, + { + "epoch": 0.44456515356157145, + "grad_norm": 0.0, + "learning_rate": 1.2261616211190108e-05, + "loss": 1.2832, + "step": 15141 + }, + { + "epoch": 0.4445945152387104, + "grad_norm": 0.0, + "learning_rate": 1.2260689880184452e-05, + "loss": 1.3428, + "step": 15142 + }, + { + "epoch": 0.4446238769158494, + "grad_norm": 0.0, + "learning_rate": 1.2259763528734797e-05, + "loss": 1.2656, + "step": 15143 + }, + { + "epoch": 0.44465323859298844, + "grad_norm": 0.0, + "learning_rate": 1.2258837156849526e-05, + "loss": 1.3047, + "step": 15144 + }, + { + "epoch": 0.4446826002701274, + "grad_norm": 0.0, + "learning_rate": 1.2257910764537006e-05, + "loss": 1.333, + "step": 15145 + }, + { + "epoch": 0.4447119619472664, + "grad_norm": 0.0, + "learning_rate": 1.2256984351805623e-05, + "loss": 1.3271, + "step": 15146 + }, + { + "epoch": 0.44474132362440544, + "grad_norm": 0.0, + "learning_rate": 1.2256057918663752e-05, + "loss": 1.3818, + "step": 15147 + }, + { + "epoch": 0.4447706853015444, + "grad_norm": 0.0, + "learning_rate": 1.2255131465119771e-05, + "loss": 1.251, + "step": 15148 + }, + { + "epoch": 0.4448000469786834, + "grad_norm": 0.0, + "learning_rate": 1.2254204991182059e-05, + "loss": 1.3242, + "step": 15149 + }, + { + "epoch": 0.44482940865582243, + "grad_norm": 0.0, + "learning_rate": 1.2253278496858996e-05, + "loss": 1.3887, + "step": 15150 + }, + { + "epoch": 0.4448587703329614, + "grad_norm": 0.0, + "learning_rate": 1.2252351982158957e-05, + "loss": 1.3467, + "step": 15151 + }, + { + "epoch": 0.4448881320101004, + "grad_norm": 0.0, + "learning_rate": 1.2251425447090321e-05, + "loss": 1.334, + "step": 15152 + }, + { + "epoch": 0.44491749368723943, + "grad_norm": 0.0, + "learning_rate": 1.2250498891661469e-05, + "loss": 1.1733, + "step": 15153 + }, + { + "epoch": 0.4449468553643784, + "grad_norm": 0.0, + "learning_rate": 1.2249572315880782e-05, + "loss": 1.2153, + "step": 15154 + }, + { + "epoch": 0.4449762170415174, + "grad_norm": 0.0, + "learning_rate": 1.2248645719756633e-05, + "loss": 1.3818, + "step": 15155 + }, + { + "epoch": 0.4450055787186564, + "grad_norm": 0.0, + "learning_rate": 1.2247719103297404e-05, + "loss": 1.498, + "step": 15156 + }, + { + "epoch": 0.4450349403957954, + "grad_norm": 0.0, + "learning_rate": 1.2246792466511477e-05, + "loss": 1.188, + "step": 15157 + }, + { + "epoch": 0.4450643020729344, + "grad_norm": 0.0, + "learning_rate": 1.2245865809407232e-05, + "loss": 1.3037, + "step": 15158 + }, + { + "epoch": 0.4450936637500734, + "grad_norm": 0.0, + "learning_rate": 1.2244939131993043e-05, + "loss": 1.3818, + "step": 15159 + }, + { + "epoch": 0.4451230254272124, + "grad_norm": 0.0, + "learning_rate": 1.2244012434277297e-05, + "loss": 1.4473, + "step": 15160 + }, + { + "epoch": 0.4451523871043514, + "grad_norm": 0.0, + "learning_rate": 1.2243085716268372e-05, + "loss": 1.3545, + "step": 15161 + }, + { + "epoch": 0.4451817487814904, + "grad_norm": 0.0, + "learning_rate": 1.224215897797465e-05, + "loss": 1.3584, + "step": 15162 + }, + { + "epoch": 0.4452111104586294, + "grad_norm": 0.0, + "learning_rate": 1.2241232219404508e-05, + "loss": 1.3789, + "step": 15163 + }, + { + "epoch": 0.4452404721357684, + "grad_norm": 0.0, + "learning_rate": 1.2240305440566331e-05, + "loss": 1.2559, + "step": 15164 + }, + { + "epoch": 0.4452698338129074, + "grad_norm": 0.0, + "learning_rate": 1.2239378641468496e-05, + "loss": 1.3125, + "step": 15165 + }, + { + "epoch": 0.4452991954900464, + "grad_norm": 0.0, + "learning_rate": 1.2238451822119386e-05, + "loss": 1.3994, + "step": 15166 + }, + { + "epoch": 0.4453285571671854, + "grad_norm": 0.0, + "learning_rate": 1.2237524982527385e-05, + "loss": 1.3467, + "step": 15167 + }, + { + "epoch": 0.4453579188443244, + "grad_norm": 0.0, + "learning_rate": 1.2236598122700871e-05, + "loss": 1.4004, + "step": 15168 + }, + { + "epoch": 0.44538728052146337, + "grad_norm": 0.0, + "learning_rate": 1.2235671242648227e-05, + "loss": 1.4521, + "step": 15169 + }, + { + "epoch": 0.4454166421986024, + "grad_norm": 0.0, + "learning_rate": 1.2234744342377838e-05, + "loss": 1.375, + "step": 15170 + }, + { + "epoch": 0.4454460038757414, + "grad_norm": 0.0, + "learning_rate": 1.2233817421898083e-05, + "loss": 1.3184, + "step": 15171 + }, + { + "epoch": 0.44547536555288036, + "grad_norm": 0.0, + "learning_rate": 1.2232890481217341e-05, + "loss": 1.3535, + "step": 15172 + }, + { + "epoch": 0.4455047272300194, + "grad_norm": 0.0, + "learning_rate": 1.2231963520344003e-05, + "loss": 1.2998, + "step": 15173 + }, + { + "epoch": 0.4455340889071584, + "grad_norm": 0.0, + "learning_rate": 1.2231036539286445e-05, + "loss": 1.2783, + "step": 15174 + }, + { + "epoch": 0.44556345058429736, + "grad_norm": 0.0, + "learning_rate": 1.2230109538053054e-05, + "loss": 1.3408, + "step": 15175 + }, + { + "epoch": 0.4455928122614364, + "grad_norm": 0.0, + "learning_rate": 1.2229182516652209e-05, + "loss": 1.4775, + "step": 15176 + }, + { + "epoch": 0.4456221739385754, + "grad_norm": 0.0, + "learning_rate": 1.2228255475092296e-05, + "loss": 1.2192, + "step": 15177 + }, + { + "epoch": 0.44565153561571436, + "grad_norm": 0.0, + "learning_rate": 1.2227328413381698e-05, + "loss": 1.2852, + "step": 15178 + }, + { + "epoch": 0.4456808972928534, + "grad_norm": 0.0, + "learning_rate": 1.22264013315288e-05, + "loss": 1.2441, + "step": 15179 + }, + { + "epoch": 0.4457102589699924, + "grad_norm": 0.0, + "learning_rate": 1.2225474229541986e-05, + "loss": 1.2031, + "step": 15180 + }, + { + "epoch": 0.44573962064713135, + "grad_norm": 0.0, + "learning_rate": 1.2224547107429636e-05, + "loss": 1.2715, + "step": 15181 + }, + { + "epoch": 0.44576898232427037, + "grad_norm": 0.0, + "learning_rate": 1.222361996520014e-05, + "loss": 1.3877, + "step": 15182 + }, + { + "epoch": 0.4457983440014094, + "grad_norm": 0.0, + "learning_rate": 1.2222692802861877e-05, + "loss": 1.2559, + "step": 15183 + }, + { + "epoch": 0.44582770567854835, + "grad_norm": 0.0, + "learning_rate": 1.2221765620423231e-05, + "loss": 1.3594, + "step": 15184 + }, + { + "epoch": 0.44585706735568736, + "grad_norm": 0.0, + "learning_rate": 1.2220838417892594e-05, + "loss": 1.2007, + "step": 15185 + }, + { + "epoch": 0.4458864290328264, + "grad_norm": 0.0, + "learning_rate": 1.2219911195278342e-05, + "loss": 1.3291, + "step": 15186 + }, + { + "epoch": 0.44591579070996534, + "grad_norm": 0.0, + "learning_rate": 1.221898395258887e-05, + "loss": 1.3389, + "step": 15187 + }, + { + "epoch": 0.44594515238710436, + "grad_norm": 0.0, + "learning_rate": 1.2218056689832554e-05, + "loss": 1.2446, + "step": 15188 + }, + { + "epoch": 0.4459745140642434, + "grad_norm": 0.0, + "learning_rate": 1.2217129407017783e-05, + "loss": 1.2598, + "step": 15189 + }, + { + "epoch": 0.44600387574138234, + "grad_norm": 0.0, + "learning_rate": 1.2216202104152944e-05, + "loss": 1.2725, + "step": 15190 + }, + { + "epoch": 0.44603323741852136, + "grad_norm": 0.0, + "learning_rate": 1.2215274781246423e-05, + "loss": 1.2705, + "step": 15191 + }, + { + "epoch": 0.4460625990956603, + "grad_norm": 0.0, + "learning_rate": 1.2214347438306602e-05, + "loss": 1.3467, + "step": 15192 + }, + { + "epoch": 0.44609196077279933, + "grad_norm": 0.0, + "learning_rate": 1.2213420075341873e-05, + "loss": 1.3584, + "step": 15193 + }, + { + "epoch": 0.44612132244993835, + "grad_norm": 0.0, + "learning_rate": 1.2212492692360618e-05, + "loss": 1.2842, + "step": 15194 + }, + { + "epoch": 0.4461506841270773, + "grad_norm": 0.0, + "learning_rate": 1.2211565289371225e-05, + "loss": 1.3037, + "step": 15195 + }, + { + "epoch": 0.44618004580421633, + "grad_norm": 0.0, + "learning_rate": 1.2210637866382081e-05, + "loss": 1.3398, + "step": 15196 + }, + { + "epoch": 0.44620940748135535, + "grad_norm": 0.0, + "learning_rate": 1.220971042340157e-05, + "loss": 1.3877, + "step": 15197 + }, + { + "epoch": 0.4462387691584943, + "grad_norm": 0.0, + "learning_rate": 1.2208782960438084e-05, + "loss": 1.3472, + "step": 15198 + }, + { + "epoch": 0.4462681308356333, + "grad_norm": 0.0, + "learning_rate": 1.220785547750001e-05, + "loss": 1.4072, + "step": 15199 + }, + { + "epoch": 0.44629749251277234, + "grad_norm": 0.0, + "learning_rate": 1.220692797459573e-05, + "loss": 1.3818, + "step": 15200 + }, + { + "epoch": 0.4463268541899113, + "grad_norm": 0.0, + "learning_rate": 1.2206000451733638e-05, + "loss": 1.334, + "step": 15201 + }, + { + "epoch": 0.4463562158670503, + "grad_norm": 0.0, + "learning_rate": 1.2205072908922115e-05, + "loss": 1.2959, + "step": 15202 + }, + { + "epoch": 0.44638557754418934, + "grad_norm": 0.0, + "learning_rate": 1.2204145346169559e-05, + "loss": 1.3613, + "step": 15203 + }, + { + "epoch": 0.4464149392213283, + "grad_norm": 0.0, + "learning_rate": 1.2203217763484347e-05, + "loss": 1.3379, + "step": 15204 + }, + { + "epoch": 0.4464443008984673, + "grad_norm": 0.0, + "learning_rate": 1.2202290160874875e-05, + "loss": 1.4023, + "step": 15205 + }, + { + "epoch": 0.44647366257560633, + "grad_norm": 0.0, + "learning_rate": 1.2201362538349529e-05, + "loss": 1.2939, + "step": 15206 + }, + { + "epoch": 0.4465030242527453, + "grad_norm": 0.0, + "learning_rate": 1.2200434895916698e-05, + "loss": 1.5078, + "step": 15207 + }, + { + "epoch": 0.4465323859298843, + "grad_norm": 0.0, + "learning_rate": 1.2199507233584768e-05, + "loss": 1.3525, + "step": 15208 + }, + { + "epoch": 0.44656174760702333, + "grad_norm": 0.0, + "learning_rate": 1.219857955136213e-05, + "loss": 1.4004, + "step": 15209 + }, + { + "epoch": 0.4465911092841623, + "grad_norm": 0.0, + "learning_rate": 1.2197651849257177e-05, + "loss": 1.4014, + "step": 15210 + }, + { + "epoch": 0.4466204709613013, + "grad_norm": 0.0, + "learning_rate": 1.2196724127278296e-05, + "loss": 1.2637, + "step": 15211 + }, + { + "epoch": 0.4466498326384403, + "grad_norm": 0.0, + "learning_rate": 1.2195796385433877e-05, + "loss": 1.1323, + "step": 15212 + }, + { + "epoch": 0.4466791943155793, + "grad_norm": 0.0, + "learning_rate": 1.2194868623732308e-05, + "loss": 1.4326, + "step": 15213 + }, + { + "epoch": 0.4467085559927183, + "grad_norm": 0.0, + "learning_rate": 1.2193940842181977e-05, + "loss": 1.3359, + "step": 15214 + }, + { + "epoch": 0.4467379176698573, + "grad_norm": 0.0, + "learning_rate": 1.219301304079128e-05, + "loss": 1.3809, + "step": 15215 + }, + { + "epoch": 0.4467672793469963, + "grad_norm": 0.0, + "learning_rate": 1.2192085219568604e-05, + "loss": 1.1143, + "step": 15216 + }, + { + "epoch": 0.4467966410241353, + "grad_norm": 0.0, + "learning_rate": 1.2191157378522338e-05, + "loss": 1.1445, + "step": 15217 + }, + { + "epoch": 0.4468260027012743, + "grad_norm": 0.0, + "learning_rate": 1.2190229517660877e-05, + "loss": 1.3232, + "step": 15218 + }, + { + "epoch": 0.4468553643784133, + "grad_norm": 0.0, + "learning_rate": 1.2189301636992608e-05, + "loss": 1.3223, + "step": 15219 + }, + { + "epoch": 0.4468847260555523, + "grad_norm": 0.0, + "learning_rate": 1.2188373736525925e-05, + "loss": 1.293, + "step": 15220 + }, + { + "epoch": 0.4469140877326913, + "grad_norm": 0.0, + "learning_rate": 1.2187445816269216e-05, + "loss": 1.3257, + "step": 15221 + }, + { + "epoch": 0.4469434494098303, + "grad_norm": 0.0, + "learning_rate": 1.2186517876230877e-05, + "loss": 1.3389, + "step": 15222 + }, + { + "epoch": 0.4469728110869693, + "grad_norm": 0.0, + "learning_rate": 1.2185589916419293e-05, + "loss": 1.3535, + "step": 15223 + }, + { + "epoch": 0.4470021727641083, + "grad_norm": 0.0, + "learning_rate": 1.2184661936842865e-05, + "loss": 1.2354, + "step": 15224 + }, + { + "epoch": 0.44703153444124727, + "grad_norm": 0.0, + "learning_rate": 1.2183733937509976e-05, + "loss": 1.3994, + "step": 15225 + }, + { + "epoch": 0.4470608961183863, + "grad_norm": 0.0, + "learning_rate": 1.2182805918429022e-05, + "loss": 1.2246, + "step": 15226 + }, + { + "epoch": 0.4470902577955253, + "grad_norm": 0.0, + "learning_rate": 1.2181877879608394e-05, + "loss": 1.1313, + "step": 15227 + }, + { + "epoch": 0.44711961947266426, + "grad_norm": 0.0, + "learning_rate": 1.218094982105649e-05, + "loss": 1.3252, + "step": 15228 + }, + { + "epoch": 0.4471489811498033, + "grad_norm": 0.0, + "learning_rate": 1.2180021742781693e-05, + "loss": 1.418, + "step": 15229 + }, + { + "epoch": 0.4471783428269423, + "grad_norm": 0.0, + "learning_rate": 1.2179093644792404e-05, + "loss": 1.3516, + "step": 15230 + }, + { + "epoch": 0.44720770450408126, + "grad_norm": 0.0, + "learning_rate": 1.217816552709701e-05, + "loss": 1.3252, + "step": 15231 + }, + { + "epoch": 0.4472370661812203, + "grad_norm": 0.0, + "learning_rate": 1.2177237389703913e-05, + "loss": 1.3613, + "step": 15232 + }, + { + "epoch": 0.4472664278583593, + "grad_norm": 0.0, + "learning_rate": 1.2176309232621497e-05, + "loss": 1.3916, + "step": 15233 + }, + { + "epoch": 0.44729578953549826, + "grad_norm": 0.0, + "learning_rate": 1.217538105585816e-05, + "loss": 1.1401, + "step": 15234 + }, + { + "epoch": 0.4473251512126373, + "grad_norm": 0.0, + "learning_rate": 1.2174452859422292e-05, + "loss": 1.3877, + "step": 15235 + }, + { + "epoch": 0.4473545128897763, + "grad_norm": 0.0, + "learning_rate": 1.2173524643322294e-05, + "loss": 1.457, + "step": 15236 + }, + { + "epoch": 0.44738387456691525, + "grad_norm": 0.0, + "learning_rate": 1.2172596407566555e-05, + "loss": 1.3662, + "step": 15237 + }, + { + "epoch": 0.44741323624405427, + "grad_norm": 0.0, + "learning_rate": 1.2171668152163467e-05, + "loss": 1.3213, + "step": 15238 + }, + { + "epoch": 0.4474425979211933, + "grad_norm": 0.0, + "learning_rate": 1.2170739877121431e-05, + "loss": 1.375, + "step": 15239 + }, + { + "epoch": 0.44747195959833225, + "grad_norm": 0.0, + "learning_rate": 1.216981158244884e-05, + "loss": 1.3516, + "step": 15240 + }, + { + "epoch": 0.44750132127547126, + "grad_norm": 0.0, + "learning_rate": 1.2168883268154083e-05, + "loss": 1.2803, + "step": 15241 + }, + { + "epoch": 0.4475306829526102, + "grad_norm": 0.0, + "learning_rate": 1.2167954934245562e-05, + "loss": 1.3301, + "step": 15242 + }, + { + "epoch": 0.44756004462974924, + "grad_norm": 0.0, + "learning_rate": 1.2167026580731669e-05, + "loss": 1.3096, + "step": 15243 + }, + { + "epoch": 0.44758940630688826, + "grad_norm": 0.0, + "learning_rate": 1.2166098207620801e-05, + "loss": 1.3047, + "step": 15244 + }, + { + "epoch": 0.4476187679840272, + "grad_norm": 0.0, + "learning_rate": 1.2165169814921349e-05, + "loss": 1.4482, + "step": 15245 + }, + { + "epoch": 0.44764812966116624, + "grad_norm": 0.0, + "learning_rate": 1.2164241402641713e-05, + "loss": 1.2603, + "step": 15246 + }, + { + "epoch": 0.44767749133830526, + "grad_norm": 0.0, + "learning_rate": 1.2163312970790287e-05, + "loss": 1.3906, + "step": 15247 + }, + { + "epoch": 0.4477068530154442, + "grad_norm": 0.0, + "learning_rate": 1.216238451937547e-05, + "loss": 1.4121, + "step": 15248 + }, + { + "epoch": 0.44773621469258323, + "grad_norm": 0.0, + "learning_rate": 1.2161456048405653e-05, + "loss": 1.3438, + "step": 15249 + }, + { + "epoch": 0.44776557636972225, + "grad_norm": 0.0, + "learning_rate": 1.2160527557889237e-05, + "loss": 1.3652, + "step": 15250 + }, + { + "epoch": 0.4477949380468612, + "grad_norm": 0.0, + "learning_rate": 1.2159599047834615e-05, + "loss": 1.3711, + "step": 15251 + }, + { + "epoch": 0.44782429972400023, + "grad_norm": 0.0, + "learning_rate": 1.215867051825019e-05, + "loss": 1.1299, + "step": 15252 + }, + { + "epoch": 0.44785366140113925, + "grad_norm": 0.0, + "learning_rate": 1.2157741969144354e-05, + "loss": 1.2817, + "step": 15253 + }, + { + "epoch": 0.4478830230782782, + "grad_norm": 0.0, + "learning_rate": 1.2156813400525501e-05, + "loss": 1.334, + "step": 15254 + }, + { + "epoch": 0.4479123847554172, + "grad_norm": 0.0, + "learning_rate": 1.2155884812402035e-05, + "loss": 1.3936, + "step": 15255 + }, + { + "epoch": 0.44794174643255624, + "grad_norm": 0.0, + "learning_rate": 1.2154956204782349e-05, + "loss": 1.4453, + "step": 15256 + }, + { + "epoch": 0.4479711081096952, + "grad_norm": 0.0, + "learning_rate": 1.2154027577674843e-05, + "loss": 1.4199, + "step": 15257 + }, + { + "epoch": 0.4480004697868342, + "grad_norm": 0.0, + "learning_rate": 1.2153098931087915e-05, + "loss": 1.3779, + "step": 15258 + }, + { + "epoch": 0.44802983146397324, + "grad_norm": 0.0, + "learning_rate": 1.2152170265029958e-05, + "loss": 1.4209, + "step": 15259 + }, + { + "epoch": 0.4480591931411122, + "grad_norm": 0.0, + "learning_rate": 1.2151241579509378e-05, + "loss": 1.417, + "step": 15260 + }, + { + "epoch": 0.4480885548182512, + "grad_norm": 0.0, + "learning_rate": 1.215031287453457e-05, + "loss": 1.5342, + "step": 15261 + }, + { + "epoch": 0.44811791649539023, + "grad_norm": 0.0, + "learning_rate": 1.214938415011393e-05, + "loss": 1.4736, + "step": 15262 + }, + { + "epoch": 0.4481472781725292, + "grad_norm": 0.0, + "learning_rate": 1.2148455406255859e-05, + "loss": 1.3535, + "step": 15263 + }, + { + "epoch": 0.4481766398496682, + "grad_norm": 0.0, + "learning_rate": 1.2147526642968754e-05, + "loss": 1.1074, + "step": 15264 + }, + { + "epoch": 0.44820600152680723, + "grad_norm": 0.0, + "learning_rate": 1.2146597860261018e-05, + "loss": 1.3994, + "step": 15265 + }, + { + "epoch": 0.4482353632039462, + "grad_norm": 0.0, + "learning_rate": 1.2145669058141048e-05, + "loss": 1.2163, + "step": 15266 + }, + { + "epoch": 0.4482647248810852, + "grad_norm": 0.0, + "learning_rate": 1.2144740236617241e-05, + "loss": 1.2847, + "step": 15267 + }, + { + "epoch": 0.4482940865582242, + "grad_norm": 0.0, + "learning_rate": 1.2143811395697998e-05, + "loss": 1.2715, + "step": 15268 + }, + { + "epoch": 0.4483234482353632, + "grad_norm": 0.0, + "learning_rate": 1.2142882535391722e-05, + "loss": 1.2949, + "step": 15269 + }, + { + "epoch": 0.4483528099125022, + "grad_norm": 0.0, + "learning_rate": 1.214195365570681e-05, + "loss": 1.3613, + "step": 15270 + }, + { + "epoch": 0.4483821715896412, + "grad_norm": 0.0, + "learning_rate": 1.2141024756651666e-05, + "loss": 1.29, + "step": 15271 + }, + { + "epoch": 0.4484115332667802, + "grad_norm": 0.0, + "learning_rate": 1.214009583823468e-05, + "loss": 1.3682, + "step": 15272 + }, + { + "epoch": 0.4484408949439192, + "grad_norm": 0.0, + "learning_rate": 1.2139166900464263e-05, + "loss": 1.4688, + "step": 15273 + }, + { + "epoch": 0.4484702566210582, + "grad_norm": 0.0, + "learning_rate": 1.2138237943348813e-05, + "loss": 1.2529, + "step": 15274 + }, + { + "epoch": 0.4484996182981972, + "grad_norm": 0.0, + "learning_rate": 1.2137308966896725e-05, + "loss": 1.4668, + "step": 15275 + }, + { + "epoch": 0.4485289799753362, + "grad_norm": 0.0, + "learning_rate": 1.2136379971116408e-05, + "loss": 1.3389, + "step": 15276 + }, + { + "epoch": 0.4485583416524752, + "grad_norm": 0.0, + "learning_rate": 1.213545095601626e-05, + "loss": 1.4219, + "step": 15277 + }, + { + "epoch": 0.4485877033296142, + "grad_norm": 0.0, + "learning_rate": 1.213452192160468e-05, + "loss": 1.3018, + "step": 15278 + }, + { + "epoch": 0.4486170650067532, + "grad_norm": 0.0, + "learning_rate": 1.2133592867890073e-05, + "loss": 1.1445, + "step": 15279 + }, + { + "epoch": 0.4486464266838922, + "grad_norm": 0.0, + "learning_rate": 1.213266379488084e-05, + "loss": 1.3066, + "step": 15280 + }, + { + "epoch": 0.44867578836103117, + "grad_norm": 0.0, + "learning_rate": 1.213173470258538e-05, + "loss": 1.3379, + "step": 15281 + }, + { + "epoch": 0.4487051500381702, + "grad_norm": 0.0, + "learning_rate": 1.21308055910121e-05, + "loss": 1.3486, + "step": 15282 + }, + { + "epoch": 0.4487345117153092, + "grad_norm": 0.0, + "learning_rate": 1.2129876460169398e-05, + "loss": 1.1719, + "step": 15283 + }, + { + "epoch": 0.44876387339244816, + "grad_norm": 0.0, + "learning_rate": 1.2128947310065677e-05, + "loss": 1.3135, + "step": 15284 + }, + { + "epoch": 0.4487932350695872, + "grad_norm": 0.0, + "learning_rate": 1.2128018140709343e-05, + "loss": 1.3379, + "step": 15285 + }, + { + "epoch": 0.4488225967467262, + "grad_norm": 0.0, + "learning_rate": 1.2127088952108792e-05, + "loss": 1.5449, + "step": 15286 + }, + { + "epoch": 0.44885195842386516, + "grad_norm": 0.0, + "learning_rate": 1.2126159744272432e-05, + "loss": 1.2744, + "step": 15287 + }, + { + "epoch": 0.4488813201010042, + "grad_norm": 0.0, + "learning_rate": 1.2125230517208666e-05, + "loss": 1.2021, + "step": 15288 + }, + { + "epoch": 0.4489106817781432, + "grad_norm": 0.0, + "learning_rate": 1.2124301270925895e-05, + "loss": 1.3271, + "step": 15289 + }, + { + "epoch": 0.44894004345528216, + "grad_norm": 0.0, + "learning_rate": 1.2123372005432525e-05, + "loss": 1.2754, + "step": 15290 + }, + { + "epoch": 0.4489694051324212, + "grad_norm": 0.0, + "learning_rate": 1.2122442720736958e-05, + "loss": 1.23, + "step": 15291 + }, + { + "epoch": 0.44899876680956013, + "grad_norm": 0.0, + "learning_rate": 1.2121513416847597e-05, + "loss": 1.2744, + "step": 15292 + }, + { + "epoch": 0.44902812848669915, + "grad_norm": 0.0, + "learning_rate": 1.2120584093772848e-05, + "loss": 1.3857, + "step": 15293 + }, + { + "epoch": 0.44905749016383817, + "grad_norm": 0.0, + "learning_rate": 1.2119654751521113e-05, + "loss": 1.3408, + "step": 15294 + }, + { + "epoch": 0.44908685184097713, + "grad_norm": 0.0, + "learning_rate": 1.2118725390100797e-05, + "loss": 1.2412, + "step": 15295 + }, + { + "epoch": 0.44911621351811615, + "grad_norm": 0.0, + "learning_rate": 1.2117796009520303e-05, + "loss": 1.3652, + "step": 15296 + }, + { + "epoch": 0.44914557519525516, + "grad_norm": 0.0, + "learning_rate": 1.2116866609788042e-05, + "loss": 1.1885, + "step": 15297 + }, + { + "epoch": 0.4491749368723941, + "grad_norm": 0.0, + "learning_rate": 1.211593719091241e-05, + "loss": 1.3633, + "step": 15298 + }, + { + "epoch": 0.44920429854953314, + "grad_norm": 0.0, + "learning_rate": 1.2115007752901818e-05, + "loss": 1.3174, + "step": 15299 + }, + { + "epoch": 0.44923366022667216, + "grad_norm": 0.0, + "learning_rate": 1.2114078295764667e-05, + "loss": 1.2412, + "step": 15300 + }, + { + "epoch": 0.4492630219038111, + "grad_norm": 0.0, + "learning_rate": 1.2113148819509368e-05, + "loss": 1.2744, + "step": 15301 + }, + { + "epoch": 0.44929238358095014, + "grad_norm": 0.0, + "learning_rate": 1.2112219324144323e-05, + "loss": 1.25, + "step": 15302 + }, + { + "epoch": 0.44932174525808916, + "grad_norm": 0.0, + "learning_rate": 1.2111289809677936e-05, + "loss": 1.3809, + "step": 15303 + }, + { + "epoch": 0.4493511069352281, + "grad_norm": 0.0, + "learning_rate": 1.2110360276118613e-05, + "loss": 1.1914, + "step": 15304 + }, + { + "epoch": 0.44938046861236713, + "grad_norm": 0.0, + "learning_rate": 1.2109430723474762e-05, + "loss": 1.3506, + "step": 15305 + }, + { + "epoch": 0.44940983028950615, + "grad_norm": 0.0, + "learning_rate": 1.2108501151754793e-05, + "loss": 1.3389, + "step": 15306 + }, + { + "epoch": 0.4494391919666451, + "grad_norm": 0.0, + "learning_rate": 1.2107571560967103e-05, + "loss": 1.3389, + "step": 15307 + }, + { + "epoch": 0.44946855364378413, + "grad_norm": 0.0, + "learning_rate": 1.2106641951120106e-05, + "loss": 1.1914, + "step": 15308 + }, + { + "epoch": 0.44949791532092315, + "grad_norm": 0.0, + "learning_rate": 1.2105712322222205e-05, + "loss": 1.3193, + "step": 15309 + }, + { + "epoch": 0.4495272769980621, + "grad_norm": 0.0, + "learning_rate": 1.210478267428181e-05, + "loss": 1.2739, + "step": 15310 + }, + { + "epoch": 0.4495566386752011, + "grad_norm": 0.0, + "learning_rate": 1.2103853007307327e-05, + "loss": 1.3379, + "step": 15311 + }, + { + "epoch": 0.44958600035234014, + "grad_norm": 0.0, + "learning_rate": 1.2102923321307158e-05, + "loss": 1.2373, + "step": 15312 + }, + { + "epoch": 0.4496153620294791, + "grad_norm": 0.0, + "learning_rate": 1.2101993616289718e-05, + "loss": 1.3926, + "step": 15313 + }, + { + "epoch": 0.4496447237066181, + "grad_norm": 0.0, + "learning_rate": 1.2101063892263414e-05, + "loss": 1.2539, + "step": 15314 + }, + { + "epoch": 0.44967408538375714, + "grad_norm": 0.0, + "learning_rate": 1.2100134149236647e-05, + "loss": 1.2734, + "step": 15315 + }, + { + "epoch": 0.4497034470608961, + "grad_norm": 0.0, + "learning_rate": 1.2099204387217829e-05, + "loss": 1.3936, + "step": 15316 + }, + { + "epoch": 0.4497328087380351, + "grad_norm": 0.0, + "learning_rate": 1.2098274606215368e-05, + "loss": 1.3701, + "step": 15317 + }, + { + "epoch": 0.44976217041517413, + "grad_norm": 0.0, + "learning_rate": 1.2097344806237676e-05, + "loss": 1.2046, + "step": 15318 + }, + { + "epoch": 0.4497915320923131, + "grad_norm": 0.0, + "learning_rate": 1.2096414987293155e-05, + "loss": 1.2646, + "step": 15319 + }, + { + "epoch": 0.4498208937694521, + "grad_norm": 0.0, + "learning_rate": 1.2095485149390215e-05, + "loss": 1.377, + "step": 15320 + }, + { + "epoch": 0.44985025544659113, + "grad_norm": 0.0, + "learning_rate": 1.2094555292537268e-05, + "loss": 1.2793, + "step": 15321 + }, + { + "epoch": 0.4498796171237301, + "grad_norm": 0.0, + "learning_rate": 1.2093625416742725e-05, + "loss": 1.3691, + "step": 15322 + }, + { + "epoch": 0.4499089788008691, + "grad_norm": 0.0, + "learning_rate": 1.2092695522014986e-05, + "loss": 1.3594, + "step": 15323 + }, + { + "epoch": 0.4499383404780081, + "grad_norm": 0.0, + "learning_rate": 1.2091765608362464e-05, + "loss": 1.4463, + "step": 15324 + }, + { + "epoch": 0.4499677021551471, + "grad_norm": 0.0, + "learning_rate": 1.2090835675793574e-05, + "loss": 1.3057, + "step": 15325 + }, + { + "epoch": 0.4499970638322861, + "grad_norm": 0.0, + "learning_rate": 1.208990572431672e-05, + "loss": 0.9927, + "step": 15326 + }, + { + "epoch": 0.4500264255094251, + "grad_norm": 0.0, + "learning_rate": 1.2088975753940314e-05, + "loss": 1.2285, + "step": 15327 + }, + { + "epoch": 0.4500557871865641, + "grad_norm": 0.0, + "learning_rate": 1.2088045764672762e-05, + "loss": 1.4238, + "step": 15328 + }, + { + "epoch": 0.4500851488637031, + "grad_norm": 0.0, + "learning_rate": 1.208711575652248e-05, + "loss": 1.3271, + "step": 15329 + }, + { + "epoch": 0.4501145105408421, + "grad_norm": 0.0, + "learning_rate": 1.2086185729497876e-05, + "loss": 1.4082, + "step": 15330 + }, + { + "epoch": 0.4501438722179811, + "grad_norm": 0.0, + "learning_rate": 1.2085255683607362e-05, + "loss": 1.375, + "step": 15331 + }, + { + "epoch": 0.4501732338951201, + "grad_norm": 0.0, + "learning_rate": 1.2084325618859345e-05, + "loss": 1.3008, + "step": 15332 + }, + { + "epoch": 0.4502025955722591, + "grad_norm": 0.0, + "learning_rate": 1.2083395535262237e-05, + "loss": 1.2842, + "step": 15333 + }, + { + "epoch": 0.4502319572493981, + "grad_norm": 0.0, + "learning_rate": 1.2082465432824448e-05, + "loss": 1.3291, + "step": 15334 + }, + { + "epoch": 0.4502613189265371, + "grad_norm": 0.0, + "learning_rate": 1.2081535311554395e-05, + "loss": 1.4414, + "step": 15335 + }, + { + "epoch": 0.4502906806036761, + "grad_norm": 0.0, + "learning_rate": 1.2080605171460483e-05, + "loss": 1.3037, + "step": 15336 + }, + { + "epoch": 0.45032004228081507, + "grad_norm": 0.0, + "learning_rate": 1.2079675012551121e-05, + "loss": 1.3247, + "step": 15337 + }, + { + "epoch": 0.4503494039579541, + "grad_norm": 0.0, + "learning_rate": 1.2078744834834733e-05, + "loss": 1.3906, + "step": 15338 + }, + { + "epoch": 0.4503787656350931, + "grad_norm": 0.0, + "learning_rate": 1.2077814638319717e-05, + "loss": 1.3652, + "step": 15339 + }, + { + "epoch": 0.45040812731223206, + "grad_norm": 0.0, + "learning_rate": 1.2076884423014497e-05, + "loss": 1.3174, + "step": 15340 + }, + { + "epoch": 0.4504374889893711, + "grad_norm": 0.0, + "learning_rate": 1.2075954188927475e-05, + "loss": 1.4365, + "step": 15341 + }, + { + "epoch": 0.45046685066651004, + "grad_norm": 0.0, + "learning_rate": 1.2075023936067067e-05, + "loss": 1.4443, + "step": 15342 + }, + { + "epoch": 0.45049621234364906, + "grad_norm": 0.0, + "learning_rate": 1.2074093664441691e-05, + "loss": 1.3535, + "step": 15343 + }, + { + "epoch": 0.4505255740207881, + "grad_norm": 0.0, + "learning_rate": 1.2073163374059752e-05, + "loss": 1.375, + "step": 15344 + }, + { + "epoch": 0.45055493569792704, + "grad_norm": 0.0, + "learning_rate": 1.2072233064929664e-05, + "loss": 1.3262, + "step": 15345 + }, + { + "epoch": 0.45058429737506606, + "grad_norm": 0.0, + "learning_rate": 1.207130273705984e-05, + "loss": 1.3857, + "step": 15346 + }, + { + "epoch": 0.4506136590522051, + "grad_norm": 0.0, + "learning_rate": 1.20703723904587e-05, + "loss": 1.333, + "step": 15347 + }, + { + "epoch": 0.45064302072934403, + "grad_norm": 0.0, + "learning_rate": 1.206944202513465e-05, + "loss": 1.2764, + "step": 15348 + }, + { + "epoch": 0.45067238240648305, + "grad_norm": 0.0, + "learning_rate": 1.2068511641096103e-05, + "loss": 1.4463, + "step": 15349 + }, + { + "epoch": 0.45070174408362207, + "grad_norm": 0.0, + "learning_rate": 1.2067581238351482e-05, + "loss": 1.2695, + "step": 15350 + }, + { + "epoch": 0.45073110576076103, + "grad_norm": 0.0, + "learning_rate": 1.2066650816909192e-05, + "loss": 1.3408, + "step": 15351 + }, + { + "epoch": 0.45076046743790005, + "grad_norm": 0.0, + "learning_rate": 1.2065720376777648e-05, + "loss": 1.3564, + "step": 15352 + }, + { + "epoch": 0.45078982911503906, + "grad_norm": 0.0, + "learning_rate": 1.2064789917965263e-05, + "loss": 1.252, + "step": 15353 + }, + { + "epoch": 0.450819190792178, + "grad_norm": 0.0, + "learning_rate": 1.2063859440480456e-05, + "loss": 1.3281, + "step": 15354 + }, + { + "epoch": 0.45084855246931704, + "grad_norm": 0.0, + "learning_rate": 1.2062928944331641e-05, + "loss": 1.4336, + "step": 15355 + }, + { + "epoch": 0.45087791414645606, + "grad_norm": 0.0, + "learning_rate": 1.2061998429527231e-05, + "loss": 1.2334, + "step": 15356 + }, + { + "epoch": 0.450907275823595, + "grad_norm": 0.0, + "learning_rate": 1.206106789607564e-05, + "loss": 1.3164, + "step": 15357 + }, + { + "epoch": 0.45093663750073404, + "grad_norm": 0.0, + "learning_rate": 1.2060137343985284e-05, + "loss": 1.4785, + "step": 15358 + }, + { + "epoch": 0.45096599917787306, + "grad_norm": 0.0, + "learning_rate": 1.205920677326458e-05, + "loss": 1.3232, + "step": 15359 + }, + { + "epoch": 0.450995360855012, + "grad_norm": 0.0, + "learning_rate": 1.2058276183921943e-05, + "loss": 1.3164, + "step": 15360 + }, + { + "epoch": 0.45102472253215103, + "grad_norm": 0.0, + "learning_rate": 1.2057345575965784e-05, + "loss": 1.2812, + "step": 15361 + }, + { + "epoch": 0.45105408420929005, + "grad_norm": 0.0, + "learning_rate": 1.2056414949404523e-05, + "loss": 1.3789, + "step": 15362 + }, + { + "epoch": 0.451083445886429, + "grad_norm": 0.0, + "learning_rate": 1.2055484304246577e-05, + "loss": 1.2861, + "step": 15363 + }, + { + "epoch": 0.45111280756356803, + "grad_norm": 0.0, + "learning_rate": 1.2054553640500354e-05, + "loss": 1.3818, + "step": 15364 + }, + { + "epoch": 0.45114216924070705, + "grad_norm": 0.0, + "learning_rate": 1.2053622958174281e-05, + "loss": 1.3975, + "step": 15365 + }, + { + "epoch": 0.451171530917846, + "grad_norm": 0.0, + "learning_rate": 1.2052692257276768e-05, + "loss": 1.3789, + "step": 15366 + }, + { + "epoch": 0.451200892594985, + "grad_norm": 0.0, + "learning_rate": 1.2051761537816235e-05, + "loss": 1.1758, + "step": 15367 + }, + { + "epoch": 0.45123025427212404, + "grad_norm": 0.0, + "learning_rate": 1.2050830799801096e-05, + "loss": 1.2002, + "step": 15368 + }, + { + "epoch": 0.451259615949263, + "grad_norm": 0.0, + "learning_rate": 1.2049900043239768e-05, + "loss": 1.2871, + "step": 15369 + }, + { + "epoch": 0.451288977626402, + "grad_norm": 0.0, + "learning_rate": 1.2048969268140673e-05, + "loss": 1.4971, + "step": 15370 + }, + { + "epoch": 0.45131833930354104, + "grad_norm": 0.0, + "learning_rate": 1.204803847451222e-05, + "loss": 1.2148, + "step": 15371 + }, + { + "epoch": 0.45134770098068, + "grad_norm": 0.0, + "learning_rate": 1.2047107662362834e-05, + "loss": 1.1885, + "step": 15372 + }, + { + "epoch": 0.451377062657819, + "grad_norm": 0.0, + "learning_rate": 1.2046176831700923e-05, + "loss": 1.3828, + "step": 15373 + }, + { + "epoch": 0.45140642433495803, + "grad_norm": 0.0, + "learning_rate": 1.2045245982534916e-05, + "loss": 1.3662, + "step": 15374 + }, + { + "epoch": 0.451435786012097, + "grad_norm": 0.0, + "learning_rate": 1.2044315114873226e-05, + "loss": 1.3848, + "step": 15375 + }, + { + "epoch": 0.451465147689236, + "grad_norm": 0.0, + "learning_rate": 1.2043384228724268e-05, + "loss": 1.3252, + "step": 15376 + }, + { + "epoch": 0.45149450936637503, + "grad_norm": 0.0, + "learning_rate": 1.2042453324096464e-05, + "loss": 1.4473, + "step": 15377 + }, + { + "epoch": 0.451523871043514, + "grad_norm": 0.0, + "learning_rate": 1.2041522400998231e-05, + "loss": 1.2319, + "step": 15378 + }, + { + "epoch": 0.451553232720653, + "grad_norm": 0.0, + "learning_rate": 1.204059145943799e-05, + "loss": 1.3516, + "step": 15379 + }, + { + "epoch": 0.451582594397792, + "grad_norm": 0.0, + "learning_rate": 1.203966049942416e-05, + "loss": 1.3516, + "step": 15380 + }, + { + "epoch": 0.451611956074931, + "grad_norm": 0.0, + "learning_rate": 1.2038729520965153e-05, + "loss": 1.3262, + "step": 15381 + }, + { + "epoch": 0.45164131775207, + "grad_norm": 0.0, + "learning_rate": 1.2037798524069394e-05, + "loss": 1.3711, + "step": 15382 + }, + { + "epoch": 0.451670679429209, + "grad_norm": 0.0, + "learning_rate": 1.20368675087453e-05, + "loss": 1.3232, + "step": 15383 + }, + { + "epoch": 0.451700041106348, + "grad_norm": 0.0, + "learning_rate": 1.2035936475001296e-05, + "loss": 1.3076, + "step": 15384 + }, + { + "epoch": 0.451729402783487, + "grad_norm": 0.0, + "learning_rate": 1.2035005422845794e-05, + "loss": 1.2725, + "step": 15385 + }, + { + "epoch": 0.451758764460626, + "grad_norm": 0.0, + "learning_rate": 1.2034074352287214e-05, + "loss": 1.2373, + "step": 15386 + }, + { + "epoch": 0.451788126137765, + "grad_norm": 0.0, + "learning_rate": 1.2033143263333984e-05, + "loss": 1.2275, + "step": 15387 + }, + { + "epoch": 0.451817487814904, + "grad_norm": 0.0, + "learning_rate": 1.2032212155994517e-05, + "loss": 1.3398, + "step": 15388 + }, + { + "epoch": 0.451846849492043, + "grad_norm": 0.0, + "learning_rate": 1.2031281030277233e-05, + "loss": 1.4404, + "step": 15389 + }, + { + "epoch": 0.451876211169182, + "grad_norm": 0.0, + "learning_rate": 1.2030349886190555e-05, + "loss": 1.418, + "step": 15390 + }, + { + "epoch": 0.451905572846321, + "grad_norm": 0.0, + "learning_rate": 1.2029418723742905e-05, + "loss": 1.3662, + "step": 15391 + }, + { + "epoch": 0.45193493452345995, + "grad_norm": 0.0, + "learning_rate": 1.2028487542942701e-05, + "loss": 1.3213, + "step": 15392 + }, + { + "epoch": 0.45196429620059897, + "grad_norm": 0.0, + "learning_rate": 1.2027556343798364e-05, + "loss": 1.3047, + "step": 15393 + }, + { + "epoch": 0.451993657877738, + "grad_norm": 0.0, + "learning_rate": 1.2026625126318314e-05, + "loss": 1.3418, + "step": 15394 + }, + { + "epoch": 0.45202301955487695, + "grad_norm": 0.0, + "learning_rate": 1.2025693890510977e-05, + "loss": 1.3652, + "step": 15395 + }, + { + "epoch": 0.45205238123201597, + "grad_norm": 0.0, + "learning_rate": 1.202476263638477e-05, + "loss": 1.4326, + "step": 15396 + }, + { + "epoch": 0.452081742909155, + "grad_norm": 0.0, + "learning_rate": 1.2023831363948116e-05, + "loss": 1.4336, + "step": 15397 + }, + { + "epoch": 0.45211110458629394, + "grad_norm": 0.0, + "learning_rate": 1.2022900073209435e-05, + "loss": 1.291, + "step": 15398 + }, + { + "epoch": 0.45214046626343296, + "grad_norm": 0.0, + "learning_rate": 1.2021968764177154e-05, + "loss": 1.3623, + "step": 15399 + }, + { + "epoch": 0.452169827940572, + "grad_norm": 0.0, + "learning_rate": 1.202103743685969e-05, + "loss": 1.2803, + "step": 15400 + }, + { + "epoch": 0.45219918961771094, + "grad_norm": 0.0, + "learning_rate": 1.2020106091265467e-05, + "loss": 1.4756, + "step": 15401 + }, + { + "epoch": 0.45222855129484996, + "grad_norm": 0.0, + "learning_rate": 1.2019174727402907e-05, + "loss": 1.4648, + "step": 15402 + }, + { + "epoch": 0.452257912971989, + "grad_norm": 0.0, + "learning_rate": 1.2018243345280432e-05, + "loss": 1.2388, + "step": 15403 + }, + { + "epoch": 0.45228727464912793, + "grad_norm": 0.0, + "learning_rate": 1.2017311944906469e-05, + "loss": 1.3906, + "step": 15404 + }, + { + "epoch": 0.45231663632626695, + "grad_norm": 0.0, + "learning_rate": 1.2016380526289433e-05, + "loss": 1.4238, + "step": 15405 + }, + { + "epoch": 0.45234599800340597, + "grad_norm": 0.0, + "learning_rate": 1.2015449089437752e-05, + "loss": 1.3027, + "step": 15406 + }, + { + "epoch": 0.45237535968054493, + "grad_norm": 0.0, + "learning_rate": 1.201451763435985e-05, + "loss": 1.3447, + "step": 15407 + }, + { + "epoch": 0.45240472135768395, + "grad_norm": 0.0, + "learning_rate": 1.201358616106415e-05, + "loss": 1.2773, + "step": 15408 + }, + { + "epoch": 0.45243408303482296, + "grad_norm": 0.0, + "learning_rate": 1.2012654669559076e-05, + "loss": 1.334, + "step": 15409 + }, + { + "epoch": 0.4524634447119619, + "grad_norm": 0.0, + "learning_rate": 1.2011723159853049e-05, + "loss": 1.3408, + "step": 15410 + }, + { + "epoch": 0.45249280638910094, + "grad_norm": 0.0, + "learning_rate": 1.2010791631954491e-05, + "loss": 1.4229, + "step": 15411 + }, + { + "epoch": 0.45252216806623996, + "grad_norm": 0.0, + "learning_rate": 1.2009860085871833e-05, + "loss": 1.3516, + "step": 15412 + }, + { + "epoch": 0.4525515297433789, + "grad_norm": 0.0, + "learning_rate": 1.2008928521613495e-05, + "loss": 1.3984, + "step": 15413 + }, + { + "epoch": 0.45258089142051794, + "grad_norm": 0.0, + "learning_rate": 1.20079969391879e-05, + "loss": 1.4053, + "step": 15414 + }, + { + "epoch": 0.45261025309765696, + "grad_norm": 0.0, + "learning_rate": 1.2007065338603474e-05, + "loss": 1.3936, + "step": 15415 + }, + { + "epoch": 0.4526396147747959, + "grad_norm": 0.0, + "learning_rate": 1.2006133719868647e-05, + "loss": 1.2622, + "step": 15416 + }, + { + "epoch": 0.45266897645193493, + "grad_norm": 0.0, + "learning_rate": 1.2005202082991834e-05, + "loss": 1.3818, + "step": 15417 + }, + { + "epoch": 0.45269833812907395, + "grad_norm": 0.0, + "learning_rate": 1.2004270427981466e-05, + "loss": 1.4043, + "step": 15418 + }, + { + "epoch": 0.4527276998062129, + "grad_norm": 0.0, + "learning_rate": 1.200333875484597e-05, + "loss": 1.2231, + "step": 15419 + }, + { + "epoch": 0.45275706148335193, + "grad_norm": 0.0, + "learning_rate": 1.2002407063593765e-05, + "loss": 1.3535, + "step": 15420 + }, + { + "epoch": 0.45278642316049095, + "grad_norm": 0.0, + "learning_rate": 1.2001475354233283e-05, + "loss": 1.3369, + "step": 15421 + }, + { + "epoch": 0.4528157848376299, + "grad_norm": 0.0, + "learning_rate": 1.2000543626772945e-05, + "loss": 1.3516, + "step": 15422 + }, + { + "epoch": 0.4528451465147689, + "grad_norm": 0.0, + "learning_rate": 1.1999611881221179e-05, + "loss": 1.2544, + "step": 15423 + }, + { + "epoch": 0.45287450819190794, + "grad_norm": 0.0, + "learning_rate": 1.1998680117586407e-05, + "loss": 1.2944, + "step": 15424 + }, + { + "epoch": 0.4529038698690469, + "grad_norm": 0.0, + "learning_rate": 1.1997748335877066e-05, + "loss": 1.3457, + "step": 15425 + }, + { + "epoch": 0.4529332315461859, + "grad_norm": 0.0, + "learning_rate": 1.199681653610157e-05, + "loss": 1.2861, + "step": 15426 + }, + { + "epoch": 0.45296259322332494, + "grad_norm": 0.0, + "learning_rate": 1.1995884718268353e-05, + "loss": 1.3682, + "step": 15427 + }, + { + "epoch": 0.4529919549004639, + "grad_norm": 0.0, + "learning_rate": 1.1994952882385839e-05, + "loss": 1.4902, + "step": 15428 + }, + { + "epoch": 0.4530213165776029, + "grad_norm": 0.0, + "learning_rate": 1.1994021028462457e-05, + "loss": 1.4082, + "step": 15429 + }, + { + "epoch": 0.45305067825474193, + "grad_norm": 0.0, + "learning_rate": 1.199308915650663e-05, + "loss": 1.3652, + "step": 15430 + }, + { + "epoch": 0.4530800399318809, + "grad_norm": 0.0, + "learning_rate": 1.1992157266526785e-05, + "loss": 1.2949, + "step": 15431 + }, + { + "epoch": 0.4531094016090199, + "grad_norm": 0.0, + "learning_rate": 1.1991225358531353e-05, + "loss": 1.3027, + "step": 15432 + }, + { + "epoch": 0.45313876328615893, + "grad_norm": 0.0, + "learning_rate": 1.1990293432528764e-05, + "loss": 1.3467, + "step": 15433 + }, + { + "epoch": 0.4531681249632979, + "grad_norm": 0.0, + "learning_rate": 1.1989361488527437e-05, + "loss": 1.3828, + "step": 15434 + }, + { + "epoch": 0.4531974866404369, + "grad_norm": 0.0, + "learning_rate": 1.1988429526535807e-05, + "loss": 1.2886, + "step": 15435 + }, + { + "epoch": 0.4532268483175759, + "grad_norm": 0.0, + "learning_rate": 1.1987497546562299e-05, + "loss": 1.3906, + "step": 15436 + }, + { + "epoch": 0.4532562099947149, + "grad_norm": 0.0, + "learning_rate": 1.1986565548615343e-05, + "loss": 1.582, + "step": 15437 + }, + { + "epoch": 0.4532855716718539, + "grad_norm": 0.0, + "learning_rate": 1.1985633532703362e-05, + "loss": 1.4238, + "step": 15438 + }, + { + "epoch": 0.4533149333489929, + "grad_norm": 0.0, + "learning_rate": 1.1984701498834794e-05, + "loss": 1.2734, + "step": 15439 + }, + { + "epoch": 0.4533442950261319, + "grad_norm": 0.0, + "learning_rate": 1.1983769447018059e-05, + "loss": 1.3721, + "step": 15440 + }, + { + "epoch": 0.4533736567032709, + "grad_norm": 0.0, + "learning_rate": 1.198283737726159e-05, + "loss": 1.3799, + "step": 15441 + }, + { + "epoch": 0.45340301838040986, + "grad_norm": 0.0, + "learning_rate": 1.1981905289573812e-05, + "loss": 1.333, + "step": 15442 + }, + { + "epoch": 0.4534323800575489, + "grad_norm": 0.0, + "learning_rate": 1.1980973183963158e-05, + "loss": 1.3887, + "step": 15443 + }, + { + "epoch": 0.4534617417346879, + "grad_norm": 0.0, + "learning_rate": 1.1980041060438057e-05, + "loss": 1.3354, + "step": 15444 + }, + { + "epoch": 0.45349110341182686, + "grad_norm": 0.0, + "learning_rate": 1.1979108919006938e-05, + "loss": 1.3389, + "step": 15445 + }, + { + "epoch": 0.4535204650889659, + "grad_norm": 0.0, + "learning_rate": 1.1978176759678229e-05, + "loss": 1.2656, + "step": 15446 + }, + { + "epoch": 0.4535498267661049, + "grad_norm": 0.0, + "learning_rate": 1.1977244582460361e-05, + "loss": 1.2881, + "step": 15447 + }, + { + "epoch": 0.45357918844324385, + "grad_norm": 0.0, + "learning_rate": 1.1976312387361764e-05, + "loss": 1.1309, + "step": 15448 + }, + { + "epoch": 0.45360855012038287, + "grad_norm": 0.0, + "learning_rate": 1.1975380174390871e-05, + "loss": 1.3779, + "step": 15449 + }, + { + "epoch": 0.4536379117975219, + "grad_norm": 0.0, + "learning_rate": 1.1974447943556106e-05, + "loss": 1.4639, + "step": 15450 + }, + { + "epoch": 0.45366727347466085, + "grad_norm": 0.0, + "learning_rate": 1.1973515694865903e-05, + "loss": 1.2881, + "step": 15451 + }, + { + "epoch": 0.45369663515179987, + "grad_norm": 0.0, + "learning_rate": 1.1972583428328693e-05, + "loss": 1.3896, + "step": 15452 + }, + { + "epoch": 0.4537259968289389, + "grad_norm": 0.0, + "learning_rate": 1.1971651143952904e-05, + "loss": 1.3271, + "step": 15453 + }, + { + "epoch": 0.45375535850607784, + "grad_norm": 0.0, + "learning_rate": 1.1970718841746972e-05, + "loss": 1.1982, + "step": 15454 + }, + { + "epoch": 0.45378472018321686, + "grad_norm": 0.0, + "learning_rate": 1.196978652171932e-05, + "loss": 1.3721, + "step": 15455 + }, + { + "epoch": 0.4538140818603559, + "grad_norm": 0.0, + "learning_rate": 1.1968854183878387e-05, + "loss": 1.4023, + "step": 15456 + }, + { + "epoch": 0.45384344353749484, + "grad_norm": 0.0, + "learning_rate": 1.1967921828232602e-05, + "loss": 1.5566, + "step": 15457 + }, + { + "epoch": 0.45387280521463386, + "grad_norm": 0.0, + "learning_rate": 1.1966989454790394e-05, + "loss": 1.3008, + "step": 15458 + }, + { + "epoch": 0.4539021668917729, + "grad_norm": 0.0, + "learning_rate": 1.19660570635602e-05, + "loss": 1.3574, + "step": 15459 + }, + { + "epoch": 0.45393152856891184, + "grad_norm": 0.0, + "learning_rate": 1.1965124654550443e-05, + "loss": 1.3027, + "step": 15460 + }, + { + "epoch": 0.45396089024605085, + "grad_norm": 0.0, + "learning_rate": 1.1964192227769565e-05, + "loss": 1.293, + "step": 15461 + }, + { + "epoch": 0.45399025192318987, + "grad_norm": 0.0, + "learning_rate": 1.1963259783225993e-05, + "loss": 1.29, + "step": 15462 + }, + { + "epoch": 0.45401961360032883, + "grad_norm": 0.0, + "learning_rate": 1.1962327320928159e-05, + "loss": 1.2656, + "step": 15463 + }, + { + "epoch": 0.45404897527746785, + "grad_norm": 0.0, + "learning_rate": 1.1961394840884495e-05, + "loss": 1.3574, + "step": 15464 + }, + { + "epoch": 0.45407833695460686, + "grad_norm": 0.0, + "learning_rate": 1.1960462343103438e-05, + "loss": 1.3398, + "step": 15465 + }, + { + "epoch": 0.4541076986317458, + "grad_norm": 0.0, + "learning_rate": 1.1959529827593416e-05, + "loss": 1.3174, + "step": 15466 + }, + { + "epoch": 0.45413706030888484, + "grad_norm": 0.0, + "learning_rate": 1.1958597294362866e-05, + "loss": 1.4141, + "step": 15467 + }, + { + "epoch": 0.45416642198602386, + "grad_norm": 0.0, + "learning_rate": 1.1957664743420219e-05, + "loss": 1.2646, + "step": 15468 + }, + { + "epoch": 0.4541957836631628, + "grad_norm": 0.0, + "learning_rate": 1.1956732174773906e-05, + "loss": 1.1904, + "step": 15469 + }, + { + "epoch": 0.45422514534030184, + "grad_norm": 0.0, + "learning_rate": 1.1955799588432364e-05, + "loss": 1.416, + "step": 15470 + }, + { + "epoch": 0.45425450701744086, + "grad_norm": 0.0, + "learning_rate": 1.1954866984404026e-05, + "loss": 1.3926, + "step": 15471 + }, + { + "epoch": 0.4542838686945798, + "grad_norm": 0.0, + "learning_rate": 1.1953934362697323e-05, + "loss": 1.1855, + "step": 15472 + }, + { + "epoch": 0.45431323037171883, + "grad_norm": 0.0, + "learning_rate": 1.1953001723320692e-05, + "loss": 1.3633, + "step": 15473 + }, + { + "epoch": 0.45434259204885785, + "grad_norm": 0.0, + "learning_rate": 1.1952069066282568e-05, + "loss": 1.3047, + "step": 15474 + }, + { + "epoch": 0.4543719537259968, + "grad_norm": 0.0, + "learning_rate": 1.1951136391591382e-05, + "loss": 1.2412, + "step": 15475 + }, + { + "epoch": 0.45440131540313583, + "grad_norm": 0.0, + "learning_rate": 1.1950203699255569e-05, + "loss": 1.3145, + "step": 15476 + }, + { + "epoch": 0.45443067708027485, + "grad_norm": 0.0, + "learning_rate": 1.1949270989283568e-05, + "loss": 1.3169, + "step": 15477 + }, + { + "epoch": 0.4544600387574138, + "grad_norm": 0.0, + "learning_rate": 1.194833826168381e-05, + "loss": 1.2959, + "step": 15478 + }, + { + "epoch": 0.4544894004345528, + "grad_norm": 0.0, + "learning_rate": 1.1947405516464727e-05, + "loss": 1.3604, + "step": 15479 + }, + { + "epoch": 0.45451876211169184, + "grad_norm": 0.0, + "learning_rate": 1.194647275363476e-05, + "loss": 1.3857, + "step": 15480 + }, + { + "epoch": 0.4545481237888308, + "grad_norm": 0.0, + "learning_rate": 1.1945539973202338e-05, + "loss": 1.3701, + "step": 15481 + }, + { + "epoch": 0.4545774854659698, + "grad_norm": 0.0, + "learning_rate": 1.1944607175175902e-05, + "loss": 1.3545, + "step": 15482 + }, + { + "epoch": 0.45460684714310884, + "grad_norm": 0.0, + "learning_rate": 1.1943674359563885e-05, + "loss": 1.377, + "step": 15483 + }, + { + "epoch": 0.4546362088202478, + "grad_norm": 0.0, + "learning_rate": 1.194274152637472e-05, + "loss": 1.2051, + "step": 15484 + }, + { + "epoch": 0.4546655704973868, + "grad_norm": 0.0, + "learning_rate": 1.194180867561685e-05, + "loss": 1.3008, + "step": 15485 + }, + { + "epoch": 0.45469493217452583, + "grad_norm": 0.0, + "learning_rate": 1.1940875807298706e-05, + "loss": 1.2656, + "step": 15486 + }, + { + "epoch": 0.4547242938516648, + "grad_norm": 0.0, + "learning_rate": 1.1939942921428724e-05, + "loss": 1.2686, + "step": 15487 + }, + { + "epoch": 0.4547536555288038, + "grad_norm": 0.0, + "learning_rate": 1.1939010018015345e-05, + "loss": 1.2578, + "step": 15488 + }, + { + "epoch": 0.45478301720594283, + "grad_norm": 0.0, + "learning_rate": 1.1938077097066996e-05, + "loss": 1.4307, + "step": 15489 + }, + { + "epoch": 0.4548123788830818, + "grad_norm": 0.0, + "learning_rate": 1.1937144158592125e-05, + "loss": 1.3066, + "step": 15490 + }, + { + "epoch": 0.4548417405602208, + "grad_norm": 0.0, + "learning_rate": 1.1936211202599159e-05, + "loss": 1.3779, + "step": 15491 + }, + { + "epoch": 0.4548711022373598, + "grad_norm": 0.0, + "learning_rate": 1.193527822909654e-05, + "loss": 1.3086, + "step": 15492 + }, + { + "epoch": 0.4549004639144988, + "grad_norm": 0.0, + "learning_rate": 1.1934345238092705e-05, + "loss": 1.4717, + "step": 15493 + }, + { + "epoch": 0.4549298255916378, + "grad_norm": 0.0, + "learning_rate": 1.1933412229596091e-05, + "loss": 1.2373, + "step": 15494 + }, + { + "epoch": 0.45495918726877677, + "grad_norm": 0.0, + "learning_rate": 1.1932479203615132e-05, + "loss": 1.3887, + "step": 15495 + }, + { + "epoch": 0.4549885489459158, + "grad_norm": 0.0, + "learning_rate": 1.1931546160158272e-05, + "loss": 1.291, + "step": 15496 + }, + { + "epoch": 0.4550179106230548, + "grad_norm": 0.0, + "learning_rate": 1.1930613099233943e-05, + "loss": 1.3818, + "step": 15497 + }, + { + "epoch": 0.45504727230019376, + "grad_norm": 0.0, + "learning_rate": 1.1929680020850589e-05, + "loss": 1.4648, + "step": 15498 + }, + { + "epoch": 0.4550766339773328, + "grad_norm": 0.0, + "learning_rate": 1.1928746925016644e-05, + "loss": 1.2803, + "step": 15499 + }, + { + "epoch": 0.4551059956544718, + "grad_norm": 0.0, + "learning_rate": 1.1927813811740545e-05, + "loss": 1.1948, + "step": 15500 + }, + { + "epoch": 0.45513535733161076, + "grad_norm": 0.0, + "learning_rate": 1.192688068103073e-05, + "loss": 1.3721, + "step": 15501 + }, + { + "epoch": 0.4551647190087498, + "grad_norm": 0.0, + "learning_rate": 1.1925947532895643e-05, + "loss": 1.3184, + "step": 15502 + }, + { + "epoch": 0.4551940806858888, + "grad_norm": 0.0, + "learning_rate": 1.1925014367343717e-05, + "loss": 1.3682, + "step": 15503 + }, + { + "epoch": 0.45522344236302775, + "grad_norm": 0.0, + "learning_rate": 1.1924081184383394e-05, + "loss": 1.416, + "step": 15504 + }, + { + "epoch": 0.45525280404016677, + "grad_norm": 0.0, + "learning_rate": 1.1923147984023111e-05, + "loss": 1.2334, + "step": 15505 + }, + { + "epoch": 0.4552821657173058, + "grad_norm": 0.0, + "learning_rate": 1.192221476627131e-05, + "loss": 1.418, + "step": 15506 + }, + { + "epoch": 0.45531152739444475, + "grad_norm": 0.0, + "learning_rate": 1.1921281531136426e-05, + "loss": 1.3418, + "step": 15507 + }, + { + "epoch": 0.45534088907158377, + "grad_norm": 0.0, + "learning_rate": 1.1920348278626905e-05, + "loss": 1.4355, + "step": 15508 + }, + { + "epoch": 0.4553702507487228, + "grad_norm": 0.0, + "learning_rate": 1.1919415008751181e-05, + "loss": 1.4346, + "step": 15509 + }, + { + "epoch": 0.45539961242586174, + "grad_norm": 0.0, + "learning_rate": 1.1918481721517693e-05, + "loss": 1.3047, + "step": 15510 + }, + { + "epoch": 0.45542897410300076, + "grad_norm": 0.0, + "learning_rate": 1.1917548416934887e-05, + "loss": 1.3789, + "step": 15511 + }, + { + "epoch": 0.4554583357801398, + "grad_norm": 0.0, + "learning_rate": 1.1916615095011198e-05, + "loss": 1.3164, + "step": 15512 + }, + { + "epoch": 0.45548769745727874, + "grad_norm": 0.0, + "learning_rate": 1.1915681755755066e-05, + "loss": 1.2925, + "step": 15513 + }, + { + "epoch": 0.45551705913441776, + "grad_norm": 0.0, + "learning_rate": 1.1914748399174936e-05, + "loss": 1.418, + "step": 15514 + }, + { + "epoch": 0.4555464208115568, + "grad_norm": 0.0, + "learning_rate": 1.1913815025279245e-05, + "loss": 1.252, + "step": 15515 + }, + { + "epoch": 0.45557578248869574, + "grad_norm": 0.0, + "learning_rate": 1.1912881634076434e-05, + "loss": 1.1777, + "step": 15516 + }, + { + "epoch": 0.45560514416583475, + "grad_norm": 0.0, + "learning_rate": 1.1911948225574945e-05, + "loss": 1.2466, + "step": 15517 + }, + { + "epoch": 0.45563450584297377, + "grad_norm": 0.0, + "learning_rate": 1.1911014799783218e-05, + "loss": 1.209, + "step": 15518 + }, + { + "epoch": 0.45566386752011273, + "grad_norm": 0.0, + "learning_rate": 1.1910081356709697e-05, + "loss": 1.3916, + "step": 15519 + }, + { + "epoch": 0.45569322919725175, + "grad_norm": 0.0, + "learning_rate": 1.190914789636282e-05, + "loss": 1.4004, + "step": 15520 + }, + { + "epoch": 0.45572259087439076, + "grad_norm": 0.0, + "learning_rate": 1.1908214418751028e-05, + "loss": 1.3779, + "step": 15521 + }, + { + "epoch": 0.4557519525515297, + "grad_norm": 0.0, + "learning_rate": 1.1907280923882765e-05, + "loss": 1.3154, + "step": 15522 + }, + { + "epoch": 0.45578131422866874, + "grad_norm": 0.0, + "learning_rate": 1.1906347411766475e-05, + "loss": 1.374, + "step": 15523 + }, + { + "epoch": 0.45581067590580776, + "grad_norm": 0.0, + "learning_rate": 1.1905413882410593e-05, + "loss": 1.4189, + "step": 15524 + }, + { + "epoch": 0.4558400375829467, + "grad_norm": 0.0, + "learning_rate": 1.1904480335823566e-05, + "loss": 1.311, + "step": 15525 + }, + { + "epoch": 0.45586939926008574, + "grad_norm": 0.0, + "learning_rate": 1.1903546772013836e-05, + "loss": 1.2734, + "step": 15526 + }, + { + "epoch": 0.45589876093722476, + "grad_norm": 0.0, + "learning_rate": 1.1902613190989849e-05, + "loss": 1.3945, + "step": 15527 + }, + { + "epoch": 0.4559281226143637, + "grad_norm": 0.0, + "learning_rate": 1.1901679592760039e-05, + "loss": 1.2358, + "step": 15528 + }, + { + "epoch": 0.45595748429150273, + "grad_norm": 0.0, + "learning_rate": 1.1900745977332855e-05, + "loss": 1.3311, + "step": 15529 + }, + { + "epoch": 0.45598684596864175, + "grad_norm": 0.0, + "learning_rate": 1.1899812344716736e-05, + "loss": 1.3008, + "step": 15530 + }, + { + "epoch": 0.4560162076457807, + "grad_norm": 0.0, + "learning_rate": 1.189887869492013e-05, + "loss": 1.3701, + "step": 15531 + }, + { + "epoch": 0.45604556932291973, + "grad_norm": 0.0, + "learning_rate": 1.1897945027951479e-05, + "loss": 1.3105, + "step": 15532 + }, + { + "epoch": 0.45607493100005875, + "grad_norm": 0.0, + "learning_rate": 1.189701134381922e-05, + "loss": 1.3369, + "step": 15533 + }, + { + "epoch": 0.4561042926771977, + "grad_norm": 0.0, + "learning_rate": 1.1896077642531804e-05, + "loss": 1.189, + "step": 15534 + }, + { + "epoch": 0.4561336543543367, + "grad_norm": 0.0, + "learning_rate": 1.1895143924097673e-05, + "loss": 1.2178, + "step": 15535 + }, + { + "epoch": 0.45616301603147574, + "grad_norm": 0.0, + "learning_rate": 1.1894210188525271e-05, + "loss": 1.3535, + "step": 15536 + }, + { + "epoch": 0.4561923777086147, + "grad_norm": 0.0, + "learning_rate": 1.1893276435823042e-05, + "loss": 1.1738, + "step": 15537 + }, + { + "epoch": 0.4562217393857537, + "grad_norm": 0.0, + "learning_rate": 1.1892342665999426e-05, + "loss": 1.356, + "step": 15538 + }, + { + "epoch": 0.45625110106289274, + "grad_norm": 0.0, + "learning_rate": 1.1891408879062873e-05, + "loss": 1.3896, + "step": 15539 + }, + { + "epoch": 0.4562804627400317, + "grad_norm": 0.0, + "learning_rate": 1.1890475075021826e-05, + "loss": 1.2275, + "step": 15540 + }, + { + "epoch": 0.4563098244171707, + "grad_norm": 0.0, + "learning_rate": 1.1889541253884725e-05, + "loss": 1.3203, + "step": 15541 + }, + { + "epoch": 0.45633918609430973, + "grad_norm": 0.0, + "learning_rate": 1.188860741566002e-05, + "loss": 1.3574, + "step": 15542 + }, + { + "epoch": 0.4563685477714487, + "grad_norm": 0.0, + "learning_rate": 1.1887673560356157e-05, + "loss": 1.2852, + "step": 15543 + }, + { + "epoch": 0.4563979094485877, + "grad_norm": 0.0, + "learning_rate": 1.1886739687981574e-05, + "loss": 1.4414, + "step": 15544 + }, + { + "epoch": 0.4564272711257267, + "grad_norm": 0.0, + "learning_rate": 1.1885805798544723e-05, + "loss": 1.3672, + "step": 15545 + }, + { + "epoch": 0.4564566328028657, + "grad_norm": 0.0, + "learning_rate": 1.1884871892054049e-05, + "loss": 1.2676, + "step": 15546 + }, + { + "epoch": 0.4564859944800047, + "grad_norm": 0.0, + "learning_rate": 1.1883937968517993e-05, + "loss": 1.3418, + "step": 15547 + }, + { + "epoch": 0.45651535615714367, + "grad_norm": 0.0, + "learning_rate": 1.1883004027945008e-05, + "loss": 1.3018, + "step": 15548 + }, + { + "epoch": 0.4565447178342827, + "grad_norm": 0.0, + "learning_rate": 1.188207007034353e-05, + "loss": 1.3271, + "step": 15549 + }, + { + "epoch": 0.4565740795114217, + "grad_norm": 0.0, + "learning_rate": 1.1881136095722012e-05, + "loss": 1.3711, + "step": 15550 + }, + { + "epoch": 0.45660344118856067, + "grad_norm": 0.0, + "learning_rate": 1.1880202104088898e-05, + "loss": 1.3135, + "step": 15551 + }, + { + "epoch": 0.4566328028656997, + "grad_norm": 0.0, + "learning_rate": 1.1879268095452638e-05, + "loss": 1.3027, + "step": 15552 + }, + { + "epoch": 0.4566621645428387, + "grad_norm": 0.0, + "learning_rate": 1.1878334069821674e-05, + "loss": 1.4658, + "step": 15553 + }, + { + "epoch": 0.45669152621997766, + "grad_norm": 0.0, + "learning_rate": 1.1877400027204451e-05, + "loss": 1.2773, + "step": 15554 + }, + { + "epoch": 0.4567208878971167, + "grad_norm": 0.0, + "learning_rate": 1.1876465967609424e-05, + "loss": 1.4141, + "step": 15555 + }, + { + "epoch": 0.4567502495742557, + "grad_norm": 0.0, + "learning_rate": 1.187553189104503e-05, + "loss": 1.3579, + "step": 15556 + }, + { + "epoch": 0.45677961125139466, + "grad_norm": 0.0, + "learning_rate": 1.1874597797519723e-05, + "loss": 1.2744, + "step": 15557 + }, + { + "epoch": 0.4568089729285337, + "grad_norm": 0.0, + "learning_rate": 1.1873663687041947e-05, + "loss": 1.3359, + "step": 15558 + }, + { + "epoch": 0.4568383346056727, + "grad_norm": 0.0, + "learning_rate": 1.1872729559620148e-05, + "loss": 1.46, + "step": 15559 + }, + { + "epoch": 0.45686769628281165, + "grad_norm": 0.0, + "learning_rate": 1.1871795415262781e-05, + "loss": 1.3516, + "step": 15560 + }, + { + "epoch": 0.45689705795995067, + "grad_norm": 0.0, + "learning_rate": 1.1870861253978286e-05, + "loss": 1.2715, + "step": 15561 + }, + { + "epoch": 0.4569264196370897, + "grad_norm": 0.0, + "learning_rate": 1.1869927075775114e-05, + "loss": 1.3203, + "step": 15562 + }, + { + "epoch": 0.45695578131422865, + "grad_norm": 0.0, + "learning_rate": 1.1868992880661711e-05, + "loss": 1.2734, + "step": 15563 + }, + { + "epoch": 0.45698514299136767, + "grad_norm": 0.0, + "learning_rate": 1.1868058668646529e-05, + "loss": 1.2324, + "step": 15564 + }, + { + "epoch": 0.4570145046685067, + "grad_norm": 0.0, + "learning_rate": 1.1867124439738014e-05, + "loss": 1.4795, + "step": 15565 + }, + { + "epoch": 0.45704386634564564, + "grad_norm": 0.0, + "learning_rate": 1.1866190193944613e-05, + "loss": 1.3232, + "step": 15566 + }, + { + "epoch": 0.45707322802278466, + "grad_norm": 0.0, + "learning_rate": 1.1865255931274777e-05, + "loss": 1.3936, + "step": 15567 + }, + { + "epoch": 0.4571025896999237, + "grad_norm": 0.0, + "learning_rate": 1.1864321651736957e-05, + "loss": 1.1523, + "step": 15568 + }, + { + "epoch": 0.45713195137706264, + "grad_norm": 0.0, + "learning_rate": 1.1863387355339595e-05, + "loss": 1.3408, + "step": 15569 + }, + { + "epoch": 0.45716131305420166, + "grad_norm": 0.0, + "learning_rate": 1.1862453042091144e-05, + "loss": 1.3457, + "step": 15570 + }, + { + "epoch": 0.4571906747313407, + "grad_norm": 0.0, + "learning_rate": 1.1861518712000053e-05, + "loss": 1.3281, + "step": 15571 + }, + { + "epoch": 0.45722003640847964, + "grad_norm": 0.0, + "learning_rate": 1.1860584365074772e-05, + "loss": 1.3516, + "step": 15572 + }, + { + "epoch": 0.45724939808561865, + "grad_norm": 0.0, + "learning_rate": 1.185965000132375e-05, + "loss": 1.2891, + "step": 15573 + }, + { + "epoch": 0.45727875976275767, + "grad_norm": 0.0, + "learning_rate": 1.1858715620755437e-05, + "loss": 1.1768, + "step": 15574 + }, + { + "epoch": 0.45730812143989663, + "grad_norm": 0.0, + "learning_rate": 1.1857781223378282e-05, + "loss": 1.4404, + "step": 15575 + }, + { + "epoch": 0.45733748311703565, + "grad_norm": 0.0, + "learning_rate": 1.1856846809200738e-05, + "loss": 1.3486, + "step": 15576 + }, + { + "epoch": 0.45736684479417467, + "grad_norm": 0.0, + "learning_rate": 1.1855912378231254e-05, + "loss": 1.4072, + "step": 15577 + }, + { + "epoch": 0.4573962064713136, + "grad_norm": 0.0, + "learning_rate": 1.1854977930478272e-05, + "loss": 1.3994, + "step": 15578 + }, + { + "epoch": 0.45742556814845264, + "grad_norm": 0.0, + "learning_rate": 1.1854043465950256e-05, + "loss": 1.3721, + "step": 15579 + }, + { + "epoch": 0.45745492982559166, + "grad_norm": 0.0, + "learning_rate": 1.1853108984655647e-05, + "loss": 1.3428, + "step": 15580 + }, + { + "epoch": 0.4574842915027306, + "grad_norm": 0.0, + "learning_rate": 1.1852174486602902e-05, + "loss": 1.3809, + "step": 15581 + }, + { + "epoch": 0.45751365317986964, + "grad_norm": 0.0, + "learning_rate": 1.1851239971800463e-05, + "loss": 1.3936, + "step": 15582 + }, + { + "epoch": 0.45754301485700866, + "grad_norm": 0.0, + "learning_rate": 1.185030544025679e-05, + "loss": 1.1899, + "step": 15583 + }, + { + "epoch": 0.4575723765341476, + "grad_norm": 0.0, + "learning_rate": 1.1849370891980334e-05, + "loss": 1.3779, + "step": 15584 + }, + { + "epoch": 0.45760173821128663, + "grad_norm": 0.0, + "learning_rate": 1.184843632697954e-05, + "loss": 1.3027, + "step": 15585 + }, + { + "epoch": 0.45763109988842565, + "grad_norm": 0.0, + "learning_rate": 1.1847501745262864e-05, + "loss": 1.376, + "step": 15586 + }, + { + "epoch": 0.4576604615655646, + "grad_norm": 0.0, + "learning_rate": 1.1846567146838756e-05, + "loss": 1.4121, + "step": 15587 + }, + { + "epoch": 0.45768982324270363, + "grad_norm": 0.0, + "learning_rate": 1.1845632531715668e-05, + "loss": 1.415, + "step": 15588 + }, + { + "epoch": 0.45771918491984265, + "grad_norm": 0.0, + "learning_rate": 1.1844697899902054e-05, + "loss": 1.3789, + "step": 15589 + }, + { + "epoch": 0.4577485465969816, + "grad_norm": 0.0, + "learning_rate": 1.1843763251406364e-05, + "loss": 1.3311, + "step": 15590 + }, + { + "epoch": 0.4577779082741206, + "grad_norm": 0.0, + "learning_rate": 1.1842828586237047e-05, + "loss": 1.2803, + "step": 15591 + }, + { + "epoch": 0.45780726995125964, + "grad_norm": 0.0, + "learning_rate": 1.1841893904402567e-05, + "loss": 1.4434, + "step": 15592 + }, + { + "epoch": 0.4578366316283986, + "grad_norm": 0.0, + "learning_rate": 1.1840959205911362e-05, + "loss": 1.25, + "step": 15593 + }, + { + "epoch": 0.4578659933055376, + "grad_norm": 0.0, + "learning_rate": 1.1840024490771893e-05, + "loss": 1.2324, + "step": 15594 + }, + { + "epoch": 0.4578953549826766, + "grad_norm": 0.0, + "learning_rate": 1.183908975899261e-05, + "loss": 1.3418, + "step": 15595 + }, + { + "epoch": 0.4579247166598156, + "grad_norm": 0.0, + "learning_rate": 1.183815501058197e-05, + "loss": 1.3584, + "step": 15596 + }, + { + "epoch": 0.4579540783369546, + "grad_norm": 0.0, + "learning_rate": 1.1837220245548425e-05, + "loss": 1.2598, + "step": 15597 + }, + { + "epoch": 0.4579834400140936, + "grad_norm": 0.0, + "learning_rate": 1.1836285463900424e-05, + "loss": 1.3203, + "step": 15598 + }, + { + "epoch": 0.4580128016912326, + "grad_norm": 0.0, + "learning_rate": 1.1835350665646422e-05, + "loss": 1.4453, + "step": 15599 + }, + { + "epoch": 0.4580421633683716, + "grad_norm": 0.0, + "learning_rate": 1.1834415850794874e-05, + "loss": 1.2715, + "step": 15600 + }, + { + "epoch": 0.4580715250455106, + "grad_norm": 0.0, + "learning_rate": 1.1833481019354236e-05, + "loss": 1.3574, + "step": 15601 + }, + { + "epoch": 0.4581008867226496, + "grad_norm": 0.0, + "learning_rate": 1.1832546171332959e-05, + "loss": 1.3057, + "step": 15602 + }, + { + "epoch": 0.4581302483997886, + "grad_norm": 0.0, + "learning_rate": 1.1831611306739496e-05, + "loss": 1.3652, + "step": 15603 + }, + { + "epoch": 0.45815961007692757, + "grad_norm": 0.0, + "learning_rate": 1.1830676425582303e-05, + "loss": 1.3286, + "step": 15604 + }, + { + "epoch": 0.4581889717540666, + "grad_norm": 0.0, + "learning_rate": 1.1829741527869838e-05, + "loss": 1.2534, + "step": 15605 + }, + { + "epoch": 0.4582183334312056, + "grad_norm": 0.0, + "learning_rate": 1.1828806613610551e-05, + "loss": 1.2666, + "step": 15606 + }, + { + "epoch": 0.45824769510834457, + "grad_norm": 0.0, + "learning_rate": 1.1827871682812894e-05, + "loss": 1.3516, + "step": 15607 + }, + { + "epoch": 0.4582770567854836, + "grad_norm": 0.0, + "learning_rate": 1.1826936735485325e-05, + "loss": 1.2041, + "step": 15608 + }, + { + "epoch": 0.4583064184626226, + "grad_norm": 0.0, + "learning_rate": 1.1826001771636304e-05, + "loss": 1.5361, + "step": 15609 + }, + { + "epoch": 0.45833578013976156, + "grad_norm": 0.0, + "learning_rate": 1.1825066791274278e-05, + "loss": 1.2451, + "step": 15610 + }, + { + "epoch": 0.4583651418169006, + "grad_norm": 0.0, + "learning_rate": 1.1824131794407706e-05, + "loss": 1.2417, + "step": 15611 + }, + { + "epoch": 0.4583945034940396, + "grad_norm": 0.0, + "learning_rate": 1.1823196781045042e-05, + "loss": 1.2744, + "step": 15612 + }, + { + "epoch": 0.45842386517117856, + "grad_norm": 0.0, + "learning_rate": 1.1822261751194744e-05, + "loss": 1.3262, + "step": 15613 + }, + { + "epoch": 0.4584532268483176, + "grad_norm": 0.0, + "learning_rate": 1.1821326704865266e-05, + "loss": 1.5176, + "step": 15614 + }, + { + "epoch": 0.4584825885254566, + "grad_norm": 0.0, + "learning_rate": 1.1820391642065063e-05, + "loss": 1.3311, + "step": 15615 + }, + { + "epoch": 0.45851195020259555, + "grad_norm": 0.0, + "learning_rate": 1.1819456562802594e-05, + "loss": 1.3037, + "step": 15616 + }, + { + "epoch": 0.45854131187973457, + "grad_norm": 0.0, + "learning_rate": 1.1818521467086315e-05, + "loss": 1.373, + "step": 15617 + }, + { + "epoch": 0.4585706735568736, + "grad_norm": 0.0, + "learning_rate": 1.1817586354924677e-05, + "loss": 1.2285, + "step": 15618 + }, + { + "epoch": 0.45860003523401255, + "grad_norm": 0.0, + "learning_rate": 1.181665122632614e-05, + "loss": 1.3135, + "step": 15619 + }, + { + "epoch": 0.45862939691115157, + "grad_norm": 0.0, + "learning_rate": 1.1815716081299162e-05, + "loss": 1.3516, + "step": 15620 + }, + { + "epoch": 0.4586587585882906, + "grad_norm": 0.0, + "learning_rate": 1.18147809198522e-05, + "loss": 1.2871, + "step": 15621 + }, + { + "epoch": 0.45868812026542954, + "grad_norm": 0.0, + "learning_rate": 1.1813845741993706e-05, + "loss": 1.2871, + "step": 15622 + }, + { + "epoch": 0.45871748194256856, + "grad_norm": 0.0, + "learning_rate": 1.1812910547732142e-05, + "loss": 1.2939, + "step": 15623 + }, + { + "epoch": 0.4587468436197076, + "grad_norm": 0.0, + "learning_rate": 1.1811975337075963e-05, + "loss": 1.2607, + "step": 15624 + }, + { + "epoch": 0.45877620529684654, + "grad_norm": 0.0, + "learning_rate": 1.1811040110033629e-05, + "loss": 1.3584, + "step": 15625 + }, + { + "epoch": 0.45880556697398556, + "grad_norm": 0.0, + "learning_rate": 1.1810104866613596e-05, + "loss": 1.2856, + "step": 15626 + }, + { + "epoch": 0.4588349286511246, + "grad_norm": 0.0, + "learning_rate": 1.1809169606824318e-05, + "loss": 1.3643, + "step": 15627 + }, + { + "epoch": 0.45886429032826354, + "grad_norm": 0.0, + "learning_rate": 1.1808234330674257e-05, + "loss": 1.3452, + "step": 15628 + }, + { + "epoch": 0.45889365200540255, + "grad_norm": 0.0, + "learning_rate": 1.1807299038171874e-05, + "loss": 1.3901, + "step": 15629 + }, + { + "epoch": 0.45892301368254157, + "grad_norm": 0.0, + "learning_rate": 1.1806363729325617e-05, + "loss": 1.2246, + "step": 15630 + }, + { + "epoch": 0.45895237535968053, + "grad_norm": 0.0, + "learning_rate": 1.180542840414395e-05, + "loss": 1.25, + "step": 15631 + }, + { + "epoch": 0.45898173703681955, + "grad_norm": 0.0, + "learning_rate": 1.1804493062635333e-05, + "loss": 1.249, + "step": 15632 + }, + { + "epoch": 0.45901109871395857, + "grad_norm": 0.0, + "learning_rate": 1.1803557704808227e-05, + "loss": 1.4395, + "step": 15633 + }, + { + "epoch": 0.4590404603910975, + "grad_norm": 0.0, + "learning_rate": 1.1802622330671083e-05, + "loss": 1.2939, + "step": 15634 + }, + { + "epoch": 0.45906982206823654, + "grad_norm": 0.0, + "learning_rate": 1.1801686940232365e-05, + "loss": 1.3652, + "step": 15635 + }, + { + "epoch": 0.45909918374537556, + "grad_norm": 0.0, + "learning_rate": 1.180075153350053e-05, + "loss": 1.3574, + "step": 15636 + }, + { + "epoch": 0.4591285454225145, + "grad_norm": 0.0, + "learning_rate": 1.1799816110484037e-05, + "loss": 1.3721, + "step": 15637 + }, + { + "epoch": 0.45915790709965354, + "grad_norm": 0.0, + "learning_rate": 1.1798880671191349e-05, + "loss": 1.2949, + "step": 15638 + }, + { + "epoch": 0.45918726877679256, + "grad_norm": 0.0, + "learning_rate": 1.1797945215630916e-05, + "loss": 1.3975, + "step": 15639 + }, + { + "epoch": 0.4592166304539315, + "grad_norm": 0.0, + "learning_rate": 1.179700974381121e-05, + "loss": 1.4531, + "step": 15640 + }, + { + "epoch": 0.45924599213107054, + "grad_norm": 0.0, + "learning_rate": 1.1796074255740681e-05, + "loss": 1.3955, + "step": 15641 + }, + { + "epoch": 0.45927535380820955, + "grad_norm": 0.0, + "learning_rate": 1.1795138751427797e-05, + "loss": 1.3408, + "step": 15642 + }, + { + "epoch": 0.4593047154853485, + "grad_norm": 0.0, + "learning_rate": 1.179420323088101e-05, + "loss": 1.3066, + "step": 15643 + }, + { + "epoch": 0.45933407716248753, + "grad_norm": 0.0, + "learning_rate": 1.1793267694108783e-05, + "loss": 1.3379, + "step": 15644 + }, + { + "epoch": 0.4593634388396265, + "grad_norm": 0.0, + "learning_rate": 1.179233214111958e-05, + "loss": 1.4502, + "step": 15645 + }, + { + "epoch": 0.4593928005167655, + "grad_norm": 0.0, + "learning_rate": 1.1791396571921858e-05, + "loss": 1.2637, + "step": 15646 + }, + { + "epoch": 0.4594221621939045, + "grad_norm": 0.0, + "learning_rate": 1.1790460986524075e-05, + "loss": 1.2793, + "step": 15647 + }, + { + "epoch": 0.4594515238710435, + "grad_norm": 0.0, + "learning_rate": 1.1789525384934697e-05, + "loss": 1.2319, + "step": 15648 + }, + { + "epoch": 0.4594808855481825, + "grad_norm": 0.0, + "learning_rate": 1.1788589767162181e-05, + "loss": 1.3516, + "step": 15649 + }, + { + "epoch": 0.4595102472253215, + "grad_norm": 0.0, + "learning_rate": 1.1787654133214992e-05, + "loss": 1.2178, + "step": 15650 + }, + { + "epoch": 0.4595396089024605, + "grad_norm": 0.0, + "learning_rate": 1.1786718483101587e-05, + "loss": 1.3604, + "step": 15651 + }, + { + "epoch": 0.4595689705795995, + "grad_norm": 0.0, + "learning_rate": 1.178578281683043e-05, + "loss": 1.3906, + "step": 15652 + }, + { + "epoch": 0.4595983322567385, + "grad_norm": 0.0, + "learning_rate": 1.178484713440998e-05, + "loss": 1.3828, + "step": 15653 + }, + { + "epoch": 0.4596276939338775, + "grad_norm": 0.0, + "learning_rate": 1.1783911435848703e-05, + "loss": 1.3809, + "step": 15654 + }, + { + "epoch": 0.4596570556110165, + "grad_norm": 0.0, + "learning_rate": 1.178297572115506e-05, + "loss": 1.2368, + "step": 15655 + }, + { + "epoch": 0.4596864172881555, + "grad_norm": 0.0, + "learning_rate": 1.1782039990337507e-05, + "loss": 1.2363, + "step": 15656 + }, + { + "epoch": 0.4597157789652945, + "grad_norm": 0.0, + "learning_rate": 1.1781104243404508e-05, + "loss": 1.3389, + "step": 15657 + }, + { + "epoch": 0.4597451406424335, + "grad_norm": 0.0, + "learning_rate": 1.1780168480364531e-05, + "loss": 1.4336, + "step": 15658 + }, + { + "epoch": 0.4597745023195725, + "grad_norm": 0.0, + "learning_rate": 1.1779232701226034e-05, + "loss": 1.3262, + "step": 15659 + }, + { + "epoch": 0.45980386399671147, + "grad_norm": 0.0, + "learning_rate": 1.177829690599748e-05, + "loss": 1.458, + "step": 15660 + }, + { + "epoch": 0.4598332256738505, + "grad_norm": 0.0, + "learning_rate": 1.177736109468733e-05, + "loss": 1.2285, + "step": 15661 + }, + { + "epoch": 0.4598625873509895, + "grad_norm": 0.0, + "learning_rate": 1.177642526730405e-05, + "loss": 1.2705, + "step": 15662 + }, + { + "epoch": 0.45989194902812847, + "grad_norm": 0.0, + "learning_rate": 1.17754894238561e-05, + "loss": 1.3506, + "step": 15663 + }, + { + "epoch": 0.4599213107052675, + "grad_norm": 0.0, + "learning_rate": 1.1774553564351944e-05, + "loss": 1.3818, + "step": 15664 + }, + { + "epoch": 0.4599506723824065, + "grad_norm": 0.0, + "learning_rate": 1.1773617688800049e-05, + "loss": 1.2241, + "step": 15665 + }, + { + "epoch": 0.45998003405954546, + "grad_norm": 0.0, + "learning_rate": 1.1772681797208871e-05, + "loss": 1.2905, + "step": 15666 + }, + { + "epoch": 0.4600093957366845, + "grad_norm": 0.0, + "learning_rate": 1.1771745889586882e-05, + "loss": 1.3945, + "step": 15667 + }, + { + "epoch": 0.4600387574138235, + "grad_norm": 0.0, + "learning_rate": 1.1770809965942538e-05, + "loss": 1.2681, + "step": 15668 + }, + { + "epoch": 0.46006811909096246, + "grad_norm": 0.0, + "learning_rate": 1.1769874026284308e-05, + "loss": 1.2637, + "step": 15669 + }, + { + "epoch": 0.4600974807681015, + "grad_norm": 0.0, + "learning_rate": 1.1768938070620653e-05, + "loss": 1.3477, + "step": 15670 + }, + { + "epoch": 0.4601268424452405, + "grad_norm": 0.0, + "learning_rate": 1.1768002098960037e-05, + "loss": 1.416, + "step": 15671 + }, + { + "epoch": 0.46015620412237945, + "grad_norm": 0.0, + "learning_rate": 1.1767066111310924e-05, + "loss": 1.3018, + "step": 15672 + }, + { + "epoch": 0.46018556579951847, + "grad_norm": 0.0, + "learning_rate": 1.1766130107681782e-05, + "loss": 1.3271, + "step": 15673 + }, + { + "epoch": 0.4602149274766575, + "grad_norm": 0.0, + "learning_rate": 1.1765194088081073e-05, + "loss": 1.1846, + "step": 15674 + }, + { + "epoch": 0.46024428915379645, + "grad_norm": 0.0, + "learning_rate": 1.1764258052517261e-05, + "loss": 1.3281, + "step": 15675 + }, + { + "epoch": 0.46027365083093547, + "grad_norm": 0.0, + "learning_rate": 1.1763322000998811e-05, + "loss": 1.2949, + "step": 15676 + }, + { + "epoch": 0.4603030125080745, + "grad_norm": 0.0, + "learning_rate": 1.176238593353419e-05, + "loss": 1.4658, + "step": 15677 + }, + { + "epoch": 0.46033237418521344, + "grad_norm": 0.0, + "learning_rate": 1.1761449850131861e-05, + "loss": 1.3809, + "step": 15678 + }, + { + "epoch": 0.46036173586235246, + "grad_norm": 0.0, + "learning_rate": 1.1760513750800292e-05, + "loss": 1.1157, + "step": 15679 + }, + { + "epoch": 0.4603910975394915, + "grad_norm": 0.0, + "learning_rate": 1.175957763554794e-05, + "loss": 1.2979, + "step": 15680 + }, + { + "epoch": 0.46042045921663044, + "grad_norm": 0.0, + "learning_rate": 1.1758641504383282e-05, + "loss": 1.2881, + "step": 15681 + }, + { + "epoch": 0.46044982089376946, + "grad_norm": 0.0, + "learning_rate": 1.1757705357314779e-05, + "loss": 1.3115, + "step": 15682 + }, + { + "epoch": 0.4604791825709085, + "grad_norm": 0.0, + "learning_rate": 1.1756769194350894e-05, + "loss": 1.2065, + "step": 15683 + }, + { + "epoch": 0.46050854424804744, + "grad_norm": 0.0, + "learning_rate": 1.1755833015500095e-05, + "loss": 1.4209, + "step": 15684 + }, + { + "epoch": 0.46053790592518645, + "grad_norm": 0.0, + "learning_rate": 1.175489682077085e-05, + "loss": 1.4043, + "step": 15685 + }, + { + "epoch": 0.46056726760232547, + "grad_norm": 0.0, + "learning_rate": 1.1753960610171624e-05, + "loss": 1.3008, + "step": 15686 + }, + { + "epoch": 0.46059662927946443, + "grad_norm": 0.0, + "learning_rate": 1.1753024383710883e-05, + "loss": 1.2622, + "step": 15687 + }, + { + "epoch": 0.46062599095660345, + "grad_norm": 0.0, + "learning_rate": 1.1752088141397091e-05, + "loss": 1.4277, + "step": 15688 + }, + { + "epoch": 0.46065535263374247, + "grad_norm": 0.0, + "learning_rate": 1.1751151883238718e-05, + "loss": 1.3301, + "step": 15689 + }, + { + "epoch": 0.4606847143108814, + "grad_norm": 0.0, + "learning_rate": 1.1750215609244232e-05, + "loss": 1.374, + "step": 15690 + }, + { + "epoch": 0.46071407598802044, + "grad_norm": 0.0, + "learning_rate": 1.1749279319422098e-05, + "loss": 1.4067, + "step": 15691 + }, + { + "epoch": 0.46074343766515946, + "grad_norm": 0.0, + "learning_rate": 1.174834301378078e-05, + "loss": 1.3345, + "step": 15692 + }, + { + "epoch": 0.4607727993422984, + "grad_norm": 0.0, + "learning_rate": 1.1747406692328751e-05, + "loss": 1.2598, + "step": 15693 + }, + { + "epoch": 0.46080216101943744, + "grad_norm": 0.0, + "learning_rate": 1.1746470355074473e-05, + "loss": 1.3193, + "step": 15694 + }, + { + "epoch": 0.4608315226965764, + "grad_norm": 0.0, + "learning_rate": 1.1745534002026422e-05, + "loss": 1.3271, + "step": 15695 + }, + { + "epoch": 0.4608608843737154, + "grad_norm": 0.0, + "learning_rate": 1.1744597633193057e-05, + "loss": 1.3853, + "step": 15696 + }, + { + "epoch": 0.46089024605085444, + "grad_norm": 0.0, + "learning_rate": 1.1743661248582848e-05, + "loss": 1.2139, + "step": 15697 + }, + { + "epoch": 0.4609196077279934, + "grad_norm": 0.0, + "learning_rate": 1.1742724848204263e-05, + "loss": 1.2983, + "step": 15698 + }, + { + "epoch": 0.4609489694051324, + "grad_norm": 0.0, + "learning_rate": 1.1741788432065772e-05, + "loss": 1.1401, + "step": 15699 + }, + { + "epoch": 0.46097833108227143, + "grad_norm": 0.0, + "learning_rate": 1.1740852000175842e-05, + "loss": 1.2236, + "step": 15700 + }, + { + "epoch": 0.4610076927594104, + "grad_norm": 0.0, + "learning_rate": 1.173991555254294e-05, + "loss": 1.2812, + "step": 15701 + }, + { + "epoch": 0.4610370544365494, + "grad_norm": 0.0, + "learning_rate": 1.1738979089175537e-05, + "loss": 1.3916, + "step": 15702 + }, + { + "epoch": 0.4610664161136884, + "grad_norm": 0.0, + "learning_rate": 1.1738042610082104e-05, + "loss": 1.3799, + "step": 15703 + }, + { + "epoch": 0.4610957777908274, + "grad_norm": 0.0, + "learning_rate": 1.1737106115271102e-05, + "loss": 1.3936, + "step": 15704 + }, + { + "epoch": 0.4611251394679664, + "grad_norm": 0.0, + "learning_rate": 1.1736169604751009e-05, + "loss": 1.2539, + "step": 15705 + }, + { + "epoch": 0.4611545011451054, + "grad_norm": 0.0, + "learning_rate": 1.1735233078530286e-05, + "loss": 1.3906, + "step": 15706 + }, + { + "epoch": 0.4611838628222444, + "grad_norm": 0.0, + "learning_rate": 1.173429653661741e-05, + "loss": 1.3032, + "step": 15707 + }, + { + "epoch": 0.4612132244993834, + "grad_norm": 0.0, + "learning_rate": 1.173335997902084e-05, + "loss": 1.2217, + "step": 15708 + }, + { + "epoch": 0.4612425861765224, + "grad_norm": 0.0, + "learning_rate": 1.1732423405749054e-05, + "loss": 1.4004, + "step": 15709 + }, + { + "epoch": 0.4612719478536614, + "grad_norm": 0.0, + "learning_rate": 1.173148681681052e-05, + "loss": 1.2666, + "step": 15710 + }, + { + "epoch": 0.4613013095308004, + "grad_norm": 0.0, + "learning_rate": 1.173055021221371e-05, + "loss": 1.4141, + "step": 15711 + }, + { + "epoch": 0.4613306712079394, + "grad_norm": 0.0, + "learning_rate": 1.1729613591967089e-05, + "loss": 1.3262, + "step": 15712 + }, + { + "epoch": 0.4613600328850784, + "grad_norm": 0.0, + "learning_rate": 1.1728676956079128e-05, + "loss": 1.2705, + "step": 15713 + }, + { + "epoch": 0.4613893945622174, + "grad_norm": 0.0, + "learning_rate": 1.17277403045583e-05, + "loss": 1.208, + "step": 15714 + }, + { + "epoch": 0.4614187562393564, + "grad_norm": 0.0, + "learning_rate": 1.1726803637413073e-05, + "loss": 1.3174, + "step": 15715 + }, + { + "epoch": 0.46144811791649537, + "grad_norm": 0.0, + "learning_rate": 1.1725866954651921e-05, + "loss": 1.3486, + "step": 15716 + }, + { + "epoch": 0.4614774795936344, + "grad_norm": 0.0, + "learning_rate": 1.1724930256283309e-05, + "loss": 1.3223, + "step": 15717 + }, + { + "epoch": 0.4615068412707734, + "grad_norm": 0.0, + "learning_rate": 1.1723993542315713e-05, + "loss": 1.2295, + "step": 15718 + }, + { + "epoch": 0.46153620294791237, + "grad_norm": 0.0, + "learning_rate": 1.17230568127576e-05, + "loss": 1.2783, + "step": 15719 + }, + { + "epoch": 0.4615655646250514, + "grad_norm": 0.0, + "learning_rate": 1.1722120067617445e-05, + "loss": 1.3877, + "step": 15720 + }, + { + "epoch": 0.4615949263021904, + "grad_norm": 0.0, + "learning_rate": 1.1721183306903715e-05, + "loss": 1.3926, + "step": 15721 + }, + { + "epoch": 0.46162428797932936, + "grad_norm": 0.0, + "learning_rate": 1.1720246530624883e-05, + "loss": 1.2646, + "step": 15722 + }, + { + "epoch": 0.4616536496564684, + "grad_norm": 0.0, + "learning_rate": 1.1719309738789426e-05, + "loss": 1.4053, + "step": 15723 + }, + { + "epoch": 0.4616830113336074, + "grad_norm": 0.0, + "learning_rate": 1.1718372931405809e-05, + "loss": 1.2686, + "step": 15724 + }, + { + "epoch": 0.46171237301074636, + "grad_norm": 0.0, + "learning_rate": 1.1717436108482502e-05, + "loss": 1.4336, + "step": 15725 + }, + { + "epoch": 0.4617417346878854, + "grad_norm": 0.0, + "learning_rate": 1.1716499270027984e-05, + "loss": 1.3584, + "step": 15726 + }, + { + "epoch": 0.4617710963650244, + "grad_norm": 0.0, + "learning_rate": 1.1715562416050721e-05, + "loss": 1.3691, + "step": 15727 + }, + { + "epoch": 0.46180045804216335, + "grad_norm": 0.0, + "learning_rate": 1.1714625546559191e-05, + "loss": 1.229, + "step": 15728 + }, + { + "epoch": 0.46182981971930237, + "grad_norm": 0.0, + "learning_rate": 1.171368866156186e-05, + "loss": 1.335, + "step": 15729 + }, + { + "epoch": 0.4618591813964414, + "grad_norm": 0.0, + "learning_rate": 1.1712751761067202e-05, + "loss": 1.2783, + "step": 15730 + }, + { + "epoch": 0.46188854307358035, + "grad_norm": 0.0, + "learning_rate": 1.1711814845083693e-05, + "loss": 1.2119, + "step": 15731 + }, + { + "epoch": 0.46191790475071937, + "grad_norm": 0.0, + "learning_rate": 1.1710877913619807e-05, + "loss": 1.29, + "step": 15732 + }, + { + "epoch": 0.4619472664278584, + "grad_norm": 0.0, + "learning_rate": 1.170994096668401e-05, + "loss": 1.1436, + "step": 15733 + }, + { + "epoch": 0.46197662810499734, + "grad_norm": 0.0, + "learning_rate": 1.170900400428478e-05, + "loss": 1.3447, + "step": 15734 + }, + { + "epoch": 0.46200598978213636, + "grad_norm": 0.0, + "learning_rate": 1.170806702643059e-05, + "loss": 1.2822, + "step": 15735 + }, + { + "epoch": 0.4620353514592754, + "grad_norm": 0.0, + "learning_rate": 1.1707130033129914e-05, + "loss": 1.3135, + "step": 15736 + }, + { + "epoch": 0.46206471313641434, + "grad_norm": 0.0, + "learning_rate": 1.170619302439122e-05, + "loss": 1.3296, + "step": 15737 + }, + { + "epoch": 0.46209407481355336, + "grad_norm": 0.0, + "learning_rate": 1.1705256000222986e-05, + "loss": 1.1587, + "step": 15738 + }, + { + "epoch": 0.4621234364906924, + "grad_norm": 0.0, + "learning_rate": 1.1704318960633687e-05, + "loss": 1.2754, + "step": 15739 + }, + { + "epoch": 0.46215279816783134, + "grad_norm": 0.0, + "learning_rate": 1.1703381905631796e-05, + "loss": 1.2998, + "step": 15740 + }, + { + "epoch": 0.46218215984497035, + "grad_norm": 0.0, + "learning_rate": 1.1702444835225784e-05, + "loss": 1.376, + "step": 15741 + }, + { + "epoch": 0.46221152152210937, + "grad_norm": 0.0, + "learning_rate": 1.1701507749424129e-05, + "loss": 1.4229, + "step": 15742 + }, + { + "epoch": 0.46224088319924833, + "grad_norm": 0.0, + "learning_rate": 1.1700570648235301e-05, + "loss": 1.29, + "step": 15743 + }, + { + "epoch": 0.46227024487638735, + "grad_norm": 0.0, + "learning_rate": 1.1699633531667783e-05, + "loss": 1.2598, + "step": 15744 + }, + { + "epoch": 0.4622996065535263, + "grad_norm": 0.0, + "learning_rate": 1.1698696399730038e-05, + "loss": 1.3545, + "step": 15745 + }, + { + "epoch": 0.4623289682306653, + "grad_norm": 0.0, + "learning_rate": 1.1697759252430548e-05, + "loss": 1.3408, + "step": 15746 + }, + { + "epoch": 0.46235832990780434, + "grad_norm": 0.0, + "learning_rate": 1.1696822089777784e-05, + "loss": 1.4834, + "step": 15747 + }, + { + "epoch": 0.4623876915849433, + "grad_norm": 0.0, + "learning_rate": 1.169588491178023e-05, + "loss": 1.4414, + "step": 15748 + }, + { + "epoch": 0.4624170532620823, + "grad_norm": 0.0, + "learning_rate": 1.1694947718446348e-05, + "loss": 1.2998, + "step": 15749 + }, + { + "epoch": 0.46244641493922134, + "grad_norm": 0.0, + "learning_rate": 1.1694010509784622e-05, + "loss": 1.3701, + "step": 15750 + }, + { + "epoch": 0.4624757766163603, + "grad_norm": 0.0, + "learning_rate": 1.1693073285803522e-05, + "loss": 1.3506, + "step": 15751 + }, + { + "epoch": 0.4625051382934993, + "grad_norm": 0.0, + "learning_rate": 1.169213604651153e-05, + "loss": 1.252, + "step": 15752 + }, + { + "epoch": 0.46253449997063834, + "grad_norm": 0.0, + "learning_rate": 1.1691198791917119e-05, + "loss": 1.4434, + "step": 15753 + }, + { + "epoch": 0.4625638616477773, + "grad_norm": 0.0, + "learning_rate": 1.1690261522028762e-05, + "loss": 1.3975, + "step": 15754 + }, + { + "epoch": 0.4625932233249163, + "grad_norm": 0.0, + "learning_rate": 1.168932423685494e-05, + "loss": 1.3271, + "step": 15755 + }, + { + "epoch": 0.46262258500205533, + "grad_norm": 0.0, + "learning_rate": 1.1688386936404123e-05, + "loss": 1.1938, + "step": 15756 + }, + { + "epoch": 0.4626519466791943, + "grad_norm": 0.0, + "learning_rate": 1.1687449620684795e-05, + "loss": 1.1006, + "step": 15757 + }, + { + "epoch": 0.4626813083563333, + "grad_norm": 0.0, + "learning_rate": 1.1686512289705422e-05, + "loss": 1.3252, + "step": 15758 + }, + { + "epoch": 0.4627106700334723, + "grad_norm": 0.0, + "learning_rate": 1.168557494347449e-05, + "loss": 1.2539, + "step": 15759 + }, + { + "epoch": 0.4627400317106113, + "grad_norm": 0.0, + "learning_rate": 1.1684637582000473e-05, + "loss": 1.3511, + "step": 15760 + }, + { + "epoch": 0.4627693933877503, + "grad_norm": 0.0, + "learning_rate": 1.1683700205291846e-05, + "loss": 1.1875, + "step": 15761 + }, + { + "epoch": 0.4627987550648893, + "grad_norm": 0.0, + "learning_rate": 1.1682762813357084e-05, + "loss": 1.415, + "step": 15762 + }, + { + "epoch": 0.4628281167420283, + "grad_norm": 0.0, + "learning_rate": 1.168182540620467e-05, + "loss": 1.5098, + "step": 15763 + }, + { + "epoch": 0.4628574784191673, + "grad_norm": 0.0, + "learning_rate": 1.1680887983843076e-05, + "loss": 1.5215, + "step": 15764 + }, + { + "epoch": 0.4628868400963063, + "grad_norm": 0.0, + "learning_rate": 1.1679950546280785e-05, + "loss": 1.2891, + "step": 15765 + }, + { + "epoch": 0.4629162017734453, + "grad_norm": 0.0, + "learning_rate": 1.1679013093526268e-05, + "loss": 1.5039, + "step": 15766 + }, + { + "epoch": 0.4629455634505843, + "grad_norm": 0.0, + "learning_rate": 1.1678075625588009e-05, + "loss": 1.3408, + "step": 15767 + }, + { + "epoch": 0.4629749251277233, + "grad_norm": 0.0, + "learning_rate": 1.1677138142474478e-05, + "loss": 1.3643, + "step": 15768 + }, + { + "epoch": 0.4630042868048623, + "grad_norm": 0.0, + "learning_rate": 1.167620064419416e-05, + "loss": 1.3438, + "step": 15769 + }, + { + "epoch": 0.4630336484820013, + "grad_norm": 0.0, + "learning_rate": 1.1675263130755528e-05, + "loss": 1.5322, + "step": 15770 + }, + { + "epoch": 0.4630630101591403, + "grad_norm": 0.0, + "learning_rate": 1.1674325602167063e-05, + "loss": 1.3613, + "step": 15771 + }, + { + "epoch": 0.46309237183627927, + "grad_norm": 0.0, + "learning_rate": 1.1673388058437245e-05, + "loss": 1.3223, + "step": 15772 + }, + { + "epoch": 0.4631217335134183, + "grad_norm": 0.0, + "learning_rate": 1.1672450499574547e-05, + "loss": 1.1548, + "step": 15773 + }, + { + "epoch": 0.4631510951905573, + "grad_norm": 0.0, + "learning_rate": 1.1671512925587453e-05, + "loss": 1.3789, + "step": 15774 + }, + { + "epoch": 0.46318045686769627, + "grad_norm": 0.0, + "learning_rate": 1.167057533648444e-05, + "loss": 1.3857, + "step": 15775 + }, + { + "epoch": 0.4632098185448353, + "grad_norm": 0.0, + "learning_rate": 1.1669637732273986e-05, + "loss": 1.3975, + "step": 15776 + }, + { + "epoch": 0.4632391802219743, + "grad_norm": 0.0, + "learning_rate": 1.166870011296457e-05, + "loss": 1.3423, + "step": 15777 + }, + { + "epoch": 0.46326854189911326, + "grad_norm": 0.0, + "learning_rate": 1.166776247856467e-05, + "loss": 1.3057, + "step": 15778 + }, + { + "epoch": 0.4632979035762523, + "grad_norm": 0.0, + "learning_rate": 1.1666824829082768e-05, + "loss": 1.2744, + "step": 15779 + }, + { + "epoch": 0.4633272652533913, + "grad_norm": 0.0, + "learning_rate": 1.1665887164527343e-05, + "loss": 1.3467, + "step": 15780 + }, + { + "epoch": 0.46335662693053026, + "grad_norm": 0.0, + "learning_rate": 1.1664949484906874e-05, + "loss": 1.248, + "step": 15781 + }, + { + "epoch": 0.4633859886076693, + "grad_norm": 0.0, + "learning_rate": 1.166401179022984e-05, + "loss": 1.3662, + "step": 15782 + }, + { + "epoch": 0.4634153502848083, + "grad_norm": 0.0, + "learning_rate": 1.1663074080504721e-05, + "loss": 1.2734, + "step": 15783 + }, + { + "epoch": 0.46344471196194725, + "grad_norm": 0.0, + "learning_rate": 1.1662136355739997e-05, + "loss": 1.3535, + "step": 15784 + }, + { + "epoch": 0.46347407363908627, + "grad_norm": 0.0, + "learning_rate": 1.1661198615944149e-05, + "loss": 1.4111, + "step": 15785 + }, + { + "epoch": 0.4635034353162253, + "grad_norm": 0.0, + "learning_rate": 1.1660260861125654e-05, + "loss": 1.4453, + "step": 15786 + }, + { + "epoch": 0.46353279699336425, + "grad_norm": 0.0, + "learning_rate": 1.1659323091292996e-05, + "loss": 1.2891, + "step": 15787 + }, + { + "epoch": 0.46356215867050327, + "grad_norm": 0.0, + "learning_rate": 1.1658385306454654e-05, + "loss": 1.3574, + "step": 15788 + }, + { + "epoch": 0.4635915203476423, + "grad_norm": 0.0, + "learning_rate": 1.1657447506619111e-05, + "loss": 1.4229, + "step": 15789 + }, + { + "epoch": 0.46362088202478124, + "grad_norm": 0.0, + "learning_rate": 1.1656509691794845e-05, + "loss": 1.2686, + "step": 15790 + }, + { + "epoch": 0.46365024370192026, + "grad_norm": 0.0, + "learning_rate": 1.1655571861990336e-05, + "loss": 1.2246, + "step": 15791 + }, + { + "epoch": 0.4636796053790593, + "grad_norm": 0.0, + "learning_rate": 1.1654634017214066e-05, + "loss": 1.25, + "step": 15792 + }, + { + "epoch": 0.46370896705619824, + "grad_norm": 0.0, + "learning_rate": 1.1653696157474522e-05, + "loss": 1.2969, + "step": 15793 + }, + { + "epoch": 0.46373832873333726, + "grad_norm": 0.0, + "learning_rate": 1.1652758282780177e-05, + "loss": 1.3516, + "step": 15794 + }, + { + "epoch": 0.4637676904104762, + "grad_norm": 0.0, + "learning_rate": 1.1651820393139515e-05, + "loss": 1.3496, + "step": 15795 + }, + { + "epoch": 0.46379705208761524, + "grad_norm": 0.0, + "learning_rate": 1.1650882488561018e-05, + "loss": 1.3037, + "step": 15796 + }, + { + "epoch": 0.46382641376475425, + "grad_norm": 0.0, + "learning_rate": 1.1649944569053169e-05, + "loss": 1.2388, + "step": 15797 + }, + { + "epoch": 0.4638557754418932, + "grad_norm": 0.0, + "learning_rate": 1.1649006634624446e-05, + "loss": 1.3574, + "step": 15798 + }, + { + "epoch": 0.46388513711903223, + "grad_norm": 0.0, + "learning_rate": 1.1648068685283336e-05, + "loss": 1.3311, + "step": 15799 + }, + { + "epoch": 0.46391449879617125, + "grad_norm": 0.0, + "learning_rate": 1.1647130721038317e-05, + "loss": 1.3711, + "step": 15800 + }, + { + "epoch": 0.4639438604733102, + "grad_norm": 0.0, + "learning_rate": 1.1646192741897876e-05, + "loss": 1.4531, + "step": 15801 + }, + { + "epoch": 0.4639732221504492, + "grad_norm": 0.0, + "learning_rate": 1.1645254747870491e-05, + "loss": 1.3545, + "step": 15802 + }, + { + "epoch": 0.46400258382758824, + "grad_norm": 0.0, + "learning_rate": 1.1644316738964647e-05, + "loss": 1.4023, + "step": 15803 + }, + { + "epoch": 0.4640319455047272, + "grad_norm": 0.0, + "learning_rate": 1.1643378715188824e-05, + "loss": 1.4121, + "step": 15804 + }, + { + "epoch": 0.4640613071818662, + "grad_norm": 0.0, + "learning_rate": 1.1642440676551507e-05, + "loss": 1.3252, + "step": 15805 + }, + { + "epoch": 0.46409066885900524, + "grad_norm": 0.0, + "learning_rate": 1.164150262306118e-05, + "loss": 1.291, + "step": 15806 + }, + { + "epoch": 0.4641200305361442, + "grad_norm": 0.0, + "learning_rate": 1.164056455472632e-05, + "loss": 1.2764, + "step": 15807 + }, + { + "epoch": 0.4641493922132832, + "grad_norm": 0.0, + "learning_rate": 1.163962647155542e-05, + "loss": 1.4014, + "step": 15808 + }, + { + "epoch": 0.46417875389042224, + "grad_norm": 0.0, + "learning_rate": 1.1638688373556954e-05, + "loss": 1.3232, + "step": 15809 + }, + { + "epoch": 0.4642081155675612, + "grad_norm": 0.0, + "learning_rate": 1.1637750260739412e-05, + "loss": 1.3428, + "step": 15810 + }, + { + "epoch": 0.4642374772447002, + "grad_norm": 0.0, + "learning_rate": 1.163681213311127e-05, + "loss": 1.1729, + "step": 15811 + }, + { + "epoch": 0.46426683892183923, + "grad_norm": 0.0, + "learning_rate": 1.1635873990681018e-05, + "loss": 1.332, + "step": 15812 + }, + { + "epoch": 0.4642962005989782, + "grad_norm": 0.0, + "learning_rate": 1.1634935833457144e-05, + "loss": 1.3613, + "step": 15813 + }, + { + "epoch": 0.4643255622761172, + "grad_norm": 0.0, + "learning_rate": 1.1633997661448124e-05, + "loss": 1.3457, + "step": 15814 + }, + { + "epoch": 0.4643549239532562, + "grad_norm": 0.0, + "learning_rate": 1.1633059474662446e-05, + "loss": 1.4287, + "step": 15815 + }, + { + "epoch": 0.4643842856303952, + "grad_norm": 0.0, + "learning_rate": 1.163212127310859e-05, + "loss": 1.2412, + "step": 15816 + }, + { + "epoch": 0.4644136473075342, + "grad_norm": 0.0, + "learning_rate": 1.1631183056795041e-05, + "loss": 1.2549, + "step": 15817 + }, + { + "epoch": 0.4644430089846732, + "grad_norm": 0.0, + "learning_rate": 1.1630244825730292e-05, + "loss": 1.3135, + "step": 15818 + }, + { + "epoch": 0.4644723706618122, + "grad_norm": 0.0, + "learning_rate": 1.1629306579922817e-05, + "loss": 1.3867, + "step": 15819 + }, + { + "epoch": 0.4645017323389512, + "grad_norm": 0.0, + "learning_rate": 1.1628368319381108e-05, + "loss": 1.3555, + "step": 15820 + }, + { + "epoch": 0.4645310940160902, + "grad_norm": 0.0, + "learning_rate": 1.1627430044113644e-05, + "loss": 1.3262, + "step": 15821 + }, + { + "epoch": 0.4645604556932292, + "grad_norm": 0.0, + "learning_rate": 1.1626491754128917e-05, + "loss": 1.416, + "step": 15822 + }, + { + "epoch": 0.4645898173703682, + "grad_norm": 0.0, + "learning_rate": 1.1625553449435406e-05, + "loss": 1.3057, + "step": 15823 + }, + { + "epoch": 0.4646191790475072, + "grad_norm": 0.0, + "learning_rate": 1.16246151300416e-05, + "loss": 1.2549, + "step": 15824 + }, + { + "epoch": 0.4646485407246462, + "grad_norm": 0.0, + "learning_rate": 1.1623676795955984e-05, + "loss": 1.2793, + "step": 15825 + }, + { + "epoch": 0.4646779024017852, + "grad_norm": 0.0, + "learning_rate": 1.1622738447187043e-05, + "loss": 1.3462, + "step": 15826 + }, + { + "epoch": 0.4647072640789242, + "grad_norm": 0.0, + "learning_rate": 1.1621800083743262e-05, + "loss": 1.3008, + "step": 15827 + }, + { + "epoch": 0.46473662575606317, + "grad_norm": 0.0, + "learning_rate": 1.1620861705633127e-05, + "loss": 1.4043, + "step": 15828 + }, + { + "epoch": 0.4647659874332022, + "grad_norm": 0.0, + "learning_rate": 1.1619923312865122e-05, + "loss": 1.1479, + "step": 15829 + }, + { + "epoch": 0.4647953491103412, + "grad_norm": 0.0, + "learning_rate": 1.161898490544774e-05, + "loss": 1.4229, + "step": 15830 + }, + { + "epoch": 0.46482471078748017, + "grad_norm": 0.0, + "learning_rate": 1.161804648338946e-05, + "loss": 1.3652, + "step": 15831 + }, + { + "epoch": 0.4648540724646192, + "grad_norm": 0.0, + "learning_rate": 1.1617108046698772e-05, + "loss": 1.3291, + "step": 15832 + }, + { + "epoch": 0.4648834341417582, + "grad_norm": 0.0, + "learning_rate": 1.1616169595384164e-05, + "loss": 1.1436, + "step": 15833 + }, + { + "epoch": 0.46491279581889716, + "grad_norm": 0.0, + "learning_rate": 1.161523112945412e-05, + "loss": 1.2471, + "step": 15834 + }, + { + "epoch": 0.4649421574960362, + "grad_norm": 0.0, + "learning_rate": 1.1614292648917124e-05, + "loss": 1.4375, + "step": 15835 + }, + { + "epoch": 0.4649715191731752, + "grad_norm": 0.0, + "learning_rate": 1.1613354153781668e-05, + "loss": 1.3779, + "step": 15836 + }, + { + "epoch": 0.46500088085031416, + "grad_norm": 0.0, + "learning_rate": 1.1612415644056237e-05, + "loss": 1.3818, + "step": 15837 + }, + { + "epoch": 0.4650302425274532, + "grad_norm": 0.0, + "learning_rate": 1.1611477119749321e-05, + "loss": 1.3535, + "step": 15838 + }, + { + "epoch": 0.4650596042045922, + "grad_norm": 0.0, + "learning_rate": 1.1610538580869402e-05, + "loss": 1.3779, + "step": 15839 + }, + { + "epoch": 0.46508896588173115, + "grad_norm": 0.0, + "learning_rate": 1.1609600027424968e-05, + "loss": 1.2852, + "step": 15840 + }, + { + "epoch": 0.46511832755887017, + "grad_norm": 0.0, + "learning_rate": 1.160866145942451e-05, + "loss": 1.3242, + "step": 15841 + }, + { + "epoch": 0.4651476892360092, + "grad_norm": 0.0, + "learning_rate": 1.1607722876876518e-05, + "loss": 1.2349, + "step": 15842 + }, + { + "epoch": 0.46517705091314815, + "grad_norm": 0.0, + "learning_rate": 1.1606784279789475e-05, + "loss": 1.3721, + "step": 15843 + }, + { + "epoch": 0.46520641259028717, + "grad_norm": 0.0, + "learning_rate": 1.1605845668171869e-05, + "loss": 1.2939, + "step": 15844 + }, + { + "epoch": 0.4652357742674262, + "grad_norm": 0.0, + "learning_rate": 1.160490704203219e-05, + "loss": 1.4082, + "step": 15845 + }, + { + "epoch": 0.46526513594456514, + "grad_norm": 0.0, + "learning_rate": 1.1603968401378922e-05, + "loss": 1.2607, + "step": 15846 + }, + { + "epoch": 0.46529449762170416, + "grad_norm": 0.0, + "learning_rate": 1.1603029746220561e-05, + "loss": 1.3223, + "step": 15847 + }, + { + "epoch": 0.4653238592988431, + "grad_norm": 0.0, + "learning_rate": 1.160209107656559e-05, + "loss": 1.3105, + "step": 15848 + }, + { + "epoch": 0.46535322097598214, + "grad_norm": 0.0, + "learning_rate": 1.1601152392422501e-05, + "loss": 1.3789, + "step": 15849 + }, + { + "epoch": 0.46538258265312116, + "grad_norm": 0.0, + "learning_rate": 1.160021369379978e-05, + "loss": 1.3252, + "step": 15850 + }, + { + "epoch": 0.4654119443302601, + "grad_norm": 0.0, + "learning_rate": 1.1599274980705917e-05, + "loss": 1.3496, + "step": 15851 + }, + { + "epoch": 0.46544130600739914, + "grad_norm": 0.0, + "learning_rate": 1.15983362531494e-05, + "loss": 1.2861, + "step": 15852 + }, + { + "epoch": 0.46547066768453815, + "grad_norm": 0.0, + "learning_rate": 1.1597397511138719e-05, + "loss": 1.4082, + "step": 15853 + }, + { + "epoch": 0.4655000293616771, + "grad_norm": 0.0, + "learning_rate": 1.1596458754682363e-05, + "loss": 1.272, + "step": 15854 + }, + { + "epoch": 0.46552939103881613, + "grad_norm": 0.0, + "learning_rate": 1.1595519983788824e-05, + "loss": 1.2988, + "step": 15855 + }, + { + "epoch": 0.46555875271595515, + "grad_norm": 0.0, + "learning_rate": 1.1594581198466583e-05, + "loss": 1.3379, + "step": 15856 + }, + { + "epoch": 0.4655881143930941, + "grad_norm": 0.0, + "learning_rate": 1.1593642398724141e-05, + "loss": 1.4473, + "step": 15857 + }, + { + "epoch": 0.4656174760702331, + "grad_norm": 0.0, + "learning_rate": 1.159270358456998e-05, + "loss": 1.4941, + "step": 15858 + }, + { + "epoch": 0.46564683774737214, + "grad_norm": 0.0, + "learning_rate": 1.1591764756012596e-05, + "loss": 1.3174, + "step": 15859 + }, + { + "epoch": 0.4656761994245111, + "grad_norm": 0.0, + "learning_rate": 1.1590825913060473e-05, + "loss": 1.3013, + "step": 15860 + }, + { + "epoch": 0.4657055611016501, + "grad_norm": 0.0, + "learning_rate": 1.1589887055722105e-05, + "loss": 1.436, + "step": 15861 + }, + { + "epoch": 0.46573492277878914, + "grad_norm": 0.0, + "learning_rate": 1.1588948184005981e-05, + "loss": 1.3994, + "step": 15862 + }, + { + "epoch": 0.4657642844559281, + "grad_norm": 0.0, + "learning_rate": 1.1588009297920593e-05, + "loss": 1.3779, + "step": 15863 + }, + { + "epoch": 0.4657936461330671, + "grad_norm": 0.0, + "learning_rate": 1.1587070397474426e-05, + "loss": 1.3008, + "step": 15864 + }, + { + "epoch": 0.46582300781020614, + "grad_norm": 0.0, + "learning_rate": 1.1586131482675978e-05, + "loss": 1.4746, + "step": 15865 + }, + { + "epoch": 0.4658523694873451, + "grad_norm": 0.0, + "learning_rate": 1.1585192553533736e-05, + "loss": 1.335, + "step": 15866 + }, + { + "epoch": 0.4658817311644841, + "grad_norm": 0.0, + "learning_rate": 1.1584253610056193e-05, + "loss": 1.3887, + "step": 15867 + }, + { + "epoch": 0.46591109284162313, + "grad_norm": 0.0, + "learning_rate": 1.1583314652251837e-05, + "loss": 1.3174, + "step": 15868 + }, + { + "epoch": 0.4659404545187621, + "grad_norm": 0.0, + "learning_rate": 1.1582375680129162e-05, + "loss": 1.3418, + "step": 15869 + }, + { + "epoch": 0.4659698161959011, + "grad_norm": 0.0, + "learning_rate": 1.1581436693696657e-05, + "loss": 1.3311, + "step": 15870 + }, + { + "epoch": 0.4659991778730401, + "grad_norm": 0.0, + "learning_rate": 1.1580497692962818e-05, + "loss": 1.4482, + "step": 15871 + }, + { + "epoch": 0.4660285395501791, + "grad_norm": 0.0, + "learning_rate": 1.1579558677936133e-05, + "loss": 1.3311, + "step": 15872 + }, + { + "epoch": 0.4660579012273181, + "grad_norm": 0.0, + "learning_rate": 1.1578619648625093e-05, + "loss": 1.3506, + "step": 15873 + }, + { + "epoch": 0.4660872629044571, + "grad_norm": 0.0, + "learning_rate": 1.157768060503819e-05, + "loss": 1.4336, + "step": 15874 + }, + { + "epoch": 0.4661166245815961, + "grad_norm": 0.0, + "learning_rate": 1.157674154718392e-05, + "loss": 1.4629, + "step": 15875 + }, + { + "epoch": 0.4661459862587351, + "grad_norm": 0.0, + "learning_rate": 1.1575802475070772e-05, + "loss": 1.3262, + "step": 15876 + }, + { + "epoch": 0.4661753479358741, + "grad_norm": 0.0, + "learning_rate": 1.1574863388707238e-05, + "loss": 1.417, + "step": 15877 + }, + { + "epoch": 0.4662047096130131, + "grad_norm": 0.0, + "learning_rate": 1.157392428810181e-05, + "loss": 1.3428, + "step": 15878 + }, + { + "epoch": 0.4662340712901521, + "grad_norm": 0.0, + "learning_rate": 1.1572985173262983e-05, + "loss": 1.3643, + "step": 15879 + }, + { + "epoch": 0.4662634329672911, + "grad_norm": 0.0, + "learning_rate": 1.1572046044199248e-05, + "loss": 1.4277, + "step": 15880 + }, + { + "epoch": 0.4662927946444301, + "grad_norm": 0.0, + "learning_rate": 1.1571106900919096e-05, + "loss": 1.2363, + "step": 15881 + }, + { + "epoch": 0.4663221563215691, + "grad_norm": 0.0, + "learning_rate": 1.1570167743431027e-05, + "loss": 1.3281, + "step": 15882 + }, + { + "epoch": 0.4663515179987081, + "grad_norm": 0.0, + "learning_rate": 1.1569228571743522e-05, + "loss": 1.2368, + "step": 15883 + }, + { + "epoch": 0.46638087967584707, + "grad_norm": 0.0, + "learning_rate": 1.1568289385865087e-05, + "loss": 1.2285, + "step": 15884 + }, + { + "epoch": 0.4664102413529861, + "grad_norm": 0.0, + "learning_rate": 1.1567350185804207e-05, + "loss": 1.3867, + "step": 15885 + }, + { + "epoch": 0.4664396030301251, + "grad_norm": 0.0, + "learning_rate": 1.1566410971569378e-05, + "loss": 1.3174, + "step": 15886 + }, + { + "epoch": 0.46646896470726407, + "grad_norm": 0.0, + "learning_rate": 1.1565471743169095e-05, + "loss": 1.4111, + "step": 15887 + }, + { + "epoch": 0.4664983263844031, + "grad_norm": 0.0, + "learning_rate": 1.1564532500611847e-05, + "loss": 1.333, + "step": 15888 + }, + { + "epoch": 0.4665276880615421, + "grad_norm": 0.0, + "learning_rate": 1.1563593243906132e-05, + "loss": 1.2285, + "step": 15889 + }, + { + "epoch": 0.46655704973868106, + "grad_norm": 0.0, + "learning_rate": 1.1562653973060442e-05, + "loss": 1.3027, + "step": 15890 + }, + { + "epoch": 0.4665864114158201, + "grad_norm": 0.0, + "learning_rate": 1.1561714688083276e-05, + "loss": 1.374, + "step": 15891 + }, + { + "epoch": 0.4666157730929591, + "grad_norm": 0.0, + "learning_rate": 1.1560775388983123e-05, + "loss": 1.291, + "step": 15892 + }, + { + "epoch": 0.46664513477009806, + "grad_norm": 0.0, + "learning_rate": 1.1559836075768475e-05, + "loss": 1.1953, + "step": 15893 + }, + { + "epoch": 0.4666744964472371, + "grad_norm": 0.0, + "learning_rate": 1.1558896748447832e-05, + "loss": 1.3252, + "step": 15894 + }, + { + "epoch": 0.4667038581243761, + "grad_norm": 0.0, + "learning_rate": 1.1557957407029685e-05, + "loss": 1.2822, + "step": 15895 + }, + { + "epoch": 0.46673321980151505, + "grad_norm": 0.0, + "learning_rate": 1.1557018051522533e-05, + "loss": 1.3398, + "step": 15896 + }, + { + "epoch": 0.46676258147865407, + "grad_norm": 0.0, + "learning_rate": 1.1556078681934866e-05, + "loss": 1.2695, + "step": 15897 + }, + { + "epoch": 0.46679194315579303, + "grad_norm": 0.0, + "learning_rate": 1.155513929827518e-05, + "loss": 1.3462, + "step": 15898 + }, + { + "epoch": 0.46682130483293205, + "grad_norm": 0.0, + "learning_rate": 1.1554199900551974e-05, + "loss": 1.4014, + "step": 15899 + }, + { + "epoch": 0.46685066651007107, + "grad_norm": 0.0, + "learning_rate": 1.1553260488773739e-05, + "loss": 1.3486, + "step": 15900 + }, + { + "epoch": 0.46688002818721, + "grad_norm": 0.0, + "learning_rate": 1.1552321062948972e-05, + "loss": 1.335, + "step": 15901 + }, + { + "epoch": 0.46690938986434904, + "grad_norm": 0.0, + "learning_rate": 1.1551381623086166e-05, + "loss": 1.2607, + "step": 15902 + }, + { + "epoch": 0.46693875154148806, + "grad_norm": 0.0, + "learning_rate": 1.155044216919382e-05, + "loss": 1.3047, + "step": 15903 + }, + { + "epoch": 0.466968113218627, + "grad_norm": 0.0, + "learning_rate": 1.1549502701280431e-05, + "loss": 1.2305, + "step": 15904 + }, + { + "epoch": 0.46699747489576604, + "grad_norm": 0.0, + "learning_rate": 1.1548563219354488e-05, + "loss": 1.2974, + "step": 15905 + }, + { + "epoch": 0.46702683657290506, + "grad_norm": 0.0, + "learning_rate": 1.1547623723424493e-05, + "loss": 1.3086, + "step": 15906 + }, + { + "epoch": 0.467056198250044, + "grad_norm": 0.0, + "learning_rate": 1.1546684213498937e-05, + "loss": 1.2764, + "step": 15907 + }, + { + "epoch": 0.46708555992718304, + "grad_norm": 0.0, + "learning_rate": 1.1545744689586325e-05, + "loss": 1.3965, + "step": 15908 + }, + { + "epoch": 0.46711492160432205, + "grad_norm": 0.0, + "learning_rate": 1.1544805151695144e-05, + "loss": 1.2715, + "step": 15909 + }, + { + "epoch": 0.467144283281461, + "grad_norm": 0.0, + "learning_rate": 1.1543865599833897e-05, + "loss": 1.415, + "step": 15910 + }, + { + "epoch": 0.46717364495860003, + "grad_norm": 0.0, + "learning_rate": 1.1542926034011077e-05, + "loss": 1.3965, + "step": 15911 + }, + { + "epoch": 0.46720300663573905, + "grad_norm": 0.0, + "learning_rate": 1.1541986454235183e-05, + "loss": 1.1079, + "step": 15912 + }, + { + "epoch": 0.467232368312878, + "grad_norm": 0.0, + "learning_rate": 1.1541046860514711e-05, + "loss": 1.2705, + "step": 15913 + }, + { + "epoch": 0.467261729990017, + "grad_norm": 0.0, + "learning_rate": 1.1540107252858155e-05, + "loss": 1.2188, + "step": 15914 + }, + { + "epoch": 0.46729109166715604, + "grad_norm": 0.0, + "learning_rate": 1.1539167631274014e-05, + "loss": 1.3994, + "step": 15915 + }, + { + "epoch": 0.467320453344295, + "grad_norm": 0.0, + "learning_rate": 1.1538227995770787e-05, + "loss": 1.3926, + "step": 15916 + }, + { + "epoch": 0.467349815021434, + "grad_norm": 0.0, + "learning_rate": 1.1537288346356972e-05, + "loss": 1.2686, + "step": 15917 + }, + { + "epoch": 0.46737917669857304, + "grad_norm": 0.0, + "learning_rate": 1.1536348683041063e-05, + "loss": 1.2568, + "step": 15918 + }, + { + "epoch": 0.467408538375712, + "grad_norm": 0.0, + "learning_rate": 1.1535409005831559e-05, + "loss": 1.373, + "step": 15919 + }, + { + "epoch": 0.467437900052851, + "grad_norm": 0.0, + "learning_rate": 1.153446931473696e-05, + "loss": 1.3975, + "step": 15920 + }, + { + "epoch": 0.46746726172999004, + "grad_norm": 0.0, + "learning_rate": 1.1533529609765763e-05, + "loss": 1.3643, + "step": 15921 + }, + { + "epoch": 0.467496623407129, + "grad_norm": 0.0, + "learning_rate": 1.1532589890926462e-05, + "loss": 1.292, + "step": 15922 + }, + { + "epoch": 0.467525985084268, + "grad_norm": 0.0, + "learning_rate": 1.1531650158227559e-05, + "loss": 1.3174, + "step": 15923 + }, + { + "epoch": 0.46755534676140703, + "grad_norm": 0.0, + "learning_rate": 1.1530710411677552e-05, + "loss": 1.3887, + "step": 15924 + }, + { + "epoch": 0.467584708438546, + "grad_norm": 0.0, + "learning_rate": 1.1529770651284936e-05, + "loss": 1.332, + "step": 15925 + }, + { + "epoch": 0.467614070115685, + "grad_norm": 0.0, + "learning_rate": 1.1528830877058215e-05, + "loss": 1.2607, + "step": 15926 + }, + { + "epoch": 0.467643431792824, + "grad_norm": 0.0, + "learning_rate": 1.1527891089005886e-05, + "loss": 1.3477, + "step": 15927 + }, + { + "epoch": 0.467672793469963, + "grad_norm": 0.0, + "learning_rate": 1.1526951287136445e-05, + "loss": 1.2124, + "step": 15928 + }, + { + "epoch": 0.467702155147102, + "grad_norm": 0.0, + "learning_rate": 1.1526011471458392e-05, + "loss": 1.1826, + "step": 15929 + }, + { + "epoch": 0.467731516824241, + "grad_norm": 0.0, + "learning_rate": 1.1525071641980226e-05, + "loss": 1.3574, + "step": 15930 + }, + { + "epoch": 0.46776087850138, + "grad_norm": 0.0, + "learning_rate": 1.152413179871045e-05, + "loss": 1.3701, + "step": 15931 + }, + { + "epoch": 0.467790240178519, + "grad_norm": 0.0, + "learning_rate": 1.1523191941657555e-05, + "loss": 1.2661, + "step": 15932 + }, + { + "epoch": 0.467819601855658, + "grad_norm": 0.0, + "learning_rate": 1.1522252070830049e-05, + "loss": 1.2871, + "step": 15933 + }, + { + "epoch": 0.467848963532797, + "grad_norm": 0.0, + "learning_rate": 1.1521312186236425e-05, + "loss": 1.4736, + "step": 15934 + }, + { + "epoch": 0.467878325209936, + "grad_norm": 0.0, + "learning_rate": 1.1520372287885185e-05, + "loss": 1.3281, + "step": 15935 + }, + { + "epoch": 0.467907686887075, + "grad_norm": 0.0, + "learning_rate": 1.151943237578483e-05, + "loss": 1.2861, + "step": 15936 + }, + { + "epoch": 0.467937048564214, + "grad_norm": 0.0, + "learning_rate": 1.151849244994386e-05, + "loss": 1.3174, + "step": 15937 + }, + { + "epoch": 0.467966410241353, + "grad_norm": 0.0, + "learning_rate": 1.1517552510370773e-05, + "loss": 1.2998, + "step": 15938 + }, + { + "epoch": 0.467995771918492, + "grad_norm": 0.0, + "learning_rate": 1.151661255707407e-05, + "loss": 1.2852, + "step": 15939 + }, + { + "epoch": 0.46802513359563097, + "grad_norm": 0.0, + "learning_rate": 1.1515672590062252e-05, + "loss": 1.3984, + "step": 15940 + }, + { + "epoch": 0.46805449527277, + "grad_norm": 0.0, + "learning_rate": 1.151473260934382e-05, + "loss": 1.2539, + "step": 15941 + }, + { + "epoch": 0.468083856949909, + "grad_norm": 0.0, + "learning_rate": 1.1513792614927271e-05, + "loss": 1.1865, + "step": 15942 + }, + { + "epoch": 0.46811321862704797, + "grad_norm": 0.0, + "learning_rate": 1.1512852606821105e-05, + "loss": 1.3408, + "step": 15943 + }, + { + "epoch": 0.468142580304187, + "grad_norm": 0.0, + "learning_rate": 1.1511912585033829e-05, + "loss": 1.2998, + "step": 15944 + }, + { + "epoch": 0.468171941981326, + "grad_norm": 0.0, + "learning_rate": 1.151097254957394e-05, + "loss": 1.4033, + "step": 15945 + }, + { + "epoch": 0.46820130365846496, + "grad_norm": 0.0, + "learning_rate": 1.151003250044994e-05, + "loss": 1.3516, + "step": 15946 + }, + { + "epoch": 0.468230665335604, + "grad_norm": 0.0, + "learning_rate": 1.1509092437670326e-05, + "loss": 1.3359, + "step": 15947 + }, + { + "epoch": 0.46826002701274294, + "grad_norm": 0.0, + "learning_rate": 1.1508152361243604e-05, + "loss": 1.3828, + "step": 15948 + }, + { + "epoch": 0.46828938868988196, + "grad_norm": 0.0, + "learning_rate": 1.1507212271178276e-05, + "loss": 1.4062, + "step": 15949 + }, + { + "epoch": 0.468318750367021, + "grad_norm": 0.0, + "learning_rate": 1.1506272167482839e-05, + "loss": 1.2979, + "step": 15950 + }, + { + "epoch": 0.46834811204415994, + "grad_norm": 0.0, + "learning_rate": 1.1505332050165801e-05, + "loss": 1.4678, + "step": 15951 + }, + { + "epoch": 0.46837747372129895, + "grad_norm": 0.0, + "learning_rate": 1.1504391919235655e-05, + "loss": 1.375, + "step": 15952 + }, + { + "epoch": 0.46840683539843797, + "grad_norm": 0.0, + "learning_rate": 1.1503451774700911e-05, + "loss": 1.2197, + "step": 15953 + }, + { + "epoch": 0.46843619707557693, + "grad_norm": 0.0, + "learning_rate": 1.1502511616570065e-05, + "loss": 1.3516, + "step": 15954 + }, + { + "epoch": 0.46846555875271595, + "grad_norm": 0.0, + "learning_rate": 1.1501571444851622e-05, + "loss": 1.2803, + "step": 15955 + }, + { + "epoch": 0.46849492042985497, + "grad_norm": 0.0, + "learning_rate": 1.1500631259554083e-05, + "loss": 1.3633, + "step": 15956 + }, + { + "epoch": 0.46852428210699393, + "grad_norm": 0.0, + "learning_rate": 1.1499691060685956e-05, + "loss": 1.3271, + "step": 15957 + }, + { + "epoch": 0.46855364378413294, + "grad_norm": 0.0, + "learning_rate": 1.1498750848255733e-05, + "loss": 1.3096, + "step": 15958 + }, + { + "epoch": 0.46858300546127196, + "grad_norm": 0.0, + "learning_rate": 1.1497810622271924e-05, + "loss": 1.3184, + "step": 15959 + }, + { + "epoch": 0.4686123671384109, + "grad_norm": 0.0, + "learning_rate": 1.1496870382743031e-05, + "loss": 1.2842, + "step": 15960 + }, + { + "epoch": 0.46864172881554994, + "grad_norm": 0.0, + "learning_rate": 1.1495930129677558e-05, + "loss": 1.2266, + "step": 15961 + }, + { + "epoch": 0.46867109049268896, + "grad_norm": 0.0, + "learning_rate": 1.1494989863084002e-05, + "loss": 1.375, + "step": 15962 + }, + { + "epoch": 0.4687004521698279, + "grad_norm": 0.0, + "learning_rate": 1.1494049582970873e-05, + "loss": 1.2998, + "step": 15963 + }, + { + "epoch": 0.46872981384696694, + "grad_norm": 0.0, + "learning_rate": 1.1493109289346666e-05, + "loss": 1.3926, + "step": 15964 + }, + { + "epoch": 0.46875917552410595, + "grad_norm": 0.0, + "learning_rate": 1.1492168982219894e-05, + "loss": 1.2783, + "step": 15965 + }, + { + "epoch": 0.4687885372012449, + "grad_norm": 0.0, + "learning_rate": 1.1491228661599052e-05, + "loss": 1.3525, + "step": 15966 + }, + { + "epoch": 0.46881789887838393, + "grad_norm": 0.0, + "learning_rate": 1.149028832749265e-05, + "loss": 1.377, + "step": 15967 + }, + { + "epoch": 0.46884726055552295, + "grad_norm": 0.0, + "learning_rate": 1.1489347979909186e-05, + "loss": 1.3311, + "step": 15968 + }, + { + "epoch": 0.4688766222326619, + "grad_norm": 0.0, + "learning_rate": 1.1488407618857171e-05, + "loss": 1.3594, + "step": 15969 + }, + { + "epoch": 0.4689059839098009, + "grad_norm": 0.0, + "learning_rate": 1.1487467244345105e-05, + "loss": 1.4727, + "step": 15970 + }, + { + "epoch": 0.46893534558693994, + "grad_norm": 0.0, + "learning_rate": 1.1486526856381488e-05, + "loss": 1.4414, + "step": 15971 + }, + { + "epoch": 0.4689647072640789, + "grad_norm": 0.0, + "learning_rate": 1.1485586454974829e-05, + "loss": 1.3164, + "step": 15972 + }, + { + "epoch": 0.4689940689412179, + "grad_norm": 0.0, + "learning_rate": 1.1484646040133632e-05, + "loss": 1.332, + "step": 15973 + }, + { + "epoch": 0.46902343061835694, + "grad_norm": 0.0, + "learning_rate": 1.14837056118664e-05, + "loss": 1.1357, + "step": 15974 + }, + { + "epoch": 0.4690527922954959, + "grad_norm": 0.0, + "learning_rate": 1.1482765170181639e-05, + "loss": 1.2432, + "step": 15975 + }, + { + "epoch": 0.4690821539726349, + "grad_norm": 0.0, + "learning_rate": 1.148182471508785e-05, + "loss": 1.3574, + "step": 15976 + }, + { + "epoch": 0.46911151564977394, + "grad_norm": 0.0, + "learning_rate": 1.1480884246593544e-05, + "loss": 1.4863, + "step": 15977 + }, + { + "epoch": 0.4691408773269129, + "grad_norm": 0.0, + "learning_rate": 1.1479943764707222e-05, + "loss": 1.3086, + "step": 15978 + }, + { + "epoch": 0.4691702390040519, + "grad_norm": 0.0, + "learning_rate": 1.1479003269437387e-05, + "loss": 1.292, + "step": 15979 + }, + { + "epoch": 0.46919960068119093, + "grad_norm": 0.0, + "learning_rate": 1.1478062760792549e-05, + "loss": 1.2998, + "step": 15980 + }, + { + "epoch": 0.4692289623583299, + "grad_norm": 0.0, + "learning_rate": 1.1477122238781211e-05, + "loss": 1.3213, + "step": 15981 + }, + { + "epoch": 0.4692583240354689, + "grad_norm": 0.0, + "learning_rate": 1.1476181703411879e-05, + "loss": 1.3672, + "step": 15982 + }, + { + "epoch": 0.4692876857126079, + "grad_norm": 0.0, + "learning_rate": 1.1475241154693055e-05, + "loss": 1.2627, + "step": 15983 + }, + { + "epoch": 0.4693170473897469, + "grad_norm": 0.0, + "learning_rate": 1.147430059263325e-05, + "loss": 1.3857, + "step": 15984 + }, + { + "epoch": 0.4693464090668859, + "grad_norm": 0.0, + "learning_rate": 1.1473360017240964e-05, + "loss": 1.3564, + "step": 15985 + }, + { + "epoch": 0.4693757707440249, + "grad_norm": 0.0, + "learning_rate": 1.1472419428524711e-05, + "loss": 1.2852, + "step": 15986 + }, + { + "epoch": 0.4694051324211639, + "grad_norm": 0.0, + "learning_rate": 1.1471478826492988e-05, + "loss": 1.3545, + "step": 15987 + }, + { + "epoch": 0.4694344940983029, + "grad_norm": 0.0, + "learning_rate": 1.1470538211154304e-05, + "loss": 1.4697, + "step": 15988 + }, + { + "epoch": 0.4694638557754419, + "grad_norm": 0.0, + "learning_rate": 1.146959758251717e-05, + "loss": 1.2793, + "step": 15989 + }, + { + "epoch": 0.4694932174525809, + "grad_norm": 0.0, + "learning_rate": 1.146865694059009e-05, + "loss": 1.1562, + "step": 15990 + }, + { + "epoch": 0.4695225791297199, + "grad_norm": 0.0, + "learning_rate": 1.1467716285381567e-05, + "loss": 1.3154, + "step": 15991 + }, + { + "epoch": 0.4695519408068589, + "grad_norm": 0.0, + "learning_rate": 1.1466775616900108e-05, + "loss": 1.2939, + "step": 15992 + }, + { + "epoch": 0.4695813024839979, + "grad_norm": 0.0, + "learning_rate": 1.146583493515422e-05, + "loss": 1.3555, + "step": 15993 + }, + { + "epoch": 0.4696106641611369, + "grad_norm": 0.0, + "learning_rate": 1.1464894240152417e-05, + "loss": 1.3604, + "step": 15994 + }, + { + "epoch": 0.4696400258382759, + "grad_norm": 0.0, + "learning_rate": 1.1463953531903196e-05, + "loss": 1.4541, + "step": 15995 + }, + { + "epoch": 0.46966938751541487, + "grad_norm": 0.0, + "learning_rate": 1.1463012810415069e-05, + "loss": 1.3965, + "step": 15996 + }, + { + "epoch": 0.4696987491925539, + "grad_norm": 0.0, + "learning_rate": 1.1462072075696543e-05, + "loss": 1.2217, + "step": 15997 + }, + { + "epoch": 0.46972811086969285, + "grad_norm": 0.0, + "learning_rate": 1.1461131327756125e-05, + "loss": 1.3838, + "step": 15998 + }, + { + "epoch": 0.46975747254683187, + "grad_norm": 0.0, + "learning_rate": 1.1460190566602322e-05, + "loss": 1.2588, + "step": 15999 + }, + { + "epoch": 0.4697868342239709, + "grad_norm": 0.0, + "learning_rate": 1.1459249792243644e-05, + "loss": 1.3389, + "step": 16000 + }, + { + "epoch": 0.46981619590110985, + "grad_norm": 0.0, + "learning_rate": 1.1458309004688593e-05, + "loss": 1.1943, + "step": 16001 + }, + { + "epoch": 0.46984555757824886, + "grad_norm": 0.0, + "learning_rate": 1.1457368203945683e-05, + "loss": 1.3525, + "step": 16002 + }, + { + "epoch": 0.4698749192553879, + "grad_norm": 0.0, + "learning_rate": 1.1456427390023417e-05, + "loss": 1.248, + "step": 16003 + }, + { + "epoch": 0.46990428093252684, + "grad_norm": 0.0, + "learning_rate": 1.1455486562930305e-05, + "loss": 1.2324, + "step": 16004 + }, + { + "epoch": 0.46993364260966586, + "grad_norm": 0.0, + "learning_rate": 1.1454545722674855e-05, + "loss": 1.335, + "step": 16005 + }, + { + "epoch": 0.4699630042868049, + "grad_norm": 0.0, + "learning_rate": 1.1453604869265576e-05, + "loss": 1.1978, + "step": 16006 + }, + { + "epoch": 0.46999236596394384, + "grad_norm": 0.0, + "learning_rate": 1.1452664002710976e-05, + "loss": 1.3428, + "step": 16007 + }, + { + "epoch": 0.47002172764108285, + "grad_norm": 0.0, + "learning_rate": 1.1451723123019562e-05, + "loss": 1.3447, + "step": 16008 + }, + { + "epoch": 0.47005108931822187, + "grad_norm": 0.0, + "learning_rate": 1.1450782230199846e-05, + "loss": 1.3223, + "step": 16009 + }, + { + "epoch": 0.47008045099536083, + "grad_norm": 0.0, + "learning_rate": 1.1449841324260335e-05, + "loss": 1.2812, + "step": 16010 + }, + { + "epoch": 0.47010981267249985, + "grad_norm": 0.0, + "learning_rate": 1.1448900405209538e-05, + "loss": 1.1982, + "step": 16011 + }, + { + "epoch": 0.47013917434963887, + "grad_norm": 0.0, + "learning_rate": 1.1447959473055964e-05, + "loss": 1.4072, + "step": 16012 + }, + { + "epoch": 0.47016853602677783, + "grad_norm": 0.0, + "learning_rate": 1.1447018527808118e-05, + "loss": 1.2822, + "step": 16013 + }, + { + "epoch": 0.47019789770391685, + "grad_norm": 0.0, + "learning_rate": 1.1446077569474517e-05, + "loss": 1.3438, + "step": 16014 + }, + { + "epoch": 0.47022725938105586, + "grad_norm": 0.0, + "learning_rate": 1.1445136598063662e-05, + "loss": 1.4023, + "step": 16015 + }, + { + "epoch": 0.4702566210581948, + "grad_norm": 0.0, + "learning_rate": 1.1444195613584068e-05, + "loss": 1.415, + "step": 16016 + }, + { + "epoch": 0.47028598273533384, + "grad_norm": 0.0, + "learning_rate": 1.1443254616044244e-05, + "loss": 1.1382, + "step": 16017 + }, + { + "epoch": 0.47031534441247286, + "grad_norm": 0.0, + "learning_rate": 1.1442313605452699e-05, + "loss": 1.2422, + "step": 16018 + }, + { + "epoch": 0.4703447060896118, + "grad_norm": 0.0, + "learning_rate": 1.1441372581817941e-05, + "loss": 1.3379, + "step": 16019 + }, + { + "epoch": 0.47037406776675084, + "grad_norm": 0.0, + "learning_rate": 1.1440431545148487e-05, + "loss": 1.2676, + "step": 16020 + }, + { + "epoch": 0.47040342944388985, + "grad_norm": 0.0, + "learning_rate": 1.1439490495452836e-05, + "loss": 1.29, + "step": 16021 + }, + { + "epoch": 0.4704327911210288, + "grad_norm": 0.0, + "learning_rate": 1.1438549432739505e-05, + "loss": 1.3809, + "step": 16022 + }, + { + "epoch": 0.47046215279816783, + "grad_norm": 0.0, + "learning_rate": 1.1437608357017005e-05, + "loss": 1.2959, + "step": 16023 + }, + { + "epoch": 0.47049151447530685, + "grad_norm": 0.0, + "learning_rate": 1.1436667268293844e-05, + "loss": 1.4062, + "step": 16024 + }, + { + "epoch": 0.4705208761524458, + "grad_norm": 0.0, + "learning_rate": 1.1435726166578532e-05, + "loss": 1.4893, + "step": 16025 + }, + { + "epoch": 0.4705502378295848, + "grad_norm": 0.0, + "learning_rate": 1.1434785051879577e-05, + "loss": 1.3525, + "step": 16026 + }, + { + "epoch": 0.47057959950672384, + "grad_norm": 0.0, + "learning_rate": 1.1433843924205499e-05, + "loss": 1.2783, + "step": 16027 + }, + { + "epoch": 0.4706089611838628, + "grad_norm": 0.0, + "learning_rate": 1.1432902783564801e-05, + "loss": 1.2549, + "step": 16028 + }, + { + "epoch": 0.4706383228610018, + "grad_norm": 0.0, + "learning_rate": 1.1431961629965997e-05, + "loss": 1.2769, + "step": 16029 + }, + { + "epoch": 0.47066768453814084, + "grad_norm": 0.0, + "learning_rate": 1.1431020463417596e-05, + "loss": 1.3418, + "step": 16030 + }, + { + "epoch": 0.4706970462152798, + "grad_norm": 0.0, + "learning_rate": 1.1430079283928112e-05, + "loss": 1.2627, + "step": 16031 + }, + { + "epoch": 0.4707264078924188, + "grad_norm": 0.0, + "learning_rate": 1.1429138091506053e-05, + "loss": 1.3369, + "step": 16032 + }, + { + "epoch": 0.47075576956955784, + "grad_norm": 0.0, + "learning_rate": 1.1428196886159933e-05, + "loss": 1.3555, + "step": 16033 + }, + { + "epoch": 0.4707851312466968, + "grad_norm": 0.0, + "learning_rate": 1.1427255667898262e-05, + "loss": 1.3682, + "step": 16034 + }, + { + "epoch": 0.4708144929238358, + "grad_norm": 0.0, + "learning_rate": 1.1426314436729555e-05, + "loss": 1.2881, + "step": 16035 + }, + { + "epoch": 0.47084385460097483, + "grad_norm": 0.0, + "learning_rate": 1.142537319266232e-05, + "loss": 1.3418, + "step": 16036 + }, + { + "epoch": 0.4708732162781138, + "grad_norm": 0.0, + "learning_rate": 1.1424431935705067e-05, + "loss": 1.3047, + "step": 16037 + }, + { + "epoch": 0.4709025779552528, + "grad_norm": 0.0, + "learning_rate": 1.1423490665866314e-05, + "loss": 1.3555, + "step": 16038 + }, + { + "epoch": 0.4709319396323918, + "grad_norm": 0.0, + "learning_rate": 1.1422549383154573e-05, + "loss": 1.4141, + "step": 16039 + }, + { + "epoch": 0.4709613013095308, + "grad_norm": 0.0, + "learning_rate": 1.1421608087578351e-05, + "loss": 1.3799, + "step": 16040 + }, + { + "epoch": 0.4709906629866698, + "grad_norm": 0.0, + "learning_rate": 1.1420666779146163e-05, + "loss": 1.3975, + "step": 16041 + }, + { + "epoch": 0.4710200246638088, + "grad_norm": 0.0, + "learning_rate": 1.141972545786652e-05, + "loss": 1.2188, + "step": 16042 + }, + { + "epoch": 0.4710493863409478, + "grad_norm": 0.0, + "learning_rate": 1.1418784123747939e-05, + "loss": 1.4365, + "step": 16043 + }, + { + "epoch": 0.4710787480180868, + "grad_norm": 0.0, + "learning_rate": 1.141784277679893e-05, + "loss": 1.3926, + "step": 16044 + }, + { + "epoch": 0.4711081096952258, + "grad_norm": 0.0, + "learning_rate": 1.1416901417028001e-05, + "loss": 1.4053, + "step": 16045 + }, + { + "epoch": 0.4711374713723648, + "grad_norm": 0.0, + "learning_rate": 1.141596004444367e-05, + "loss": 1.4443, + "step": 16046 + }, + { + "epoch": 0.4711668330495038, + "grad_norm": 0.0, + "learning_rate": 1.1415018659054455e-05, + "loss": 1.2275, + "step": 16047 + }, + { + "epoch": 0.47119619472664276, + "grad_norm": 0.0, + "learning_rate": 1.1414077260868862e-05, + "loss": 1.373, + "step": 16048 + }, + { + "epoch": 0.4712255564037818, + "grad_norm": 0.0, + "learning_rate": 1.1413135849895405e-05, + "loss": 1.3452, + "step": 16049 + }, + { + "epoch": 0.4712549180809208, + "grad_norm": 0.0, + "learning_rate": 1.1412194426142597e-05, + "loss": 1.3164, + "step": 16050 + }, + { + "epoch": 0.47128427975805975, + "grad_norm": 0.0, + "learning_rate": 1.1411252989618956e-05, + "loss": 1.5391, + "step": 16051 + }, + { + "epoch": 0.47131364143519877, + "grad_norm": 0.0, + "learning_rate": 1.1410311540332993e-05, + "loss": 1.3613, + "step": 16052 + }, + { + "epoch": 0.4713430031123378, + "grad_norm": 0.0, + "learning_rate": 1.1409370078293218e-05, + "loss": 1.3184, + "step": 16053 + }, + { + "epoch": 0.47137236478947675, + "grad_norm": 0.0, + "learning_rate": 1.1408428603508153e-05, + "loss": 1.2832, + "step": 16054 + }, + { + "epoch": 0.47140172646661577, + "grad_norm": 0.0, + "learning_rate": 1.1407487115986305e-05, + "loss": 1.3652, + "step": 16055 + }, + { + "epoch": 0.4714310881437548, + "grad_norm": 0.0, + "learning_rate": 1.1406545615736191e-05, + "loss": 1.3418, + "step": 16056 + }, + { + "epoch": 0.47146044982089375, + "grad_norm": 0.0, + "learning_rate": 1.1405604102766324e-05, + "loss": 1.3652, + "step": 16057 + }, + { + "epoch": 0.47148981149803276, + "grad_norm": 0.0, + "learning_rate": 1.1404662577085218e-05, + "loss": 1.374, + "step": 16058 + }, + { + "epoch": 0.4715191731751718, + "grad_norm": 0.0, + "learning_rate": 1.1403721038701392e-05, + "loss": 1.2354, + "step": 16059 + }, + { + "epoch": 0.47154853485231074, + "grad_norm": 0.0, + "learning_rate": 1.1402779487623357e-05, + "loss": 1.3086, + "step": 16060 + }, + { + "epoch": 0.47157789652944976, + "grad_norm": 0.0, + "learning_rate": 1.1401837923859624e-05, + "loss": 1.3818, + "step": 16061 + }, + { + "epoch": 0.4716072582065888, + "grad_norm": 0.0, + "learning_rate": 1.1400896347418712e-05, + "loss": 1.2881, + "step": 16062 + }, + { + "epoch": 0.47163661988372774, + "grad_norm": 0.0, + "learning_rate": 1.1399954758309138e-05, + "loss": 1.3389, + "step": 16063 + }, + { + "epoch": 0.47166598156086675, + "grad_norm": 0.0, + "learning_rate": 1.1399013156539416e-05, + "loss": 1.3555, + "step": 16064 + }, + { + "epoch": 0.47169534323800577, + "grad_norm": 0.0, + "learning_rate": 1.1398071542118057e-05, + "loss": 1.3467, + "step": 16065 + }, + { + "epoch": 0.47172470491514473, + "grad_norm": 0.0, + "learning_rate": 1.139712991505358e-05, + "loss": 1.3662, + "step": 16066 + }, + { + "epoch": 0.47175406659228375, + "grad_norm": 0.0, + "learning_rate": 1.13961882753545e-05, + "loss": 1.2686, + "step": 16067 + }, + { + "epoch": 0.47178342826942277, + "grad_norm": 0.0, + "learning_rate": 1.139524662302933e-05, + "loss": 1.4229, + "step": 16068 + }, + { + "epoch": 0.47181278994656173, + "grad_norm": 0.0, + "learning_rate": 1.1394304958086591e-05, + "loss": 1.3281, + "step": 16069 + }, + { + "epoch": 0.47184215162370075, + "grad_norm": 0.0, + "learning_rate": 1.139336328053479e-05, + "loss": 1.2959, + "step": 16070 + }, + { + "epoch": 0.47187151330083976, + "grad_norm": 0.0, + "learning_rate": 1.1392421590382449e-05, + "loss": 1.2549, + "step": 16071 + }, + { + "epoch": 0.4719008749779787, + "grad_norm": 0.0, + "learning_rate": 1.1391479887638086e-05, + "loss": 1.457, + "step": 16072 + }, + { + "epoch": 0.47193023665511774, + "grad_norm": 0.0, + "learning_rate": 1.1390538172310213e-05, + "loss": 1.2686, + "step": 16073 + }, + { + "epoch": 0.47195959833225676, + "grad_norm": 0.0, + "learning_rate": 1.1389596444407346e-05, + "loss": 1.3613, + "step": 16074 + }, + { + "epoch": 0.4719889600093957, + "grad_norm": 0.0, + "learning_rate": 1.1388654703938002e-05, + "loss": 1.3838, + "step": 16075 + }, + { + "epoch": 0.47201832168653474, + "grad_norm": 0.0, + "learning_rate": 1.13877129509107e-05, + "loss": 1.3608, + "step": 16076 + }, + { + "epoch": 0.47204768336367375, + "grad_norm": 0.0, + "learning_rate": 1.1386771185333953e-05, + "loss": 1.3154, + "step": 16077 + }, + { + "epoch": 0.4720770450408127, + "grad_norm": 0.0, + "learning_rate": 1.138582940721628e-05, + "loss": 1.2891, + "step": 16078 + }, + { + "epoch": 0.47210640671795173, + "grad_norm": 0.0, + "learning_rate": 1.1384887616566198e-05, + "loss": 1.3389, + "step": 16079 + }, + { + "epoch": 0.47213576839509075, + "grad_norm": 0.0, + "learning_rate": 1.1383945813392222e-05, + "loss": 1.2529, + "step": 16080 + }, + { + "epoch": 0.4721651300722297, + "grad_norm": 0.0, + "learning_rate": 1.1383003997702869e-05, + "loss": 1.3916, + "step": 16081 + }, + { + "epoch": 0.4721944917493687, + "grad_norm": 0.0, + "learning_rate": 1.1382062169506653e-05, + "loss": 1.3672, + "step": 16082 + }, + { + "epoch": 0.47222385342650774, + "grad_norm": 0.0, + "learning_rate": 1.1381120328812098e-05, + "loss": 1.3086, + "step": 16083 + }, + { + "epoch": 0.4722532151036467, + "grad_norm": 0.0, + "learning_rate": 1.1380178475627719e-05, + "loss": 1.3691, + "step": 16084 + }, + { + "epoch": 0.4722825767807857, + "grad_norm": 0.0, + "learning_rate": 1.1379236609962032e-05, + "loss": 1.2979, + "step": 16085 + }, + { + "epoch": 0.47231193845792474, + "grad_norm": 0.0, + "learning_rate": 1.1378294731823554e-05, + "loss": 1.3809, + "step": 16086 + }, + { + "epoch": 0.4723413001350637, + "grad_norm": 0.0, + "learning_rate": 1.1377352841220803e-05, + "loss": 1.3271, + "step": 16087 + }, + { + "epoch": 0.4723706618122027, + "grad_norm": 0.0, + "learning_rate": 1.13764109381623e-05, + "loss": 1.3027, + "step": 16088 + }, + { + "epoch": 0.47240002348934174, + "grad_norm": 0.0, + "learning_rate": 1.1375469022656563e-05, + "loss": 1.2412, + "step": 16089 + }, + { + "epoch": 0.4724293851664807, + "grad_norm": 0.0, + "learning_rate": 1.1374527094712102e-05, + "loss": 1.3408, + "step": 16090 + }, + { + "epoch": 0.4724587468436197, + "grad_norm": 0.0, + "learning_rate": 1.1373585154337441e-05, + "loss": 1.2764, + "step": 16091 + }, + { + "epoch": 0.47248810852075873, + "grad_norm": 0.0, + "learning_rate": 1.13726432015411e-05, + "loss": 1.2988, + "step": 16092 + }, + { + "epoch": 0.4725174701978977, + "grad_norm": 0.0, + "learning_rate": 1.1371701236331594e-05, + "loss": 1.3721, + "step": 16093 + }, + { + "epoch": 0.4725468318750367, + "grad_norm": 0.0, + "learning_rate": 1.1370759258717441e-05, + "loss": 1.2783, + "step": 16094 + }, + { + "epoch": 0.4725761935521757, + "grad_norm": 0.0, + "learning_rate": 1.1369817268707161e-05, + "loss": 1.2954, + "step": 16095 + }, + { + "epoch": 0.4726055552293147, + "grad_norm": 0.0, + "learning_rate": 1.1368875266309277e-05, + "loss": 1.2871, + "step": 16096 + }, + { + "epoch": 0.4726349169064537, + "grad_norm": 0.0, + "learning_rate": 1.1367933251532299e-05, + "loss": 1.1899, + "step": 16097 + }, + { + "epoch": 0.47266427858359267, + "grad_norm": 0.0, + "learning_rate": 1.1366991224384754e-05, + "loss": 1.2607, + "step": 16098 + }, + { + "epoch": 0.4726936402607317, + "grad_norm": 0.0, + "learning_rate": 1.1366049184875153e-05, + "loss": 1.2617, + "step": 16099 + }, + { + "epoch": 0.4727230019378707, + "grad_norm": 0.0, + "learning_rate": 1.1365107133012021e-05, + "loss": 1.3335, + "step": 16100 + }, + { + "epoch": 0.47275236361500966, + "grad_norm": 0.0, + "learning_rate": 1.1364165068803876e-05, + "loss": 1.3887, + "step": 16101 + }, + { + "epoch": 0.4727817252921487, + "grad_norm": 0.0, + "learning_rate": 1.1363222992259238e-05, + "loss": 1.292, + "step": 16102 + }, + { + "epoch": 0.4728110869692877, + "grad_norm": 0.0, + "learning_rate": 1.1362280903386622e-05, + "loss": 1.2778, + "step": 16103 + }, + { + "epoch": 0.47284044864642666, + "grad_norm": 0.0, + "learning_rate": 1.1361338802194555e-05, + "loss": 1.291, + "step": 16104 + }, + { + "epoch": 0.4728698103235657, + "grad_norm": 0.0, + "learning_rate": 1.1360396688691549e-05, + "loss": 1.4453, + "step": 16105 + }, + { + "epoch": 0.4728991720007047, + "grad_norm": 0.0, + "learning_rate": 1.1359454562886127e-05, + "loss": 1.4277, + "step": 16106 + }, + { + "epoch": 0.47292853367784365, + "grad_norm": 0.0, + "learning_rate": 1.1358512424786812e-05, + "loss": 1.3203, + "step": 16107 + }, + { + "epoch": 0.47295789535498267, + "grad_norm": 0.0, + "learning_rate": 1.1357570274402121e-05, + "loss": 1.3652, + "step": 16108 + }, + { + "epoch": 0.4729872570321217, + "grad_norm": 0.0, + "learning_rate": 1.1356628111740575e-05, + "loss": 1.2695, + "step": 16109 + }, + { + "epoch": 0.47301661870926065, + "grad_norm": 0.0, + "learning_rate": 1.1355685936810692e-05, + "loss": 1.29, + "step": 16110 + }, + { + "epoch": 0.47304598038639967, + "grad_norm": 0.0, + "learning_rate": 1.1354743749620993e-05, + "loss": 1.2754, + "step": 16111 + }, + { + "epoch": 0.4730753420635387, + "grad_norm": 0.0, + "learning_rate": 1.1353801550179999e-05, + "loss": 1.3691, + "step": 16112 + }, + { + "epoch": 0.47310470374067765, + "grad_norm": 0.0, + "learning_rate": 1.1352859338496237e-05, + "loss": 1.2422, + "step": 16113 + }, + { + "epoch": 0.47313406541781666, + "grad_norm": 0.0, + "learning_rate": 1.1351917114578215e-05, + "loss": 1.4092, + "step": 16114 + }, + { + "epoch": 0.4731634270949557, + "grad_norm": 0.0, + "learning_rate": 1.135097487843446e-05, + "loss": 1.2842, + "step": 16115 + }, + { + "epoch": 0.47319278877209464, + "grad_norm": 0.0, + "learning_rate": 1.1350032630073495e-05, + "loss": 1.3604, + "step": 16116 + }, + { + "epoch": 0.47322215044923366, + "grad_norm": 0.0, + "learning_rate": 1.1349090369503841e-05, + "loss": 1.2656, + "step": 16117 + }, + { + "epoch": 0.4732515121263727, + "grad_norm": 0.0, + "learning_rate": 1.1348148096734017e-05, + "loss": 1.3291, + "step": 16118 + }, + { + "epoch": 0.47328087380351164, + "grad_norm": 0.0, + "learning_rate": 1.1347205811772542e-05, + "loss": 1.3994, + "step": 16119 + }, + { + "epoch": 0.47331023548065065, + "grad_norm": 0.0, + "learning_rate": 1.1346263514627939e-05, + "loss": 1.3438, + "step": 16120 + }, + { + "epoch": 0.47333959715778967, + "grad_norm": 0.0, + "learning_rate": 1.1345321205308735e-05, + "loss": 1.2295, + "step": 16121 + }, + { + "epoch": 0.47336895883492863, + "grad_norm": 0.0, + "learning_rate": 1.1344378883823443e-05, + "loss": 1.2949, + "step": 16122 + }, + { + "epoch": 0.47339832051206765, + "grad_norm": 0.0, + "learning_rate": 1.1343436550180589e-05, + "loss": 1.293, + "step": 16123 + }, + { + "epoch": 0.47342768218920667, + "grad_norm": 0.0, + "learning_rate": 1.1342494204388695e-05, + "loss": 1.3984, + "step": 16124 + }, + { + "epoch": 0.47345704386634563, + "grad_norm": 0.0, + "learning_rate": 1.1341551846456283e-05, + "loss": 1.2988, + "step": 16125 + }, + { + "epoch": 0.47348640554348465, + "grad_norm": 0.0, + "learning_rate": 1.1340609476391873e-05, + "loss": 1.3115, + "step": 16126 + }, + { + "epoch": 0.47351576722062366, + "grad_norm": 0.0, + "learning_rate": 1.1339667094203988e-05, + "loss": 1.2964, + "step": 16127 + }, + { + "epoch": 0.4735451288977626, + "grad_norm": 0.0, + "learning_rate": 1.1338724699901152e-05, + "loss": 1.3643, + "step": 16128 + }, + { + "epoch": 0.47357449057490164, + "grad_norm": 0.0, + "learning_rate": 1.1337782293491884e-05, + "loss": 1.5303, + "step": 16129 + }, + { + "epoch": 0.47360385225204066, + "grad_norm": 0.0, + "learning_rate": 1.1336839874984709e-05, + "loss": 1.2852, + "step": 16130 + }, + { + "epoch": 0.4736332139291796, + "grad_norm": 0.0, + "learning_rate": 1.1335897444388146e-05, + "loss": 1.1963, + "step": 16131 + }, + { + "epoch": 0.47366257560631864, + "grad_norm": 0.0, + "learning_rate": 1.1334955001710721e-05, + "loss": 1.3057, + "step": 16132 + }, + { + "epoch": 0.47369193728345765, + "grad_norm": 0.0, + "learning_rate": 1.133401254696096e-05, + "loss": 1.3525, + "step": 16133 + }, + { + "epoch": 0.4737212989605966, + "grad_norm": 0.0, + "learning_rate": 1.1333070080147377e-05, + "loss": 1.293, + "step": 16134 + }, + { + "epoch": 0.47375066063773563, + "grad_norm": 0.0, + "learning_rate": 1.1332127601278503e-05, + "loss": 1.2183, + "step": 16135 + }, + { + "epoch": 0.47378002231487465, + "grad_norm": 0.0, + "learning_rate": 1.1331185110362855e-05, + "loss": 1.335, + "step": 16136 + }, + { + "epoch": 0.4738093839920136, + "grad_norm": 0.0, + "learning_rate": 1.1330242607408963e-05, + "loss": 1.3545, + "step": 16137 + }, + { + "epoch": 0.47383874566915263, + "grad_norm": 0.0, + "learning_rate": 1.1329300092425345e-05, + "loss": 1.5039, + "step": 16138 + }, + { + "epoch": 0.47386810734629164, + "grad_norm": 0.0, + "learning_rate": 1.1328357565420524e-05, + "loss": 1.3154, + "step": 16139 + }, + { + "epoch": 0.4738974690234306, + "grad_norm": 0.0, + "learning_rate": 1.1327415026403029e-05, + "loss": 1.4004, + "step": 16140 + }, + { + "epoch": 0.4739268307005696, + "grad_norm": 0.0, + "learning_rate": 1.1326472475381376e-05, + "loss": 1.3906, + "step": 16141 + }, + { + "epoch": 0.47395619237770864, + "grad_norm": 0.0, + "learning_rate": 1.1325529912364095e-05, + "loss": 1.2236, + "step": 16142 + }, + { + "epoch": 0.4739855540548476, + "grad_norm": 0.0, + "learning_rate": 1.1324587337359705e-05, + "loss": 1.4082, + "step": 16143 + }, + { + "epoch": 0.4740149157319866, + "grad_norm": 0.0, + "learning_rate": 1.1323644750376735e-05, + "loss": 1.3066, + "step": 16144 + }, + { + "epoch": 0.47404427740912564, + "grad_norm": 0.0, + "learning_rate": 1.1322702151423709e-05, + "loss": 1.2217, + "step": 16145 + }, + { + "epoch": 0.4740736390862646, + "grad_norm": 0.0, + "learning_rate": 1.1321759540509142e-05, + "loss": 1.3447, + "step": 16146 + }, + { + "epoch": 0.4741030007634036, + "grad_norm": 0.0, + "learning_rate": 1.1320816917641569e-05, + "loss": 1.3594, + "step": 16147 + }, + { + "epoch": 0.4741323624405426, + "grad_norm": 0.0, + "learning_rate": 1.131987428282951e-05, + "loss": 1.3838, + "step": 16148 + }, + { + "epoch": 0.4741617241176816, + "grad_norm": 0.0, + "learning_rate": 1.131893163608149e-05, + "loss": 1.2988, + "step": 16149 + }, + { + "epoch": 0.4741910857948206, + "grad_norm": 0.0, + "learning_rate": 1.1317988977406034e-05, + "loss": 1.3311, + "step": 16150 + }, + { + "epoch": 0.47422044747195957, + "grad_norm": 0.0, + "learning_rate": 1.1317046306811665e-05, + "loss": 1.1958, + "step": 16151 + }, + { + "epoch": 0.4742498091490986, + "grad_norm": 0.0, + "learning_rate": 1.131610362430691e-05, + "loss": 1.3965, + "step": 16152 + }, + { + "epoch": 0.4742791708262376, + "grad_norm": 0.0, + "learning_rate": 1.1315160929900288e-05, + "loss": 1.375, + "step": 16153 + }, + { + "epoch": 0.47430853250337657, + "grad_norm": 0.0, + "learning_rate": 1.1314218223600335e-05, + "loss": 1.3926, + "step": 16154 + }, + { + "epoch": 0.4743378941805156, + "grad_norm": 0.0, + "learning_rate": 1.1313275505415568e-05, + "loss": 1.3613, + "step": 16155 + }, + { + "epoch": 0.4743672558576546, + "grad_norm": 0.0, + "learning_rate": 1.1312332775354513e-05, + "loss": 1.3662, + "step": 16156 + }, + { + "epoch": 0.47439661753479356, + "grad_norm": 0.0, + "learning_rate": 1.13113900334257e-05, + "loss": 1.4121, + "step": 16157 + }, + { + "epoch": 0.4744259792119326, + "grad_norm": 0.0, + "learning_rate": 1.1310447279637651e-05, + "loss": 1.2217, + "step": 16158 + }, + { + "epoch": 0.4744553408890716, + "grad_norm": 0.0, + "learning_rate": 1.1309504513998888e-05, + "loss": 1.2324, + "step": 16159 + }, + { + "epoch": 0.47448470256621056, + "grad_norm": 0.0, + "learning_rate": 1.130856173651794e-05, + "loss": 1.3281, + "step": 16160 + }, + { + "epoch": 0.4745140642433496, + "grad_norm": 0.0, + "learning_rate": 1.1307618947203336e-05, + "loss": 1.2832, + "step": 16161 + }, + { + "epoch": 0.4745434259204886, + "grad_norm": 0.0, + "learning_rate": 1.13066761460636e-05, + "loss": 1.4639, + "step": 16162 + }, + { + "epoch": 0.47457278759762755, + "grad_norm": 0.0, + "learning_rate": 1.1305733333107254e-05, + "loss": 1.4443, + "step": 16163 + }, + { + "epoch": 0.47460214927476657, + "grad_norm": 0.0, + "learning_rate": 1.1304790508342827e-05, + "loss": 1.1738, + "step": 16164 + }, + { + "epoch": 0.4746315109519056, + "grad_norm": 0.0, + "learning_rate": 1.1303847671778844e-05, + "loss": 1.3604, + "step": 16165 + }, + { + "epoch": 0.47466087262904455, + "grad_norm": 0.0, + "learning_rate": 1.1302904823423838e-05, + "loss": 1.2412, + "step": 16166 + }, + { + "epoch": 0.47469023430618357, + "grad_norm": 0.0, + "learning_rate": 1.1301961963286328e-05, + "loss": 1.4512, + "step": 16167 + }, + { + "epoch": 0.4747195959833226, + "grad_norm": 0.0, + "learning_rate": 1.130101909137484e-05, + "loss": 1.4639, + "step": 16168 + }, + { + "epoch": 0.47474895766046155, + "grad_norm": 0.0, + "learning_rate": 1.1300076207697903e-05, + "loss": 1.2217, + "step": 16169 + }, + { + "epoch": 0.47477831933760056, + "grad_norm": 0.0, + "learning_rate": 1.1299133312264048e-05, + "loss": 1.3154, + "step": 16170 + }, + { + "epoch": 0.4748076810147396, + "grad_norm": 0.0, + "learning_rate": 1.1298190405081795e-05, + "loss": 1.3711, + "step": 16171 + }, + { + "epoch": 0.47483704269187854, + "grad_norm": 0.0, + "learning_rate": 1.1297247486159672e-05, + "loss": 1.4102, + "step": 16172 + }, + { + "epoch": 0.47486640436901756, + "grad_norm": 0.0, + "learning_rate": 1.1296304555506211e-05, + "loss": 1.3848, + "step": 16173 + }, + { + "epoch": 0.4748957660461566, + "grad_norm": 0.0, + "learning_rate": 1.1295361613129934e-05, + "loss": 1.457, + "step": 16174 + }, + { + "epoch": 0.47492512772329554, + "grad_norm": 0.0, + "learning_rate": 1.129441865903937e-05, + "loss": 1.4639, + "step": 16175 + }, + { + "epoch": 0.47495448940043455, + "grad_norm": 0.0, + "learning_rate": 1.1293475693243048e-05, + "loss": 1.3672, + "step": 16176 + }, + { + "epoch": 0.47498385107757357, + "grad_norm": 0.0, + "learning_rate": 1.1292532715749493e-05, + "loss": 1.3438, + "step": 16177 + }, + { + "epoch": 0.47501321275471253, + "grad_norm": 0.0, + "learning_rate": 1.1291589726567234e-05, + "loss": 1.3428, + "step": 16178 + }, + { + "epoch": 0.47504257443185155, + "grad_norm": 0.0, + "learning_rate": 1.1290646725704799e-05, + "loss": 1.3467, + "step": 16179 + }, + { + "epoch": 0.47507193610899057, + "grad_norm": 0.0, + "learning_rate": 1.1289703713170714e-05, + "loss": 1.2764, + "step": 16180 + }, + { + "epoch": 0.47510129778612953, + "grad_norm": 0.0, + "learning_rate": 1.1288760688973508e-05, + "loss": 1.3267, + "step": 16181 + }, + { + "epoch": 0.47513065946326855, + "grad_norm": 0.0, + "learning_rate": 1.128781765312171e-05, + "loss": 1.2852, + "step": 16182 + }, + { + "epoch": 0.47516002114040756, + "grad_norm": 0.0, + "learning_rate": 1.1286874605623847e-05, + "loss": 1.3037, + "step": 16183 + }, + { + "epoch": 0.4751893828175465, + "grad_norm": 0.0, + "learning_rate": 1.1285931546488447e-05, + "loss": 1.2515, + "step": 16184 + }, + { + "epoch": 0.47521874449468554, + "grad_norm": 0.0, + "learning_rate": 1.1284988475724037e-05, + "loss": 1.3037, + "step": 16185 + }, + { + "epoch": 0.47524810617182456, + "grad_norm": 0.0, + "learning_rate": 1.1284045393339152e-05, + "loss": 1.4307, + "step": 16186 + }, + { + "epoch": 0.4752774678489635, + "grad_norm": 0.0, + "learning_rate": 1.1283102299342312e-05, + "loss": 1.3936, + "step": 16187 + }, + { + "epoch": 0.47530682952610254, + "grad_norm": 0.0, + "learning_rate": 1.1282159193742049e-05, + "loss": 1.3564, + "step": 16188 + }, + { + "epoch": 0.47533619120324155, + "grad_norm": 0.0, + "learning_rate": 1.1281216076546893e-05, + "loss": 1.3594, + "step": 16189 + }, + { + "epoch": 0.4753655528803805, + "grad_norm": 0.0, + "learning_rate": 1.128027294776537e-05, + "loss": 1.3389, + "step": 16190 + }, + { + "epoch": 0.47539491455751953, + "grad_norm": 0.0, + "learning_rate": 1.1279329807406015e-05, + "loss": 1.2622, + "step": 16191 + }, + { + "epoch": 0.47542427623465855, + "grad_norm": 0.0, + "learning_rate": 1.127838665547735e-05, + "loss": 1.292, + "step": 16192 + }, + { + "epoch": 0.4754536379117975, + "grad_norm": 0.0, + "learning_rate": 1.1277443491987906e-05, + "loss": 1.2432, + "step": 16193 + }, + { + "epoch": 0.47548299958893653, + "grad_norm": 0.0, + "learning_rate": 1.1276500316946217e-05, + "loss": 1.2314, + "step": 16194 + }, + { + "epoch": 0.47551236126607554, + "grad_norm": 0.0, + "learning_rate": 1.1275557130360807e-05, + "loss": 1.4014, + "step": 16195 + }, + { + "epoch": 0.4755417229432145, + "grad_norm": 0.0, + "learning_rate": 1.1274613932240204e-05, + "loss": 1.3018, + "step": 16196 + }, + { + "epoch": 0.4755710846203535, + "grad_norm": 0.0, + "learning_rate": 1.1273670722592945e-05, + "loss": 1.2324, + "step": 16197 + }, + { + "epoch": 0.4756004462974925, + "grad_norm": 0.0, + "learning_rate": 1.1272727501427554e-05, + "loss": 1.3325, + "step": 16198 + }, + { + "epoch": 0.4756298079746315, + "grad_norm": 0.0, + "learning_rate": 1.1271784268752564e-05, + "loss": 1.2656, + "step": 16199 + }, + { + "epoch": 0.4756591696517705, + "grad_norm": 0.0, + "learning_rate": 1.12708410245765e-05, + "loss": 1.3311, + "step": 16200 + }, + { + "epoch": 0.4756885313289095, + "grad_norm": 0.0, + "learning_rate": 1.1269897768907895e-05, + "loss": 1.3584, + "step": 16201 + }, + { + "epoch": 0.4757178930060485, + "grad_norm": 0.0, + "learning_rate": 1.126895450175528e-05, + "loss": 1.3657, + "step": 16202 + }, + { + "epoch": 0.4757472546831875, + "grad_norm": 0.0, + "learning_rate": 1.1268011223127187e-05, + "loss": 1.2959, + "step": 16203 + }, + { + "epoch": 0.4757766163603265, + "grad_norm": 0.0, + "learning_rate": 1.126706793303214e-05, + "loss": 1.4014, + "step": 16204 + }, + { + "epoch": 0.4758059780374655, + "grad_norm": 0.0, + "learning_rate": 1.1266124631478676e-05, + "loss": 1.2666, + "step": 16205 + }, + { + "epoch": 0.4758353397146045, + "grad_norm": 0.0, + "learning_rate": 1.1265181318475323e-05, + "loss": 1.2959, + "step": 16206 + }, + { + "epoch": 0.47586470139174347, + "grad_norm": 0.0, + "learning_rate": 1.1264237994030612e-05, + "loss": 1.4521, + "step": 16207 + }, + { + "epoch": 0.4758940630688825, + "grad_norm": 0.0, + "learning_rate": 1.126329465815307e-05, + "loss": 1.3467, + "step": 16208 + }, + { + "epoch": 0.4759234247460215, + "grad_norm": 0.0, + "learning_rate": 1.1262351310851232e-05, + "loss": 1.3223, + "step": 16209 + }, + { + "epoch": 0.47595278642316047, + "grad_norm": 0.0, + "learning_rate": 1.1261407952133627e-05, + "loss": 1.4346, + "step": 16210 + }, + { + "epoch": 0.4759821481002995, + "grad_norm": 0.0, + "learning_rate": 1.126046458200879e-05, + "loss": 1.2852, + "step": 16211 + }, + { + "epoch": 0.4760115097774385, + "grad_norm": 0.0, + "learning_rate": 1.1259521200485247e-05, + "loss": 1.3486, + "step": 16212 + }, + { + "epoch": 0.47604087145457746, + "grad_norm": 0.0, + "learning_rate": 1.1258577807571528e-05, + "loss": 1.3486, + "step": 16213 + }, + { + "epoch": 0.4760702331317165, + "grad_norm": 0.0, + "learning_rate": 1.1257634403276173e-05, + "loss": 1.3076, + "step": 16214 + }, + { + "epoch": 0.4760995948088555, + "grad_norm": 0.0, + "learning_rate": 1.1256690987607705e-05, + "loss": 1.2705, + "step": 16215 + }, + { + "epoch": 0.47612895648599446, + "grad_norm": 0.0, + "learning_rate": 1.125574756057466e-05, + "loss": 1.3926, + "step": 16216 + }, + { + "epoch": 0.4761583181631335, + "grad_norm": 0.0, + "learning_rate": 1.1254804122185567e-05, + "loss": 1.3174, + "step": 16217 + }, + { + "epoch": 0.4761876798402725, + "grad_norm": 0.0, + "learning_rate": 1.125386067244896e-05, + "loss": 1.1621, + "step": 16218 + }, + { + "epoch": 0.47621704151741145, + "grad_norm": 0.0, + "learning_rate": 1.1252917211373369e-05, + "loss": 1.3623, + "step": 16219 + }, + { + "epoch": 0.47624640319455047, + "grad_norm": 0.0, + "learning_rate": 1.1251973738967326e-05, + "loss": 1.2959, + "step": 16220 + }, + { + "epoch": 0.4762757648716895, + "grad_norm": 0.0, + "learning_rate": 1.1251030255239364e-05, + "loss": 1.3242, + "step": 16221 + }, + { + "epoch": 0.47630512654882845, + "grad_norm": 0.0, + "learning_rate": 1.1250086760198015e-05, + "loss": 1.3604, + "step": 16222 + }, + { + "epoch": 0.47633448822596747, + "grad_norm": 0.0, + "learning_rate": 1.1249143253851813e-05, + "loss": 1.2686, + "step": 16223 + }, + { + "epoch": 0.4763638499031065, + "grad_norm": 0.0, + "learning_rate": 1.1248199736209288e-05, + "loss": 1.4541, + "step": 16224 + }, + { + "epoch": 0.47639321158024545, + "grad_norm": 0.0, + "learning_rate": 1.1247256207278972e-05, + "loss": 1.3584, + "step": 16225 + }, + { + "epoch": 0.47642257325738446, + "grad_norm": 0.0, + "learning_rate": 1.12463126670694e-05, + "loss": 1.2578, + "step": 16226 + }, + { + "epoch": 0.4764519349345235, + "grad_norm": 0.0, + "learning_rate": 1.1245369115589102e-05, + "loss": 1.373, + "step": 16227 + }, + { + "epoch": 0.47648129661166244, + "grad_norm": 0.0, + "learning_rate": 1.1244425552846616e-05, + "loss": 1.2881, + "step": 16228 + }, + { + "epoch": 0.47651065828880146, + "grad_norm": 0.0, + "learning_rate": 1.1243481978850466e-05, + "loss": 1.2842, + "step": 16229 + }, + { + "epoch": 0.4765400199659405, + "grad_norm": 0.0, + "learning_rate": 1.1242538393609192e-05, + "loss": 1.2749, + "step": 16230 + }, + { + "epoch": 0.47656938164307944, + "grad_norm": 0.0, + "learning_rate": 1.1241594797131324e-05, + "loss": 1.3389, + "step": 16231 + }, + { + "epoch": 0.47659874332021845, + "grad_norm": 0.0, + "learning_rate": 1.12406511894254e-05, + "loss": 1.2808, + "step": 16232 + }, + { + "epoch": 0.47662810499735747, + "grad_norm": 0.0, + "learning_rate": 1.1239707570499945e-05, + "loss": 1.3818, + "step": 16233 + }, + { + "epoch": 0.47665746667449643, + "grad_norm": 0.0, + "learning_rate": 1.1238763940363498e-05, + "loss": 1.3018, + "step": 16234 + }, + { + "epoch": 0.47668682835163545, + "grad_norm": 0.0, + "learning_rate": 1.1237820299024593e-05, + "loss": 1.2334, + "step": 16235 + }, + { + "epoch": 0.47671619002877447, + "grad_norm": 0.0, + "learning_rate": 1.1236876646491763e-05, + "loss": 1.3652, + "step": 16236 + }, + { + "epoch": 0.47674555170591343, + "grad_norm": 0.0, + "learning_rate": 1.1235932982773539e-05, + "loss": 1.3252, + "step": 16237 + }, + { + "epoch": 0.47677491338305245, + "grad_norm": 0.0, + "learning_rate": 1.1234989307878456e-05, + "loss": 1.3057, + "step": 16238 + }, + { + "epoch": 0.47680427506019146, + "grad_norm": 0.0, + "learning_rate": 1.1234045621815048e-05, + "loss": 1.4395, + "step": 16239 + }, + { + "epoch": 0.4768336367373304, + "grad_norm": 0.0, + "learning_rate": 1.1233101924591851e-05, + "loss": 1.3379, + "step": 16240 + }, + { + "epoch": 0.47686299841446944, + "grad_norm": 0.0, + "learning_rate": 1.1232158216217397e-05, + "loss": 1.2002, + "step": 16241 + }, + { + "epoch": 0.47689236009160846, + "grad_norm": 0.0, + "learning_rate": 1.123121449670022e-05, + "loss": 1.3135, + "step": 16242 + }, + { + "epoch": 0.4769217217687474, + "grad_norm": 0.0, + "learning_rate": 1.1230270766048853e-05, + "loss": 1.3301, + "step": 16243 + }, + { + "epoch": 0.47695108344588644, + "grad_norm": 0.0, + "learning_rate": 1.1229327024271839e-05, + "loss": 1.3457, + "step": 16244 + }, + { + "epoch": 0.47698044512302545, + "grad_norm": 0.0, + "learning_rate": 1.12283832713777e-05, + "loss": 1.4541, + "step": 16245 + }, + { + "epoch": 0.4770098068001644, + "grad_norm": 0.0, + "learning_rate": 1.122743950737498e-05, + "loss": 1.2754, + "step": 16246 + }, + { + "epoch": 0.47703916847730343, + "grad_norm": 0.0, + "learning_rate": 1.1226495732272207e-05, + "loss": 1.2202, + "step": 16247 + }, + { + "epoch": 0.47706853015444245, + "grad_norm": 0.0, + "learning_rate": 1.1225551946077922e-05, + "loss": 1.3086, + "step": 16248 + }, + { + "epoch": 0.4770978918315814, + "grad_norm": 0.0, + "learning_rate": 1.1224608148800656e-05, + "loss": 1.3389, + "step": 16249 + }, + { + "epoch": 0.47712725350872043, + "grad_norm": 0.0, + "learning_rate": 1.1223664340448942e-05, + "loss": 1.3008, + "step": 16250 + }, + { + "epoch": 0.4771566151858594, + "grad_norm": 0.0, + "learning_rate": 1.1222720521031321e-05, + "loss": 1.2773, + "step": 16251 + }, + { + "epoch": 0.4771859768629984, + "grad_norm": 0.0, + "learning_rate": 1.1221776690556325e-05, + "loss": 1.3633, + "step": 16252 + }, + { + "epoch": 0.4772153385401374, + "grad_norm": 0.0, + "learning_rate": 1.1220832849032487e-05, + "loss": 1.4482, + "step": 16253 + }, + { + "epoch": 0.4772447002172764, + "grad_norm": 0.0, + "learning_rate": 1.1219888996468346e-05, + "loss": 1.2363, + "step": 16254 + }, + { + "epoch": 0.4772740618944154, + "grad_norm": 0.0, + "learning_rate": 1.1218945132872436e-05, + "loss": 1.3164, + "step": 16255 + }, + { + "epoch": 0.4773034235715544, + "grad_norm": 0.0, + "learning_rate": 1.1218001258253298e-05, + "loss": 1.4727, + "step": 16256 + }, + { + "epoch": 0.4773327852486934, + "grad_norm": 0.0, + "learning_rate": 1.121705737261946e-05, + "loss": 1.4258, + "step": 16257 + }, + { + "epoch": 0.4773621469258324, + "grad_norm": 0.0, + "learning_rate": 1.1216113475979458e-05, + "loss": 1.3105, + "step": 16258 + }, + { + "epoch": 0.4773915086029714, + "grad_norm": 0.0, + "learning_rate": 1.121516956834183e-05, + "loss": 1.2832, + "step": 16259 + }, + { + "epoch": 0.4774208702801104, + "grad_norm": 0.0, + "learning_rate": 1.1214225649715116e-05, + "loss": 1.2129, + "step": 16260 + }, + { + "epoch": 0.4774502319572494, + "grad_norm": 0.0, + "learning_rate": 1.1213281720107846e-05, + "loss": 1.2197, + "step": 16261 + }, + { + "epoch": 0.4774795936343884, + "grad_norm": 0.0, + "learning_rate": 1.1212337779528559e-05, + "loss": 1.3545, + "step": 16262 + }, + { + "epoch": 0.47750895531152737, + "grad_norm": 0.0, + "learning_rate": 1.121139382798579e-05, + "loss": 1.3945, + "step": 16263 + }, + { + "epoch": 0.4775383169886664, + "grad_norm": 0.0, + "learning_rate": 1.121044986548808e-05, + "loss": 1.2754, + "step": 16264 + }, + { + "epoch": 0.4775676786658054, + "grad_norm": 0.0, + "learning_rate": 1.1209505892043957e-05, + "loss": 1.2271, + "step": 16265 + }, + { + "epoch": 0.47759704034294437, + "grad_norm": 0.0, + "learning_rate": 1.1208561907661968e-05, + "loss": 1.3242, + "step": 16266 + }, + { + "epoch": 0.4776264020200834, + "grad_norm": 0.0, + "learning_rate": 1.1207617912350638e-05, + "loss": 1.3115, + "step": 16267 + }, + { + "epoch": 0.4776557636972224, + "grad_norm": 0.0, + "learning_rate": 1.1206673906118515e-05, + "loss": 1.2227, + "step": 16268 + }, + { + "epoch": 0.47768512537436136, + "grad_norm": 0.0, + "learning_rate": 1.1205729888974129e-05, + "loss": 1.3574, + "step": 16269 + }, + { + "epoch": 0.4777144870515004, + "grad_norm": 0.0, + "learning_rate": 1.1204785860926018e-05, + "loss": 1.3135, + "step": 16270 + }, + { + "epoch": 0.4777438487286394, + "grad_norm": 0.0, + "learning_rate": 1.120384182198272e-05, + "loss": 1.2783, + "step": 16271 + }, + { + "epoch": 0.47777321040577836, + "grad_norm": 0.0, + "learning_rate": 1.1202897772152774e-05, + "loss": 1.4033, + "step": 16272 + }, + { + "epoch": 0.4778025720829174, + "grad_norm": 0.0, + "learning_rate": 1.1201953711444713e-05, + "loss": 1.4639, + "step": 16273 + }, + { + "epoch": 0.4778319337600564, + "grad_norm": 0.0, + "learning_rate": 1.1201009639867077e-05, + "loss": 1.3008, + "step": 16274 + }, + { + "epoch": 0.47786129543719535, + "grad_norm": 0.0, + "learning_rate": 1.1200065557428404e-05, + "loss": 1.313, + "step": 16275 + }, + { + "epoch": 0.47789065711433437, + "grad_norm": 0.0, + "learning_rate": 1.1199121464137233e-05, + "loss": 1.1973, + "step": 16276 + }, + { + "epoch": 0.4779200187914734, + "grad_norm": 0.0, + "learning_rate": 1.1198177360002099e-05, + "loss": 1.3936, + "step": 16277 + }, + { + "epoch": 0.47794938046861235, + "grad_norm": 0.0, + "learning_rate": 1.1197233245031538e-05, + "loss": 1.3213, + "step": 16278 + }, + { + "epoch": 0.47797874214575137, + "grad_norm": 0.0, + "learning_rate": 1.119628911923409e-05, + "loss": 1.2393, + "step": 16279 + }, + { + "epoch": 0.4780081038228904, + "grad_norm": 0.0, + "learning_rate": 1.1195344982618292e-05, + "loss": 1.2266, + "step": 16280 + }, + { + "epoch": 0.47803746550002935, + "grad_norm": 0.0, + "learning_rate": 1.1194400835192686e-05, + "loss": 1.3232, + "step": 16281 + }, + { + "epoch": 0.47806682717716836, + "grad_norm": 0.0, + "learning_rate": 1.1193456676965806e-05, + "loss": 1.1055, + "step": 16282 + }, + { + "epoch": 0.4780961888543074, + "grad_norm": 0.0, + "learning_rate": 1.1192512507946191e-05, + "loss": 1.2393, + "step": 16283 + }, + { + "epoch": 0.47812555053144634, + "grad_norm": 0.0, + "learning_rate": 1.1191568328142384e-05, + "loss": 1.2939, + "step": 16284 + }, + { + "epoch": 0.47815491220858536, + "grad_norm": 0.0, + "learning_rate": 1.1190624137562918e-05, + "loss": 1.3506, + "step": 16285 + }, + { + "epoch": 0.4781842738857244, + "grad_norm": 0.0, + "learning_rate": 1.1189679936216331e-05, + "loss": 1.3115, + "step": 16286 + }, + { + "epoch": 0.47821363556286334, + "grad_norm": 0.0, + "learning_rate": 1.1188735724111161e-05, + "loss": 1.4287, + "step": 16287 + }, + { + "epoch": 0.47824299724000235, + "grad_norm": 0.0, + "learning_rate": 1.1187791501255953e-05, + "loss": 1.1846, + "step": 16288 + }, + { + "epoch": 0.47827235891714137, + "grad_norm": 0.0, + "learning_rate": 1.1186847267659245e-05, + "loss": 1.293, + "step": 16289 + }, + { + "epoch": 0.47830172059428033, + "grad_norm": 0.0, + "learning_rate": 1.1185903023329567e-05, + "loss": 1.2285, + "step": 16290 + }, + { + "epoch": 0.47833108227141935, + "grad_norm": 0.0, + "learning_rate": 1.1184958768275467e-05, + "loss": 1.3301, + "step": 16291 + }, + { + "epoch": 0.47836044394855837, + "grad_norm": 0.0, + "learning_rate": 1.118401450250548e-05, + "loss": 1.3525, + "step": 16292 + }, + { + "epoch": 0.47838980562569733, + "grad_norm": 0.0, + "learning_rate": 1.118307022602815e-05, + "loss": 1.3242, + "step": 16293 + }, + { + "epoch": 0.47841916730283635, + "grad_norm": 0.0, + "learning_rate": 1.1182125938852011e-05, + "loss": 1.2959, + "step": 16294 + }, + { + "epoch": 0.47844852897997536, + "grad_norm": 0.0, + "learning_rate": 1.1181181640985605e-05, + "loss": 1.3633, + "step": 16295 + }, + { + "epoch": 0.4784778906571143, + "grad_norm": 0.0, + "learning_rate": 1.1180237332437472e-05, + "loss": 1.2432, + "step": 16296 + }, + { + "epoch": 0.47850725233425334, + "grad_norm": 0.0, + "learning_rate": 1.117929301321615e-05, + "loss": 1.0645, + "step": 16297 + }, + { + "epoch": 0.47853661401139236, + "grad_norm": 0.0, + "learning_rate": 1.1178348683330178e-05, + "loss": 1.3633, + "step": 16298 + }, + { + "epoch": 0.4785659756885313, + "grad_norm": 0.0, + "learning_rate": 1.1177404342788096e-05, + "loss": 1.418, + "step": 16299 + }, + { + "epoch": 0.47859533736567034, + "grad_norm": 0.0, + "learning_rate": 1.1176459991598446e-05, + "loss": 1.3838, + "step": 16300 + }, + { + "epoch": 0.4786246990428093, + "grad_norm": 0.0, + "learning_rate": 1.1175515629769768e-05, + "loss": 1.2773, + "step": 16301 + }, + { + "epoch": 0.4786540607199483, + "grad_norm": 0.0, + "learning_rate": 1.11745712573106e-05, + "loss": 1.269, + "step": 16302 + }, + { + "epoch": 0.47868342239708733, + "grad_norm": 0.0, + "learning_rate": 1.1173626874229483e-05, + "loss": 1.3594, + "step": 16303 + }, + { + "epoch": 0.4787127840742263, + "grad_norm": 0.0, + "learning_rate": 1.1172682480534957e-05, + "loss": 1.2607, + "step": 16304 + }, + { + "epoch": 0.4787421457513653, + "grad_norm": 0.0, + "learning_rate": 1.1171738076235568e-05, + "loss": 1.2549, + "step": 16305 + }, + { + "epoch": 0.47877150742850433, + "grad_norm": 0.0, + "learning_rate": 1.117079366133985e-05, + "loss": 1.3555, + "step": 16306 + }, + { + "epoch": 0.4788008691056433, + "grad_norm": 0.0, + "learning_rate": 1.1169849235856341e-05, + "loss": 1.3828, + "step": 16307 + }, + { + "epoch": 0.4788302307827823, + "grad_norm": 0.0, + "learning_rate": 1.1168904799793588e-05, + "loss": 1.2085, + "step": 16308 + }, + { + "epoch": 0.4788595924599213, + "grad_norm": 0.0, + "learning_rate": 1.116796035316013e-05, + "loss": 1.248, + "step": 16309 + }, + { + "epoch": 0.4788889541370603, + "grad_norm": 0.0, + "learning_rate": 1.1167015895964505e-05, + "loss": 1.2656, + "step": 16310 + }, + { + "epoch": 0.4789183158141993, + "grad_norm": 0.0, + "learning_rate": 1.1166071428215257e-05, + "loss": 1.2222, + "step": 16311 + }, + { + "epoch": 0.4789476774913383, + "grad_norm": 0.0, + "learning_rate": 1.1165126949920928e-05, + "loss": 1.4521, + "step": 16312 + }, + { + "epoch": 0.4789770391684773, + "grad_norm": 0.0, + "learning_rate": 1.1164182461090059e-05, + "loss": 1.3613, + "step": 16313 + }, + { + "epoch": 0.4790064008456163, + "grad_norm": 0.0, + "learning_rate": 1.1163237961731186e-05, + "loss": 1.4004, + "step": 16314 + }, + { + "epoch": 0.4790357625227553, + "grad_norm": 0.0, + "learning_rate": 1.1162293451852859e-05, + "loss": 1.3945, + "step": 16315 + }, + { + "epoch": 0.4790651241998943, + "grad_norm": 0.0, + "learning_rate": 1.116134893146361e-05, + "loss": 1.2266, + "step": 16316 + }, + { + "epoch": 0.4790944858770333, + "grad_norm": 0.0, + "learning_rate": 1.1160404400571988e-05, + "loss": 1.501, + "step": 16317 + }, + { + "epoch": 0.4791238475541723, + "grad_norm": 0.0, + "learning_rate": 1.115945985918653e-05, + "loss": 1.374, + "step": 16318 + }, + { + "epoch": 0.4791532092313113, + "grad_norm": 0.0, + "learning_rate": 1.115851530731578e-05, + "loss": 1.1851, + "step": 16319 + }, + { + "epoch": 0.4791825709084503, + "grad_norm": 0.0, + "learning_rate": 1.115757074496828e-05, + "loss": 1.3545, + "step": 16320 + }, + { + "epoch": 0.4792119325855893, + "grad_norm": 0.0, + "learning_rate": 1.1156626172152569e-05, + "loss": 1.2812, + "step": 16321 + }, + { + "epoch": 0.47924129426272827, + "grad_norm": 0.0, + "learning_rate": 1.1155681588877197e-05, + "loss": 1.3779, + "step": 16322 + }, + { + "epoch": 0.4792706559398673, + "grad_norm": 0.0, + "learning_rate": 1.1154736995150695e-05, + "loss": 1.5244, + "step": 16323 + }, + { + "epoch": 0.4793000176170063, + "grad_norm": 0.0, + "learning_rate": 1.115379239098161e-05, + "loss": 1.2656, + "step": 16324 + }, + { + "epoch": 0.47932937929414526, + "grad_norm": 0.0, + "learning_rate": 1.115284777637849e-05, + "loss": 1.2588, + "step": 16325 + }, + { + "epoch": 0.4793587409712843, + "grad_norm": 0.0, + "learning_rate": 1.1151903151349872e-05, + "loss": 1.2969, + "step": 16326 + }, + { + "epoch": 0.4793881026484233, + "grad_norm": 0.0, + "learning_rate": 1.1150958515904296e-05, + "loss": 1.4141, + "step": 16327 + }, + { + "epoch": 0.47941746432556226, + "grad_norm": 0.0, + "learning_rate": 1.1150013870050304e-05, + "loss": 1.3135, + "step": 16328 + }, + { + "epoch": 0.4794468260027013, + "grad_norm": 0.0, + "learning_rate": 1.1149069213796447e-05, + "loss": 1.3203, + "step": 16329 + }, + { + "epoch": 0.4794761876798403, + "grad_norm": 0.0, + "learning_rate": 1.1148124547151263e-05, + "loss": 1.4004, + "step": 16330 + }, + { + "epoch": 0.47950554935697925, + "grad_norm": 0.0, + "learning_rate": 1.1147179870123292e-05, + "loss": 1.3242, + "step": 16331 + }, + { + "epoch": 0.47953491103411827, + "grad_norm": 0.0, + "learning_rate": 1.1146235182721081e-05, + "loss": 1.3516, + "step": 16332 + }, + { + "epoch": 0.4795642727112573, + "grad_norm": 0.0, + "learning_rate": 1.114529048495317e-05, + "loss": 1.3789, + "step": 16333 + }, + { + "epoch": 0.47959363438839625, + "grad_norm": 0.0, + "learning_rate": 1.1144345776828108e-05, + "loss": 1.3428, + "step": 16334 + }, + { + "epoch": 0.47962299606553527, + "grad_norm": 0.0, + "learning_rate": 1.1143401058354433e-05, + "loss": 1.2578, + "step": 16335 + }, + { + "epoch": 0.4796523577426743, + "grad_norm": 0.0, + "learning_rate": 1.1142456329540687e-05, + "loss": 1.4346, + "step": 16336 + }, + { + "epoch": 0.47968171941981325, + "grad_norm": 0.0, + "learning_rate": 1.1141511590395415e-05, + "loss": 1.3574, + "step": 16337 + }, + { + "epoch": 0.47971108109695226, + "grad_norm": 0.0, + "learning_rate": 1.1140566840927164e-05, + "loss": 1.3271, + "step": 16338 + }, + { + "epoch": 0.4797404427740913, + "grad_norm": 0.0, + "learning_rate": 1.1139622081144475e-05, + "loss": 1.373, + "step": 16339 + }, + { + "epoch": 0.47976980445123024, + "grad_norm": 0.0, + "learning_rate": 1.113867731105589e-05, + "loss": 1.3623, + "step": 16340 + }, + { + "epoch": 0.47979916612836926, + "grad_norm": 0.0, + "learning_rate": 1.1137732530669954e-05, + "loss": 1.4668, + "step": 16341 + }, + { + "epoch": 0.4798285278055083, + "grad_norm": 0.0, + "learning_rate": 1.1136787739995214e-05, + "loss": 1.2573, + "step": 16342 + }, + { + "epoch": 0.47985788948264724, + "grad_norm": 0.0, + "learning_rate": 1.113584293904021e-05, + "loss": 1.1553, + "step": 16343 + }, + { + "epoch": 0.47988725115978625, + "grad_norm": 0.0, + "learning_rate": 1.113489812781349e-05, + "loss": 1.3076, + "step": 16344 + }, + { + "epoch": 0.47991661283692527, + "grad_norm": 0.0, + "learning_rate": 1.113395330632359e-05, + "loss": 1.3555, + "step": 16345 + }, + { + "epoch": 0.47994597451406423, + "grad_norm": 0.0, + "learning_rate": 1.1133008474579064e-05, + "loss": 1.3145, + "step": 16346 + }, + { + "epoch": 0.47997533619120325, + "grad_norm": 0.0, + "learning_rate": 1.113206363258845e-05, + "loss": 1.3223, + "step": 16347 + }, + { + "epoch": 0.48000469786834227, + "grad_norm": 0.0, + "learning_rate": 1.1131118780360296e-05, + "loss": 1.4717, + "step": 16348 + }, + { + "epoch": 0.48003405954548123, + "grad_norm": 0.0, + "learning_rate": 1.1130173917903144e-05, + "loss": 1.2588, + "step": 16349 + }, + { + "epoch": 0.48006342122262025, + "grad_norm": 0.0, + "learning_rate": 1.112922904522554e-05, + "loss": 1.3208, + "step": 16350 + }, + { + "epoch": 0.4800927828997592, + "grad_norm": 0.0, + "learning_rate": 1.1128284162336029e-05, + "loss": 1.3506, + "step": 16351 + }, + { + "epoch": 0.4801221445768982, + "grad_norm": 0.0, + "learning_rate": 1.1127339269243155e-05, + "loss": 1.2046, + "step": 16352 + }, + { + "epoch": 0.48015150625403724, + "grad_norm": 0.0, + "learning_rate": 1.1126394365955465e-05, + "loss": 1.3799, + "step": 16353 + }, + { + "epoch": 0.4801808679311762, + "grad_norm": 0.0, + "learning_rate": 1.11254494524815e-05, + "loss": 1.3154, + "step": 16354 + }, + { + "epoch": 0.4802102296083152, + "grad_norm": 0.0, + "learning_rate": 1.1124504528829812e-05, + "loss": 1.4014, + "step": 16355 + }, + { + "epoch": 0.48023959128545424, + "grad_norm": 0.0, + "learning_rate": 1.1123559595008937e-05, + "loss": 1.186, + "step": 16356 + }, + { + "epoch": 0.4802689529625932, + "grad_norm": 0.0, + "learning_rate": 1.1122614651027423e-05, + "loss": 1.4043, + "step": 16357 + }, + { + "epoch": 0.4802983146397322, + "grad_norm": 0.0, + "learning_rate": 1.112166969689382e-05, + "loss": 1.3848, + "step": 16358 + }, + { + "epoch": 0.48032767631687123, + "grad_norm": 0.0, + "learning_rate": 1.1120724732616673e-05, + "loss": 1.1675, + "step": 16359 + }, + { + "epoch": 0.4803570379940102, + "grad_norm": 0.0, + "learning_rate": 1.1119779758204521e-05, + "loss": 1.4453, + "step": 16360 + }, + { + "epoch": 0.4803863996711492, + "grad_norm": 0.0, + "learning_rate": 1.1118834773665917e-05, + "loss": 1.3867, + "step": 16361 + }, + { + "epoch": 0.48041576134828823, + "grad_norm": 0.0, + "learning_rate": 1.1117889779009404e-05, + "loss": 1.4932, + "step": 16362 + }, + { + "epoch": 0.4804451230254272, + "grad_norm": 0.0, + "learning_rate": 1.1116944774243524e-05, + "loss": 1.4004, + "step": 16363 + }, + { + "epoch": 0.4804744847025662, + "grad_norm": 0.0, + "learning_rate": 1.111599975937683e-05, + "loss": 1.3789, + "step": 16364 + }, + { + "epoch": 0.4805038463797052, + "grad_norm": 0.0, + "learning_rate": 1.1115054734417861e-05, + "loss": 1.293, + "step": 16365 + }, + { + "epoch": 0.4805332080568442, + "grad_norm": 0.0, + "learning_rate": 1.1114109699375166e-05, + "loss": 1.2969, + "step": 16366 + }, + { + "epoch": 0.4805625697339832, + "grad_norm": 0.0, + "learning_rate": 1.1113164654257295e-05, + "loss": 1.2939, + "step": 16367 + }, + { + "epoch": 0.4805919314111222, + "grad_norm": 0.0, + "learning_rate": 1.1112219599072789e-05, + "loss": 1.3711, + "step": 16368 + }, + { + "epoch": 0.4806212930882612, + "grad_norm": 0.0, + "learning_rate": 1.1111274533830196e-05, + "loss": 1.3135, + "step": 16369 + }, + { + "epoch": 0.4806506547654002, + "grad_norm": 0.0, + "learning_rate": 1.1110329458538063e-05, + "loss": 1.3525, + "step": 16370 + }, + { + "epoch": 0.4806800164425392, + "grad_norm": 0.0, + "learning_rate": 1.110938437320494e-05, + "loss": 1.375, + "step": 16371 + }, + { + "epoch": 0.4807093781196782, + "grad_norm": 0.0, + "learning_rate": 1.1108439277839365e-05, + "loss": 1.3203, + "step": 16372 + }, + { + "epoch": 0.4807387397968172, + "grad_norm": 0.0, + "learning_rate": 1.1107494172449891e-05, + "loss": 1.3916, + "step": 16373 + }, + { + "epoch": 0.4807681014739562, + "grad_norm": 0.0, + "learning_rate": 1.1106549057045067e-05, + "loss": 1.4082, + "step": 16374 + }, + { + "epoch": 0.4807974631510952, + "grad_norm": 0.0, + "learning_rate": 1.1105603931633435e-05, + "loss": 1.293, + "step": 16375 + }, + { + "epoch": 0.4808268248282342, + "grad_norm": 0.0, + "learning_rate": 1.1104658796223541e-05, + "loss": 1.293, + "step": 16376 + }, + { + "epoch": 0.4808561865053732, + "grad_norm": 0.0, + "learning_rate": 1.1103713650823935e-05, + "loss": 1.2446, + "step": 16377 + }, + { + "epoch": 0.48088554818251217, + "grad_norm": 0.0, + "learning_rate": 1.1102768495443163e-05, + "loss": 1.3564, + "step": 16378 + }, + { + "epoch": 0.4809149098596512, + "grad_norm": 0.0, + "learning_rate": 1.1101823330089779e-05, + "loss": 1.4336, + "step": 16379 + }, + { + "epoch": 0.4809442715367902, + "grad_norm": 0.0, + "learning_rate": 1.1100878154772317e-05, + "loss": 1.2227, + "step": 16380 + }, + { + "epoch": 0.48097363321392916, + "grad_norm": 0.0, + "learning_rate": 1.1099932969499333e-05, + "loss": 1.3545, + "step": 16381 + }, + { + "epoch": 0.4810029948910682, + "grad_norm": 0.0, + "learning_rate": 1.1098987774279378e-05, + "loss": 1.1826, + "step": 16382 + }, + { + "epoch": 0.4810323565682072, + "grad_norm": 0.0, + "learning_rate": 1.1098042569120993e-05, + "loss": 1.3203, + "step": 16383 + }, + { + "epoch": 0.48106171824534616, + "grad_norm": 0.0, + "learning_rate": 1.1097097354032729e-05, + "loss": 1.4053, + "step": 16384 + }, + { + "epoch": 0.4810910799224852, + "grad_norm": 0.0, + "learning_rate": 1.109615212902313e-05, + "loss": 1.3438, + "step": 16385 + }, + { + "epoch": 0.4811204415996242, + "grad_norm": 0.0, + "learning_rate": 1.1095206894100747e-05, + "loss": 1.2407, + "step": 16386 + }, + { + "epoch": 0.48114980327676315, + "grad_norm": 0.0, + "learning_rate": 1.109426164927413e-05, + "loss": 1.3691, + "step": 16387 + }, + { + "epoch": 0.48117916495390217, + "grad_norm": 0.0, + "learning_rate": 1.1093316394551823e-05, + "loss": 1.2959, + "step": 16388 + }, + { + "epoch": 0.4812085266310412, + "grad_norm": 0.0, + "learning_rate": 1.1092371129942374e-05, + "loss": 1.3379, + "step": 16389 + }, + { + "epoch": 0.48123788830818015, + "grad_norm": 0.0, + "learning_rate": 1.1091425855454334e-05, + "loss": 1.2148, + "step": 16390 + }, + { + "epoch": 0.48126724998531917, + "grad_norm": 0.0, + "learning_rate": 1.1090480571096254e-05, + "loss": 1.291, + "step": 16391 + }, + { + "epoch": 0.4812966116624582, + "grad_norm": 0.0, + "learning_rate": 1.1089535276876675e-05, + "loss": 1.208, + "step": 16392 + }, + { + "epoch": 0.48132597333959715, + "grad_norm": 0.0, + "learning_rate": 1.1088589972804149e-05, + "loss": 1.3516, + "step": 16393 + }, + { + "epoch": 0.48135533501673616, + "grad_norm": 0.0, + "learning_rate": 1.1087644658887228e-05, + "loss": 1.4346, + "step": 16394 + }, + { + "epoch": 0.4813846966938752, + "grad_norm": 0.0, + "learning_rate": 1.1086699335134456e-05, + "loss": 1.3125, + "step": 16395 + }, + { + "epoch": 0.48141405837101414, + "grad_norm": 0.0, + "learning_rate": 1.1085754001554387e-05, + "loss": 1.1953, + "step": 16396 + }, + { + "epoch": 0.48144342004815316, + "grad_norm": 0.0, + "learning_rate": 1.1084808658155564e-05, + "loss": 1.3271, + "step": 16397 + }, + { + "epoch": 0.4814727817252922, + "grad_norm": 0.0, + "learning_rate": 1.1083863304946536e-05, + "loss": 1.3594, + "step": 16398 + }, + { + "epoch": 0.48150214340243114, + "grad_norm": 0.0, + "learning_rate": 1.1082917941935859e-05, + "loss": 1.3486, + "step": 16399 + }, + { + "epoch": 0.48153150507957015, + "grad_norm": 0.0, + "learning_rate": 1.1081972569132073e-05, + "loss": 1.3213, + "step": 16400 + }, + { + "epoch": 0.4815608667567091, + "grad_norm": 0.0, + "learning_rate": 1.1081027186543736e-05, + "loss": 1.2393, + "step": 16401 + }, + { + "epoch": 0.48159022843384813, + "grad_norm": 0.0, + "learning_rate": 1.108008179417939e-05, + "loss": 1.3428, + "step": 16402 + }, + { + "epoch": 0.48161959011098715, + "grad_norm": 0.0, + "learning_rate": 1.107913639204759e-05, + "loss": 1.2373, + "step": 16403 + }, + { + "epoch": 0.4816489517881261, + "grad_norm": 0.0, + "learning_rate": 1.1078190980156885e-05, + "loss": 1.4238, + "step": 16404 + }, + { + "epoch": 0.48167831346526513, + "grad_norm": 0.0, + "learning_rate": 1.1077245558515818e-05, + "loss": 1.418, + "step": 16405 + }, + { + "epoch": 0.48170767514240415, + "grad_norm": 0.0, + "learning_rate": 1.1076300127132946e-05, + "loss": 1.2344, + "step": 16406 + }, + { + "epoch": 0.4817370368195431, + "grad_norm": 0.0, + "learning_rate": 1.1075354686016817e-05, + "loss": 1.2114, + "step": 16407 + }, + { + "epoch": 0.4817663984966821, + "grad_norm": 0.0, + "learning_rate": 1.107440923517598e-05, + "loss": 1.3467, + "step": 16408 + }, + { + "epoch": 0.48179576017382114, + "grad_norm": 0.0, + "learning_rate": 1.1073463774618985e-05, + "loss": 1.3193, + "step": 16409 + }, + { + "epoch": 0.4818251218509601, + "grad_norm": 0.0, + "learning_rate": 1.107251830435438e-05, + "loss": 1.3232, + "step": 16410 + }, + { + "epoch": 0.4818544835280991, + "grad_norm": 0.0, + "learning_rate": 1.1071572824390719e-05, + "loss": 1.2979, + "step": 16411 + }, + { + "epoch": 0.48188384520523814, + "grad_norm": 0.0, + "learning_rate": 1.1070627334736553e-05, + "loss": 1.2227, + "step": 16412 + }, + { + "epoch": 0.4819132068823771, + "grad_norm": 0.0, + "learning_rate": 1.1069681835400428e-05, + "loss": 1.4033, + "step": 16413 + }, + { + "epoch": 0.4819425685595161, + "grad_norm": 0.0, + "learning_rate": 1.1068736326390895e-05, + "loss": 1.3662, + "step": 16414 + }, + { + "epoch": 0.48197193023665513, + "grad_norm": 0.0, + "learning_rate": 1.1067790807716506e-05, + "loss": 1.3174, + "step": 16415 + }, + { + "epoch": 0.4820012919137941, + "grad_norm": 0.0, + "learning_rate": 1.1066845279385813e-05, + "loss": 1.25, + "step": 16416 + }, + { + "epoch": 0.4820306535909331, + "grad_norm": 0.0, + "learning_rate": 1.1065899741407362e-05, + "loss": 1.291, + "step": 16417 + }, + { + "epoch": 0.48206001526807213, + "grad_norm": 0.0, + "learning_rate": 1.1064954193789708e-05, + "loss": 1.3057, + "step": 16418 + }, + { + "epoch": 0.4820893769452111, + "grad_norm": 0.0, + "learning_rate": 1.10640086365414e-05, + "loss": 1.2773, + "step": 16419 + }, + { + "epoch": 0.4821187386223501, + "grad_norm": 0.0, + "learning_rate": 1.1063063069670992e-05, + "loss": 1.4004, + "step": 16420 + }, + { + "epoch": 0.4821481002994891, + "grad_norm": 0.0, + "learning_rate": 1.106211749318703e-05, + "loss": 1.251, + "step": 16421 + }, + { + "epoch": 0.4821774619766281, + "grad_norm": 0.0, + "learning_rate": 1.1061171907098067e-05, + "loss": 1.334, + "step": 16422 + }, + { + "epoch": 0.4822068236537671, + "grad_norm": 0.0, + "learning_rate": 1.1060226311412656e-05, + "loss": 1.2949, + "step": 16423 + }, + { + "epoch": 0.4822361853309061, + "grad_norm": 0.0, + "learning_rate": 1.1059280706139348e-05, + "loss": 1.3652, + "step": 16424 + }, + { + "epoch": 0.4822655470080451, + "grad_norm": 0.0, + "learning_rate": 1.1058335091286692e-05, + "loss": 1.3496, + "step": 16425 + }, + { + "epoch": 0.4822949086851841, + "grad_norm": 0.0, + "learning_rate": 1.1057389466863239e-05, + "loss": 1.2534, + "step": 16426 + }, + { + "epoch": 0.4823242703623231, + "grad_norm": 0.0, + "learning_rate": 1.1056443832877543e-05, + "loss": 1.2876, + "step": 16427 + }, + { + "epoch": 0.4823536320394621, + "grad_norm": 0.0, + "learning_rate": 1.1055498189338158e-05, + "loss": 1.3076, + "step": 16428 + }, + { + "epoch": 0.4823829937166011, + "grad_norm": 0.0, + "learning_rate": 1.105455253625363e-05, + "loss": 1.2583, + "step": 16429 + }, + { + "epoch": 0.4824123553937401, + "grad_norm": 0.0, + "learning_rate": 1.1053606873632513e-05, + "loss": 1.3848, + "step": 16430 + }, + { + "epoch": 0.4824417170708791, + "grad_norm": 0.0, + "learning_rate": 1.105266120148336e-05, + "loss": 1.4688, + "step": 16431 + }, + { + "epoch": 0.4824710787480181, + "grad_norm": 0.0, + "learning_rate": 1.1051715519814723e-05, + "loss": 1.1753, + "step": 16432 + }, + { + "epoch": 0.4825004404251571, + "grad_norm": 0.0, + "learning_rate": 1.1050769828635152e-05, + "loss": 1.416, + "step": 16433 + }, + { + "epoch": 0.48252980210229607, + "grad_norm": 0.0, + "learning_rate": 1.1049824127953201e-05, + "loss": 1.3379, + "step": 16434 + }, + { + "epoch": 0.4825591637794351, + "grad_norm": 0.0, + "learning_rate": 1.104887841777742e-05, + "loss": 1.2832, + "step": 16435 + }, + { + "epoch": 0.4825885254565741, + "grad_norm": 0.0, + "learning_rate": 1.1047932698116363e-05, + "loss": 1.1787, + "step": 16436 + }, + { + "epoch": 0.48261788713371306, + "grad_norm": 0.0, + "learning_rate": 1.1046986968978586e-05, + "loss": 1.25, + "step": 16437 + }, + { + "epoch": 0.4826472488108521, + "grad_norm": 0.0, + "learning_rate": 1.1046041230372632e-05, + "loss": 1.2432, + "step": 16438 + }, + { + "epoch": 0.4826766104879911, + "grad_norm": 0.0, + "learning_rate": 1.1045095482307062e-05, + "loss": 1.2002, + "step": 16439 + }, + { + "epoch": 0.48270597216513006, + "grad_norm": 0.0, + "learning_rate": 1.1044149724790427e-05, + "loss": 1.3975, + "step": 16440 + }, + { + "epoch": 0.4827353338422691, + "grad_norm": 0.0, + "learning_rate": 1.1043203957831277e-05, + "loss": 1.252, + "step": 16441 + }, + { + "epoch": 0.4827646955194081, + "grad_norm": 0.0, + "learning_rate": 1.1042258181438168e-05, + "loss": 1.3369, + "step": 16442 + }, + { + "epoch": 0.48279405719654706, + "grad_norm": 0.0, + "learning_rate": 1.1041312395619652e-05, + "loss": 1.3779, + "step": 16443 + }, + { + "epoch": 0.48282341887368607, + "grad_norm": 0.0, + "learning_rate": 1.1040366600384279e-05, + "loss": 1.2275, + "step": 16444 + }, + { + "epoch": 0.4828527805508251, + "grad_norm": 0.0, + "learning_rate": 1.1039420795740606e-05, + "loss": 1.3984, + "step": 16445 + }, + { + "epoch": 0.48288214222796405, + "grad_norm": 0.0, + "learning_rate": 1.1038474981697183e-05, + "loss": 1.2461, + "step": 16446 + }, + { + "epoch": 0.48291150390510307, + "grad_norm": 0.0, + "learning_rate": 1.1037529158262563e-05, + "loss": 1.3652, + "step": 16447 + }, + { + "epoch": 0.4829408655822421, + "grad_norm": 0.0, + "learning_rate": 1.1036583325445305e-05, + "loss": 1.2764, + "step": 16448 + }, + { + "epoch": 0.48297022725938105, + "grad_norm": 0.0, + "learning_rate": 1.1035637483253958e-05, + "loss": 1.3457, + "step": 16449 + }, + { + "epoch": 0.48299958893652006, + "grad_norm": 0.0, + "learning_rate": 1.1034691631697074e-05, + "loss": 1.3389, + "step": 16450 + }, + { + "epoch": 0.483028950613659, + "grad_norm": 0.0, + "learning_rate": 1.1033745770783208e-05, + "loss": 1.3057, + "step": 16451 + }, + { + "epoch": 0.48305831229079804, + "grad_norm": 0.0, + "learning_rate": 1.1032799900520919e-05, + "loss": 1.2529, + "step": 16452 + }, + { + "epoch": 0.48308767396793706, + "grad_norm": 0.0, + "learning_rate": 1.1031854020918754e-05, + "loss": 1.3857, + "step": 16453 + }, + { + "epoch": 0.483117035645076, + "grad_norm": 0.0, + "learning_rate": 1.1030908131985267e-05, + "loss": 1.458, + "step": 16454 + }, + { + "epoch": 0.48314639732221504, + "grad_norm": 0.0, + "learning_rate": 1.1029962233729015e-05, + "loss": 1.25, + "step": 16455 + }, + { + "epoch": 0.48317575899935405, + "grad_norm": 0.0, + "learning_rate": 1.1029016326158548e-05, + "loss": 1.3965, + "step": 16456 + }, + { + "epoch": 0.483205120676493, + "grad_norm": 0.0, + "learning_rate": 1.1028070409282427e-05, + "loss": 1.3418, + "step": 16457 + }, + { + "epoch": 0.48323448235363203, + "grad_norm": 0.0, + "learning_rate": 1.10271244831092e-05, + "loss": 1.4141, + "step": 16458 + }, + { + "epoch": 0.48326384403077105, + "grad_norm": 0.0, + "learning_rate": 1.1026178547647422e-05, + "loss": 1.335, + "step": 16459 + }, + { + "epoch": 0.48329320570791, + "grad_norm": 0.0, + "learning_rate": 1.102523260290565e-05, + "loss": 1.373, + "step": 16460 + }, + { + "epoch": 0.48332256738504903, + "grad_norm": 0.0, + "learning_rate": 1.1024286648892438e-05, + "loss": 1.4209, + "step": 16461 + }, + { + "epoch": 0.48335192906218805, + "grad_norm": 0.0, + "learning_rate": 1.1023340685616338e-05, + "loss": 1.3584, + "step": 16462 + }, + { + "epoch": 0.483381290739327, + "grad_norm": 0.0, + "learning_rate": 1.1022394713085906e-05, + "loss": 1.2764, + "step": 16463 + }, + { + "epoch": 0.483410652416466, + "grad_norm": 0.0, + "learning_rate": 1.1021448731309699e-05, + "loss": 1.332, + "step": 16464 + }, + { + "epoch": 0.48344001409360504, + "grad_norm": 0.0, + "learning_rate": 1.1020502740296268e-05, + "loss": 1.334, + "step": 16465 + }, + { + "epoch": 0.483469375770744, + "grad_norm": 0.0, + "learning_rate": 1.1019556740054167e-05, + "loss": 1.332, + "step": 16466 + }, + { + "epoch": 0.483498737447883, + "grad_norm": 0.0, + "learning_rate": 1.1018610730591957e-05, + "loss": 1.2773, + "step": 16467 + }, + { + "epoch": 0.48352809912502204, + "grad_norm": 0.0, + "learning_rate": 1.1017664711918186e-05, + "loss": 1.3945, + "step": 16468 + }, + { + "epoch": 0.483557460802161, + "grad_norm": 0.0, + "learning_rate": 1.1016718684041416e-05, + "loss": 1.3545, + "step": 16469 + }, + { + "epoch": 0.4835868224793, + "grad_norm": 0.0, + "learning_rate": 1.1015772646970195e-05, + "loss": 1.293, + "step": 16470 + }, + { + "epoch": 0.48361618415643903, + "grad_norm": 0.0, + "learning_rate": 1.1014826600713082e-05, + "loss": 1.4492, + "step": 16471 + }, + { + "epoch": 0.483645545833578, + "grad_norm": 0.0, + "learning_rate": 1.1013880545278637e-05, + "loss": 1.4219, + "step": 16472 + }, + { + "epoch": 0.483674907510717, + "grad_norm": 0.0, + "learning_rate": 1.1012934480675406e-05, + "loss": 1.3311, + "step": 16473 + }, + { + "epoch": 0.48370426918785603, + "grad_norm": 0.0, + "learning_rate": 1.101198840691195e-05, + "loss": 1.3643, + "step": 16474 + }, + { + "epoch": 0.483733630864995, + "grad_norm": 0.0, + "learning_rate": 1.1011042323996826e-05, + "loss": 1.3066, + "step": 16475 + }, + { + "epoch": 0.483762992542134, + "grad_norm": 0.0, + "learning_rate": 1.1010096231938582e-05, + "loss": 1.46, + "step": 16476 + }, + { + "epoch": 0.483792354219273, + "grad_norm": 0.0, + "learning_rate": 1.1009150130745784e-05, + "loss": 1.3145, + "step": 16477 + }, + { + "epoch": 0.483821715896412, + "grad_norm": 0.0, + "learning_rate": 1.1008204020426982e-05, + "loss": 1.2559, + "step": 16478 + }, + { + "epoch": 0.483851077573551, + "grad_norm": 0.0, + "learning_rate": 1.1007257900990731e-05, + "loss": 1.3926, + "step": 16479 + }, + { + "epoch": 0.48388043925069, + "grad_norm": 0.0, + "learning_rate": 1.1006311772445591e-05, + "loss": 1.1753, + "step": 16480 + }, + { + "epoch": 0.483909800927829, + "grad_norm": 0.0, + "learning_rate": 1.1005365634800115e-05, + "loss": 1.2217, + "step": 16481 + }, + { + "epoch": 0.483939162604968, + "grad_norm": 0.0, + "learning_rate": 1.100441948806286e-05, + "loss": 1.3164, + "step": 16482 + }, + { + "epoch": 0.483968524282107, + "grad_norm": 0.0, + "learning_rate": 1.1003473332242382e-05, + "loss": 1.2603, + "step": 16483 + }, + { + "epoch": 0.483997885959246, + "grad_norm": 0.0, + "learning_rate": 1.100252716734724e-05, + "loss": 1.3662, + "step": 16484 + }, + { + "epoch": 0.484027247636385, + "grad_norm": 0.0, + "learning_rate": 1.1001580993385983e-05, + "loss": 1.3184, + "step": 16485 + }, + { + "epoch": 0.484056609313524, + "grad_norm": 0.0, + "learning_rate": 1.1000634810367177e-05, + "loss": 1.3467, + "step": 16486 + }, + { + "epoch": 0.484085970990663, + "grad_norm": 0.0, + "learning_rate": 1.0999688618299374e-05, + "loss": 1.3115, + "step": 16487 + }, + { + "epoch": 0.484115332667802, + "grad_norm": 0.0, + "learning_rate": 1.0998742417191126e-05, + "loss": 1.3701, + "step": 16488 + }, + { + "epoch": 0.484144694344941, + "grad_norm": 0.0, + "learning_rate": 1.0997796207051001e-05, + "loss": 1.1919, + "step": 16489 + }, + { + "epoch": 0.48417405602207997, + "grad_norm": 0.0, + "learning_rate": 1.0996849987887545e-05, + "loss": 1.4331, + "step": 16490 + }, + { + "epoch": 0.484203417699219, + "grad_norm": 0.0, + "learning_rate": 1.0995903759709321e-05, + "loss": 1.2559, + "step": 16491 + }, + { + "epoch": 0.484232779376358, + "grad_norm": 0.0, + "learning_rate": 1.0994957522524886e-05, + "loss": 1.375, + "step": 16492 + }, + { + "epoch": 0.48426214105349696, + "grad_norm": 0.0, + "learning_rate": 1.0994011276342792e-05, + "loss": 1.2109, + "step": 16493 + }, + { + "epoch": 0.484291502730636, + "grad_norm": 0.0, + "learning_rate": 1.0993065021171603e-05, + "loss": 1.4097, + "step": 16494 + }, + { + "epoch": 0.484320864407775, + "grad_norm": 0.0, + "learning_rate": 1.099211875701987e-05, + "loss": 1.3027, + "step": 16495 + }, + { + "epoch": 0.48435022608491396, + "grad_norm": 0.0, + "learning_rate": 1.099117248389615e-05, + "loss": 1.2544, + "step": 16496 + }, + { + "epoch": 0.484379587762053, + "grad_norm": 0.0, + "learning_rate": 1.0990226201809006e-05, + "loss": 1.3721, + "step": 16497 + }, + { + "epoch": 0.484408949439192, + "grad_norm": 0.0, + "learning_rate": 1.0989279910766997e-05, + "loss": 1.1602, + "step": 16498 + }, + { + "epoch": 0.48443831111633096, + "grad_norm": 0.0, + "learning_rate": 1.0988333610778672e-05, + "loss": 1.2031, + "step": 16499 + }, + { + "epoch": 0.48446767279347, + "grad_norm": 0.0, + "learning_rate": 1.0987387301852592e-05, + "loss": 1.376, + "step": 16500 + }, + { + "epoch": 0.48449703447060893, + "grad_norm": 0.0, + "learning_rate": 1.0986440983997317e-05, + "loss": 1.3369, + "step": 16501 + }, + { + "epoch": 0.48452639614774795, + "grad_norm": 0.0, + "learning_rate": 1.0985494657221409e-05, + "loss": 1.3027, + "step": 16502 + }, + { + "epoch": 0.48455575782488697, + "grad_norm": 0.0, + "learning_rate": 1.0984548321533415e-05, + "loss": 1.2485, + "step": 16503 + }, + { + "epoch": 0.48458511950202593, + "grad_norm": 0.0, + "learning_rate": 1.09836019769419e-05, + "loss": 1.3721, + "step": 16504 + }, + { + "epoch": 0.48461448117916495, + "grad_norm": 0.0, + "learning_rate": 1.0982655623455419e-05, + "loss": 1.3896, + "step": 16505 + }, + { + "epoch": 0.48464384285630396, + "grad_norm": 0.0, + "learning_rate": 1.0981709261082532e-05, + "loss": 1.3066, + "step": 16506 + }, + { + "epoch": 0.4846732045334429, + "grad_norm": 0.0, + "learning_rate": 1.0980762889831797e-05, + "loss": 1.334, + "step": 16507 + }, + { + "epoch": 0.48470256621058194, + "grad_norm": 0.0, + "learning_rate": 1.0979816509711771e-05, + "loss": 1.3789, + "step": 16508 + }, + { + "epoch": 0.48473192788772096, + "grad_norm": 0.0, + "learning_rate": 1.0978870120731013e-05, + "loss": 1.3047, + "step": 16509 + }, + { + "epoch": 0.4847612895648599, + "grad_norm": 0.0, + "learning_rate": 1.0977923722898085e-05, + "loss": 1.3818, + "step": 16510 + }, + { + "epoch": 0.48479065124199894, + "grad_norm": 0.0, + "learning_rate": 1.097697731622154e-05, + "loss": 1.334, + "step": 16511 + }, + { + "epoch": 0.48482001291913795, + "grad_norm": 0.0, + "learning_rate": 1.097603090070994e-05, + "loss": 1.2471, + "step": 16512 + }, + { + "epoch": 0.4848493745962769, + "grad_norm": 0.0, + "learning_rate": 1.0975084476371842e-05, + "loss": 1.3887, + "step": 16513 + }, + { + "epoch": 0.48487873627341593, + "grad_norm": 0.0, + "learning_rate": 1.0974138043215807e-05, + "loss": 1.3018, + "step": 16514 + }, + { + "epoch": 0.48490809795055495, + "grad_norm": 0.0, + "learning_rate": 1.0973191601250389e-05, + "loss": 1.3125, + "step": 16515 + }, + { + "epoch": 0.4849374596276939, + "grad_norm": 0.0, + "learning_rate": 1.097224515048415e-05, + "loss": 1.3462, + "step": 16516 + }, + { + "epoch": 0.48496682130483293, + "grad_norm": 0.0, + "learning_rate": 1.0971298690925652e-05, + "loss": 1.3789, + "step": 16517 + }, + { + "epoch": 0.48499618298197195, + "grad_norm": 0.0, + "learning_rate": 1.0970352222583451e-05, + "loss": 1.3369, + "step": 16518 + }, + { + "epoch": 0.4850255446591109, + "grad_norm": 0.0, + "learning_rate": 1.0969405745466105e-05, + "loss": 1.3555, + "step": 16519 + }, + { + "epoch": 0.4850549063362499, + "grad_norm": 0.0, + "learning_rate": 1.0968459259582174e-05, + "loss": 1.2412, + "step": 16520 + }, + { + "epoch": 0.48508426801338894, + "grad_norm": 0.0, + "learning_rate": 1.0967512764940217e-05, + "loss": 1.29, + "step": 16521 + }, + { + "epoch": 0.4851136296905279, + "grad_norm": 0.0, + "learning_rate": 1.0966566261548797e-05, + "loss": 1.2891, + "step": 16522 + }, + { + "epoch": 0.4851429913676669, + "grad_norm": 0.0, + "learning_rate": 1.0965619749416472e-05, + "loss": 1.3584, + "step": 16523 + }, + { + "epoch": 0.48517235304480594, + "grad_norm": 0.0, + "learning_rate": 1.0964673228551796e-05, + "loss": 1.2998, + "step": 16524 + }, + { + "epoch": 0.4852017147219449, + "grad_norm": 0.0, + "learning_rate": 1.0963726698963335e-05, + "loss": 1.3906, + "step": 16525 + }, + { + "epoch": 0.4852310763990839, + "grad_norm": 0.0, + "learning_rate": 1.0962780160659646e-05, + "loss": 1.332, + "step": 16526 + }, + { + "epoch": 0.48526043807622293, + "grad_norm": 0.0, + "learning_rate": 1.0961833613649291e-05, + "loss": 1.3213, + "step": 16527 + }, + { + "epoch": 0.4852897997533619, + "grad_norm": 0.0, + "learning_rate": 1.0960887057940828e-05, + "loss": 1.1982, + "step": 16528 + }, + { + "epoch": 0.4853191614305009, + "grad_norm": 0.0, + "learning_rate": 1.0959940493542816e-05, + "loss": 1.1396, + "step": 16529 + }, + { + "epoch": 0.48534852310763993, + "grad_norm": 0.0, + "learning_rate": 1.0958993920463818e-05, + "loss": 1.2256, + "step": 16530 + }, + { + "epoch": 0.4853778847847789, + "grad_norm": 0.0, + "learning_rate": 1.095804733871239e-05, + "loss": 1.291, + "step": 16531 + }, + { + "epoch": 0.4854072464619179, + "grad_norm": 0.0, + "learning_rate": 1.0957100748297099e-05, + "loss": 1.2949, + "step": 16532 + }, + { + "epoch": 0.4854366081390569, + "grad_norm": 0.0, + "learning_rate": 1.0956154149226495e-05, + "loss": 1.209, + "step": 16533 + }, + { + "epoch": 0.4854659698161959, + "grad_norm": 0.0, + "learning_rate": 1.0955207541509148e-05, + "loss": 1.4062, + "step": 16534 + }, + { + "epoch": 0.4854953314933349, + "grad_norm": 0.0, + "learning_rate": 1.0954260925153612e-05, + "loss": 1.4053, + "step": 16535 + }, + { + "epoch": 0.4855246931704739, + "grad_norm": 0.0, + "learning_rate": 1.0953314300168452e-05, + "loss": 1.4023, + "step": 16536 + }, + { + "epoch": 0.4855540548476129, + "grad_norm": 0.0, + "learning_rate": 1.0952367666562226e-05, + "loss": 1.3296, + "step": 16537 + }, + { + "epoch": 0.4855834165247519, + "grad_norm": 0.0, + "learning_rate": 1.0951421024343494e-05, + "loss": 1.3701, + "step": 16538 + }, + { + "epoch": 0.4856127782018909, + "grad_norm": 0.0, + "learning_rate": 1.095047437352082e-05, + "loss": 1.3965, + "step": 16539 + }, + { + "epoch": 0.4856421398790299, + "grad_norm": 0.0, + "learning_rate": 1.094952771410276e-05, + "loss": 1.3149, + "step": 16540 + }, + { + "epoch": 0.4856715015561689, + "grad_norm": 0.0, + "learning_rate": 1.0948581046097883e-05, + "loss": 1.3984, + "step": 16541 + }, + { + "epoch": 0.4857008632333079, + "grad_norm": 0.0, + "learning_rate": 1.0947634369514742e-05, + "loss": 1.2705, + "step": 16542 + }, + { + "epoch": 0.4857302249104469, + "grad_norm": 0.0, + "learning_rate": 1.09466876843619e-05, + "loss": 1.249, + "step": 16543 + }, + { + "epoch": 0.4857595865875859, + "grad_norm": 0.0, + "learning_rate": 1.0945740990647919e-05, + "loss": 1.4336, + "step": 16544 + }, + { + "epoch": 0.4857889482647249, + "grad_norm": 0.0, + "learning_rate": 1.094479428838136e-05, + "loss": 1.2959, + "step": 16545 + }, + { + "epoch": 0.48581830994186387, + "grad_norm": 0.0, + "learning_rate": 1.0943847577570783e-05, + "loss": 1.2827, + "step": 16546 + }, + { + "epoch": 0.4858476716190029, + "grad_norm": 0.0, + "learning_rate": 1.0942900858224754e-05, + "loss": 1.25, + "step": 16547 + }, + { + "epoch": 0.4858770332961419, + "grad_norm": 0.0, + "learning_rate": 1.0941954130351829e-05, + "loss": 1.3037, + "step": 16548 + }, + { + "epoch": 0.48590639497328086, + "grad_norm": 0.0, + "learning_rate": 1.094100739396057e-05, + "loss": 1.3428, + "step": 16549 + }, + { + "epoch": 0.4859357566504199, + "grad_norm": 0.0, + "learning_rate": 1.0940060649059541e-05, + "loss": 1.332, + "step": 16550 + }, + { + "epoch": 0.48596511832755884, + "grad_norm": 0.0, + "learning_rate": 1.0939113895657304e-05, + "loss": 1.3047, + "step": 16551 + }, + { + "epoch": 0.48599448000469786, + "grad_norm": 0.0, + "learning_rate": 1.0938167133762421e-05, + "loss": 1.3662, + "step": 16552 + }, + { + "epoch": 0.4860238416818369, + "grad_norm": 0.0, + "learning_rate": 1.0937220363383449e-05, + "loss": 1.2637, + "step": 16553 + }, + { + "epoch": 0.48605320335897584, + "grad_norm": 0.0, + "learning_rate": 1.0936273584528957e-05, + "loss": 1.2261, + "step": 16554 + }, + { + "epoch": 0.48608256503611486, + "grad_norm": 0.0, + "learning_rate": 1.0935326797207501e-05, + "loss": 1.3193, + "step": 16555 + }, + { + "epoch": 0.4861119267132539, + "grad_norm": 0.0, + "learning_rate": 1.0934380001427645e-05, + "loss": 1.1514, + "step": 16556 + }, + { + "epoch": 0.48614128839039283, + "grad_norm": 0.0, + "learning_rate": 1.0933433197197948e-05, + "loss": 1.2217, + "step": 16557 + }, + { + "epoch": 0.48617065006753185, + "grad_norm": 0.0, + "learning_rate": 1.0932486384526979e-05, + "loss": 1.0977, + "step": 16558 + }, + { + "epoch": 0.48620001174467087, + "grad_norm": 0.0, + "learning_rate": 1.09315395634233e-05, + "loss": 1.2422, + "step": 16559 + }, + { + "epoch": 0.48622937342180983, + "grad_norm": 0.0, + "learning_rate": 1.0930592733895466e-05, + "loss": 1.3711, + "step": 16560 + }, + { + "epoch": 0.48625873509894885, + "grad_norm": 0.0, + "learning_rate": 1.0929645895952045e-05, + "loss": 1.3501, + "step": 16561 + }, + { + "epoch": 0.48628809677608786, + "grad_norm": 0.0, + "learning_rate": 1.0928699049601597e-05, + "loss": 1.4072, + "step": 16562 + }, + { + "epoch": 0.4863174584532268, + "grad_norm": 0.0, + "learning_rate": 1.0927752194852685e-05, + "loss": 1.3125, + "step": 16563 + }, + { + "epoch": 0.48634682013036584, + "grad_norm": 0.0, + "learning_rate": 1.0926805331713876e-05, + "loss": 1.3496, + "step": 16564 + }, + { + "epoch": 0.48637618180750486, + "grad_norm": 0.0, + "learning_rate": 1.0925858460193725e-05, + "loss": 1.3408, + "step": 16565 + }, + { + "epoch": 0.4864055434846438, + "grad_norm": 0.0, + "learning_rate": 1.0924911580300801e-05, + "loss": 1.25, + "step": 16566 + }, + { + "epoch": 0.48643490516178284, + "grad_norm": 0.0, + "learning_rate": 1.0923964692043666e-05, + "loss": 1.3145, + "step": 16567 + }, + { + "epoch": 0.48646426683892185, + "grad_norm": 0.0, + "learning_rate": 1.0923017795430879e-05, + "loss": 1.2305, + "step": 16568 + }, + { + "epoch": 0.4864936285160608, + "grad_norm": 0.0, + "learning_rate": 1.0922070890471005e-05, + "loss": 1.4385, + "step": 16569 + }, + { + "epoch": 0.48652299019319983, + "grad_norm": 0.0, + "learning_rate": 1.0921123977172608e-05, + "loss": 1.3145, + "step": 16570 + }, + { + "epoch": 0.48655235187033885, + "grad_norm": 0.0, + "learning_rate": 1.0920177055544255e-05, + "loss": 1.4111, + "step": 16571 + }, + { + "epoch": 0.4865817135474778, + "grad_norm": 0.0, + "learning_rate": 1.0919230125594502e-05, + "loss": 1.418, + "step": 16572 + }, + { + "epoch": 0.48661107522461683, + "grad_norm": 0.0, + "learning_rate": 1.0918283187331918e-05, + "loss": 1.2778, + "step": 16573 + }, + { + "epoch": 0.48664043690175585, + "grad_norm": 0.0, + "learning_rate": 1.091733624076506e-05, + "loss": 1.3232, + "step": 16574 + }, + { + "epoch": 0.4866697985788948, + "grad_norm": 0.0, + "learning_rate": 1.0916389285902496e-05, + "loss": 1.3154, + "step": 16575 + }, + { + "epoch": 0.4866991602560338, + "grad_norm": 0.0, + "learning_rate": 1.0915442322752791e-05, + "loss": 1.2275, + "step": 16576 + }, + { + "epoch": 0.48672852193317284, + "grad_norm": 0.0, + "learning_rate": 1.0914495351324506e-05, + "loss": 1.314, + "step": 16577 + }, + { + "epoch": 0.4867578836103118, + "grad_norm": 0.0, + "learning_rate": 1.0913548371626206e-05, + "loss": 1.1816, + "step": 16578 + }, + { + "epoch": 0.4867872452874508, + "grad_norm": 0.0, + "learning_rate": 1.0912601383666454e-05, + "loss": 1.3887, + "step": 16579 + }, + { + "epoch": 0.48681660696458984, + "grad_norm": 0.0, + "learning_rate": 1.0911654387453812e-05, + "loss": 1.3584, + "step": 16580 + }, + { + "epoch": 0.4868459686417288, + "grad_norm": 0.0, + "learning_rate": 1.091070738299685e-05, + "loss": 1.4189, + "step": 16581 + }, + { + "epoch": 0.4868753303188678, + "grad_norm": 0.0, + "learning_rate": 1.0909760370304123e-05, + "loss": 1.2861, + "step": 16582 + }, + { + "epoch": 0.48690469199600683, + "grad_norm": 0.0, + "learning_rate": 1.0908813349384201e-05, + "loss": 1.335, + "step": 16583 + }, + { + "epoch": 0.4869340536731458, + "grad_norm": 0.0, + "learning_rate": 1.0907866320245652e-05, + "loss": 1.2505, + "step": 16584 + }, + { + "epoch": 0.4869634153502848, + "grad_norm": 0.0, + "learning_rate": 1.090691928289703e-05, + "loss": 1.3838, + "step": 16585 + }, + { + "epoch": 0.48699277702742383, + "grad_norm": 0.0, + "learning_rate": 1.0905972237346906e-05, + "loss": 1.2935, + "step": 16586 + }, + { + "epoch": 0.4870221387045628, + "grad_norm": 0.0, + "learning_rate": 1.0905025183603844e-05, + "loss": 1.1616, + "step": 16587 + }, + { + "epoch": 0.4870515003817018, + "grad_norm": 0.0, + "learning_rate": 1.0904078121676408e-05, + "loss": 1.2441, + "step": 16588 + }, + { + "epoch": 0.4870808620588408, + "grad_norm": 0.0, + "learning_rate": 1.090313105157316e-05, + "loss": 1.2832, + "step": 16589 + }, + { + "epoch": 0.4871102237359798, + "grad_norm": 0.0, + "learning_rate": 1.0902183973302667e-05, + "loss": 1.3027, + "step": 16590 + }, + { + "epoch": 0.4871395854131188, + "grad_norm": 0.0, + "learning_rate": 1.0901236886873494e-05, + "loss": 1.374, + "step": 16591 + }, + { + "epoch": 0.4871689470902578, + "grad_norm": 0.0, + "learning_rate": 1.0900289792294205e-05, + "loss": 1.1929, + "step": 16592 + }, + { + "epoch": 0.4871983087673968, + "grad_norm": 0.0, + "learning_rate": 1.0899342689573365e-05, + "loss": 1.3242, + "step": 16593 + }, + { + "epoch": 0.4872276704445358, + "grad_norm": 0.0, + "learning_rate": 1.0898395578719539e-05, + "loss": 1.4814, + "step": 16594 + }, + { + "epoch": 0.4872570321216748, + "grad_norm": 0.0, + "learning_rate": 1.0897448459741289e-05, + "loss": 1.3711, + "step": 16595 + }, + { + "epoch": 0.4872863937988138, + "grad_norm": 0.0, + "learning_rate": 1.0896501332647186e-05, + "loss": 1.3594, + "step": 16596 + }, + { + "epoch": 0.4873157554759528, + "grad_norm": 0.0, + "learning_rate": 1.089555419744579e-05, + "loss": 1.3584, + "step": 16597 + }, + { + "epoch": 0.4873451171530918, + "grad_norm": 0.0, + "learning_rate": 1.089460705414567e-05, + "loss": 1.3027, + "step": 16598 + }, + { + "epoch": 0.4873744788302308, + "grad_norm": 0.0, + "learning_rate": 1.0893659902755384e-05, + "loss": 1.2285, + "step": 16599 + }, + { + "epoch": 0.4874038405073698, + "grad_norm": 0.0, + "learning_rate": 1.089271274328351e-05, + "loss": 1.4121, + "step": 16600 + }, + { + "epoch": 0.48743320218450875, + "grad_norm": 0.0, + "learning_rate": 1.0891765575738605e-05, + "loss": 1.1943, + "step": 16601 + }, + { + "epoch": 0.48746256386164777, + "grad_norm": 0.0, + "learning_rate": 1.089081840012923e-05, + "loss": 1.2656, + "step": 16602 + }, + { + "epoch": 0.4874919255387868, + "grad_norm": 0.0, + "learning_rate": 1.088987121646396e-05, + "loss": 1.2085, + "step": 16603 + }, + { + "epoch": 0.48752128721592575, + "grad_norm": 0.0, + "learning_rate": 1.0888924024751357e-05, + "loss": 1.2881, + "step": 16604 + }, + { + "epoch": 0.48755064889306476, + "grad_norm": 0.0, + "learning_rate": 1.0887976824999985e-05, + "loss": 1.2803, + "step": 16605 + }, + { + "epoch": 0.4875800105702038, + "grad_norm": 0.0, + "learning_rate": 1.088702961721841e-05, + "loss": 1.3447, + "step": 16606 + }, + { + "epoch": 0.48760937224734274, + "grad_norm": 0.0, + "learning_rate": 1.0886082401415202e-05, + "loss": 1.2607, + "step": 16607 + }, + { + "epoch": 0.48763873392448176, + "grad_norm": 0.0, + "learning_rate": 1.0885135177598922e-05, + "loss": 1.2422, + "step": 16608 + }, + { + "epoch": 0.4876680956016208, + "grad_norm": 0.0, + "learning_rate": 1.0884187945778137e-05, + "loss": 1.3076, + "step": 16609 + }, + { + "epoch": 0.48769745727875974, + "grad_norm": 0.0, + "learning_rate": 1.0883240705961418e-05, + "loss": 1.4717, + "step": 16610 + }, + { + "epoch": 0.48772681895589876, + "grad_norm": 0.0, + "learning_rate": 1.0882293458157323e-05, + "loss": 1.248, + "step": 16611 + }, + { + "epoch": 0.4877561806330378, + "grad_norm": 0.0, + "learning_rate": 1.0881346202374422e-05, + "loss": 1.3955, + "step": 16612 + }, + { + "epoch": 0.48778554231017673, + "grad_norm": 0.0, + "learning_rate": 1.0880398938621286e-05, + "loss": 1.3496, + "step": 16613 + }, + { + "epoch": 0.48781490398731575, + "grad_norm": 0.0, + "learning_rate": 1.0879451666906474e-05, + "loss": 1.3613, + "step": 16614 + }, + { + "epoch": 0.48784426566445477, + "grad_norm": 0.0, + "learning_rate": 1.0878504387238552e-05, + "loss": 1.2607, + "step": 16615 + }, + { + "epoch": 0.48787362734159373, + "grad_norm": 0.0, + "learning_rate": 1.0877557099626096e-05, + "loss": 1.2627, + "step": 16616 + }, + { + "epoch": 0.48790298901873275, + "grad_norm": 0.0, + "learning_rate": 1.0876609804077665e-05, + "loss": 1.3193, + "step": 16617 + }, + { + "epoch": 0.48793235069587176, + "grad_norm": 0.0, + "learning_rate": 1.0875662500601824e-05, + "loss": 1.2998, + "step": 16618 + }, + { + "epoch": 0.4879617123730107, + "grad_norm": 0.0, + "learning_rate": 1.0874715189207144e-05, + "loss": 1.1792, + "step": 16619 + }, + { + "epoch": 0.48799107405014974, + "grad_norm": 0.0, + "learning_rate": 1.0873767869902192e-05, + "loss": 1.4463, + "step": 16620 + }, + { + "epoch": 0.48802043572728876, + "grad_norm": 0.0, + "learning_rate": 1.0872820542695534e-05, + "loss": 1.2783, + "step": 16621 + }, + { + "epoch": 0.4880497974044277, + "grad_norm": 0.0, + "learning_rate": 1.0871873207595733e-05, + "loss": 1.3896, + "step": 16622 + }, + { + "epoch": 0.48807915908156674, + "grad_norm": 0.0, + "learning_rate": 1.087092586461136e-05, + "loss": 1.3271, + "step": 16623 + }, + { + "epoch": 0.48810852075870576, + "grad_norm": 0.0, + "learning_rate": 1.0869978513750982e-05, + "loss": 1.2637, + "step": 16624 + }, + { + "epoch": 0.4881378824358447, + "grad_norm": 0.0, + "learning_rate": 1.0869031155023169e-05, + "loss": 1.3525, + "step": 16625 + }, + { + "epoch": 0.48816724411298373, + "grad_norm": 0.0, + "learning_rate": 1.086808378843648e-05, + "loss": 1.335, + "step": 16626 + }, + { + "epoch": 0.48819660579012275, + "grad_norm": 0.0, + "learning_rate": 1.0867136413999489e-05, + "loss": 1.3164, + "step": 16627 + }, + { + "epoch": 0.4882259674672617, + "grad_norm": 0.0, + "learning_rate": 1.086618903172076e-05, + "loss": 1.2803, + "step": 16628 + }, + { + "epoch": 0.48825532914440073, + "grad_norm": 0.0, + "learning_rate": 1.0865241641608863e-05, + "loss": 1.2803, + "step": 16629 + }, + { + "epoch": 0.48828469082153975, + "grad_norm": 0.0, + "learning_rate": 1.0864294243672367e-05, + "loss": 1.2852, + "step": 16630 + }, + { + "epoch": 0.4883140524986787, + "grad_norm": 0.0, + "learning_rate": 1.086334683791983e-05, + "loss": 1.2856, + "step": 16631 + }, + { + "epoch": 0.4883434141758177, + "grad_norm": 0.0, + "learning_rate": 1.086239942435983e-05, + "loss": 1.2793, + "step": 16632 + }, + { + "epoch": 0.48837277585295674, + "grad_norm": 0.0, + "learning_rate": 1.0861452003000934e-05, + "loss": 1.249, + "step": 16633 + }, + { + "epoch": 0.4884021375300957, + "grad_norm": 0.0, + "learning_rate": 1.0860504573851703e-05, + "loss": 1.2959, + "step": 16634 + }, + { + "epoch": 0.4884314992072347, + "grad_norm": 0.0, + "learning_rate": 1.0859557136920708e-05, + "loss": 1.3389, + "step": 16635 + }, + { + "epoch": 0.48846086088437374, + "grad_norm": 0.0, + "learning_rate": 1.085860969221652e-05, + "loss": 1.3135, + "step": 16636 + }, + { + "epoch": 0.4884902225615127, + "grad_norm": 0.0, + "learning_rate": 1.0857662239747706e-05, + "loss": 1.3828, + "step": 16637 + }, + { + "epoch": 0.4885195842386517, + "grad_norm": 0.0, + "learning_rate": 1.085671477952283e-05, + "loss": 1.2773, + "step": 16638 + }, + { + "epoch": 0.48854894591579073, + "grad_norm": 0.0, + "learning_rate": 1.0855767311550463e-05, + "loss": 1.2959, + "step": 16639 + }, + { + "epoch": 0.4885783075929297, + "grad_norm": 0.0, + "learning_rate": 1.0854819835839177e-05, + "loss": 1.2832, + "step": 16640 + }, + { + "epoch": 0.4886076692700687, + "grad_norm": 0.0, + "learning_rate": 1.0853872352397532e-05, + "loss": 1.3721, + "step": 16641 + }, + { + "epoch": 0.48863703094720773, + "grad_norm": 0.0, + "learning_rate": 1.0852924861234101e-05, + "loss": 1.2861, + "step": 16642 + }, + { + "epoch": 0.4886663926243467, + "grad_norm": 0.0, + "learning_rate": 1.0851977362357453e-05, + "loss": 1.3794, + "step": 16643 + }, + { + "epoch": 0.4886957543014857, + "grad_norm": 0.0, + "learning_rate": 1.0851029855776156e-05, + "loss": 1.2573, + "step": 16644 + }, + { + "epoch": 0.4887251159786247, + "grad_norm": 0.0, + "learning_rate": 1.0850082341498777e-05, + "loss": 1.2051, + "step": 16645 + }, + { + "epoch": 0.4887544776557637, + "grad_norm": 0.0, + "learning_rate": 1.0849134819533886e-05, + "loss": 1.2598, + "step": 16646 + }, + { + "epoch": 0.4887838393329027, + "grad_norm": 0.0, + "learning_rate": 1.084818728989005e-05, + "loss": 1.4209, + "step": 16647 + }, + { + "epoch": 0.4888132010100417, + "grad_norm": 0.0, + "learning_rate": 1.084723975257584e-05, + "loss": 1.3252, + "step": 16648 + }, + { + "epoch": 0.4888425626871807, + "grad_norm": 0.0, + "learning_rate": 1.0846292207599826e-05, + "loss": 1.2256, + "step": 16649 + }, + { + "epoch": 0.4888719243643197, + "grad_norm": 0.0, + "learning_rate": 1.0845344654970576e-05, + "loss": 1.2793, + "step": 16650 + }, + { + "epoch": 0.4889012860414587, + "grad_norm": 0.0, + "learning_rate": 1.0844397094696655e-05, + "loss": 1.3818, + "step": 16651 + }, + { + "epoch": 0.4889306477185977, + "grad_norm": 0.0, + "learning_rate": 1.0843449526786633e-05, + "loss": 1.3555, + "step": 16652 + }, + { + "epoch": 0.4889600093957367, + "grad_norm": 0.0, + "learning_rate": 1.0842501951249082e-05, + "loss": 1.249, + "step": 16653 + }, + { + "epoch": 0.48898937107287566, + "grad_norm": 0.0, + "learning_rate": 1.0841554368092572e-05, + "loss": 1.3779, + "step": 16654 + }, + { + "epoch": 0.4890187327500147, + "grad_norm": 0.0, + "learning_rate": 1.084060677732567e-05, + "loss": 1.4023, + "step": 16655 + }, + { + "epoch": 0.4890480944271537, + "grad_norm": 0.0, + "learning_rate": 1.0839659178956942e-05, + "loss": 1.4092, + "step": 16656 + }, + { + "epoch": 0.48907745610429265, + "grad_norm": 0.0, + "learning_rate": 1.0838711572994967e-05, + "loss": 1.4229, + "step": 16657 + }, + { + "epoch": 0.48910681778143167, + "grad_norm": 0.0, + "learning_rate": 1.0837763959448304e-05, + "loss": 1.373, + "step": 16658 + }, + { + "epoch": 0.4891361794585707, + "grad_norm": 0.0, + "learning_rate": 1.0836816338325532e-05, + "loss": 1.3457, + "step": 16659 + }, + { + "epoch": 0.48916554113570965, + "grad_norm": 0.0, + "learning_rate": 1.083586870963521e-05, + "loss": 1.2422, + "step": 16660 + }, + { + "epoch": 0.48919490281284866, + "grad_norm": 0.0, + "learning_rate": 1.0834921073385914e-05, + "loss": 1.2285, + "step": 16661 + }, + { + "epoch": 0.4892242644899877, + "grad_norm": 0.0, + "learning_rate": 1.0833973429586215e-05, + "loss": 1.3232, + "step": 16662 + }, + { + "epoch": 0.48925362616712664, + "grad_norm": 0.0, + "learning_rate": 1.083302577824468e-05, + "loss": 1.3564, + "step": 16663 + }, + { + "epoch": 0.48928298784426566, + "grad_norm": 0.0, + "learning_rate": 1.0832078119369877e-05, + "loss": 1.229, + "step": 16664 + }, + { + "epoch": 0.4893123495214047, + "grad_norm": 0.0, + "learning_rate": 1.083113045297038e-05, + "loss": 1.3623, + "step": 16665 + }, + { + "epoch": 0.48934171119854364, + "grad_norm": 0.0, + "learning_rate": 1.0830182779054761e-05, + "loss": 1.2969, + "step": 16666 + }, + { + "epoch": 0.48937107287568266, + "grad_norm": 0.0, + "learning_rate": 1.0829235097631583e-05, + "loss": 1.3545, + "step": 16667 + }, + { + "epoch": 0.4894004345528217, + "grad_norm": 0.0, + "learning_rate": 1.082828740870942e-05, + "loss": 1.4482, + "step": 16668 + }, + { + "epoch": 0.48942979622996063, + "grad_norm": 0.0, + "learning_rate": 1.0827339712296843e-05, + "loss": 1.3223, + "step": 16669 + }, + { + "epoch": 0.48945915790709965, + "grad_norm": 0.0, + "learning_rate": 1.0826392008402423e-05, + "loss": 1.2388, + "step": 16670 + }, + { + "epoch": 0.48948851958423867, + "grad_norm": 0.0, + "learning_rate": 1.0825444297034724e-05, + "loss": 1.3008, + "step": 16671 + }, + { + "epoch": 0.48951788126137763, + "grad_norm": 0.0, + "learning_rate": 1.0824496578202322e-05, + "loss": 1.3408, + "step": 16672 + }, + { + "epoch": 0.48954724293851665, + "grad_norm": 0.0, + "learning_rate": 1.0823548851913785e-05, + "loss": 1.3521, + "step": 16673 + }, + { + "epoch": 0.48957660461565566, + "grad_norm": 0.0, + "learning_rate": 1.0822601118177688e-05, + "loss": 1.3555, + "step": 16674 + }, + { + "epoch": 0.4896059662927946, + "grad_norm": 0.0, + "learning_rate": 1.0821653377002596e-05, + "loss": 1.3135, + "step": 16675 + }, + { + "epoch": 0.48963532796993364, + "grad_norm": 0.0, + "learning_rate": 1.0820705628397084e-05, + "loss": 1.3535, + "step": 16676 + }, + { + "epoch": 0.48966468964707266, + "grad_norm": 0.0, + "learning_rate": 1.0819757872369719e-05, + "loss": 1.3711, + "step": 16677 + }, + { + "epoch": 0.4896940513242116, + "grad_norm": 0.0, + "learning_rate": 1.0818810108929073e-05, + "loss": 1.4014, + "step": 16678 + }, + { + "epoch": 0.48972341300135064, + "grad_norm": 0.0, + "learning_rate": 1.0817862338083722e-05, + "loss": 1.3848, + "step": 16679 + }, + { + "epoch": 0.48975277467848966, + "grad_norm": 0.0, + "learning_rate": 1.0816914559842228e-05, + "loss": 1.4609, + "step": 16680 + }, + { + "epoch": 0.4897821363556286, + "grad_norm": 0.0, + "learning_rate": 1.0815966774213167e-05, + "loss": 1.2998, + "step": 16681 + }, + { + "epoch": 0.48981149803276763, + "grad_norm": 0.0, + "learning_rate": 1.0815018981205112e-05, + "loss": 1.3496, + "step": 16682 + }, + { + "epoch": 0.48984085970990665, + "grad_norm": 0.0, + "learning_rate": 1.0814071180826631e-05, + "loss": 1.2422, + "step": 16683 + }, + { + "epoch": 0.4898702213870456, + "grad_norm": 0.0, + "learning_rate": 1.0813123373086295e-05, + "loss": 1.4355, + "step": 16684 + }, + { + "epoch": 0.48989958306418463, + "grad_norm": 0.0, + "learning_rate": 1.0812175557992672e-05, + "loss": 1.4365, + "step": 16685 + }, + { + "epoch": 0.48992894474132365, + "grad_norm": 0.0, + "learning_rate": 1.0811227735554344e-05, + "loss": 1.2969, + "step": 16686 + }, + { + "epoch": 0.4899583064184626, + "grad_norm": 0.0, + "learning_rate": 1.0810279905779872e-05, + "loss": 1.4082, + "step": 16687 + }, + { + "epoch": 0.4899876680956016, + "grad_norm": 0.0, + "learning_rate": 1.0809332068677832e-05, + "loss": 1.3496, + "step": 16688 + }, + { + "epoch": 0.49001702977274064, + "grad_norm": 0.0, + "learning_rate": 1.0808384224256797e-05, + "loss": 1.2783, + "step": 16689 + }, + { + "epoch": 0.4900463914498796, + "grad_norm": 0.0, + "learning_rate": 1.0807436372525334e-05, + "loss": 1.2969, + "step": 16690 + }, + { + "epoch": 0.4900757531270186, + "grad_norm": 0.0, + "learning_rate": 1.080648851349202e-05, + "loss": 1.3574, + "step": 16691 + }, + { + "epoch": 0.49010511480415764, + "grad_norm": 0.0, + "learning_rate": 1.080554064716542e-05, + "loss": 1.2402, + "step": 16692 + }, + { + "epoch": 0.4901344764812966, + "grad_norm": 0.0, + "learning_rate": 1.080459277355411e-05, + "loss": 1.3896, + "step": 16693 + }, + { + "epoch": 0.4901638381584356, + "grad_norm": 0.0, + "learning_rate": 1.0803644892666664e-05, + "loss": 1.2725, + "step": 16694 + }, + { + "epoch": 0.49019319983557463, + "grad_norm": 0.0, + "learning_rate": 1.080269700451165e-05, + "loss": 1.1963, + "step": 16695 + }, + { + "epoch": 0.4902225615127136, + "grad_norm": 0.0, + "learning_rate": 1.0801749109097641e-05, + "loss": 1.2705, + "step": 16696 + }, + { + "epoch": 0.4902519231898526, + "grad_norm": 0.0, + "learning_rate": 1.080080120643321e-05, + "loss": 1.1616, + "step": 16697 + }, + { + "epoch": 0.49028128486699163, + "grad_norm": 0.0, + "learning_rate": 1.079985329652693e-05, + "loss": 1.2793, + "step": 16698 + }, + { + "epoch": 0.4903106465441306, + "grad_norm": 0.0, + "learning_rate": 1.079890537938737e-05, + "loss": 1.2695, + "step": 16699 + }, + { + "epoch": 0.4903400082212696, + "grad_norm": 0.0, + "learning_rate": 1.0797957455023106e-05, + "loss": 1.3887, + "step": 16700 + }, + { + "epoch": 0.4903693698984086, + "grad_norm": 0.0, + "learning_rate": 1.0797009523442705e-05, + "loss": 1.4795, + "step": 16701 + }, + { + "epoch": 0.4903987315755476, + "grad_norm": 0.0, + "learning_rate": 1.0796061584654742e-05, + "loss": 1.3252, + "step": 16702 + }, + { + "epoch": 0.4904280932526866, + "grad_norm": 0.0, + "learning_rate": 1.0795113638667796e-05, + "loss": 1.2637, + "step": 16703 + }, + { + "epoch": 0.49045745492982556, + "grad_norm": 0.0, + "learning_rate": 1.079416568549043e-05, + "loss": 1.3896, + "step": 16704 + }, + { + "epoch": 0.4904868166069646, + "grad_norm": 0.0, + "learning_rate": 1.079321772513122e-05, + "loss": 1.3721, + "step": 16705 + }, + { + "epoch": 0.4905161782841036, + "grad_norm": 0.0, + "learning_rate": 1.0792269757598738e-05, + "loss": 1.3701, + "step": 16706 + }, + { + "epoch": 0.49054553996124256, + "grad_norm": 0.0, + "learning_rate": 1.0791321782901562e-05, + "loss": 1.2476, + "step": 16707 + }, + { + "epoch": 0.4905749016383816, + "grad_norm": 0.0, + "learning_rate": 1.0790373801048257e-05, + "loss": 1.2627, + "step": 16708 + }, + { + "epoch": 0.4906042633155206, + "grad_norm": 0.0, + "learning_rate": 1.0789425812047402e-05, + "loss": 1.3496, + "step": 16709 + }, + { + "epoch": 0.49063362499265956, + "grad_norm": 0.0, + "learning_rate": 1.0788477815907565e-05, + "loss": 1.2173, + "step": 16710 + }, + { + "epoch": 0.4906629866697986, + "grad_norm": 0.0, + "learning_rate": 1.0787529812637323e-05, + "loss": 1.3311, + "step": 16711 + }, + { + "epoch": 0.4906923483469376, + "grad_norm": 0.0, + "learning_rate": 1.0786581802245244e-05, + "loss": 1.2295, + "step": 16712 + }, + { + "epoch": 0.49072171002407655, + "grad_norm": 0.0, + "learning_rate": 1.0785633784739906e-05, + "loss": 1.3926, + "step": 16713 + }, + { + "epoch": 0.49075107170121557, + "grad_norm": 0.0, + "learning_rate": 1.078468576012988e-05, + "loss": 1.3037, + "step": 16714 + }, + { + "epoch": 0.4907804333783546, + "grad_norm": 0.0, + "learning_rate": 1.0783737728423743e-05, + "loss": 1.4346, + "step": 16715 + }, + { + "epoch": 0.49080979505549355, + "grad_norm": 0.0, + "learning_rate": 1.078278968963006e-05, + "loss": 1.2461, + "step": 16716 + }, + { + "epoch": 0.49083915673263256, + "grad_norm": 0.0, + "learning_rate": 1.0781841643757411e-05, + "loss": 1.4209, + "step": 16717 + }, + { + "epoch": 0.4908685184097716, + "grad_norm": 0.0, + "learning_rate": 1.0780893590814367e-05, + "loss": 1.2842, + "step": 16718 + }, + { + "epoch": 0.49089788008691054, + "grad_norm": 0.0, + "learning_rate": 1.0779945530809508e-05, + "loss": 1.1992, + "step": 16719 + }, + { + "epoch": 0.49092724176404956, + "grad_norm": 0.0, + "learning_rate": 1.0778997463751397e-05, + "loss": 1.3032, + "step": 16720 + }, + { + "epoch": 0.4909566034411886, + "grad_norm": 0.0, + "learning_rate": 1.0778049389648613e-05, + "loss": 1.334, + "step": 16721 + }, + { + "epoch": 0.49098596511832754, + "grad_norm": 0.0, + "learning_rate": 1.0777101308509728e-05, + "loss": 1.3633, + "step": 16722 + }, + { + "epoch": 0.49101532679546656, + "grad_norm": 0.0, + "learning_rate": 1.0776153220343319e-05, + "loss": 1.4307, + "step": 16723 + }, + { + "epoch": 0.4910446884726056, + "grad_norm": 0.0, + "learning_rate": 1.0775205125157958e-05, + "loss": 1.4102, + "step": 16724 + }, + { + "epoch": 0.49107405014974453, + "grad_norm": 0.0, + "learning_rate": 1.0774257022962216e-05, + "loss": 1.3076, + "step": 16725 + }, + { + "epoch": 0.49110341182688355, + "grad_norm": 0.0, + "learning_rate": 1.0773308913764669e-05, + "loss": 1.3242, + "step": 16726 + }, + { + "epoch": 0.49113277350402257, + "grad_norm": 0.0, + "learning_rate": 1.0772360797573895e-05, + "loss": 1.3984, + "step": 16727 + }, + { + "epoch": 0.49116213518116153, + "grad_norm": 0.0, + "learning_rate": 1.0771412674398465e-05, + "loss": 1.5215, + "step": 16728 + }, + { + "epoch": 0.49119149685830055, + "grad_norm": 0.0, + "learning_rate": 1.0770464544246949e-05, + "loss": 1.333, + "step": 16729 + }, + { + "epoch": 0.49122085853543956, + "grad_norm": 0.0, + "learning_rate": 1.0769516407127924e-05, + "loss": 1.377, + "step": 16730 + }, + { + "epoch": 0.4912502202125785, + "grad_norm": 0.0, + "learning_rate": 1.0768568263049968e-05, + "loss": 1.2812, + "step": 16731 + }, + { + "epoch": 0.49127958188971754, + "grad_norm": 0.0, + "learning_rate": 1.0767620112021652e-05, + "loss": 1.2544, + "step": 16732 + }, + { + "epoch": 0.49130894356685656, + "grad_norm": 0.0, + "learning_rate": 1.0766671954051549e-05, + "loss": 1.2295, + "step": 16733 + }, + { + "epoch": 0.4913383052439955, + "grad_norm": 0.0, + "learning_rate": 1.0765723789148237e-05, + "loss": 1.3799, + "step": 16734 + }, + { + "epoch": 0.49136766692113454, + "grad_norm": 0.0, + "learning_rate": 1.076477561732029e-05, + "loss": 1.4062, + "step": 16735 + }, + { + "epoch": 0.49139702859827356, + "grad_norm": 0.0, + "learning_rate": 1.0763827438576278e-05, + "loss": 1.3018, + "step": 16736 + }, + { + "epoch": 0.4914263902754125, + "grad_norm": 0.0, + "learning_rate": 1.0762879252924777e-05, + "loss": 1.2812, + "step": 16737 + }, + { + "epoch": 0.49145575195255153, + "grad_norm": 0.0, + "learning_rate": 1.0761931060374368e-05, + "loss": 1.2451, + "step": 16738 + }, + { + "epoch": 0.49148511362969055, + "grad_norm": 0.0, + "learning_rate": 1.076098286093362e-05, + "loss": 1.4248, + "step": 16739 + }, + { + "epoch": 0.4915144753068295, + "grad_norm": 0.0, + "learning_rate": 1.076003465461111e-05, + "loss": 1.4043, + "step": 16740 + }, + { + "epoch": 0.49154383698396853, + "grad_norm": 0.0, + "learning_rate": 1.0759086441415412e-05, + "loss": 1.2349, + "step": 16741 + }, + { + "epoch": 0.49157319866110755, + "grad_norm": 0.0, + "learning_rate": 1.0758138221355098e-05, + "loss": 1.3398, + "step": 16742 + }, + { + "epoch": 0.4916025603382465, + "grad_norm": 0.0, + "learning_rate": 1.0757189994438748e-05, + "loss": 1.4062, + "step": 16743 + }, + { + "epoch": 0.4916319220153855, + "grad_norm": 0.0, + "learning_rate": 1.0756241760674934e-05, + "loss": 1.208, + "step": 16744 + }, + { + "epoch": 0.49166128369252454, + "grad_norm": 0.0, + "learning_rate": 1.0755293520072234e-05, + "loss": 1.2148, + "step": 16745 + }, + { + "epoch": 0.4916906453696635, + "grad_norm": 0.0, + "learning_rate": 1.0754345272639219e-05, + "loss": 1.3848, + "step": 16746 + }, + { + "epoch": 0.4917200070468025, + "grad_norm": 0.0, + "learning_rate": 1.0753397018384468e-05, + "loss": 1.2002, + "step": 16747 + }, + { + "epoch": 0.49174936872394154, + "grad_norm": 0.0, + "learning_rate": 1.0752448757316558e-05, + "loss": 1.2612, + "step": 16748 + }, + { + "epoch": 0.4917787304010805, + "grad_norm": 0.0, + "learning_rate": 1.0751500489444055e-05, + "loss": 1.2637, + "step": 16749 + }, + { + "epoch": 0.4918080920782195, + "grad_norm": 0.0, + "learning_rate": 1.0750552214775543e-05, + "loss": 1.2646, + "step": 16750 + }, + { + "epoch": 0.49183745375535853, + "grad_norm": 0.0, + "learning_rate": 1.0749603933319595e-05, + "loss": 1.3594, + "step": 16751 + }, + { + "epoch": 0.4918668154324975, + "grad_norm": 0.0, + "learning_rate": 1.074865564508479e-05, + "loss": 1.3398, + "step": 16752 + }, + { + "epoch": 0.4918961771096365, + "grad_norm": 0.0, + "learning_rate": 1.0747707350079694e-05, + "loss": 1.375, + "step": 16753 + }, + { + "epoch": 0.4919255387867755, + "grad_norm": 0.0, + "learning_rate": 1.0746759048312892e-05, + "loss": 1.3369, + "step": 16754 + }, + { + "epoch": 0.4919549004639145, + "grad_norm": 0.0, + "learning_rate": 1.0745810739792957e-05, + "loss": 1.2178, + "step": 16755 + }, + { + "epoch": 0.4919842621410535, + "grad_norm": 0.0, + "learning_rate": 1.0744862424528467e-05, + "loss": 1.3042, + "step": 16756 + }, + { + "epoch": 0.49201362381819247, + "grad_norm": 0.0, + "learning_rate": 1.074391410252799e-05, + "loss": 1.1904, + "step": 16757 + }, + { + "epoch": 0.4920429854953315, + "grad_norm": 0.0, + "learning_rate": 1.0742965773800111e-05, + "loss": 1.2627, + "step": 16758 + }, + { + "epoch": 0.4920723471724705, + "grad_norm": 0.0, + "learning_rate": 1.07420174383534e-05, + "loss": 1.2891, + "step": 16759 + }, + { + "epoch": 0.49210170884960946, + "grad_norm": 0.0, + "learning_rate": 1.0741069096196439e-05, + "loss": 1.3799, + "step": 16760 + }, + { + "epoch": 0.4921310705267485, + "grad_norm": 0.0, + "learning_rate": 1.0740120747337797e-05, + "loss": 1.2637, + "step": 16761 + }, + { + "epoch": 0.4921604322038875, + "grad_norm": 0.0, + "learning_rate": 1.0739172391786052e-05, + "loss": 1.3535, + "step": 16762 + }, + { + "epoch": 0.49218979388102646, + "grad_norm": 0.0, + "learning_rate": 1.0738224029549783e-05, + "loss": 1.2573, + "step": 16763 + }, + { + "epoch": 0.4922191555581655, + "grad_norm": 0.0, + "learning_rate": 1.073727566063757e-05, + "loss": 1.2568, + "step": 16764 + }, + { + "epoch": 0.4922485172353045, + "grad_norm": 0.0, + "learning_rate": 1.073632728505798e-05, + "loss": 1.3164, + "step": 16765 + }, + { + "epoch": 0.49227787891244346, + "grad_norm": 0.0, + "learning_rate": 1.0735378902819593e-05, + "loss": 1.377, + "step": 16766 + }, + { + "epoch": 0.4923072405895825, + "grad_norm": 0.0, + "learning_rate": 1.0734430513930987e-05, + "loss": 1.3457, + "step": 16767 + }, + { + "epoch": 0.4923366022667215, + "grad_norm": 0.0, + "learning_rate": 1.073348211840074e-05, + "loss": 1.3018, + "step": 16768 + }, + { + "epoch": 0.49236596394386045, + "grad_norm": 0.0, + "learning_rate": 1.0732533716237426e-05, + "loss": 1.2637, + "step": 16769 + }, + { + "epoch": 0.49239532562099947, + "grad_norm": 0.0, + "learning_rate": 1.0731585307449619e-05, + "loss": 1.5391, + "step": 16770 + }, + { + "epoch": 0.4924246872981385, + "grad_norm": 0.0, + "learning_rate": 1.07306368920459e-05, + "loss": 1.2471, + "step": 16771 + }, + { + "epoch": 0.49245404897527745, + "grad_norm": 0.0, + "learning_rate": 1.0729688470034848e-05, + "loss": 1.4482, + "step": 16772 + }, + { + "epoch": 0.49248341065241646, + "grad_norm": 0.0, + "learning_rate": 1.072874004142503e-05, + "loss": 1.2412, + "step": 16773 + }, + { + "epoch": 0.4925127723295555, + "grad_norm": 0.0, + "learning_rate": 1.0727791606225032e-05, + "loss": 1.2969, + "step": 16774 + }, + { + "epoch": 0.49254213400669444, + "grad_norm": 0.0, + "learning_rate": 1.0726843164443428e-05, + "loss": 1.3584, + "step": 16775 + }, + { + "epoch": 0.49257149568383346, + "grad_norm": 0.0, + "learning_rate": 1.0725894716088796e-05, + "loss": 1.2764, + "step": 16776 + }, + { + "epoch": 0.4926008573609725, + "grad_norm": 0.0, + "learning_rate": 1.0724946261169711e-05, + "loss": 1.2842, + "step": 16777 + }, + { + "epoch": 0.49263021903811144, + "grad_norm": 0.0, + "learning_rate": 1.0723997799694754e-05, + "loss": 1.3647, + "step": 16778 + }, + { + "epoch": 0.49265958071525046, + "grad_norm": 0.0, + "learning_rate": 1.0723049331672498e-05, + "loss": 1.2656, + "step": 16779 + }, + { + "epoch": 0.4926889423923895, + "grad_norm": 0.0, + "learning_rate": 1.072210085711152e-05, + "loss": 1.252, + "step": 16780 + }, + { + "epoch": 0.49271830406952843, + "grad_norm": 0.0, + "learning_rate": 1.07211523760204e-05, + "loss": 1.2764, + "step": 16781 + }, + { + "epoch": 0.49274766574666745, + "grad_norm": 0.0, + "learning_rate": 1.0720203888407714e-05, + "loss": 1.2598, + "step": 16782 + }, + { + "epoch": 0.49277702742380647, + "grad_norm": 0.0, + "learning_rate": 1.071925539428204e-05, + "loss": 1.3887, + "step": 16783 + }, + { + "epoch": 0.49280638910094543, + "grad_norm": 0.0, + "learning_rate": 1.0718306893651957e-05, + "loss": 1.2559, + "step": 16784 + }, + { + "epoch": 0.49283575077808445, + "grad_norm": 0.0, + "learning_rate": 1.071735838652604e-05, + "loss": 1.3721, + "step": 16785 + }, + { + "epoch": 0.49286511245522346, + "grad_norm": 0.0, + "learning_rate": 1.0716409872912866e-05, + "loss": 1.417, + "step": 16786 + }, + { + "epoch": 0.4928944741323624, + "grad_norm": 0.0, + "learning_rate": 1.0715461352821018e-05, + "loss": 1.334, + "step": 16787 + }, + { + "epoch": 0.49292383580950144, + "grad_norm": 0.0, + "learning_rate": 1.0714512826259064e-05, + "loss": 1.3594, + "step": 16788 + }, + { + "epoch": 0.49295319748664046, + "grad_norm": 0.0, + "learning_rate": 1.0713564293235591e-05, + "loss": 1.3774, + "step": 16789 + }, + { + "epoch": 0.4929825591637794, + "grad_norm": 0.0, + "learning_rate": 1.0712615753759172e-05, + "loss": 1.3848, + "step": 16790 + }, + { + "epoch": 0.49301192084091844, + "grad_norm": 0.0, + "learning_rate": 1.0711667207838386e-05, + "loss": 1.3262, + "step": 16791 + }, + { + "epoch": 0.49304128251805746, + "grad_norm": 0.0, + "learning_rate": 1.0710718655481811e-05, + "loss": 1.3242, + "step": 16792 + }, + { + "epoch": 0.4930706441951964, + "grad_norm": 0.0, + "learning_rate": 1.0709770096698028e-05, + "loss": 1.3135, + "step": 16793 + }, + { + "epoch": 0.49310000587233543, + "grad_norm": 0.0, + "learning_rate": 1.070882153149561e-05, + "loss": 1.2441, + "step": 16794 + }, + { + "epoch": 0.49312936754947445, + "grad_norm": 0.0, + "learning_rate": 1.0707872959883136e-05, + "loss": 1.3936, + "step": 16795 + }, + { + "epoch": 0.4931587292266134, + "grad_norm": 0.0, + "learning_rate": 1.0706924381869188e-05, + "loss": 1.2891, + "step": 16796 + }, + { + "epoch": 0.49318809090375243, + "grad_norm": 0.0, + "learning_rate": 1.0705975797462345e-05, + "loss": 1.4355, + "step": 16797 + }, + { + "epoch": 0.49321745258089145, + "grad_norm": 0.0, + "learning_rate": 1.0705027206671178e-05, + "loss": 1.3896, + "step": 16798 + }, + { + "epoch": 0.4932468142580304, + "grad_norm": 0.0, + "learning_rate": 1.0704078609504266e-05, + "loss": 1.2617, + "step": 16799 + }, + { + "epoch": 0.4932761759351694, + "grad_norm": 0.0, + "learning_rate": 1.0703130005970194e-05, + "loss": 1.333, + "step": 16800 + }, + { + "epoch": 0.49330553761230844, + "grad_norm": 0.0, + "learning_rate": 1.070218139607754e-05, + "loss": 1.2949, + "step": 16801 + }, + { + "epoch": 0.4933348992894474, + "grad_norm": 0.0, + "learning_rate": 1.0701232779834875e-05, + "loss": 1.3145, + "step": 16802 + }, + { + "epoch": 0.4933642609665864, + "grad_norm": 0.0, + "learning_rate": 1.0700284157250783e-05, + "loss": 1.4424, + "step": 16803 + }, + { + "epoch": 0.4933936226437254, + "grad_norm": 0.0, + "learning_rate": 1.0699335528333845e-05, + "loss": 1.2881, + "step": 16804 + }, + { + "epoch": 0.4934229843208644, + "grad_norm": 0.0, + "learning_rate": 1.0698386893092638e-05, + "loss": 1.4346, + "step": 16805 + }, + { + "epoch": 0.4934523459980034, + "grad_norm": 0.0, + "learning_rate": 1.0697438251535735e-05, + "loss": 1.3809, + "step": 16806 + }, + { + "epoch": 0.4934817076751424, + "grad_norm": 0.0, + "learning_rate": 1.0696489603671723e-05, + "loss": 1.3623, + "step": 16807 + }, + { + "epoch": 0.4935110693522814, + "grad_norm": 0.0, + "learning_rate": 1.0695540949509176e-05, + "loss": 1.2549, + "step": 16808 + }, + { + "epoch": 0.4935404310294204, + "grad_norm": 0.0, + "learning_rate": 1.0694592289056674e-05, + "loss": 1.3535, + "step": 16809 + }, + { + "epoch": 0.4935697927065594, + "grad_norm": 0.0, + "learning_rate": 1.0693643622322794e-05, + "loss": 1.2939, + "step": 16810 + }, + { + "epoch": 0.4935991543836984, + "grad_norm": 0.0, + "learning_rate": 1.0692694949316118e-05, + "loss": 1.2534, + "step": 16811 + }, + { + "epoch": 0.4936285160608374, + "grad_norm": 0.0, + "learning_rate": 1.0691746270045224e-05, + "loss": 1.2861, + "step": 16812 + }, + { + "epoch": 0.49365787773797637, + "grad_norm": 0.0, + "learning_rate": 1.0690797584518693e-05, + "loss": 1.4072, + "step": 16813 + }, + { + "epoch": 0.4936872394151154, + "grad_norm": 0.0, + "learning_rate": 1.0689848892745103e-05, + "loss": 1.1475, + "step": 16814 + }, + { + "epoch": 0.4937166010922544, + "grad_norm": 0.0, + "learning_rate": 1.0688900194733029e-05, + "loss": 1.252, + "step": 16815 + }, + { + "epoch": 0.49374596276939337, + "grad_norm": 0.0, + "learning_rate": 1.0687951490491055e-05, + "loss": 1.3975, + "step": 16816 + }, + { + "epoch": 0.4937753244465324, + "grad_norm": 0.0, + "learning_rate": 1.0687002780027764e-05, + "loss": 1.3691, + "step": 16817 + }, + { + "epoch": 0.4938046861236714, + "grad_norm": 0.0, + "learning_rate": 1.068605406335173e-05, + "loss": 1.3525, + "step": 16818 + }, + { + "epoch": 0.49383404780081036, + "grad_norm": 0.0, + "learning_rate": 1.068510534047153e-05, + "loss": 1.3936, + "step": 16819 + }, + { + "epoch": 0.4938634094779494, + "grad_norm": 0.0, + "learning_rate": 1.0684156611395747e-05, + "loss": 1.2261, + "step": 16820 + }, + { + "epoch": 0.4938927711550884, + "grad_norm": 0.0, + "learning_rate": 1.0683207876132961e-05, + "loss": 1.4072, + "step": 16821 + }, + { + "epoch": 0.49392213283222736, + "grad_norm": 0.0, + "learning_rate": 1.0682259134691753e-05, + "loss": 1.1738, + "step": 16822 + }, + { + "epoch": 0.4939514945093664, + "grad_norm": 0.0, + "learning_rate": 1.0681310387080699e-05, + "loss": 1.3247, + "step": 16823 + }, + { + "epoch": 0.4939808561865054, + "grad_norm": 0.0, + "learning_rate": 1.068036163330838e-05, + "loss": 1.335, + "step": 16824 + }, + { + "epoch": 0.49401021786364435, + "grad_norm": 0.0, + "learning_rate": 1.0679412873383381e-05, + "loss": 1.2949, + "step": 16825 + }, + { + "epoch": 0.49403957954078337, + "grad_norm": 0.0, + "learning_rate": 1.0678464107314274e-05, + "loss": 1.4102, + "step": 16826 + }, + { + "epoch": 0.4940689412179224, + "grad_norm": 0.0, + "learning_rate": 1.0677515335109644e-05, + "loss": 1.2842, + "step": 16827 + }, + { + "epoch": 0.49409830289506135, + "grad_norm": 0.0, + "learning_rate": 1.0676566556778067e-05, + "loss": 1.4287, + "step": 16828 + }, + { + "epoch": 0.49412766457220036, + "grad_norm": 0.0, + "learning_rate": 1.0675617772328126e-05, + "loss": 1.2354, + "step": 16829 + }, + { + "epoch": 0.4941570262493394, + "grad_norm": 0.0, + "learning_rate": 1.0674668981768403e-05, + "loss": 1.4014, + "step": 16830 + }, + { + "epoch": 0.49418638792647834, + "grad_norm": 0.0, + "learning_rate": 1.0673720185107472e-05, + "loss": 1.2979, + "step": 16831 + }, + { + "epoch": 0.49421574960361736, + "grad_norm": 0.0, + "learning_rate": 1.0672771382353918e-05, + "loss": 1.3633, + "step": 16832 + }, + { + "epoch": 0.4942451112807564, + "grad_norm": 0.0, + "learning_rate": 1.0671822573516319e-05, + "loss": 1.3291, + "step": 16833 + }, + { + "epoch": 0.49427447295789534, + "grad_norm": 0.0, + "learning_rate": 1.0670873758603259e-05, + "loss": 1.2715, + "step": 16834 + }, + { + "epoch": 0.49430383463503436, + "grad_norm": 0.0, + "learning_rate": 1.0669924937623316e-05, + "loss": 1.3462, + "step": 16835 + }, + { + "epoch": 0.4943331963121734, + "grad_norm": 0.0, + "learning_rate": 1.0668976110585067e-05, + "loss": 1.2085, + "step": 16836 + }, + { + "epoch": 0.49436255798931233, + "grad_norm": 0.0, + "learning_rate": 1.06680272774971e-05, + "loss": 1.3867, + "step": 16837 + }, + { + "epoch": 0.49439191966645135, + "grad_norm": 0.0, + "learning_rate": 1.066707843836799e-05, + "loss": 1.2769, + "step": 16838 + }, + { + "epoch": 0.49442128134359037, + "grad_norm": 0.0, + "learning_rate": 1.0666129593206316e-05, + "loss": 1.2837, + "step": 16839 + }, + { + "epoch": 0.49445064302072933, + "grad_norm": 0.0, + "learning_rate": 1.0665180742020661e-05, + "loss": 1.3267, + "step": 16840 + }, + { + "epoch": 0.49448000469786835, + "grad_norm": 0.0, + "learning_rate": 1.0664231884819609e-05, + "loss": 1.2466, + "step": 16841 + }, + { + "epoch": 0.49450936637500736, + "grad_norm": 0.0, + "learning_rate": 1.066328302161174e-05, + "loss": 1.1865, + "step": 16842 + }, + { + "epoch": 0.4945387280521463, + "grad_norm": 0.0, + "learning_rate": 1.066233415240563e-05, + "loss": 1.3223, + "step": 16843 + }, + { + "epoch": 0.49456808972928534, + "grad_norm": 0.0, + "learning_rate": 1.0661385277209862e-05, + "loss": 1.3574, + "step": 16844 + }, + { + "epoch": 0.49459745140642436, + "grad_norm": 0.0, + "learning_rate": 1.0660436396033016e-05, + "loss": 1.3872, + "step": 16845 + }, + { + "epoch": 0.4946268130835633, + "grad_norm": 0.0, + "learning_rate": 1.065948750888368e-05, + "loss": 1.2881, + "step": 16846 + }, + { + "epoch": 0.49465617476070234, + "grad_norm": 0.0, + "learning_rate": 1.0658538615770427e-05, + "loss": 1.1699, + "step": 16847 + }, + { + "epoch": 0.49468553643784136, + "grad_norm": 0.0, + "learning_rate": 1.0657589716701837e-05, + "loss": 1.2373, + "step": 16848 + }, + { + "epoch": 0.4947148981149803, + "grad_norm": 0.0, + "learning_rate": 1.0656640811686498e-05, + "loss": 1.2607, + "step": 16849 + }, + { + "epoch": 0.49474425979211933, + "grad_norm": 0.0, + "learning_rate": 1.065569190073299e-05, + "loss": 1.2695, + "step": 16850 + }, + { + "epoch": 0.49477362146925835, + "grad_norm": 0.0, + "learning_rate": 1.0654742983849888e-05, + "loss": 1.3584, + "step": 16851 + }, + { + "epoch": 0.4948029831463973, + "grad_norm": 0.0, + "learning_rate": 1.065379406104578e-05, + "loss": 1.3584, + "step": 16852 + }, + { + "epoch": 0.49483234482353633, + "grad_norm": 0.0, + "learning_rate": 1.0652845132329244e-05, + "loss": 1.3184, + "step": 16853 + }, + { + "epoch": 0.4948617065006753, + "grad_norm": 0.0, + "learning_rate": 1.0651896197708863e-05, + "loss": 1.3428, + "step": 16854 + }, + { + "epoch": 0.4948910681778143, + "grad_norm": 0.0, + "learning_rate": 1.0650947257193216e-05, + "loss": 1.2324, + "step": 16855 + }, + { + "epoch": 0.4949204298549533, + "grad_norm": 0.0, + "learning_rate": 1.064999831079089e-05, + "loss": 1.3838, + "step": 16856 + }, + { + "epoch": 0.4949497915320923, + "grad_norm": 0.0, + "learning_rate": 1.0649049358510459e-05, + "loss": 1.2183, + "step": 16857 + }, + { + "epoch": 0.4949791532092313, + "grad_norm": 0.0, + "learning_rate": 1.0648100400360511e-05, + "loss": 1.4238, + "step": 16858 + }, + { + "epoch": 0.4950085148863703, + "grad_norm": 0.0, + "learning_rate": 1.0647151436349624e-05, + "loss": 1.2808, + "step": 16859 + }, + { + "epoch": 0.4950378765635093, + "grad_norm": 0.0, + "learning_rate": 1.0646202466486379e-05, + "loss": 1.3604, + "step": 16860 + }, + { + "epoch": 0.4950672382406483, + "grad_norm": 0.0, + "learning_rate": 1.0645253490779361e-05, + "loss": 1.1196, + "step": 16861 + }, + { + "epoch": 0.4950965999177873, + "grad_norm": 0.0, + "learning_rate": 1.0644304509237153e-05, + "loss": 1.4551, + "step": 16862 + }, + { + "epoch": 0.4951259615949263, + "grad_norm": 0.0, + "learning_rate": 1.0643355521868331e-05, + "loss": 1.3535, + "step": 16863 + }, + { + "epoch": 0.4951553232720653, + "grad_norm": 0.0, + "learning_rate": 1.0642406528681482e-05, + "loss": 1.3838, + "step": 16864 + }, + { + "epoch": 0.4951846849492043, + "grad_norm": 0.0, + "learning_rate": 1.0641457529685185e-05, + "loss": 1.3252, + "step": 16865 + }, + { + "epoch": 0.4952140466263433, + "grad_norm": 0.0, + "learning_rate": 1.0640508524888024e-05, + "loss": 1.2998, + "step": 16866 + }, + { + "epoch": 0.4952434083034823, + "grad_norm": 0.0, + "learning_rate": 1.063955951429858e-05, + "loss": 1.1797, + "step": 16867 + }, + { + "epoch": 0.4952727699806213, + "grad_norm": 0.0, + "learning_rate": 1.0638610497925437e-05, + "loss": 1.2627, + "step": 16868 + }, + { + "epoch": 0.49530213165776027, + "grad_norm": 0.0, + "learning_rate": 1.0637661475777173e-05, + "loss": 1.4893, + "step": 16869 + }, + { + "epoch": 0.4953314933348993, + "grad_norm": 0.0, + "learning_rate": 1.0636712447862375e-05, + "loss": 1.3008, + "step": 16870 + }, + { + "epoch": 0.4953608550120383, + "grad_norm": 0.0, + "learning_rate": 1.0635763414189624e-05, + "loss": 1.2998, + "step": 16871 + }, + { + "epoch": 0.49539021668917727, + "grad_norm": 0.0, + "learning_rate": 1.06348143747675e-05, + "loss": 1.1079, + "step": 16872 + }, + { + "epoch": 0.4954195783663163, + "grad_norm": 0.0, + "learning_rate": 1.0633865329604585e-05, + "loss": 1.376, + "step": 16873 + }, + { + "epoch": 0.4954489400434553, + "grad_norm": 0.0, + "learning_rate": 1.0632916278709469e-05, + "loss": 1.3682, + "step": 16874 + }, + { + "epoch": 0.49547830172059426, + "grad_norm": 0.0, + "learning_rate": 1.0631967222090725e-05, + "loss": 1.3428, + "step": 16875 + }, + { + "epoch": 0.4955076633977333, + "grad_norm": 0.0, + "learning_rate": 1.063101815975694e-05, + "loss": 1.3975, + "step": 16876 + }, + { + "epoch": 0.4955370250748723, + "grad_norm": 0.0, + "learning_rate": 1.0630069091716697e-05, + "loss": 1.332, + "step": 16877 + }, + { + "epoch": 0.49556638675201126, + "grad_norm": 0.0, + "learning_rate": 1.0629120017978575e-05, + "loss": 1.3096, + "step": 16878 + }, + { + "epoch": 0.4955957484291503, + "grad_norm": 0.0, + "learning_rate": 1.0628170938551165e-05, + "loss": 1.3223, + "step": 16879 + }, + { + "epoch": 0.4956251101062893, + "grad_norm": 0.0, + "learning_rate": 1.062722185344304e-05, + "loss": 1.2197, + "step": 16880 + }, + { + "epoch": 0.49565447178342825, + "grad_norm": 0.0, + "learning_rate": 1.0626272762662787e-05, + "loss": 1.3604, + "step": 16881 + }, + { + "epoch": 0.49568383346056727, + "grad_norm": 0.0, + "learning_rate": 1.062532366621899e-05, + "loss": 1.2539, + "step": 16882 + }, + { + "epoch": 0.4957131951377063, + "grad_norm": 0.0, + "learning_rate": 1.0624374564120233e-05, + "loss": 1.3418, + "step": 16883 + }, + { + "epoch": 0.49574255681484525, + "grad_norm": 0.0, + "learning_rate": 1.0623425456375093e-05, + "loss": 1.3867, + "step": 16884 + }, + { + "epoch": 0.49577191849198426, + "grad_norm": 0.0, + "learning_rate": 1.0622476342992161e-05, + "loss": 1.3008, + "step": 16885 + }, + { + "epoch": 0.4958012801691233, + "grad_norm": 0.0, + "learning_rate": 1.0621527223980015e-05, + "loss": 1.4209, + "step": 16886 + }, + { + "epoch": 0.49583064184626224, + "grad_norm": 0.0, + "learning_rate": 1.062057809934724e-05, + "loss": 1.3564, + "step": 16887 + }, + { + "epoch": 0.49586000352340126, + "grad_norm": 0.0, + "learning_rate": 1.0619628969102415e-05, + "loss": 1.2388, + "step": 16888 + }, + { + "epoch": 0.4958893652005403, + "grad_norm": 0.0, + "learning_rate": 1.0618679833254128e-05, + "loss": 1.2451, + "step": 16889 + }, + { + "epoch": 0.49591872687767924, + "grad_norm": 0.0, + "learning_rate": 1.0617730691810962e-05, + "loss": 1.1528, + "step": 16890 + }, + { + "epoch": 0.49594808855481826, + "grad_norm": 0.0, + "learning_rate": 1.0616781544781498e-05, + "loss": 1.2871, + "step": 16891 + }, + { + "epoch": 0.4959774502319573, + "grad_norm": 0.0, + "learning_rate": 1.0615832392174322e-05, + "loss": 1.3271, + "step": 16892 + }, + { + "epoch": 0.49600681190909623, + "grad_norm": 0.0, + "learning_rate": 1.0614883233998013e-05, + "loss": 1.3223, + "step": 16893 + }, + { + "epoch": 0.49603617358623525, + "grad_norm": 0.0, + "learning_rate": 1.0613934070261157e-05, + "loss": 1.3193, + "step": 16894 + }, + { + "epoch": 0.49606553526337427, + "grad_norm": 0.0, + "learning_rate": 1.0612984900972344e-05, + "loss": 1.2676, + "step": 16895 + }, + { + "epoch": 0.49609489694051323, + "grad_norm": 0.0, + "learning_rate": 1.0612035726140148e-05, + "loss": 1.3438, + "step": 16896 + }, + { + "epoch": 0.49612425861765225, + "grad_norm": 0.0, + "learning_rate": 1.0611086545773154e-05, + "loss": 1.2227, + "step": 16897 + }, + { + "epoch": 0.49615362029479126, + "grad_norm": 0.0, + "learning_rate": 1.061013735987995e-05, + "loss": 1.3203, + "step": 16898 + }, + { + "epoch": 0.4961829819719302, + "grad_norm": 0.0, + "learning_rate": 1.0609188168469118e-05, + "loss": 1.3662, + "step": 16899 + }, + { + "epoch": 0.49621234364906924, + "grad_norm": 0.0, + "learning_rate": 1.060823897154924e-05, + "loss": 1.3848, + "step": 16900 + }, + { + "epoch": 0.49624170532620826, + "grad_norm": 0.0, + "learning_rate": 1.0607289769128902e-05, + "loss": 1.2646, + "step": 16901 + }, + { + "epoch": 0.4962710670033472, + "grad_norm": 0.0, + "learning_rate": 1.0606340561216685e-05, + "loss": 1.2471, + "step": 16902 + }, + { + "epoch": 0.49630042868048624, + "grad_norm": 0.0, + "learning_rate": 1.0605391347821178e-05, + "loss": 1.2539, + "step": 16903 + }, + { + "epoch": 0.4963297903576252, + "grad_norm": 0.0, + "learning_rate": 1.0604442128950962e-05, + "loss": 1.4209, + "step": 16904 + }, + { + "epoch": 0.4963591520347642, + "grad_norm": 0.0, + "learning_rate": 1.0603492904614619e-05, + "loss": 1.3584, + "step": 16905 + }, + { + "epoch": 0.49638851371190323, + "grad_norm": 0.0, + "learning_rate": 1.0602543674820737e-05, + "loss": 1.2402, + "step": 16906 + }, + { + "epoch": 0.4964178753890422, + "grad_norm": 0.0, + "learning_rate": 1.0601594439577898e-05, + "loss": 1.1592, + "step": 16907 + }, + { + "epoch": 0.4964472370661812, + "grad_norm": 0.0, + "learning_rate": 1.0600645198894686e-05, + "loss": 1.2109, + "step": 16908 + }, + { + "epoch": 0.49647659874332023, + "grad_norm": 0.0, + "learning_rate": 1.0599695952779685e-05, + "loss": 1.4023, + "step": 16909 + }, + { + "epoch": 0.4965059604204592, + "grad_norm": 0.0, + "learning_rate": 1.059874670124148e-05, + "loss": 1.2822, + "step": 16910 + }, + { + "epoch": 0.4965353220975982, + "grad_norm": 0.0, + "learning_rate": 1.0597797444288656e-05, + "loss": 1.3828, + "step": 16911 + }, + { + "epoch": 0.4965646837747372, + "grad_norm": 0.0, + "learning_rate": 1.0596848181929797e-05, + "loss": 1.2725, + "step": 16912 + }, + { + "epoch": 0.4965940454518762, + "grad_norm": 0.0, + "learning_rate": 1.0595898914173486e-05, + "loss": 1.2803, + "step": 16913 + }, + { + "epoch": 0.4966234071290152, + "grad_norm": 0.0, + "learning_rate": 1.0594949641028307e-05, + "loss": 1.3281, + "step": 16914 + }, + { + "epoch": 0.4966527688061542, + "grad_norm": 0.0, + "learning_rate": 1.059400036250285e-05, + "loss": 1.2578, + "step": 16915 + }, + { + "epoch": 0.4966821304832932, + "grad_norm": 0.0, + "learning_rate": 1.0593051078605694e-05, + "loss": 1.2378, + "step": 16916 + }, + { + "epoch": 0.4967114921604322, + "grad_norm": 0.0, + "learning_rate": 1.0592101789345424e-05, + "loss": 1.2529, + "step": 16917 + }, + { + "epoch": 0.4967408538375712, + "grad_norm": 0.0, + "learning_rate": 1.0591152494730626e-05, + "loss": 1.2832, + "step": 16918 + }, + { + "epoch": 0.4967702155147102, + "grad_norm": 0.0, + "learning_rate": 1.0590203194769884e-05, + "loss": 1.2461, + "step": 16919 + }, + { + "epoch": 0.4967995771918492, + "grad_norm": 0.0, + "learning_rate": 1.0589253889471787e-05, + "loss": 1.3535, + "step": 16920 + }, + { + "epoch": 0.4968289388689882, + "grad_norm": 0.0, + "learning_rate": 1.0588304578844912e-05, + "loss": 1.3047, + "step": 16921 + }, + { + "epoch": 0.4968583005461272, + "grad_norm": 0.0, + "learning_rate": 1.058735526289785e-05, + "loss": 1.2974, + "step": 16922 + }, + { + "epoch": 0.4968876622232662, + "grad_norm": 0.0, + "learning_rate": 1.0586405941639181e-05, + "loss": 1.1421, + "step": 16923 + }, + { + "epoch": 0.4969170239004052, + "grad_norm": 0.0, + "learning_rate": 1.0585456615077496e-05, + "loss": 1.3193, + "step": 16924 + }, + { + "epoch": 0.49694638557754417, + "grad_norm": 0.0, + "learning_rate": 1.058450728322138e-05, + "loss": 1.376, + "step": 16925 + }, + { + "epoch": 0.4969757472546832, + "grad_norm": 0.0, + "learning_rate": 1.058355794607941e-05, + "loss": 1.333, + "step": 16926 + }, + { + "epoch": 0.4970051089318222, + "grad_norm": 0.0, + "learning_rate": 1.0582608603660176e-05, + "loss": 1.2993, + "step": 16927 + }, + { + "epoch": 0.49703447060896117, + "grad_norm": 0.0, + "learning_rate": 1.0581659255972266e-05, + "loss": 1.2432, + "step": 16928 + }, + { + "epoch": 0.4970638322861002, + "grad_norm": 0.0, + "learning_rate": 1.058070990302426e-05, + "loss": 1.3174, + "step": 16929 + }, + { + "epoch": 0.4970931939632392, + "grad_norm": 0.0, + "learning_rate": 1.0579760544824747e-05, + "loss": 1.3525, + "step": 16930 + }, + { + "epoch": 0.49712255564037816, + "grad_norm": 0.0, + "learning_rate": 1.0578811181382308e-05, + "loss": 1.3472, + "step": 16931 + }, + { + "epoch": 0.4971519173175172, + "grad_norm": 0.0, + "learning_rate": 1.0577861812705537e-05, + "loss": 1.3018, + "step": 16932 + }, + { + "epoch": 0.4971812789946562, + "grad_norm": 0.0, + "learning_rate": 1.057691243880301e-05, + "loss": 1.1777, + "step": 16933 + }, + { + "epoch": 0.49721064067179516, + "grad_norm": 0.0, + "learning_rate": 1.0575963059683314e-05, + "loss": 1.2246, + "step": 16934 + }, + { + "epoch": 0.4972400023489342, + "grad_norm": 0.0, + "learning_rate": 1.057501367535504e-05, + "loss": 1.2959, + "step": 16935 + }, + { + "epoch": 0.4972693640260732, + "grad_norm": 0.0, + "learning_rate": 1.0574064285826768e-05, + "loss": 1.3564, + "step": 16936 + }, + { + "epoch": 0.49729872570321215, + "grad_norm": 0.0, + "learning_rate": 1.0573114891107087e-05, + "loss": 1.3311, + "step": 16937 + }, + { + "epoch": 0.49732808738035117, + "grad_norm": 0.0, + "learning_rate": 1.0572165491204582e-05, + "loss": 1.2998, + "step": 16938 + }, + { + "epoch": 0.4973574490574902, + "grad_norm": 0.0, + "learning_rate": 1.0571216086127835e-05, + "loss": 1.3408, + "step": 16939 + }, + { + "epoch": 0.49738681073462915, + "grad_norm": 0.0, + "learning_rate": 1.0570266675885439e-05, + "loss": 1.3838, + "step": 16940 + }, + { + "epoch": 0.49741617241176816, + "grad_norm": 0.0, + "learning_rate": 1.0569317260485973e-05, + "loss": 1.1265, + "step": 16941 + }, + { + "epoch": 0.4974455340889072, + "grad_norm": 0.0, + "learning_rate": 1.0568367839938023e-05, + "loss": 1.3818, + "step": 16942 + }, + { + "epoch": 0.49747489576604614, + "grad_norm": 0.0, + "learning_rate": 1.056741841425018e-05, + "loss": 1.2588, + "step": 16943 + }, + { + "epoch": 0.49750425744318516, + "grad_norm": 0.0, + "learning_rate": 1.0566468983431026e-05, + "loss": 1.4541, + "step": 16944 + }, + { + "epoch": 0.4975336191203242, + "grad_norm": 0.0, + "learning_rate": 1.0565519547489149e-05, + "loss": 1.335, + "step": 16945 + }, + { + "epoch": 0.49756298079746314, + "grad_norm": 0.0, + "learning_rate": 1.0564570106433134e-05, + "loss": 1.3115, + "step": 16946 + }, + { + "epoch": 0.49759234247460216, + "grad_norm": 0.0, + "learning_rate": 1.0563620660271565e-05, + "loss": 1.1362, + "step": 16947 + }, + { + "epoch": 0.4976217041517412, + "grad_norm": 0.0, + "learning_rate": 1.0562671209013029e-05, + "loss": 1.3135, + "step": 16948 + }, + { + "epoch": 0.49765106582888013, + "grad_norm": 0.0, + "learning_rate": 1.0561721752666116e-05, + "loss": 1.3389, + "step": 16949 + }, + { + "epoch": 0.49768042750601915, + "grad_norm": 0.0, + "learning_rate": 1.056077229123941e-05, + "loss": 1.3115, + "step": 16950 + }, + { + "epoch": 0.49770978918315817, + "grad_norm": 0.0, + "learning_rate": 1.0559822824741492e-05, + "loss": 1.3652, + "step": 16951 + }, + { + "epoch": 0.49773915086029713, + "grad_norm": 0.0, + "learning_rate": 1.0558873353180958e-05, + "loss": 1.3281, + "step": 16952 + }, + { + "epoch": 0.49776851253743615, + "grad_norm": 0.0, + "learning_rate": 1.0557923876566385e-05, + "loss": 1.4189, + "step": 16953 + }, + { + "epoch": 0.4977978742145751, + "grad_norm": 0.0, + "learning_rate": 1.0556974394906365e-05, + "loss": 1.2891, + "step": 16954 + }, + { + "epoch": 0.4978272358917141, + "grad_norm": 0.0, + "learning_rate": 1.0556024908209486e-05, + "loss": 1.2783, + "step": 16955 + }, + { + "epoch": 0.49785659756885314, + "grad_norm": 0.0, + "learning_rate": 1.0555075416484328e-05, + "loss": 1.2227, + "step": 16956 + }, + { + "epoch": 0.4978859592459921, + "grad_norm": 0.0, + "learning_rate": 1.0554125919739482e-05, + "loss": 1.2422, + "step": 16957 + }, + { + "epoch": 0.4979153209231311, + "grad_norm": 0.0, + "learning_rate": 1.0553176417983533e-05, + "loss": 1.3555, + "step": 16958 + }, + { + "epoch": 0.49794468260027014, + "grad_norm": 0.0, + "learning_rate": 1.0552226911225068e-05, + "loss": 1.3262, + "step": 16959 + }, + { + "epoch": 0.4979740442774091, + "grad_norm": 0.0, + "learning_rate": 1.055127739947267e-05, + "loss": 1.25, + "step": 16960 + }, + { + "epoch": 0.4980034059545481, + "grad_norm": 0.0, + "learning_rate": 1.0550327882734937e-05, + "loss": 1.1621, + "step": 16961 + }, + { + "epoch": 0.49803276763168713, + "grad_norm": 0.0, + "learning_rate": 1.0549378361020442e-05, + "loss": 1.3428, + "step": 16962 + }, + { + "epoch": 0.4980621293088261, + "grad_norm": 0.0, + "learning_rate": 1.0548428834337779e-05, + "loss": 1.3799, + "step": 16963 + }, + { + "epoch": 0.4980914909859651, + "grad_norm": 0.0, + "learning_rate": 1.0547479302695536e-05, + "loss": 1.2544, + "step": 16964 + }, + { + "epoch": 0.49812085266310413, + "grad_norm": 0.0, + "learning_rate": 1.0546529766102296e-05, + "loss": 1.2173, + "step": 16965 + }, + { + "epoch": 0.4981502143402431, + "grad_norm": 0.0, + "learning_rate": 1.0545580224566647e-05, + "loss": 1.2578, + "step": 16966 + }, + { + "epoch": 0.4981795760173821, + "grad_norm": 0.0, + "learning_rate": 1.0544630678097174e-05, + "loss": 1.3809, + "step": 16967 + }, + { + "epoch": 0.4982089376945211, + "grad_norm": 0.0, + "learning_rate": 1.054368112670247e-05, + "loss": 1.1929, + "step": 16968 + }, + { + "epoch": 0.4982382993716601, + "grad_norm": 0.0, + "learning_rate": 1.0542731570391119e-05, + "loss": 1.2168, + "step": 16969 + }, + { + "epoch": 0.4982676610487991, + "grad_norm": 0.0, + "learning_rate": 1.0541782009171702e-05, + "loss": 1.2861, + "step": 16970 + }, + { + "epoch": 0.4982970227259381, + "grad_norm": 0.0, + "learning_rate": 1.0540832443052816e-05, + "loss": 1.3164, + "step": 16971 + }, + { + "epoch": 0.4983263844030771, + "grad_norm": 0.0, + "learning_rate": 1.0539882872043041e-05, + "loss": 1.3428, + "step": 16972 + }, + { + "epoch": 0.4983557460802161, + "grad_norm": 0.0, + "learning_rate": 1.053893329615097e-05, + "loss": 1.3291, + "step": 16973 + }, + { + "epoch": 0.4983851077573551, + "grad_norm": 0.0, + "learning_rate": 1.0537983715385187e-05, + "loss": 1.2393, + "step": 16974 + }, + { + "epoch": 0.4984144694344941, + "grad_norm": 0.0, + "learning_rate": 1.0537034129754277e-05, + "loss": 1.2676, + "step": 16975 + }, + { + "epoch": 0.4984438311116331, + "grad_norm": 0.0, + "learning_rate": 1.0536084539266831e-05, + "loss": 1.2979, + "step": 16976 + }, + { + "epoch": 0.4984731927887721, + "grad_norm": 0.0, + "learning_rate": 1.0535134943931436e-05, + "loss": 1.3955, + "step": 16977 + }, + { + "epoch": 0.4985025544659111, + "grad_norm": 0.0, + "learning_rate": 1.0534185343756677e-05, + "loss": 1.3906, + "step": 16978 + }, + { + "epoch": 0.4985319161430501, + "grad_norm": 0.0, + "learning_rate": 1.0533235738751144e-05, + "loss": 1.3037, + "step": 16979 + }, + { + "epoch": 0.4985612778201891, + "grad_norm": 0.0, + "learning_rate": 1.0532286128923421e-05, + "loss": 1.3774, + "step": 16980 + }, + { + "epoch": 0.49859063949732807, + "grad_norm": 0.0, + "learning_rate": 1.0531336514282104e-05, + "loss": 1.4619, + "step": 16981 + }, + { + "epoch": 0.4986200011744671, + "grad_norm": 0.0, + "learning_rate": 1.0530386894835772e-05, + "loss": 1.3262, + "step": 16982 + }, + { + "epoch": 0.4986493628516061, + "grad_norm": 0.0, + "learning_rate": 1.0529437270593012e-05, + "loss": 1.2368, + "step": 16983 + }, + { + "epoch": 0.49867872452874507, + "grad_norm": 0.0, + "learning_rate": 1.0528487641562421e-05, + "loss": 1.3682, + "step": 16984 + }, + { + "epoch": 0.4987080862058841, + "grad_norm": 0.0, + "learning_rate": 1.0527538007752579e-05, + "loss": 1.3066, + "step": 16985 + }, + { + "epoch": 0.4987374478830231, + "grad_norm": 0.0, + "learning_rate": 1.0526588369172075e-05, + "loss": 1.3594, + "step": 16986 + }, + { + "epoch": 0.49876680956016206, + "grad_norm": 0.0, + "learning_rate": 1.0525638725829497e-05, + "loss": 1.2891, + "step": 16987 + }, + { + "epoch": 0.4987961712373011, + "grad_norm": 0.0, + "learning_rate": 1.0524689077733433e-05, + "loss": 1.3418, + "step": 16988 + }, + { + "epoch": 0.4988255329144401, + "grad_norm": 0.0, + "learning_rate": 1.0523739424892472e-05, + "loss": 1.3945, + "step": 16989 + }, + { + "epoch": 0.49885489459157906, + "grad_norm": 0.0, + "learning_rate": 1.0522789767315202e-05, + "loss": 1.1475, + "step": 16990 + }, + { + "epoch": 0.4988842562687181, + "grad_norm": 0.0, + "learning_rate": 1.0521840105010207e-05, + "loss": 1.3027, + "step": 16991 + }, + { + "epoch": 0.4989136179458571, + "grad_norm": 0.0, + "learning_rate": 1.0520890437986082e-05, + "loss": 1.3311, + "step": 16992 + }, + { + "epoch": 0.49894297962299605, + "grad_norm": 0.0, + "learning_rate": 1.051994076625141e-05, + "loss": 1.2666, + "step": 16993 + }, + { + "epoch": 0.49897234130013507, + "grad_norm": 0.0, + "learning_rate": 1.0518991089814784e-05, + "loss": 1.2285, + "step": 16994 + }, + { + "epoch": 0.4990017029772741, + "grad_norm": 0.0, + "learning_rate": 1.0518041408684785e-05, + "loss": 1.4141, + "step": 16995 + }, + { + "epoch": 0.49903106465441305, + "grad_norm": 0.0, + "learning_rate": 1.0517091722870003e-05, + "loss": 1.2988, + "step": 16996 + }, + { + "epoch": 0.49906042633155207, + "grad_norm": 0.0, + "learning_rate": 1.0516142032379031e-05, + "loss": 1.2617, + "step": 16997 + }, + { + "epoch": 0.4990897880086911, + "grad_norm": 0.0, + "learning_rate": 1.0515192337220454e-05, + "loss": 1.252, + "step": 16998 + }, + { + "epoch": 0.49911914968583004, + "grad_norm": 0.0, + "learning_rate": 1.051424263740286e-05, + "loss": 1.1973, + "step": 16999 + }, + { + "epoch": 0.49914851136296906, + "grad_norm": 0.0, + "learning_rate": 1.0513292932934837e-05, + "loss": 1.3438, + "step": 17000 + }, + { + "epoch": 0.4991778730401081, + "grad_norm": 0.0, + "learning_rate": 1.0512343223824977e-05, + "loss": 1.2959, + "step": 17001 + }, + { + "epoch": 0.49920723471724704, + "grad_norm": 0.0, + "learning_rate": 1.0511393510081867e-05, + "loss": 1.1274, + "step": 17002 + }, + { + "epoch": 0.49923659639438606, + "grad_norm": 0.0, + "learning_rate": 1.051044379171409e-05, + "loss": 1.2676, + "step": 17003 + }, + { + "epoch": 0.499265958071525, + "grad_norm": 0.0, + "learning_rate": 1.0509494068730244e-05, + "loss": 1.2666, + "step": 17004 + }, + { + "epoch": 0.49929531974866403, + "grad_norm": 0.0, + "learning_rate": 1.0508544341138911e-05, + "loss": 1.3574, + "step": 17005 + }, + { + "epoch": 0.49932468142580305, + "grad_norm": 0.0, + "learning_rate": 1.0507594608948682e-05, + "loss": 1.4102, + "step": 17006 + }, + { + "epoch": 0.499354043102942, + "grad_norm": 0.0, + "learning_rate": 1.0506644872168144e-05, + "loss": 1.1968, + "step": 17007 + }, + { + "epoch": 0.49938340478008103, + "grad_norm": 0.0, + "learning_rate": 1.0505695130805885e-05, + "loss": 1.3193, + "step": 17008 + }, + { + "epoch": 0.49941276645722005, + "grad_norm": 0.0, + "learning_rate": 1.0504745384870496e-05, + "loss": 1.3018, + "step": 17009 + }, + { + "epoch": 0.499442128134359, + "grad_norm": 0.0, + "learning_rate": 1.0503795634370568e-05, + "loss": 1.4102, + "step": 17010 + }, + { + "epoch": 0.499471489811498, + "grad_norm": 0.0, + "learning_rate": 1.0502845879314683e-05, + "loss": 1.4043, + "step": 17011 + }, + { + "epoch": 0.49950085148863704, + "grad_norm": 0.0, + "learning_rate": 1.0501896119711437e-05, + "loss": 1.0913, + "step": 17012 + }, + { + "epoch": 0.499530213165776, + "grad_norm": 0.0, + "learning_rate": 1.0500946355569412e-05, + "loss": 1.3672, + "step": 17013 + }, + { + "epoch": 0.499559574842915, + "grad_norm": 0.0, + "learning_rate": 1.0499996586897208e-05, + "loss": 1.375, + "step": 17014 + }, + { + "epoch": 0.49958893652005404, + "grad_norm": 0.0, + "learning_rate": 1.0499046813703402e-05, + "loss": 1.271, + "step": 17015 + }, + { + "epoch": 0.499618298197193, + "grad_norm": 0.0, + "learning_rate": 1.0498097035996588e-05, + "loss": 1.3301, + "step": 17016 + }, + { + "epoch": 0.499647659874332, + "grad_norm": 0.0, + "learning_rate": 1.0497147253785354e-05, + "loss": 1.2959, + "step": 17017 + }, + { + "epoch": 0.49967702155147103, + "grad_norm": 0.0, + "learning_rate": 1.0496197467078294e-05, + "loss": 1.3213, + "step": 17018 + }, + { + "epoch": 0.49970638322861, + "grad_norm": 0.0, + "learning_rate": 1.0495247675883988e-05, + "loss": 1.2227, + "step": 17019 + }, + { + "epoch": 0.499735744905749, + "grad_norm": 0.0, + "learning_rate": 1.0494297880211034e-05, + "loss": 1.3623, + "step": 17020 + }, + { + "epoch": 0.49976510658288803, + "grad_norm": 0.0, + "learning_rate": 1.0493348080068015e-05, + "loss": 1.3408, + "step": 17021 + }, + { + "epoch": 0.499794468260027, + "grad_norm": 0.0, + "learning_rate": 1.0492398275463526e-05, + "loss": 1.3115, + "step": 17022 + }, + { + "epoch": 0.499823829937166, + "grad_norm": 0.0, + "learning_rate": 1.0491448466406152e-05, + "loss": 1.2422, + "step": 17023 + }, + { + "epoch": 0.499853191614305, + "grad_norm": 0.0, + "learning_rate": 1.0490498652904483e-05, + "loss": 1.4033, + "step": 17024 + }, + { + "epoch": 0.499882553291444, + "grad_norm": 0.0, + "learning_rate": 1.0489548834967112e-05, + "loss": 1.3252, + "step": 17025 + }, + { + "epoch": 0.499911914968583, + "grad_norm": 0.0, + "learning_rate": 1.048859901260262e-05, + "loss": 1.3193, + "step": 17026 + }, + { + "epoch": 0.499941276645722, + "grad_norm": 0.0, + "learning_rate": 1.0487649185819608e-05, + "loss": 1.2256, + "step": 17027 + }, + { + "epoch": 0.499970638322861, + "grad_norm": 0.0, + "learning_rate": 1.0486699354626655e-05, + "loss": 1.3735, + "step": 17028 + }, + { + "epoch": 0.5, + "grad_norm": 0.0, + "learning_rate": 1.0485749519032358e-05, + "loss": 1.3223, + "step": 17029 + }, + { + "epoch": 0.500029361677139, + "grad_norm": 0.0, + "learning_rate": 1.0484799679045303e-05, + "loss": 1.231, + "step": 17030 + }, + { + "epoch": 0.500058723354278, + "grad_norm": 0.0, + "learning_rate": 1.0483849834674078e-05, + "loss": 1.2793, + "step": 17031 + }, + { + "epoch": 0.500088085031417, + "grad_norm": 0.0, + "learning_rate": 1.0482899985927277e-05, + "loss": 1.2192, + "step": 17032 + }, + { + "epoch": 0.500117446708556, + "grad_norm": 0.0, + "learning_rate": 1.0481950132813488e-05, + "loss": 1.2446, + "step": 17033 + }, + { + "epoch": 0.500146808385695, + "grad_norm": 0.0, + "learning_rate": 1.04810002753413e-05, + "loss": 1.2627, + "step": 17034 + }, + { + "epoch": 0.500176170062834, + "grad_norm": 0.0, + "learning_rate": 1.0480050413519304e-05, + "loss": 1.3086, + "step": 17035 + }, + { + "epoch": 0.500205531739973, + "grad_norm": 0.0, + "learning_rate": 1.047910054735609e-05, + "loss": 1.2549, + "step": 17036 + }, + { + "epoch": 0.500234893417112, + "grad_norm": 0.0, + "learning_rate": 1.0478150676860244e-05, + "loss": 1.2192, + "step": 17037 + }, + { + "epoch": 0.500264255094251, + "grad_norm": 0.0, + "learning_rate": 1.047720080204036e-05, + "loss": 1.3311, + "step": 17038 + }, + { + "epoch": 0.50029361677139, + "grad_norm": 0.0, + "learning_rate": 1.0476250922905029e-05, + "loss": 1.4219, + "step": 17039 + }, + { + "epoch": 0.500322978448529, + "grad_norm": 0.0, + "learning_rate": 1.0475301039462837e-05, + "loss": 1.375, + "step": 17040 + }, + { + "epoch": 0.500352340125668, + "grad_norm": 0.0, + "learning_rate": 1.0474351151722375e-05, + "loss": 1.3281, + "step": 17041 + }, + { + "epoch": 0.5003817018028069, + "grad_norm": 0.0, + "learning_rate": 1.0473401259692238e-05, + "loss": 1.3223, + "step": 17042 + }, + { + "epoch": 0.500411063479946, + "grad_norm": 0.0, + "learning_rate": 1.0472451363381011e-05, + "loss": 1.2715, + "step": 17043 + }, + { + "epoch": 0.500440425157085, + "grad_norm": 0.0, + "learning_rate": 1.0471501462797282e-05, + "loss": 1.3037, + "step": 17044 + }, + { + "epoch": 0.5004697868342239, + "grad_norm": 0.0, + "learning_rate": 1.0470551557949646e-05, + "loss": 1.2773, + "step": 17045 + }, + { + "epoch": 0.500499148511363, + "grad_norm": 0.0, + "learning_rate": 1.0469601648846692e-05, + "loss": 1.4248, + "step": 17046 + }, + { + "epoch": 0.500528510188502, + "grad_norm": 0.0, + "learning_rate": 1.0468651735497014e-05, + "loss": 1.1787, + "step": 17047 + }, + { + "epoch": 0.5005578718656409, + "grad_norm": 0.0, + "learning_rate": 1.0467701817909192e-05, + "loss": 1.4131, + "step": 17048 + }, + { + "epoch": 0.50058723354278, + "grad_norm": 0.0, + "learning_rate": 1.0466751896091825e-05, + "loss": 1.3086, + "step": 17049 + }, + { + "epoch": 0.500616595219919, + "grad_norm": 0.0, + "learning_rate": 1.04658019700535e-05, + "loss": 1.418, + "step": 17050 + }, + { + "epoch": 0.5006459568970579, + "grad_norm": 0.0, + "learning_rate": 1.0464852039802812e-05, + "loss": 1.377, + "step": 17051 + }, + { + "epoch": 0.500675318574197, + "grad_norm": 0.0, + "learning_rate": 1.0463902105348344e-05, + "loss": 1.3232, + "step": 17052 + }, + { + "epoch": 0.500704680251336, + "grad_norm": 0.0, + "learning_rate": 1.0462952166698695e-05, + "loss": 1.3926, + "step": 17053 + }, + { + "epoch": 0.5007340419284749, + "grad_norm": 0.0, + "learning_rate": 1.0462002223862446e-05, + "loss": 1.3613, + "step": 17054 + }, + { + "epoch": 0.500763403605614, + "grad_norm": 0.0, + "learning_rate": 1.0461052276848198e-05, + "loss": 1.293, + "step": 17055 + }, + { + "epoch": 0.500792765282753, + "grad_norm": 0.0, + "learning_rate": 1.0460102325664531e-05, + "loss": 1.4512, + "step": 17056 + }, + { + "epoch": 0.5008221269598919, + "grad_norm": 0.0, + "learning_rate": 1.0459152370320043e-05, + "loss": 1.2188, + "step": 17057 + }, + { + "epoch": 0.500851488637031, + "grad_norm": 0.0, + "learning_rate": 1.0458202410823322e-05, + "loss": 1.3965, + "step": 17058 + }, + { + "epoch": 0.50088085031417, + "grad_norm": 0.0, + "learning_rate": 1.0457252447182961e-05, + "loss": 1.375, + "step": 17059 + }, + { + "epoch": 0.5009102119913089, + "grad_norm": 0.0, + "learning_rate": 1.0456302479407547e-05, + "loss": 1.3203, + "step": 17060 + }, + { + "epoch": 0.500939573668448, + "grad_norm": 0.0, + "learning_rate": 1.0455352507505672e-05, + "loss": 1.2905, + "step": 17061 + }, + { + "epoch": 0.500968935345587, + "grad_norm": 0.0, + "learning_rate": 1.045440253148593e-05, + "loss": 1.3389, + "step": 17062 + }, + { + "epoch": 0.5009982970227259, + "grad_norm": 0.0, + "learning_rate": 1.0453452551356908e-05, + "loss": 1.2842, + "step": 17063 + }, + { + "epoch": 0.501027658699865, + "grad_norm": 0.0, + "learning_rate": 1.0452502567127202e-05, + "loss": 1.3682, + "step": 17064 + }, + { + "epoch": 0.501057020377004, + "grad_norm": 0.0, + "learning_rate": 1.0451552578805396e-05, + "loss": 1.3389, + "step": 17065 + }, + { + "epoch": 0.5010863820541429, + "grad_norm": 0.0, + "learning_rate": 1.0450602586400084e-05, + "loss": 1.2695, + "step": 17066 + }, + { + "epoch": 0.501115743731282, + "grad_norm": 0.0, + "learning_rate": 1.0449652589919862e-05, + "loss": 1.3643, + "step": 17067 + }, + { + "epoch": 0.5011451054084209, + "grad_norm": 0.0, + "learning_rate": 1.044870258937331e-05, + "loss": 1.2056, + "step": 17068 + }, + { + "epoch": 0.5011744670855599, + "grad_norm": 0.0, + "learning_rate": 1.0447752584769026e-05, + "loss": 1.251, + "step": 17069 + }, + { + "epoch": 0.501203828762699, + "grad_norm": 0.0, + "learning_rate": 1.0446802576115605e-05, + "loss": 1.2881, + "step": 17070 + }, + { + "epoch": 0.5012331904398379, + "grad_norm": 0.0, + "learning_rate": 1.0445852563421632e-05, + "loss": 1.3359, + "step": 17071 + }, + { + "epoch": 0.5012625521169769, + "grad_norm": 0.0, + "learning_rate": 1.0444902546695699e-05, + "loss": 1.3555, + "step": 17072 + }, + { + "epoch": 0.501291913794116, + "grad_norm": 0.0, + "learning_rate": 1.0443952525946402e-05, + "loss": 1.2275, + "step": 17073 + }, + { + "epoch": 0.5013212754712549, + "grad_norm": 0.0, + "learning_rate": 1.0443002501182324e-05, + "loss": 1.2471, + "step": 17074 + }, + { + "epoch": 0.5013506371483939, + "grad_norm": 0.0, + "learning_rate": 1.0442052472412064e-05, + "loss": 1.3398, + "step": 17075 + }, + { + "epoch": 0.501379998825533, + "grad_norm": 0.0, + "learning_rate": 1.0441102439644209e-05, + "loss": 1.2671, + "step": 17076 + }, + { + "epoch": 0.5014093605026719, + "grad_norm": 0.0, + "learning_rate": 1.0440152402887351e-05, + "loss": 1.2305, + "step": 17077 + }, + { + "epoch": 0.5014387221798109, + "grad_norm": 0.0, + "learning_rate": 1.0439202362150082e-05, + "loss": 1.374, + "step": 17078 + }, + { + "epoch": 0.50146808385695, + "grad_norm": 0.0, + "learning_rate": 1.0438252317440998e-05, + "loss": 1.293, + "step": 17079 + }, + { + "epoch": 0.5014974455340889, + "grad_norm": 0.0, + "learning_rate": 1.0437302268768682e-05, + "loss": 1.2852, + "step": 17080 + }, + { + "epoch": 0.5015268072112279, + "grad_norm": 0.0, + "learning_rate": 1.0436352216141728e-05, + "loss": 1.3379, + "step": 17081 + }, + { + "epoch": 0.5015561688883668, + "grad_norm": 0.0, + "learning_rate": 1.0435402159568731e-05, + "loss": 1.209, + "step": 17082 + }, + { + "epoch": 0.5015855305655059, + "grad_norm": 0.0, + "learning_rate": 1.043445209905828e-05, + "loss": 1.2979, + "step": 17083 + }, + { + "epoch": 0.5016148922426449, + "grad_norm": 0.0, + "learning_rate": 1.0433502034618972e-05, + "loss": 1.3447, + "step": 17084 + }, + { + "epoch": 0.5016442539197838, + "grad_norm": 0.0, + "learning_rate": 1.043255196625939e-05, + "loss": 1.2119, + "step": 17085 + }, + { + "epoch": 0.5016736155969229, + "grad_norm": 0.0, + "learning_rate": 1.043160189398813e-05, + "loss": 1.3203, + "step": 17086 + }, + { + "epoch": 0.5017029772740619, + "grad_norm": 0.0, + "learning_rate": 1.0430651817813781e-05, + "loss": 1.3359, + "step": 17087 + }, + { + "epoch": 0.5017323389512008, + "grad_norm": 0.0, + "learning_rate": 1.0429701737744944e-05, + "loss": 1.3662, + "step": 17088 + }, + { + "epoch": 0.5017617006283399, + "grad_norm": 0.0, + "learning_rate": 1.0428751653790199e-05, + "loss": 1.4619, + "step": 17089 + }, + { + "epoch": 0.5017910623054789, + "grad_norm": 0.0, + "learning_rate": 1.0427801565958145e-05, + "loss": 1.4043, + "step": 17090 + }, + { + "epoch": 0.5018204239826178, + "grad_norm": 0.0, + "learning_rate": 1.0426851474257368e-05, + "loss": 1.2842, + "step": 17091 + }, + { + "epoch": 0.5018497856597569, + "grad_norm": 0.0, + "learning_rate": 1.042590137869647e-05, + "loss": 1.3047, + "step": 17092 + }, + { + "epoch": 0.5018791473368959, + "grad_norm": 0.0, + "learning_rate": 1.0424951279284034e-05, + "loss": 1.3623, + "step": 17093 + }, + { + "epoch": 0.5019085090140348, + "grad_norm": 0.0, + "learning_rate": 1.0424001176028656e-05, + "loss": 1.2959, + "step": 17094 + }, + { + "epoch": 0.5019378706911739, + "grad_norm": 0.0, + "learning_rate": 1.0423051068938923e-05, + "loss": 1.4443, + "step": 17095 + }, + { + "epoch": 0.5019672323683129, + "grad_norm": 0.0, + "learning_rate": 1.0422100958023435e-05, + "loss": 1.2783, + "step": 17096 + }, + { + "epoch": 0.5019965940454518, + "grad_norm": 0.0, + "learning_rate": 1.0421150843290777e-05, + "loss": 1.0845, + "step": 17097 + }, + { + "epoch": 0.5020259557225909, + "grad_norm": 0.0, + "learning_rate": 1.0420200724749546e-05, + "loss": 1.3076, + "step": 17098 + }, + { + "epoch": 0.5020553173997299, + "grad_norm": 0.0, + "learning_rate": 1.0419250602408328e-05, + "loss": 1.4004, + "step": 17099 + }, + { + "epoch": 0.5020846790768688, + "grad_norm": 0.0, + "learning_rate": 1.0418300476275725e-05, + "loss": 1.3672, + "step": 17100 + }, + { + "epoch": 0.5021140407540079, + "grad_norm": 0.0, + "learning_rate": 1.041735034636032e-05, + "loss": 1.4707, + "step": 17101 + }, + { + "epoch": 0.5021434024311469, + "grad_norm": 0.0, + "learning_rate": 1.0416400212670712e-05, + "loss": 1.3281, + "step": 17102 + }, + { + "epoch": 0.5021727641082858, + "grad_norm": 0.0, + "learning_rate": 1.041545007521549e-05, + "loss": 1.2188, + "step": 17103 + }, + { + "epoch": 0.5022021257854249, + "grad_norm": 0.0, + "learning_rate": 1.0414499934003247e-05, + "loss": 1.2959, + "step": 17104 + }, + { + "epoch": 0.5022314874625639, + "grad_norm": 0.0, + "learning_rate": 1.0413549789042571e-05, + "loss": 1.3271, + "step": 17105 + }, + { + "epoch": 0.5022608491397028, + "grad_norm": 0.0, + "learning_rate": 1.0412599640342061e-05, + "loss": 1.3076, + "step": 17106 + }, + { + "epoch": 0.5022902108168419, + "grad_norm": 0.0, + "learning_rate": 1.0411649487910305e-05, + "loss": 1.3867, + "step": 17107 + }, + { + "epoch": 0.5023195724939808, + "grad_norm": 0.0, + "learning_rate": 1.0410699331755901e-05, + "loss": 1.2461, + "step": 17108 + }, + { + "epoch": 0.5023489341711198, + "grad_norm": 0.0, + "learning_rate": 1.0409749171887434e-05, + "loss": 1.248, + "step": 17109 + }, + { + "epoch": 0.5023782958482589, + "grad_norm": 0.0, + "learning_rate": 1.04087990083135e-05, + "loss": 1.416, + "step": 17110 + }, + { + "epoch": 0.5024076575253978, + "grad_norm": 0.0, + "learning_rate": 1.0407848841042691e-05, + "loss": 1.2563, + "step": 17111 + }, + { + "epoch": 0.5024370192025368, + "grad_norm": 0.0, + "learning_rate": 1.0406898670083605e-05, + "loss": 1.3105, + "step": 17112 + }, + { + "epoch": 0.5024663808796759, + "grad_norm": 0.0, + "learning_rate": 1.040594849544483e-05, + "loss": 1.3027, + "step": 17113 + }, + { + "epoch": 0.5024957425568148, + "grad_norm": 0.0, + "learning_rate": 1.0404998317134955e-05, + "loss": 1.3594, + "step": 17114 + }, + { + "epoch": 0.5025251042339538, + "grad_norm": 0.0, + "learning_rate": 1.0404048135162579e-05, + "loss": 1.1694, + "step": 17115 + }, + { + "epoch": 0.5025544659110929, + "grad_norm": 0.0, + "learning_rate": 1.0403097949536291e-05, + "loss": 1.333, + "step": 17116 + }, + { + "epoch": 0.5025838275882318, + "grad_norm": 0.0, + "learning_rate": 1.0402147760264686e-05, + "loss": 1.3291, + "step": 17117 + }, + { + "epoch": 0.5026131892653708, + "grad_norm": 0.0, + "learning_rate": 1.0401197567356356e-05, + "loss": 1.1011, + "step": 17118 + }, + { + "epoch": 0.5026425509425099, + "grad_norm": 0.0, + "learning_rate": 1.0400247370819892e-05, + "loss": 1.2764, + "step": 17119 + }, + { + "epoch": 0.5026719126196488, + "grad_norm": 0.0, + "learning_rate": 1.0399297170663892e-05, + "loss": 1.4053, + "step": 17120 + }, + { + "epoch": 0.5027012742967878, + "grad_norm": 0.0, + "learning_rate": 1.0398346966896941e-05, + "loss": 1.3242, + "step": 17121 + }, + { + "epoch": 0.5027306359739269, + "grad_norm": 0.0, + "learning_rate": 1.0397396759527641e-05, + "loss": 1.3525, + "step": 17122 + }, + { + "epoch": 0.5027599976510658, + "grad_norm": 0.0, + "learning_rate": 1.0396446548564576e-05, + "loss": 1.3018, + "step": 17123 + }, + { + "epoch": 0.5027893593282048, + "grad_norm": 0.0, + "learning_rate": 1.0395496334016345e-05, + "loss": 1.335, + "step": 17124 + }, + { + "epoch": 0.5028187210053439, + "grad_norm": 0.0, + "learning_rate": 1.039454611589154e-05, + "loss": 1.1211, + "step": 17125 + }, + { + "epoch": 0.5028480826824828, + "grad_norm": 0.0, + "learning_rate": 1.0393595894198752e-05, + "loss": 1.291, + "step": 17126 + }, + { + "epoch": 0.5028774443596218, + "grad_norm": 0.0, + "learning_rate": 1.0392645668946575e-05, + "loss": 1.2754, + "step": 17127 + }, + { + "epoch": 0.5029068060367609, + "grad_norm": 0.0, + "learning_rate": 1.0391695440143604e-05, + "loss": 1.3447, + "step": 17128 + }, + { + "epoch": 0.5029361677138998, + "grad_norm": 0.0, + "learning_rate": 1.0390745207798432e-05, + "loss": 1.3984, + "step": 17129 + }, + { + "epoch": 0.5029655293910388, + "grad_norm": 0.0, + "learning_rate": 1.038979497191965e-05, + "loss": 1.3657, + "step": 17130 + }, + { + "epoch": 0.5029948910681779, + "grad_norm": 0.0, + "learning_rate": 1.0388844732515851e-05, + "loss": 1.3174, + "step": 17131 + }, + { + "epoch": 0.5030242527453168, + "grad_norm": 0.0, + "learning_rate": 1.0387894489595631e-05, + "loss": 1.332, + "step": 17132 + }, + { + "epoch": 0.5030536144224558, + "grad_norm": 0.0, + "learning_rate": 1.0386944243167583e-05, + "loss": 1.2192, + "step": 17133 + }, + { + "epoch": 0.5030829760995948, + "grad_norm": 0.0, + "learning_rate": 1.0385993993240298e-05, + "loss": 1.3154, + "step": 17134 + }, + { + "epoch": 0.5031123377767338, + "grad_norm": 0.0, + "learning_rate": 1.038504373982237e-05, + "loss": 1.3545, + "step": 17135 + }, + { + "epoch": 0.5031416994538728, + "grad_norm": 0.0, + "learning_rate": 1.0384093482922391e-05, + "loss": 1.4678, + "step": 17136 + }, + { + "epoch": 0.5031710611310118, + "grad_norm": 0.0, + "learning_rate": 1.0383143222548959e-05, + "loss": 1.2568, + "step": 17137 + }, + { + "epoch": 0.5032004228081508, + "grad_norm": 0.0, + "learning_rate": 1.0382192958710664e-05, + "loss": 1.3047, + "step": 17138 + }, + { + "epoch": 0.5032297844852898, + "grad_norm": 0.0, + "learning_rate": 1.0381242691416098e-05, + "loss": 1.1191, + "step": 17139 + }, + { + "epoch": 0.5032591461624288, + "grad_norm": 0.0, + "learning_rate": 1.038029242067386e-05, + "loss": 1.2222, + "step": 17140 + }, + { + "epoch": 0.5032885078395678, + "grad_norm": 0.0, + "learning_rate": 1.037934214649254e-05, + "loss": 1.2832, + "step": 17141 + }, + { + "epoch": 0.5033178695167068, + "grad_norm": 0.0, + "learning_rate": 1.037839186888073e-05, + "loss": 1.3662, + "step": 17142 + }, + { + "epoch": 0.5033472311938458, + "grad_norm": 0.0, + "learning_rate": 1.0377441587847025e-05, + "loss": 1.3477, + "step": 17143 + }, + { + "epoch": 0.5033765928709848, + "grad_norm": 0.0, + "learning_rate": 1.037649130340002e-05, + "loss": 1.25, + "step": 17144 + }, + { + "epoch": 0.5034059545481238, + "grad_norm": 0.0, + "learning_rate": 1.037554101554831e-05, + "loss": 1.3027, + "step": 17145 + }, + { + "epoch": 0.5034353162252628, + "grad_norm": 0.0, + "learning_rate": 1.0374590724300484e-05, + "loss": 1.2886, + "step": 17146 + }, + { + "epoch": 0.5034646779024018, + "grad_norm": 0.0, + "learning_rate": 1.0373640429665138e-05, + "loss": 1.2314, + "step": 17147 + }, + { + "epoch": 0.5034940395795408, + "grad_norm": 0.0, + "learning_rate": 1.0372690131650864e-05, + "loss": 1.2339, + "step": 17148 + }, + { + "epoch": 0.5035234012566798, + "grad_norm": 0.0, + "learning_rate": 1.0371739830266263e-05, + "loss": 1.3066, + "step": 17149 + }, + { + "epoch": 0.5035527629338188, + "grad_norm": 0.0, + "learning_rate": 1.037078952551992e-05, + "loss": 1.3457, + "step": 17150 + }, + { + "epoch": 0.5035821246109577, + "grad_norm": 0.0, + "learning_rate": 1.0369839217420432e-05, + "loss": 1.2236, + "step": 17151 + }, + { + "epoch": 0.5036114862880968, + "grad_norm": 0.0, + "learning_rate": 1.0368888905976395e-05, + "loss": 1.2412, + "step": 17152 + }, + { + "epoch": 0.5036408479652358, + "grad_norm": 0.0, + "learning_rate": 1.03679385911964e-05, + "loss": 1.3027, + "step": 17153 + }, + { + "epoch": 0.5036702096423747, + "grad_norm": 0.0, + "learning_rate": 1.0366988273089044e-05, + "loss": 1.3232, + "step": 17154 + }, + { + "epoch": 0.5036995713195138, + "grad_norm": 0.0, + "learning_rate": 1.0366037951662917e-05, + "loss": 1.1895, + "step": 17155 + }, + { + "epoch": 0.5037289329966528, + "grad_norm": 0.0, + "learning_rate": 1.0365087626926616e-05, + "loss": 1.3506, + "step": 17156 + }, + { + "epoch": 0.5037582946737917, + "grad_norm": 0.0, + "learning_rate": 1.0364137298888734e-05, + "loss": 1.2383, + "step": 17157 + }, + { + "epoch": 0.5037876563509308, + "grad_norm": 0.0, + "learning_rate": 1.0363186967557863e-05, + "loss": 1.3887, + "step": 17158 + }, + { + "epoch": 0.5038170180280698, + "grad_norm": 0.0, + "learning_rate": 1.0362236632942602e-05, + "loss": 1.3486, + "step": 17159 + }, + { + "epoch": 0.5038463797052087, + "grad_norm": 0.0, + "learning_rate": 1.036128629505154e-05, + "loss": 1.4473, + "step": 17160 + }, + { + "epoch": 0.5038757413823478, + "grad_norm": 0.0, + "learning_rate": 1.0360335953893278e-05, + "loss": 1.2627, + "step": 17161 + }, + { + "epoch": 0.5039051030594868, + "grad_norm": 0.0, + "learning_rate": 1.0359385609476403e-05, + "loss": 1.3848, + "step": 17162 + }, + { + "epoch": 0.5039344647366257, + "grad_norm": 0.0, + "learning_rate": 1.035843526180951e-05, + "loss": 1.2759, + "step": 17163 + }, + { + "epoch": 0.5039638264137648, + "grad_norm": 0.0, + "learning_rate": 1.0357484910901197e-05, + "loss": 1.2549, + "step": 17164 + }, + { + "epoch": 0.5039931880909038, + "grad_norm": 0.0, + "learning_rate": 1.0356534556760053e-05, + "loss": 1.4043, + "step": 17165 + }, + { + "epoch": 0.5040225497680427, + "grad_norm": 0.0, + "learning_rate": 1.0355584199394682e-05, + "loss": 1.3682, + "step": 17166 + }, + { + "epoch": 0.5040519114451818, + "grad_norm": 0.0, + "learning_rate": 1.0354633838813666e-05, + "loss": 1.2363, + "step": 17167 + }, + { + "epoch": 0.5040812731223208, + "grad_norm": 0.0, + "learning_rate": 1.0353683475025608e-05, + "loss": 1.2676, + "step": 17168 + }, + { + "epoch": 0.5041106347994597, + "grad_norm": 0.0, + "learning_rate": 1.0352733108039102e-05, + "loss": 1.3975, + "step": 17169 + }, + { + "epoch": 0.5041399964765988, + "grad_norm": 0.0, + "learning_rate": 1.0351782737862737e-05, + "loss": 1.4766, + "step": 17170 + }, + { + "epoch": 0.5041693581537378, + "grad_norm": 0.0, + "learning_rate": 1.0350832364505113e-05, + "loss": 1.4453, + "step": 17171 + }, + { + "epoch": 0.5041987198308767, + "grad_norm": 0.0, + "learning_rate": 1.0349881987974819e-05, + "loss": 1.4424, + "step": 17172 + }, + { + "epoch": 0.5042280815080158, + "grad_norm": 0.0, + "learning_rate": 1.0348931608280455e-05, + "loss": 1.2314, + "step": 17173 + }, + { + "epoch": 0.5042574431851548, + "grad_norm": 0.0, + "learning_rate": 1.0347981225430612e-05, + "loss": 1.145, + "step": 17174 + }, + { + "epoch": 0.5042868048622937, + "grad_norm": 0.0, + "learning_rate": 1.0347030839433887e-05, + "loss": 1.2246, + "step": 17175 + }, + { + "epoch": 0.5043161665394328, + "grad_norm": 0.0, + "learning_rate": 1.0346080450298872e-05, + "loss": 1.3398, + "step": 17176 + }, + { + "epoch": 0.5043455282165717, + "grad_norm": 0.0, + "learning_rate": 1.0345130058034162e-05, + "loss": 1.1729, + "step": 17177 + }, + { + "epoch": 0.5043748898937107, + "grad_norm": 0.0, + "learning_rate": 1.0344179662648355e-05, + "loss": 1.3789, + "step": 17178 + }, + { + "epoch": 0.5044042515708498, + "grad_norm": 0.0, + "learning_rate": 1.034322926415004e-05, + "loss": 1.2568, + "step": 17179 + }, + { + "epoch": 0.5044336132479887, + "grad_norm": 0.0, + "learning_rate": 1.0342278862547817e-05, + "loss": 1.2754, + "step": 17180 + }, + { + "epoch": 0.5044629749251277, + "grad_norm": 0.0, + "learning_rate": 1.0341328457850279e-05, + "loss": 1.2988, + "step": 17181 + }, + { + "epoch": 0.5044923366022667, + "grad_norm": 0.0, + "learning_rate": 1.0340378050066022e-05, + "loss": 1.3037, + "step": 17182 + }, + { + "epoch": 0.5045216982794057, + "grad_norm": 0.0, + "learning_rate": 1.0339427639203636e-05, + "loss": 1.3076, + "step": 17183 + }, + { + "epoch": 0.5045510599565447, + "grad_norm": 0.0, + "learning_rate": 1.0338477225271719e-05, + "loss": 1.2695, + "step": 17184 + }, + { + "epoch": 0.5045804216336837, + "grad_norm": 0.0, + "learning_rate": 1.0337526808278864e-05, + "loss": 1.2939, + "step": 17185 + }, + { + "epoch": 0.5046097833108227, + "grad_norm": 0.0, + "learning_rate": 1.0336576388233672e-05, + "loss": 1.2617, + "step": 17186 + }, + { + "epoch": 0.5046391449879617, + "grad_norm": 0.0, + "learning_rate": 1.033562596514473e-05, + "loss": 1.2549, + "step": 17187 + }, + { + "epoch": 0.5046685066651007, + "grad_norm": 0.0, + "learning_rate": 1.0334675539020637e-05, + "loss": 1.292, + "step": 17188 + }, + { + "epoch": 0.5046978683422397, + "grad_norm": 0.0, + "learning_rate": 1.033372510986999e-05, + "loss": 1.3418, + "step": 17189 + }, + { + "epoch": 0.5047272300193787, + "grad_norm": 0.0, + "learning_rate": 1.033277467770138e-05, + "loss": 1.2407, + "step": 17190 + }, + { + "epoch": 0.5047565916965177, + "grad_norm": 0.0, + "learning_rate": 1.0331824242523406e-05, + "loss": 1.3291, + "step": 17191 + }, + { + "epoch": 0.5047859533736567, + "grad_norm": 0.0, + "learning_rate": 1.0330873804344656e-05, + "loss": 1.3779, + "step": 17192 + }, + { + "epoch": 0.5048153150507957, + "grad_norm": 0.0, + "learning_rate": 1.032992336317373e-05, + "loss": 1.3408, + "step": 17193 + }, + { + "epoch": 0.5048446767279346, + "grad_norm": 0.0, + "learning_rate": 1.0328972919019224e-05, + "loss": 1.3184, + "step": 17194 + }, + { + "epoch": 0.5048740384050737, + "grad_norm": 0.0, + "learning_rate": 1.0328022471889731e-05, + "loss": 1.5342, + "step": 17195 + }, + { + "epoch": 0.5049034000822127, + "grad_norm": 0.0, + "learning_rate": 1.0327072021793848e-05, + "loss": 1.2744, + "step": 17196 + }, + { + "epoch": 0.5049327617593516, + "grad_norm": 0.0, + "learning_rate": 1.0326121568740166e-05, + "loss": 1.4014, + "step": 17197 + }, + { + "epoch": 0.5049621234364907, + "grad_norm": 0.0, + "learning_rate": 1.0325171112737286e-05, + "loss": 1.2783, + "step": 17198 + }, + { + "epoch": 0.5049914851136297, + "grad_norm": 0.0, + "learning_rate": 1.03242206537938e-05, + "loss": 1.25, + "step": 17199 + }, + { + "epoch": 0.5050208467907686, + "grad_norm": 0.0, + "learning_rate": 1.03232701919183e-05, + "loss": 1.3896, + "step": 17200 + }, + { + "epoch": 0.5050502084679077, + "grad_norm": 0.0, + "learning_rate": 1.032231972711939e-05, + "loss": 1.3701, + "step": 17201 + }, + { + "epoch": 0.5050795701450467, + "grad_norm": 0.0, + "learning_rate": 1.0321369259405659e-05, + "loss": 1.29, + "step": 17202 + }, + { + "epoch": 0.5051089318221856, + "grad_norm": 0.0, + "learning_rate": 1.0320418788785703e-05, + "loss": 1.3105, + "step": 17203 + }, + { + "epoch": 0.5051382934993247, + "grad_norm": 0.0, + "learning_rate": 1.0319468315268115e-05, + "loss": 1.2822, + "step": 17204 + }, + { + "epoch": 0.5051676551764637, + "grad_norm": 0.0, + "learning_rate": 1.0318517838861494e-05, + "loss": 1.1206, + "step": 17205 + }, + { + "epoch": 0.5051970168536026, + "grad_norm": 0.0, + "learning_rate": 1.0317567359574437e-05, + "loss": 1.3262, + "step": 17206 + }, + { + "epoch": 0.5052263785307417, + "grad_norm": 0.0, + "learning_rate": 1.0316616877415537e-05, + "loss": 1.4346, + "step": 17207 + }, + { + "epoch": 0.5052557402078807, + "grad_norm": 0.0, + "learning_rate": 1.0315666392393389e-05, + "loss": 1.3066, + "step": 17208 + }, + { + "epoch": 0.5052851018850196, + "grad_norm": 0.0, + "learning_rate": 1.0314715904516588e-05, + "loss": 1.2812, + "step": 17209 + }, + { + "epoch": 0.5053144635621587, + "grad_norm": 0.0, + "learning_rate": 1.0313765413793733e-05, + "loss": 1.3945, + "step": 17210 + }, + { + "epoch": 0.5053438252392977, + "grad_norm": 0.0, + "learning_rate": 1.0312814920233417e-05, + "loss": 1.2275, + "step": 17211 + }, + { + "epoch": 0.5053731869164366, + "grad_norm": 0.0, + "learning_rate": 1.0311864423844232e-05, + "loss": 1.3438, + "step": 17212 + }, + { + "epoch": 0.5054025485935757, + "grad_norm": 0.0, + "learning_rate": 1.0310913924634777e-05, + "loss": 1.3447, + "step": 17213 + }, + { + "epoch": 0.5054319102707147, + "grad_norm": 0.0, + "learning_rate": 1.0309963422613648e-05, + "loss": 1.3906, + "step": 17214 + }, + { + "epoch": 0.5054612719478536, + "grad_norm": 0.0, + "learning_rate": 1.0309012917789445e-05, + "loss": 1.3477, + "step": 17215 + }, + { + "epoch": 0.5054906336249927, + "grad_norm": 0.0, + "learning_rate": 1.0308062410170754e-05, + "loss": 1.3584, + "step": 17216 + }, + { + "epoch": 0.5055199953021317, + "grad_norm": 0.0, + "learning_rate": 1.0307111899766178e-05, + "loss": 1.3193, + "step": 17217 + }, + { + "epoch": 0.5055493569792706, + "grad_norm": 0.0, + "learning_rate": 1.0306161386584309e-05, + "loss": 1.3594, + "step": 17218 + }, + { + "epoch": 0.5055787186564097, + "grad_norm": 0.0, + "learning_rate": 1.0305210870633747e-05, + "loss": 1.3027, + "step": 17219 + }, + { + "epoch": 0.5056080803335486, + "grad_norm": 0.0, + "learning_rate": 1.0304260351923082e-05, + "loss": 1.207, + "step": 17220 + }, + { + "epoch": 0.5056374420106876, + "grad_norm": 0.0, + "learning_rate": 1.0303309830460915e-05, + "loss": 1.3154, + "step": 17221 + }, + { + "epoch": 0.5056668036878267, + "grad_norm": 0.0, + "learning_rate": 1.0302359306255837e-05, + "loss": 1.1494, + "step": 17222 + }, + { + "epoch": 0.5056961653649656, + "grad_norm": 0.0, + "learning_rate": 1.030140877931645e-05, + "loss": 1.2402, + "step": 17223 + }, + { + "epoch": 0.5057255270421046, + "grad_norm": 0.0, + "learning_rate": 1.0300458249651344e-05, + "loss": 1.3003, + "step": 17224 + }, + { + "epoch": 0.5057548887192437, + "grad_norm": 0.0, + "learning_rate": 1.0299507717269117e-05, + "loss": 1.293, + "step": 17225 + }, + { + "epoch": 0.5057842503963826, + "grad_norm": 0.0, + "learning_rate": 1.0298557182178363e-05, + "loss": 1.2417, + "step": 17226 + }, + { + "epoch": 0.5058136120735216, + "grad_norm": 0.0, + "learning_rate": 1.0297606644387683e-05, + "loss": 1.3174, + "step": 17227 + }, + { + "epoch": 0.5058429737506607, + "grad_norm": 0.0, + "learning_rate": 1.029665610390567e-05, + "loss": 1.4199, + "step": 17228 + }, + { + "epoch": 0.5058723354277996, + "grad_norm": 0.0, + "learning_rate": 1.0295705560740918e-05, + "loss": 1.2627, + "step": 17229 + }, + { + "epoch": 0.5059016971049386, + "grad_norm": 0.0, + "learning_rate": 1.0294755014902027e-05, + "loss": 1.376, + "step": 17230 + }, + { + "epoch": 0.5059310587820777, + "grad_norm": 0.0, + "learning_rate": 1.0293804466397588e-05, + "loss": 1.3711, + "step": 17231 + }, + { + "epoch": 0.5059604204592166, + "grad_norm": 0.0, + "learning_rate": 1.0292853915236205e-05, + "loss": 1.4404, + "step": 17232 + }, + { + "epoch": 0.5059897821363556, + "grad_norm": 0.0, + "learning_rate": 1.0291903361426464e-05, + "loss": 1.3071, + "step": 17233 + }, + { + "epoch": 0.5060191438134947, + "grad_norm": 0.0, + "learning_rate": 1.0290952804976966e-05, + "loss": 1.291, + "step": 17234 + }, + { + "epoch": 0.5060485054906336, + "grad_norm": 0.0, + "learning_rate": 1.0290002245896311e-05, + "loss": 1.2334, + "step": 17235 + }, + { + "epoch": 0.5060778671677726, + "grad_norm": 0.0, + "learning_rate": 1.028905168419309e-05, + "loss": 1.3926, + "step": 17236 + }, + { + "epoch": 0.5061072288449117, + "grad_norm": 0.0, + "learning_rate": 1.0288101119875899e-05, + "loss": 1.249, + "step": 17237 + }, + { + "epoch": 0.5061365905220506, + "grad_norm": 0.0, + "learning_rate": 1.0287150552953335e-05, + "loss": 1.4219, + "step": 17238 + }, + { + "epoch": 0.5061659521991896, + "grad_norm": 0.0, + "learning_rate": 1.0286199983434e-05, + "loss": 1.2607, + "step": 17239 + }, + { + "epoch": 0.5061953138763287, + "grad_norm": 0.0, + "learning_rate": 1.0285249411326484e-05, + "loss": 1.2109, + "step": 17240 + }, + { + "epoch": 0.5062246755534676, + "grad_norm": 0.0, + "learning_rate": 1.028429883663938e-05, + "loss": 1.3359, + "step": 17241 + }, + { + "epoch": 0.5062540372306066, + "grad_norm": 0.0, + "learning_rate": 1.0283348259381292e-05, + "loss": 1.2979, + "step": 17242 + }, + { + "epoch": 0.5062833989077457, + "grad_norm": 0.0, + "learning_rate": 1.0282397679560812e-05, + "loss": 1.2646, + "step": 17243 + }, + { + "epoch": 0.5063127605848846, + "grad_norm": 0.0, + "learning_rate": 1.0281447097186539e-05, + "loss": 1.3252, + "step": 17244 + }, + { + "epoch": 0.5063421222620236, + "grad_norm": 0.0, + "learning_rate": 1.0280496512267066e-05, + "loss": 1.2305, + "step": 17245 + }, + { + "epoch": 0.5063714839391626, + "grad_norm": 0.0, + "learning_rate": 1.0279545924810992e-05, + "loss": 1.1338, + "step": 17246 + }, + { + "epoch": 0.5064008456163016, + "grad_norm": 0.0, + "learning_rate": 1.0278595334826913e-05, + "loss": 1.3428, + "step": 17247 + }, + { + "epoch": 0.5064302072934406, + "grad_norm": 0.0, + "learning_rate": 1.0277644742323426e-05, + "loss": 1.291, + "step": 17248 + }, + { + "epoch": 0.5064595689705796, + "grad_norm": 0.0, + "learning_rate": 1.0276694147309122e-05, + "loss": 1.2109, + "step": 17249 + }, + { + "epoch": 0.5064889306477186, + "grad_norm": 0.0, + "learning_rate": 1.0275743549792608e-05, + "loss": 1.2764, + "step": 17250 + }, + { + "epoch": 0.5065182923248576, + "grad_norm": 0.0, + "learning_rate": 1.027479294978247e-05, + "loss": 1.3535, + "step": 17251 + }, + { + "epoch": 0.5065476540019966, + "grad_norm": 0.0, + "learning_rate": 1.0273842347287313e-05, + "loss": 1.3242, + "step": 17252 + }, + { + "epoch": 0.5065770156791356, + "grad_norm": 0.0, + "learning_rate": 1.0272891742315728e-05, + "loss": 1.3975, + "step": 17253 + }, + { + "epoch": 0.5066063773562746, + "grad_norm": 0.0, + "learning_rate": 1.0271941134876308e-05, + "loss": 1.334, + "step": 17254 + }, + { + "epoch": 0.5066357390334136, + "grad_norm": 0.0, + "learning_rate": 1.027099052497766e-05, + "loss": 1.4053, + "step": 17255 + }, + { + "epoch": 0.5066651007105526, + "grad_norm": 0.0, + "learning_rate": 1.0270039912628376e-05, + "loss": 1.3555, + "step": 17256 + }, + { + "epoch": 0.5066944623876916, + "grad_norm": 0.0, + "learning_rate": 1.026908929783705e-05, + "loss": 1.3232, + "step": 17257 + }, + { + "epoch": 0.5067238240648306, + "grad_norm": 0.0, + "learning_rate": 1.026813868061228e-05, + "loss": 1.3359, + "step": 17258 + }, + { + "epoch": 0.5067531857419696, + "grad_norm": 0.0, + "learning_rate": 1.0267188060962665e-05, + "loss": 1.2734, + "step": 17259 + }, + { + "epoch": 0.5067825474191086, + "grad_norm": 0.0, + "learning_rate": 1.02662374388968e-05, + "loss": 1.3945, + "step": 17260 + }, + { + "epoch": 0.5068119090962476, + "grad_norm": 0.0, + "learning_rate": 1.0265286814423278e-05, + "loss": 1.3501, + "step": 17261 + }, + { + "epoch": 0.5068412707733866, + "grad_norm": 0.0, + "learning_rate": 1.0264336187550704e-05, + "loss": 1.3447, + "step": 17262 + }, + { + "epoch": 0.5068706324505255, + "grad_norm": 0.0, + "learning_rate": 1.0263385558287667e-05, + "loss": 1.4072, + "step": 17263 + }, + { + "epoch": 0.5068999941276646, + "grad_norm": 0.0, + "learning_rate": 1.0262434926642768e-05, + "loss": 1.3232, + "step": 17264 + }, + { + "epoch": 0.5069293558048036, + "grad_norm": 0.0, + "learning_rate": 1.0261484292624602e-05, + "loss": 1.3037, + "step": 17265 + }, + { + "epoch": 0.5069587174819425, + "grad_norm": 0.0, + "learning_rate": 1.0260533656241767e-05, + "loss": 1.3213, + "step": 17266 + }, + { + "epoch": 0.5069880791590816, + "grad_norm": 0.0, + "learning_rate": 1.025958301750286e-05, + "loss": 1.2939, + "step": 17267 + }, + { + "epoch": 0.5070174408362206, + "grad_norm": 0.0, + "learning_rate": 1.0258632376416478e-05, + "loss": 1.3379, + "step": 17268 + }, + { + "epoch": 0.5070468025133595, + "grad_norm": 0.0, + "learning_rate": 1.0257681732991214e-05, + "loss": 1.3906, + "step": 17269 + }, + { + "epoch": 0.5070761641904986, + "grad_norm": 0.0, + "learning_rate": 1.0256731087235672e-05, + "loss": 1.3975, + "step": 17270 + }, + { + "epoch": 0.5071055258676376, + "grad_norm": 0.0, + "learning_rate": 1.0255780439158443e-05, + "loss": 1.3789, + "step": 17271 + }, + { + "epoch": 0.5071348875447765, + "grad_norm": 0.0, + "learning_rate": 1.0254829788768127e-05, + "loss": 1.3438, + "step": 17272 + }, + { + "epoch": 0.5071642492219156, + "grad_norm": 0.0, + "learning_rate": 1.025387913607332e-05, + "loss": 1.2871, + "step": 17273 + }, + { + "epoch": 0.5071936108990546, + "grad_norm": 0.0, + "learning_rate": 1.0252928481082618e-05, + "loss": 1.4707, + "step": 17274 + }, + { + "epoch": 0.5072229725761935, + "grad_norm": 0.0, + "learning_rate": 1.0251977823804617e-05, + "loss": 1.4023, + "step": 17275 + }, + { + "epoch": 0.5072523342533326, + "grad_norm": 0.0, + "learning_rate": 1.025102716424792e-05, + "loss": 1.4023, + "step": 17276 + }, + { + "epoch": 0.5072816959304716, + "grad_norm": 0.0, + "learning_rate": 1.0250076502421118e-05, + "loss": 1.1904, + "step": 17277 + }, + { + "epoch": 0.5073110576076105, + "grad_norm": 0.0, + "learning_rate": 1.024912583833281e-05, + "loss": 1.269, + "step": 17278 + }, + { + "epoch": 0.5073404192847496, + "grad_norm": 0.0, + "learning_rate": 1.0248175171991593e-05, + "loss": 1.2334, + "step": 17279 + }, + { + "epoch": 0.5073697809618886, + "grad_norm": 0.0, + "learning_rate": 1.0247224503406066e-05, + "loss": 1.3154, + "step": 17280 + }, + { + "epoch": 0.5073991426390275, + "grad_norm": 0.0, + "learning_rate": 1.0246273832584825e-05, + "loss": 1.3564, + "step": 17281 + }, + { + "epoch": 0.5074285043161665, + "grad_norm": 0.0, + "learning_rate": 1.0245323159536465e-05, + "loss": 1.2041, + "step": 17282 + }, + { + "epoch": 0.5074578659933056, + "grad_norm": 0.0, + "learning_rate": 1.0244372484269583e-05, + "loss": 1.4805, + "step": 17283 + }, + { + "epoch": 0.5074872276704445, + "grad_norm": 0.0, + "learning_rate": 1.0243421806792782e-05, + "loss": 1.2832, + "step": 17284 + }, + { + "epoch": 0.5075165893475835, + "grad_norm": 0.0, + "learning_rate": 1.0242471127114652e-05, + "loss": 1.1753, + "step": 17285 + }, + { + "epoch": 0.5075459510247226, + "grad_norm": 0.0, + "learning_rate": 1.0241520445243795e-05, + "loss": 1.3281, + "step": 17286 + }, + { + "epoch": 0.5075753127018615, + "grad_norm": 0.0, + "learning_rate": 1.0240569761188806e-05, + "loss": 1.3672, + "step": 17287 + }, + { + "epoch": 0.5076046743790005, + "grad_norm": 0.0, + "learning_rate": 1.0239619074958285e-05, + "loss": 1.2324, + "step": 17288 + }, + { + "epoch": 0.5076340360561395, + "grad_norm": 0.0, + "learning_rate": 1.0238668386560826e-05, + "loss": 1.3789, + "step": 17289 + }, + { + "epoch": 0.5076633977332785, + "grad_norm": 0.0, + "learning_rate": 1.0237717696005028e-05, + "loss": 1.2148, + "step": 17290 + }, + { + "epoch": 0.5076927594104175, + "grad_norm": 0.0, + "learning_rate": 1.0236767003299486e-05, + "loss": 1.2734, + "step": 17291 + }, + { + "epoch": 0.5077221210875565, + "grad_norm": 0.0, + "learning_rate": 1.0235816308452802e-05, + "loss": 1.3066, + "step": 17292 + }, + { + "epoch": 0.5077514827646955, + "grad_norm": 0.0, + "learning_rate": 1.023486561147357e-05, + "loss": 1.4023, + "step": 17293 + }, + { + "epoch": 0.5077808444418345, + "grad_norm": 0.0, + "learning_rate": 1.0233914912370386e-05, + "loss": 1.1523, + "step": 17294 + }, + { + "epoch": 0.5078102061189735, + "grad_norm": 0.0, + "learning_rate": 1.0232964211151853e-05, + "loss": 1.2754, + "step": 17295 + }, + { + "epoch": 0.5078395677961125, + "grad_norm": 0.0, + "learning_rate": 1.023201350782656e-05, + "loss": 1.2812, + "step": 17296 + }, + { + "epoch": 0.5078689294732515, + "grad_norm": 0.0, + "learning_rate": 1.0231062802403115e-05, + "loss": 1.1323, + "step": 17297 + }, + { + "epoch": 0.5078982911503905, + "grad_norm": 0.0, + "learning_rate": 1.0230112094890108e-05, + "loss": 1.2085, + "step": 17298 + }, + { + "epoch": 0.5079276528275295, + "grad_norm": 0.0, + "learning_rate": 1.0229161385296138e-05, + "loss": 1.3477, + "step": 17299 + }, + { + "epoch": 0.5079570145046685, + "grad_norm": 0.0, + "learning_rate": 1.0228210673629804e-05, + "loss": 1.4629, + "step": 17300 + }, + { + "epoch": 0.5079863761818075, + "grad_norm": 0.0, + "learning_rate": 1.0227259959899702e-05, + "loss": 1.3975, + "step": 17301 + }, + { + "epoch": 0.5080157378589465, + "grad_norm": 0.0, + "learning_rate": 1.022630924411443e-05, + "loss": 1.1094, + "step": 17302 + }, + { + "epoch": 0.5080450995360855, + "grad_norm": 0.0, + "learning_rate": 1.0225358526282582e-05, + "loss": 1.4062, + "step": 17303 + }, + { + "epoch": 0.5080744612132245, + "grad_norm": 0.0, + "learning_rate": 1.0224407806412761e-05, + "loss": 1.3105, + "step": 17304 + }, + { + "epoch": 0.5081038228903635, + "grad_norm": 0.0, + "learning_rate": 1.0223457084513566e-05, + "loss": 1.25, + "step": 17305 + }, + { + "epoch": 0.5081331845675024, + "grad_norm": 0.0, + "learning_rate": 1.022250636059359e-05, + "loss": 1.271, + "step": 17306 + }, + { + "epoch": 0.5081625462446415, + "grad_norm": 0.0, + "learning_rate": 1.0221555634661429e-05, + "loss": 1.2588, + "step": 17307 + }, + { + "epoch": 0.5081919079217805, + "grad_norm": 0.0, + "learning_rate": 1.0220604906725686e-05, + "loss": 1.3828, + "step": 17308 + }, + { + "epoch": 0.5082212695989194, + "grad_norm": 0.0, + "learning_rate": 1.021965417679496e-05, + "loss": 1.3301, + "step": 17309 + }, + { + "epoch": 0.5082506312760585, + "grad_norm": 0.0, + "learning_rate": 1.021870344487784e-05, + "loss": 1.4209, + "step": 17310 + }, + { + "epoch": 0.5082799929531975, + "grad_norm": 0.0, + "learning_rate": 1.021775271098293e-05, + "loss": 1.3633, + "step": 17311 + }, + { + "epoch": 0.5083093546303364, + "grad_norm": 0.0, + "learning_rate": 1.0216801975118825e-05, + "loss": 1.3721, + "step": 17312 + }, + { + "epoch": 0.5083387163074755, + "grad_norm": 0.0, + "learning_rate": 1.0215851237294126e-05, + "loss": 1.314, + "step": 17313 + }, + { + "epoch": 0.5083680779846145, + "grad_norm": 0.0, + "learning_rate": 1.0214900497517429e-05, + "loss": 1.3101, + "step": 17314 + }, + { + "epoch": 0.5083974396617534, + "grad_norm": 0.0, + "learning_rate": 1.021394975579733e-05, + "loss": 1.415, + "step": 17315 + }, + { + "epoch": 0.5084268013388925, + "grad_norm": 0.0, + "learning_rate": 1.0212999012142428e-05, + "loss": 1.3555, + "step": 17316 + }, + { + "epoch": 0.5084561630160315, + "grad_norm": 0.0, + "learning_rate": 1.0212048266561326e-05, + "loss": 1.2744, + "step": 17317 + }, + { + "epoch": 0.5084855246931704, + "grad_norm": 0.0, + "learning_rate": 1.0211097519062613e-05, + "loss": 1.4492, + "step": 17318 + }, + { + "epoch": 0.5085148863703095, + "grad_norm": 0.0, + "learning_rate": 1.0210146769654893e-05, + "loss": 1.3184, + "step": 17319 + }, + { + "epoch": 0.5085442480474485, + "grad_norm": 0.0, + "learning_rate": 1.0209196018346764e-05, + "loss": 1.2744, + "step": 17320 + }, + { + "epoch": 0.5085736097245874, + "grad_norm": 0.0, + "learning_rate": 1.0208245265146816e-05, + "loss": 1.2344, + "step": 17321 + }, + { + "epoch": 0.5086029714017265, + "grad_norm": 0.0, + "learning_rate": 1.0207294510063659e-05, + "loss": 1.3252, + "step": 17322 + }, + { + "epoch": 0.5086323330788655, + "grad_norm": 0.0, + "learning_rate": 1.020634375310588e-05, + "loss": 1.3291, + "step": 17323 + }, + { + "epoch": 0.5086616947560044, + "grad_norm": 0.0, + "learning_rate": 1.0205392994282082e-05, + "loss": 1.3594, + "step": 17324 + }, + { + "epoch": 0.5086910564331435, + "grad_norm": 0.0, + "learning_rate": 1.0204442233600867e-05, + "loss": 1.3179, + "step": 17325 + }, + { + "epoch": 0.5087204181102825, + "grad_norm": 0.0, + "learning_rate": 1.0203491471070823e-05, + "loss": 1.3291, + "step": 17326 + }, + { + "epoch": 0.5087497797874214, + "grad_norm": 0.0, + "learning_rate": 1.0202540706700557e-05, + "loss": 1.3613, + "step": 17327 + }, + { + "epoch": 0.5087791414645605, + "grad_norm": 0.0, + "learning_rate": 1.020158994049866e-05, + "loss": 1.2227, + "step": 17328 + }, + { + "epoch": 0.5088085031416995, + "grad_norm": 0.0, + "learning_rate": 1.0200639172473738e-05, + "loss": 1.3164, + "step": 17329 + }, + { + "epoch": 0.5088378648188384, + "grad_norm": 0.0, + "learning_rate": 1.0199688402634384e-05, + "loss": 1.2852, + "step": 17330 + }, + { + "epoch": 0.5088672264959775, + "grad_norm": 0.0, + "learning_rate": 1.0198737630989195e-05, + "loss": 1.2295, + "step": 17331 + }, + { + "epoch": 0.5088965881731164, + "grad_norm": 0.0, + "learning_rate": 1.019778685754677e-05, + "loss": 1.3447, + "step": 17332 + }, + { + "epoch": 0.5089259498502554, + "grad_norm": 0.0, + "learning_rate": 1.0196836082315706e-05, + "loss": 1.3926, + "step": 17333 + }, + { + "epoch": 0.5089553115273945, + "grad_norm": 0.0, + "learning_rate": 1.0195885305304609e-05, + "loss": 1.1851, + "step": 17334 + }, + { + "epoch": 0.5089846732045334, + "grad_norm": 0.0, + "learning_rate": 1.0194934526522066e-05, + "loss": 1.3037, + "step": 17335 + }, + { + "epoch": 0.5090140348816724, + "grad_norm": 0.0, + "learning_rate": 1.0193983745976681e-05, + "loss": 1.2837, + "step": 17336 + }, + { + "epoch": 0.5090433965588115, + "grad_norm": 0.0, + "learning_rate": 1.0193032963677055e-05, + "loss": 1.1138, + "step": 17337 + }, + { + "epoch": 0.5090727582359504, + "grad_norm": 0.0, + "learning_rate": 1.0192082179631777e-05, + "loss": 1.3252, + "step": 17338 + }, + { + "epoch": 0.5091021199130894, + "grad_norm": 0.0, + "learning_rate": 1.0191131393849456e-05, + "loss": 1.2764, + "step": 17339 + }, + { + "epoch": 0.5091314815902285, + "grad_norm": 0.0, + "learning_rate": 1.019018060633868e-05, + "loss": 1.3965, + "step": 17340 + }, + { + "epoch": 0.5091608432673674, + "grad_norm": 0.0, + "learning_rate": 1.0189229817108056e-05, + "loss": 1.2471, + "step": 17341 + }, + { + "epoch": 0.5091902049445064, + "grad_norm": 0.0, + "learning_rate": 1.0188279026166177e-05, + "loss": 1.2646, + "step": 17342 + }, + { + "epoch": 0.5092195666216455, + "grad_norm": 0.0, + "learning_rate": 1.018732823352164e-05, + "loss": 1.3271, + "step": 17343 + }, + { + "epoch": 0.5092489282987844, + "grad_norm": 0.0, + "learning_rate": 1.0186377439183046e-05, + "loss": 1.2725, + "step": 17344 + }, + { + "epoch": 0.5092782899759234, + "grad_norm": 0.0, + "learning_rate": 1.0185426643158995e-05, + "loss": 1.2598, + "step": 17345 + }, + { + "epoch": 0.5093076516530625, + "grad_norm": 0.0, + "learning_rate": 1.0184475845458083e-05, + "loss": 1.4004, + "step": 17346 + }, + { + "epoch": 0.5093370133302014, + "grad_norm": 0.0, + "learning_rate": 1.018352504608891e-05, + "loss": 1.1597, + "step": 17347 + }, + { + "epoch": 0.5093663750073404, + "grad_norm": 0.0, + "learning_rate": 1.0182574245060071e-05, + "loss": 1.2646, + "step": 17348 + }, + { + "epoch": 0.5093957366844795, + "grad_norm": 0.0, + "learning_rate": 1.0181623442380169e-05, + "loss": 1.3604, + "step": 17349 + }, + { + "epoch": 0.5094250983616184, + "grad_norm": 0.0, + "learning_rate": 1.01806726380578e-05, + "loss": 1.1465, + "step": 17350 + }, + { + "epoch": 0.5094544600387574, + "grad_norm": 0.0, + "learning_rate": 1.0179721832101558e-05, + "loss": 1.3174, + "step": 17351 + }, + { + "epoch": 0.5094838217158965, + "grad_norm": 0.0, + "learning_rate": 1.0178771024520046e-05, + "loss": 1.417, + "step": 17352 + }, + { + "epoch": 0.5095131833930354, + "grad_norm": 0.0, + "learning_rate": 1.0177820215321863e-05, + "loss": 1.3574, + "step": 17353 + }, + { + "epoch": 0.5095425450701744, + "grad_norm": 0.0, + "learning_rate": 1.0176869404515608e-05, + "loss": 1.2568, + "step": 17354 + }, + { + "epoch": 0.5095719067473135, + "grad_norm": 0.0, + "learning_rate": 1.0175918592109877e-05, + "loss": 1.2407, + "step": 17355 + }, + { + "epoch": 0.5096012684244524, + "grad_norm": 0.0, + "learning_rate": 1.0174967778113268e-05, + "loss": 1.2969, + "step": 17356 + }, + { + "epoch": 0.5096306301015914, + "grad_norm": 0.0, + "learning_rate": 1.0174016962534379e-05, + "loss": 1.4287, + "step": 17357 + }, + { + "epoch": 0.5096599917787304, + "grad_norm": 0.0, + "learning_rate": 1.0173066145381814e-05, + "loss": 1.4463, + "step": 17358 + }, + { + "epoch": 0.5096893534558694, + "grad_norm": 0.0, + "learning_rate": 1.0172115326664166e-05, + "loss": 1.4336, + "step": 17359 + }, + { + "epoch": 0.5097187151330084, + "grad_norm": 0.0, + "learning_rate": 1.0171164506390035e-05, + "loss": 1.375, + "step": 17360 + }, + { + "epoch": 0.5097480768101474, + "grad_norm": 0.0, + "learning_rate": 1.0170213684568018e-05, + "loss": 1.4326, + "step": 17361 + }, + { + "epoch": 0.5097774384872864, + "grad_norm": 0.0, + "learning_rate": 1.0169262861206718e-05, + "loss": 1.3115, + "step": 17362 + }, + { + "epoch": 0.5098068001644254, + "grad_norm": 0.0, + "learning_rate": 1.0168312036314727e-05, + "loss": 1.3223, + "step": 17363 + }, + { + "epoch": 0.5098361618415644, + "grad_norm": 0.0, + "learning_rate": 1.0167361209900648e-05, + "loss": 1.2568, + "step": 17364 + }, + { + "epoch": 0.5098655235187034, + "grad_norm": 0.0, + "learning_rate": 1.0166410381973079e-05, + "loss": 1.4531, + "step": 17365 + }, + { + "epoch": 0.5098948851958424, + "grad_norm": 0.0, + "learning_rate": 1.0165459552540619e-05, + "loss": 1.334, + "step": 17366 + }, + { + "epoch": 0.5099242468729814, + "grad_norm": 0.0, + "learning_rate": 1.0164508721611865e-05, + "loss": 1.377, + "step": 17367 + }, + { + "epoch": 0.5099536085501204, + "grad_norm": 0.0, + "learning_rate": 1.0163557889195421e-05, + "loss": 1.3105, + "step": 17368 + }, + { + "epoch": 0.5099829702272594, + "grad_norm": 0.0, + "learning_rate": 1.0162607055299875e-05, + "loss": 1.3486, + "step": 17369 + }, + { + "epoch": 0.5100123319043984, + "grad_norm": 0.0, + "learning_rate": 1.0161656219933834e-05, + "loss": 1.293, + "step": 17370 + }, + { + "epoch": 0.5100416935815374, + "grad_norm": 0.0, + "learning_rate": 1.0160705383105895e-05, + "loss": 1.3945, + "step": 17371 + }, + { + "epoch": 0.5100710552586764, + "grad_norm": 0.0, + "learning_rate": 1.0159754544824656e-05, + "loss": 1.3203, + "step": 17372 + }, + { + "epoch": 0.5101004169358154, + "grad_norm": 0.0, + "learning_rate": 1.0158803705098714e-05, + "loss": 1.3926, + "step": 17373 + }, + { + "epoch": 0.5101297786129544, + "grad_norm": 0.0, + "learning_rate": 1.0157852863936672e-05, + "loss": 1.3076, + "step": 17374 + }, + { + "epoch": 0.5101591402900933, + "grad_norm": 0.0, + "learning_rate": 1.0156902021347126e-05, + "loss": 1.3799, + "step": 17375 + }, + { + "epoch": 0.5101885019672324, + "grad_norm": 0.0, + "learning_rate": 1.0155951177338672e-05, + "loss": 1.2227, + "step": 17376 + }, + { + "epoch": 0.5102178636443714, + "grad_norm": 0.0, + "learning_rate": 1.0155000331919913e-05, + "loss": 1.3584, + "step": 17377 + }, + { + "epoch": 0.5102472253215103, + "grad_norm": 0.0, + "learning_rate": 1.0154049485099449e-05, + "loss": 1.373, + "step": 17378 + }, + { + "epoch": 0.5102765869986494, + "grad_norm": 0.0, + "learning_rate": 1.0153098636885874e-05, + "loss": 1.3154, + "step": 17379 + }, + { + "epoch": 0.5103059486757884, + "grad_norm": 0.0, + "learning_rate": 1.0152147787287787e-05, + "loss": 1.3564, + "step": 17380 + }, + { + "epoch": 0.5103353103529273, + "grad_norm": 0.0, + "learning_rate": 1.015119693631379e-05, + "loss": 1.4287, + "step": 17381 + }, + { + "epoch": 0.5103646720300663, + "grad_norm": 0.0, + "learning_rate": 1.0150246083972479e-05, + "loss": 1.3013, + "step": 17382 + }, + { + "epoch": 0.5103940337072054, + "grad_norm": 0.0, + "learning_rate": 1.0149295230272459e-05, + "loss": 1.2812, + "step": 17383 + }, + { + "epoch": 0.5104233953843443, + "grad_norm": 0.0, + "learning_rate": 1.0148344375222319e-05, + "loss": 1.2734, + "step": 17384 + }, + { + "epoch": 0.5104527570614833, + "grad_norm": 0.0, + "learning_rate": 1.0147393518830663e-05, + "loss": 1.3779, + "step": 17385 + }, + { + "epoch": 0.5104821187386224, + "grad_norm": 0.0, + "learning_rate": 1.0146442661106091e-05, + "loss": 1.3438, + "step": 17386 + }, + { + "epoch": 0.5105114804157613, + "grad_norm": 0.0, + "learning_rate": 1.0145491802057202e-05, + "loss": 1.3271, + "step": 17387 + }, + { + "epoch": 0.5105408420929003, + "grad_norm": 0.0, + "learning_rate": 1.0144540941692593e-05, + "loss": 1.3076, + "step": 17388 + }, + { + "epoch": 0.5105702037700394, + "grad_norm": 0.0, + "learning_rate": 1.0143590080020863e-05, + "loss": 1.1289, + "step": 17389 + }, + { + "epoch": 0.5105995654471783, + "grad_norm": 0.0, + "learning_rate": 1.0142639217050607e-05, + "loss": 1.2676, + "step": 17390 + }, + { + "epoch": 0.5106289271243173, + "grad_norm": 0.0, + "learning_rate": 1.0141688352790432e-05, + "loss": 1.1885, + "step": 17391 + }, + { + "epoch": 0.5106582888014564, + "grad_norm": 0.0, + "learning_rate": 1.0140737487248932e-05, + "loss": 1.2549, + "step": 17392 + }, + { + "epoch": 0.5106876504785953, + "grad_norm": 0.0, + "learning_rate": 1.013978662043471e-05, + "loss": 1.2451, + "step": 17393 + }, + { + "epoch": 0.5107170121557343, + "grad_norm": 0.0, + "learning_rate": 1.0138835752356356e-05, + "loss": 1.3516, + "step": 17394 + }, + { + "epoch": 0.5107463738328734, + "grad_norm": 0.0, + "learning_rate": 1.0137884883022479e-05, + "loss": 1.3105, + "step": 17395 + }, + { + "epoch": 0.5107757355100123, + "grad_norm": 0.0, + "learning_rate": 1.0136934012441673e-05, + "loss": 1.335, + "step": 17396 + }, + { + "epoch": 0.5108050971871513, + "grad_norm": 0.0, + "learning_rate": 1.0135983140622536e-05, + "loss": 1.2266, + "step": 17397 + }, + { + "epoch": 0.5108344588642904, + "grad_norm": 0.0, + "learning_rate": 1.013503226757367e-05, + "loss": 1.3906, + "step": 17398 + }, + { + "epoch": 0.5108638205414293, + "grad_norm": 0.0, + "learning_rate": 1.0134081393303674e-05, + "loss": 1.292, + "step": 17399 + }, + { + "epoch": 0.5108931822185683, + "grad_norm": 0.0, + "learning_rate": 1.0133130517821144e-05, + "loss": 1.2666, + "step": 17400 + }, + { + "epoch": 0.5109225438957073, + "grad_norm": 0.0, + "learning_rate": 1.0132179641134678e-05, + "loss": 1.3599, + "step": 17401 + }, + { + "epoch": 0.5109519055728463, + "grad_norm": 0.0, + "learning_rate": 1.013122876325288e-05, + "loss": 1.373, + "step": 17402 + }, + { + "epoch": 0.5109812672499853, + "grad_norm": 0.0, + "learning_rate": 1.013027788418435e-05, + "loss": 1.3223, + "step": 17403 + }, + { + "epoch": 0.5110106289271243, + "grad_norm": 0.0, + "learning_rate": 1.0129327003937679e-05, + "loss": 1.3242, + "step": 17404 + }, + { + "epoch": 0.5110399906042633, + "grad_norm": 0.0, + "learning_rate": 1.0128376122521473e-05, + "loss": 1.2461, + "step": 17405 + }, + { + "epoch": 0.5110693522814023, + "grad_norm": 0.0, + "learning_rate": 1.0127425239944326e-05, + "loss": 1.2119, + "step": 17406 + }, + { + "epoch": 0.5110987139585413, + "grad_norm": 0.0, + "learning_rate": 1.0126474356214843e-05, + "loss": 1.3867, + "step": 17407 + }, + { + "epoch": 0.5111280756356803, + "grad_norm": 0.0, + "learning_rate": 1.012552347134162e-05, + "loss": 1.335, + "step": 17408 + }, + { + "epoch": 0.5111574373128193, + "grad_norm": 0.0, + "learning_rate": 1.0124572585333256e-05, + "loss": 1.3984, + "step": 17409 + }, + { + "epoch": 0.5111867989899583, + "grad_norm": 0.0, + "learning_rate": 1.0123621698198349e-05, + "loss": 1.2559, + "step": 17410 + }, + { + "epoch": 0.5112161606670973, + "grad_norm": 0.0, + "learning_rate": 1.0122670809945498e-05, + "loss": 1.3877, + "step": 17411 + }, + { + "epoch": 0.5112455223442363, + "grad_norm": 0.0, + "learning_rate": 1.0121719920583309e-05, + "loss": 1.333, + "step": 17412 + }, + { + "epoch": 0.5112748840213753, + "grad_norm": 0.0, + "learning_rate": 1.012076903012037e-05, + "loss": 1.3574, + "step": 17413 + }, + { + "epoch": 0.5113042456985143, + "grad_norm": 0.0, + "learning_rate": 1.0119818138565287e-05, + "loss": 1.1382, + "step": 17414 + }, + { + "epoch": 0.5113336073756533, + "grad_norm": 0.0, + "learning_rate": 1.011886724592666e-05, + "loss": 1.2432, + "step": 17415 + }, + { + "epoch": 0.5113629690527923, + "grad_norm": 0.0, + "learning_rate": 1.0117916352213085e-05, + "loss": 1.2393, + "step": 17416 + }, + { + "epoch": 0.5113923307299313, + "grad_norm": 0.0, + "learning_rate": 1.0116965457433161e-05, + "loss": 1.3262, + "step": 17417 + }, + { + "epoch": 0.5114216924070702, + "grad_norm": 0.0, + "learning_rate": 1.0116014561595488e-05, + "loss": 1.3418, + "step": 17418 + }, + { + "epoch": 0.5114510540842093, + "grad_norm": 0.0, + "learning_rate": 1.0115063664708666e-05, + "loss": 1.1299, + "step": 17419 + }, + { + "epoch": 0.5114804157613483, + "grad_norm": 0.0, + "learning_rate": 1.0114112766781297e-05, + "loss": 1.3379, + "step": 17420 + }, + { + "epoch": 0.5115097774384872, + "grad_norm": 0.0, + "learning_rate": 1.0113161867821975e-05, + "loss": 1.4121, + "step": 17421 + }, + { + "epoch": 0.5115391391156263, + "grad_norm": 0.0, + "learning_rate": 1.01122109678393e-05, + "loss": 1.1318, + "step": 17422 + }, + { + "epoch": 0.5115685007927653, + "grad_norm": 0.0, + "learning_rate": 1.0111260066841872e-05, + "loss": 1.374, + "step": 17423 + }, + { + "epoch": 0.5115978624699042, + "grad_norm": 0.0, + "learning_rate": 1.0110309164838292e-05, + "loss": 1.3457, + "step": 17424 + }, + { + "epoch": 0.5116272241470433, + "grad_norm": 0.0, + "learning_rate": 1.010935826183716e-05, + "loss": 1.2485, + "step": 17425 + }, + { + "epoch": 0.5116565858241823, + "grad_norm": 0.0, + "learning_rate": 1.010840735784707e-05, + "loss": 1.2783, + "step": 17426 + }, + { + "epoch": 0.5116859475013212, + "grad_norm": 0.0, + "learning_rate": 1.0107456452876627e-05, + "loss": 1.2578, + "step": 17427 + }, + { + "epoch": 0.5117153091784603, + "grad_norm": 0.0, + "learning_rate": 1.0106505546934427e-05, + "loss": 1.3428, + "step": 17428 + }, + { + "epoch": 0.5117446708555993, + "grad_norm": 0.0, + "learning_rate": 1.0105554640029069e-05, + "loss": 1.2988, + "step": 17429 + }, + { + "epoch": 0.5117740325327382, + "grad_norm": 0.0, + "learning_rate": 1.0104603732169154e-05, + "loss": 1.1099, + "step": 17430 + }, + { + "epoch": 0.5118033942098773, + "grad_norm": 0.0, + "learning_rate": 1.0103652823363279e-05, + "loss": 1.2051, + "step": 17431 + }, + { + "epoch": 0.5118327558870163, + "grad_norm": 0.0, + "learning_rate": 1.0102701913620047e-05, + "loss": 1.4355, + "step": 17432 + }, + { + "epoch": 0.5118621175641552, + "grad_norm": 0.0, + "learning_rate": 1.0101751002948056e-05, + "loss": 1.2168, + "step": 17433 + }, + { + "epoch": 0.5118914792412943, + "grad_norm": 0.0, + "learning_rate": 1.0100800091355902e-05, + "loss": 1.2695, + "step": 17434 + }, + { + "epoch": 0.5119208409184333, + "grad_norm": 0.0, + "learning_rate": 1.0099849178852188e-05, + "loss": 1.2578, + "step": 17435 + }, + { + "epoch": 0.5119502025955722, + "grad_norm": 0.0, + "learning_rate": 1.0098898265445513e-05, + "loss": 1.2656, + "step": 17436 + }, + { + "epoch": 0.5119795642727113, + "grad_norm": 0.0, + "learning_rate": 1.009794735114448e-05, + "loss": 1.3906, + "step": 17437 + }, + { + "epoch": 0.5120089259498503, + "grad_norm": 0.0, + "learning_rate": 1.0096996435957677e-05, + "loss": 1.272, + "step": 17438 + }, + { + "epoch": 0.5120382876269892, + "grad_norm": 0.0, + "learning_rate": 1.0096045519893711e-05, + "loss": 1.3037, + "step": 17439 + }, + { + "epoch": 0.5120676493041283, + "grad_norm": 0.0, + "learning_rate": 1.0095094602961186e-05, + "loss": 1.2422, + "step": 17440 + }, + { + "epoch": 0.5120970109812673, + "grad_norm": 0.0, + "learning_rate": 1.0094143685168691e-05, + "loss": 1.3848, + "step": 17441 + }, + { + "epoch": 0.5121263726584062, + "grad_norm": 0.0, + "learning_rate": 1.009319276652483e-05, + "loss": 1.3604, + "step": 17442 + }, + { + "epoch": 0.5121557343355453, + "grad_norm": 0.0, + "learning_rate": 1.0092241847038206e-05, + "loss": 1.3145, + "step": 17443 + }, + { + "epoch": 0.5121850960126842, + "grad_norm": 0.0, + "learning_rate": 1.0091290926717416e-05, + "loss": 1.3232, + "step": 17444 + }, + { + "epoch": 0.5122144576898232, + "grad_norm": 0.0, + "learning_rate": 1.0090340005571058e-05, + "loss": 1.2212, + "step": 17445 + }, + { + "epoch": 0.5122438193669623, + "grad_norm": 0.0, + "learning_rate": 1.008938908360773e-05, + "loss": 1.4229, + "step": 17446 + }, + { + "epoch": 0.5122731810441012, + "grad_norm": 0.0, + "learning_rate": 1.0088438160836036e-05, + "loss": 1.3838, + "step": 17447 + }, + { + "epoch": 0.5123025427212402, + "grad_norm": 0.0, + "learning_rate": 1.0087487237264573e-05, + "loss": 1.2188, + "step": 17448 + }, + { + "epoch": 0.5123319043983793, + "grad_norm": 0.0, + "learning_rate": 1.008653631290194e-05, + "loss": 1.2783, + "step": 17449 + }, + { + "epoch": 0.5123612660755182, + "grad_norm": 0.0, + "learning_rate": 1.0085585387756736e-05, + "loss": 1.3838, + "step": 17450 + }, + { + "epoch": 0.5123906277526572, + "grad_norm": 0.0, + "learning_rate": 1.0084634461837562e-05, + "loss": 1.3438, + "step": 17451 + }, + { + "epoch": 0.5124199894297963, + "grad_norm": 0.0, + "learning_rate": 1.0083683535153017e-05, + "loss": 1.3032, + "step": 17452 + }, + { + "epoch": 0.5124493511069352, + "grad_norm": 0.0, + "learning_rate": 1.0082732607711701e-05, + "loss": 1.2212, + "step": 17453 + }, + { + "epoch": 0.5124787127840742, + "grad_norm": 0.0, + "learning_rate": 1.0081781679522211e-05, + "loss": 1.3252, + "step": 17454 + }, + { + "epoch": 0.5125080744612133, + "grad_norm": 0.0, + "learning_rate": 1.008083075059315e-05, + "loss": 1.3594, + "step": 17455 + }, + { + "epoch": 0.5125374361383522, + "grad_norm": 0.0, + "learning_rate": 1.0079879820933117e-05, + "loss": 1.2773, + "step": 17456 + }, + { + "epoch": 0.5125667978154912, + "grad_norm": 0.0, + "learning_rate": 1.007892889055071e-05, + "loss": 1.2803, + "step": 17457 + }, + { + "epoch": 0.5125961594926303, + "grad_norm": 0.0, + "learning_rate": 1.0077977959454526e-05, + "loss": 1.3086, + "step": 17458 + }, + { + "epoch": 0.5126255211697692, + "grad_norm": 0.0, + "learning_rate": 1.0077027027653168e-05, + "loss": 1.3076, + "step": 17459 + }, + { + "epoch": 0.5126548828469082, + "grad_norm": 0.0, + "learning_rate": 1.0076076095155233e-05, + "loss": 1.2637, + "step": 17460 + }, + { + "epoch": 0.5126842445240473, + "grad_norm": 0.0, + "learning_rate": 1.0075125161969328e-05, + "loss": 1.2021, + "step": 17461 + }, + { + "epoch": 0.5127136062011862, + "grad_norm": 0.0, + "learning_rate": 1.0074174228104043e-05, + "loss": 1.375, + "step": 17462 + }, + { + "epoch": 0.5127429678783252, + "grad_norm": 0.0, + "learning_rate": 1.0073223293567982e-05, + "loss": 1.2383, + "step": 17463 + }, + { + "epoch": 0.5127723295554643, + "grad_norm": 0.0, + "learning_rate": 1.0072272358369745e-05, + "loss": 1.2676, + "step": 17464 + }, + { + "epoch": 0.5128016912326032, + "grad_norm": 0.0, + "learning_rate": 1.007132142251793e-05, + "loss": 1.3096, + "step": 17465 + }, + { + "epoch": 0.5128310529097422, + "grad_norm": 0.0, + "learning_rate": 1.0070370486021138e-05, + "loss": 1.3398, + "step": 17466 + }, + { + "epoch": 0.5128604145868813, + "grad_norm": 0.0, + "learning_rate": 1.0069419548887966e-05, + "loss": 1.2998, + "step": 17467 + }, + { + "epoch": 0.5128897762640202, + "grad_norm": 0.0, + "learning_rate": 1.0068468611127015e-05, + "loss": 1.4297, + "step": 17468 + }, + { + "epoch": 0.5129191379411592, + "grad_norm": 0.0, + "learning_rate": 1.0067517672746889e-05, + "loss": 1.1719, + "step": 17469 + }, + { + "epoch": 0.5129484996182982, + "grad_norm": 0.0, + "learning_rate": 1.006656673375618e-05, + "loss": 1.4326, + "step": 17470 + }, + { + "epoch": 0.5129778612954372, + "grad_norm": 0.0, + "learning_rate": 1.006561579416349e-05, + "loss": 1.3457, + "step": 17471 + }, + { + "epoch": 0.5130072229725762, + "grad_norm": 0.0, + "learning_rate": 1.006466485397742e-05, + "loss": 1.2544, + "step": 17472 + }, + { + "epoch": 0.5130365846497152, + "grad_norm": 0.0, + "learning_rate": 1.0063713913206572e-05, + "loss": 1.3887, + "step": 17473 + }, + { + "epoch": 0.5130659463268542, + "grad_norm": 0.0, + "learning_rate": 1.006276297185954e-05, + "loss": 1.3662, + "step": 17474 + }, + { + "epoch": 0.5130953080039932, + "grad_norm": 0.0, + "learning_rate": 1.0061812029944926e-05, + "loss": 1.4473, + "step": 17475 + }, + { + "epoch": 0.5131246696811322, + "grad_norm": 0.0, + "learning_rate": 1.006086108747133e-05, + "loss": 1.2988, + "step": 17476 + }, + { + "epoch": 0.5131540313582712, + "grad_norm": 0.0, + "learning_rate": 1.0059910144447358e-05, + "loss": 1.3579, + "step": 17477 + }, + { + "epoch": 0.5131833930354102, + "grad_norm": 0.0, + "learning_rate": 1.0058959200881598e-05, + "loss": 1.3955, + "step": 17478 + }, + { + "epoch": 0.5132127547125492, + "grad_norm": 0.0, + "learning_rate": 1.0058008256782652e-05, + "loss": 1.3086, + "step": 17479 + }, + { + "epoch": 0.5132421163896882, + "grad_norm": 0.0, + "learning_rate": 1.0057057312159124e-05, + "loss": 1.2471, + "step": 17480 + }, + { + "epoch": 0.5132714780668272, + "grad_norm": 0.0, + "learning_rate": 1.0056106367019617e-05, + "loss": 1.3213, + "step": 17481 + }, + { + "epoch": 0.5133008397439662, + "grad_norm": 0.0, + "learning_rate": 1.005515542137272e-05, + "loss": 1.2988, + "step": 17482 + }, + { + "epoch": 0.5133302014211052, + "grad_norm": 0.0, + "learning_rate": 1.0054204475227041e-05, + "loss": 1.3867, + "step": 17483 + }, + { + "epoch": 0.5133595630982442, + "grad_norm": 0.0, + "learning_rate": 1.0053253528591174e-05, + "loss": 1.2363, + "step": 17484 + }, + { + "epoch": 0.5133889247753831, + "grad_norm": 0.0, + "learning_rate": 1.0052302581473728e-05, + "loss": 1.248, + "step": 17485 + }, + { + "epoch": 0.5134182864525222, + "grad_norm": 0.0, + "learning_rate": 1.0051351633883292e-05, + "loss": 1.1836, + "step": 17486 + }, + { + "epoch": 0.5134476481296611, + "grad_norm": 0.0, + "learning_rate": 1.005040068582847e-05, + "loss": 1.2637, + "step": 17487 + }, + { + "epoch": 0.5134770098068001, + "grad_norm": 0.0, + "learning_rate": 1.0049449737317862e-05, + "loss": 1.2271, + "step": 17488 + }, + { + "epoch": 0.5135063714839392, + "grad_norm": 0.0, + "learning_rate": 1.0048498788360068e-05, + "loss": 1.4844, + "step": 17489 + }, + { + "epoch": 0.5135357331610781, + "grad_norm": 0.0, + "learning_rate": 1.0047547838963686e-05, + "loss": 1.2793, + "step": 17490 + }, + { + "epoch": 0.5135650948382171, + "grad_norm": 0.0, + "learning_rate": 1.0046596889137319e-05, + "loss": 1.2441, + "step": 17491 + }, + { + "epoch": 0.5135944565153562, + "grad_norm": 0.0, + "learning_rate": 1.0045645938889561e-05, + "loss": 1.3359, + "step": 17492 + }, + { + "epoch": 0.5136238181924951, + "grad_norm": 0.0, + "learning_rate": 1.0044694988229017e-05, + "loss": 1.2275, + "step": 17493 + }, + { + "epoch": 0.5136531798696341, + "grad_norm": 0.0, + "learning_rate": 1.0043744037164284e-05, + "loss": 1.4297, + "step": 17494 + }, + { + "epoch": 0.5136825415467732, + "grad_norm": 0.0, + "learning_rate": 1.0042793085703963e-05, + "loss": 1.3828, + "step": 17495 + }, + { + "epoch": 0.5137119032239121, + "grad_norm": 0.0, + "learning_rate": 1.0041842133856657e-05, + "loss": 1.3711, + "step": 17496 + }, + { + "epoch": 0.5137412649010511, + "grad_norm": 0.0, + "learning_rate": 1.0040891181630956e-05, + "loss": 1.3354, + "step": 17497 + }, + { + "epoch": 0.5137706265781902, + "grad_norm": 0.0, + "learning_rate": 1.0039940229035469e-05, + "loss": 1.27, + "step": 17498 + }, + { + "epoch": 0.5137999882553291, + "grad_norm": 0.0, + "learning_rate": 1.003898927607879e-05, + "loss": 1.3096, + "step": 17499 + }, + { + "epoch": 0.5138293499324681, + "grad_norm": 0.0, + "learning_rate": 1.0038038322769523e-05, + "loss": 1.1733, + "step": 17500 + }, + { + "epoch": 0.5138587116096072, + "grad_norm": 0.0, + "learning_rate": 1.0037087369116262e-05, + "loss": 1.3359, + "step": 17501 + }, + { + "epoch": 0.5138880732867461, + "grad_norm": 0.0, + "learning_rate": 1.0036136415127616e-05, + "loss": 1.2905, + "step": 17502 + }, + { + "epoch": 0.5139174349638851, + "grad_norm": 0.0, + "learning_rate": 1.0035185460812175e-05, + "loss": 1.3711, + "step": 17503 + }, + { + "epoch": 0.5139467966410242, + "grad_norm": 0.0, + "learning_rate": 1.0034234506178544e-05, + "loss": 1.2119, + "step": 17504 + }, + { + "epoch": 0.5139761583181631, + "grad_norm": 0.0, + "learning_rate": 1.0033283551235323e-05, + "loss": 1.2119, + "step": 17505 + }, + { + "epoch": 0.5140055199953021, + "grad_norm": 0.0, + "learning_rate": 1.003233259599111e-05, + "loss": 1.2139, + "step": 17506 + }, + { + "epoch": 0.5140348816724412, + "grad_norm": 0.0, + "learning_rate": 1.0031381640454505e-05, + "loss": 1.3486, + "step": 17507 + }, + { + "epoch": 0.5140642433495801, + "grad_norm": 0.0, + "learning_rate": 1.0030430684634105e-05, + "loss": 1.3467, + "step": 17508 + }, + { + "epoch": 0.5140936050267191, + "grad_norm": 0.0, + "learning_rate": 1.0029479728538513e-05, + "loss": 1.2773, + "step": 17509 + }, + { + "epoch": 0.5141229667038582, + "grad_norm": 0.0, + "learning_rate": 1.0028528772176331e-05, + "loss": 1.2451, + "step": 17510 + }, + { + "epoch": 0.5141523283809971, + "grad_norm": 0.0, + "learning_rate": 1.0027577815556155e-05, + "loss": 1.2129, + "step": 17511 + }, + { + "epoch": 0.5141816900581361, + "grad_norm": 0.0, + "learning_rate": 1.0026626858686585e-05, + "loss": 1.3389, + "step": 17512 + }, + { + "epoch": 0.5142110517352751, + "grad_norm": 0.0, + "learning_rate": 1.0025675901576222e-05, + "loss": 1.3301, + "step": 17513 + }, + { + "epoch": 0.5142404134124141, + "grad_norm": 0.0, + "learning_rate": 1.0024724944233666e-05, + "loss": 1.2832, + "step": 17514 + }, + { + "epoch": 0.5142697750895531, + "grad_norm": 0.0, + "learning_rate": 1.0023773986667516e-05, + "loss": 1.3516, + "step": 17515 + }, + { + "epoch": 0.5142991367666921, + "grad_norm": 0.0, + "learning_rate": 1.0022823028886371e-05, + "loss": 1.2637, + "step": 17516 + }, + { + "epoch": 0.5143284984438311, + "grad_norm": 0.0, + "learning_rate": 1.0021872070898831e-05, + "loss": 1.3071, + "step": 17517 + }, + { + "epoch": 0.5143578601209701, + "grad_norm": 0.0, + "learning_rate": 1.0020921112713497e-05, + "loss": 1.2153, + "step": 17518 + }, + { + "epoch": 0.5143872217981091, + "grad_norm": 0.0, + "learning_rate": 1.001997015433897e-05, + "loss": 1.3623, + "step": 17519 + }, + { + "epoch": 0.5144165834752481, + "grad_norm": 0.0, + "learning_rate": 1.0019019195783843e-05, + "loss": 1.2959, + "step": 17520 + }, + { + "epoch": 0.5144459451523871, + "grad_norm": 0.0, + "learning_rate": 1.0018068237056721e-05, + "loss": 1.3125, + "step": 17521 + }, + { + "epoch": 0.5144753068295261, + "grad_norm": 0.0, + "learning_rate": 1.0017117278166209e-05, + "loss": 1.4756, + "step": 17522 + }, + { + "epoch": 0.5145046685066651, + "grad_norm": 0.0, + "learning_rate": 1.0016166319120897e-05, + "loss": 1.2705, + "step": 17523 + }, + { + "epoch": 0.5145340301838041, + "grad_norm": 0.0, + "learning_rate": 1.0015215359929386e-05, + "loss": 1.2744, + "step": 17524 + }, + { + "epoch": 0.5145633918609431, + "grad_norm": 0.0, + "learning_rate": 1.0014264400600282e-05, + "loss": 1.2549, + "step": 17525 + }, + { + "epoch": 0.5145927535380821, + "grad_norm": 0.0, + "learning_rate": 1.0013313441142182e-05, + "loss": 1.3613, + "step": 17526 + }, + { + "epoch": 0.514622115215221, + "grad_norm": 0.0, + "learning_rate": 1.0012362481563686e-05, + "loss": 1.2998, + "step": 17527 + }, + { + "epoch": 0.5146514768923601, + "grad_norm": 0.0, + "learning_rate": 1.0011411521873393e-05, + "loss": 1.2686, + "step": 17528 + }, + { + "epoch": 0.5146808385694991, + "grad_norm": 0.0, + "learning_rate": 1.00104605620799e-05, + "loss": 1.3545, + "step": 17529 + }, + { + "epoch": 0.514710200246638, + "grad_norm": 0.0, + "learning_rate": 1.0009509602191812e-05, + "loss": 1.3398, + "step": 17530 + }, + { + "epoch": 0.5147395619237771, + "grad_norm": 0.0, + "learning_rate": 1.0008558642217722e-05, + "loss": 1.251, + "step": 17531 + }, + { + "epoch": 0.5147689236009161, + "grad_norm": 0.0, + "learning_rate": 1.0007607682166239e-05, + "loss": 1.3506, + "step": 17532 + }, + { + "epoch": 0.514798285278055, + "grad_norm": 0.0, + "learning_rate": 1.0006656722045953e-05, + "loss": 1.249, + "step": 17533 + }, + { + "epoch": 0.5148276469551941, + "grad_norm": 0.0, + "learning_rate": 1.0005705761865472e-05, + "loss": 1.3169, + "step": 17534 + }, + { + "epoch": 0.5148570086323331, + "grad_norm": 0.0, + "learning_rate": 1.0004754801633391e-05, + "loss": 1.0625, + "step": 17535 + }, + { + "epoch": 0.514886370309472, + "grad_norm": 0.0, + "learning_rate": 1.0003803841358313e-05, + "loss": 1.3867, + "step": 17536 + }, + { + "epoch": 0.5149157319866111, + "grad_norm": 0.0, + "learning_rate": 1.0002852881048834e-05, + "loss": 1.4873, + "step": 17537 + }, + { + "epoch": 0.5149450936637501, + "grad_norm": 0.0, + "learning_rate": 1.0001901920713554e-05, + "loss": 1.252, + "step": 17538 + }, + { + "epoch": 0.514974455340889, + "grad_norm": 0.0, + "learning_rate": 1.000095096036108e-05, + "loss": 1.3086, + "step": 17539 + }, + { + "epoch": 0.5150038170180281, + "grad_norm": 0.0, + "learning_rate": 1e-05, + "loss": 1.2881, + "step": 17540 + }, + { + "epoch": 0.5150331786951671, + "grad_norm": 0.0, + "learning_rate": 9.999049039638925e-06, + "loss": 1.1729, + "step": 17541 + }, + { + "epoch": 0.515062540372306, + "grad_norm": 0.0, + "learning_rate": 9.998098079286447e-06, + "loss": 1.3477, + "step": 17542 + }, + { + "epoch": 0.5150919020494451, + "grad_norm": 0.0, + "learning_rate": 9.997147118951171e-06, + "loss": 1.3936, + "step": 17543 + }, + { + "epoch": 0.5151212637265841, + "grad_norm": 0.0, + "learning_rate": 9.996196158641688e-06, + "loss": 1.373, + "step": 17544 + }, + { + "epoch": 0.515150625403723, + "grad_norm": 0.0, + "learning_rate": 9.99524519836661e-06, + "loss": 1.2949, + "step": 17545 + }, + { + "epoch": 0.5151799870808621, + "grad_norm": 0.0, + "learning_rate": 9.99429423813453e-06, + "loss": 1.2861, + "step": 17546 + }, + { + "epoch": 0.5152093487580011, + "grad_norm": 0.0, + "learning_rate": 9.99334327795405e-06, + "loss": 1.2822, + "step": 17547 + }, + { + "epoch": 0.51523871043514, + "grad_norm": 0.0, + "learning_rate": 9.992392317833763e-06, + "loss": 1.3623, + "step": 17548 + }, + { + "epoch": 0.5152680721122791, + "grad_norm": 0.0, + "learning_rate": 9.99144135778228e-06, + "loss": 1.3389, + "step": 17549 + }, + { + "epoch": 0.5152974337894181, + "grad_norm": 0.0, + "learning_rate": 9.990490397808193e-06, + "loss": 1.4561, + "step": 17550 + }, + { + "epoch": 0.515326795466557, + "grad_norm": 0.0, + "learning_rate": 9.989539437920104e-06, + "loss": 1.2129, + "step": 17551 + }, + { + "epoch": 0.5153561571436961, + "grad_norm": 0.0, + "learning_rate": 9.988588478126612e-06, + "loss": 1.3008, + "step": 17552 + }, + { + "epoch": 0.515385518820835, + "grad_norm": 0.0, + "learning_rate": 9.987637518436316e-06, + "loss": 1.2979, + "step": 17553 + }, + { + "epoch": 0.515414880497974, + "grad_norm": 0.0, + "learning_rate": 9.98668655885782e-06, + "loss": 1.2783, + "step": 17554 + }, + { + "epoch": 0.5154442421751131, + "grad_norm": 0.0, + "learning_rate": 9.985735599399717e-06, + "loss": 1.3984, + "step": 17555 + }, + { + "epoch": 0.515473603852252, + "grad_norm": 0.0, + "learning_rate": 9.984784640070615e-06, + "loss": 1.1655, + "step": 17556 + }, + { + "epoch": 0.515502965529391, + "grad_norm": 0.0, + "learning_rate": 9.983833680879104e-06, + "loss": 1.3286, + "step": 17557 + }, + { + "epoch": 0.5155323272065301, + "grad_norm": 0.0, + "learning_rate": 9.982882721833795e-06, + "loss": 1.3057, + "step": 17558 + }, + { + "epoch": 0.515561688883669, + "grad_norm": 0.0, + "learning_rate": 9.981931762943277e-06, + "loss": 1.3496, + "step": 17559 + }, + { + "epoch": 0.515591050560808, + "grad_norm": 0.0, + "learning_rate": 9.980980804216159e-06, + "loss": 1.2998, + "step": 17560 + }, + { + "epoch": 0.5156204122379471, + "grad_norm": 0.0, + "learning_rate": 9.980029845661037e-06, + "loss": 1.2451, + "step": 17561 + }, + { + "epoch": 0.515649773915086, + "grad_norm": 0.0, + "learning_rate": 9.979078887286505e-06, + "loss": 1.272, + "step": 17562 + }, + { + "epoch": 0.515679135592225, + "grad_norm": 0.0, + "learning_rate": 9.978127929101172e-06, + "loss": 1.4541, + "step": 17563 + }, + { + "epoch": 0.5157084972693641, + "grad_norm": 0.0, + "learning_rate": 9.97717697111363e-06, + "loss": 1.4551, + "step": 17564 + }, + { + "epoch": 0.515737858946503, + "grad_norm": 0.0, + "learning_rate": 9.976226013332489e-06, + "loss": 1.2998, + "step": 17565 + }, + { + "epoch": 0.515767220623642, + "grad_norm": 0.0, + "learning_rate": 9.975275055766334e-06, + "loss": 1.2988, + "step": 17566 + }, + { + "epoch": 0.5157965823007811, + "grad_norm": 0.0, + "learning_rate": 9.974324098423781e-06, + "loss": 1.3252, + "step": 17567 + }, + { + "epoch": 0.51582594397792, + "grad_norm": 0.0, + "learning_rate": 9.973373141313415e-06, + "loss": 1.2852, + "step": 17568 + }, + { + "epoch": 0.515855305655059, + "grad_norm": 0.0, + "learning_rate": 9.972422184443847e-06, + "loss": 1.4062, + "step": 17569 + }, + { + "epoch": 0.5158846673321981, + "grad_norm": 0.0, + "learning_rate": 9.971471227823672e-06, + "loss": 1.4434, + "step": 17570 + }, + { + "epoch": 0.515914029009337, + "grad_norm": 0.0, + "learning_rate": 9.97052027146149e-06, + "loss": 1.3701, + "step": 17571 + }, + { + "epoch": 0.515943390686476, + "grad_norm": 0.0, + "learning_rate": 9.9695693153659e-06, + "loss": 1.3271, + "step": 17572 + }, + { + "epoch": 0.5159727523636151, + "grad_norm": 0.0, + "learning_rate": 9.968618359545498e-06, + "loss": 1.457, + "step": 17573 + }, + { + "epoch": 0.516002114040754, + "grad_norm": 0.0, + "learning_rate": 9.967667404008894e-06, + "loss": 1.2754, + "step": 17574 + }, + { + "epoch": 0.516031475717893, + "grad_norm": 0.0, + "learning_rate": 9.966716448764679e-06, + "loss": 1.2842, + "step": 17575 + }, + { + "epoch": 0.5160608373950321, + "grad_norm": 0.0, + "learning_rate": 9.965765493821458e-06, + "loss": 1.2915, + "step": 17576 + }, + { + "epoch": 0.516090199072171, + "grad_norm": 0.0, + "learning_rate": 9.964814539187825e-06, + "loss": 1.25, + "step": 17577 + }, + { + "epoch": 0.51611956074931, + "grad_norm": 0.0, + "learning_rate": 9.963863584872388e-06, + "loss": 1.3799, + "step": 17578 + }, + { + "epoch": 0.516148922426449, + "grad_norm": 0.0, + "learning_rate": 9.962912630883738e-06, + "loss": 1.1904, + "step": 17579 + }, + { + "epoch": 0.516178284103588, + "grad_norm": 0.0, + "learning_rate": 9.96196167723048e-06, + "loss": 1.2656, + "step": 17580 + }, + { + "epoch": 0.516207645780727, + "grad_norm": 0.0, + "learning_rate": 9.961010723921214e-06, + "loss": 1.2808, + "step": 17581 + }, + { + "epoch": 0.516237007457866, + "grad_norm": 0.0, + "learning_rate": 9.960059770964533e-06, + "loss": 1.2939, + "step": 17582 + }, + { + "epoch": 0.516266369135005, + "grad_norm": 0.0, + "learning_rate": 9.959108818369047e-06, + "loss": 1.4141, + "step": 17583 + }, + { + "epoch": 0.516295730812144, + "grad_norm": 0.0, + "learning_rate": 9.958157866143347e-06, + "loss": 1.251, + "step": 17584 + }, + { + "epoch": 0.5163250924892829, + "grad_norm": 0.0, + "learning_rate": 9.957206914296038e-06, + "loss": 1.1592, + "step": 17585 + }, + { + "epoch": 0.516354454166422, + "grad_norm": 0.0, + "learning_rate": 9.956255962835714e-06, + "loss": 1.2881, + "step": 17586 + }, + { + "epoch": 0.516383815843561, + "grad_norm": 0.0, + "learning_rate": 9.955305011770984e-06, + "loss": 1.1606, + "step": 17587 + }, + { + "epoch": 0.5164131775206999, + "grad_norm": 0.0, + "learning_rate": 9.954354061110439e-06, + "loss": 1.3867, + "step": 17588 + }, + { + "epoch": 0.516442539197839, + "grad_norm": 0.0, + "learning_rate": 9.953403110862684e-06, + "loss": 1.1738, + "step": 17589 + }, + { + "epoch": 0.516471900874978, + "grad_norm": 0.0, + "learning_rate": 9.952452161036319e-06, + "loss": 1.3643, + "step": 17590 + }, + { + "epoch": 0.5165012625521169, + "grad_norm": 0.0, + "learning_rate": 9.951501211639934e-06, + "loss": 1.2871, + "step": 17591 + }, + { + "epoch": 0.516530624229256, + "grad_norm": 0.0, + "learning_rate": 9.950550262682143e-06, + "loss": 1.2148, + "step": 17592 + }, + { + "epoch": 0.516559985906395, + "grad_norm": 0.0, + "learning_rate": 9.949599314171531e-06, + "loss": 1.2661, + "step": 17593 + }, + { + "epoch": 0.5165893475835339, + "grad_norm": 0.0, + "learning_rate": 9.948648366116713e-06, + "loss": 1.1318, + "step": 17594 + }, + { + "epoch": 0.516618709260673, + "grad_norm": 0.0, + "learning_rate": 9.947697418526277e-06, + "loss": 1.3535, + "step": 17595 + }, + { + "epoch": 0.516648070937812, + "grad_norm": 0.0, + "learning_rate": 9.946746471408828e-06, + "loss": 1.4482, + "step": 17596 + }, + { + "epoch": 0.5166774326149509, + "grad_norm": 0.0, + "learning_rate": 9.94579552477296e-06, + "loss": 1.271, + "step": 17597 + }, + { + "epoch": 0.51670679429209, + "grad_norm": 0.0, + "learning_rate": 9.944844578627284e-06, + "loss": 1.1895, + "step": 17598 + }, + { + "epoch": 0.516736155969229, + "grad_norm": 0.0, + "learning_rate": 9.943893632980387e-06, + "loss": 1.4395, + "step": 17599 + }, + { + "epoch": 0.5167655176463679, + "grad_norm": 0.0, + "learning_rate": 9.942942687840878e-06, + "loss": 1.4199, + "step": 17600 + }, + { + "epoch": 0.516794879323507, + "grad_norm": 0.0, + "learning_rate": 9.941991743217353e-06, + "loss": 1.2725, + "step": 17601 + }, + { + "epoch": 0.516824241000646, + "grad_norm": 0.0, + "learning_rate": 9.941040799118407e-06, + "loss": 1.2979, + "step": 17602 + }, + { + "epoch": 0.5168536026777849, + "grad_norm": 0.0, + "learning_rate": 9.940089855552648e-06, + "loss": 1.2109, + "step": 17603 + }, + { + "epoch": 0.516882964354924, + "grad_norm": 0.0, + "learning_rate": 9.93913891252867e-06, + "loss": 1.3618, + "step": 17604 + }, + { + "epoch": 0.5169123260320629, + "grad_norm": 0.0, + "learning_rate": 9.938187970055076e-06, + "loss": 1.3389, + "step": 17605 + }, + { + "epoch": 0.5169416877092019, + "grad_norm": 0.0, + "learning_rate": 9.93723702814046e-06, + "loss": 1.3145, + "step": 17606 + }, + { + "epoch": 0.516971049386341, + "grad_norm": 0.0, + "learning_rate": 9.93628608679343e-06, + "loss": 1.375, + "step": 17607 + }, + { + "epoch": 0.5170004110634799, + "grad_norm": 0.0, + "learning_rate": 9.93533514602258e-06, + "loss": 1.1436, + "step": 17608 + }, + { + "epoch": 0.5170297727406189, + "grad_norm": 0.0, + "learning_rate": 9.934384205836513e-06, + "loss": 1.4014, + "step": 17609 + }, + { + "epoch": 0.517059134417758, + "grad_norm": 0.0, + "learning_rate": 9.933433266243825e-06, + "loss": 1.2754, + "step": 17610 + }, + { + "epoch": 0.5170884960948969, + "grad_norm": 0.0, + "learning_rate": 9.932482327253113e-06, + "loss": 1.2686, + "step": 17611 + }, + { + "epoch": 0.5171178577720359, + "grad_norm": 0.0, + "learning_rate": 9.931531388872987e-06, + "loss": 1.2861, + "step": 17612 + }, + { + "epoch": 0.517147219449175, + "grad_norm": 0.0, + "learning_rate": 9.930580451112036e-06, + "loss": 1.4277, + "step": 17613 + }, + { + "epoch": 0.5171765811263139, + "grad_norm": 0.0, + "learning_rate": 9.929629513978866e-06, + "loss": 1.2881, + "step": 17614 + }, + { + "epoch": 0.5172059428034529, + "grad_norm": 0.0, + "learning_rate": 9.92867857748207e-06, + "loss": 1.2383, + "step": 17615 + }, + { + "epoch": 0.517235304480592, + "grad_norm": 0.0, + "learning_rate": 9.927727641630257e-06, + "loss": 1.333, + "step": 17616 + }, + { + "epoch": 0.5172646661577309, + "grad_norm": 0.0, + "learning_rate": 9.92677670643202e-06, + "loss": 1.1738, + "step": 17617 + }, + { + "epoch": 0.5172940278348699, + "grad_norm": 0.0, + "learning_rate": 9.925825771895959e-06, + "loss": 1.2529, + "step": 17618 + }, + { + "epoch": 0.517323389512009, + "grad_norm": 0.0, + "learning_rate": 9.924874838030677e-06, + "loss": 1.3057, + "step": 17619 + }, + { + "epoch": 0.5173527511891479, + "grad_norm": 0.0, + "learning_rate": 9.923923904844768e-06, + "loss": 1.3057, + "step": 17620 + }, + { + "epoch": 0.5173821128662869, + "grad_norm": 0.0, + "learning_rate": 9.922972972346837e-06, + "loss": 1.3618, + "step": 17621 + }, + { + "epoch": 0.517411474543426, + "grad_norm": 0.0, + "learning_rate": 9.922022040545478e-06, + "loss": 1.3174, + "step": 17622 + }, + { + "epoch": 0.5174408362205649, + "grad_norm": 0.0, + "learning_rate": 9.921071109449296e-06, + "loss": 1.3125, + "step": 17623 + }, + { + "epoch": 0.5174701978977039, + "grad_norm": 0.0, + "learning_rate": 9.920120179066887e-06, + "loss": 1.334, + "step": 17624 + }, + { + "epoch": 0.517499559574843, + "grad_norm": 0.0, + "learning_rate": 9.919169249406854e-06, + "loss": 1.3877, + "step": 17625 + }, + { + "epoch": 0.5175289212519819, + "grad_norm": 0.0, + "learning_rate": 9.918218320477789e-06, + "loss": 1.3496, + "step": 17626 + }, + { + "epoch": 0.5175582829291209, + "grad_norm": 0.0, + "learning_rate": 9.917267392288302e-06, + "loss": 1.335, + "step": 17627 + }, + { + "epoch": 0.51758764460626, + "grad_norm": 0.0, + "learning_rate": 9.916316464846984e-06, + "loss": 1.2197, + "step": 17628 + }, + { + "epoch": 0.5176170062833989, + "grad_norm": 0.0, + "learning_rate": 9.915365538162442e-06, + "loss": 1.3857, + "step": 17629 + }, + { + "epoch": 0.5176463679605379, + "grad_norm": 0.0, + "learning_rate": 9.914414612243269e-06, + "loss": 1.3057, + "step": 17630 + }, + { + "epoch": 0.5176757296376769, + "grad_norm": 0.0, + "learning_rate": 9.913463687098063e-06, + "loss": 1.6494, + "step": 17631 + }, + { + "epoch": 0.5177050913148159, + "grad_norm": 0.0, + "learning_rate": 9.912512762735432e-06, + "loss": 1.4268, + "step": 17632 + }, + { + "epoch": 0.5177344529919549, + "grad_norm": 0.0, + "learning_rate": 9.911561839163967e-06, + "loss": 1.3125, + "step": 17633 + }, + { + "epoch": 0.5177638146690939, + "grad_norm": 0.0, + "learning_rate": 9.910610916392273e-06, + "loss": 1.3545, + "step": 17634 + }, + { + "epoch": 0.5177931763462329, + "grad_norm": 0.0, + "learning_rate": 9.909659994428944e-06, + "loss": 1.3311, + "step": 17635 + }, + { + "epoch": 0.5178225380233719, + "grad_norm": 0.0, + "learning_rate": 9.908709073282587e-06, + "loss": 1.2988, + "step": 17636 + }, + { + "epoch": 0.5178518997005109, + "grad_norm": 0.0, + "learning_rate": 9.907758152961794e-06, + "loss": 1.1621, + "step": 17637 + }, + { + "epoch": 0.5178812613776499, + "grad_norm": 0.0, + "learning_rate": 9.906807233475171e-06, + "loss": 1.3027, + "step": 17638 + }, + { + "epoch": 0.5179106230547889, + "grad_norm": 0.0, + "learning_rate": 9.905856314831314e-06, + "loss": 1.4775, + "step": 17639 + }, + { + "epoch": 0.5179399847319279, + "grad_norm": 0.0, + "learning_rate": 9.904905397038818e-06, + "loss": 1.2231, + "step": 17640 + }, + { + "epoch": 0.5179693464090669, + "grad_norm": 0.0, + "learning_rate": 9.90395448010629e-06, + "loss": 1.3467, + "step": 17641 + }, + { + "epoch": 0.5179987080862059, + "grad_norm": 0.0, + "learning_rate": 9.903003564042326e-06, + "loss": 1.2627, + "step": 17642 + }, + { + "epoch": 0.5180280697633449, + "grad_norm": 0.0, + "learning_rate": 9.902052648855527e-06, + "loss": 1.2578, + "step": 17643 + }, + { + "epoch": 0.5180574314404839, + "grad_norm": 0.0, + "learning_rate": 9.901101734554489e-06, + "loss": 1.2617, + "step": 17644 + }, + { + "epoch": 0.5180867931176228, + "grad_norm": 0.0, + "learning_rate": 9.900150821147816e-06, + "loss": 1.3135, + "step": 17645 + }, + { + "epoch": 0.5181161547947619, + "grad_norm": 0.0, + "learning_rate": 9.899199908644098e-06, + "loss": 1.4453, + "step": 17646 + }, + { + "epoch": 0.5181455164719009, + "grad_norm": 0.0, + "learning_rate": 9.898248997051946e-06, + "loss": 1.3057, + "step": 17647 + }, + { + "epoch": 0.5181748781490398, + "grad_norm": 0.0, + "learning_rate": 9.897298086379955e-06, + "loss": 1.3896, + "step": 17648 + }, + { + "epoch": 0.5182042398261789, + "grad_norm": 0.0, + "learning_rate": 9.896347176636724e-06, + "loss": 1.3359, + "step": 17649 + }, + { + "epoch": 0.5182336015033179, + "grad_norm": 0.0, + "learning_rate": 9.895396267830852e-06, + "loss": 1.3223, + "step": 17650 + }, + { + "epoch": 0.5182629631804568, + "grad_norm": 0.0, + "learning_rate": 9.894445359970933e-06, + "loss": 1.251, + "step": 17651 + }, + { + "epoch": 0.5182923248575959, + "grad_norm": 0.0, + "learning_rate": 9.893494453065577e-06, + "loss": 1.1924, + "step": 17652 + }, + { + "epoch": 0.5183216865347349, + "grad_norm": 0.0, + "learning_rate": 9.892543547123377e-06, + "loss": 1.2759, + "step": 17653 + }, + { + "epoch": 0.5183510482118738, + "grad_norm": 0.0, + "learning_rate": 9.891592642152935e-06, + "loss": 1.3721, + "step": 17654 + }, + { + "epoch": 0.5183804098890129, + "grad_norm": 0.0, + "learning_rate": 9.890641738162842e-06, + "loss": 1.3672, + "step": 17655 + }, + { + "epoch": 0.5184097715661519, + "grad_norm": 0.0, + "learning_rate": 9.88969083516171e-06, + "loss": 1.3711, + "step": 17656 + }, + { + "epoch": 0.5184391332432908, + "grad_norm": 0.0, + "learning_rate": 9.88873993315813e-06, + "loss": 1.1982, + "step": 17657 + }, + { + "epoch": 0.5184684949204299, + "grad_norm": 0.0, + "learning_rate": 9.887789032160705e-06, + "loss": 1.1704, + "step": 17658 + }, + { + "epoch": 0.5184978565975689, + "grad_norm": 0.0, + "learning_rate": 9.886838132178032e-06, + "loss": 1.2217, + "step": 17659 + }, + { + "epoch": 0.5185272182747078, + "grad_norm": 0.0, + "learning_rate": 9.885887233218707e-06, + "loss": 1.2334, + "step": 17660 + }, + { + "epoch": 0.5185565799518469, + "grad_norm": 0.0, + "learning_rate": 9.884936335291335e-06, + "loss": 1.3662, + "step": 17661 + }, + { + "epoch": 0.5185859416289859, + "grad_norm": 0.0, + "learning_rate": 9.883985438404514e-06, + "loss": 1.3281, + "step": 17662 + }, + { + "epoch": 0.5186153033061248, + "grad_norm": 0.0, + "learning_rate": 9.883034542566844e-06, + "loss": 1.2432, + "step": 17663 + }, + { + "epoch": 0.5186446649832639, + "grad_norm": 0.0, + "learning_rate": 9.882083647786917e-06, + "loss": 1.1982, + "step": 17664 + }, + { + "epoch": 0.5186740266604029, + "grad_norm": 0.0, + "learning_rate": 9.881132754073343e-06, + "loss": 1.2559, + "step": 17665 + }, + { + "epoch": 0.5187033883375418, + "grad_norm": 0.0, + "learning_rate": 9.880181861434715e-06, + "loss": 1.4268, + "step": 17666 + }, + { + "epoch": 0.5187327500146809, + "grad_norm": 0.0, + "learning_rate": 9.879230969879634e-06, + "loss": 1.2534, + "step": 17667 + }, + { + "epoch": 0.5187621116918198, + "grad_norm": 0.0, + "learning_rate": 9.878280079416698e-06, + "loss": 1.27, + "step": 17668 + }, + { + "epoch": 0.5187914733689588, + "grad_norm": 0.0, + "learning_rate": 9.877329190054504e-06, + "loss": 1.3604, + "step": 17669 + }, + { + "epoch": 0.5188208350460979, + "grad_norm": 0.0, + "learning_rate": 9.876378301801657e-06, + "loss": 1.3975, + "step": 17670 + }, + { + "epoch": 0.5188501967232368, + "grad_norm": 0.0, + "learning_rate": 9.875427414666747e-06, + "loss": 1.4707, + "step": 17671 + }, + { + "epoch": 0.5188795584003758, + "grad_norm": 0.0, + "learning_rate": 9.874476528658383e-06, + "loss": 1.3301, + "step": 17672 + }, + { + "epoch": 0.5189089200775149, + "grad_norm": 0.0, + "learning_rate": 9.873525643785158e-06, + "loss": 1.3047, + "step": 17673 + }, + { + "epoch": 0.5189382817546538, + "grad_norm": 0.0, + "learning_rate": 9.872574760055677e-06, + "loss": 1.0825, + "step": 17674 + }, + { + "epoch": 0.5189676434317928, + "grad_norm": 0.0, + "learning_rate": 9.871623877478528e-06, + "loss": 1.333, + "step": 17675 + }, + { + "epoch": 0.5189970051089319, + "grad_norm": 0.0, + "learning_rate": 9.870672996062323e-06, + "loss": 1.2959, + "step": 17676 + }, + { + "epoch": 0.5190263667860708, + "grad_norm": 0.0, + "learning_rate": 9.869722115815655e-06, + "loss": 1.3369, + "step": 17677 + }, + { + "epoch": 0.5190557284632098, + "grad_norm": 0.0, + "learning_rate": 9.868771236747122e-06, + "loss": 1.3379, + "step": 17678 + }, + { + "epoch": 0.5190850901403489, + "grad_norm": 0.0, + "learning_rate": 9.867820358865326e-06, + "loss": 1.2607, + "step": 17679 + }, + { + "epoch": 0.5191144518174878, + "grad_norm": 0.0, + "learning_rate": 9.866869482178859e-06, + "loss": 1.2275, + "step": 17680 + }, + { + "epoch": 0.5191438134946268, + "grad_norm": 0.0, + "learning_rate": 9.865918606696331e-06, + "loss": 1.3047, + "step": 17681 + }, + { + "epoch": 0.5191731751717659, + "grad_norm": 0.0, + "learning_rate": 9.864967732426333e-06, + "loss": 1.3755, + "step": 17682 + }, + { + "epoch": 0.5192025368489048, + "grad_norm": 0.0, + "learning_rate": 9.864016859377468e-06, + "loss": 1.334, + "step": 17683 + }, + { + "epoch": 0.5192318985260438, + "grad_norm": 0.0, + "learning_rate": 9.863065987558329e-06, + "loss": 1.3125, + "step": 17684 + }, + { + "epoch": 0.5192612602031828, + "grad_norm": 0.0, + "learning_rate": 9.862115116977523e-06, + "loss": 1.25, + "step": 17685 + }, + { + "epoch": 0.5192906218803218, + "grad_norm": 0.0, + "learning_rate": 9.861164247643644e-06, + "loss": 1.4062, + "step": 17686 + }, + { + "epoch": 0.5193199835574608, + "grad_norm": 0.0, + "learning_rate": 9.860213379565296e-06, + "loss": 1.1108, + "step": 17687 + }, + { + "epoch": 0.5193493452345997, + "grad_norm": 0.0, + "learning_rate": 9.859262512751073e-06, + "loss": 1.2969, + "step": 17688 + }, + { + "epoch": 0.5193787069117388, + "grad_norm": 0.0, + "learning_rate": 9.85831164720957e-06, + "loss": 1.0522, + "step": 17689 + }, + { + "epoch": 0.5194080685888778, + "grad_norm": 0.0, + "learning_rate": 9.857360782949394e-06, + "loss": 1.3179, + "step": 17690 + }, + { + "epoch": 0.5194374302660167, + "grad_norm": 0.0, + "learning_rate": 9.856409919979142e-06, + "loss": 1.4268, + "step": 17691 + }, + { + "epoch": 0.5194667919431558, + "grad_norm": 0.0, + "learning_rate": 9.855459058307412e-06, + "loss": 1.2793, + "step": 17692 + }, + { + "epoch": 0.5194961536202948, + "grad_norm": 0.0, + "learning_rate": 9.8545081979428e-06, + "loss": 1.3232, + "step": 17693 + }, + { + "epoch": 0.5195255152974337, + "grad_norm": 0.0, + "learning_rate": 9.853557338893912e-06, + "loss": 1.2075, + "step": 17694 + }, + { + "epoch": 0.5195548769745728, + "grad_norm": 0.0, + "learning_rate": 9.852606481169338e-06, + "loss": 1.2241, + "step": 17695 + }, + { + "epoch": 0.5195842386517118, + "grad_norm": 0.0, + "learning_rate": 9.851655624777685e-06, + "loss": 1.3936, + "step": 17696 + }, + { + "epoch": 0.5196136003288507, + "grad_norm": 0.0, + "learning_rate": 9.850704769727545e-06, + "loss": 1.106, + "step": 17697 + }, + { + "epoch": 0.5196429620059898, + "grad_norm": 0.0, + "learning_rate": 9.849753916027523e-06, + "loss": 1.2695, + "step": 17698 + }, + { + "epoch": 0.5196723236831288, + "grad_norm": 0.0, + "learning_rate": 9.848803063686215e-06, + "loss": 1.3418, + "step": 17699 + }, + { + "epoch": 0.5197016853602677, + "grad_norm": 0.0, + "learning_rate": 9.847852212712216e-06, + "loss": 1.3711, + "step": 17700 + }, + { + "epoch": 0.5197310470374068, + "grad_norm": 0.0, + "learning_rate": 9.846901363114132e-06, + "loss": 1.3584, + "step": 17701 + }, + { + "epoch": 0.5197604087145458, + "grad_norm": 0.0, + "learning_rate": 9.845950514900555e-06, + "loss": 1.3066, + "step": 17702 + }, + { + "epoch": 0.5197897703916847, + "grad_norm": 0.0, + "learning_rate": 9.84499966808009e-06, + "loss": 1.3057, + "step": 17703 + }, + { + "epoch": 0.5198191320688238, + "grad_norm": 0.0, + "learning_rate": 9.844048822661328e-06, + "loss": 1.2236, + "step": 17704 + }, + { + "epoch": 0.5198484937459628, + "grad_norm": 0.0, + "learning_rate": 9.843097978652878e-06, + "loss": 1.3213, + "step": 17705 + }, + { + "epoch": 0.5198778554231017, + "grad_norm": 0.0, + "learning_rate": 9.842147136063331e-06, + "loss": 1.3071, + "step": 17706 + }, + { + "epoch": 0.5199072171002408, + "grad_norm": 0.0, + "learning_rate": 9.841196294901289e-06, + "loss": 1.4385, + "step": 17707 + }, + { + "epoch": 0.5199365787773798, + "grad_norm": 0.0, + "learning_rate": 9.840245455175349e-06, + "loss": 1.3018, + "step": 17708 + }, + { + "epoch": 0.5199659404545187, + "grad_norm": 0.0, + "learning_rate": 9.839294616894107e-06, + "loss": 1.1289, + "step": 17709 + }, + { + "epoch": 0.5199953021316578, + "grad_norm": 0.0, + "learning_rate": 9.838343780066168e-06, + "loss": 1.4229, + "step": 17710 + }, + { + "epoch": 0.5200246638087967, + "grad_norm": 0.0, + "learning_rate": 9.837392944700127e-06, + "loss": 1.2803, + "step": 17711 + }, + { + "epoch": 0.5200540254859357, + "grad_norm": 0.0, + "learning_rate": 9.836442110804586e-06, + "loss": 1.3232, + "step": 17712 + }, + { + "epoch": 0.5200833871630748, + "grad_norm": 0.0, + "learning_rate": 9.835491278388133e-06, + "loss": 1.2871, + "step": 17713 + }, + { + "epoch": 0.5201127488402137, + "grad_norm": 0.0, + "learning_rate": 9.834540447459383e-06, + "loss": 1.2393, + "step": 17714 + }, + { + "epoch": 0.5201421105173527, + "grad_norm": 0.0, + "learning_rate": 9.833589618026923e-06, + "loss": 1.3145, + "step": 17715 + }, + { + "epoch": 0.5201714721944918, + "grad_norm": 0.0, + "learning_rate": 9.832638790099355e-06, + "loss": 1.3193, + "step": 17716 + }, + { + "epoch": 0.5202008338716307, + "grad_norm": 0.0, + "learning_rate": 9.831687963685273e-06, + "loss": 1.3604, + "step": 17717 + }, + { + "epoch": 0.5202301955487697, + "grad_norm": 0.0, + "learning_rate": 9.830737138793286e-06, + "loss": 1.3389, + "step": 17718 + }, + { + "epoch": 0.5202595572259088, + "grad_norm": 0.0, + "learning_rate": 9.829786315431987e-06, + "loss": 1.3008, + "step": 17719 + }, + { + "epoch": 0.5202889189030477, + "grad_norm": 0.0, + "learning_rate": 9.828835493609968e-06, + "loss": 1.3291, + "step": 17720 + }, + { + "epoch": 0.5203182805801867, + "grad_norm": 0.0, + "learning_rate": 9.827884673335839e-06, + "loss": 1.3809, + "step": 17721 + }, + { + "epoch": 0.5203476422573258, + "grad_norm": 0.0, + "learning_rate": 9.82693385461819e-06, + "loss": 1.2822, + "step": 17722 + }, + { + "epoch": 0.5203770039344647, + "grad_norm": 0.0, + "learning_rate": 9.825983037465625e-06, + "loss": 1.3389, + "step": 17723 + }, + { + "epoch": 0.5204063656116037, + "grad_norm": 0.0, + "learning_rate": 9.825032221886734e-06, + "loss": 1.2773, + "step": 17724 + }, + { + "epoch": 0.5204357272887428, + "grad_norm": 0.0, + "learning_rate": 9.824081407890126e-06, + "loss": 1.3711, + "step": 17725 + }, + { + "epoch": 0.5204650889658817, + "grad_norm": 0.0, + "learning_rate": 9.823130595484394e-06, + "loss": 1.3672, + "step": 17726 + }, + { + "epoch": 0.5204944506430207, + "grad_norm": 0.0, + "learning_rate": 9.822179784678139e-06, + "loss": 1.3359, + "step": 17727 + }, + { + "epoch": 0.5205238123201598, + "grad_norm": 0.0, + "learning_rate": 9.821228975479958e-06, + "loss": 1.3252, + "step": 17728 + }, + { + "epoch": 0.5205531739972987, + "grad_norm": 0.0, + "learning_rate": 9.820278167898444e-06, + "loss": 1.2598, + "step": 17729 + }, + { + "epoch": 0.5205825356744377, + "grad_norm": 0.0, + "learning_rate": 9.819327361942206e-06, + "loss": 1.3135, + "step": 17730 + }, + { + "epoch": 0.5206118973515768, + "grad_norm": 0.0, + "learning_rate": 9.818376557619834e-06, + "loss": 1.4141, + "step": 17731 + }, + { + "epoch": 0.5206412590287157, + "grad_norm": 0.0, + "learning_rate": 9.817425754939932e-06, + "loss": 1.3594, + "step": 17732 + }, + { + "epoch": 0.5206706207058547, + "grad_norm": 0.0, + "learning_rate": 9.81647495391109e-06, + "loss": 1.2988, + "step": 17733 + }, + { + "epoch": 0.5206999823829938, + "grad_norm": 0.0, + "learning_rate": 9.815524154541919e-06, + "loss": 1.3247, + "step": 17734 + }, + { + "epoch": 0.5207293440601327, + "grad_norm": 0.0, + "learning_rate": 9.814573356841005e-06, + "loss": 1.2725, + "step": 17735 + }, + { + "epoch": 0.5207587057372717, + "grad_norm": 0.0, + "learning_rate": 9.813622560816956e-06, + "loss": 1.3086, + "step": 17736 + }, + { + "epoch": 0.5207880674144107, + "grad_norm": 0.0, + "learning_rate": 9.812671766478365e-06, + "loss": 1.3105, + "step": 17737 + }, + { + "epoch": 0.5208174290915497, + "grad_norm": 0.0, + "learning_rate": 9.811720973833828e-06, + "loss": 1.3369, + "step": 17738 + }, + { + "epoch": 0.5208467907686887, + "grad_norm": 0.0, + "learning_rate": 9.81077018289195e-06, + "loss": 1.23, + "step": 17739 + }, + { + "epoch": 0.5208761524458277, + "grad_norm": 0.0, + "learning_rate": 9.809819393661321e-06, + "loss": 1.168, + "step": 17740 + }, + { + "epoch": 0.5209055141229667, + "grad_norm": 0.0, + "learning_rate": 9.80886860615055e-06, + "loss": 1.3652, + "step": 17741 + }, + { + "epoch": 0.5209348758001057, + "grad_norm": 0.0, + "learning_rate": 9.807917820368223e-06, + "loss": 1.3018, + "step": 17742 + }, + { + "epoch": 0.5209642374772447, + "grad_norm": 0.0, + "learning_rate": 9.806967036322949e-06, + "loss": 1.2031, + "step": 17743 + }, + { + "epoch": 0.5209935991543837, + "grad_norm": 0.0, + "learning_rate": 9.80601625402332e-06, + "loss": 1.3291, + "step": 17744 + }, + { + "epoch": 0.5210229608315227, + "grad_norm": 0.0, + "learning_rate": 9.805065473477936e-06, + "loss": 1.334, + "step": 17745 + }, + { + "epoch": 0.5210523225086617, + "grad_norm": 0.0, + "learning_rate": 9.804114694695393e-06, + "loss": 1.2305, + "step": 17746 + }, + { + "epoch": 0.5210816841858007, + "grad_norm": 0.0, + "learning_rate": 9.803163917684296e-06, + "loss": 1.3984, + "step": 17747 + }, + { + "epoch": 0.5211110458629397, + "grad_norm": 0.0, + "learning_rate": 9.802213142453235e-06, + "loss": 1.2998, + "step": 17748 + }, + { + "epoch": 0.5211404075400787, + "grad_norm": 0.0, + "learning_rate": 9.80126236901081e-06, + "loss": 1.2749, + "step": 17749 + }, + { + "epoch": 0.5211697692172177, + "grad_norm": 0.0, + "learning_rate": 9.800311597365622e-06, + "loss": 1.3486, + "step": 17750 + }, + { + "epoch": 0.5211991308943567, + "grad_norm": 0.0, + "learning_rate": 9.799360827526265e-06, + "loss": 1.3164, + "step": 17751 + }, + { + "epoch": 0.5212284925714957, + "grad_norm": 0.0, + "learning_rate": 9.798410059501342e-06, + "loss": 1.3472, + "step": 17752 + }, + { + "epoch": 0.5212578542486347, + "grad_norm": 0.0, + "learning_rate": 9.797459293299444e-06, + "loss": 1.4316, + "step": 17753 + }, + { + "epoch": 0.5212872159257737, + "grad_norm": 0.0, + "learning_rate": 9.796508528929179e-06, + "loss": 1.2852, + "step": 17754 + }, + { + "epoch": 0.5213165776029127, + "grad_norm": 0.0, + "learning_rate": 9.795557766399137e-06, + "loss": 1.3525, + "step": 17755 + }, + { + "epoch": 0.5213459392800517, + "grad_norm": 0.0, + "learning_rate": 9.79460700571792e-06, + "loss": 1.4023, + "step": 17756 + }, + { + "epoch": 0.5213753009571906, + "grad_norm": 0.0, + "learning_rate": 9.793656246894126e-06, + "loss": 1.3672, + "step": 17757 + }, + { + "epoch": 0.5214046626343297, + "grad_norm": 0.0, + "learning_rate": 9.792705489936344e-06, + "loss": 1.3135, + "step": 17758 + }, + { + "epoch": 0.5214340243114687, + "grad_norm": 0.0, + "learning_rate": 9.791754734853185e-06, + "loss": 1.4131, + "step": 17759 + }, + { + "epoch": 0.5214633859886076, + "grad_norm": 0.0, + "learning_rate": 9.790803981653241e-06, + "loss": 1.3623, + "step": 17760 + }, + { + "epoch": 0.5214927476657467, + "grad_norm": 0.0, + "learning_rate": 9.789853230345112e-06, + "loss": 1.3799, + "step": 17761 + }, + { + "epoch": 0.5215221093428857, + "grad_norm": 0.0, + "learning_rate": 9.788902480937389e-06, + "loss": 1.2622, + "step": 17762 + }, + { + "epoch": 0.5215514710200246, + "grad_norm": 0.0, + "learning_rate": 9.787951733438678e-06, + "loss": 1.1782, + "step": 17763 + }, + { + "epoch": 0.5215808326971637, + "grad_norm": 0.0, + "learning_rate": 9.787000987857572e-06, + "loss": 1.3408, + "step": 17764 + }, + { + "epoch": 0.5216101943743027, + "grad_norm": 0.0, + "learning_rate": 9.786050244202673e-06, + "loss": 1.2812, + "step": 17765 + }, + { + "epoch": 0.5216395560514416, + "grad_norm": 0.0, + "learning_rate": 9.785099502482573e-06, + "loss": 1.3262, + "step": 17766 + }, + { + "epoch": 0.5216689177285807, + "grad_norm": 0.0, + "learning_rate": 9.784148762705875e-06, + "loss": 1.4033, + "step": 17767 + }, + { + "epoch": 0.5216982794057197, + "grad_norm": 0.0, + "learning_rate": 9.783198024881179e-06, + "loss": 1.3096, + "step": 17768 + }, + { + "epoch": 0.5217276410828586, + "grad_norm": 0.0, + "learning_rate": 9.782247289017074e-06, + "loss": 1.4492, + "step": 17769 + }, + { + "epoch": 0.5217570027599977, + "grad_norm": 0.0, + "learning_rate": 9.781296555122164e-06, + "loss": 1.3164, + "step": 17770 + }, + { + "epoch": 0.5217863644371367, + "grad_norm": 0.0, + "learning_rate": 9.780345823205045e-06, + "loss": 1.3008, + "step": 17771 + }, + { + "epoch": 0.5218157261142756, + "grad_norm": 0.0, + "learning_rate": 9.779395093274317e-06, + "loss": 1.3428, + "step": 17772 + }, + { + "epoch": 0.5218450877914147, + "grad_norm": 0.0, + "learning_rate": 9.778444365338571e-06, + "loss": 1.3691, + "step": 17773 + }, + { + "epoch": 0.5218744494685537, + "grad_norm": 0.0, + "learning_rate": 9.777493639406413e-06, + "loss": 1.2568, + "step": 17774 + }, + { + "epoch": 0.5219038111456926, + "grad_norm": 0.0, + "learning_rate": 9.776542915486437e-06, + "loss": 1.2061, + "step": 17775 + }, + { + "epoch": 0.5219331728228317, + "grad_norm": 0.0, + "learning_rate": 9.77559219358724e-06, + "loss": 1.3799, + "step": 17776 + }, + { + "epoch": 0.5219625344999707, + "grad_norm": 0.0, + "learning_rate": 9.774641473717423e-06, + "loss": 1.3096, + "step": 17777 + }, + { + "epoch": 0.5219918961771096, + "grad_norm": 0.0, + "learning_rate": 9.773690755885574e-06, + "loss": 1.3174, + "step": 17778 + }, + { + "epoch": 0.5220212578542487, + "grad_norm": 0.0, + "learning_rate": 9.772740040100303e-06, + "loss": 1.207, + "step": 17779 + }, + { + "epoch": 0.5220506195313876, + "grad_norm": 0.0, + "learning_rate": 9.7717893263702e-06, + "loss": 1.4492, + "step": 17780 + }, + { + "epoch": 0.5220799812085266, + "grad_norm": 0.0, + "learning_rate": 9.770838614703865e-06, + "loss": 1.1167, + "step": 17781 + }, + { + "epoch": 0.5221093428856657, + "grad_norm": 0.0, + "learning_rate": 9.769887905109892e-06, + "loss": 1.2949, + "step": 17782 + }, + { + "epoch": 0.5221387045628046, + "grad_norm": 0.0, + "learning_rate": 9.768937197596888e-06, + "loss": 1.2236, + "step": 17783 + }, + { + "epoch": 0.5221680662399436, + "grad_norm": 0.0, + "learning_rate": 9.76798649217344e-06, + "loss": 1.3232, + "step": 17784 + }, + { + "epoch": 0.5221974279170826, + "grad_norm": 0.0, + "learning_rate": 9.767035788848152e-06, + "loss": 1.3223, + "step": 17785 + }, + { + "epoch": 0.5222267895942216, + "grad_norm": 0.0, + "learning_rate": 9.766085087629612e-06, + "loss": 1.2695, + "step": 17786 + }, + { + "epoch": 0.5222561512713606, + "grad_norm": 0.0, + "learning_rate": 9.765134388526432e-06, + "loss": 1.2764, + "step": 17787 + }, + { + "epoch": 0.5222855129484996, + "grad_norm": 0.0, + "learning_rate": 9.764183691547203e-06, + "loss": 1.2549, + "step": 17788 + }, + { + "epoch": 0.5223148746256386, + "grad_norm": 0.0, + "learning_rate": 9.763232996700516e-06, + "loss": 1.2627, + "step": 17789 + }, + { + "epoch": 0.5223442363027776, + "grad_norm": 0.0, + "learning_rate": 9.762282303994977e-06, + "loss": 1.3545, + "step": 17790 + }, + { + "epoch": 0.5223735979799166, + "grad_norm": 0.0, + "learning_rate": 9.761331613439176e-06, + "loss": 1.1016, + "step": 17791 + }, + { + "epoch": 0.5224029596570556, + "grad_norm": 0.0, + "learning_rate": 9.760380925041718e-06, + "loss": 1.3379, + "step": 17792 + }, + { + "epoch": 0.5224323213341946, + "grad_norm": 0.0, + "learning_rate": 9.759430238811196e-06, + "loss": 1.2041, + "step": 17793 + }, + { + "epoch": 0.5224616830113336, + "grad_norm": 0.0, + "learning_rate": 9.758479554756208e-06, + "loss": 1.251, + "step": 17794 + }, + { + "epoch": 0.5224910446884726, + "grad_norm": 0.0, + "learning_rate": 9.757528872885348e-06, + "loss": 1.3086, + "step": 17795 + }, + { + "epoch": 0.5225204063656116, + "grad_norm": 0.0, + "learning_rate": 9.756578193207221e-06, + "loss": 1.4355, + "step": 17796 + }, + { + "epoch": 0.5225497680427506, + "grad_norm": 0.0, + "learning_rate": 9.755627515730422e-06, + "loss": 1.2744, + "step": 17797 + }, + { + "epoch": 0.5225791297198896, + "grad_norm": 0.0, + "learning_rate": 9.754676840463539e-06, + "loss": 1.335, + "step": 17798 + }, + { + "epoch": 0.5226084913970286, + "grad_norm": 0.0, + "learning_rate": 9.75372616741518e-06, + "loss": 1.417, + "step": 17799 + }, + { + "epoch": 0.5226378530741675, + "grad_norm": 0.0, + "learning_rate": 9.752775496593937e-06, + "loss": 1.3008, + "step": 17800 + }, + { + "epoch": 0.5226672147513066, + "grad_norm": 0.0, + "learning_rate": 9.75182482800841e-06, + "loss": 1.3418, + "step": 17801 + }, + { + "epoch": 0.5226965764284456, + "grad_norm": 0.0, + "learning_rate": 9.75087416166719e-06, + "loss": 1.2715, + "step": 17802 + }, + { + "epoch": 0.5227259381055845, + "grad_norm": 0.0, + "learning_rate": 9.749923497578884e-06, + "loss": 1.3486, + "step": 17803 + }, + { + "epoch": 0.5227552997827236, + "grad_norm": 0.0, + "learning_rate": 9.748972835752082e-06, + "loss": 1.2852, + "step": 17804 + }, + { + "epoch": 0.5227846614598626, + "grad_norm": 0.0, + "learning_rate": 9.748022176195385e-06, + "loss": 1.3467, + "step": 17805 + }, + { + "epoch": 0.5228140231370015, + "grad_norm": 0.0, + "learning_rate": 9.747071518917389e-06, + "loss": 1.4023, + "step": 17806 + }, + { + "epoch": 0.5228433848141406, + "grad_norm": 0.0, + "learning_rate": 9.746120863926683e-06, + "loss": 1.3301, + "step": 17807 + }, + { + "epoch": 0.5228727464912796, + "grad_norm": 0.0, + "learning_rate": 9.745170211231878e-06, + "loss": 1.2749, + "step": 17808 + }, + { + "epoch": 0.5229021081684185, + "grad_norm": 0.0, + "learning_rate": 9.74421956084156e-06, + "loss": 1.2466, + "step": 17809 + }, + { + "epoch": 0.5229314698455576, + "grad_norm": 0.0, + "learning_rate": 9.743268912764331e-06, + "loss": 1.2305, + "step": 17810 + }, + { + "epoch": 0.5229608315226966, + "grad_norm": 0.0, + "learning_rate": 9.742318267008786e-06, + "loss": 1.3555, + "step": 17811 + }, + { + "epoch": 0.5229901931998355, + "grad_norm": 0.0, + "learning_rate": 9.741367623583526e-06, + "loss": 1.376, + "step": 17812 + }, + { + "epoch": 0.5230195548769746, + "grad_norm": 0.0, + "learning_rate": 9.740416982497142e-06, + "loss": 1.3491, + "step": 17813 + }, + { + "epoch": 0.5230489165541136, + "grad_norm": 0.0, + "learning_rate": 9.739466343758236e-06, + "loss": 1.377, + "step": 17814 + }, + { + "epoch": 0.5230782782312525, + "grad_norm": 0.0, + "learning_rate": 9.7385157073754e-06, + "loss": 1.2764, + "step": 17815 + }, + { + "epoch": 0.5231076399083916, + "grad_norm": 0.0, + "learning_rate": 9.737565073357235e-06, + "loss": 1.1802, + "step": 17816 + }, + { + "epoch": 0.5231370015855306, + "grad_norm": 0.0, + "learning_rate": 9.736614441712338e-06, + "loss": 1.3037, + "step": 17817 + }, + { + "epoch": 0.5231663632626695, + "grad_norm": 0.0, + "learning_rate": 9.735663812449301e-06, + "loss": 1.3145, + "step": 17818 + }, + { + "epoch": 0.5231957249398086, + "grad_norm": 0.0, + "learning_rate": 9.734713185576727e-06, + "loss": 1.2446, + "step": 17819 + }, + { + "epoch": 0.5232250866169476, + "grad_norm": 0.0, + "learning_rate": 9.733762561103204e-06, + "loss": 1.3477, + "step": 17820 + }, + { + "epoch": 0.5232544482940865, + "grad_norm": 0.0, + "learning_rate": 9.732811939037339e-06, + "loss": 1.2988, + "step": 17821 + }, + { + "epoch": 0.5232838099712256, + "grad_norm": 0.0, + "learning_rate": 9.731861319387722e-06, + "loss": 1.1768, + "step": 17822 + }, + { + "epoch": 0.5233131716483646, + "grad_norm": 0.0, + "learning_rate": 9.730910702162955e-06, + "loss": 1.2407, + "step": 17823 + }, + { + "epoch": 0.5233425333255035, + "grad_norm": 0.0, + "learning_rate": 9.729960087371627e-06, + "loss": 1.2544, + "step": 17824 + }, + { + "epoch": 0.5233718950026426, + "grad_norm": 0.0, + "learning_rate": 9.729009475022342e-06, + "loss": 1.2207, + "step": 17825 + }, + { + "epoch": 0.5234012566797815, + "grad_norm": 0.0, + "learning_rate": 9.728058865123695e-06, + "loss": 1.2207, + "step": 17826 + }, + { + "epoch": 0.5234306183569205, + "grad_norm": 0.0, + "learning_rate": 9.727108257684277e-06, + "loss": 1.2344, + "step": 17827 + }, + { + "epoch": 0.5234599800340596, + "grad_norm": 0.0, + "learning_rate": 9.726157652712692e-06, + "loss": 1.1821, + "step": 17828 + }, + { + "epoch": 0.5234893417111985, + "grad_norm": 0.0, + "learning_rate": 9.725207050217533e-06, + "loss": 1.3926, + "step": 17829 + }, + { + "epoch": 0.5235187033883375, + "grad_norm": 0.0, + "learning_rate": 9.724256450207397e-06, + "loss": 1.2334, + "step": 17830 + }, + { + "epoch": 0.5235480650654766, + "grad_norm": 0.0, + "learning_rate": 9.723305852690878e-06, + "loss": 1.3145, + "step": 17831 + }, + { + "epoch": 0.5235774267426155, + "grad_norm": 0.0, + "learning_rate": 9.722355257676578e-06, + "loss": 1.418, + "step": 17832 + }, + { + "epoch": 0.5236067884197545, + "grad_norm": 0.0, + "learning_rate": 9.72140466517309e-06, + "loss": 1.2744, + "step": 17833 + }, + { + "epoch": 0.5236361500968936, + "grad_norm": 0.0, + "learning_rate": 9.720454075189013e-06, + "loss": 1.2383, + "step": 17834 + }, + { + "epoch": 0.5236655117740325, + "grad_norm": 0.0, + "learning_rate": 9.719503487732934e-06, + "loss": 1.2949, + "step": 17835 + }, + { + "epoch": 0.5236948734511715, + "grad_norm": 0.0, + "learning_rate": 9.718552902813464e-06, + "loss": 1.3027, + "step": 17836 + }, + { + "epoch": 0.5237242351283106, + "grad_norm": 0.0, + "learning_rate": 9.717602320439192e-06, + "loss": 1.2773, + "step": 17837 + }, + { + "epoch": 0.5237535968054495, + "grad_norm": 0.0, + "learning_rate": 9.716651740618711e-06, + "loss": 1.3477, + "step": 17838 + }, + { + "epoch": 0.5237829584825885, + "grad_norm": 0.0, + "learning_rate": 9.715701163360624e-06, + "loss": 1.3115, + "step": 17839 + }, + { + "epoch": 0.5238123201597276, + "grad_norm": 0.0, + "learning_rate": 9.714750588673521e-06, + "loss": 1.2471, + "step": 17840 + }, + { + "epoch": 0.5238416818368665, + "grad_norm": 0.0, + "learning_rate": 9.713800016566004e-06, + "loss": 1.374, + "step": 17841 + }, + { + "epoch": 0.5238710435140055, + "grad_norm": 0.0, + "learning_rate": 9.712849447046665e-06, + "loss": 1.3623, + "step": 17842 + }, + { + "epoch": 0.5239004051911446, + "grad_norm": 0.0, + "learning_rate": 9.711898880124104e-06, + "loss": 1.4033, + "step": 17843 + }, + { + "epoch": 0.5239297668682835, + "grad_norm": 0.0, + "learning_rate": 9.710948315806912e-06, + "loss": 1.3301, + "step": 17844 + }, + { + "epoch": 0.5239591285454225, + "grad_norm": 0.0, + "learning_rate": 9.709997754103692e-06, + "loss": 1.1196, + "step": 17845 + }, + { + "epoch": 0.5239884902225616, + "grad_norm": 0.0, + "learning_rate": 9.709047195023036e-06, + "loss": 1.292, + "step": 17846 + }, + { + "epoch": 0.5240178518997005, + "grad_norm": 0.0, + "learning_rate": 9.708096638573538e-06, + "loss": 1.3115, + "step": 17847 + }, + { + "epoch": 0.5240472135768395, + "grad_norm": 0.0, + "learning_rate": 9.707146084763801e-06, + "loss": 1.3574, + "step": 17848 + }, + { + "epoch": 0.5240765752539785, + "grad_norm": 0.0, + "learning_rate": 9.706195533602413e-06, + "loss": 1.4111, + "step": 17849 + }, + { + "epoch": 0.5241059369311175, + "grad_norm": 0.0, + "learning_rate": 9.705244985097978e-06, + "loss": 1.2783, + "step": 17850 + }, + { + "epoch": 0.5241352986082565, + "grad_norm": 0.0, + "learning_rate": 9.704294439259084e-06, + "loss": 1.3369, + "step": 17851 + }, + { + "epoch": 0.5241646602853955, + "grad_norm": 0.0, + "learning_rate": 9.703343896094334e-06, + "loss": 1.2051, + "step": 17852 + }, + { + "epoch": 0.5241940219625345, + "grad_norm": 0.0, + "learning_rate": 9.702393355612319e-06, + "loss": 1.3125, + "step": 17853 + }, + { + "epoch": 0.5242233836396735, + "grad_norm": 0.0, + "learning_rate": 9.701442817821639e-06, + "loss": 1.292, + "step": 17854 + }, + { + "epoch": 0.5242527453168125, + "grad_norm": 0.0, + "learning_rate": 9.700492282730885e-06, + "loss": 1.2207, + "step": 17855 + }, + { + "epoch": 0.5242821069939515, + "grad_norm": 0.0, + "learning_rate": 9.69954175034866e-06, + "loss": 1.2998, + "step": 17856 + }, + { + "epoch": 0.5243114686710905, + "grad_norm": 0.0, + "learning_rate": 9.698591220683554e-06, + "loss": 1.1885, + "step": 17857 + }, + { + "epoch": 0.5243408303482295, + "grad_norm": 0.0, + "learning_rate": 9.697640693744164e-06, + "loss": 1.2783, + "step": 17858 + }, + { + "epoch": 0.5243701920253685, + "grad_norm": 0.0, + "learning_rate": 9.69669016953909e-06, + "loss": 1.0205, + "step": 17859 + }, + { + "epoch": 0.5243995537025075, + "grad_norm": 0.0, + "learning_rate": 9.695739648076918e-06, + "loss": 1.2275, + "step": 17860 + }, + { + "epoch": 0.5244289153796465, + "grad_norm": 0.0, + "learning_rate": 9.694789129366257e-06, + "loss": 1.4219, + "step": 17861 + }, + { + "epoch": 0.5244582770567855, + "grad_norm": 0.0, + "learning_rate": 9.693838613415691e-06, + "loss": 1.2588, + "step": 17862 + }, + { + "epoch": 0.5244876387339245, + "grad_norm": 0.0, + "learning_rate": 9.692888100233826e-06, + "loss": 1.2607, + "step": 17863 + }, + { + "epoch": 0.5245170004110635, + "grad_norm": 0.0, + "learning_rate": 9.691937589829245e-06, + "loss": 1.2715, + "step": 17864 + }, + { + "epoch": 0.5245463620882025, + "grad_norm": 0.0, + "learning_rate": 9.690987082210558e-06, + "loss": 1.4229, + "step": 17865 + }, + { + "epoch": 0.5245757237653415, + "grad_norm": 0.0, + "learning_rate": 9.690036577386353e-06, + "loss": 1.1846, + "step": 17866 + }, + { + "epoch": 0.5246050854424805, + "grad_norm": 0.0, + "learning_rate": 9.689086075365226e-06, + "loss": 1.3477, + "step": 17867 + }, + { + "epoch": 0.5246344471196195, + "grad_norm": 0.0, + "learning_rate": 9.688135576155774e-06, + "loss": 1.3418, + "step": 17868 + }, + { + "epoch": 0.5246638087967584, + "grad_norm": 0.0, + "learning_rate": 9.687185079766588e-06, + "loss": 1.3359, + "step": 17869 + }, + { + "epoch": 0.5246931704738975, + "grad_norm": 0.0, + "learning_rate": 9.686234586206272e-06, + "loss": 1.2705, + "step": 17870 + }, + { + "epoch": 0.5247225321510365, + "grad_norm": 0.0, + "learning_rate": 9.685284095483413e-06, + "loss": 1.3916, + "step": 17871 + }, + { + "epoch": 0.5247518938281754, + "grad_norm": 0.0, + "learning_rate": 9.684333607606615e-06, + "loss": 1.2871, + "step": 17872 + }, + { + "epoch": 0.5247812555053145, + "grad_norm": 0.0, + "learning_rate": 9.683383122584463e-06, + "loss": 1.2324, + "step": 17873 + }, + { + "epoch": 0.5248106171824535, + "grad_norm": 0.0, + "learning_rate": 9.682432640425567e-06, + "loss": 1.1089, + "step": 17874 + }, + { + "epoch": 0.5248399788595924, + "grad_norm": 0.0, + "learning_rate": 9.68148216113851e-06, + "loss": 1.333, + "step": 17875 + }, + { + "epoch": 0.5248693405367315, + "grad_norm": 0.0, + "learning_rate": 9.680531684731888e-06, + "loss": 1.2295, + "step": 17876 + }, + { + "epoch": 0.5248987022138705, + "grad_norm": 0.0, + "learning_rate": 9.679581211214302e-06, + "loss": 1.2832, + "step": 17877 + }, + { + "epoch": 0.5249280638910094, + "grad_norm": 0.0, + "learning_rate": 9.678630740594346e-06, + "loss": 1.3096, + "step": 17878 + }, + { + "epoch": 0.5249574255681485, + "grad_norm": 0.0, + "learning_rate": 9.677680272880615e-06, + "loss": 1.2559, + "step": 17879 + }, + { + "epoch": 0.5249867872452875, + "grad_norm": 0.0, + "learning_rate": 9.6767298080817e-06, + "loss": 1.2969, + "step": 17880 + }, + { + "epoch": 0.5250161489224264, + "grad_norm": 0.0, + "learning_rate": 9.675779346206205e-06, + "loss": 1.5, + "step": 17881 + }, + { + "epoch": 0.5250455105995655, + "grad_norm": 0.0, + "learning_rate": 9.674828887262717e-06, + "loss": 1.334, + "step": 17882 + }, + { + "epoch": 0.5250748722767045, + "grad_norm": 0.0, + "learning_rate": 9.673878431259838e-06, + "loss": 1.1538, + "step": 17883 + }, + { + "epoch": 0.5251042339538434, + "grad_norm": 0.0, + "learning_rate": 9.672927978206154e-06, + "loss": 1.3125, + "step": 17884 + }, + { + "epoch": 0.5251335956309825, + "grad_norm": 0.0, + "learning_rate": 9.67197752811027e-06, + "loss": 1.2715, + "step": 17885 + }, + { + "epoch": 0.5251629573081215, + "grad_norm": 0.0, + "learning_rate": 9.67102708098078e-06, + "loss": 1.2095, + "step": 17886 + }, + { + "epoch": 0.5251923189852604, + "grad_norm": 0.0, + "learning_rate": 9.670076636826273e-06, + "loss": 1.29, + "step": 17887 + }, + { + "epoch": 0.5252216806623994, + "grad_norm": 0.0, + "learning_rate": 9.669126195655349e-06, + "loss": 1.4043, + "step": 17888 + }, + { + "epoch": 0.5252510423395385, + "grad_norm": 0.0, + "learning_rate": 9.668175757476598e-06, + "loss": 1.2646, + "step": 17889 + }, + { + "epoch": 0.5252804040166774, + "grad_norm": 0.0, + "learning_rate": 9.667225322298623e-06, + "loss": 1.25, + "step": 17890 + }, + { + "epoch": 0.5253097656938164, + "grad_norm": 0.0, + "learning_rate": 9.666274890130012e-06, + "loss": 1.3818, + "step": 17891 + }, + { + "epoch": 0.5253391273709554, + "grad_norm": 0.0, + "learning_rate": 9.665324460979365e-06, + "loss": 1.3301, + "step": 17892 + }, + { + "epoch": 0.5253684890480944, + "grad_norm": 0.0, + "learning_rate": 9.66437403485527e-06, + "loss": 1.3896, + "step": 17893 + }, + { + "epoch": 0.5253978507252334, + "grad_norm": 0.0, + "learning_rate": 9.66342361176633e-06, + "loss": 1.3477, + "step": 17894 + }, + { + "epoch": 0.5254272124023724, + "grad_norm": 0.0, + "learning_rate": 9.662473191721138e-06, + "loss": 1.2764, + "step": 17895 + }, + { + "epoch": 0.5254565740795114, + "grad_norm": 0.0, + "learning_rate": 9.661522774728284e-06, + "loss": 1.3721, + "step": 17896 + }, + { + "epoch": 0.5254859357566504, + "grad_norm": 0.0, + "learning_rate": 9.66057236079637e-06, + "loss": 1.2573, + "step": 17897 + }, + { + "epoch": 0.5255152974337894, + "grad_norm": 0.0, + "learning_rate": 9.659621949933981e-06, + "loss": 1.3057, + "step": 17898 + }, + { + "epoch": 0.5255446591109284, + "grad_norm": 0.0, + "learning_rate": 9.658671542149726e-06, + "loss": 1.3545, + "step": 17899 + }, + { + "epoch": 0.5255740207880674, + "grad_norm": 0.0, + "learning_rate": 9.657721137452183e-06, + "loss": 1.3594, + "step": 17900 + }, + { + "epoch": 0.5256033824652064, + "grad_norm": 0.0, + "learning_rate": 9.656770735849961e-06, + "loss": 1.3779, + "step": 17901 + }, + { + "epoch": 0.5256327441423454, + "grad_norm": 0.0, + "learning_rate": 9.655820337351649e-06, + "loss": 1.2886, + "step": 17902 + }, + { + "epoch": 0.5256621058194844, + "grad_norm": 0.0, + "learning_rate": 9.65486994196584e-06, + "loss": 1.2271, + "step": 17903 + }, + { + "epoch": 0.5256914674966234, + "grad_norm": 0.0, + "learning_rate": 9.65391954970113e-06, + "loss": 1.2783, + "step": 17904 + }, + { + "epoch": 0.5257208291737624, + "grad_norm": 0.0, + "learning_rate": 9.652969160566118e-06, + "loss": 1.2622, + "step": 17905 + }, + { + "epoch": 0.5257501908509014, + "grad_norm": 0.0, + "learning_rate": 9.652018774569391e-06, + "loss": 1.5332, + "step": 17906 + }, + { + "epoch": 0.5257795525280404, + "grad_norm": 0.0, + "learning_rate": 9.651068391719549e-06, + "loss": 1.2944, + "step": 17907 + }, + { + "epoch": 0.5258089142051794, + "grad_norm": 0.0, + "learning_rate": 9.650118012025185e-06, + "loss": 1.1924, + "step": 17908 + }, + { + "epoch": 0.5258382758823184, + "grad_norm": 0.0, + "learning_rate": 9.64916763549489e-06, + "loss": 1.2705, + "step": 17909 + }, + { + "epoch": 0.5258676375594574, + "grad_norm": 0.0, + "learning_rate": 9.648217262137266e-06, + "loss": 1.3057, + "step": 17910 + }, + { + "epoch": 0.5258969992365964, + "grad_norm": 0.0, + "learning_rate": 9.647266891960902e-06, + "loss": 1.3711, + "step": 17911 + }, + { + "epoch": 0.5259263609137353, + "grad_norm": 0.0, + "learning_rate": 9.646316524974395e-06, + "loss": 1.334, + "step": 17912 + }, + { + "epoch": 0.5259557225908744, + "grad_norm": 0.0, + "learning_rate": 9.645366161186334e-06, + "loss": 1.2212, + "step": 17913 + }, + { + "epoch": 0.5259850842680134, + "grad_norm": 0.0, + "learning_rate": 9.644415800605323e-06, + "loss": 1.1885, + "step": 17914 + }, + { + "epoch": 0.5260144459451523, + "grad_norm": 0.0, + "learning_rate": 9.643465443239949e-06, + "loss": 1.3433, + "step": 17915 + }, + { + "epoch": 0.5260438076222914, + "grad_norm": 0.0, + "learning_rate": 9.642515089098807e-06, + "loss": 1.376, + "step": 17916 + }, + { + "epoch": 0.5260731692994304, + "grad_norm": 0.0, + "learning_rate": 9.641564738190495e-06, + "loss": 1.3428, + "step": 17917 + }, + { + "epoch": 0.5261025309765693, + "grad_norm": 0.0, + "learning_rate": 9.6406143905236e-06, + "loss": 1.2163, + "step": 17918 + }, + { + "epoch": 0.5261318926537084, + "grad_norm": 0.0, + "learning_rate": 9.639664046106727e-06, + "loss": 1.2871, + "step": 17919 + }, + { + "epoch": 0.5261612543308474, + "grad_norm": 0.0, + "learning_rate": 9.638713704948461e-06, + "loss": 1.3359, + "step": 17920 + }, + { + "epoch": 0.5261906160079863, + "grad_norm": 0.0, + "learning_rate": 9.637763367057403e-06, + "loss": 1.1777, + "step": 17921 + }, + { + "epoch": 0.5262199776851254, + "grad_norm": 0.0, + "learning_rate": 9.636813032442137e-06, + "loss": 1.208, + "step": 17922 + }, + { + "epoch": 0.5262493393622644, + "grad_norm": 0.0, + "learning_rate": 9.63586270111127e-06, + "loss": 1.4482, + "step": 17923 + }, + { + "epoch": 0.5262787010394033, + "grad_norm": 0.0, + "learning_rate": 9.634912373073391e-06, + "loss": 1.3223, + "step": 17924 + }, + { + "epoch": 0.5263080627165424, + "grad_norm": 0.0, + "learning_rate": 9.633962048337086e-06, + "loss": 1.3496, + "step": 17925 + }, + { + "epoch": 0.5263374243936814, + "grad_norm": 0.0, + "learning_rate": 9.63301172691096e-06, + "loss": 1.147, + "step": 17926 + }, + { + "epoch": 0.5263667860708203, + "grad_norm": 0.0, + "learning_rate": 9.632061408803603e-06, + "loss": 1.3105, + "step": 17927 + }, + { + "epoch": 0.5263961477479594, + "grad_norm": 0.0, + "learning_rate": 9.63111109402361e-06, + "loss": 1.2383, + "step": 17928 + }, + { + "epoch": 0.5264255094250984, + "grad_norm": 0.0, + "learning_rate": 9.63016078257957e-06, + "loss": 1.3237, + "step": 17929 + }, + { + "epoch": 0.5264548711022373, + "grad_norm": 0.0, + "learning_rate": 9.629210474480084e-06, + "loss": 1.3994, + "step": 17930 + }, + { + "epoch": 0.5264842327793764, + "grad_norm": 0.0, + "learning_rate": 9.62826016973374e-06, + "loss": 1.2793, + "step": 17931 + }, + { + "epoch": 0.5265135944565154, + "grad_norm": 0.0, + "learning_rate": 9.627309868349138e-06, + "loss": 1.2676, + "step": 17932 + }, + { + "epoch": 0.5265429561336543, + "grad_norm": 0.0, + "learning_rate": 9.626359570334864e-06, + "loss": 1.1636, + "step": 17933 + }, + { + "epoch": 0.5265723178107934, + "grad_norm": 0.0, + "learning_rate": 9.62540927569952e-06, + "loss": 1.2949, + "step": 17934 + }, + { + "epoch": 0.5266016794879324, + "grad_norm": 0.0, + "learning_rate": 9.624458984451696e-06, + "loss": 1.2441, + "step": 17935 + }, + { + "epoch": 0.5266310411650713, + "grad_norm": 0.0, + "learning_rate": 9.623508696599983e-06, + "loss": 1.3818, + "step": 17936 + }, + { + "epoch": 0.5266604028422104, + "grad_norm": 0.0, + "learning_rate": 9.62255841215298e-06, + "loss": 1.4229, + "step": 17937 + }, + { + "epoch": 0.5266897645193493, + "grad_norm": 0.0, + "learning_rate": 9.621608131119272e-06, + "loss": 1.3398, + "step": 17938 + }, + { + "epoch": 0.5267191261964883, + "grad_norm": 0.0, + "learning_rate": 9.620657853507465e-06, + "loss": 1.2725, + "step": 17939 + }, + { + "epoch": 0.5267484878736274, + "grad_norm": 0.0, + "learning_rate": 9.619707579326142e-06, + "loss": 1.3086, + "step": 17940 + }, + { + "epoch": 0.5267778495507663, + "grad_norm": 0.0, + "learning_rate": 9.618757308583905e-06, + "loss": 1.2705, + "step": 17941 + }, + { + "epoch": 0.5268072112279053, + "grad_norm": 0.0, + "learning_rate": 9.617807041289338e-06, + "loss": 1.2529, + "step": 17942 + }, + { + "epoch": 0.5268365729050444, + "grad_norm": 0.0, + "learning_rate": 9.616856777451043e-06, + "loss": 1.3042, + "step": 17943 + }, + { + "epoch": 0.5268659345821833, + "grad_norm": 0.0, + "learning_rate": 9.615906517077612e-06, + "loss": 1.3965, + "step": 17944 + }, + { + "epoch": 0.5268952962593223, + "grad_norm": 0.0, + "learning_rate": 9.614956260177635e-06, + "loss": 1.3418, + "step": 17945 + }, + { + "epoch": 0.5269246579364614, + "grad_norm": 0.0, + "learning_rate": 9.614006006759707e-06, + "loss": 1.2603, + "step": 17946 + }, + { + "epoch": 0.5269540196136003, + "grad_norm": 0.0, + "learning_rate": 9.613055756832419e-06, + "loss": 1.1523, + "step": 17947 + }, + { + "epoch": 0.5269833812907393, + "grad_norm": 0.0, + "learning_rate": 9.61210551040437e-06, + "loss": 1.3369, + "step": 17948 + }, + { + "epoch": 0.5270127429678784, + "grad_norm": 0.0, + "learning_rate": 9.61115526748415e-06, + "loss": 1.3115, + "step": 17949 + }, + { + "epoch": 0.5270421046450173, + "grad_norm": 0.0, + "learning_rate": 9.610205028080354e-06, + "loss": 1.3604, + "step": 17950 + }, + { + "epoch": 0.5270714663221563, + "grad_norm": 0.0, + "learning_rate": 9.60925479220157e-06, + "loss": 1.2031, + "step": 17951 + }, + { + "epoch": 0.5271008279992954, + "grad_norm": 0.0, + "learning_rate": 9.608304559856398e-06, + "loss": 1.2979, + "step": 17952 + }, + { + "epoch": 0.5271301896764343, + "grad_norm": 0.0, + "learning_rate": 9.607354331053425e-06, + "loss": 1.2935, + "step": 17953 + }, + { + "epoch": 0.5271595513535733, + "grad_norm": 0.0, + "learning_rate": 9.60640410580125e-06, + "loss": 1.252, + "step": 17954 + }, + { + "epoch": 0.5271889130307124, + "grad_norm": 0.0, + "learning_rate": 9.605453884108465e-06, + "loss": 1.2656, + "step": 17955 + }, + { + "epoch": 0.5272182747078513, + "grad_norm": 0.0, + "learning_rate": 9.604503665983659e-06, + "loss": 1.1748, + "step": 17956 + }, + { + "epoch": 0.5272476363849903, + "grad_norm": 0.0, + "learning_rate": 9.603553451435429e-06, + "loss": 1.3057, + "step": 17957 + }, + { + "epoch": 0.5272769980621294, + "grad_norm": 0.0, + "learning_rate": 9.602603240472364e-06, + "loss": 1.2793, + "step": 17958 + }, + { + "epoch": 0.5273063597392683, + "grad_norm": 0.0, + "learning_rate": 9.601653033103062e-06, + "loss": 1.1699, + "step": 17959 + }, + { + "epoch": 0.5273357214164073, + "grad_norm": 0.0, + "learning_rate": 9.600702829336113e-06, + "loss": 1.4365, + "step": 17960 + }, + { + "epoch": 0.5273650830935463, + "grad_norm": 0.0, + "learning_rate": 9.599752629180111e-06, + "loss": 1.3945, + "step": 17961 + }, + { + "epoch": 0.5273944447706853, + "grad_norm": 0.0, + "learning_rate": 9.598802432643646e-06, + "loss": 1.3242, + "step": 17962 + }, + { + "epoch": 0.5274238064478243, + "grad_norm": 0.0, + "learning_rate": 9.597852239735315e-06, + "loss": 1.3623, + "step": 17963 + }, + { + "epoch": 0.5274531681249633, + "grad_norm": 0.0, + "learning_rate": 9.596902050463712e-06, + "loss": 1.1953, + "step": 17964 + }, + { + "epoch": 0.5274825298021023, + "grad_norm": 0.0, + "learning_rate": 9.595951864837424e-06, + "loss": 1.3945, + "step": 17965 + }, + { + "epoch": 0.5275118914792413, + "grad_norm": 0.0, + "learning_rate": 9.595001682865048e-06, + "loss": 1.4121, + "step": 17966 + }, + { + "epoch": 0.5275412531563803, + "grad_norm": 0.0, + "learning_rate": 9.594051504555173e-06, + "loss": 1.4355, + "step": 17967 + }, + { + "epoch": 0.5275706148335193, + "grad_norm": 0.0, + "learning_rate": 9.593101329916398e-06, + "loss": 1.1338, + "step": 17968 + }, + { + "epoch": 0.5275999765106583, + "grad_norm": 0.0, + "learning_rate": 9.592151158957308e-06, + "loss": 1.3164, + "step": 17969 + }, + { + "epoch": 0.5276293381877973, + "grad_norm": 0.0, + "learning_rate": 9.591200991686504e-06, + "loss": 1.3008, + "step": 17970 + }, + { + "epoch": 0.5276586998649363, + "grad_norm": 0.0, + "learning_rate": 9.590250828112568e-06, + "loss": 1.1841, + "step": 17971 + }, + { + "epoch": 0.5276880615420753, + "grad_norm": 0.0, + "learning_rate": 9.589300668244104e-06, + "loss": 1.3779, + "step": 17972 + }, + { + "epoch": 0.5277174232192143, + "grad_norm": 0.0, + "learning_rate": 9.588350512089697e-06, + "loss": 1.2451, + "step": 17973 + }, + { + "epoch": 0.5277467848963533, + "grad_norm": 0.0, + "learning_rate": 9.587400359657944e-06, + "loss": 1.2715, + "step": 17974 + }, + { + "epoch": 0.5277761465734923, + "grad_norm": 0.0, + "learning_rate": 9.586450210957434e-06, + "loss": 1.29, + "step": 17975 + }, + { + "epoch": 0.5278055082506313, + "grad_norm": 0.0, + "learning_rate": 9.585500065996758e-06, + "loss": 1.3643, + "step": 17976 + }, + { + "epoch": 0.5278348699277703, + "grad_norm": 0.0, + "learning_rate": 9.584549924784516e-06, + "loss": 1.1313, + "step": 17977 + }, + { + "epoch": 0.5278642316049093, + "grad_norm": 0.0, + "learning_rate": 9.58359978732929e-06, + "loss": 1.375, + "step": 17978 + }, + { + "epoch": 0.5278935932820483, + "grad_norm": 0.0, + "learning_rate": 9.582649653639682e-06, + "loss": 1.3242, + "step": 17979 + }, + { + "epoch": 0.5279229549591873, + "grad_norm": 0.0, + "learning_rate": 9.581699523724277e-06, + "loss": 1.3135, + "step": 17980 + }, + { + "epoch": 0.5279523166363262, + "grad_norm": 0.0, + "learning_rate": 9.580749397591674e-06, + "loss": 1.4756, + "step": 17981 + }, + { + "epoch": 0.5279816783134653, + "grad_norm": 0.0, + "learning_rate": 9.579799275250456e-06, + "loss": 1.3721, + "step": 17982 + }, + { + "epoch": 0.5280110399906043, + "grad_norm": 0.0, + "learning_rate": 9.578849156709226e-06, + "loss": 1.3193, + "step": 17983 + }, + { + "epoch": 0.5280404016677432, + "grad_norm": 0.0, + "learning_rate": 9.57789904197657e-06, + "loss": 1.2939, + "step": 17984 + }, + { + "epoch": 0.5280697633448823, + "grad_norm": 0.0, + "learning_rate": 9.57694893106108e-06, + "loss": 1.2959, + "step": 17985 + }, + { + "epoch": 0.5280991250220213, + "grad_norm": 0.0, + "learning_rate": 9.575998823971351e-06, + "loss": 1.2251, + "step": 17986 + }, + { + "epoch": 0.5281284866991602, + "grad_norm": 0.0, + "learning_rate": 9.575048720715968e-06, + "loss": 1.3125, + "step": 17987 + }, + { + "epoch": 0.5281578483762992, + "grad_norm": 0.0, + "learning_rate": 9.574098621303534e-06, + "loss": 1.3789, + "step": 17988 + }, + { + "epoch": 0.5281872100534383, + "grad_norm": 0.0, + "learning_rate": 9.573148525742632e-06, + "loss": 1.2549, + "step": 17989 + }, + { + "epoch": 0.5282165717305772, + "grad_norm": 0.0, + "learning_rate": 9.57219843404186e-06, + "loss": 1.3711, + "step": 17990 + }, + { + "epoch": 0.5282459334077162, + "grad_norm": 0.0, + "learning_rate": 9.571248346209801e-06, + "loss": 1.2144, + "step": 17991 + }, + { + "epoch": 0.5282752950848553, + "grad_norm": 0.0, + "learning_rate": 9.57029826225506e-06, + "loss": 1.3262, + "step": 17992 + }, + { + "epoch": 0.5283046567619942, + "grad_norm": 0.0, + "learning_rate": 9.56934818218622e-06, + "loss": 1.2852, + "step": 17993 + }, + { + "epoch": 0.5283340184391332, + "grad_norm": 0.0, + "learning_rate": 9.568398106011873e-06, + "loss": 1.374, + "step": 17994 + }, + { + "epoch": 0.5283633801162723, + "grad_norm": 0.0, + "learning_rate": 9.567448033740614e-06, + "loss": 1.2422, + "step": 17995 + }, + { + "epoch": 0.5283927417934112, + "grad_norm": 0.0, + "learning_rate": 9.56649796538103e-06, + "loss": 1.3271, + "step": 17996 + }, + { + "epoch": 0.5284221034705502, + "grad_norm": 0.0, + "learning_rate": 9.565547900941721e-06, + "loss": 1.1138, + "step": 17997 + }, + { + "epoch": 0.5284514651476893, + "grad_norm": 0.0, + "learning_rate": 9.56459784043127e-06, + "loss": 1.3672, + "step": 17998 + }, + { + "epoch": 0.5284808268248282, + "grad_norm": 0.0, + "learning_rate": 9.563647783858275e-06, + "loss": 1.2646, + "step": 17999 + }, + { + "epoch": 0.5285101885019672, + "grad_norm": 0.0, + "learning_rate": 9.56269773123132e-06, + "loss": 1.3496, + "step": 18000 + }, + { + "epoch": 0.5285395501791063, + "grad_norm": 0.0, + "learning_rate": 9.561747682559006e-06, + "loss": 1.2617, + "step": 18001 + }, + { + "epoch": 0.5285689118562452, + "grad_norm": 0.0, + "learning_rate": 9.560797637849918e-06, + "loss": 1.375, + "step": 18002 + }, + { + "epoch": 0.5285982735333842, + "grad_norm": 0.0, + "learning_rate": 9.559847597112652e-06, + "loss": 1.3291, + "step": 18003 + }, + { + "epoch": 0.5286276352105233, + "grad_norm": 0.0, + "learning_rate": 9.558897560355796e-06, + "loss": 1.3291, + "step": 18004 + }, + { + "epoch": 0.5286569968876622, + "grad_norm": 0.0, + "learning_rate": 9.55794752758794e-06, + "loss": 1.4165, + "step": 18005 + }, + { + "epoch": 0.5286863585648012, + "grad_norm": 0.0, + "learning_rate": 9.556997498817681e-06, + "loss": 1.3428, + "step": 18006 + }, + { + "epoch": 0.5287157202419402, + "grad_norm": 0.0, + "learning_rate": 9.556047474053602e-06, + "loss": 1.4072, + "step": 18007 + }, + { + "epoch": 0.5287450819190792, + "grad_norm": 0.0, + "learning_rate": 9.555097453304303e-06, + "loss": 1.3672, + "step": 18008 + }, + { + "epoch": 0.5287744435962182, + "grad_norm": 0.0, + "learning_rate": 9.55414743657837e-06, + "loss": 1.3076, + "step": 18009 + }, + { + "epoch": 0.5288038052733572, + "grad_norm": 0.0, + "learning_rate": 9.5531974238844e-06, + "loss": 1.292, + "step": 18010 + }, + { + "epoch": 0.5288331669504962, + "grad_norm": 0.0, + "learning_rate": 9.552247415230974e-06, + "loss": 1.3965, + "step": 18011 + }, + { + "epoch": 0.5288625286276352, + "grad_norm": 0.0, + "learning_rate": 9.551297410626693e-06, + "loss": 1.4053, + "step": 18012 + }, + { + "epoch": 0.5288918903047742, + "grad_norm": 0.0, + "learning_rate": 9.550347410080145e-06, + "loss": 1.2842, + "step": 18013 + }, + { + "epoch": 0.5289212519819132, + "grad_norm": 0.0, + "learning_rate": 9.549397413599919e-06, + "loss": 1.2871, + "step": 18014 + }, + { + "epoch": 0.5289506136590522, + "grad_norm": 0.0, + "learning_rate": 9.548447421194609e-06, + "loss": 1.2822, + "step": 18015 + }, + { + "epoch": 0.5289799753361912, + "grad_norm": 0.0, + "learning_rate": 9.547497432872801e-06, + "loss": 1.415, + "step": 18016 + }, + { + "epoch": 0.5290093370133302, + "grad_norm": 0.0, + "learning_rate": 9.546547448643094e-06, + "loss": 1.248, + "step": 18017 + }, + { + "epoch": 0.5290386986904692, + "grad_norm": 0.0, + "learning_rate": 9.545597468514071e-06, + "loss": 1.168, + "step": 18018 + }, + { + "epoch": 0.5290680603676082, + "grad_norm": 0.0, + "learning_rate": 9.54464749249433e-06, + "loss": 1.4814, + "step": 18019 + }, + { + "epoch": 0.5290974220447472, + "grad_norm": 0.0, + "learning_rate": 9.543697520592455e-06, + "loss": 1.3428, + "step": 18020 + }, + { + "epoch": 0.5291267837218862, + "grad_norm": 0.0, + "learning_rate": 9.542747552817042e-06, + "loss": 1.334, + "step": 18021 + }, + { + "epoch": 0.5291561453990252, + "grad_norm": 0.0, + "learning_rate": 9.54179758917668e-06, + "loss": 1.2217, + "step": 18022 + }, + { + "epoch": 0.5291855070761642, + "grad_norm": 0.0, + "learning_rate": 9.54084762967996e-06, + "loss": 1.3379, + "step": 18023 + }, + { + "epoch": 0.5292148687533031, + "grad_norm": 0.0, + "learning_rate": 9.539897674335474e-06, + "loss": 1.2485, + "step": 18024 + }, + { + "epoch": 0.5292442304304422, + "grad_norm": 0.0, + "learning_rate": 9.538947723151805e-06, + "loss": 1.3809, + "step": 18025 + }, + { + "epoch": 0.5292735921075812, + "grad_norm": 0.0, + "learning_rate": 9.537997776137556e-06, + "loss": 1.3687, + "step": 18026 + }, + { + "epoch": 0.5293029537847201, + "grad_norm": 0.0, + "learning_rate": 9.537047833301309e-06, + "loss": 1.4609, + "step": 18027 + }, + { + "epoch": 0.5293323154618592, + "grad_norm": 0.0, + "learning_rate": 9.536097894651657e-06, + "loss": 1.3105, + "step": 18028 + }, + { + "epoch": 0.5293616771389982, + "grad_norm": 0.0, + "learning_rate": 9.535147960197191e-06, + "loss": 1.3252, + "step": 18029 + }, + { + "epoch": 0.5293910388161371, + "grad_norm": 0.0, + "learning_rate": 9.534198029946501e-06, + "loss": 1.3594, + "step": 18030 + }, + { + "epoch": 0.5294204004932762, + "grad_norm": 0.0, + "learning_rate": 9.533248103908176e-06, + "loss": 1.2295, + "step": 18031 + }, + { + "epoch": 0.5294497621704152, + "grad_norm": 0.0, + "learning_rate": 9.532298182090811e-06, + "loss": 1.2676, + "step": 18032 + }, + { + "epoch": 0.5294791238475541, + "grad_norm": 0.0, + "learning_rate": 9.531348264502993e-06, + "loss": 1.2471, + "step": 18033 + }, + { + "epoch": 0.5295084855246932, + "grad_norm": 0.0, + "learning_rate": 9.530398351153311e-06, + "loss": 1.3105, + "step": 18034 + }, + { + "epoch": 0.5295378472018322, + "grad_norm": 0.0, + "learning_rate": 9.529448442050358e-06, + "loss": 1.3027, + "step": 18035 + }, + { + "epoch": 0.5295672088789711, + "grad_norm": 0.0, + "learning_rate": 9.52849853720272e-06, + "loss": 1.2407, + "step": 18036 + }, + { + "epoch": 0.5295965705561102, + "grad_norm": 0.0, + "learning_rate": 9.527548636618994e-06, + "loss": 1.3125, + "step": 18037 + }, + { + "epoch": 0.5296259322332492, + "grad_norm": 0.0, + "learning_rate": 9.526598740307766e-06, + "loss": 1.1636, + "step": 18038 + }, + { + "epoch": 0.5296552939103881, + "grad_norm": 0.0, + "learning_rate": 9.525648848277627e-06, + "loss": 1.2656, + "step": 18039 + }, + { + "epoch": 0.5296846555875272, + "grad_norm": 0.0, + "learning_rate": 9.524698960537165e-06, + "loss": 1.3662, + "step": 18040 + }, + { + "epoch": 0.5297140172646662, + "grad_norm": 0.0, + "learning_rate": 9.523749077094974e-06, + "loss": 1.2065, + "step": 18041 + }, + { + "epoch": 0.5297433789418051, + "grad_norm": 0.0, + "learning_rate": 9.52279919795964e-06, + "loss": 1.2769, + "step": 18042 + }, + { + "epoch": 0.5297727406189442, + "grad_norm": 0.0, + "learning_rate": 9.521849323139759e-06, + "loss": 1.4414, + "step": 18043 + }, + { + "epoch": 0.5298021022960832, + "grad_norm": 0.0, + "learning_rate": 9.520899452643915e-06, + "loss": 1.2441, + "step": 18044 + }, + { + "epoch": 0.5298314639732221, + "grad_norm": 0.0, + "learning_rate": 9.519949586480697e-06, + "loss": 1.2939, + "step": 18045 + }, + { + "epoch": 0.5298608256503612, + "grad_norm": 0.0, + "learning_rate": 9.518999724658703e-06, + "loss": 1.2397, + "step": 18046 + }, + { + "epoch": 0.5298901873275002, + "grad_norm": 0.0, + "learning_rate": 9.518049867186514e-06, + "loss": 1.3633, + "step": 18047 + }, + { + "epoch": 0.5299195490046391, + "grad_norm": 0.0, + "learning_rate": 9.517100014072726e-06, + "loss": 1.4023, + "step": 18048 + }, + { + "epoch": 0.5299489106817782, + "grad_norm": 0.0, + "learning_rate": 9.516150165325922e-06, + "loss": 1.3828, + "step": 18049 + }, + { + "epoch": 0.5299782723589171, + "grad_norm": 0.0, + "learning_rate": 9.5152003209547e-06, + "loss": 1.3477, + "step": 18050 + }, + { + "epoch": 0.5300076340360561, + "grad_norm": 0.0, + "learning_rate": 9.514250480967643e-06, + "loss": 1.2734, + "step": 18051 + }, + { + "epoch": 0.5300369957131952, + "grad_norm": 0.0, + "learning_rate": 9.513300645373346e-06, + "loss": 1.3906, + "step": 18052 + }, + { + "epoch": 0.5300663573903341, + "grad_norm": 0.0, + "learning_rate": 9.512350814180397e-06, + "loss": 1.1475, + "step": 18053 + }, + { + "epoch": 0.5300957190674731, + "grad_norm": 0.0, + "learning_rate": 9.511400987397383e-06, + "loss": 1.3916, + "step": 18054 + }, + { + "epoch": 0.5301250807446122, + "grad_norm": 0.0, + "learning_rate": 9.510451165032895e-06, + "loss": 1.3154, + "step": 18055 + }, + { + "epoch": 0.5301544424217511, + "grad_norm": 0.0, + "learning_rate": 9.509501347095518e-06, + "loss": 1.249, + "step": 18056 + }, + { + "epoch": 0.5301838040988901, + "grad_norm": 0.0, + "learning_rate": 9.508551533593852e-06, + "loss": 1.3623, + "step": 18057 + }, + { + "epoch": 0.5302131657760292, + "grad_norm": 0.0, + "learning_rate": 9.507601724536475e-06, + "loss": 1.2158, + "step": 18058 + }, + { + "epoch": 0.5302425274531681, + "grad_norm": 0.0, + "learning_rate": 9.506651919931987e-06, + "loss": 1.3311, + "step": 18059 + }, + { + "epoch": 0.5302718891303071, + "grad_norm": 0.0, + "learning_rate": 9.505702119788968e-06, + "loss": 1.3037, + "step": 18060 + }, + { + "epoch": 0.5303012508074462, + "grad_norm": 0.0, + "learning_rate": 9.504752324116013e-06, + "loss": 1.3975, + "step": 18061 + }, + { + "epoch": 0.5303306124845851, + "grad_norm": 0.0, + "learning_rate": 9.503802532921711e-06, + "loss": 1.2266, + "step": 18062 + }, + { + "epoch": 0.5303599741617241, + "grad_norm": 0.0, + "learning_rate": 9.502852746214648e-06, + "loss": 1.3799, + "step": 18063 + }, + { + "epoch": 0.5303893358388632, + "grad_norm": 0.0, + "learning_rate": 9.501902964003418e-06, + "loss": 1.2471, + "step": 18064 + }, + { + "epoch": 0.5304186975160021, + "grad_norm": 0.0, + "learning_rate": 9.500953186296602e-06, + "loss": 1.4756, + "step": 18065 + }, + { + "epoch": 0.5304480591931411, + "grad_norm": 0.0, + "learning_rate": 9.500003413102797e-06, + "loss": 1.2939, + "step": 18066 + }, + { + "epoch": 0.5304774208702802, + "grad_norm": 0.0, + "learning_rate": 9.499053644430588e-06, + "loss": 1.3545, + "step": 18067 + }, + { + "epoch": 0.5305067825474191, + "grad_norm": 0.0, + "learning_rate": 9.498103880288567e-06, + "loss": 1.3936, + "step": 18068 + }, + { + "epoch": 0.5305361442245581, + "grad_norm": 0.0, + "learning_rate": 9.497154120685317e-06, + "loss": 1.2969, + "step": 18069 + }, + { + "epoch": 0.5305655059016972, + "grad_norm": 0.0, + "learning_rate": 9.496204365629436e-06, + "loss": 1.2852, + "step": 18070 + }, + { + "epoch": 0.5305948675788361, + "grad_norm": 0.0, + "learning_rate": 9.495254615129505e-06, + "loss": 1.3096, + "step": 18071 + }, + { + "epoch": 0.5306242292559751, + "grad_norm": 0.0, + "learning_rate": 9.494304869194118e-06, + "loss": 1.3643, + "step": 18072 + }, + { + "epoch": 0.5306535909331141, + "grad_norm": 0.0, + "learning_rate": 9.493355127831863e-06, + "loss": 1.2832, + "step": 18073 + }, + { + "epoch": 0.5306829526102531, + "grad_norm": 0.0, + "learning_rate": 9.492405391051321e-06, + "loss": 1.3164, + "step": 18074 + }, + { + "epoch": 0.5307123142873921, + "grad_norm": 0.0, + "learning_rate": 9.491455658861092e-06, + "loss": 1.292, + "step": 18075 + }, + { + "epoch": 0.5307416759645311, + "grad_norm": 0.0, + "learning_rate": 9.490505931269758e-06, + "loss": 1.3721, + "step": 18076 + }, + { + "epoch": 0.5307710376416701, + "grad_norm": 0.0, + "learning_rate": 9.489556208285911e-06, + "loss": 1.291, + "step": 18077 + }, + { + "epoch": 0.5308003993188091, + "grad_norm": 0.0, + "learning_rate": 9.488606489918135e-06, + "loss": 1.1875, + "step": 18078 + }, + { + "epoch": 0.5308297609959481, + "grad_norm": 0.0, + "learning_rate": 9.487656776175026e-06, + "loss": 1.2959, + "step": 18079 + }, + { + "epoch": 0.5308591226730871, + "grad_norm": 0.0, + "learning_rate": 9.486707067065161e-06, + "loss": 1.2607, + "step": 18080 + }, + { + "epoch": 0.5308884843502261, + "grad_norm": 0.0, + "learning_rate": 9.485757362597143e-06, + "loss": 1.4062, + "step": 18081 + }, + { + "epoch": 0.5309178460273651, + "grad_norm": 0.0, + "learning_rate": 9.484807662779551e-06, + "loss": 1.2842, + "step": 18082 + }, + { + "epoch": 0.5309472077045041, + "grad_norm": 0.0, + "learning_rate": 9.483857967620972e-06, + "loss": 1.3848, + "step": 18083 + }, + { + "epoch": 0.5309765693816431, + "grad_norm": 0.0, + "learning_rate": 9.482908277130002e-06, + "loss": 1.1489, + "step": 18084 + }, + { + "epoch": 0.5310059310587821, + "grad_norm": 0.0, + "learning_rate": 9.481958591315218e-06, + "loss": 1.377, + "step": 18085 + }, + { + "epoch": 0.5310352927359211, + "grad_norm": 0.0, + "learning_rate": 9.481008910185221e-06, + "loss": 1.3008, + "step": 18086 + }, + { + "epoch": 0.53106465441306, + "grad_norm": 0.0, + "learning_rate": 9.480059233748592e-06, + "loss": 1.3594, + "step": 18087 + }, + { + "epoch": 0.531094016090199, + "grad_norm": 0.0, + "learning_rate": 9.479109562013921e-06, + "loss": 1.375, + "step": 18088 + }, + { + "epoch": 0.5311233777673381, + "grad_norm": 0.0, + "learning_rate": 9.478159894989792e-06, + "loss": 1.3418, + "step": 18089 + }, + { + "epoch": 0.531152739444477, + "grad_norm": 0.0, + "learning_rate": 9.477210232684801e-06, + "loss": 1.2783, + "step": 18090 + }, + { + "epoch": 0.531182101121616, + "grad_norm": 0.0, + "learning_rate": 9.47626057510753e-06, + "loss": 1.3037, + "step": 18091 + }, + { + "epoch": 0.5312114627987551, + "grad_norm": 0.0, + "learning_rate": 9.47531092226657e-06, + "loss": 1.4014, + "step": 18092 + }, + { + "epoch": 0.531240824475894, + "grad_norm": 0.0, + "learning_rate": 9.47436127417051e-06, + "loss": 1.2773, + "step": 18093 + }, + { + "epoch": 0.531270186153033, + "grad_norm": 0.0, + "learning_rate": 9.47341163082793e-06, + "loss": 1.3042, + "step": 18094 + }, + { + "epoch": 0.5312995478301721, + "grad_norm": 0.0, + "learning_rate": 9.472461992247427e-06, + "loss": 1.3877, + "step": 18095 + }, + { + "epoch": 0.531328909507311, + "grad_norm": 0.0, + "learning_rate": 9.471512358437582e-06, + "loss": 1.1748, + "step": 18096 + }, + { + "epoch": 0.53135827118445, + "grad_norm": 0.0, + "learning_rate": 9.47056272940699e-06, + "loss": 1.373, + "step": 18097 + }, + { + "epoch": 0.5313876328615891, + "grad_norm": 0.0, + "learning_rate": 9.46961310516423e-06, + "loss": 1.291, + "step": 18098 + }, + { + "epoch": 0.531416994538728, + "grad_norm": 0.0, + "learning_rate": 9.4686634857179e-06, + "loss": 1.3809, + "step": 18099 + }, + { + "epoch": 0.531446356215867, + "grad_norm": 0.0, + "learning_rate": 9.467713871076579e-06, + "loss": 1.2988, + "step": 18100 + }, + { + "epoch": 0.5314757178930061, + "grad_norm": 0.0, + "learning_rate": 9.46676426124886e-06, + "loss": 1.4062, + "step": 18101 + }, + { + "epoch": 0.531505079570145, + "grad_norm": 0.0, + "learning_rate": 9.465814656243328e-06, + "loss": 1.3027, + "step": 18102 + }, + { + "epoch": 0.531534441247284, + "grad_norm": 0.0, + "learning_rate": 9.464865056068567e-06, + "loss": 1.2979, + "step": 18103 + }, + { + "epoch": 0.5315638029244231, + "grad_norm": 0.0, + "learning_rate": 9.463915460733176e-06, + "loss": 1.4414, + "step": 18104 + }, + { + "epoch": 0.531593164601562, + "grad_norm": 0.0, + "learning_rate": 9.462965870245726e-06, + "loss": 1.3818, + "step": 18105 + }, + { + "epoch": 0.531622526278701, + "grad_norm": 0.0, + "learning_rate": 9.46201628461482e-06, + "loss": 1.3008, + "step": 18106 + }, + { + "epoch": 0.5316518879558401, + "grad_norm": 0.0, + "learning_rate": 9.461066703849033e-06, + "loss": 1.2227, + "step": 18107 + }, + { + "epoch": 0.531681249632979, + "grad_norm": 0.0, + "learning_rate": 9.460117127956962e-06, + "loss": 1.3721, + "step": 18108 + }, + { + "epoch": 0.531710611310118, + "grad_norm": 0.0, + "learning_rate": 9.459167556947186e-06, + "loss": 1.3145, + "step": 18109 + }, + { + "epoch": 0.5317399729872571, + "grad_norm": 0.0, + "learning_rate": 9.4582179908283e-06, + "loss": 1.2598, + "step": 18110 + }, + { + "epoch": 0.531769334664396, + "grad_norm": 0.0, + "learning_rate": 9.457268429608888e-06, + "loss": 1.3252, + "step": 18111 + }, + { + "epoch": 0.531798696341535, + "grad_norm": 0.0, + "learning_rate": 9.456318873297534e-06, + "loss": 1.2393, + "step": 18112 + }, + { + "epoch": 0.531828058018674, + "grad_norm": 0.0, + "learning_rate": 9.45536932190283e-06, + "loss": 1.3467, + "step": 18113 + }, + { + "epoch": 0.531857419695813, + "grad_norm": 0.0, + "learning_rate": 9.454419775433356e-06, + "loss": 1.3828, + "step": 18114 + }, + { + "epoch": 0.531886781372952, + "grad_norm": 0.0, + "learning_rate": 9.45347023389771e-06, + "loss": 1.2734, + "step": 18115 + }, + { + "epoch": 0.531916143050091, + "grad_norm": 0.0, + "learning_rate": 9.452520697304467e-06, + "loss": 1.3867, + "step": 18116 + }, + { + "epoch": 0.53194550472723, + "grad_norm": 0.0, + "learning_rate": 9.451571165662225e-06, + "loss": 1.2285, + "step": 18117 + }, + { + "epoch": 0.531974866404369, + "grad_norm": 0.0, + "learning_rate": 9.45062163897956e-06, + "loss": 1.4014, + "step": 18118 + }, + { + "epoch": 0.532004228081508, + "grad_norm": 0.0, + "learning_rate": 9.449672117265068e-06, + "loss": 1.2812, + "step": 18119 + }, + { + "epoch": 0.532033589758647, + "grad_norm": 0.0, + "learning_rate": 9.448722600527328e-06, + "loss": 1.3262, + "step": 18120 + }, + { + "epoch": 0.532062951435786, + "grad_norm": 0.0, + "learning_rate": 9.447773088774937e-06, + "loss": 1.4131, + "step": 18121 + }, + { + "epoch": 0.532092313112925, + "grad_norm": 0.0, + "learning_rate": 9.446823582016472e-06, + "loss": 1.3462, + "step": 18122 + }, + { + "epoch": 0.532121674790064, + "grad_norm": 0.0, + "learning_rate": 9.445874080260522e-06, + "loss": 1.415, + "step": 18123 + }, + { + "epoch": 0.532151036467203, + "grad_norm": 0.0, + "learning_rate": 9.444924583515675e-06, + "loss": 1.4141, + "step": 18124 + }, + { + "epoch": 0.532180398144342, + "grad_norm": 0.0, + "learning_rate": 9.443975091790517e-06, + "loss": 1.4229, + "step": 18125 + }, + { + "epoch": 0.532209759821481, + "grad_norm": 0.0, + "learning_rate": 9.443025605093638e-06, + "loss": 1.2461, + "step": 18126 + }, + { + "epoch": 0.53223912149862, + "grad_norm": 0.0, + "learning_rate": 9.442076123433614e-06, + "loss": 1.4121, + "step": 18127 + }, + { + "epoch": 0.532268483175759, + "grad_norm": 0.0, + "learning_rate": 9.441126646819046e-06, + "loss": 1.3281, + "step": 18128 + }, + { + "epoch": 0.532297844852898, + "grad_norm": 0.0, + "learning_rate": 9.440177175258508e-06, + "loss": 1.25, + "step": 18129 + }, + { + "epoch": 0.532327206530037, + "grad_norm": 0.0, + "learning_rate": 9.439227708760594e-06, + "loss": 1.3799, + "step": 18130 + }, + { + "epoch": 0.532356568207176, + "grad_norm": 0.0, + "learning_rate": 9.438278247333889e-06, + "loss": 1.2915, + "step": 18131 + }, + { + "epoch": 0.532385929884315, + "grad_norm": 0.0, + "learning_rate": 9.437328790986975e-06, + "loss": 1.3848, + "step": 18132 + }, + { + "epoch": 0.532415291561454, + "grad_norm": 0.0, + "learning_rate": 9.436379339728441e-06, + "loss": 1.3188, + "step": 18133 + }, + { + "epoch": 0.532444653238593, + "grad_norm": 0.0, + "learning_rate": 9.43542989356687e-06, + "loss": 1.3369, + "step": 18134 + }, + { + "epoch": 0.532474014915732, + "grad_norm": 0.0, + "learning_rate": 9.434480452510855e-06, + "loss": 1.3965, + "step": 18135 + }, + { + "epoch": 0.532503376592871, + "grad_norm": 0.0, + "learning_rate": 9.433531016568976e-06, + "loss": 1.3135, + "step": 18136 + }, + { + "epoch": 0.53253273827001, + "grad_norm": 0.0, + "learning_rate": 9.432581585749824e-06, + "loss": 1.3359, + "step": 18137 + }, + { + "epoch": 0.532562099947149, + "grad_norm": 0.0, + "learning_rate": 9.431632160061977e-06, + "loss": 1.3613, + "step": 18138 + }, + { + "epoch": 0.5325914616242879, + "grad_norm": 0.0, + "learning_rate": 9.43068273951403e-06, + "loss": 1.2109, + "step": 18139 + }, + { + "epoch": 0.532620823301427, + "grad_norm": 0.0, + "learning_rate": 9.429733324114564e-06, + "loss": 1.2314, + "step": 18140 + }, + { + "epoch": 0.532650184978566, + "grad_norm": 0.0, + "learning_rate": 9.428783913872167e-06, + "loss": 1.4277, + "step": 18141 + }, + { + "epoch": 0.5326795466557049, + "grad_norm": 0.0, + "learning_rate": 9.427834508795423e-06, + "loss": 1.2744, + "step": 18142 + }, + { + "epoch": 0.532708908332844, + "grad_norm": 0.0, + "learning_rate": 9.426885108892914e-06, + "loss": 1.3047, + "step": 18143 + }, + { + "epoch": 0.532738270009983, + "grad_norm": 0.0, + "learning_rate": 9.425935714173234e-06, + "loss": 1.3652, + "step": 18144 + }, + { + "epoch": 0.5327676316871219, + "grad_norm": 0.0, + "learning_rate": 9.424986324644963e-06, + "loss": 1.2861, + "step": 18145 + }, + { + "epoch": 0.532796993364261, + "grad_norm": 0.0, + "learning_rate": 9.42403694031669e-06, + "loss": 1.3438, + "step": 18146 + }, + { + "epoch": 0.5328263550414, + "grad_norm": 0.0, + "learning_rate": 9.423087561196992e-06, + "loss": 1.3818, + "step": 18147 + }, + { + "epoch": 0.5328557167185389, + "grad_norm": 0.0, + "learning_rate": 9.422138187294466e-06, + "loss": 1.1899, + "step": 18148 + }, + { + "epoch": 0.532885078395678, + "grad_norm": 0.0, + "learning_rate": 9.421188818617692e-06, + "loss": 1.2358, + "step": 18149 + }, + { + "epoch": 0.532914440072817, + "grad_norm": 0.0, + "learning_rate": 9.420239455175257e-06, + "loss": 1.2188, + "step": 18150 + }, + { + "epoch": 0.5329438017499559, + "grad_norm": 0.0, + "learning_rate": 9.419290096975746e-06, + "loss": 1.4307, + "step": 18151 + }, + { + "epoch": 0.532973163427095, + "grad_norm": 0.0, + "learning_rate": 9.418340744027736e-06, + "loss": 1.3179, + "step": 18152 + }, + { + "epoch": 0.533002525104234, + "grad_norm": 0.0, + "learning_rate": 9.417391396339827e-06, + "loss": 1.3896, + "step": 18153 + }, + { + "epoch": 0.5330318867813729, + "grad_norm": 0.0, + "learning_rate": 9.416442053920593e-06, + "loss": 1.3936, + "step": 18154 + }, + { + "epoch": 0.533061248458512, + "grad_norm": 0.0, + "learning_rate": 9.415492716778626e-06, + "loss": 1.3301, + "step": 18155 + }, + { + "epoch": 0.533090610135651, + "grad_norm": 0.0, + "learning_rate": 9.414543384922504e-06, + "loss": 1.3271, + "step": 18156 + }, + { + "epoch": 0.5331199718127899, + "grad_norm": 0.0, + "learning_rate": 9.41359405836082e-06, + "loss": 1.3496, + "step": 18157 + }, + { + "epoch": 0.533149333489929, + "grad_norm": 0.0, + "learning_rate": 9.412644737102152e-06, + "loss": 1.3711, + "step": 18158 + }, + { + "epoch": 0.533178695167068, + "grad_norm": 0.0, + "learning_rate": 9.411695421155091e-06, + "loss": 1.3018, + "step": 18159 + }, + { + "epoch": 0.5332080568442069, + "grad_norm": 0.0, + "learning_rate": 9.410746110528217e-06, + "loss": 1.2783, + "step": 18160 + }, + { + "epoch": 0.533237418521346, + "grad_norm": 0.0, + "learning_rate": 9.40979680523012e-06, + "loss": 1.3193, + "step": 18161 + }, + { + "epoch": 0.533266780198485, + "grad_norm": 0.0, + "learning_rate": 9.408847505269379e-06, + "loss": 1.4062, + "step": 18162 + }, + { + "epoch": 0.5332961418756239, + "grad_norm": 0.0, + "learning_rate": 9.40789821065458e-06, + "loss": 1.3555, + "step": 18163 + }, + { + "epoch": 0.533325503552763, + "grad_norm": 0.0, + "learning_rate": 9.406948921394311e-06, + "loss": 1.4531, + "step": 18164 + }, + { + "epoch": 0.5333548652299019, + "grad_norm": 0.0, + "learning_rate": 9.405999637497154e-06, + "loss": 1.3506, + "step": 18165 + }, + { + "epoch": 0.5333842269070409, + "grad_norm": 0.0, + "learning_rate": 9.405050358971696e-06, + "loss": 1.292, + "step": 18166 + }, + { + "epoch": 0.53341358858418, + "grad_norm": 0.0, + "learning_rate": 9.404101085826516e-06, + "loss": 1.2764, + "step": 18167 + }, + { + "epoch": 0.5334429502613189, + "grad_norm": 0.0, + "learning_rate": 9.403151818070207e-06, + "loss": 1.3564, + "step": 18168 + }, + { + "epoch": 0.5334723119384579, + "grad_norm": 0.0, + "learning_rate": 9.402202555711345e-06, + "loss": 1.3037, + "step": 18169 + }, + { + "epoch": 0.533501673615597, + "grad_norm": 0.0, + "learning_rate": 9.401253298758523e-06, + "loss": 1.3467, + "step": 18170 + }, + { + "epoch": 0.5335310352927359, + "grad_norm": 0.0, + "learning_rate": 9.40030404722032e-06, + "loss": 1.3691, + "step": 18171 + }, + { + "epoch": 0.5335603969698749, + "grad_norm": 0.0, + "learning_rate": 9.399354801105316e-06, + "loss": 1.3213, + "step": 18172 + }, + { + "epoch": 0.533589758647014, + "grad_norm": 0.0, + "learning_rate": 9.398405560422106e-06, + "loss": 1.1943, + "step": 18173 + }, + { + "epoch": 0.5336191203241529, + "grad_norm": 0.0, + "learning_rate": 9.397456325179264e-06, + "loss": 1.3604, + "step": 18174 + }, + { + "epoch": 0.5336484820012919, + "grad_norm": 0.0, + "learning_rate": 9.396507095385383e-06, + "loss": 1.1763, + "step": 18175 + }, + { + "epoch": 0.533677843678431, + "grad_norm": 0.0, + "learning_rate": 9.39555787104904e-06, + "loss": 1.3281, + "step": 18176 + }, + { + "epoch": 0.5337072053555699, + "grad_norm": 0.0, + "learning_rate": 9.394608652178823e-06, + "loss": 1.3955, + "step": 18177 + }, + { + "epoch": 0.5337365670327089, + "grad_norm": 0.0, + "learning_rate": 9.393659438783315e-06, + "loss": 1.333, + "step": 18178 + }, + { + "epoch": 0.533765928709848, + "grad_norm": 0.0, + "learning_rate": 9.392710230871101e-06, + "loss": 1.3223, + "step": 18179 + }, + { + "epoch": 0.5337952903869869, + "grad_norm": 0.0, + "learning_rate": 9.391761028450765e-06, + "loss": 1.4297, + "step": 18180 + }, + { + "epoch": 0.5338246520641259, + "grad_norm": 0.0, + "learning_rate": 9.390811831530884e-06, + "loss": 1.3496, + "step": 18181 + }, + { + "epoch": 0.533854013741265, + "grad_norm": 0.0, + "learning_rate": 9.389862640120055e-06, + "loss": 1.2104, + "step": 18182 + }, + { + "epoch": 0.5338833754184039, + "grad_norm": 0.0, + "learning_rate": 9.388913454226847e-06, + "loss": 1.2788, + "step": 18183 + }, + { + "epoch": 0.5339127370955429, + "grad_norm": 0.0, + "learning_rate": 9.387964273859857e-06, + "loss": 1.3154, + "step": 18184 + }, + { + "epoch": 0.533942098772682, + "grad_norm": 0.0, + "learning_rate": 9.38701509902766e-06, + "loss": 1.252, + "step": 18185 + }, + { + "epoch": 0.5339714604498209, + "grad_norm": 0.0, + "learning_rate": 9.386065929738844e-06, + "loss": 1.3584, + "step": 18186 + }, + { + "epoch": 0.5340008221269599, + "grad_norm": 0.0, + "learning_rate": 9.385116766001987e-06, + "loss": 1.3174, + "step": 18187 + }, + { + "epoch": 0.5340301838040988, + "grad_norm": 0.0, + "learning_rate": 9.384167607825683e-06, + "loss": 1.3418, + "step": 18188 + }, + { + "epoch": 0.5340595454812379, + "grad_norm": 0.0, + "learning_rate": 9.383218455218504e-06, + "loss": 1.3145, + "step": 18189 + }, + { + "epoch": 0.5340889071583769, + "grad_norm": 0.0, + "learning_rate": 9.382269308189043e-06, + "loss": 1.1963, + "step": 18190 + }, + { + "epoch": 0.5341182688355158, + "grad_norm": 0.0, + "learning_rate": 9.381320166745877e-06, + "loss": 1.4297, + "step": 18191 + }, + { + "epoch": 0.5341476305126549, + "grad_norm": 0.0, + "learning_rate": 9.380371030897587e-06, + "loss": 1.3691, + "step": 18192 + }, + { + "epoch": 0.5341769921897939, + "grad_norm": 0.0, + "learning_rate": 9.379421900652765e-06, + "loss": 1.2275, + "step": 18193 + }, + { + "epoch": 0.5342063538669328, + "grad_norm": 0.0, + "learning_rate": 9.378472776019987e-06, + "loss": 1.2178, + "step": 18194 + }, + { + "epoch": 0.5342357155440719, + "grad_norm": 0.0, + "learning_rate": 9.377523657007842e-06, + "loss": 1.2197, + "step": 18195 + }, + { + "epoch": 0.5342650772212109, + "grad_norm": 0.0, + "learning_rate": 9.376574543624905e-06, + "loss": 1.3486, + "step": 18196 + }, + { + "epoch": 0.5342944388983498, + "grad_norm": 0.0, + "learning_rate": 9.375625435879769e-06, + "loss": 1.2754, + "step": 18197 + }, + { + "epoch": 0.5343238005754889, + "grad_norm": 0.0, + "learning_rate": 9.37467633378101e-06, + "loss": 1.208, + "step": 18198 + }, + { + "epoch": 0.5343531622526279, + "grad_norm": 0.0, + "learning_rate": 9.373727237337216e-06, + "loss": 1.293, + "step": 18199 + }, + { + "epoch": 0.5343825239297668, + "grad_norm": 0.0, + "learning_rate": 9.372778146556965e-06, + "loss": 1.3281, + "step": 18200 + }, + { + "epoch": 0.5344118856069059, + "grad_norm": 0.0, + "learning_rate": 9.371829061448839e-06, + "loss": 1.25, + "step": 18201 + }, + { + "epoch": 0.5344412472840449, + "grad_norm": 0.0, + "learning_rate": 9.370879982021426e-06, + "loss": 1.2471, + "step": 18202 + }, + { + "epoch": 0.5344706089611838, + "grad_norm": 0.0, + "learning_rate": 9.369930908283306e-06, + "loss": 1.269, + "step": 18203 + }, + { + "epoch": 0.5344999706383229, + "grad_norm": 0.0, + "learning_rate": 9.368981840243064e-06, + "loss": 1.2871, + "step": 18204 + }, + { + "epoch": 0.5345293323154618, + "grad_norm": 0.0, + "learning_rate": 9.368032777909277e-06, + "loss": 1.2871, + "step": 18205 + }, + { + "epoch": 0.5345586939926008, + "grad_norm": 0.0, + "learning_rate": 9.367083721290536e-06, + "loss": 1.3398, + "step": 18206 + }, + { + "epoch": 0.5345880556697399, + "grad_norm": 0.0, + "learning_rate": 9.366134670395415e-06, + "loss": 1.4248, + "step": 18207 + }, + { + "epoch": 0.5346174173468788, + "grad_norm": 0.0, + "learning_rate": 9.365185625232505e-06, + "loss": 1.2505, + "step": 18208 + }, + { + "epoch": 0.5346467790240178, + "grad_norm": 0.0, + "learning_rate": 9.36423658581038e-06, + "loss": 1.2305, + "step": 18209 + }, + { + "epoch": 0.5346761407011569, + "grad_norm": 0.0, + "learning_rate": 9.363287552137629e-06, + "loss": 1.3369, + "step": 18210 + }, + { + "epoch": 0.5347055023782958, + "grad_norm": 0.0, + "learning_rate": 9.362338524222832e-06, + "loss": 1.2222, + "step": 18211 + }, + { + "epoch": 0.5347348640554348, + "grad_norm": 0.0, + "learning_rate": 9.361389502074566e-06, + "loss": 1.2979, + "step": 18212 + }, + { + "epoch": 0.5347642257325739, + "grad_norm": 0.0, + "learning_rate": 9.360440485701423e-06, + "loss": 1.2881, + "step": 18213 + }, + { + "epoch": 0.5347935874097128, + "grad_norm": 0.0, + "learning_rate": 9.359491475111979e-06, + "loss": 1.3281, + "step": 18214 + }, + { + "epoch": 0.5348229490868518, + "grad_norm": 0.0, + "learning_rate": 9.35854247031482e-06, + "loss": 1.3262, + "step": 18215 + }, + { + "epoch": 0.5348523107639909, + "grad_norm": 0.0, + "learning_rate": 9.35759347131852e-06, + "loss": 1.4248, + "step": 18216 + }, + { + "epoch": 0.5348816724411298, + "grad_norm": 0.0, + "learning_rate": 9.356644478131672e-06, + "loss": 1.3281, + "step": 18217 + }, + { + "epoch": 0.5349110341182688, + "grad_norm": 0.0, + "learning_rate": 9.35569549076285e-06, + "loss": 1.3496, + "step": 18218 + }, + { + "epoch": 0.5349403957954079, + "grad_norm": 0.0, + "learning_rate": 9.35474650922064e-06, + "loss": 1.251, + "step": 18219 + }, + { + "epoch": 0.5349697574725468, + "grad_norm": 0.0, + "learning_rate": 9.353797533513626e-06, + "loss": 1.3301, + "step": 18220 + }, + { + "epoch": 0.5349991191496858, + "grad_norm": 0.0, + "learning_rate": 9.352848563650378e-06, + "loss": 1.3818, + "step": 18221 + }, + { + "epoch": 0.5350284808268249, + "grad_norm": 0.0, + "learning_rate": 9.351899599639494e-06, + "loss": 1.4258, + "step": 18222 + }, + { + "epoch": 0.5350578425039638, + "grad_norm": 0.0, + "learning_rate": 9.350950641489544e-06, + "loss": 1.2451, + "step": 18223 + }, + { + "epoch": 0.5350872041811028, + "grad_norm": 0.0, + "learning_rate": 9.350001689209115e-06, + "loss": 1.3125, + "step": 18224 + }, + { + "epoch": 0.5351165658582419, + "grad_norm": 0.0, + "learning_rate": 9.349052742806784e-06, + "loss": 1.2734, + "step": 18225 + }, + { + "epoch": 0.5351459275353808, + "grad_norm": 0.0, + "learning_rate": 9.34810380229114e-06, + "loss": 1.4326, + "step": 18226 + }, + { + "epoch": 0.5351752892125198, + "grad_norm": 0.0, + "learning_rate": 9.347154867670758e-06, + "loss": 1.3408, + "step": 18227 + }, + { + "epoch": 0.5352046508896589, + "grad_norm": 0.0, + "learning_rate": 9.346205938954224e-06, + "loss": 1.2715, + "step": 18228 + }, + { + "epoch": 0.5352340125667978, + "grad_norm": 0.0, + "learning_rate": 9.345257016150112e-06, + "loss": 1.3613, + "step": 18229 + }, + { + "epoch": 0.5352633742439368, + "grad_norm": 0.0, + "learning_rate": 9.344308099267013e-06, + "loss": 1.2793, + "step": 18230 + }, + { + "epoch": 0.5352927359210758, + "grad_norm": 0.0, + "learning_rate": 9.343359188313504e-06, + "loss": 1.2949, + "step": 18231 + }, + { + "epoch": 0.5353220975982148, + "grad_norm": 0.0, + "learning_rate": 9.342410283298165e-06, + "loss": 1.1899, + "step": 18232 + }, + { + "epoch": 0.5353514592753538, + "grad_norm": 0.0, + "learning_rate": 9.341461384229578e-06, + "loss": 1.2607, + "step": 18233 + }, + { + "epoch": 0.5353808209524928, + "grad_norm": 0.0, + "learning_rate": 9.340512491116324e-06, + "loss": 1.3379, + "step": 18234 + }, + { + "epoch": 0.5354101826296318, + "grad_norm": 0.0, + "learning_rate": 9.339563603966987e-06, + "loss": 1.3984, + "step": 18235 + }, + { + "epoch": 0.5354395443067708, + "grad_norm": 0.0, + "learning_rate": 9.33861472279014e-06, + "loss": 1.3145, + "step": 18236 + }, + { + "epoch": 0.5354689059839098, + "grad_norm": 0.0, + "learning_rate": 9.337665847594376e-06, + "loss": 1.1729, + "step": 18237 + }, + { + "epoch": 0.5354982676610488, + "grad_norm": 0.0, + "learning_rate": 9.336716978388264e-06, + "loss": 1.2666, + "step": 18238 + }, + { + "epoch": 0.5355276293381878, + "grad_norm": 0.0, + "learning_rate": 9.335768115180394e-06, + "loss": 1.2607, + "step": 18239 + }, + { + "epoch": 0.5355569910153268, + "grad_norm": 0.0, + "learning_rate": 9.334819257979342e-06, + "loss": 1.2783, + "step": 18240 + }, + { + "epoch": 0.5355863526924658, + "grad_norm": 0.0, + "learning_rate": 9.333870406793687e-06, + "loss": 1.2568, + "step": 18241 + }, + { + "epoch": 0.5356157143696048, + "grad_norm": 0.0, + "learning_rate": 9.332921561632015e-06, + "loss": 1.2051, + "step": 18242 + }, + { + "epoch": 0.5356450760467438, + "grad_norm": 0.0, + "learning_rate": 9.331972722502903e-06, + "loss": 1.3154, + "step": 18243 + }, + { + "epoch": 0.5356744377238828, + "grad_norm": 0.0, + "learning_rate": 9.331023889414936e-06, + "loss": 1.3789, + "step": 18244 + }, + { + "epoch": 0.5357037994010218, + "grad_norm": 0.0, + "learning_rate": 9.330075062376686e-06, + "loss": 1.2725, + "step": 18245 + }, + { + "epoch": 0.5357331610781608, + "grad_norm": 0.0, + "learning_rate": 9.329126241396743e-06, + "loss": 1.3926, + "step": 18246 + }, + { + "epoch": 0.5357625227552998, + "grad_norm": 0.0, + "learning_rate": 9.328177426483681e-06, + "loss": 1.3105, + "step": 18247 + }, + { + "epoch": 0.5357918844324387, + "grad_norm": 0.0, + "learning_rate": 9.327228617646084e-06, + "loss": 1.5078, + "step": 18248 + }, + { + "epoch": 0.5358212461095778, + "grad_norm": 0.0, + "learning_rate": 9.326279814892533e-06, + "loss": 1.2324, + "step": 18249 + }, + { + "epoch": 0.5358506077867168, + "grad_norm": 0.0, + "learning_rate": 9.3253310182316e-06, + "loss": 1.3506, + "step": 18250 + }, + { + "epoch": 0.5358799694638557, + "grad_norm": 0.0, + "learning_rate": 9.324382227671876e-06, + "loss": 1.2617, + "step": 18251 + }, + { + "epoch": 0.5359093311409948, + "grad_norm": 0.0, + "learning_rate": 9.323433443221936e-06, + "loss": 1.333, + "step": 18252 + }, + { + "epoch": 0.5359386928181338, + "grad_norm": 0.0, + "learning_rate": 9.32248466489036e-06, + "loss": 1.4385, + "step": 18253 + }, + { + "epoch": 0.5359680544952727, + "grad_norm": 0.0, + "learning_rate": 9.321535892685727e-06, + "loss": 1.4385, + "step": 18254 + }, + { + "epoch": 0.5359974161724118, + "grad_norm": 0.0, + "learning_rate": 9.32058712661662e-06, + "loss": 1.3262, + "step": 18255 + }, + { + "epoch": 0.5360267778495508, + "grad_norm": 0.0, + "learning_rate": 9.319638366691619e-06, + "loss": 1.373, + "step": 18256 + }, + { + "epoch": 0.5360561395266897, + "grad_norm": 0.0, + "learning_rate": 9.318689612919303e-06, + "loss": 1.4014, + "step": 18257 + }, + { + "epoch": 0.5360855012038288, + "grad_norm": 0.0, + "learning_rate": 9.317740865308247e-06, + "loss": 1.4219, + "step": 18258 + }, + { + "epoch": 0.5361148628809678, + "grad_norm": 0.0, + "learning_rate": 9.316792123867042e-06, + "loss": 1.2607, + "step": 18259 + }, + { + "epoch": 0.5361442245581067, + "grad_norm": 0.0, + "learning_rate": 9.315843388604258e-06, + "loss": 1.2812, + "step": 18260 + }, + { + "epoch": 0.5361735862352458, + "grad_norm": 0.0, + "learning_rate": 9.314894659528472e-06, + "loss": 1.2725, + "step": 18261 + }, + { + "epoch": 0.5362029479123848, + "grad_norm": 0.0, + "learning_rate": 9.313945936648276e-06, + "loss": 1.2158, + "step": 18262 + }, + { + "epoch": 0.5362323095895237, + "grad_norm": 0.0, + "learning_rate": 9.31299721997224e-06, + "loss": 1.3301, + "step": 18263 + }, + { + "epoch": 0.5362616712666628, + "grad_norm": 0.0, + "learning_rate": 9.312048509508947e-06, + "loss": 1.2588, + "step": 18264 + }, + { + "epoch": 0.5362910329438018, + "grad_norm": 0.0, + "learning_rate": 9.311099805266971e-06, + "loss": 1.3975, + "step": 18265 + }, + { + "epoch": 0.5363203946209407, + "grad_norm": 0.0, + "learning_rate": 9.310151107254902e-06, + "loss": 1.2905, + "step": 18266 + }, + { + "epoch": 0.5363497562980798, + "grad_norm": 0.0, + "learning_rate": 9.309202415481308e-06, + "loss": 1.3408, + "step": 18267 + }, + { + "epoch": 0.5363791179752188, + "grad_norm": 0.0, + "learning_rate": 9.308253729954778e-06, + "loss": 1.2832, + "step": 18268 + }, + { + "epoch": 0.5364084796523577, + "grad_norm": 0.0, + "learning_rate": 9.307305050683887e-06, + "loss": 1.2705, + "step": 18269 + }, + { + "epoch": 0.5364378413294968, + "grad_norm": 0.0, + "learning_rate": 9.306356377677208e-06, + "loss": 1.3857, + "step": 18270 + }, + { + "epoch": 0.5364672030066358, + "grad_norm": 0.0, + "learning_rate": 9.305407710943331e-06, + "loss": 1.3018, + "step": 18271 + }, + { + "epoch": 0.5364965646837747, + "grad_norm": 0.0, + "learning_rate": 9.304459050490828e-06, + "loss": 1.3604, + "step": 18272 + }, + { + "epoch": 0.5365259263609138, + "grad_norm": 0.0, + "learning_rate": 9.303510396328282e-06, + "loss": 1.2349, + "step": 18273 + }, + { + "epoch": 0.5365552880380527, + "grad_norm": 0.0, + "learning_rate": 9.302561748464265e-06, + "loss": 1.4238, + "step": 18274 + }, + { + "epoch": 0.5365846497151917, + "grad_norm": 0.0, + "learning_rate": 9.301613106907365e-06, + "loss": 1.2793, + "step": 18275 + }, + { + "epoch": 0.5366140113923308, + "grad_norm": 0.0, + "learning_rate": 9.300664471666155e-06, + "loss": 1.2793, + "step": 18276 + }, + { + "epoch": 0.5366433730694697, + "grad_norm": 0.0, + "learning_rate": 9.299715842749218e-06, + "loss": 1.2852, + "step": 18277 + }, + { + "epoch": 0.5366727347466087, + "grad_norm": 0.0, + "learning_rate": 9.298767220165125e-06, + "loss": 1.3848, + "step": 18278 + }, + { + "epoch": 0.5367020964237478, + "grad_norm": 0.0, + "learning_rate": 9.297818603922464e-06, + "loss": 1.2842, + "step": 18279 + }, + { + "epoch": 0.5367314581008867, + "grad_norm": 0.0, + "learning_rate": 9.296869994029807e-06, + "loss": 1.3438, + "step": 18280 + }, + { + "epoch": 0.5367608197780257, + "grad_norm": 0.0, + "learning_rate": 9.295921390495735e-06, + "loss": 1.2705, + "step": 18281 + }, + { + "epoch": 0.5367901814551648, + "grad_norm": 0.0, + "learning_rate": 9.294972793328829e-06, + "loss": 1.3838, + "step": 18282 + }, + { + "epoch": 0.5368195431323037, + "grad_norm": 0.0, + "learning_rate": 9.29402420253766e-06, + "loss": 1.415, + "step": 18283 + }, + { + "epoch": 0.5368489048094427, + "grad_norm": 0.0, + "learning_rate": 9.293075618130815e-06, + "loss": 1.3115, + "step": 18284 + }, + { + "epoch": 0.5368782664865818, + "grad_norm": 0.0, + "learning_rate": 9.292127040116864e-06, + "loss": 1.2471, + "step": 18285 + }, + { + "epoch": 0.5369076281637207, + "grad_norm": 0.0, + "learning_rate": 9.291178468504392e-06, + "loss": 1.2168, + "step": 18286 + }, + { + "epoch": 0.5369369898408597, + "grad_norm": 0.0, + "learning_rate": 9.290229903301975e-06, + "loss": 1.3018, + "step": 18287 + }, + { + "epoch": 0.5369663515179988, + "grad_norm": 0.0, + "learning_rate": 9.28928134451819e-06, + "loss": 1.2656, + "step": 18288 + }, + { + "epoch": 0.5369957131951377, + "grad_norm": 0.0, + "learning_rate": 9.288332792161619e-06, + "loss": 1.292, + "step": 18289 + }, + { + "epoch": 0.5370250748722767, + "grad_norm": 0.0, + "learning_rate": 9.287384246240831e-06, + "loss": 1.2402, + "step": 18290 + }, + { + "epoch": 0.5370544365494156, + "grad_norm": 0.0, + "learning_rate": 9.286435706764414e-06, + "loss": 1.2969, + "step": 18291 + }, + { + "epoch": 0.5370837982265547, + "grad_norm": 0.0, + "learning_rate": 9.285487173740939e-06, + "loss": 1.2236, + "step": 18292 + }, + { + "epoch": 0.5371131599036937, + "grad_norm": 0.0, + "learning_rate": 9.284538647178989e-06, + "loss": 1.3154, + "step": 18293 + }, + { + "epoch": 0.5371425215808326, + "grad_norm": 0.0, + "learning_rate": 9.283590127087136e-06, + "loss": 1.2451, + "step": 18294 + }, + { + "epoch": 0.5371718832579717, + "grad_norm": 0.0, + "learning_rate": 9.282641613473963e-06, + "loss": 1.3271, + "step": 18295 + }, + { + "epoch": 0.5372012449351107, + "grad_norm": 0.0, + "learning_rate": 9.281693106348045e-06, + "loss": 1.3643, + "step": 18296 + }, + { + "epoch": 0.5372306066122496, + "grad_norm": 0.0, + "learning_rate": 9.280744605717961e-06, + "loss": 1.2637, + "step": 18297 + }, + { + "epoch": 0.5372599682893887, + "grad_norm": 0.0, + "learning_rate": 9.279796111592289e-06, + "loss": 1.2979, + "step": 18298 + }, + { + "epoch": 0.5372893299665277, + "grad_norm": 0.0, + "learning_rate": 9.278847623979601e-06, + "loss": 1.2402, + "step": 18299 + }, + { + "epoch": 0.5373186916436666, + "grad_norm": 0.0, + "learning_rate": 9.277899142888484e-06, + "loss": 1.3125, + "step": 18300 + }, + { + "epoch": 0.5373480533208057, + "grad_norm": 0.0, + "learning_rate": 9.276950668327506e-06, + "loss": 1.4004, + "step": 18301 + }, + { + "epoch": 0.5373774149979447, + "grad_norm": 0.0, + "learning_rate": 9.276002200305251e-06, + "loss": 1.1855, + "step": 18302 + }, + { + "epoch": 0.5374067766750836, + "grad_norm": 0.0, + "learning_rate": 9.275053738830289e-06, + "loss": 1.0913, + "step": 18303 + }, + { + "epoch": 0.5374361383522227, + "grad_norm": 0.0, + "learning_rate": 9.274105283911206e-06, + "loss": 1.3701, + "step": 18304 + }, + { + "epoch": 0.5374655000293617, + "grad_norm": 0.0, + "learning_rate": 9.273156835556573e-06, + "loss": 1.2285, + "step": 18305 + }, + { + "epoch": 0.5374948617065006, + "grad_norm": 0.0, + "learning_rate": 9.272208393774971e-06, + "loss": 1.2261, + "step": 18306 + }, + { + "epoch": 0.5375242233836397, + "grad_norm": 0.0, + "learning_rate": 9.271259958574969e-06, + "loss": 1.3467, + "step": 18307 + }, + { + "epoch": 0.5375535850607787, + "grad_norm": 0.0, + "learning_rate": 9.270311529965157e-06, + "loss": 1.2305, + "step": 18308 + }, + { + "epoch": 0.5375829467379176, + "grad_norm": 0.0, + "learning_rate": 9.269363107954105e-06, + "loss": 1.2295, + "step": 18309 + }, + { + "epoch": 0.5376123084150567, + "grad_norm": 0.0, + "learning_rate": 9.268414692550383e-06, + "loss": 1.4072, + "step": 18310 + }, + { + "epoch": 0.5376416700921957, + "grad_norm": 0.0, + "learning_rate": 9.267466283762579e-06, + "loss": 1.2383, + "step": 18311 + }, + { + "epoch": 0.5376710317693346, + "grad_norm": 0.0, + "learning_rate": 9.266517881599264e-06, + "loss": 1.2607, + "step": 18312 + }, + { + "epoch": 0.5377003934464737, + "grad_norm": 0.0, + "learning_rate": 9.265569486069017e-06, + "loss": 1.2441, + "step": 18313 + }, + { + "epoch": 0.5377297551236127, + "grad_norm": 0.0, + "learning_rate": 9.264621097180409e-06, + "loss": 1.3145, + "step": 18314 + }, + { + "epoch": 0.5377591168007516, + "grad_norm": 0.0, + "learning_rate": 9.263672714942024e-06, + "loss": 1.251, + "step": 18315 + }, + { + "epoch": 0.5377884784778907, + "grad_norm": 0.0, + "learning_rate": 9.262724339362433e-06, + "loss": 1.249, + "step": 18316 + }, + { + "epoch": 0.5378178401550296, + "grad_norm": 0.0, + "learning_rate": 9.261775970450218e-06, + "loss": 1.3779, + "step": 18317 + }, + { + "epoch": 0.5378472018321686, + "grad_norm": 0.0, + "learning_rate": 9.260827608213951e-06, + "loss": 1.2627, + "step": 18318 + }, + { + "epoch": 0.5378765635093077, + "grad_norm": 0.0, + "learning_rate": 9.259879252662207e-06, + "loss": 1.2803, + "step": 18319 + }, + { + "epoch": 0.5379059251864466, + "grad_norm": 0.0, + "learning_rate": 9.258930903803566e-06, + "loss": 1.2358, + "step": 18320 + }, + { + "epoch": 0.5379352868635856, + "grad_norm": 0.0, + "learning_rate": 9.257982561646603e-06, + "loss": 1.3262, + "step": 18321 + }, + { + "epoch": 0.5379646485407247, + "grad_norm": 0.0, + "learning_rate": 9.257034226199894e-06, + "loss": 1.3008, + "step": 18322 + }, + { + "epoch": 0.5379940102178636, + "grad_norm": 0.0, + "learning_rate": 9.256085897472011e-06, + "loss": 1.3486, + "step": 18323 + }, + { + "epoch": 0.5380233718950026, + "grad_norm": 0.0, + "learning_rate": 9.255137575471538e-06, + "loss": 1.2705, + "step": 18324 + }, + { + "epoch": 0.5380527335721417, + "grad_norm": 0.0, + "learning_rate": 9.254189260207044e-06, + "loss": 1.4619, + "step": 18325 + }, + { + "epoch": 0.5380820952492806, + "grad_norm": 0.0, + "learning_rate": 9.25324095168711e-06, + "loss": 1.3242, + "step": 18326 + }, + { + "epoch": 0.5381114569264196, + "grad_norm": 0.0, + "learning_rate": 9.252292649920306e-06, + "loss": 1.2866, + "step": 18327 + }, + { + "epoch": 0.5381408186035587, + "grad_norm": 0.0, + "learning_rate": 9.251344354915215e-06, + "loss": 1.2891, + "step": 18328 + }, + { + "epoch": 0.5381701802806976, + "grad_norm": 0.0, + "learning_rate": 9.250396066680408e-06, + "loss": 1.2627, + "step": 18329 + }, + { + "epoch": 0.5381995419578366, + "grad_norm": 0.0, + "learning_rate": 9.249447785224459e-06, + "loss": 1.2993, + "step": 18330 + }, + { + "epoch": 0.5382289036349757, + "grad_norm": 0.0, + "learning_rate": 9.24849951055595e-06, + "loss": 1.3965, + "step": 18331 + }, + { + "epoch": 0.5382582653121146, + "grad_norm": 0.0, + "learning_rate": 9.247551242683446e-06, + "loss": 1.2949, + "step": 18332 + }, + { + "epoch": 0.5382876269892536, + "grad_norm": 0.0, + "learning_rate": 9.246602981615533e-06, + "loss": 1.3652, + "step": 18333 + }, + { + "epoch": 0.5383169886663927, + "grad_norm": 0.0, + "learning_rate": 9.245654727360783e-06, + "loss": 1.3096, + "step": 18334 + }, + { + "epoch": 0.5383463503435316, + "grad_norm": 0.0, + "learning_rate": 9.24470647992777e-06, + "loss": 1.1445, + "step": 18335 + }, + { + "epoch": 0.5383757120206706, + "grad_norm": 0.0, + "learning_rate": 9.243758239325064e-06, + "loss": 1.2178, + "step": 18336 + }, + { + "epoch": 0.5384050736978097, + "grad_norm": 0.0, + "learning_rate": 9.242810005561255e-06, + "loss": 1.4336, + "step": 18337 + }, + { + "epoch": 0.5384344353749486, + "grad_norm": 0.0, + "learning_rate": 9.241861778644907e-06, + "loss": 1.3496, + "step": 18338 + }, + { + "epoch": 0.5384637970520876, + "grad_norm": 0.0, + "learning_rate": 9.240913558584592e-06, + "loss": 1.3311, + "step": 18339 + }, + { + "epoch": 0.5384931587292267, + "grad_norm": 0.0, + "learning_rate": 9.239965345388894e-06, + "loss": 1.2412, + "step": 18340 + }, + { + "epoch": 0.5385225204063656, + "grad_norm": 0.0, + "learning_rate": 9.239017139066382e-06, + "loss": 1.2539, + "step": 18341 + }, + { + "epoch": 0.5385518820835046, + "grad_norm": 0.0, + "learning_rate": 9.238068939625637e-06, + "loss": 1.3281, + "step": 18342 + }, + { + "epoch": 0.5385812437606436, + "grad_norm": 0.0, + "learning_rate": 9.237120747075222e-06, + "loss": 1.3086, + "step": 18343 + }, + { + "epoch": 0.5386106054377826, + "grad_norm": 0.0, + "learning_rate": 9.236172561423727e-06, + "loss": 1.335, + "step": 18344 + }, + { + "epoch": 0.5386399671149216, + "grad_norm": 0.0, + "learning_rate": 9.235224382679716e-06, + "loss": 1.3779, + "step": 18345 + }, + { + "epoch": 0.5386693287920606, + "grad_norm": 0.0, + "learning_rate": 9.234276210851768e-06, + "loss": 1.2861, + "step": 18346 + }, + { + "epoch": 0.5386986904691996, + "grad_norm": 0.0, + "learning_rate": 9.233328045948451e-06, + "loss": 1.2559, + "step": 18347 + }, + { + "epoch": 0.5387280521463386, + "grad_norm": 0.0, + "learning_rate": 9.232379887978351e-06, + "loss": 1.1855, + "step": 18348 + }, + { + "epoch": 0.5387574138234776, + "grad_norm": 0.0, + "learning_rate": 9.231431736950036e-06, + "loss": 1.2998, + "step": 18349 + }, + { + "epoch": 0.5387867755006166, + "grad_norm": 0.0, + "learning_rate": 9.230483592872078e-06, + "loss": 1.3418, + "step": 18350 + }, + { + "epoch": 0.5388161371777556, + "grad_norm": 0.0, + "learning_rate": 9.229535455753056e-06, + "loss": 1.3652, + "step": 18351 + }, + { + "epoch": 0.5388454988548946, + "grad_norm": 0.0, + "learning_rate": 9.22858732560154e-06, + "loss": 1.3936, + "step": 18352 + }, + { + "epoch": 0.5388748605320336, + "grad_norm": 0.0, + "learning_rate": 9.227639202426108e-06, + "loss": 1.3623, + "step": 18353 + }, + { + "epoch": 0.5389042222091726, + "grad_norm": 0.0, + "learning_rate": 9.226691086235331e-06, + "loss": 1.1841, + "step": 18354 + }, + { + "epoch": 0.5389335838863116, + "grad_norm": 0.0, + "learning_rate": 9.225742977037787e-06, + "loss": 1.3325, + "step": 18355 + }, + { + "epoch": 0.5389629455634506, + "grad_norm": 0.0, + "learning_rate": 9.224794874842044e-06, + "loss": 1.3262, + "step": 18356 + }, + { + "epoch": 0.5389923072405896, + "grad_norm": 0.0, + "learning_rate": 9.223846779656683e-06, + "loss": 1.374, + "step": 18357 + }, + { + "epoch": 0.5390216689177286, + "grad_norm": 0.0, + "learning_rate": 9.222898691490274e-06, + "loss": 1.1816, + "step": 18358 + }, + { + "epoch": 0.5390510305948676, + "grad_norm": 0.0, + "learning_rate": 9.22195061035139e-06, + "loss": 1.27, + "step": 18359 + }, + { + "epoch": 0.5390803922720065, + "grad_norm": 0.0, + "learning_rate": 9.221002536248606e-06, + "loss": 1.2212, + "step": 18360 + }, + { + "epoch": 0.5391097539491456, + "grad_norm": 0.0, + "learning_rate": 9.220054469190493e-06, + "loss": 1.2305, + "step": 18361 + }, + { + "epoch": 0.5391391156262846, + "grad_norm": 0.0, + "learning_rate": 9.219106409185634e-06, + "loss": 1.4355, + "step": 18362 + }, + { + "epoch": 0.5391684773034235, + "grad_norm": 0.0, + "learning_rate": 9.218158356242589e-06, + "loss": 1.2646, + "step": 18363 + }, + { + "epoch": 0.5391978389805626, + "grad_norm": 0.0, + "learning_rate": 9.217210310369942e-06, + "loss": 1.2725, + "step": 18364 + }, + { + "epoch": 0.5392272006577016, + "grad_norm": 0.0, + "learning_rate": 9.21626227157626e-06, + "loss": 1.3877, + "step": 18365 + }, + { + "epoch": 0.5392565623348405, + "grad_norm": 0.0, + "learning_rate": 9.215314239870122e-06, + "loss": 1.4043, + "step": 18366 + }, + { + "epoch": 0.5392859240119796, + "grad_norm": 0.0, + "learning_rate": 9.2143662152601e-06, + "loss": 1.2139, + "step": 18367 + }, + { + "epoch": 0.5393152856891186, + "grad_norm": 0.0, + "learning_rate": 9.213418197754758e-06, + "loss": 1.3301, + "step": 18368 + }, + { + "epoch": 0.5393446473662575, + "grad_norm": 0.0, + "learning_rate": 9.212470187362682e-06, + "loss": 1.3193, + "step": 18369 + }, + { + "epoch": 0.5393740090433966, + "grad_norm": 0.0, + "learning_rate": 9.211522184092438e-06, + "loss": 1.355, + "step": 18370 + }, + { + "epoch": 0.5394033707205356, + "grad_norm": 0.0, + "learning_rate": 9.210574187952603e-06, + "loss": 1.3311, + "step": 18371 + }, + { + "epoch": 0.5394327323976745, + "grad_norm": 0.0, + "learning_rate": 9.209626198951743e-06, + "loss": 1.2544, + "step": 18372 + }, + { + "epoch": 0.5394620940748136, + "grad_norm": 0.0, + "learning_rate": 9.208678217098441e-06, + "loss": 1.2412, + "step": 18373 + }, + { + "epoch": 0.5394914557519526, + "grad_norm": 0.0, + "learning_rate": 9.207730242401262e-06, + "loss": 1.292, + "step": 18374 + }, + { + "epoch": 0.5395208174290915, + "grad_norm": 0.0, + "learning_rate": 9.206782274868783e-06, + "loss": 1.2461, + "step": 18375 + }, + { + "epoch": 0.5395501791062306, + "grad_norm": 0.0, + "learning_rate": 9.205834314509571e-06, + "loss": 1.2505, + "step": 18376 + }, + { + "epoch": 0.5395795407833696, + "grad_norm": 0.0, + "learning_rate": 9.204886361332208e-06, + "loss": 1.3555, + "step": 18377 + }, + { + "epoch": 0.5396089024605085, + "grad_norm": 0.0, + "learning_rate": 9.20393841534526e-06, + "loss": 1.2803, + "step": 18378 + }, + { + "epoch": 0.5396382641376476, + "grad_norm": 0.0, + "learning_rate": 9.202990476557298e-06, + "loss": 1.123, + "step": 18379 + }, + { + "epoch": 0.5396676258147866, + "grad_norm": 0.0, + "learning_rate": 9.2020425449769e-06, + "loss": 1.3174, + "step": 18380 + }, + { + "epoch": 0.5396969874919255, + "grad_norm": 0.0, + "learning_rate": 9.201094620612632e-06, + "loss": 1.1523, + "step": 18381 + }, + { + "epoch": 0.5397263491690646, + "grad_norm": 0.0, + "learning_rate": 9.200146703473075e-06, + "loss": 1.2793, + "step": 18382 + }, + { + "epoch": 0.5397557108462036, + "grad_norm": 0.0, + "learning_rate": 9.199198793566792e-06, + "loss": 1.4082, + "step": 18383 + }, + { + "epoch": 0.5397850725233425, + "grad_norm": 0.0, + "learning_rate": 9.198250890902362e-06, + "loss": 1.2114, + "step": 18384 + }, + { + "epoch": 0.5398144342004816, + "grad_norm": 0.0, + "learning_rate": 9.197302995488351e-06, + "loss": 1.3125, + "step": 18385 + }, + { + "epoch": 0.5398437958776205, + "grad_norm": 0.0, + "learning_rate": 9.19635510733334e-06, + "loss": 1.1846, + "step": 18386 + }, + { + "epoch": 0.5398731575547595, + "grad_norm": 0.0, + "learning_rate": 9.195407226445894e-06, + "loss": 1.335, + "step": 18387 + }, + { + "epoch": 0.5399025192318986, + "grad_norm": 0.0, + "learning_rate": 9.194459352834584e-06, + "loss": 1.2026, + "step": 18388 + }, + { + "epoch": 0.5399318809090375, + "grad_norm": 0.0, + "learning_rate": 9.193511486507987e-06, + "loss": 1.3633, + "step": 18389 + }, + { + "epoch": 0.5399612425861765, + "grad_norm": 0.0, + "learning_rate": 9.192563627474668e-06, + "loss": 1.251, + "step": 18390 + }, + { + "epoch": 0.5399906042633155, + "grad_norm": 0.0, + "learning_rate": 9.191615775743208e-06, + "loss": 1.1689, + "step": 18391 + }, + { + "epoch": 0.5400199659404545, + "grad_norm": 0.0, + "learning_rate": 9.19066793132217e-06, + "loss": 1.4043, + "step": 18392 + }, + { + "epoch": 0.5400493276175935, + "grad_norm": 0.0, + "learning_rate": 9.189720094220131e-06, + "loss": 1.459, + "step": 18393 + }, + { + "epoch": 0.5400786892947325, + "grad_norm": 0.0, + "learning_rate": 9.188772264445659e-06, + "loss": 1.3267, + "step": 18394 + }, + { + "epoch": 0.5401080509718715, + "grad_norm": 0.0, + "learning_rate": 9.18782444200733e-06, + "loss": 1.3232, + "step": 18395 + }, + { + "epoch": 0.5401374126490105, + "grad_norm": 0.0, + "learning_rate": 9.186876626913707e-06, + "loss": 1.2964, + "step": 18396 + }, + { + "epoch": 0.5401667743261495, + "grad_norm": 0.0, + "learning_rate": 9.185928819173372e-06, + "loss": 1.2969, + "step": 18397 + }, + { + "epoch": 0.5401961360032885, + "grad_norm": 0.0, + "learning_rate": 9.184981018794893e-06, + "loss": 1.417, + "step": 18398 + }, + { + "epoch": 0.5402254976804275, + "grad_norm": 0.0, + "learning_rate": 9.184033225786835e-06, + "loss": 1.1924, + "step": 18399 + }, + { + "epoch": 0.5402548593575665, + "grad_norm": 0.0, + "learning_rate": 9.183085440157777e-06, + "loss": 1.3564, + "step": 18400 + }, + { + "epoch": 0.5402842210347055, + "grad_norm": 0.0, + "learning_rate": 9.182137661916281e-06, + "loss": 1.3496, + "step": 18401 + }, + { + "epoch": 0.5403135827118445, + "grad_norm": 0.0, + "learning_rate": 9.181189891070928e-06, + "loss": 1.335, + "step": 18402 + }, + { + "epoch": 0.5403429443889834, + "grad_norm": 0.0, + "learning_rate": 9.180242127630283e-06, + "loss": 1.4531, + "step": 18403 + }, + { + "epoch": 0.5403723060661225, + "grad_norm": 0.0, + "learning_rate": 9.179294371602921e-06, + "loss": 1.2812, + "step": 18404 + }, + { + "epoch": 0.5404016677432615, + "grad_norm": 0.0, + "learning_rate": 9.178346622997406e-06, + "loss": 1.4043, + "step": 18405 + }, + { + "epoch": 0.5404310294204004, + "grad_norm": 0.0, + "learning_rate": 9.177398881822314e-06, + "loss": 1.1758, + "step": 18406 + }, + { + "epoch": 0.5404603910975395, + "grad_norm": 0.0, + "learning_rate": 9.176451148086218e-06, + "loss": 1.2227, + "step": 18407 + }, + { + "epoch": 0.5404897527746785, + "grad_norm": 0.0, + "learning_rate": 9.175503421797683e-06, + "loss": 1.2036, + "step": 18408 + }, + { + "epoch": 0.5405191144518174, + "grad_norm": 0.0, + "learning_rate": 9.174555702965283e-06, + "loss": 1.3037, + "step": 18409 + }, + { + "epoch": 0.5405484761289565, + "grad_norm": 0.0, + "learning_rate": 9.173607991597582e-06, + "loss": 1.3057, + "step": 18410 + }, + { + "epoch": 0.5405778378060955, + "grad_norm": 0.0, + "learning_rate": 9.17266028770316e-06, + "loss": 1.3779, + "step": 18411 + }, + { + "epoch": 0.5406071994832344, + "grad_norm": 0.0, + "learning_rate": 9.171712591290582e-06, + "loss": 1.335, + "step": 18412 + }, + { + "epoch": 0.5406365611603735, + "grad_norm": 0.0, + "learning_rate": 9.17076490236842e-06, + "loss": 1.3037, + "step": 18413 + }, + { + "epoch": 0.5406659228375125, + "grad_norm": 0.0, + "learning_rate": 9.169817220945242e-06, + "loss": 1.3496, + "step": 18414 + }, + { + "epoch": 0.5406952845146514, + "grad_norm": 0.0, + "learning_rate": 9.168869547029622e-06, + "loss": 1.3818, + "step": 18415 + }, + { + "epoch": 0.5407246461917905, + "grad_norm": 0.0, + "learning_rate": 9.167921880630123e-06, + "loss": 1.2012, + "step": 18416 + }, + { + "epoch": 0.5407540078689295, + "grad_norm": 0.0, + "learning_rate": 9.166974221755324e-06, + "loss": 1.3301, + "step": 18417 + }, + { + "epoch": 0.5407833695460684, + "grad_norm": 0.0, + "learning_rate": 9.16602657041379e-06, + "loss": 1.2549, + "step": 18418 + }, + { + "epoch": 0.5408127312232075, + "grad_norm": 0.0, + "learning_rate": 9.165078926614088e-06, + "loss": 1.3706, + "step": 18419 + }, + { + "epoch": 0.5408420929003465, + "grad_norm": 0.0, + "learning_rate": 9.164131290364796e-06, + "loss": 1.3467, + "step": 18420 + }, + { + "epoch": 0.5408714545774854, + "grad_norm": 0.0, + "learning_rate": 9.163183661674472e-06, + "loss": 1.3389, + "step": 18421 + }, + { + "epoch": 0.5409008162546245, + "grad_norm": 0.0, + "learning_rate": 9.162236040551698e-06, + "loss": 1.2764, + "step": 18422 + }, + { + "epoch": 0.5409301779317635, + "grad_norm": 0.0, + "learning_rate": 9.161288427005037e-06, + "loss": 1.3896, + "step": 18423 + }, + { + "epoch": 0.5409595396089024, + "grad_norm": 0.0, + "learning_rate": 9.16034082104306e-06, + "loss": 1.3682, + "step": 18424 + }, + { + "epoch": 0.5409889012860415, + "grad_norm": 0.0, + "learning_rate": 9.159393222674332e-06, + "loss": 1.4033, + "step": 18425 + }, + { + "epoch": 0.5410182629631805, + "grad_norm": 0.0, + "learning_rate": 9.15844563190743e-06, + "loss": 1.2314, + "step": 18426 + }, + { + "epoch": 0.5410476246403194, + "grad_norm": 0.0, + "learning_rate": 9.15749804875092e-06, + "loss": 1.3145, + "step": 18427 + }, + { + "epoch": 0.5410769863174585, + "grad_norm": 0.0, + "learning_rate": 9.156550473213368e-06, + "loss": 1.3115, + "step": 18428 + }, + { + "epoch": 0.5411063479945974, + "grad_norm": 0.0, + "learning_rate": 9.155602905303352e-06, + "loss": 1.2676, + "step": 18429 + }, + { + "epoch": 0.5411357096717364, + "grad_norm": 0.0, + "learning_rate": 9.154655345029428e-06, + "loss": 1.3125, + "step": 18430 + }, + { + "epoch": 0.5411650713488755, + "grad_norm": 0.0, + "learning_rate": 9.153707792400175e-06, + "loss": 1.2119, + "step": 18431 + }, + { + "epoch": 0.5411944330260144, + "grad_norm": 0.0, + "learning_rate": 9.152760247424159e-06, + "loss": 1.3301, + "step": 18432 + }, + { + "epoch": 0.5412237947031534, + "grad_norm": 0.0, + "learning_rate": 9.151812710109952e-06, + "loss": 1.4365, + "step": 18433 + }, + { + "epoch": 0.5412531563802925, + "grad_norm": 0.0, + "learning_rate": 9.150865180466114e-06, + "loss": 1.2998, + "step": 18434 + }, + { + "epoch": 0.5412825180574314, + "grad_norm": 0.0, + "learning_rate": 9.149917658501225e-06, + "loss": 1.3389, + "step": 18435 + }, + { + "epoch": 0.5413118797345704, + "grad_norm": 0.0, + "learning_rate": 9.14897014422385e-06, + "loss": 1.3799, + "step": 18436 + }, + { + "epoch": 0.5413412414117095, + "grad_norm": 0.0, + "learning_rate": 9.14802263764255e-06, + "loss": 1.2793, + "step": 18437 + }, + { + "epoch": 0.5413706030888484, + "grad_norm": 0.0, + "learning_rate": 9.147075138765904e-06, + "loss": 1.3994, + "step": 18438 + }, + { + "epoch": 0.5413999647659874, + "grad_norm": 0.0, + "learning_rate": 9.146127647602471e-06, + "loss": 1.3633, + "step": 18439 + }, + { + "epoch": 0.5414293264431265, + "grad_norm": 0.0, + "learning_rate": 9.14518016416083e-06, + "loss": 1.3501, + "step": 18440 + }, + { + "epoch": 0.5414586881202654, + "grad_norm": 0.0, + "learning_rate": 9.144232688449537e-06, + "loss": 1.2114, + "step": 18441 + }, + { + "epoch": 0.5414880497974044, + "grad_norm": 0.0, + "learning_rate": 9.143285220477172e-06, + "loss": 1.1426, + "step": 18442 + }, + { + "epoch": 0.5415174114745435, + "grad_norm": 0.0, + "learning_rate": 9.142337760252298e-06, + "loss": 1.2578, + "step": 18443 + }, + { + "epoch": 0.5415467731516824, + "grad_norm": 0.0, + "learning_rate": 9.141390307783483e-06, + "loss": 1.3408, + "step": 18444 + }, + { + "epoch": 0.5415761348288214, + "grad_norm": 0.0, + "learning_rate": 9.140442863079292e-06, + "loss": 1.3936, + "step": 18445 + }, + { + "epoch": 0.5416054965059605, + "grad_norm": 0.0, + "learning_rate": 9.139495426148299e-06, + "loss": 1.4805, + "step": 18446 + }, + { + "epoch": 0.5416348581830994, + "grad_norm": 0.0, + "learning_rate": 9.138547996999071e-06, + "loss": 1.1973, + "step": 18447 + }, + { + "epoch": 0.5416642198602384, + "grad_norm": 0.0, + "learning_rate": 9.137600575640173e-06, + "loss": 1.2061, + "step": 18448 + }, + { + "epoch": 0.5416935815373775, + "grad_norm": 0.0, + "learning_rate": 9.136653162080173e-06, + "loss": 1.3154, + "step": 18449 + }, + { + "epoch": 0.5417229432145164, + "grad_norm": 0.0, + "learning_rate": 9.135705756327638e-06, + "loss": 1.1396, + "step": 18450 + }, + { + "epoch": 0.5417523048916554, + "grad_norm": 0.0, + "learning_rate": 9.134758358391139e-06, + "loss": 1.3369, + "step": 18451 + }, + { + "epoch": 0.5417816665687945, + "grad_norm": 0.0, + "learning_rate": 9.133810968279242e-06, + "loss": 1.2598, + "step": 18452 + }, + { + "epoch": 0.5418110282459334, + "grad_norm": 0.0, + "learning_rate": 9.132863586000516e-06, + "loss": 1.2603, + "step": 18453 + }, + { + "epoch": 0.5418403899230724, + "grad_norm": 0.0, + "learning_rate": 9.131916211563521e-06, + "loss": 1.2783, + "step": 18454 + }, + { + "epoch": 0.5418697516002114, + "grad_norm": 0.0, + "learning_rate": 9.130968844976834e-06, + "loss": 1.2812, + "step": 18455 + }, + { + "epoch": 0.5418991132773504, + "grad_norm": 0.0, + "learning_rate": 9.13002148624902e-06, + "loss": 1.2412, + "step": 18456 + }, + { + "epoch": 0.5419284749544894, + "grad_norm": 0.0, + "learning_rate": 9.129074135388641e-06, + "loss": 1.252, + "step": 18457 + }, + { + "epoch": 0.5419578366316284, + "grad_norm": 0.0, + "learning_rate": 9.12812679240427e-06, + "loss": 1.3555, + "step": 18458 + }, + { + "epoch": 0.5419871983087674, + "grad_norm": 0.0, + "learning_rate": 9.12717945730447e-06, + "loss": 1.2173, + "step": 18459 + }, + { + "epoch": 0.5420165599859064, + "grad_norm": 0.0, + "learning_rate": 9.126232130097811e-06, + "loss": 1.3691, + "step": 18460 + }, + { + "epoch": 0.5420459216630454, + "grad_norm": 0.0, + "learning_rate": 9.125284810792858e-06, + "loss": 1.2119, + "step": 18461 + }, + { + "epoch": 0.5420752833401844, + "grad_norm": 0.0, + "learning_rate": 9.124337499398179e-06, + "loss": 1.2744, + "step": 18462 + }, + { + "epoch": 0.5421046450173234, + "grad_norm": 0.0, + "learning_rate": 9.123390195922337e-06, + "loss": 1.3159, + "step": 18463 + }, + { + "epoch": 0.5421340066944624, + "grad_norm": 0.0, + "learning_rate": 9.122442900373908e-06, + "loss": 1.2119, + "step": 18464 + }, + { + "epoch": 0.5421633683716014, + "grad_norm": 0.0, + "learning_rate": 9.121495612761446e-06, + "loss": 1.3286, + "step": 18465 + }, + { + "epoch": 0.5421927300487404, + "grad_norm": 0.0, + "learning_rate": 9.12054833309353e-06, + "loss": 1.374, + "step": 18466 + }, + { + "epoch": 0.5422220917258794, + "grad_norm": 0.0, + "learning_rate": 9.119601061378719e-06, + "loss": 1.3438, + "step": 18467 + }, + { + "epoch": 0.5422514534030184, + "grad_norm": 0.0, + "learning_rate": 9.11865379762558e-06, + "loss": 1.2949, + "step": 18468 + }, + { + "epoch": 0.5422808150801574, + "grad_norm": 0.0, + "learning_rate": 9.11770654184268e-06, + "loss": 1.2266, + "step": 18469 + }, + { + "epoch": 0.5423101767572964, + "grad_norm": 0.0, + "learning_rate": 9.116759294038585e-06, + "loss": 1.3457, + "step": 18470 + }, + { + "epoch": 0.5423395384344354, + "grad_norm": 0.0, + "learning_rate": 9.115812054221865e-06, + "loss": 1.3242, + "step": 18471 + }, + { + "epoch": 0.5423689001115743, + "grad_norm": 0.0, + "learning_rate": 9.11486482240108e-06, + "loss": 1.2568, + "step": 18472 + }, + { + "epoch": 0.5423982617887134, + "grad_norm": 0.0, + "learning_rate": 9.113917598584803e-06, + "loss": 1.2988, + "step": 18473 + }, + { + "epoch": 0.5424276234658524, + "grad_norm": 0.0, + "learning_rate": 9.112970382781589e-06, + "loss": 1.2969, + "step": 18474 + }, + { + "epoch": 0.5424569851429913, + "grad_norm": 0.0, + "learning_rate": 9.112023175000017e-06, + "loss": 1.3018, + "step": 18475 + }, + { + "epoch": 0.5424863468201304, + "grad_norm": 0.0, + "learning_rate": 9.111075975248648e-06, + "loss": 1.3779, + "step": 18476 + }, + { + "epoch": 0.5425157084972694, + "grad_norm": 0.0, + "learning_rate": 9.110128783536043e-06, + "loss": 1.1572, + "step": 18477 + }, + { + "epoch": 0.5425450701744083, + "grad_norm": 0.0, + "learning_rate": 9.109181599870773e-06, + "loss": 1.3457, + "step": 18478 + }, + { + "epoch": 0.5425744318515474, + "grad_norm": 0.0, + "learning_rate": 9.108234424261398e-06, + "loss": 1.2891, + "step": 18479 + }, + { + "epoch": 0.5426037935286864, + "grad_norm": 0.0, + "learning_rate": 9.107287256716492e-06, + "loss": 1.3359, + "step": 18480 + }, + { + "epoch": 0.5426331552058253, + "grad_norm": 0.0, + "learning_rate": 9.106340097244614e-06, + "loss": 1.1777, + "step": 18481 + }, + { + "epoch": 0.5426625168829644, + "grad_norm": 0.0, + "learning_rate": 9.105392945854334e-06, + "loss": 1.207, + "step": 18482 + }, + { + "epoch": 0.5426918785601034, + "grad_norm": 0.0, + "learning_rate": 9.10444580255421e-06, + "loss": 1.4023, + "step": 18483 + }, + { + "epoch": 0.5427212402372423, + "grad_norm": 0.0, + "learning_rate": 9.103498667352815e-06, + "loss": 1.23, + "step": 18484 + }, + { + "epoch": 0.5427506019143814, + "grad_norm": 0.0, + "learning_rate": 9.102551540258713e-06, + "loss": 1.3428, + "step": 18485 + }, + { + "epoch": 0.5427799635915204, + "grad_norm": 0.0, + "learning_rate": 9.101604421280465e-06, + "loss": 1.2529, + "step": 18486 + }, + { + "epoch": 0.5428093252686593, + "grad_norm": 0.0, + "learning_rate": 9.10065731042664e-06, + "loss": 1.3486, + "step": 18487 + }, + { + "epoch": 0.5428386869457984, + "grad_norm": 0.0, + "learning_rate": 9.099710207705797e-06, + "loss": 1.1973, + "step": 18488 + }, + { + "epoch": 0.5428680486229374, + "grad_norm": 0.0, + "learning_rate": 9.09876311312651e-06, + "loss": 1.3574, + "step": 18489 + }, + { + "epoch": 0.5428974103000763, + "grad_norm": 0.0, + "learning_rate": 9.097816026697335e-06, + "loss": 1.3467, + "step": 18490 + }, + { + "epoch": 0.5429267719772153, + "grad_norm": 0.0, + "learning_rate": 9.096868948426843e-06, + "loss": 1.3281, + "step": 18491 + }, + { + "epoch": 0.5429561336543544, + "grad_norm": 0.0, + "learning_rate": 9.095921878323596e-06, + "loss": 1.2871, + "step": 18492 + }, + { + "epoch": 0.5429854953314933, + "grad_norm": 0.0, + "learning_rate": 9.094974816396161e-06, + "loss": 1.3105, + "step": 18493 + }, + { + "epoch": 0.5430148570086323, + "grad_norm": 0.0, + "learning_rate": 9.094027762653096e-06, + "loss": 1.21, + "step": 18494 + }, + { + "epoch": 0.5430442186857714, + "grad_norm": 0.0, + "learning_rate": 9.093080717102972e-06, + "loss": 1.2754, + "step": 18495 + }, + { + "epoch": 0.5430735803629103, + "grad_norm": 0.0, + "learning_rate": 9.092133679754353e-06, + "loss": 1.2705, + "step": 18496 + }, + { + "epoch": 0.5431029420400493, + "grad_norm": 0.0, + "learning_rate": 9.0911866506158e-06, + "loss": 1.3154, + "step": 18497 + }, + { + "epoch": 0.5431323037171883, + "grad_norm": 0.0, + "learning_rate": 9.09023962969588e-06, + "loss": 1.4727, + "step": 18498 + }, + { + "epoch": 0.5431616653943273, + "grad_norm": 0.0, + "learning_rate": 9.089292617003154e-06, + "loss": 1.3916, + "step": 18499 + }, + { + "epoch": 0.5431910270714663, + "grad_norm": 0.0, + "learning_rate": 9.08834561254619e-06, + "loss": 1.3672, + "step": 18500 + }, + { + "epoch": 0.5432203887486053, + "grad_norm": 0.0, + "learning_rate": 9.08739861633355e-06, + "loss": 1.1973, + "step": 18501 + }, + { + "epoch": 0.5432497504257443, + "grad_norm": 0.0, + "learning_rate": 9.086451628373799e-06, + "loss": 1.2979, + "step": 18502 + }, + { + "epoch": 0.5432791121028833, + "grad_norm": 0.0, + "learning_rate": 9.085504648675494e-06, + "loss": 1.2734, + "step": 18503 + }, + { + "epoch": 0.5433084737800223, + "grad_norm": 0.0, + "learning_rate": 9.08455767724721e-06, + "loss": 1.2861, + "step": 18504 + }, + { + "epoch": 0.5433378354571613, + "grad_norm": 0.0, + "learning_rate": 9.083610714097506e-06, + "loss": 1.4521, + "step": 18505 + }, + { + "epoch": 0.5433671971343003, + "grad_norm": 0.0, + "learning_rate": 9.082663759234944e-06, + "loss": 1.3311, + "step": 18506 + }, + { + "epoch": 0.5433965588114393, + "grad_norm": 0.0, + "learning_rate": 9.081716812668089e-06, + "loss": 1.2036, + "step": 18507 + }, + { + "epoch": 0.5434259204885783, + "grad_norm": 0.0, + "learning_rate": 9.0807698744055e-06, + "loss": 1.2422, + "step": 18508 + }, + { + "epoch": 0.5434552821657173, + "grad_norm": 0.0, + "learning_rate": 9.079822944455749e-06, + "loss": 1.3799, + "step": 18509 + }, + { + "epoch": 0.5434846438428563, + "grad_norm": 0.0, + "learning_rate": 9.078876022827392e-06, + "loss": 1.2656, + "step": 18510 + }, + { + "epoch": 0.5435140055199953, + "grad_norm": 0.0, + "learning_rate": 9.077929109528998e-06, + "loss": 1.3604, + "step": 18511 + }, + { + "epoch": 0.5435433671971343, + "grad_norm": 0.0, + "learning_rate": 9.076982204569123e-06, + "loss": 1.2451, + "step": 18512 + }, + { + "epoch": 0.5435727288742733, + "grad_norm": 0.0, + "learning_rate": 9.076035307956338e-06, + "loss": 1.3516, + "step": 18513 + }, + { + "epoch": 0.5436020905514123, + "grad_norm": 0.0, + "learning_rate": 9.0750884196992e-06, + "loss": 1.2983, + "step": 18514 + }, + { + "epoch": 0.5436314522285512, + "grad_norm": 0.0, + "learning_rate": 9.074141539806276e-06, + "loss": 1.3506, + "step": 18515 + }, + { + "epoch": 0.5436608139056903, + "grad_norm": 0.0, + "learning_rate": 9.073194668286129e-06, + "loss": 1.0532, + "step": 18516 + }, + { + "epoch": 0.5436901755828293, + "grad_norm": 0.0, + "learning_rate": 9.072247805147317e-06, + "loss": 1.3691, + "step": 18517 + }, + { + "epoch": 0.5437195372599682, + "grad_norm": 0.0, + "learning_rate": 9.071300950398408e-06, + "loss": 1.3008, + "step": 18518 + }, + { + "epoch": 0.5437488989371073, + "grad_norm": 0.0, + "learning_rate": 9.070354104047956e-06, + "loss": 1.2334, + "step": 18519 + }, + { + "epoch": 0.5437782606142463, + "grad_norm": 0.0, + "learning_rate": 9.069407266104537e-06, + "loss": 1.2754, + "step": 18520 + }, + { + "epoch": 0.5438076222913852, + "grad_norm": 0.0, + "learning_rate": 9.068460436576704e-06, + "loss": 1.1431, + "step": 18521 + }, + { + "epoch": 0.5438369839685243, + "grad_norm": 0.0, + "learning_rate": 9.067513615473023e-06, + "loss": 1.3936, + "step": 18522 + }, + { + "epoch": 0.5438663456456633, + "grad_norm": 0.0, + "learning_rate": 9.06656680280205e-06, + "loss": 1.3516, + "step": 18523 + }, + { + "epoch": 0.5438957073228022, + "grad_norm": 0.0, + "learning_rate": 9.06561999857236e-06, + "loss": 1.25, + "step": 18524 + }, + { + "epoch": 0.5439250689999413, + "grad_norm": 0.0, + "learning_rate": 9.064673202792504e-06, + "loss": 1.2783, + "step": 18525 + }, + { + "epoch": 0.5439544306770803, + "grad_norm": 0.0, + "learning_rate": 9.063726415471049e-06, + "loss": 1.2607, + "step": 18526 + }, + { + "epoch": 0.5439837923542192, + "grad_norm": 0.0, + "learning_rate": 9.062779636616555e-06, + "loss": 1.3232, + "step": 18527 + }, + { + "epoch": 0.5440131540313583, + "grad_norm": 0.0, + "learning_rate": 9.06183286623758e-06, + "loss": 1.4326, + "step": 18528 + }, + { + "epoch": 0.5440425157084973, + "grad_norm": 0.0, + "learning_rate": 9.060886104342697e-06, + "loss": 1.1841, + "step": 18529 + }, + { + "epoch": 0.5440718773856362, + "grad_norm": 0.0, + "learning_rate": 9.059939350940459e-06, + "loss": 1.3242, + "step": 18530 + }, + { + "epoch": 0.5441012390627753, + "grad_norm": 0.0, + "learning_rate": 9.058992606039433e-06, + "loss": 1.3481, + "step": 18531 + }, + { + "epoch": 0.5441306007399143, + "grad_norm": 0.0, + "learning_rate": 9.058045869648171e-06, + "loss": 1.2158, + "step": 18532 + }, + { + "epoch": 0.5441599624170532, + "grad_norm": 0.0, + "learning_rate": 9.057099141775249e-06, + "loss": 1.1807, + "step": 18533 + }, + { + "epoch": 0.5441893240941923, + "grad_norm": 0.0, + "learning_rate": 9.056152422429217e-06, + "loss": 1.3975, + "step": 18534 + }, + { + "epoch": 0.5442186857713313, + "grad_norm": 0.0, + "learning_rate": 9.055205711618643e-06, + "loss": 1.374, + "step": 18535 + }, + { + "epoch": 0.5442480474484702, + "grad_norm": 0.0, + "learning_rate": 9.054259009352086e-06, + "loss": 1.2847, + "step": 18536 + }, + { + "epoch": 0.5442774091256093, + "grad_norm": 0.0, + "learning_rate": 9.053312315638101e-06, + "loss": 1.3628, + "step": 18537 + }, + { + "epoch": 0.5443067708027483, + "grad_norm": 0.0, + "learning_rate": 9.052365630485262e-06, + "loss": 1.3633, + "step": 18538 + }, + { + "epoch": 0.5443361324798872, + "grad_norm": 0.0, + "learning_rate": 9.051418953902119e-06, + "loss": 1.3486, + "step": 18539 + }, + { + "epoch": 0.5443654941570263, + "grad_norm": 0.0, + "learning_rate": 9.050472285897241e-06, + "loss": 1.4258, + "step": 18540 + }, + { + "epoch": 0.5443948558341652, + "grad_norm": 0.0, + "learning_rate": 9.04952562647918e-06, + "loss": 1.1738, + "step": 18541 + }, + { + "epoch": 0.5444242175113042, + "grad_norm": 0.0, + "learning_rate": 9.04857897565651e-06, + "loss": 1.2617, + "step": 18542 + }, + { + "epoch": 0.5444535791884433, + "grad_norm": 0.0, + "learning_rate": 9.047632333437776e-06, + "loss": 1.2773, + "step": 18543 + }, + { + "epoch": 0.5444829408655822, + "grad_norm": 0.0, + "learning_rate": 9.046685699831552e-06, + "loss": 1.1543, + "step": 18544 + }, + { + "epoch": 0.5445123025427212, + "grad_norm": 0.0, + "learning_rate": 9.045739074846391e-06, + "loss": 1.292, + "step": 18545 + }, + { + "epoch": 0.5445416642198603, + "grad_norm": 0.0, + "learning_rate": 9.044792458490857e-06, + "loss": 1.334, + "step": 18546 + }, + { + "epoch": 0.5445710258969992, + "grad_norm": 0.0, + "learning_rate": 9.043845850773509e-06, + "loss": 1.2988, + "step": 18547 + }, + { + "epoch": 0.5446003875741382, + "grad_norm": 0.0, + "learning_rate": 9.042899251702906e-06, + "loss": 1.2178, + "step": 18548 + }, + { + "epoch": 0.5446297492512773, + "grad_norm": 0.0, + "learning_rate": 9.041952661287614e-06, + "loss": 1.1299, + "step": 18549 + }, + { + "epoch": 0.5446591109284162, + "grad_norm": 0.0, + "learning_rate": 9.041006079536185e-06, + "loss": 1.2617, + "step": 18550 + }, + { + "epoch": 0.5446884726055552, + "grad_norm": 0.0, + "learning_rate": 9.040059506457187e-06, + "loss": 1.3418, + "step": 18551 + }, + { + "epoch": 0.5447178342826943, + "grad_norm": 0.0, + "learning_rate": 9.039112942059174e-06, + "loss": 1.3154, + "step": 18552 + }, + { + "epoch": 0.5447471959598332, + "grad_norm": 0.0, + "learning_rate": 9.03816638635071e-06, + "loss": 1.3496, + "step": 18553 + }, + { + "epoch": 0.5447765576369722, + "grad_norm": 0.0, + "learning_rate": 9.037219839340356e-06, + "loss": 1.2173, + "step": 18554 + }, + { + "epoch": 0.5448059193141113, + "grad_norm": 0.0, + "learning_rate": 9.036273301036668e-06, + "loss": 1.3047, + "step": 18555 + }, + { + "epoch": 0.5448352809912502, + "grad_norm": 0.0, + "learning_rate": 9.035326771448209e-06, + "loss": 1.2812, + "step": 18556 + }, + { + "epoch": 0.5448646426683892, + "grad_norm": 0.0, + "learning_rate": 9.034380250583532e-06, + "loss": 1.3965, + "step": 18557 + }, + { + "epoch": 0.5448940043455283, + "grad_norm": 0.0, + "learning_rate": 9.033433738451205e-06, + "loss": 1.3018, + "step": 18558 + }, + { + "epoch": 0.5449233660226672, + "grad_norm": 0.0, + "learning_rate": 9.032487235059784e-06, + "loss": 1.3442, + "step": 18559 + }, + { + "epoch": 0.5449527276998062, + "grad_norm": 0.0, + "learning_rate": 9.03154074041783e-06, + "loss": 1.3516, + "step": 18560 + }, + { + "epoch": 0.5449820893769453, + "grad_norm": 0.0, + "learning_rate": 9.030594254533897e-06, + "loss": 1.4023, + "step": 18561 + }, + { + "epoch": 0.5450114510540842, + "grad_norm": 0.0, + "learning_rate": 9.029647777416552e-06, + "loss": 1.2749, + "step": 18562 + }, + { + "epoch": 0.5450408127312232, + "grad_norm": 0.0, + "learning_rate": 9.028701309074348e-06, + "loss": 1.2539, + "step": 18563 + }, + { + "epoch": 0.5450701744083623, + "grad_norm": 0.0, + "learning_rate": 9.027754849515853e-06, + "loss": 1.2109, + "step": 18564 + }, + { + "epoch": 0.5450995360855012, + "grad_norm": 0.0, + "learning_rate": 9.026808398749616e-06, + "loss": 1.3477, + "step": 18565 + }, + { + "epoch": 0.5451288977626402, + "grad_norm": 0.0, + "learning_rate": 9.025861956784197e-06, + "loss": 1.2266, + "step": 18566 + }, + { + "epoch": 0.5451582594397792, + "grad_norm": 0.0, + "learning_rate": 9.024915523628163e-06, + "loss": 1.2529, + "step": 18567 + }, + { + "epoch": 0.5451876211169182, + "grad_norm": 0.0, + "learning_rate": 9.023969099290062e-06, + "loss": 1.3584, + "step": 18568 + }, + { + "epoch": 0.5452169827940572, + "grad_norm": 0.0, + "learning_rate": 9.023022683778463e-06, + "loss": 1.1973, + "step": 18569 + }, + { + "epoch": 0.5452463444711962, + "grad_norm": 0.0, + "learning_rate": 9.022076277101918e-06, + "loss": 1.3926, + "step": 18570 + }, + { + "epoch": 0.5452757061483352, + "grad_norm": 0.0, + "learning_rate": 9.02112987926899e-06, + "loss": 1.3652, + "step": 18571 + }, + { + "epoch": 0.5453050678254742, + "grad_norm": 0.0, + "learning_rate": 9.02018349028823e-06, + "loss": 1.3271, + "step": 18572 + }, + { + "epoch": 0.5453344295026132, + "grad_norm": 0.0, + "learning_rate": 9.019237110168206e-06, + "loss": 1.2002, + "step": 18573 + }, + { + "epoch": 0.5453637911797522, + "grad_norm": 0.0, + "learning_rate": 9.018290738917473e-06, + "loss": 1.3721, + "step": 18574 + }, + { + "epoch": 0.5453931528568912, + "grad_norm": 0.0, + "learning_rate": 9.017344376544586e-06, + "loss": 1.3135, + "step": 18575 + }, + { + "epoch": 0.5454225145340302, + "grad_norm": 0.0, + "learning_rate": 9.016398023058106e-06, + "loss": 1.2778, + "step": 18576 + }, + { + "epoch": 0.5454518762111692, + "grad_norm": 0.0, + "learning_rate": 9.015451678466587e-06, + "loss": 1.3867, + "step": 18577 + }, + { + "epoch": 0.5454812378883082, + "grad_norm": 0.0, + "learning_rate": 9.014505342778596e-06, + "loss": 1.3418, + "step": 18578 + }, + { + "epoch": 0.5455105995654472, + "grad_norm": 0.0, + "learning_rate": 9.013559016002683e-06, + "loss": 1.2505, + "step": 18579 + }, + { + "epoch": 0.5455399612425862, + "grad_norm": 0.0, + "learning_rate": 9.01261269814741e-06, + "loss": 1.2949, + "step": 18580 + }, + { + "epoch": 0.5455693229197252, + "grad_norm": 0.0, + "learning_rate": 9.011666389221328e-06, + "loss": 1.3369, + "step": 18581 + }, + { + "epoch": 0.5455986845968642, + "grad_norm": 0.0, + "learning_rate": 9.010720089233006e-06, + "loss": 1.3149, + "step": 18582 + }, + { + "epoch": 0.5456280462740032, + "grad_norm": 0.0, + "learning_rate": 9.009773798190994e-06, + "loss": 1.3223, + "step": 18583 + }, + { + "epoch": 0.5456574079511421, + "grad_norm": 0.0, + "learning_rate": 9.008827516103852e-06, + "loss": 1.3184, + "step": 18584 + }, + { + "epoch": 0.5456867696282812, + "grad_norm": 0.0, + "learning_rate": 9.007881242980136e-06, + "loss": 1.3291, + "step": 18585 + }, + { + "epoch": 0.5457161313054202, + "grad_norm": 0.0, + "learning_rate": 9.0069349788284e-06, + "loss": 1.2744, + "step": 18586 + }, + { + "epoch": 0.5457454929825591, + "grad_norm": 0.0, + "learning_rate": 9.00598872365721e-06, + "loss": 1.2266, + "step": 18587 + }, + { + "epoch": 0.5457748546596982, + "grad_norm": 0.0, + "learning_rate": 9.005042477475118e-06, + "loss": 1.2012, + "step": 18588 + }, + { + "epoch": 0.5458042163368372, + "grad_norm": 0.0, + "learning_rate": 9.004096240290682e-06, + "loss": 1.2764, + "step": 18589 + }, + { + "epoch": 0.5458335780139761, + "grad_norm": 0.0, + "learning_rate": 9.003150012112455e-06, + "loss": 1.4785, + "step": 18590 + }, + { + "epoch": 0.5458629396911151, + "grad_norm": 0.0, + "learning_rate": 9.002203792949002e-06, + "loss": 1.417, + "step": 18591 + }, + { + "epoch": 0.5458923013682542, + "grad_norm": 0.0, + "learning_rate": 9.001257582808873e-06, + "loss": 1.4287, + "step": 18592 + }, + { + "epoch": 0.5459216630453931, + "grad_norm": 0.0, + "learning_rate": 9.00031138170063e-06, + "loss": 1.2861, + "step": 18593 + }, + { + "epoch": 0.5459510247225321, + "grad_norm": 0.0, + "learning_rate": 8.999365189632826e-06, + "loss": 1.186, + "step": 18594 + }, + { + "epoch": 0.5459803863996712, + "grad_norm": 0.0, + "learning_rate": 8.99841900661402e-06, + "loss": 1.1943, + "step": 18595 + }, + { + "epoch": 0.5460097480768101, + "grad_norm": 0.0, + "learning_rate": 8.997472832652767e-06, + "loss": 1.2637, + "step": 18596 + }, + { + "epoch": 0.5460391097539491, + "grad_norm": 0.0, + "learning_rate": 8.99652666775762e-06, + "loss": 1.3486, + "step": 18597 + }, + { + "epoch": 0.5460684714310882, + "grad_norm": 0.0, + "learning_rate": 8.995580511937145e-06, + "loss": 1.2949, + "step": 18598 + }, + { + "epoch": 0.5460978331082271, + "grad_norm": 0.0, + "learning_rate": 8.994634365199888e-06, + "loss": 1.2617, + "step": 18599 + }, + { + "epoch": 0.5461271947853661, + "grad_norm": 0.0, + "learning_rate": 8.993688227554414e-06, + "loss": 1.4385, + "step": 18600 + }, + { + "epoch": 0.5461565564625052, + "grad_norm": 0.0, + "learning_rate": 8.992742099009269e-06, + "loss": 1.3032, + "step": 18601 + }, + { + "epoch": 0.5461859181396441, + "grad_norm": 0.0, + "learning_rate": 8.991795979573021e-06, + "loss": 1.1055, + "step": 18602 + }, + { + "epoch": 0.5462152798167831, + "grad_norm": 0.0, + "learning_rate": 8.990849869254217e-06, + "loss": 1.3232, + "step": 18603 + }, + { + "epoch": 0.5462446414939222, + "grad_norm": 0.0, + "learning_rate": 8.98990376806142e-06, + "loss": 1.2749, + "step": 18604 + }, + { + "epoch": 0.5462740031710611, + "grad_norm": 0.0, + "learning_rate": 8.98895767600318e-06, + "loss": 1.1826, + "step": 18605 + }, + { + "epoch": 0.5463033648482001, + "grad_norm": 0.0, + "learning_rate": 8.988011593088051e-06, + "loss": 1.2363, + "step": 18606 + }, + { + "epoch": 0.5463327265253392, + "grad_norm": 0.0, + "learning_rate": 8.987065519324597e-06, + "loss": 1.3232, + "step": 18607 + }, + { + "epoch": 0.5463620882024781, + "grad_norm": 0.0, + "learning_rate": 8.986119454721368e-06, + "loss": 1.3545, + "step": 18608 + }, + { + "epoch": 0.5463914498796171, + "grad_norm": 0.0, + "learning_rate": 8.98517339928692e-06, + "loss": 1.2627, + "step": 18609 + }, + { + "epoch": 0.5464208115567561, + "grad_norm": 0.0, + "learning_rate": 8.984227353029805e-06, + "loss": 1.3252, + "step": 18610 + }, + { + "epoch": 0.5464501732338951, + "grad_norm": 0.0, + "learning_rate": 8.983281315958587e-06, + "loss": 1.0991, + "step": 18611 + }, + { + "epoch": 0.5464795349110341, + "grad_norm": 0.0, + "learning_rate": 8.982335288081815e-06, + "loss": 1.2046, + "step": 18612 + }, + { + "epoch": 0.5465088965881731, + "grad_norm": 0.0, + "learning_rate": 8.981389269408048e-06, + "loss": 1.2725, + "step": 18613 + }, + { + "epoch": 0.5465382582653121, + "grad_norm": 0.0, + "learning_rate": 8.980443259945837e-06, + "loss": 1.3428, + "step": 18614 + }, + { + "epoch": 0.5465676199424511, + "grad_norm": 0.0, + "learning_rate": 8.979497259703735e-06, + "loss": 1.3848, + "step": 18615 + }, + { + "epoch": 0.5465969816195901, + "grad_norm": 0.0, + "learning_rate": 8.978551268690306e-06, + "loss": 1.2368, + "step": 18616 + }, + { + "epoch": 0.5466263432967291, + "grad_norm": 0.0, + "learning_rate": 8.977605286914097e-06, + "loss": 1.334, + "step": 18617 + }, + { + "epoch": 0.5466557049738681, + "grad_norm": 0.0, + "learning_rate": 8.976659314383667e-06, + "loss": 1.2031, + "step": 18618 + }, + { + "epoch": 0.5466850666510071, + "grad_norm": 0.0, + "learning_rate": 8.975713351107566e-06, + "loss": 1.3711, + "step": 18619 + }, + { + "epoch": 0.5467144283281461, + "grad_norm": 0.0, + "learning_rate": 8.974767397094354e-06, + "loss": 1.292, + "step": 18620 + }, + { + "epoch": 0.5467437900052851, + "grad_norm": 0.0, + "learning_rate": 8.97382145235258e-06, + "loss": 1.2881, + "step": 18621 + }, + { + "epoch": 0.5467731516824241, + "grad_norm": 0.0, + "learning_rate": 8.972875516890804e-06, + "loss": 1.376, + "step": 18622 + }, + { + "epoch": 0.5468025133595631, + "grad_norm": 0.0, + "learning_rate": 8.971929590717578e-06, + "loss": 1.3418, + "step": 18623 + }, + { + "epoch": 0.546831875036702, + "grad_norm": 0.0, + "learning_rate": 8.970983673841455e-06, + "loss": 1.249, + "step": 18624 + }, + { + "epoch": 0.5468612367138411, + "grad_norm": 0.0, + "learning_rate": 8.970037766270992e-06, + "loss": 1.3145, + "step": 18625 + }, + { + "epoch": 0.5468905983909801, + "grad_norm": 0.0, + "learning_rate": 8.969091868014737e-06, + "loss": 1.2871, + "step": 18626 + }, + { + "epoch": 0.546919960068119, + "grad_norm": 0.0, + "learning_rate": 8.968145979081251e-06, + "loss": 1.3105, + "step": 18627 + }, + { + "epoch": 0.5469493217452581, + "grad_norm": 0.0, + "learning_rate": 8.967200099479084e-06, + "loss": 1.3018, + "step": 18628 + }, + { + "epoch": 0.5469786834223971, + "grad_norm": 0.0, + "learning_rate": 8.966254229216794e-06, + "loss": 1.2568, + "step": 18629 + }, + { + "epoch": 0.547008045099536, + "grad_norm": 0.0, + "learning_rate": 8.965308368302926e-06, + "loss": 1.4297, + "step": 18630 + }, + { + "epoch": 0.5470374067766751, + "grad_norm": 0.0, + "learning_rate": 8.964362516746046e-06, + "loss": 1.3428, + "step": 18631 + }, + { + "epoch": 0.5470667684538141, + "grad_norm": 0.0, + "learning_rate": 8.963416674554696e-06, + "loss": 1.2842, + "step": 18632 + }, + { + "epoch": 0.547096130130953, + "grad_norm": 0.0, + "learning_rate": 8.962470841737438e-06, + "loss": 1.3955, + "step": 18633 + }, + { + "epoch": 0.5471254918080921, + "grad_norm": 0.0, + "learning_rate": 8.961525018302822e-06, + "loss": 1.417, + "step": 18634 + }, + { + "epoch": 0.5471548534852311, + "grad_norm": 0.0, + "learning_rate": 8.960579204259398e-06, + "loss": 1.2734, + "step": 18635 + }, + { + "epoch": 0.54718421516237, + "grad_norm": 0.0, + "learning_rate": 8.959633399615725e-06, + "loss": 1.332, + "step": 18636 + }, + { + "epoch": 0.5472135768395091, + "grad_norm": 0.0, + "learning_rate": 8.958687604380351e-06, + "loss": 1.252, + "step": 18637 + }, + { + "epoch": 0.5472429385166481, + "grad_norm": 0.0, + "learning_rate": 8.957741818561837e-06, + "loss": 1.3369, + "step": 18638 + }, + { + "epoch": 0.547272300193787, + "grad_norm": 0.0, + "learning_rate": 8.956796042168723e-06, + "loss": 1.3672, + "step": 18639 + }, + { + "epoch": 0.5473016618709261, + "grad_norm": 0.0, + "learning_rate": 8.955850275209575e-06, + "loss": 1.3232, + "step": 18640 + }, + { + "epoch": 0.5473310235480651, + "grad_norm": 0.0, + "learning_rate": 8.954904517692938e-06, + "loss": 1.3691, + "step": 18641 + }, + { + "epoch": 0.547360385225204, + "grad_norm": 0.0, + "learning_rate": 8.95395876962737e-06, + "loss": 1.377, + "step": 18642 + }, + { + "epoch": 0.5473897469023431, + "grad_norm": 0.0, + "learning_rate": 8.95301303102142e-06, + "loss": 1.335, + "step": 18643 + }, + { + "epoch": 0.5474191085794821, + "grad_norm": 0.0, + "learning_rate": 8.95206730188364e-06, + "loss": 1.2354, + "step": 18644 + }, + { + "epoch": 0.547448470256621, + "grad_norm": 0.0, + "learning_rate": 8.951121582222584e-06, + "loss": 1.2793, + "step": 18645 + }, + { + "epoch": 0.5474778319337601, + "grad_norm": 0.0, + "learning_rate": 8.950175872046802e-06, + "loss": 1.2988, + "step": 18646 + }, + { + "epoch": 0.5475071936108991, + "grad_norm": 0.0, + "learning_rate": 8.949230171364853e-06, + "loss": 1.269, + "step": 18647 + }, + { + "epoch": 0.547536555288038, + "grad_norm": 0.0, + "learning_rate": 8.948284480185282e-06, + "loss": 1.3105, + "step": 18648 + }, + { + "epoch": 0.5475659169651771, + "grad_norm": 0.0, + "learning_rate": 8.947338798516644e-06, + "loss": 1.3105, + "step": 18649 + }, + { + "epoch": 0.547595278642316, + "grad_norm": 0.0, + "learning_rate": 8.946393126367487e-06, + "loss": 1.2686, + "step": 18650 + }, + { + "epoch": 0.547624640319455, + "grad_norm": 0.0, + "learning_rate": 8.945447463746372e-06, + "loss": 1.2637, + "step": 18651 + }, + { + "epoch": 0.5476540019965941, + "grad_norm": 0.0, + "learning_rate": 8.944501810661844e-06, + "loss": 1.2046, + "step": 18652 + }, + { + "epoch": 0.547683363673733, + "grad_norm": 0.0, + "learning_rate": 8.94355616712246e-06, + "loss": 1.2637, + "step": 18653 + }, + { + "epoch": 0.547712725350872, + "grad_norm": 0.0, + "learning_rate": 8.942610533136766e-06, + "loss": 1.2998, + "step": 18654 + }, + { + "epoch": 0.5477420870280111, + "grad_norm": 0.0, + "learning_rate": 8.941664908713311e-06, + "loss": 1.3184, + "step": 18655 + }, + { + "epoch": 0.54777144870515, + "grad_norm": 0.0, + "learning_rate": 8.940719293860655e-06, + "loss": 1.3047, + "step": 18656 + }, + { + "epoch": 0.547800810382289, + "grad_norm": 0.0, + "learning_rate": 8.939773688587347e-06, + "loss": 1.2593, + "step": 18657 + }, + { + "epoch": 0.5478301720594281, + "grad_norm": 0.0, + "learning_rate": 8.938828092901936e-06, + "loss": 1.2188, + "step": 18658 + }, + { + "epoch": 0.547859533736567, + "grad_norm": 0.0, + "learning_rate": 8.937882506812972e-06, + "loss": 1.2671, + "step": 18659 + }, + { + "epoch": 0.547888895413706, + "grad_norm": 0.0, + "learning_rate": 8.936936930329011e-06, + "loss": 1.3672, + "step": 18660 + }, + { + "epoch": 0.5479182570908451, + "grad_norm": 0.0, + "learning_rate": 8.935991363458601e-06, + "loss": 1.3389, + "step": 18661 + }, + { + "epoch": 0.547947618767984, + "grad_norm": 0.0, + "learning_rate": 8.935045806210296e-06, + "loss": 1.3867, + "step": 18662 + }, + { + "epoch": 0.547976980445123, + "grad_norm": 0.0, + "learning_rate": 8.934100258592643e-06, + "loss": 1.2588, + "step": 18663 + }, + { + "epoch": 0.5480063421222621, + "grad_norm": 0.0, + "learning_rate": 8.93315472061419e-06, + "loss": 1.1299, + "step": 18664 + }, + { + "epoch": 0.548035703799401, + "grad_norm": 0.0, + "learning_rate": 8.932209192283498e-06, + "loss": 1.373, + "step": 18665 + }, + { + "epoch": 0.54806506547654, + "grad_norm": 0.0, + "learning_rate": 8.931263673609107e-06, + "loss": 1.2617, + "step": 18666 + }, + { + "epoch": 0.5480944271536791, + "grad_norm": 0.0, + "learning_rate": 8.930318164599577e-06, + "loss": 1.4346, + "step": 18667 + }, + { + "epoch": 0.548123788830818, + "grad_norm": 0.0, + "learning_rate": 8.929372665263447e-06, + "loss": 1.293, + "step": 18668 + }, + { + "epoch": 0.548153150507957, + "grad_norm": 0.0, + "learning_rate": 8.928427175609283e-06, + "loss": 1.4297, + "step": 18669 + }, + { + "epoch": 0.5481825121850961, + "grad_norm": 0.0, + "learning_rate": 8.92748169564562e-06, + "loss": 1.2734, + "step": 18670 + }, + { + "epoch": 0.548211873862235, + "grad_norm": 0.0, + "learning_rate": 8.926536225381019e-06, + "loss": 1.4165, + "step": 18671 + }, + { + "epoch": 0.548241235539374, + "grad_norm": 0.0, + "learning_rate": 8.925590764824023e-06, + "loss": 1.292, + "step": 18672 + }, + { + "epoch": 0.5482705972165131, + "grad_norm": 0.0, + "learning_rate": 8.924645313983186e-06, + "loss": 1.3594, + "step": 18673 + }, + { + "epoch": 0.548299958893652, + "grad_norm": 0.0, + "learning_rate": 8.923699872867057e-06, + "loss": 1.2773, + "step": 18674 + }, + { + "epoch": 0.548329320570791, + "grad_norm": 0.0, + "learning_rate": 8.922754441484183e-06, + "loss": 1.3115, + "step": 18675 + }, + { + "epoch": 0.54835868224793, + "grad_norm": 0.0, + "learning_rate": 8.92180901984312e-06, + "loss": 1.3965, + "step": 18676 + }, + { + "epoch": 0.548388043925069, + "grad_norm": 0.0, + "learning_rate": 8.920863607952413e-06, + "loss": 1.3247, + "step": 18677 + }, + { + "epoch": 0.548417405602208, + "grad_norm": 0.0, + "learning_rate": 8.919918205820612e-06, + "loss": 1.2412, + "step": 18678 + }, + { + "epoch": 0.548446767279347, + "grad_norm": 0.0, + "learning_rate": 8.918972813456265e-06, + "loss": 1.2471, + "step": 18679 + }, + { + "epoch": 0.548476128956486, + "grad_norm": 0.0, + "learning_rate": 8.918027430867929e-06, + "loss": 1.3232, + "step": 18680 + }, + { + "epoch": 0.548505490633625, + "grad_norm": 0.0, + "learning_rate": 8.917082058064145e-06, + "loss": 1.4131, + "step": 18681 + }, + { + "epoch": 0.548534852310764, + "grad_norm": 0.0, + "learning_rate": 8.916136695053467e-06, + "loss": 1.3589, + "step": 18682 + }, + { + "epoch": 0.548564213987903, + "grad_norm": 0.0, + "learning_rate": 8.915191341844443e-06, + "loss": 1.3232, + "step": 18683 + }, + { + "epoch": 0.548593575665042, + "grad_norm": 0.0, + "learning_rate": 8.914245998445616e-06, + "loss": 1.2529, + "step": 18684 + }, + { + "epoch": 0.548622937342181, + "grad_norm": 0.0, + "learning_rate": 8.913300664865545e-06, + "loss": 1.2012, + "step": 18685 + }, + { + "epoch": 0.54865229901932, + "grad_norm": 0.0, + "learning_rate": 8.912355341112773e-06, + "loss": 1.3672, + "step": 18686 + }, + { + "epoch": 0.548681660696459, + "grad_norm": 0.0, + "learning_rate": 8.911410027195853e-06, + "loss": 1.3101, + "step": 18687 + }, + { + "epoch": 0.548711022373598, + "grad_norm": 0.0, + "learning_rate": 8.910464723123325e-06, + "loss": 1.2095, + "step": 18688 + }, + { + "epoch": 0.548740384050737, + "grad_norm": 0.0, + "learning_rate": 8.909519428903751e-06, + "loss": 1.2725, + "step": 18689 + }, + { + "epoch": 0.548769745727876, + "grad_norm": 0.0, + "learning_rate": 8.908574144545666e-06, + "loss": 1.3311, + "step": 18690 + }, + { + "epoch": 0.548799107405015, + "grad_norm": 0.0, + "learning_rate": 8.907628870057628e-06, + "loss": 1.3223, + "step": 18691 + }, + { + "epoch": 0.548828469082154, + "grad_norm": 0.0, + "learning_rate": 8.906683605448182e-06, + "loss": 1.2852, + "step": 18692 + }, + { + "epoch": 0.548857830759293, + "grad_norm": 0.0, + "learning_rate": 8.905738350725873e-06, + "loss": 1.2109, + "step": 18693 + }, + { + "epoch": 0.5488871924364319, + "grad_norm": 0.0, + "learning_rate": 8.904793105899258e-06, + "loss": 1.2432, + "step": 18694 + }, + { + "epoch": 0.548916554113571, + "grad_norm": 0.0, + "learning_rate": 8.903847870976873e-06, + "loss": 1.293, + "step": 18695 + }, + { + "epoch": 0.54894591579071, + "grad_norm": 0.0, + "learning_rate": 8.902902645967276e-06, + "loss": 1.1719, + "step": 18696 + }, + { + "epoch": 0.5489752774678489, + "grad_norm": 0.0, + "learning_rate": 8.90195743087901e-06, + "loss": 1.3525, + "step": 18697 + }, + { + "epoch": 0.549004639144988, + "grad_norm": 0.0, + "learning_rate": 8.901012225720627e-06, + "loss": 1.3516, + "step": 18698 + }, + { + "epoch": 0.5490340008221269, + "grad_norm": 0.0, + "learning_rate": 8.900067030500665e-06, + "loss": 1.252, + "step": 18699 + }, + { + "epoch": 0.5490633624992659, + "grad_norm": 0.0, + "learning_rate": 8.899121845227685e-06, + "loss": 1.2979, + "step": 18700 + }, + { + "epoch": 0.549092724176405, + "grad_norm": 0.0, + "learning_rate": 8.898176669910226e-06, + "loss": 1.3281, + "step": 18701 + }, + { + "epoch": 0.5491220858535439, + "grad_norm": 0.0, + "learning_rate": 8.897231504556839e-06, + "loss": 1.2227, + "step": 18702 + }, + { + "epoch": 0.5491514475306829, + "grad_norm": 0.0, + "learning_rate": 8.89628634917607e-06, + "loss": 1.2441, + "step": 18703 + }, + { + "epoch": 0.549180809207822, + "grad_norm": 0.0, + "learning_rate": 8.895341203776462e-06, + "loss": 1.4561, + "step": 18704 + }, + { + "epoch": 0.5492101708849609, + "grad_norm": 0.0, + "learning_rate": 8.89439606836657e-06, + "loss": 1.1772, + "step": 18705 + }, + { + "epoch": 0.5492395325620999, + "grad_norm": 0.0, + "learning_rate": 8.893450942954936e-06, + "loss": 1.25, + "step": 18706 + }, + { + "epoch": 0.549268894239239, + "grad_norm": 0.0, + "learning_rate": 8.892505827550112e-06, + "loss": 1.4619, + "step": 18707 + }, + { + "epoch": 0.5492982559163779, + "grad_norm": 0.0, + "learning_rate": 8.891560722160635e-06, + "loss": 1.1533, + "step": 18708 + }, + { + "epoch": 0.5493276175935169, + "grad_norm": 0.0, + "learning_rate": 8.890615626795064e-06, + "loss": 1.3457, + "step": 18709 + }, + { + "epoch": 0.549356979270656, + "grad_norm": 0.0, + "learning_rate": 8.889670541461937e-06, + "loss": 1.2676, + "step": 18710 + }, + { + "epoch": 0.5493863409477949, + "grad_norm": 0.0, + "learning_rate": 8.888725466169807e-06, + "loss": 1.3193, + "step": 18711 + }, + { + "epoch": 0.5494157026249339, + "grad_norm": 0.0, + "learning_rate": 8.887780400927215e-06, + "loss": 1.3223, + "step": 18712 + }, + { + "epoch": 0.549445064302073, + "grad_norm": 0.0, + "learning_rate": 8.886835345742706e-06, + "loss": 1.5059, + "step": 18713 + }, + { + "epoch": 0.5494744259792119, + "grad_norm": 0.0, + "learning_rate": 8.885890300624836e-06, + "loss": 1.3281, + "step": 18714 + }, + { + "epoch": 0.5495037876563509, + "grad_norm": 0.0, + "learning_rate": 8.884945265582142e-06, + "loss": 1.3643, + "step": 18715 + }, + { + "epoch": 0.54953314933349, + "grad_norm": 0.0, + "learning_rate": 8.884000240623177e-06, + "loss": 1.2949, + "step": 18716 + }, + { + "epoch": 0.5495625110106289, + "grad_norm": 0.0, + "learning_rate": 8.883055225756478e-06, + "loss": 1.3096, + "step": 18717 + }, + { + "epoch": 0.5495918726877679, + "grad_norm": 0.0, + "learning_rate": 8.882110220990601e-06, + "loss": 1.248, + "step": 18718 + }, + { + "epoch": 0.549621234364907, + "grad_norm": 0.0, + "learning_rate": 8.881165226334085e-06, + "loss": 1.2529, + "step": 18719 + }, + { + "epoch": 0.5496505960420459, + "grad_norm": 0.0, + "learning_rate": 8.88022024179548e-06, + "loss": 1.2324, + "step": 18720 + }, + { + "epoch": 0.5496799577191849, + "grad_norm": 0.0, + "learning_rate": 8.879275267383329e-06, + "loss": 1.2266, + "step": 18721 + }, + { + "epoch": 0.549709319396324, + "grad_norm": 0.0, + "learning_rate": 8.878330303106183e-06, + "loss": 1.4131, + "step": 18722 + }, + { + "epoch": 0.5497386810734629, + "grad_norm": 0.0, + "learning_rate": 8.877385348972582e-06, + "loss": 1.4209, + "step": 18723 + }, + { + "epoch": 0.5497680427506019, + "grad_norm": 0.0, + "learning_rate": 8.876440404991067e-06, + "loss": 1.1753, + "step": 18724 + }, + { + "epoch": 0.5497974044277409, + "grad_norm": 0.0, + "learning_rate": 8.875495471170195e-06, + "loss": 1.332, + "step": 18725 + }, + { + "epoch": 0.5498267661048799, + "grad_norm": 0.0, + "learning_rate": 8.874550547518502e-06, + "loss": 1.3154, + "step": 18726 + }, + { + "epoch": 0.5498561277820189, + "grad_norm": 0.0, + "learning_rate": 8.87360563404454e-06, + "loss": 1.2715, + "step": 18727 + }, + { + "epoch": 0.5498854894591579, + "grad_norm": 0.0, + "learning_rate": 8.872660730756846e-06, + "loss": 1.2617, + "step": 18728 + }, + { + "epoch": 0.5499148511362969, + "grad_norm": 0.0, + "learning_rate": 8.871715837663973e-06, + "loss": 1.2275, + "step": 18729 + }, + { + "epoch": 0.5499442128134359, + "grad_norm": 0.0, + "learning_rate": 8.870770954774462e-06, + "loss": 1.2744, + "step": 18730 + }, + { + "epoch": 0.5499735744905749, + "grad_norm": 0.0, + "learning_rate": 8.86982608209686e-06, + "loss": 1.251, + "step": 18731 + }, + { + "epoch": 0.5500029361677139, + "grad_norm": 0.0, + "learning_rate": 8.86888121963971e-06, + "loss": 1.3535, + "step": 18732 + }, + { + "epoch": 0.5500322978448529, + "grad_norm": 0.0, + "learning_rate": 8.867936367411553e-06, + "loss": 1.3145, + "step": 18733 + }, + { + "epoch": 0.5500616595219919, + "grad_norm": 0.0, + "learning_rate": 8.866991525420941e-06, + "loss": 1.1143, + "step": 18734 + }, + { + "epoch": 0.5500910211991309, + "grad_norm": 0.0, + "learning_rate": 8.866046693676413e-06, + "loss": 1.2588, + "step": 18735 + }, + { + "epoch": 0.5501203828762699, + "grad_norm": 0.0, + "learning_rate": 8.865101872186518e-06, + "loss": 1.3037, + "step": 18736 + }, + { + "epoch": 0.5501497445534089, + "grad_norm": 0.0, + "learning_rate": 8.864157060959792e-06, + "loss": 1.3398, + "step": 18737 + }, + { + "epoch": 0.5501791062305479, + "grad_norm": 0.0, + "learning_rate": 8.86321226000479e-06, + "loss": 1.3369, + "step": 18738 + }, + { + "epoch": 0.5502084679076868, + "grad_norm": 0.0, + "learning_rate": 8.862267469330046e-06, + "loss": 1.4131, + "step": 18739 + }, + { + "epoch": 0.5502378295848259, + "grad_norm": 0.0, + "learning_rate": 8.861322688944113e-06, + "loss": 1.4199, + "step": 18740 + }, + { + "epoch": 0.5502671912619649, + "grad_norm": 0.0, + "learning_rate": 8.86037791885553e-06, + "loss": 1.2705, + "step": 18741 + }, + { + "epoch": 0.5502965529391038, + "grad_norm": 0.0, + "learning_rate": 8.859433159072837e-06, + "loss": 1.3086, + "step": 18742 + }, + { + "epoch": 0.5503259146162429, + "grad_norm": 0.0, + "learning_rate": 8.858488409604586e-06, + "loss": 1.2144, + "step": 18743 + }, + { + "epoch": 0.5503552762933819, + "grad_norm": 0.0, + "learning_rate": 8.857543670459316e-06, + "loss": 1.3032, + "step": 18744 + }, + { + "epoch": 0.5503846379705208, + "grad_norm": 0.0, + "learning_rate": 8.856598941645572e-06, + "loss": 1.2153, + "step": 18745 + }, + { + "epoch": 0.5504139996476599, + "grad_norm": 0.0, + "learning_rate": 8.855654223171894e-06, + "loss": 1.293, + "step": 18746 + }, + { + "epoch": 0.5504433613247989, + "grad_norm": 0.0, + "learning_rate": 8.854709515046832e-06, + "loss": 1.2432, + "step": 18747 + }, + { + "epoch": 0.5504727230019378, + "grad_norm": 0.0, + "learning_rate": 8.85376481727892e-06, + "loss": 1.3359, + "step": 18748 + }, + { + "epoch": 0.5505020846790769, + "grad_norm": 0.0, + "learning_rate": 8.85282012987671e-06, + "loss": 1.3398, + "step": 18749 + }, + { + "epoch": 0.5505314463562159, + "grad_norm": 0.0, + "learning_rate": 8.851875452848739e-06, + "loss": 1.2842, + "step": 18750 + }, + { + "epoch": 0.5505608080333548, + "grad_norm": 0.0, + "learning_rate": 8.850930786203556e-06, + "loss": 1.3145, + "step": 18751 + }, + { + "epoch": 0.5505901697104939, + "grad_norm": 0.0, + "learning_rate": 8.8499861299497e-06, + "loss": 1.2773, + "step": 18752 + }, + { + "epoch": 0.5506195313876329, + "grad_norm": 0.0, + "learning_rate": 8.84904148409571e-06, + "loss": 1.3789, + "step": 18753 + }, + { + "epoch": 0.5506488930647718, + "grad_norm": 0.0, + "learning_rate": 8.848096848650135e-06, + "loss": 1.3398, + "step": 18754 + }, + { + "epoch": 0.5506782547419109, + "grad_norm": 0.0, + "learning_rate": 8.847152223621514e-06, + "loss": 1.2871, + "step": 18755 + }, + { + "epoch": 0.5507076164190499, + "grad_norm": 0.0, + "learning_rate": 8.846207609018391e-06, + "loss": 1.333, + "step": 18756 + }, + { + "epoch": 0.5507369780961888, + "grad_norm": 0.0, + "learning_rate": 8.845263004849306e-06, + "loss": 1.248, + "step": 18757 + }, + { + "epoch": 0.5507663397733279, + "grad_norm": 0.0, + "learning_rate": 8.844318411122808e-06, + "loss": 1.2598, + "step": 18758 + }, + { + "epoch": 0.5507957014504669, + "grad_norm": 0.0, + "learning_rate": 8.843373827847431e-06, + "loss": 1.2939, + "step": 18759 + }, + { + "epoch": 0.5508250631276058, + "grad_norm": 0.0, + "learning_rate": 8.842429255031723e-06, + "loss": 1.2188, + "step": 18760 + }, + { + "epoch": 0.5508544248047449, + "grad_norm": 0.0, + "learning_rate": 8.841484692684225e-06, + "loss": 1.3486, + "step": 18761 + }, + { + "epoch": 0.5508837864818839, + "grad_norm": 0.0, + "learning_rate": 8.840540140813472e-06, + "loss": 1.3047, + "step": 18762 + }, + { + "epoch": 0.5509131481590228, + "grad_norm": 0.0, + "learning_rate": 8.839595599428017e-06, + "loss": 1.4121, + "step": 18763 + }, + { + "epoch": 0.5509425098361619, + "grad_norm": 0.0, + "learning_rate": 8.838651068536392e-06, + "loss": 1.3174, + "step": 18764 + }, + { + "epoch": 0.5509718715133008, + "grad_norm": 0.0, + "learning_rate": 8.837706548147146e-06, + "loss": 1.3789, + "step": 18765 + }, + { + "epoch": 0.5510012331904398, + "grad_norm": 0.0, + "learning_rate": 8.836762038268814e-06, + "loss": 1.4131, + "step": 18766 + }, + { + "epoch": 0.5510305948675789, + "grad_norm": 0.0, + "learning_rate": 8.835817538909945e-06, + "loss": 1.2695, + "step": 18767 + }, + { + "epoch": 0.5510599565447178, + "grad_norm": 0.0, + "learning_rate": 8.834873050079073e-06, + "loss": 1.3008, + "step": 18768 + }, + { + "epoch": 0.5510893182218568, + "grad_norm": 0.0, + "learning_rate": 8.833928571784744e-06, + "loss": 1.2485, + "step": 18769 + }, + { + "epoch": 0.5511186798989959, + "grad_norm": 0.0, + "learning_rate": 8.832984104035495e-06, + "loss": 1.1489, + "step": 18770 + }, + { + "epoch": 0.5511480415761348, + "grad_norm": 0.0, + "learning_rate": 8.832039646839872e-06, + "loss": 1.3955, + "step": 18771 + }, + { + "epoch": 0.5511774032532738, + "grad_norm": 0.0, + "learning_rate": 8.831095200206419e-06, + "loss": 1.2041, + "step": 18772 + }, + { + "epoch": 0.5512067649304129, + "grad_norm": 0.0, + "learning_rate": 8.830150764143662e-06, + "loss": 1.2021, + "step": 18773 + }, + { + "epoch": 0.5512361266075518, + "grad_norm": 0.0, + "learning_rate": 8.829206338660157e-06, + "loss": 1.3037, + "step": 18774 + }, + { + "epoch": 0.5512654882846908, + "grad_norm": 0.0, + "learning_rate": 8.828261923764437e-06, + "loss": 1.2979, + "step": 18775 + }, + { + "epoch": 0.5512948499618299, + "grad_norm": 0.0, + "learning_rate": 8.827317519465045e-06, + "loss": 1.3447, + "step": 18776 + }, + { + "epoch": 0.5513242116389688, + "grad_norm": 0.0, + "learning_rate": 8.826373125770518e-06, + "loss": 1.4111, + "step": 18777 + }, + { + "epoch": 0.5513535733161078, + "grad_norm": 0.0, + "learning_rate": 8.825428742689403e-06, + "loss": 1.2109, + "step": 18778 + }, + { + "epoch": 0.5513829349932469, + "grad_norm": 0.0, + "learning_rate": 8.824484370230234e-06, + "loss": 1.4043, + "step": 18779 + }, + { + "epoch": 0.5514122966703858, + "grad_norm": 0.0, + "learning_rate": 8.823540008401556e-06, + "loss": 1.2061, + "step": 18780 + }, + { + "epoch": 0.5514416583475248, + "grad_norm": 0.0, + "learning_rate": 8.822595657211909e-06, + "loss": 1.1699, + "step": 18781 + }, + { + "epoch": 0.5514710200246639, + "grad_norm": 0.0, + "learning_rate": 8.821651316669827e-06, + "loss": 1.2949, + "step": 18782 + }, + { + "epoch": 0.5515003817018028, + "grad_norm": 0.0, + "learning_rate": 8.820706986783856e-06, + "loss": 1.3315, + "step": 18783 + }, + { + "epoch": 0.5515297433789418, + "grad_norm": 0.0, + "learning_rate": 8.819762667562532e-06, + "loss": 1.3633, + "step": 18784 + }, + { + "epoch": 0.5515591050560809, + "grad_norm": 0.0, + "learning_rate": 8.818818359014398e-06, + "loss": 1.2812, + "step": 18785 + }, + { + "epoch": 0.5515884667332198, + "grad_norm": 0.0, + "learning_rate": 8.817874061147989e-06, + "loss": 1.2847, + "step": 18786 + }, + { + "epoch": 0.5516178284103588, + "grad_norm": 0.0, + "learning_rate": 8.816929773971852e-06, + "loss": 1.335, + "step": 18787 + }, + { + "epoch": 0.5516471900874979, + "grad_norm": 0.0, + "learning_rate": 8.81598549749452e-06, + "loss": 1.3975, + "step": 18788 + }, + { + "epoch": 0.5516765517646368, + "grad_norm": 0.0, + "learning_rate": 8.815041231724536e-06, + "loss": 1.3301, + "step": 18789 + }, + { + "epoch": 0.5517059134417758, + "grad_norm": 0.0, + "learning_rate": 8.814096976670433e-06, + "loss": 1.2607, + "step": 18790 + }, + { + "epoch": 0.5517352751189148, + "grad_norm": 0.0, + "learning_rate": 8.81315273234076e-06, + "loss": 1.3916, + "step": 18791 + }, + { + "epoch": 0.5517646367960538, + "grad_norm": 0.0, + "learning_rate": 8.81220849874405e-06, + "loss": 1.3105, + "step": 18792 + }, + { + "epoch": 0.5517939984731928, + "grad_norm": 0.0, + "learning_rate": 8.81126427588884e-06, + "loss": 1.3564, + "step": 18793 + }, + { + "epoch": 0.5518233601503317, + "grad_norm": 0.0, + "learning_rate": 8.810320063783676e-06, + "loss": 1.252, + "step": 18794 + }, + { + "epoch": 0.5518527218274708, + "grad_norm": 0.0, + "learning_rate": 8.809375862437086e-06, + "loss": 1.2666, + "step": 18795 + }, + { + "epoch": 0.5518820835046098, + "grad_norm": 0.0, + "learning_rate": 8.80843167185762e-06, + "loss": 1.2432, + "step": 18796 + }, + { + "epoch": 0.5519114451817487, + "grad_norm": 0.0, + "learning_rate": 8.807487492053809e-06, + "loss": 1.2383, + "step": 18797 + }, + { + "epoch": 0.5519408068588878, + "grad_norm": 0.0, + "learning_rate": 8.806543323034197e-06, + "loss": 1.4307, + "step": 18798 + }, + { + "epoch": 0.5519701685360268, + "grad_norm": 0.0, + "learning_rate": 8.805599164807316e-06, + "loss": 1.3535, + "step": 18799 + }, + { + "epoch": 0.5519995302131657, + "grad_norm": 0.0, + "learning_rate": 8.804655017381712e-06, + "loss": 1.3584, + "step": 18800 + }, + { + "epoch": 0.5520288918903048, + "grad_norm": 0.0, + "learning_rate": 8.803710880765917e-06, + "loss": 1.2627, + "step": 18801 + }, + { + "epoch": 0.5520582535674438, + "grad_norm": 0.0, + "learning_rate": 8.802766754968465e-06, + "loss": 1.3398, + "step": 18802 + }, + { + "epoch": 0.5520876152445827, + "grad_norm": 0.0, + "learning_rate": 8.801822639997906e-06, + "loss": 1.165, + "step": 18803 + }, + { + "epoch": 0.5521169769217218, + "grad_norm": 0.0, + "learning_rate": 8.800878535862772e-06, + "loss": 1.3516, + "step": 18804 + }, + { + "epoch": 0.5521463385988608, + "grad_norm": 0.0, + "learning_rate": 8.7999344425716e-06, + "loss": 1.3457, + "step": 18805 + }, + { + "epoch": 0.5521757002759997, + "grad_norm": 0.0, + "learning_rate": 8.798990360132923e-06, + "loss": 1.3271, + "step": 18806 + }, + { + "epoch": 0.5522050619531388, + "grad_norm": 0.0, + "learning_rate": 8.798046288555289e-06, + "loss": 1.3564, + "step": 18807 + }, + { + "epoch": 0.5522344236302777, + "grad_norm": 0.0, + "learning_rate": 8.79710222784723e-06, + "loss": 1.1309, + "step": 18808 + }, + { + "epoch": 0.5522637853074167, + "grad_norm": 0.0, + "learning_rate": 8.796158178017283e-06, + "loss": 1.2715, + "step": 18809 + }, + { + "epoch": 0.5522931469845558, + "grad_norm": 0.0, + "learning_rate": 8.795214139073987e-06, + "loss": 1.2642, + "step": 18810 + }, + { + "epoch": 0.5523225086616947, + "grad_norm": 0.0, + "learning_rate": 8.794270111025873e-06, + "loss": 1.3784, + "step": 18811 + }, + { + "epoch": 0.5523518703388337, + "grad_norm": 0.0, + "learning_rate": 8.793326093881488e-06, + "loss": 1.3418, + "step": 18812 + }, + { + "epoch": 0.5523812320159728, + "grad_norm": 0.0, + "learning_rate": 8.792382087649363e-06, + "loss": 1.3564, + "step": 18813 + }, + { + "epoch": 0.5524105936931117, + "grad_norm": 0.0, + "learning_rate": 8.791438092338039e-06, + "loss": 1.2393, + "step": 18814 + }, + { + "epoch": 0.5524399553702507, + "grad_norm": 0.0, + "learning_rate": 8.790494107956042e-06, + "loss": 1.4189, + "step": 18815 + }, + { + "epoch": 0.5524693170473898, + "grad_norm": 0.0, + "learning_rate": 8.789550134511923e-06, + "loss": 1.1924, + "step": 18816 + }, + { + "epoch": 0.5524986787245287, + "grad_norm": 0.0, + "learning_rate": 8.78860617201421e-06, + "loss": 1.2881, + "step": 18817 + }, + { + "epoch": 0.5525280404016677, + "grad_norm": 0.0, + "learning_rate": 8.787662220471444e-06, + "loss": 1.3418, + "step": 18818 + }, + { + "epoch": 0.5525574020788068, + "grad_norm": 0.0, + "learning_rate": 8.786718279892155e-06, + "loss": 1.335, + "step": 18819 + }, + { + "epoch": 0.5525867637559457, + "grad_norm": 0.0, + "learning_rate": 8.785774350284885e-06, + "loss": 1.3169, + "step": 18820 + }, + { + "epoch": 0.5526161254330847, + "grad_norm": 0.0, + "learning_rate": 8.784830431658174e-06, + "loss": 1.1777, + "step": 18821 + }, + { + "epoch": 0.5526454871102238, + "grad_norm": 0.0, + "learning_rate": 8.783886524020545e-06, + "loss": 1.2017, + "step": 18822 + }, + { + "epoch": 0.5526748487873627, + "grad_norm": 0.0, + "learning_rate": 8.782942627380546e-06, + "loss": 1.3091, + "step": 18823 + }, + { + "epoch": 0.5527042104645017, + "grad_norm": 0.0, + "learning_rate": 8.781998741746706e-06, + "loss": 1.293, + "step": 18824 + }, + { + "epoch": 0.5527335721416408, + "grad_norm": 0.0, + "learning_rate": 8.781054867127566e-06, + "loss": 1.1694, + "step": 18825 + }, + { + "epoch": 0.5527629338187797, + "grad_norm": 0.0, + "learning_rate": 8.780111003531654e-06, + "loss": 1.0874, + "step": 18826 + }, + { + "epoch": 0.5527922954959187, + "grad_norm": 0.0, + "learning_rate": 8.779167150967514e-06, + "loss": 1.2627, + "step": 18827 + }, + { + "epoch": 0.5528216571730578, + "grad_norm": 0.0, + "learning_rate": 8.778223309443678e-06, + "loss": 1.2578, + "step": 18828 + }, + { + "epoch": 0.5528510188501967, + "grad_norm": 0.0, + "learning_rate": 8.777279478968684e-06, + "loss": 1.2324, + "step": 18829 + }, + { + "epoch": 0.5528803805273357, + "grad_norm": 0.0, + "learning_rate": 8.776335659551063e-06, + "loss": 1.3184, + "step": 18830 + }, + { + "epoch": 0.5529097422044748, + "grad_norm": 0.0, + "learning_rate": 8.775391851199348e-06, + "loss": 1.2129, + "step": 18831 + }, + { + "epoch": 0.5529391038816137, + "grad_norm": 0.0, + "learning_rate": 8.774448053922082e-06, + "loss": 1.2871, + "step": 18832 + }, + { + "epoch": 0.5529684655587527, + "grad_norm": 0.0, + "learning_rate": 8.773504267727795e-06, + "loss": 1.2275, + "step": 18833 + }, + { + "epoch": 0.5529978272358917, + "grad_norm": 0.0, + "learning_rate": 8.772560492625025e-06, + "loss": 1.2231, + "step": 18834 + }, + { + "epoch": 0.5530271889130307, + "grad_norm": 0.0, + "learning_rate": 8.771616728622301e-06, + "loss": 1.2061, + "step": 18835 + }, + { + "epoch": 0.5530565505901697, + "grad_norm": 0.0, + "learning_rate": 8.770672975728164e-06, + "loss": 1.3047, + "step": 18836 + }, + { + "epoch": 0.5530859122673087, + "grad_norm": 0.0, + "learning_rate": 8.769729233951145e-06, + "loss": 1.3145, + "step": 18837 + }, + { + "epoch": 0.5531152739444477, + "grad_norm": 0.0, + "learning_rate": 8.768785503299783e-06, + "loss": 1.2759, + "step": 18838 + }, + { + "epoch": 0.5531446356215867, + "grad_norm": 0.0, + "learning_rate": 8.767841783782603e-06, + "loss": 1.4097, + "step": 18839 + }, + { + "epoch": 0.5531739972987257, + "grad_norm": 0.0, + "learning_rate": 8.766898075408152e-06, + "loss": 1.333, + "step": 18840 + }, + { + "epoch": 0.5532033589758647, + "grad_norm": 0.0, + "learning_rate": 8.765954378184954e-06, + "loss": 1.249, + "step": 18841 + }, + { + "epoch": 0.5532327206530037, + "grad_norm": 0.0, + "learning_rate": 8.765010692121547e-06, + "loss": 1.2773, + "step": 18842 + }, + { + "epoch": 0.5532620823301427, + "grad_norm": 0.0, + "learning_rate": 8.764067017226466e-06, + "loss": 1.3379, + "step": 18843 + }, + { + "epoch": 0.5532914440072817, + "grad_norm": 0.0, + "learning_rate": 8.763123353508239e-06, + "loss": 1.2661, + "step": 18844 + }, + { + "epoch": 0.5533208056844207, + "grad_norm": 0.0, + "learning_rate": 8.762179700975409e-06, + "loss": 1.2935, + "step": 18845 + }, + { + "epoch": 0.5533501673615597, + "grad_norm": 0.0, + "learning_rate": 8.761236059636502e-06, + "loss": 1.3584, + "step": 18846 + }, + { + "epoch": 0.5533795290386987, + "grad_norm": 0.0, + "learning_rate": 8.760292429500058e-06, + "loss": 1.3799, + "step": 18847 + }, + { + "epoch": 0.5534088907158377, + "grad_norm": 0.0, + "learning_rate": 8.759348810574602e-06, + "loss": 1.335, + "step": 18848 + }, + { + "epoch": 0.5534382523929767, + "grad_norm": 0.0, + "learning_rate": 8.758405202868678e-06, + "loss": 1.2471, + "step": 18849 + }, + { + "epoch": 0.5534676140701157, + "grad_norm": 0.0, + "learning_rate": 8.757461606390813e-06, + "loss": 1.3301, + "step": 18850 + }, + { + "epoch": 0.5534969757472546, + "grad_norm": 0.0, + "learning_rate": 8.756518021149536e-06, + "loss": 1.2773, + "step": 18851 + }, + { + "epoch": 0.5535263374243937, + "grad_norm": 0.0, + "learning_rate": 8.75557444715339e-06, + "loss": 1.3965, + "step": 18852 + }, + { + "epoch": 0.5535556991015327, + "grad_norm": 0.0, + "learning_rate": 8.7546308844109e-06, + "loss": 1.2695, + "step": 18853 + }, + { + "epoch": 0.5535850607786716, + "grad_norm": 0.0, + "learning_rate": 8.753687332930605e-06, + "loss": 1.2637, + "step": 18854 + }, + { + "epoch": 0.5536144224558107, + "grad_norm": 0.0, + "learning_rate": 8.75274379272103e-06, + "loss": 1.3115, + "step": 18855 + }, + { + "epoch": 0.5536437841329497, + "grad_norm": 0.0, + "learning_rate": 8.751800263790715e-06, + "loss": 1.1133, + "step": 18856 + }, + { + "epoch": 0.5536731458100886, + "grad_norm": 0.0, + "learning_rate": 8.750856746148188e-06, + "loss": 1.2104, + "step": 18857 + }, + { + "epoch": 0.5537025074872277, + "grad_norm": 0.0, + "learning_rate": 8.749913239801988e-06, + "loss": 1.293, + "step": 18858 + }, + { + "epoch": 0.5537318691643667, + "grad_norm": 0.0, + "learning_rate": 8.748969744760636e-06, + "loss": 1.2725, + "step": 18859 + }, + { + "epoch": 0.5537612308415056, + "grad_norm": 0.0, + "learning_rate": 8.748026261032675e-06, + "loss": 1.335, + "step": 18860 + }, + { + "epoch": 0.5537905925186447, + "grad_norm": 0.0, + "learning_rate": 8.747082788626634e-06, + "loss": 1.4346, + "step": 18861 + }, + { + "epoch": 0.5538199541957837, + "grad_norm": 0.0, + "learning_rate": 8.746139327551045e-06, + "loss": 1.3223, + "step": 18862 + }, + { + "epoch": 0.5538493158729226, + "grad_norm": 0.0, + "learning_rate": 8.745195877814438e-06, + "loss": 1.1758, + "step": 18863 + }, + { + "epoch": 0.5538786775500617, + "grad_norm": 0.0, + "learning_rate": 8.744252439425342e-06, + "loss": 1.3174, + "step": 18864 + }, + { + "epoch": 0.5539080392272007, + "grad_norm": 0.0, + "learning_rate": 8.743309012392298e-06, + "loss": 1.2666, + "step": 18865 + }, + { + "epoch": 0.5539374009043396, + "grad_norm": 0.0, + "learning_rate": 8.742365596723829e-06, + "loss": 1.3032, + "step": 18866 + }, + { + "epoch": 0.5539667625814787, + "grad_norm": 0.0, + "learning_rate": 8.741422192428473e-06, + "loss": 1.2637, + "step": 18867 + }, + { + "epoch": 0.5539961242586177, + "grad_norm": 0.0, + "learning_rate": 8.740478799514755e-06, + "loss": 1.3506, + "step": 18868 + }, + { + "epoch": 0.5540254859357566, + "grad_norm": 0.0, + "learning_rate": 8.739535417991212e-06, + "loss": 1.2031, + "step": 18869 + }, + { + "epoch": 0.5540548476128957, + "grad_norm": 0.0, + "learning_rate": 8.738592047866375e-06, + "loss": 1.251, + "step": 18870 + }, + { + "epoch": 0.5540842092900347, + "grad_norm": 0.0, + "learning_rate": 8.73764868914877e-06, + "loss": 1.2793, + "step": 18871 + }, + { + "epoch": 0.5541135709671736, + "grad_norm": 0.0, + "learning_rate": 8.736705341846933e-06, + "loss": 1.3379, + "step": 18872 + }, + { + "epoch": 0.5541429326443127, + "grad_norm": 0.0, + "learning_rate": 8.73576200596939e-06, + "loss": 1.291, + "step": 18873 + }, + { + "epoch": 0.5541722943214517, + "grad_norm": 0.0, + "learning_rate": 8.734818681524682e-06, + "loss": 1.2656, + "step": 18874 + }, + { + "epoch": 0.5542016559985906, + "grad_norm": 0.0, + "learning_rate": 8.733875368521324e-06, + "loss": 1.3154, + "step": 18875 + }, + { + "epoch": 0.5542310176757297, + "grad_norm": 0.0, + "learning_rate": 8.732932066967861e-06, + "loss": 1.3564, + "step": 18876 + }, + { + "epoch": 0.5542603793528686, + "grad_norm": 0.0, + "learning_rate": 8.731988776872814e-06, + "loss": 1.1567, + "step": 18877 + }, + { + "epoch": 0.5542897410300076, + "grad_norm": 0.0, + "learning_rate": 8.731045498244722e-06, + "loss": 1.2314, + "step": 18878 + }, + { + "epoch": 0.5543191027071467, + "grad_norm": 0.0, + "learning_rate": 8.73010223109211e-06, + "loss": 1.3047, + "step": 18879 + }, + { + "epoch": 0.5543484643842856, + "grad_norm": 0.0, + "learning_rate": 8.729158975423502e-06, + "loss": 1.2002, + "step": 18880 + }, + { + "epoch": 0.5543778260614246, + "grad_norm": 0.0, + "learning_rate": 8.728215731247443e-06, + "loss": 1.3877, + "step": 18881 + }, + { + "epoch": 0.5544071877385637, + "grad_norm": 0.0, + "learning_rate": 8.72727249857245e-06, + "loss": 1.2715, + "step": 18882 + }, + { + "epoch": 0.5544365494157026, + "grad_norm": 0.0, + "learning_rate": 8.72632927740706e-06, + "loss": 1.373, + "step": 18883 + }, + { + "epoch": 0.5544659110928416, + "grad_norm": 0.0, + "learning_rate": 8.725386067759796e-06, + "loss": 1.2939, + "step": 18884 + }, + { + "epoch": 0.5544952727699807, + "grad_norm": 0.0, + "learning_rate": 8.724442869639198e-06, + "loss": 1.2822, + "step": 18885 + }, + { + "epoch": 0.5545246344471196, + "grad_norm": 0.0, + "learning_rate": 8.723499683053787e-06, + "loss": 1.2993, + "step": 18886 + }, + { + "epoch": 0.5545539961242586, + "grad_norm": 0.0, + "learning_rate": 8.722556508012096e-06, + "loss": 1.2119, + "step": 18887 + }, + { + "epoch": 0.5545833578013977, + "grad_norm": 0.0, + "learning_rate": 8.721613344522652e-06, + "loss": 1.251, + "step": 18888 + }, + { + "epoch": 0.5546127194785366, + "grad_norm": 0.0, + "learning_rate": 8.720670192593988e-06, + "loss": 1.2705, + "step": 18889 + }, + { + "epoch": 0.5546420811556756, + "grad_norm": 0.0, + "learning_rate": 8.719727052234632e-06, + "loss": 1.3008, + "step": 18890 + }, + { + "epoch": 0.5546714428328147, + "grad_norm": 0.0, + "learning_rate": 8.71878392345311e-06, + "loss": 1.2637, + "step": 18891 + }, + { + "epoch": 0.5547008045099536, + "grad_norm": 0.0, + "learning_rate": 8.717840806257956e-06, + "loss": 1.4844, + "step": 18892 + }, + { + "epoch": 0.5547301661870926, + "grad_norm": 0.0, + "learning_rate": 8.71689770065769e-06, + "loss": 1.3086, + "step": 18893 + }, + { + "epoch": 0.5547595278642315, + "grad_norm": 0.0, + "learning_rate": 8.715954606660852e-06, + "loss": 1.2363, + "step": 18894 + }, + { + "epoch": 0.5547888895413706, + "grad_norm": 0.0, + "learning_rate": 8.715011524275964e-06, + "loss": 1.2783, + "step": 18895 + }, + { + "epoch": 0.5548182512185096, + "grad_norm": 0.0, + "learning_rate": 8.714068453511558e-06, + "loss": 1.2651, + "step": 18896 + }, + { + "epoch": 0.5548476128956485, + "grad_norm": 0.0, + "learning_rate": 8.713125394376155e-06, + "loss": 1.2383, + "step": 18897 + }, + { + "epoch": 0.5548769745727876, + "grad_norm": 0.0, + "learning_rate": 8.712182346878291e-06, + "loss": 1.3779, + "step": 18898 + }, + { + "epoch": 0.5549063362499266, + "grad_norm": 0.0, + "learning_rate": 8.711239311026497e-06, + "loss": 1.3926, + "step": 18899 + }, + { + "epoch": 0.5549356979270655, + "grad_norm": 0.0, + "learning_rate": 8.710296286829289e-06, + "loss": 1.3179, + "step": 18900 + }, + { + "epoch": 0.5549650596042046, + "grad_norm": 0.0, + "learning_rate": 8.709353274295204e-06, + "loss": 1.3574, + "step": 18901 + }, + { + "epoch": 0.5549944212813436, + "grad_norm": 0.0, + "learning_rate": 8.70841027343277e-06, + "loss": 1.3633, + "step": 18902 + }, + { + "epoch": 0.5550237829584825, + "grad_norm": 0.0, + "learning_rate": 8.707467284250512e-06, + "loss": 1.2139, + "step": 18903 + }, + { + "epoch": 0.5550531446356216, + "grad_norm": 0.0, + "learning_rate": 8.706524306756954e-06, + "loss": 1.2451, + "step": 18904 + }, + { + "epoch": 0.5550825063127606, + "grad_norm": 0.0, + "learning_rate": 8.705581340960633e-06, + "loss": 1.417, + "step": 18905 + }, + { + "epoch": 0.5551118679898995, + "grad_norm": 0.0, + "learning_rate": 8.704638386870068e-06, + "loss": 1.2568, + "step": 18906 + }, + { + "epoch": 0.5551412296670386, + "grad_norm": 0.0, + "learning_rate": 8.703695444493794e-06, + "loss": 1.1953, + "step": 18907 + }, + { + "epoch": 0.5551705913441776, + "grad_norm": 0.0, + "learning_rate": 8.702752513840328e-06, + "loss": 1.251, + "step": 18908 + }, + { + "epoch": 0.5551999530213165, + "grad_norm": 0.0, + "learning_rate": 8.701809594918208e-06, + "loss": 1.3389, + "step": 18909 + }, + { + "epoch": 0.5552293146984556, + "grad_norm": 0.0, + "learning_rate": 8.700866687735957e-06, + "loss": 1.3145, + "step": 18910 + }, + { + "epoch": 0.5552586763755946, + "grad_norm": 0.0, + "learning_rate": 8.699923792302099e-06, + "loss": 1.2705, + "step": 18911 + }, + { + "epoch": 0.5552880380527335, + "grad_norm": 0.0, + "learning_rate": 8.698980908625166e-06, + "loss": 1.1968, + "step": 18912 + }, + { + "epoch": 0.5553173997298726, + "grad_norm": 0.0, + "learning_rate": 8.698038036713675e-06, + "loss": 1.2329, + "step": 18913 + }, + { + "epoch": 0.5553467614070116, + "grad_norm": 0.0, + "learning_rate": 8.697095176576166e-06, + "loss": 1.2661, + "step": 18914 + }, + { + "epoch": 0.5553761230841505, + "grad_norm": 0.0, + "learning_rate": 8.696152328221156e-06, + "loss": 1.374, + "step": 18915 + }, + { + "epoch": 0.5554054847612896, + "grad_norm": 0.0, + "learning_rate": 8.695209491657176e-06, + "loss": 1.3486, + "step": 18916 + }, + { + "epoch": 0.5554348464384286, + "grad_norm": 0.0, + "learning_rate": 8.694266666892747e-06, + "loss": 1.2363, + "step": 18917 + }, + { + "epoch": 0.5554642081155675, + "grad_norm": 0.0, + "learning_rate": 8.693323853936405e-06, + "loss": 1.25, + "step": 18918 + }, + { + "epoch": 0.5554935697927066, + "grad_norm": 0.0, + "learning_rate": 8.692381052796668e-06, + "loss": 1.252, + "step": 18919 + }, + { + "epoch": 0.5555229314698455, + "grad_norm": 0.0, + "learning_rate": 8.691438263482062e-06, + "loss": 1.3662, + "step": 18920 + }, + { + "epoch": 0.5555522931469845, + "grad_norm": 0.0, + "learning_rate": 8.690495486001117e-06, + "loss": 1.4019, + "step": 18921 + }, + { + "epoch": 0.5555816548241236, + "grad_norm": 0.0, + "learning_rate": 8.689552720362352e-06, + "loss": 1.2314, + "step": 18922 + }, + { + "epoch": 0.5556110165012625, + "grad_norm": 0.0, + "learning_rate": 8.688609966574304e-06, + "loss": 1.4531, + "step": 18923 + }, + { + "epoch": 0.5556403781784015, + "grad_norm": 0.0, + "learning_rate": 8.687667224645487e-06, + "loss": 1.2642, + "step": 18924 + }, + { + "epoch": 0.5556697398555406, + "grad_norm": 0.0, + "learning_rate": 8.686724494584435e-06, + "loss": 1.1685, + "step": 18925 + }, + { + "epoch": 0.5556991015326795, + "grad_norm": 0.0, + "learning_rate": 8.685781776399665e-06, + "loss": 1.2827, + "step": 18926 + }, + { + "epoch": 0.5557284632098185, + "grad_norm": 0.0, + "learning_rate": 8.684839070099713e-06, + "loss": 1.3174, + "step": 18927 + }, + { + "epoch": 0.5557578248869576, + "grad_norm": 0.0, + "learning_rate": 8.683896375693097e-06, + "loss": 1.3867, + "step": 18928 + }, + { + "epoch": 0.5557871865640965, + "grad_norm": 0.0, + "learning_rate": 8.682953693188338e-06, + "loss": 1.2793, + "step": 18929 + }, + { + "epoch": 0.5558165482412355, + "grad_norm": 0.0, + "learning_rate": 8.682011022593971e-06, + "loss": 1.4111, + "step": 18930 + }, + { + "epoch": 0.5558459099183746, + "grad_norm": 0.0, + "learning_rate": 8.681068363918514e-06, + "loss": 1.3662, + "step": 18931 + }, + { + "epoch": 0.5558752715955135, + "grad_norm": 0.0, + "learning_rate": 8.680125717170494e-06, + "loss": 1.228, + "step": 18932 + }, + { + "epoch": 0.5559046332726525, + "grad_norm": 0.0, + "learning_rate": 8.679183082358431e-06, + "loss": 1.2002, + "step": 18933 + }, + { + "epoch": 0.5559339949497916, + "grad_norm": 0.0, + "learning_rate": 8.678240459490861e-06, + "loss": 1.3711, + "step": 18934 + }, + { + "epoch": 0.5559633566269305, + "grad_norm": 0.0, + "learning_rate": 8.677297848576296e-06, + "loss": 1.374, + "step": 18935 + }, + { + "epoch": 0.5559927183040695, + "grad_norm": 0.0, + "learning_rate": 8.676355249623268e-06, + "loss": 1.2539, + "step": 18936 + }, + { + "epoch": 0.5560220799812086, + "grad_norm": 0.0, + "learning_rate": 8.675412662640295e-06, + "loss": 1.3379, + "step": 18937 + }, + { + "epoch": 0.5560514416583475, + "grad_norm": 0.0, + "learning_rate": 8.674470087635908e-06, + "loss": 1.3311, + "step": 18938 + }, + { + "epoch": 0.5560808033354865, + "grad_norm": 0.0, + "learning_rate": 8.673527524618627e-06, + "loss": 1.3486, + "step": 18939 + }, + { + "epoch": 0.5561101650126256, + "grad_norm": 0.0, + "learning_rate": 8.672584973596976e-06, + "loss": 1.2734, + "step": 18940 + }, + { + "epoch": 0.5561395266897645, + "grad_norm": 0.0, + "learning_rate": 8.67164243457948e-06, + "loss": 1.2998, + "step": 18941 + }, + { + "epoch": 0.5561688883669035, + "grad_norm": 0.0, + "learning_rate": 8.670699907574658e-06, + "loss": 1.209, + "step": 18942 + }, + { + "epoch": 0.5561982500440426, + "grad_norm": 0.0, + "learning_rate": 8.66975739259104e-06, + "loss": 1.3164, + "step": 18943 + }, + { + "epoch": 0.5562276117211815, + "grad_norm": 0.0, + "learning_rate": 8.668814889637145e-06, + "loss": 1.3257, + "step": 18944 + }, + { + "epoch": 0.5562569733983205, + "grad_norm": 0.0, + "learning_rate": 8.6678723987215e-06, + "loss": 1.2627, + "step": 18945 + }, + { + "epoch": 0.5562863350754595, + "grad_norm": 0.0, + "learning_rate": 8.666929919852623e-06, + "loss": 1.2812, + "step": 18946 + }, + { + "epoch": 0.5563156967525985, + "grad_norm": 0.0, + "learning_rate": 8.665987453039044e-06, + "loss": 1.2202, + "step": 18947 + }, + { + "epoch": 0.5563450584297375, + "grad_norm": 0.0, + "learning_rate": 8.665044998289282e-06, + "loss": 1.1899, + "step": 18948 + }, + { + "epoch": 0.5563744201068765, + "grad_norm": 0.0, + "learning_rate": 8.664102555611857e-06, + "loss": 1.3389, + "step": 18949 + }, + { + "epoch": 0.5564037817840155, + "grad_norm": 0.0, + "learning_rate": 8.663160125015298e-06, + "loss": 1.2607, + "step": 18950 + }, + { + "epoch": 0.5564331434611545, + "grad_norm": 0.0, + "learning_rate": 8.662217706508118e-06, + "loss": 1.2378, + "step": 18951 + }, + { + "epoch": 0.5564625051382935, + "grad_norm": 0.0, + "learning_rate": 8.661275300098855e-06, + "loss": 1.3066, + "step": 18952 + }, + { + "epoch": 0.5564918668154325, + "grad_norm": 0.0, + "learning_rate": 8.660332905796014e-06, + "loss": 1.3574, + "step": 18953 + }, + { + "epoch": 0.5565212284925715, + "grad_norm": 0.0, + "learning_rate": 8.65939052360813e-06, + "loss": 1.2969, + "step": 18954 + }, + { + "epoch": 0.5565505901697105, + "grad_norm": 0.0, + "learning_rate": 8.658448153543719e-06, + "loss": 1.4678, + "step": 18955 + }, + { + "epoch": 0.5565799518468495, + "grad_norm": 0.0, + "learning_rate": 8.657505795611308e-06, + "loss": 1.2256, + "step": 18956 + }, + { + "epoch": 0.5566093135239885, + "grad_norm": 0.0, + "learning_rate": 8.656563449819411e-06, + "loss": 1.2256, + "step": 18957 + }, + { + "epoch": 0.5566386752011275, + "grad_norm": 0.0, + "learning_rate": 8.655621116176559e-06, + "loss": 1.3799, + "step": 18958 + }, + { + "epoch": 0.5566680368782665, + "grad_norm": 0.0, + "learning_rate": 8.65467879469127e-06, + "loss": 1.2656, + "step": 18959 + }, + { + "epoch": 0.5566973985554055, + "grad_norm": 0.0, + "learning_rate": 8.653736485372063e-06, + "loss": 1.2578, + "step": 18960 + }, + { + "epoch": 0.5567267602325445, + "grad_norm": 0.0, + "learning_rate": 8.652794188227463e-06, + "loss": 1.2031, + "step": 18961 + }, + { + "epoch": 0.5567561219096835, + "grad_norm": 0.0, + "learning_rate": 8.651851903265986e-06, + "loss": 1.1997, + "step": 18962 + }, + { + "epoch": 0.5567854835868224, + "grad_norm": 0.0, + "learning_rate": 8.650909630496162e-06, + "loss": 1.2134, + "step": 18963 + }, + { + "epoch": 0.5568148452639615, + "grad_norm": 0.0, + "learning_rate": 8.649967369926507e-06, + "loss": 1.3496, + "step": 18964 + }, + { + "epoch": 0.5568442069411005, + "grad_norm": 0.0, + "learning_rate": 8.649025121565543e-06, + "loss": 1.272, + "step": 18965 + }, + { + "epoch": 0.5568735686182394, + "grad_norm": 0.0, + "learning_rate": 8.648082885421787e-06, + "loss": 1.2939, + "step": 18966 + }, + { + "epoch": 0.5569029302953785, + "grad_norm": 0.0, + "learning_rate": 8.647140661503768e-06, + "loss": 1.2656, + "step": 18967 + }, + { + "epoch": 0.5569322919725175, + "grad_norm": 0.0, + "learning_rate": 8.646198449820003e-06, + "loss": 1.3896, + "step": 18968 + }, + { + "epoch": 0.5569616536496564, + "grad_norm": 0.0, + "learning_rate": 8.645256250379009e-06, + "loss": 1.2686, + "step": 18969 + }, + { + "epoch": 0.5569910153267955, + "grad_norm": 0.0, + "learning_rate": 8.644314063189313e-06, + "loss": 1.3467, + "step": 18970 + }, + { + "epoch": 0.5570203770039345, + "grad_norm": 0.0, + "learning_rate": 8.643371888259428e-06, + "loss": 1.3057, + "step": 18971 + }, + { + "epoch": 0.5570497386810734, + "grad_norm": 0.0, + "learning_rate": 8.642429725597882e-06, + "loss": 1.2773, + "step": 18972 + }, + { + "epoch": 0.5570791003582125, + "grad_norm": 0.0, + "learning_rate": 8.64148757521319e-06, + "loss": 1.2539, + "step": 18973 + }, + { + "epoch": 0.5571084620353515, + "grad_norm": 0.0, + "learning_rate": 8.640545437113874e-06, + "loss": 1.2358, + "step": 18974 + }, + { + "epoch": 0.5571378237124904, + "grad_norm": 0.0, + "learning_rate": 8.639603311308451e-06, + "loss": 1.2642, + "step": 18975 + }, + { + "epoch": 0.5571671853896295, + "grad_norm": 0.0, + "learning_rate": 8.638661197805448e-06, + "loss": 1.3691, + "step": 18976 + }, + { + "epoch": 0.5571965470667685, + "grad_norm": 0.0, + "learning_rate": 8.637719096613378e-06, + "loss": 1.2686, + "step": 18977 + }, + { + "epoch": 0.5572259087439074, + "grad_norm": 0.0, + "learning_rate": 8.636777007740767e-06, + "loss": 1.2363, + "step": 18978 + }, + { + "epoch": 0.5572552704210465, + "grad_norm": 0.0, + "learning_rate": 8.635834931196129e-06, + "loss": 1.2725, + "step": 18979 + }, + { + "epoch": 0.5572846320981855, + "grad_norm": 0.0, + "learning_rate": 8.634892866987982e-06, + "loss": 1.25, + "step": 18980 + }, + { + "epoch": 0.5573139937753244, + "grad_norm": 0.0, + "learning_rate": 8.633950815124852e-06, + "loss": 1.2256, + "step": 18981 + }, + { + "epoch": 0.5573433554524635, + "grad_norm": 0.0, + "learning_rate": 8.63300877561525e-06, + "loss": 1.1875, + "step": 18982 + }, + { + "epoch": 0.5573727171296025, + "grad_norm": 0.0, + "learning_rate": 8.632066748467704e-06, + "loss": 1.3232, + "step": 18983 + }, + { + "epoch": 0.5574020788067414, + "grad_norm": 0.0, + "learning_rate": 8.631124733690727e-06, + "loss": 1.2939, + "step": 18984 + }, + { + "epoch": 0.5574314404838805, + "grad_norm": 0.0, + "learning_rate": 8.63018273129284e-06, + "loss": 1.2129, + "step": 18985 + }, + { + "epoch": 0.5574608021610195, + "grad_norm": 0.0, + "learning_rate": 8.629240741282559e-06, + "loss": 1.3174, + "step": 18986 + }, + { + "epoch": 0.5574901638381584, + "grad_norm": 0.0, + "learning_rate": 8.62829876366841e-06, + "loss": 1.1553, + "step": 18987 + }, + { + "epoch": 0.5575195255152975, + "grad_norm": 0.0, + "learning_rate": 8.627356798458904e-06, + "loss": 1.2529, + "step": 18988 + }, + { + "epoch": 0.5575488871924364, + "grad_norm": 0.0, + "learning_rate": 8.626414845662562e-06, + "loss": 1.3359, + "step": 18989 + }, + { + "epoch": 0.5575782488695754, + "grad_norm": 0.0, + "learning_rate": 8.625472905287903e-06, + "loss": 1.2031, + "step": 18990 + }, + { + "epoch": 0.5576076105467145, + "grad_norm": 0.0, + "learning_rate": 8.624530977343442e-06, + "loss": 1.3643, + "step": 18991 + }, + { + "epoch": 0.5576369722238534, + "grad_norm": 0.0, + "learning_rate": 8.623589061837703e-06, + "loss": 1.3379, + "step": 18992 + }, + { + "epoch": 0.5576663339009924, + "grad_norm": 0.0, + "learning_rate": 8.622647158779197e-06, + "loss": 1.3125, + "step": 18993 + }, + { + "epoch": 0.5576956955781314, + "grad_norm": 0.0, + "learning_rate": 8.62170526817645e-06, + "loss": 1.3174, + "step": 18994 + }, + { + "epoch": 0.5577250572552704, + "grad_norm": 0.0, + "learning_rate": 8.62076339003797e-06, + "loss": 1.3389, + "step": 18995 + }, + { + "epoch": 0.5577544189324094, + "grad_norm": 0.0, + "learning_rate": 8.619821524372284e-06, + "loss": 1.2188, + "step": 18996 + }, + { + "epoch": 0.5577837806095484, + "grad_norm": 0.0, + "learning_rate": 8.618879671187905e-06, + "loss": 1.3252, + "step": 18997 + }, + { + "epoch": 0.5578131422866874, + "grad_norm": 0.0, + "learning_rate": 8.617937830493349e-06, + "loss": 1.3691, + "step": 18998 + }, + { + "epoch": 0.5578425039638264, + "grad_norm": 0.0, + "learning_rate": 8.616996002297138e-06, + "loss": 1.2002, + "step": 18999 + }, + { + "epoch": 0.5578718656409654, + "grad_norm": 0.0, + "learning_rate": 8.61605418660778e-06, + "loss": 1.3672, + "step": 19000 + }, + { + "epoch": 0.5579012273181044, + "grad_norm": 0.0, + "learning_rate": 8.615112383433808e-06, + "loss": 1.2446, + "step": 19001 + }, + { + "epoch": 0.5579305889952434, + "grad_norm": 0.0, + "learning_rate": 8.614170592783722e-06, + "loss": 1.2773, + "step": 19002 + }, + { + "epoch": 0.5579599506723824, + "grad_norm": 0.0, + "learning_rate": 8.613228814666049e-06, + "loss": 1.3115, + "step": 19003 + }, + { + "epoch": 0.5579893123495214, + "grad_norm": 0.0, + "learning_rate": 8.612287049089301e-06, + "loss": 1.2393, + "step": 19004 + }, + { + "epoch": 0.5580186740266604, + "grad_norm": 0.0, + "learning_rate": 8.611345296062001e-06, + "loss": 1.4033, + "step": 19005 + }, + { + "epoch": 0.5580480357037993, + "grad_norm": 0.0, + "learning_rate": 8.610403555592655e-06, + "loss": 1.1738, + "step": 19006 + }, + { + "epoch": 0.5580773973809384, + "grad_norm": 0.0, + "learning_rate": 8.60946182768979e-06, + "loss": 1.3457, + "step": 19007 + }, + { + "epoch": 0.5581067590580774, + "grad_norm": 0.0, + "learning_rate": 8.608520112361918e-06, + "loss": 1.2988, + "step": 19008 + }, + { + "epoch": 0.5581361207352163, + "grad_norm": 0.0, + "learning_rate": 8.607578409617553e-06, + "loss": 1.2793, + "step": 19009 + }, + { + "epoch": 0.5581654824123554, + "grad_norm": 0.0, + "learning_rate": 8.606636719465215e-06, + "loss": 1.1025, + "step": 19010 + }, + { + "epoch": 0.5581948440894944, + "grad_norm": 0.0, + "learning_rate": 8.605695041913414e-06, + "loss": 1.166, + "step": 19011 + }, + { + "epoch": 0.5582242057666333, + "grad_norm": 0.0, + "learning_rate": 8.604753376970674e-06, + "loss": 1.291, + "step": 19012 + }, + { + "epoch": 0.5582535674437724, + "grad_norm": 0.0, + "learning_rate": 8.603811724645503e-06, + "loss": 1.375, + "step": 19013 + }, + { + "epoch": 0.5582829291209114, + "grad_norm": 0.0, + "learning_rate": 8.602870084946424e-06, + "loss": 1.4053, + "step": 19014 + }, + { + "epoch": 0.5583122907980503, + "grad_norm": 0.0, + "learning_rate": 8.601928457881943e-06, + "loss": 1.3125, + "step": 19015 + }, + { + "epoch": 0.5583416524751894, + "grad_norm": 0.0, + "learning_rate": 8.600986843460587e-06, + "loss": 1.2559, + "step": 19016 + }, + { + "epoch": 0.5583710141523284, + "grad_norm": 0.0, + "learning_rate": 8.600045241690864e-06, + "loss": 1.3359, + "step": 19017 + }, + { + "epoch": 0.5584003758294673, + "grad_norm": 0.0, + "learning_rate": 8.59910365258129e-06, + "loss": 1.228, + "step": 19018 + }, + { + "epoch": 0.5584297375066064, + "grad_norm": 0.0, + "learning_rate": 8.59816207614038e-06, + "loss": 1.165, + "step": 19019 + }, + { + "epoch": 0.5584590991837454, + "grad_norm": 0.0, + "learning_rate": 8.597220512376646e-06, + "loss": 1.3066, + "step": 19020 + }, + { + "epoch": 0.5584884608608843, + "grad_norm": 0.0, + "learning_rate": 8.596278961298613e-06, + "loss": 1.2236, + "step": 19021 + }, + { + "epoch": 0.5585178225380234, + "grad_norm": 0.0, + "learning_rate": 8.595337422914782e-06, + "loss": 1.3271, + "step": 19022 + }, + { + "epoch": 0.5585471842151624, + "grad_norm": 0.0, + "learning_rate": 8.594395897233681e-06, + "loss": 1.2568, + "step": 19023 + }, + { + "epoch": 0.5585765458923013, + "grad_norm": 0.0, + "learning_rate": 8.59345438426381e-06, + "loss": 1.1958, + "step": 19024 + }, + { + "epoch": 0.5586059075694404, + "grad_norm": 0.0, + "learning_rate": 8.592512884013697e-06, + "loss": 1.3057, + "step": 19025 + }, + { + "epoch": 0.5586352692465794, + "grad_norm": 0.0, + "learning_rate": 8.591571396491848e-06, + "loss": 1.3936, + "step": 19026 + }, + { + "epoch": 0.5586646309237183, + "grad_norm": 0.0, + "learning_rate": 8.590629921706784e-06, + "loss": 1.1738, + "step": 19027 + }, + { + "epoch": 0.5586939926008574, + "grad_norm": 0.0, + "learning_rate": 8.589688459667012e-06, + "loss": 1.1787, + "step": 19028 + }, + { + "epoch": 0.5587233542779964, + "grad_norm": 0.0, + "learning_rate": 8.588747010381048e-06, + "loss": 1.2271, + "step": 19029 + }, + { + "epoch": 0.5587527159551353, + "grad_norm": 0.0, + "learning_rate": 8.587805573857407e-06, + "loss": 1.3525, + "step": 19030 + }, + { + "epoch": 0.5587820776322744, + "grad_norm": 0.0, + "learning_rate": 8.586864150104598e-06, + "loss": 1.1826, + "step": 19031 + }, + { + "epoch": 0.5588114393094133, + "grad_norm": 0.0, + "learning_rate": 8.585922739131143e-06, + "loss": 1.3164, + "step": 19032 + }, + { + "epoch": 0.5588408009865523, + "grad_norm": 0.0, + "learning_rate": 8.584981340945548e-06, + "loss": 1.4189, + "step": 19033 + }, + { + "epoch": 0.5588701626636914, + "grad_norm": 0.0, + "learning_rate": 8.584039955556331e-06, + "loss": 1.4414, + "step": 19034 + }, + { + "epoch": 0.5588995243408303, + "grad_norm": 0.0, + "learning_rate": 8.583098582972e-06, + "loss": 1.416, + "step": 19035 + }, + { + "epoch": 0.5589288860179693, + "grad_norm": 0.0, + "learning_rate": 8.582157223201076e-06, + "loss": 1.2305, + "step": 19036 + }, + { + "epoch": 0.5589582476951084, + "grad_norm": 0.0, + "learning_rate": 8.581215876252067e-06, + "loss": 1.3486, + "step": 19037 + }, + { + "epoch": 0.5589876093722473, + "grad_norm": 0.0, + "learning_rate": 8.580274542133484e-06, + "loss": 1.335, + "step": 19038 + }, + { + "epoch": 0.5590169710493863, + "grad_norm": 0.0, + "learning_rate": 8.579333220853842e-06, + "loss": 1.334, + "step": 19039 + }, + { + "epoch": 0.5590463327265254, + "grad_norm": 0.0, + "learning_rate": 8.578391912421652e-06, + "loss": 1.3018, + "step": 19040 + }, + { + "epoch": 0.5590756944036643, + "grad_norm": 0.0, + "learning_rate": 8.57745061684543e-06, + "loss": 1.3799, + "step": 19041 + }, + { + "epoch": 0.5591050560808033, + "grad_norm": 0.0, + "learning_rate": 8.576509334133687e-06, + "loss": 1.1426, + "step": 19042 + }, + { + "epoch": 0.5591344177579424, + "grad_norm": 0.0, + "learning_rate": 8.575568064294934e-06, + "loss": 1.3271, + "step": 19043 + }, + { + "epoch": 0.5591637794350813, + "grad_norm": 0.0, + "learning_rate": 8.574626807337682e-06, + "loss": 1.3467, + "step": 19044 + }, + { + "epoch": 0.5591931411122203, + "grad_norm": 0.0, + "learning_rate": 8.573685563270447e-06, + "loss": 1.2734, + "step": 19045 + }, + { + "epoch": 0.5592225027893594, + "grad_norm": 0.0, + "learning_rate": 8.57274433210174e-06, + "loss": 1.2344, + "step": 19046 + }, + { + "epoch": 0.5592518644664983, + "grad_norm": 0.0, + "learning_rate": 8.571803113840069e-06, + "loss": 1.1919, + "step": 19047 + }, + { + "epoch": 0.5592812261436373, + "grad_norm": 0.0, + "learning_rate": 8.57086190849395e-06, + "loss": 1.3369, + "step": 19048 + }, + { + "epoch": 0.5593105878207764, + "grad_norm": 0.0, + "learning_rate": 8.56992071607189e-06, + "loss": 1.2534, + "step": 19049 + }, + { + "epoch": 0.5593399494979153, + "grad_norm": 0.0, + "learning_rate": 8.568979536582407e-06, + "loss": 1.1909, + "step": 19050 + }, + { + "epoch": 0.5593693111750543, + "grad_norm": 0.0, + "learning_rate": 8.568038370034006e-06, + "loss": 1.2783, + "step": 19051 + }, + { + "epoch": 0.5593986728521934, + "grad_norm": 0.0, + "learning_rate": 8.567097216435202e-06, + "loss": 1.1899, + "step": 19052 + }, + { + "epoch": 0.5594280345293323, + "grad_norm": 0.0, + "learning_rate": 8.566156075794501e-06, + "loss": 1.2822, + "step": 19053 + }, + { + "epoch": 0.5594573962064713, + "grad_norm": 0.0, + "learning_rate": 8.565214948120425e-06, + "loss": 1.2563, + "step": 19054 + }, + { + "epoch": 0.5594867578836104, + "grad_norm": 0.0, + "learning_rate": 8.56427383342147e-06, + "loss": 1.2852, + "step": 19055 + }, + { + "epoch": 0.5595161195607493, + "grad_norm": 0.0, + "learning_rate": 8.56333273170616e-06, + "loss": 1.2051, + "step": 19056 + }, + { + "epoch": 0.5595454812378883, + "grad_norm": 0.0, + "learning_rate": 8.562391642982998e-06, + "loss": 1.2812, + "step": 19057 + }, + { + "epoch": 0.5595748429150273, + "grad_norm": 0.0, + "learning_rate": 8.561450567260498e-06, + "loss": 1.334, + "step": 19058 + }, + { + "epoch": 0.5596042045921663, + "grad_norm": 0.0, + "learning_rate": 8.560509504547169e-06, + "loss": 1.2998, + "step": 19059 + }, + { + "epoch": 0.5596335662693053, + "grad_norm": 0.0, + "learning_rate": 8.559568454851517e-06, + "loss": 1.167, + "step": 19060 + }, + { + "epoch": 0.5596629279464443, + "grad_norm": 0.0, + "learning_rate": 8.55862741818206e-06, + "loss": 1.2832, + "step": 19061 + }, + { + "epoch": 0.5596922896235833, + "grad_norm": 0.0, + "learning_rate": 8.557686394547303e-06, + "loss": 1.3662, + "step": 19062 + }, + { + "epoch": 0.5597216513007223, + "grad_norm": 0.0, + "learning_rate": 8.55674538395576e-06, + "loss": 1.3662, + "step": 19063 + }, + { + "epoch": 0.5597510129778613, + "grad_norm": 0.0, + "learning_rate": 8.555804386415932e-06, + "loss": 1.3086, + "step": 19064 + }, + { + "epoch": 0.5597803746550003, + "grad_norm": 0.0, + "learning_rate": 8.554863401936341e-06, + "loss": 1.3594, + "step": 19065 + }, + { + "epoch": 0.5598097363321393, + "grad_norm": 0.0, + "learning_rate": 8.55392243052549e-06, + "loss": 1.2271, + "step": 19066 + }, + { + "epoch": 0.5598390980092783, + "grad_norm": 0.0, + "learning_rate": 8.552981472191885e-06, + "loss": 1.2227, + "step": 19067 + }, + { + "epoch": 0.5598684596864173, + "grad_norm": 0.0, + "learning_rate": 8.552040526944043e-06, + "loss": 1.3418, + "step": 19068 + }, + { + "epoch": 0.5598978213635563, + "grad_norm": 0.0, + "learning_rate": 8.551099594790463e-06, + "loss": 1.374, + "step": 19069 + }, + { + "epoch": 0.5599271830406953, + "grad_norm": 0.0, + "learning_rate": 8.550158675739668e-06, + "loss": 1.2515, + "step": 19070 + }, + { + "epoch": 0.5599565447178343, + "grad_norm": 0.0, + "learning_rate": 8.549217769800155e-06, + "loss": 1.3164, + "step": 19071 + }, + { + "epoch": 0.5599859063949733, + "grad_norm": 0.0, + "learning_rate": 8.548276876980441e-06, + "loss": 1.3125, + "step": 19072 + }, + { + "epoch": 0.5600152680721123, + "grad_norm": 0.0, + "learning_rate": 8.547335997289026e-06, + "loss": 1.2705, + "step": 19073 + }, + { + "epoch": 0.5600446297492513, + "grad_norm": 0.0, + "learning_rate": 8.546395130734426e-06, + "loss": 1.2227, + "step": 19074 + }, + { + "epoch": 0.5600739914263902, + "grad_norm": 0.0, + "learning_rate": 8.545454277325147e-06, + "loss": 1.2578, + "step": 19075 + }, + { + "epoch": 0.5601033531035293, + "grad_norm": 0.0, + "learning_rate": 8.544513437069699e-06, + "loss": 1.3164, + "step": 19076 + }, + { + "epoch": 0.5601327147806683, + "grad_norm": 0.0, + "learning_rate": 8.54357260997659e-06, + "loss": 1.3135, + "step": 19077 + }, + { + "epoch": 0.5601620764578072, + "grad_norm": 0.0, + "learning_rate": 8.54263179605432e-06, + "loss": 1.0835, + "step": 19078 + }, + { + "epoch": 0.5601914381349463, + "grad_norm": 0.0, + "learning_rate": 8.541690995311412e-06, + "loss": 1.3369, + "step": 19079 + }, + { + "epoch": 0.5602207998120853, + "grad_norm": 0.0, + "learning_rate": 8.54075020775636e-06, + "loss": 1.0981, + "step": 19080 + }, + { + "epoch": 0.5602501614892242, + "grad_norm": 0.0, + "learning_rate": 8.539809433397681e-06, + "loss": 1.2969, + "step": 19081 + }, + { + "epoch": 0.5602795231663633, + "grad_norm": 0.0, + "learning_rate": 8.538868672243878e-06, + "loss": 1.1729, + "step": 19082 + }, + { + "epoch": 0.5603088848435023, + "grad_norm": 0.0, + "learning_rate": 8.53792792430346e-06, + "loss": 1.3594, + "step": 19083 + }, + { + "epoch": 0.5603382465206412, + "grad_norm": 0.0, + "learning_rate": 8.536987189584931e-06, + "loss": 1.1357, + "step": 19084 + }, + { + "epoch": 0.5603676081977803, + "grad_norm": 0.0, + "learning_rate": 8.536046468096805e-06, + "loss": 1.2949, + "step": 19085 + }, + { + "epoch": 0.5603969698749193, + "grad_norm": 0.0, + "learning_rate": 8.535105759847588e-06, + "loss": 1.3252, + "step": 19086 + }, + { + "epoch": 0.5604263315520582, + "grad_norm": 0.0, + "learning_rate": 8.534165064845781e-06, + "loss": 1.3242, + "step": 19087 + }, + { + "epoch": 0.5604556932291973, + "grad_norm": 0.0, + "learning_rate": 8.533224383099897e-06, + "loss": 1.2471, + "step": 19088 + }, + { + "epoch": 0.5604850549063363, + "grad_norm": 0.0, + "learning_rate": 8.532283714618436e-06, + "loss": 1.3218, + "step": 19089 + }, + { + "epoch": 0.5605144165834752, + "grad_norm": 0.0, + "learning_rate": 8.531343059409915e-06, + "loss": 1.3916, + "step": 19090 + }, + { + "epoch": 0.5605437782606143, + "grad_norm": 0.0, + "learning_rate": 8.530402417482832e-06, + "loss": 1.1997, + "step": 19091 + }, + { + "epoch": 0.5605731399377533, + "grad_norm": 0.0, + "learning_rate": 8.529461788845697e-06, + "loss": 1.2539, + "step": 19092 + }, + { + "epoch": 0.5606025016148922, + "grad_norm": 0.0, + "learning_rate": 8.528521173507012e-06, + "loss": 1.3486, + "step": 19093 + }, + { + "epoch": 0.5606318632920313, + "grad_norm": 0.0, + "learning_rate": 8.527580571475292e-06, + "loss": 1.3291, + "step": 19094 + }, + { + "epoch": 0.5606612249691703, + "grad_norm": 0.0, + "learning_rate": 8.526639982759036e-06, + "loss": 1.2266, + "step": 19095 + }, + { + "epoch": 0.5606905866463092, + "grad_norm": 0.0, + "learning_rate": 8.525699407366753e-06, + "loss": 1.2383, + "step": 19096 + }, + { + "epoch": 0.5607199483234482, + "grad_norm": 0.0, + "learning_rate": 8.524758845306948e-06, + "loss": 1.3711, + "step": 19097 + }, + { + "epoch": 0.5607493100005873, + "grad_norm": 0.0, + "learning_rate": 8.523818296588123e-06, + "loss": 1.2754, + "step": 19098 + }, + { + "epoch": 0.5607786716777262, + "grad_norm": 0.0, + "learning_rate": 8.522877761218792e-06, + "loss": 1.1436, + "step": 19099 + }, + { + "epoch": 0.5608080333548652, + "grad_norm": 0.0, + "learning_rate": 8.521937239207453e-06, + "loss": 1.2466, + "step": 19100 + }, + { + "epoch": 0.5608373950320042, + "grad_norm": 0.0, + "learning_rate": 8.520996730562615e-06, + "loss": 1.2588, + "step": 19101 + }, + { + "epoch": 0.5608667567091432, + "grad_norm": 0.0, + "learning_rate": 8.52005623529278e-06, + "loss": 1.2451, + "step": 19102 + }, + { + "epoch": 0.5608961183862822, + "grad_norm": 0.0, + "learning_rate": 8.519115753406459e-06, + "loss": 1.3496, + "step": 19103 + }, + { + "epoch": 0.5609254800634212, + "grad_norm": 0.0, + "learning_rate": 8.51817528491215e-06, + "loss": 1.2695, + "step": 19104 + }, + { + "epoch": 0.5609548417405602, + "grad_norm": 0.0, + "learning_rate": 8.517234829818365e-06, + "loss": 1.2568, + "step": 19105 + }, + { + "epoch": 0.5609842034176992, + "grad_norm": 0.0, + "learning_rate": 8.516294388133604e-06, + "loss": 1.3281, + "step": 19106 + }, + { + "epoch": 0.5610135650948382, + "grad_norm": 0.0, + "learning_rate": 8.515353959866372e-06, + "loss": 1.3789, + "step": 19107 + }, + { + "epoch": 0.5610429267719772, + "grad_norm": 0.0, + "learning_rate": 8.514413545025176e-06, + "loss": 1.335, + "step": 19108 + }, + { + "epoch": 0.5610722884491162, + "grad_norm": 0.0, + "learning_rate": 8.513473143618514e-06, + "loss": 1.2715, + "step": 19109 + }, + { + "epoch": 0.5611016501262552, + "grad_norm": 0.0, + "learning_rate": 8.5125327556549e-06, + "loss": 1.4141, + "step": 19110 + }, + { + "epoch": 0.5611310118033942, + "grad_norm": 0.0, + "learning_rate": 8.511592381142832e-06, + "loss": 1.3945, + "step": 19111 + }, + { + "epoch": 0.5611603734805332, + "grad_norm": 0.0, + "learning_rate": 8.510652020090817e-06, + "loss": 1.3174, + "step": 19112 + }, + { + "epoch": 0.5611897351576722, + "grad_norm": 0.0, + "learning_rate": 8.50971167250735e-06, + "loss": 1.2642, + "step": 19113 + }, + { + "epoch": 0.5612190968348112, + "grad_norm": 0.0, + "learning_rate": 8.50877133840095e-06, + "loss": 1.2314, + "step": 19114 + }, + { + "epoch": 0.5612484585119502, + "grad_norm": 0.0, + "learning_rate": 8.507831017780109e-06, + "loss": 1.2422, + "step": 19115 + }, + { + "epoch": 0.5612778201890892, + "grad_norm": 0.0, + "learning_rate": 8.506890710653337e-06, + "loss": 1.2266, + "step": 19116 + }, + { + "epoch": 0.5613071818662282, + "grad_norm": 0.0, + "learning_rate": 8.505950417029134e-06, + "loss": 1.2158, + "step": 19117 + }, + { + "epoch": 0.5613365435433672, + "grad_norm": 0.0, + "learning_rate": 8.505010136916e-06, + "loss": 1.3125, + "step": 19118 + }, + { + "epoch": 0.5613659052205062, + "grad_norm": 0.0, + "learning_rate": 8.504069870322447e-06, + "loss": 1.2988, + "step": 19119 + }, + { + "epoch": 0.5613952668976452, + "grad_norm": 0.0, + "learning_rate": 8.50312961725697e-06, + "loss": 1.3379, + "step": 19120 + }, + { + "epoch": 0.5614246285747841, + "grad_norm": 0.0, + "learning_rate": 8.502189377728078e-06, + "loss": 1.335, + "step": 19121 + }, + { + "epoch": 0.5614539902519232, + "grad_norm": 0.0, + "learning_rate": 8.501249151744267e-06, + "loss": 1.2954, + "step": 19122 + }, + { + "epoch": 0.5614833519290622, + "grad_norm": 0.0, + "learning_rate": 8.500308939314049e-06, + "loss": 1.21, + "step": 19123 + }, + { + "epoch": 0.5615127136062011, + "grad_norm": 0.0, + "learning_rate": 8.499368740445917e-06, + "loss": 1.1943, + "step": 19124 + }, + { + "epoch": 0.5615420752833402, + "grad_norm": 0.0, + "learning_rate": 8.498428555148382e-06, + "loss": 1.3164, + "step": 19125 + }, + { + "epoch": 0.5615714369604792, + "grad_norm": 0.0, + "learning_rate": 8.49748838342994e-06, + "loss": 1.2617, + "step": 19126 + }, + { + "epoch": 0.5616007986376181, + "grad_norm": 0.0, + "learning_rate": 8.496548225299092e-06, + "loss": 1.3389, + "step": 19127 + }, + { + "epoch": 0.5616301603147572, + "grad_norm": 0.0, + "learning_rate": 8.495608080764348e-06, + "loss": 1.3623, + "step": 19128 + }, + { + "epoch": 0.5616595219918962, + "grad_norm": 0.0, + "learning_rate": 8.494667949834204e-06, + "loss": 1.3584, + "step": 19129 + }, + { + "epoch": 0.5616888836690351, + "grad_norm": 0.0, + "learning_rate": 8.493727832517164e-06, + "loss": 1.3594, + "step": 19130 + }, + { + "epoch": 0.5617182453461742, + "grad_norm": 0.0, + "learning_rate": 8.492787728821726e-06, + "loss": 1.3525, + "step": 19131 + }, + { + "epoch": 0.5617476070233132, + "grad_norm": 0.0, + "learning_rate": 8.491847638756398e-06, + "loss": 1.293, + "step": 19132 + }, + { + "epoch": 0.5617769687004521, + "grad_norm": 0.0, + "learning_rate": 8.490907562329676e-06, + "loss": 1.3672, + "step": 19133 + }, + { + "epoch": 0.5618063303775912, + "grad_norm": 0.0, + "learning_rate": 8.489967499550064e-06, + "loss": 1.3428, + "step": 19134 + }, + { + "epoch": 0.5618356920547302, + "grad_norm": 0.0, + "learning_rate": 8.489027450426065e-06, + "loss": 1.3232, + "step": 19135 + }, + { + "epoch": 0.5618650537318691, + "grad_norm": 0.0, + "learning_rate": 8.488087414966174e-06, + "loss": 1.2275, + "step": 19136 + }, + { + "epoch": 0.5618944154090082, + "grad_norm": 0.0, + "learning_rate": 8.487147393178898e-06, + "loss": 1.1509, + "step": 19137 + }, + { + "epoch": 0.5619237770861472, + "grad_norm": 0.0, + "learning_rate": 8.486207385072734e-06, + "loss": 1.291, + "step": 19138 + }, + { + "epoch": 0.5619531387632861, + "grad_norm": 0.0, + "learning_rate": 8.485267390656186e-06, + "loss": 1.2349, + "step": 19139 + }, + { + "epoch": 0.5619825004404252, + "grad_norm": 0.0, + "learning_rate": 8.484327409937751e-06, + "loss": 1.3672, + "step": 19140 + }, + { + "epoch": 0.5620118621175642, + "grad_norm": 0.0, + "learning_rate": 8.483387442925934e-06, + "loss": 1.3584, + "step": 19141 + }, + { + "epoch": 0.5620412237947031, + "grad_norm": 0.0, + "learning_rate": 8.482447489629227e-06, + "loss": 1.3291, + "step": 19142 + }, + { + "epoch": 0.5620705854718422, + "grad_norm": 0.0, + "learning_rate": 8.481507550056143e-06, + "loss": 1.2759, + "step": 19143 + }, + { + "epoch": 0.5620999471489811, + "grad_norm": 0.0, + "learning_rate": 8.48056762421517e-06, + "loss": 1.2471, + "step": 19144 + }, + { + "epoch": 0.5621293088261201, + "grad_norm": 0.0, + "learning_rate": 8.479627712114817e-06, + "loss": 1.3193, + "step": 19145 + }, + { + "epoch": 0.5621586705032592, + "grad_norm": 0.0, + "learning_rate": 8.47868781376358e-06, + "loss": 1.2378, + "step": 19146 + }, + { + "epoch": 0.5621880321803981, + "grad_norm": 0.0, + "learning_rate": 8.477747929169954e-06, + "loss": 1.3467, + "step": 19147 + }, + { + "epoch": 0.5622173938575371, + "grad_norm": 0.0, + "learning_rate": 8.476808058342449e-06, + "loss": 1.3799, + "step": 19148 + }, + { + "epoch": 0.5622467555346762, + "grad_norm": 0.0, + "learning_rate": 8.475868201289554e-06, + "loss": 1.3037, + "step": 19149 + }, + { + "epoch": 0.5622761172118151, + "grad_norm": 0.0, + "learning_rate": 8.474928358019777e-06, + "loss": 1.3701, + "step": 19150 + }, + { + "epoch": 0.5623054788889541, + "grad_norm": 0.0, + "learning_rate": 8.47398852854161e-06, + "loss": 1.3242, + "step": 19151 + }, + { + "epoch": 0.5623348405660932, + "grad_norm": 0.0, + "learning_rate": 8.473048712863558e-06, + "loss": 1.2915, + "step": 19152 + }, + { + "epoch": 0.5623642022432321, + "grad_norm": 0.0, + "learning_rate": 8.472108910994116e-06, + "loss": 1.3604, + "step": 19153 + }, + { + "epoch": 0.5623935639203711, + "grad_norm": 0.0, + "learning_rate": 8.471169122941787e-06, + "loss": 1.2734, + "step": 19154 + }, + { + "epoch": 0.5624229255975102, + "grad_norm": 0.0, + "learning_rate": 8.470229348715067e-06, + "loss": 1.1494, + "step": 19155 + }, + { + "epoch": 0.5624522872746491, + "grad_norm": 0.0, + "learning_rate": 8.46928958832245e-06, + "loss": 1.2852, + "step": 19156 + }, + { + "epoch": 0.5624816489517881, + "grad_norm": 0.0, + "learning_rate": 8.468349841772446e-06, + "loss": 1.2319, + "step": 19157 + }, + { + "epoch": 0.5625110106289272, + "grad_norm": 0.0, + "learning_rate": 8.46741010907354e-06, + "loss": 1.3262, + "step": 19158 + }, + { + "epoch": 0.5625403723060661, + "grad_norm": 0.0, + "learning_rate": 8.466470390234242e-06, + "loss": 1.1572, + "step": 19159 + }, + { + "epoch": 0.5625697339832051, + "grad_norm": 0.0, + "learning_rate": 8.465530685263042e-06, + "loss": 1.2764, + "step": 19160 + }, + { + "epoch": 0.5625990956603442, + "grad_norm": 0.0, + "learning_rate": 8.464590994168445e-06, + "loss": 1.1533, + "step": 19161 + }, + { + "epoch": 0.5626284573374831, + "grad_norm": 0.0, + "learning_rate": 8.463651316958939e-06, + "loss": 1.2583, + "step": 19162 + }, + { + "epoch": 0.5626578190146221, + "grad_norm": 0.0, + "learning_rate": 8.462711653643031e-06, + "loss": 1.29, + "step": 19163 + }, + { + "epoch": 0.5626871806917612, + "grad_norm": 0.0, + "learning_rate": 8.461772004229214e-06, + "loss": 1.21, + "step": 19164 + }, + { + "epoch": 0.5627165423689001, + "grad_norm": 0.0, + "learning_rate": 8.460832368725988e-06, + "loss": 1.2422, + "step": 19165 + }, + { + "epoch": 0.5627459040460391, + "grad_norm": 0.0, + "learning_rate": 8.459892747141852e-06, + "loss": 1.2598, + "step": 19166 + }, + { + "epoch": 0.5627752657231782, + "grad_norm": 0.0, + "learning_rate": 8.458953139485292e-06, + "loss": 1.4678, + "step": 19167 + }, + { + "epoch": 0.5628046274003171, + "grad_norm": 0.0, + "learning_rate": 8.45801354576482e-06, + "loss": 1.249, + "step": 19168 + }, + { + "epoch": 0.5628339890774561, + "grad_norm": 0.0, + "learning_rate": 8.457073965988925e-06, + "loss": 1.3154, + "step": 19169 + }, + { + "epoch": 0.5628633507545951, + "grad_norm": 0.0, + "learning_rate": 8.456134400166106e-06, + "loss": 1.3252, + "step": 19170 + }, + { + "epoch": 0.5628927124317341, + "grad_norm": 0.0, + "learning_rate": 8.455194848304856e-06, + "loss": 1.2783, + "step": 19171 + }, + { + "epoch": 0.5629220741088731, + "grad_norm": 0.0, + "learning_rate": 8.454255310413677e-06, + "loss": 1.4268, + "step": 19172 + }, + { + "epoch": 0.5629514357860121, + "grad_norm": 0.0, + "learning_rate": 8.453315786501063e-06, + "loss": 1.3613, + "step": 19173 + }, + { + "epoch": 0.5629807974631511, + "grad_norm": 0.0, + "learning_rate": 8.452376276575512e-06, + "loss": 1.3887, + "step": 19174 + }, + { + "epoch": 0.5630101591402901, + "grad_norm": 0.0, + "learning_rate": 8.451436780645517e-06, + "loss": 1.1392, + "step": 19175 + }, + { + "epoch": 0.5630395208174291, + "grad_norm": 0.0, + "learning_rate": 8.450497298719574e-06, + "loss": 1.3467, + "step": 19176 + }, + { + "epoch": 0.5630688824945681, + "grad_norm": 0.0, + "learning_rate": 8.449557830806183e-06, + "loss": 1.2109, + "step": 19177 + }, + { + "epoch": 0.5630982441717071, + "grad_norm": 0.0, + "learning_rate": 8.448618376913836e-06, + "loss": 1.3076, + "step": 19178 + }, + { + "epoch": 0.5631276058488461, + "grad_norm": 0.0, + "learning_rate": 8.447678937051033e-06, + "loss": 1.2207, + "step": 19179 + }, + { + "epoch": 0.5631569675259851, + "grad_norm": 0.0, + "learning_rate": 8.446739511226263e-06, + "loss": 1.2871, + "step": 19180 + }, + { + "epoch": 0.5631863292031241, + "grad_norm": 0.0, + "learning_rate": 8.445800099448029e-06, + "loss": 1.3281, + "step": 19181 + }, + { + "epoch": 0.5632156908802631, + "grad_norm": 0.0, + "learning_rate": 8.44486070172482e-06, + "loss": 1.0903, + "step": 19182 + }, + { + "epoch": 0.5632450525574021, + "grad_norm": 0.0, + "learning_rate": 8.443921318065137e-06, + "loss": 1.3301, + "step": 19183 + }, + { + "epoch": 0.563274414234541, + "grad_norm": 0.0, + "learning_rate": 8.442981948477472e-06, + "loss": 1.3389, + "step": 19184 + }, + { + "epoch": 0.5633037759116801, + "grad_norm": 0.0, + "learning_rate": 8.442042592970317e-06, + "loss": 1.3711, + "step": 19185 + }, + { + "epoch": 0.5633331375888191, + "grad_norm": 0.0, + "learning_rate": 8.441103251552173e-06, + "loss": 1.4131, + "step": 19186 + }, + { + "epoch": 0.563362499265958, + "grad_norm": 0.0, + "learning_rate": 8.440163924231526e-06, + "loss": 1.2754, + "step": 19187 + }, + { + "epoch": 0.5633918609430971, + "grad_norm": 0.0, + "learning_rate": 8.439224611016882e-06, + "loss": 1.3691, + "step": 19188 + }, + { + "epoch": 0.5634212226202361, + "grad_norm": 0.0, + "learning_rate": 8.438285311916728e-06, + "loss": 1.2114, + "step": 19189 + }, + { + "epoch": 0.563450584297375, + "grad_norm": 0.0, + "learning_rate": 8.43734602693956e-06, + "loss": 1.3516, + "step": 19190 + }, + { + "epoch": 0.5634799459745141, + "grad_norm": 0.0, + "learning_rate": 8.43640675609387e-06, + "loss": 1.3467, + "step": 19191 + }, + { + "epoch": 0.5635093076516531, + "grad_norm": 0.0, + "learning_rate": 8.435467499388154e-06, + "loss": 1.2144, + "step": 19192 + }, + { + "epoch": 0.563538669328792, + "grad_norm": 0.0, + "learning_rate": 8.434528256830908e-06, + "loss": 1.3672, + "step": 19193 + }, + { + "epoch": 0.5635680310059311, + "grad_norm": 0.0, + "learning_rate": 8.433589028430625e-06, + "loss": 1.3848, + "step": 19194 + }, + { + "epoch": 0.5635973926830701, + "grad_norm": 0.0, + "learning_rate": 8.432649814195798e-06, + "loss": 1.1875, + "step": 19195 + }, + { + "epoch": 0.563626754360209, + "grad_norm": 0.0, + "learning_rate": 8.431710614134914e-06, + "loss": 1.3066, + "step": 19196 + }, + { + "epoch": 0.563656116037348, + "grad_norm": 0.0, + "learning_rate": 8.43077142825648e-06, + "loss": 1.2407, + "step": 19197 + }, + { + "epoch": 0.5636854777144871, + "grad_norm": 0.0, + "learning_rate": 8.429832256568978e-06, + "loss": 1.3506, + "step": 19198 + }, + { + "epoch": 0.563714839391626, + "grad_norm": 0.0, + "learning_rate": 8.428893099080906e-06, + "loss": 1.2939, + "step": 19199 + }, + { + "epoch": 0.563744201068765, + "grad_norm": 0.0, + "learning_rate": 8.427953955800752e-06, + "loss": 1.2334, + "step": 19200 + }, + { + "epoch": 0.5637735627459041, + "grad_norm": 0.0, + "learning_rate": 8.427014826737019e-06, + "loss": 1.3047, + "step": 19201 + }, + { + "epoch": 0.563802924423043, + "grad_norm": 0.0, + "learning_rate": 8.426075711898192e-06, + "loss": 1.1914, + "step": 19202 + }, + { + "epoch": 0.563832286100182, + "grad_norm": 0.0, + "learning_rate": 8.425136611292765e-06, + "loss": 1.2432, + "step": 19203 + }, + { + "epoch": 0.5638616477773211, + "grad_norm": 0.0, + "learning_rate": 8.424197524929233e-06, + "loss": 1.46, + "step": 19204 + }, + { + "epoch": 0.56389100945446, + "grad_norm": 0.0, + "learning_rate": 8.423258452816081e-06, + "loss": 1.3252, + "step": 19205 + }, + { + "epoch": 0.563920371131599, + "grad_norm": 0.0, + "learning_rate": 8.422319394961813e-06, + "loss": 1.4443, + "step": 19206 + }, + { + "epoch": 0.5639497328087381, + "grad_norm": 0.0, + "learning_rate": 8.421380351374909e-06, + "loss": 1.3164, + "step": 19207 + }, + { + "epoch": 0.563979094485877, + "grad_norm": 0.0, + "learning_rate": 8.420441322063872e-06, + "loss": 1.3438, + "step": 19208 + }, + { + "epoch": 0.564008456163016, + "grad_norm": 0.0, + "learning_rate": 8.419502307037184e-06, + "loss": 1.3115, + "step": 19209 + }, + { + "epoch": 0.564037817840155, + "grad_norm": 0.0, + "learning_rate": 8.418563306303346e-06, + "loss": 1.4062, + "step": 19210 + }, + { + "epoch": 0.564067179517294, + "grad_norm": 0.0, + "learning_rate": 8.417624319870838e-06, + "loss": 1.2666, + "step": 19211 + }, + { + "epoch": 0.564096541194433, + "grad_norm": 0.0, + "learning_rate": 8.416685347748166e-06, + "loss": 1.2017, + "step": 19212 + }, + { + "epoch": 0.564125902871572, + "grad_norm": 0.0, + "learning_rate": 8.41574638994381e-06, + "loss": 1.1875, + "step": 19213 + }, + { + "epoch": 0.564155264548711, + "grad_norm": 0.0, + "learning_rate": 8.414807446466267e-06, + "loss": 1.2646, + "step": 19214 + }, + { + "epoch": 0.56418462622585, + "grad_norm": 0.0, + "learning_rate": 8.413868517324026e-06, + "loss": 1.3633, + "step": 19215 + }, + { + "epoch": 0.564213987902989, + "grad_norm": 0.0, + "learning_rate": 8.412929602525575e-06, + "loss": 1.2383, + "step": 19216 + }, + { + "epoch": 0.564243349580128, + "grad_norm": 0.0, + "learning_rate": 8.411990702079412e-06, + "loss": 1.3711, + "step": 19217 + }, + { + "epoch": 0.564272711257267, + "grad_norm": 0.0, + "learning_rate": 8.411051815994022e-06, + "loss": 1.2539, + "step": 19218 + }, + { + "epoch": 0.564302072934406, + "grad_norm": 0.0, + "learning_rate": 8.4101129442779e-06, + "loss": 1.3252, + "step": 19219 + }, + { + "epoch": 0.564331434611545, + "grad_norm": 0.0, + "learning_rate": 8.409174086939527e-06, + "loss": 1.3398, + "step": 19220 + }, + { + "epoch": 0.564360796288684, + "grad_norm": 0.0, + "learning_rate": 8.408235243987408e-06, + "loss": 1.2598, + "step": 19221 + }, + { + "epoch": 0.564390157965823, + "grad_norm": 0.0, + "learning_rate": 8.40729641543002e-06, + "loss": 1.2109, + "step": 19222 + }, + { + "epoch": 0.564419519642962, + "grad_norm": 0.0, + "learning_rate": 8.406357601275864e-06, + "loss": 1.2949, + "step": 19223 + }, + { + "epoch": 0.564448881320101, + "grad_norm": 0.0, + "learning_rate": 8.40541880153342e-06, + "loss": 1.2666, + "step": 19224 + }, + { + "epoch": 0.56447824299724, + "grad_norm": 0.0, + "learning_rate": 8.40448001621118e-06, + "loss": 1.3027, + "step": 19225 + }, + { + "epoch": 0.564507604674379, + "grad_norm": 0.0, + "learning_rate": 8.40354124531764e-06, + "loss": 1.2402, + "step": 19226 + }, + { + "epoch": 0.564536966351518, + "grad_norm": 0.0, + "learning_rate": 8.402602488861285e-06, + "loss": 1.3555, + "step": 19227 + }, + { + "epoch": 0.564566328028657, + "grad_norm": 0.0, + "learning_rate": 8.401663746850605e-06, + "loss": 1.1826, + "step": 19228 + }, + { + "epoch": 0.564595689705796, + "grad_norm": 0.0, + "learning_rate": 8.400725019294085e-06, + "loss": 1.1909, + "step": 19229 + }, + { + "epoch": 0.564625051382935, + "grad_norm": 0.0, + "learning_rate": 8.399786306200222e-06, + "loss": 1.334, + "step": 19230 + }, + { + "epoch": 0.564654413060074, + "grad_norm": 0.0, + "learning_rate": 8.3988476075775e-06, + "loss": 1.2593, + "step": 19231 + }, + { + "epoch": 0.564683774737213, + "grad_norm": 0.0, + "learning_rate": 8.397908923434411e-06, + "loss": 1.207, + "step": 19232 + }, + { + "epoch": 0.564713136414352, + "grad_norm": 0.0, + "learning_rate": 8.396970253779437e-06, + "loss": 1.1885, + "step": 19233 + }, + { + "epoch": 0.564742498091491, + "grad_norm": 0.0, + "learning_rate": 8.39603159862108e-06, + "loss": 1.3877, + "step": 19234 + }, + { + "epoch": 0.56477185976863, + "grad_norm": 0.0, + "learning_rate": 8.395092957967816e-06, + "loss": 1.0889, + "step": 19235 + }, + { + "epoch": 0.5648012214457689, + "grad_norm": 0.0, + "learning_rate": 8.394154331828134e-06, + "loss": 1.4043, + "step": 19236 + }, + { + "epoch": 0.564830583122908, + "grad_norm": 0.0, + "learning_rate": 8.39321572021053e-06, + "loss": 1.1162, + "step": 19237 + }, + { + "epoch": 0.564859944800047, + "grad_norm": 0.0, + "learning_rate": 8.392277123123484e-06, + "loss": 1.3857, + "step": 19238 + }, + { + "epoch": 0.5648893064771859, + "grad_norm": 0.0, + "learning_rate": 8.391338540575491e-06, + "loss": 1.2842, + "step": 19239 + }, + { + "epoch": 0.564918668154325, + "grad_norm": 0.0, + "learning_rate": 8.390399972575032e-06, + "loss": 1.2939, + "step": 19240 + }, + { + "epoch": 0.564948029831464, + "grad_norm": 0.0, + "learning_rate": 8.389461419130603e-06, + "loss": 1.4277, + "step": 19241 + }, + { + "epoch": 0.5649773915086029, + "grad_norm": 0.0, + "learning_rate": 8.388522880250682e-06, + "loss": 1.252, + "step": 19242 + }, + { + "epoch": 0.565006753185742, + "grad_norm": 0.0, + "learning_rate": 8.387584355943765e-06, + "loss": 1.2695, + "step": 19243 + }, + { + "epoch": 0.565036114862881, + "grad_norm": 0.0, + "learning_rate": 8.386645846218337e-06, + "loss": 1.3228, + "step": 19244 + }, + { + "epoch": 0.5650654765400199, + "grad_norm": 0.0, + "learning_rate": 8.385707351082877e-06, + "loss": 1.3945, + "step": 19245 + }, + { + "epoch": 0.565094838217159, + "grad_norm": 0.0, + "learning_rate": 8.384768870545884e-06, + "loss": 1.3057, + "step": 19246 + }, + { + "epoch": 0.565124199894298, + "grad_norm": 0.0, + "learning_rate": 8.383830404615838e-06, + "loss": 1.3286, + "step": 19247 + }, + { + "epoch": 0.5651535615714369, + "grad_norm": 0.0, + "learning_rate": 8.38289195330123e-06, + "loss": 1.1733, + "step": 19248 + }, + { + "epoch": 0.565182923248576, + "grad_norm": 0.0, + "learning_rate": 8.38195351661054e-06, + "loss": 1.3193, + "step": 19249 + }, + { + "epoch": 0.565212284925715, + "grad_norm": 0.0, + "learning_rate": 8.381015094552263e-06, + "loss": 1.2256, + "step": 19250 + }, + { + "epoch": 0.5652416466028539, + "grad_norm": 0.0, + "learning_rate": 8.380076687134878e-06, + "loss": 1.3311, + "step": 19251 + }, + { + "epoch": 0.565271008279993, + "grad_norm": 0.0, + "learning_rate": 8.379138294366878e-06, + "loss": 1.248, + "step": 19252 + }, + { + "epoch": 0.565300369957132, + "grad_norm": 0.0, + "learning_rate": 8.378199916256745e-06, + "loss": 1.2686, + "step": 19253 + }, + { + "epoch": 0.5653297316342709, + "grad_norm": 0.0, + "learning_rate": 8.37726155281296e-06, + "loss": 1.2168, + "step": 19254 + }, + { + "epoch": 0.56535909331141, + "grad_norm": 0.0, + "learning_rate": 8.376323204044019e-06, + "loss": 1.3022, + "step": 19255 + }, + { + "epoch": 0.565388454988549, + "grad_norm": 0.0, + "learning_rate": 8.375384869958401e-06, + "loss": 1.2002, + "step": 19256 + }, + { + "epoch": 0.5654178166656879, + "grad_norm": 0.0, + "learning_rate": 8.374446550564597e-06, + "loss": 1.373, + "step": 19257 + }, + { + "epoch": 0.565447178342827, + "grad_norm": 0.0, + "learning_rate": 8.373508245871083e-06, + "loss": 1.1895, + "step": 19258 + }, + { + "epoch": 0.565476540019966, + "grad_norm": 0.0, + "learning_rate": 8.372569955886358e-06, + "loss": 1.1934, + "step": 19259 + }, + { + "epoch": 0.5655059016971049, + "grad_norm": 0.0, + "learning_rate": 8.371631680618894e-06, + "loss": 1.2954, + "step": 19260 + }, + { + "epoch": 0.565535263374244, + "grad_norm": 0.0, + "learning_rate": 8.370693420077185e-06, + "loss": 1.2959, + "step": 19261 + }, + { + "epoch": 0.5655646250513829, + "grad_norm": 0.0, + "learning_rate": 8.369755174269712e-06, + "loss": 1.1831, + "step": 19262 + }, + { + "epoch": 0.5655939867285219, + "grad_norm": 0.0, + "learning_rate": 8.36881694320496e-06, + "loss": 1.2842, + "step": 19263 + }, + { + "epoch": 0.565623348405661, + "grad_norm": 0.0, + "learning_rate": 8.367878726891417e-06, + "loss": 1.3369, + "step": 19264 + }, + { + "epoch": 0.5656527100827999, + "grad_norm": 0.0, + "learning_rate": 8.36694052533756e-06, + "loss": 1.3857, + "step": 19265 + }, + { + "epoch": 0.5656820717599389, + "grad_norm": 0.0, + "learning_rate": 8.366002338551879e-06, + "loss": 1.3086, + "step": 19266 + }, + { + "epoch": 0.565711433437078, + "grad_norm": 0.0, + "learning_rate": 8.365064166542858e-06, + "loss": 1.2432, + "step": 19267 + }, + { + "epoch": 0.5657407951142169, + "grad_norm": 0.0, + "learning_rate": 8.364126009318983e-06, + "loss": 1.2764, + "step": 19268 + }, + { + "epoch": 0.5657701567913559, + "grad_norm": 0.0, + "learning_rate": 8.36318786688873e-06, + "loss": 1.3203, + "step": 19269 + }, + { + "epoch": 0.565799518468495, + "grad_norm": 0.0, + "learning_rate": 8.362249739260593e-06, + "loss": 1.2681, + "step": 19270 + }, + { + "epoch": 0.5658288801456339, + "grad_norm": 0.0, + "learning_rate": 8.361311626443048e-06, + "loss": 1.3125, + "step": 19271 + }, + { + "epoch": 0.5658582418227729, + "grad_norm": 0.0, + "learning_rate": 8.360373528444586e-06, + "loss": 1.2627, + "step": 19272 + }, + { + "epoch": 0.565887603499912, + "grad_norm": 0.0, + "learning_rate": 8.359435445273684e-06, + "loss": 1.4092, + "step": 19273 + }, + { + "epoch": 0.5659169651770509, + "grad_norm": 0.0, + "learning_rate": 8.358497376938823e-06, + "loss": 1.2666, + "step": 19274 + }, + { + "epoch": 0.5659463268541899, + "grad_norm": 0.0, + "learning_rate": 8.357559323448497e-06, + "loss": 1.3594, + "step": 19275 + }, + { + "epoch": 0.565975688531329, + "grad_norm": 0.0, + "learning_rate": 8.356621284811177e-06, + "loss": 1.207, + "step": 19276 + }, + { + "epoch": 0.5660050502084679, + "grad_norm": 0.0, + "learning_rate": 8.355683261035358e-06, + "loss": 1.2988, + "step": 19277 + }, + { + "epoch": 0.5660344118856069, + "grad_norm": 0.0, + "learning_rate": 8.354745252129509e-06, + "loss": 1.3345, + "step": 19278 + }, + { + "epoch": 0.566063773562746, + "grad_norm": 0.0, + "learning_rate": 8.353807258102125e-06, + "loss": 1.2734, + "step": 19279 + }, + { + "epoch": 0.5660931352398849, + "grad_norm": 0.0, + "learning_rate": 8.352869278961683e-06, + "loss": 1.4004, + "step": 19280 + }, + { + "epoch": 0.5661224969170239, + "grad_norm": 0.0, + "learning_rate": 8.351931314716667e-06, + "loss": 1.4141, + "step": 19281 + }, + { + "epoch": 0.566151858594163, + "grad_norm": 0.0, + "learning_rate": 8.350993365375554e-06, + "loss": 1.2998, + "step": 19282 + }, + { + "epoch": 0.5661812202713019, + "grad_norm": 0.0, + "learning_rate": 8.350055430946835e-06, + "loss": 1.2334, + "step": 19283 + }, + { + "epoch": 0.5662105819484409, + "grad_norm": 0.0, + "learning_rate": 8.349117511438987e-06, + "loss": 1.375, + "step": 19284 + }, + { + "epoch": 0.56623994362558, + "grad_norm": 0.0, + "learning_rate": 8.348179606860489e-06, + "loss": 1.1499, + "step": 19285 + }, + { + "epoch": 0.5662693053027189, + "grad_norm": 0.0, + "learning_rate": 8.347241717219828e-06, + "loss": 1.2148, + "step": 19286 + }, + { + "epoch": 0.5662986669798579, + "grad_norm": 0.0, + "learning_rate": 8.346303842525482e-06, + "loss": 1.2246, + "step": 19287 + }, + { + "epoch": 0.5663280286569969, + "grad_norm": 0.0, + "learning_rate": 8.345365982785936e-06, + "loss": 1.2617, + "step": 19288 + }, + { + "epoch": 0.5663573903341359, + "grad_norm": 0.0, + "learning_rate": 8.344428138009664e-06, + "loss": 1.3975, + "step": 19289 + }, + { + "epoch": 0.5663867520112749, + "grad_norm": 0.0, + "learning_rate": 8.343490308205159e-06, + "loss": 1.2578, + "step": 19290 + }, + { + "epoch": 0.5664161136884139, + "grad_norm": 0.0, + "learning_rate": 8.34255249338089e-06, + "loss": 1.3799, + "step": 19291 + }, + { + "epoch": 0.5664454753655529, + "grad_norm": 0.0, + "learning_rate": 8.341614693545347e-06, + "loss": 1.2637, + "step": 19292 + }, + { + "epoch": 0.5664748370426919, + "grad_norm": 0.0, + "learning_rate": 8.340676908707008e-06, + "loss": 1.416, + "step": 19293 + }, + { + "epoch": 0.5665041987198309, + "grad_norm": 0.0, + "learning_rate": 8.339739138874347e-06, + "loss": 1.4512, + "step": 19294 + }, + { + "epoch": 0.5665335603969699, + "grad_norm": 0.0, + "learning_rate": 8.338801384055856e-06, + "loss": 1.3276, + "step": 19295 + }, + { + "epoch": 0.5665629220741089, + "grad_norm": 0.0, + "learning_rate": 8.337863644260006e-06, + "loss": 1.1768, + "step": 19296 + }, + { + "epoch": 0.5665922837512478, + "grad_norm": 0.0, + "learning_rate": 8.336925919495284e-06, + "loss": 1.3086, + "step": 19297 + }, + { + "epoch": 0.5666216454283869, + "grad_norm": 0.0, + "learning_rate": 8.335988209770162e-06, + "loss": 1.3022, + "step": 19298 + }, + { + "epoch": 0.5666510071055259, + "grad_norm": 0.0, + "learning_rate": 8.33505051509313e-06, + "loss": 1.3208, + "step": 19299 + }, + { + "epoch": 0.5666803687826648, + "grad_norm": 0.0, + "learning_rate": 8.334112835472658e-06, + "loss": 1.3042, + "step": 19300 + }, + { + "epoch": 0.5667097304598039, + "grad_norm": 0.0, + "learning_rate": 8.333175170917234e-06, + "loss": 1.1992, + "step": 19301 + }, + { + "epoch": 0.5667390921369428, + "grad_norm": 0.0, + "learning_rate": 8.33223752143533e-06, + "loss": 1.2793, + "step": 19302 + }, + { + "epoch": 0.5667684538140818, + "grad_norm": 0.0, + "learning_rate": 8.331299887035432e-06, + "loss": 1.2881, + "step": 19303 + }, + { + "epoch": 0.5667978154912209, + "grad_norm": 0.0, + "learning_rate": 8.330362267726017e-06, + "loss": 1.3203, + "step": 19304 + }, + { + "epoch": 0.5668271771683598, + "grad_norm": 0.0, + "learning_rate": 8.329424663515562e-06, + "loss": 1.292, + "step": 19305 + }, + { + "epoch": 0.5668565388454988, + "grad_norm": 0.0, + "learning_rate": 8.32848707441255e-06, + "loss": 1.2939, + "step": 19306 + }, + { + "epoch": 0.5668859005226379, + "grad_norm": 0.0, + "learning_rate": 8.327549500425453e-06, + "loss": 1.291, + "step": 19307 + }, + { + "epoch": 0.5669152621997768, + "grad_norm": 0.0, + "learning_rate": 8.326611941562759e-06, + "loss": 1.2549, + "step": 19308 + }, + { + "epoch": 0.5669446238769158, + "grad_norm": 0.0, + "learning_rate": 8.325674397832938e-06, + "loss": 1.3379, + "step": 19309 + }, + { + "epoch": 0.5669739855540549, + "grad_norm": 0.0, + "learning_rate": 8.324736869244476e-06, + "loss": 1.2695, + "step": 19310 + }, + { + "epoch": 0.5670033472311938, + "grad_norm": 0.0, + "learning_rate": 8.323799355805841e-06, + "loss": 1.2549, + "step": 19311 + }, + { + "epoch": 0.5670327089083328, + "grad_norm": 0.0, + "learning_rate": 8.322861857525527e-06, + "loss": 1.3867, + "step": 19312 + }, + { + "epoch": 0.5670620705854719, + "grad_norm": 0.0, + "learning_rate": 8.321924374411998e-06, + "loss": 1.2314, + "step": 19313 + }, + { + "epoch": 0.5670914322626108, + "grad_norm": 0.0, + "learning_rate": 8.320986906473734e-06, + "loss": 1.3018, + "step": 19314 + }, + { + "epoch": 0.5671207939397498, + "grad_norm": 0.0, + "learning_rate": 8.32004945371922e-06, + "loss": 1.2573, + "step": 19315 + }, + { + "epoch": 0.5671501556168889, + "grad_norm": 0.0, + "learning_rate": 8.319112016156926e-06, + "loss": 1.3691, + "step": 19316 + }, + { + "epoch": 0.5671795172940278, + "grad_norm": 0.0, + "learning_rate": 8.318174593795334e-06, + "loss": 1.2695, + "step": 19317 + }, + { + "epoch": 0.5672088789711668, + "grad_norm": 0.0, + "learning_rate": 8.317237186642916e-06, + "loss": 1.3037, + "step": 19318 + }, + { + "epoch": 0.5672382406483059, + "grad_norm": 0.0, + "learning_rate": 8.316299794708159e-06, + "loss": 1.3955, + "step": 19319 + }, + { + "epoch": 0.5672676023254448, + "grad_norm": 0.0, + "learning_rate": 8.315362417999529e-06, + "loss": 1.2295, + "step": 19320 + }, + { + "epoch": 0.5672969640025838, + "grad_norm": 0.0, + "learning_rate": 8.314425056525512e-06, + "loss": 1.1611, + "step": 19321 + }, + { + "epoch": 0.5673263256797229, + "grad_norm": 0.0, + "learning_rate": 8.313487710294581e-06, + "loss": 1.3867, + "step": 19322 + }, + { + "epoch": 0.5673556873568618, + "grad_norm": 0.0, + "learning_rate": 8.312550379315208e-06, + "loss": 1.2705, + "step": 19323 + }, + { + "epoch": 0.5673850490340008, + "grad_norm": 0.0, + "learning_rate": 8.31161306359588e-06, + "loss": 1.3008, + "step": 19324 + }, + { + "epoch": 0.5674144107111398, + "grad_norm": 0.0, + "learning_rate": 8.310675763145062e-06, + "loss": 1.3066, + "step": 19325 + }, + { + "epoch": 0.5674437723882788, + "grad_norm": 0.0, + "learning_rate": 8.309738477971241e-06, + "loss": 1.3232, + "step": 19326 + }, + { + "epoch": 0.5674731340654178, + "grad_norm": 0.0, + "learning_rate": 8.308801208082881e-06, + "loss": 1.127, + "step": 19327 + }, + { + "epoch": 0.5675024957425568, + "grad_norm": 0.0, + "learning_rate": 8.307863953488471e-06, + "loss": 1.335, + "step": 19328 + }, + { + "epoch": 0.5675318574196958, + "grad_norm": 0.0, + "learning_rate": 8.306926714196478e-06, + "loss": 1.333, + "step": 19329 + }, + { + "epoch": 0.5675612190968348, + "grad_norm": 0.0, + "learning_rate": 8.305989490215383e-06, + "loss": 1.3135, + "step": 19330 + }, + { + "epoch": 0.5675905807739738, + "grad_norm": 0.0, + "learning_rate": 8.305052281553653e-06, + "loss": 1.2744, + "step": 19331 + }, + { + "epoch": 0.5676199424511128, + "grad_norm": 0.0, + "learning_rate": 8.304115088219776e-06, + "loss": 1.3779, + "step": 19332 + }, + { + "epoch": 0.5676493041282518, + "grad_norm": 0.0, + "learning_rate": 8.303177910222217e-06, + "loss": 1.3213, + "step": 19333 + }, + { + "epoch": 0.5676786658053908, + "grad_norm": 0.0, + "learning_rate": 8.302240747569456e-06, + "loss": 1.2295, + "step": 19334 + }, + { + "epoch": 0.5677080274825298, + "grad_norm": 0.0, + "learning_rate": 8.301303600269967e-06, + "loss": 1.3721, + "step": 19335 + }, + { + "epoch": 0.5677373891596688, + "grad_norm": 0.0, + "learning_rate": 8.30036646833222e-06, + "loss": 1.2568, + "step": 19336 + }, + { + "epoch": 0.5677667508368078, + "grad_norm": 0.0, + "learning_rate": 8.299429351764702e-06, + "loss": 1.3154, + "step": 19337 + }, + { + "epoch": 0.5677961125139468, + "grad_norm": 0.0, + "learning_rate": 8.298492250575873e-06, + "loss": 1.2271, + "step": 19338 + }, + { + "epoch": 0.5678254741910858, + "grad_norm": 0.0, + "learning_rate": 8.297555164774219e-06, + "loss": 1.2021, + "step": 19339 + }, + { + "epoch": 0.5678548358682248, + "grad_norm": 0.0, + "learning_rate": 8.296618094368207e-06, + "loss": 1.2578, + "step": 19340 + }, + { + "epoch": 0.5678841975453638, + "grad_norm": 0.0, + "learning_rate": 8.295681039366318e-06, + "loss": 1.3408, + "step": 19341 + }, + { + "epoch": 0.5679135592225028, + "grad_norm": 0.0, + "learning_rate": 8.29474399977702e-06, + "loss": 1.3184, + "step": 19342 + }, + { + "epoch": 0.5679429208996418, + "grad_norm": 0.0, + "learning_rate": 8.293806975608783e-06, + "loss": 1.3857, + "step": 19343 + }, + { + "epoch": 0.5679722825767808, + "grad_norm": 0.0, + "learning_rate": 8.292869966870091e-06, + "loss": 1.2441, + "step": 19344 + }, + { + "epoch": 0.5680016442539197, + "grad_norm": 0.0, + "learning_rate": 8.291932973569414e-06, + "loss": 1.2344, + "step": 19345 + }, + { + "epoch": 0.5680310059310588, + "grad_norm": 0.0, + "learning_rate": 8.290995995715223e-06, + "loss": 1.2891, + "step": 19346 + }, + { + "epoch": 0.5680603676081978, + "grad_norm": 0.0, + "learning_rate": 8.290059033315991e-06, + "loss": 1.4082, + "step": 19347 + }, + { + "epoch": 0.5680897292853367, + "grad_norm": 0.0, + "learning_rate": 8.289122086380196e-06, + "loss": 1.1587, + "step": 19348 + }, + { + "epoch": 0.5681190909624758, + "grad_norm": 0.0, + "learning_rate": 8.288185154916307e-06, + "loss": 1.2979, + "step": 19349 + }, + { + "epoch": 0.5681484526396148, + "grad_norm": 0.0, + "learning_rate": 8.287248238932801e-06, + "loss": 1.3281, + "step": 19350 + }, + { + "epoch": 0.5681778143167537, + "grad_norm": 0.0, + "learning_rate": 8.286311338438142e-06, + "loss": 1.3633, + "step": 19351 + }, + { + "epoch": 0.5682071759938928, + "grad_norm": 0.0, + "learning_rate": 8.285374453440814e-06, + "loss": 1.3154, + "step": 19352 + }, + { + "epoch": 0.5682365376710318, + "grad_norm": 0.0, + "learning_rate": 8.284437583949284e-06, + "loss": 1.1562, + "step": 19353 + }, + { + "epoch": 0.5682658993481707, + "grad_norm": 0.0, + "learning_rate": 8.28350072997202e-06, + "loss": 1.2334, + "step": 19354 + }, + { + "epoch": 0.5682952610253098, + "grad_norm": 0.0, + "learning_rate": 8.282563891517501e-06, + "loss": 1.3076, + "step": 19355 + }, + { + "epoch": 0.5683246227024488, + "grad_norm": 0.0, + "learning_rate": 8.281627068594193e-06, + "loss": 1.3506, + "step": 19356 + }, + { + "epoch": 0.5683539843795877, + "grad_norm": 0.0, + "learning_rate": 8.280690261210576e-06, + "loss": 1.3145, + "step": 19357 + }, + { + "epoch": 0.5683833460567268, + "grad_norm": 0.0, + "learning_rate": 8.279753469375117e-06, + "loss": 1.1875, + "step": 19358 + }, + { + "epoch": 0.5684127077338658, + "grad_norm": 0.0, + "learning_rate": 8.278816693096288e-06, + "loss": 1.2695, + "step": 19359 + }, + { + "epoch": 0.5684420694110047, + "grad_norm": 0.0, + "learning_rate": 8.277879932382555e-06, + "loss": 1.2236, + "step": 19360 + }, + { + "epoch": 0.5684714310881438, + "grad_norm": 0.0, + "learning_rate": 8.276943187242401e-06, + "loss": 1.209, + "step": 19361 + }, + { + "epoch": 0.5685007927652828, + "grad_norm": 0.0, + "learning_rate": 8.276006457684293e-06, + "loss": 1.4297, + "step": 19362 + }, + { + "epoch": 0.5685301544424217, + "grad_norm": 0.0, + "learning_rate": 8.275069743716693e-06, + "loss": 1.2539, + "step": 19363 + }, + { + "epoch": 0.5685595161195608, + "grad_norm": 0.0, + "learning_rate": 8.274133045348084e-06, + "loss": 1.2344, + "step": 19364 + }, + { + "epoch": 0.5685888777966998, + "grad_norm": 0.0, + "learning_rate": 8.273196362586929e-06, + "loss": 1.2295, + "step": 19365 + }, + { + "epoch": 0.5686182394738387, + "grad_norm": 0.0, + "learning_rate": 8.272259695441705e-06, + "loss": 1.2856, + "step": 19366 + }, + { + "epoch": 0.5686476011509778, + "grad_norm": 0.0, + "learning_rate": 8.271323043920874e-06, + "loss": 1.2842, + "step": 19367 + }, + { + "epoch": 0.5686769628281167, + "grad_norm": 0.0, + "learning_rate": 8.270386408032916e-06, + "loss": 1.1763, + "step": 19368 + }, + { + "epoch": 0.5687063245052557, + "grad_norm": 0.0, + "learning_rate": 8.269449787786294e-06, + "loss": 1.3804, + "step": 19369 + }, + { + "epoch": 0.5687356861823948, + "grad_norm": 0.0, + "learning_rate": 8.268513183189482e-06, + "loss": 1.2881, + "step": 19370 + }, + { + "epoch": 0.5687650478595337, + "grad_norm": 0.0, + "learning_rate": 8.26757659425095e-06, + "loss": 1.1846, + "step": 19371 + }, + { + "epoch": 0.5687944095366727, + "grad_norm": 0.0, + "learning_rate": 8.266640020979162e-06, + "loss": 1.2217, + "step": 19372 + }, + { + "epoch": 0.5688237712138118, + "grad_norm": 0.0, + "learning_rate": 8.265703463382597e-06, + "loss": 1.3027, + "step": 19373 + }, + { + "epoch": 0.5688531328909507, + "grad_norm": 0.0, + "learning_rate": 8.264766921469718e-06, + "loss": 1.2471, + "step": 19374 + }, + { + "epoch": 0.5688824945680897, + "grad_norm": 0.0, + "learning_rate": 8.263830395248998e-06, + "loss": 1.3135, + "step": 19375 + }, + { + "epoch": 0.5689118562452288, + "grad_norm": 0.0, + "learning_rate": 8.2628938847289e-06, + "loss": 1.3027, + "step": 19376 + }, + { + "epoch": 0.5689412179223677, + "grad_norm": 0.0, + "learning_rate": 8.2619573899179e-06, + "loss": 1.2539, + "step": 19377 + }, + { + "epoch": 0.5689705795995067, + "grad_norm": 0.0, + "learning_rate": 8.261020910824463e-06, + "loss": 1.168, + "step": 19378 + }, + { + "epoch": 0.5689999412766458, + "grad_norm": 0.0, + "learning_rate": 8.260084447457062e-06, + "loss": 1.3887, + "step": 19379 + }, + { + "epoch": 0.5690293029537847, + "grad_norm": 0.0, + "learning_rate": 8.25914799982416e-06, + "loss": 1.2988, + "step": 19380 + }, + { + "epoch": 0.5690586646309237, + "grad_norm": 0.0, + "learning_rate": 8.258211567934231e-06, + "loss": 1.3252, + "step": 19381 + }, + { + "epoch": 0.5690880263080628, + "grad_norm": 0.0, + "learning_rate": 8.25727515179574e-06, + "loss": 1.2021, + "step": 19382 + }, + { + "epoch": 0.5691173879852017, + "grad_norm": 0.0, + "learning_rate": 8.256338751417156e-06, + "loss": 1.3535, + "step": 19383 + }, + { + "epoch": 0.5691467496623407, + "grad_norm": 0.0, + "learning_rate": 8.255402366806948e-06, + "loss": 1.2783, + "step": 19384 + }, + { + "epoch": 0.5691761113394798, + "grad_norm": 0.0, + "learning_rate": 8.25446599797358e-06, + "loss": 1.2148, + "step": 19385 + }, + { + "epoch": 0.5692054730166187, + "grad_norm": 0.0, + "learning_rate": 8.253529644925529e-06, + "loss": 1.2656, + "step": 19386 + }, + { + "epoch": 0.5692348346937577, + "grad_norm": 0.0, + "learning_rate": 8.25259330767125e-06, + "loss": 1.3457, + "step": 19387 + }, + { + "epoch": 0.5692641963708968, + "grad_norm": 0.0, + "learning_rate": 8.251656986219221e-06, + "loss": 1.1689, + "step": 19388 + }, + { + "epoch": 0.5692935580480357, + "grad_norm": 0.0, + "learning_rate": 8.250720680577905e-06, + "loss": 1.124, + "step": 19389 + }, + { + "epoch": 0.5693229197251747, + "grad_norm": 0.0, + "learning_rate": 8.249784390755772e-06, + "loss": 1.2354, + "step": 19390 + }, + { + "epoch": 0.5693522814023138, + "grad_norm": 0.0, + "learning_rate": 8.248848116761285e-06, + "loss": 1.2939, + "step": 19391 + }, + { + "epoch": 0.5693816430794527, + "grad_norm": 0.0, + "learning_rate": 8.24791185860291e-06, + "loss": 1.2324, + "step": 19392 + }, + { + "epoch": 0.5694110047565917, + "grad_norm": 0.0, + "learning_rate": 8.246975616289122e-06, + "loss": 1.334, + "step": 19393 + }, + { + "epoch": 0.5694403664337307, + "grad_norm": 0.0, + "learning_rate": 8.24603938982838e-06, + "loss": 1.3418, + "step": 19394 + }, + { + "epoch": 0.5694697281108697, + "grad_norm": 0.0, + "learning_rate": 8.245103179229154e-06, + "loss": 1.3076, + "step": 19395 + }, + { + "epoch": 0.5694990897880087, + "grad_norm": 0.0, + "learning_rate": 8.244166984499905e-06, + "loss": 1.3643, + "step": 19396 + }, + { + "epoch": 0.5695284514651476, + "grad_norm": 0.0, + "learning_rate": 8.243230805649108e-06, + "loss": 1.2969, + "step": 19397 + }, + { + "epoch": 0.5695578131422867, + "grad_norm": 0.0, + "learning_rate": 8.242294642685223e-06, + "loss": 1.415, + "step": 19398 + }, + { + "epoch": 0.5695871748194257, + "grad_norm": 0.0, + "learning_rate": 8.24135849561672e-06, + "loss": 1.3281, + "step": 19399 + }, + { + "epoch": 0.5696165364965646, + "grad_norm": 0.0, + "learning_rate": 8.240422364452058e-06, + "loss": 1.2793, + "step": 19400 + }, + { + "epoch": 0.5696458981737037, + "grad_norm": 0.0, + "learning_rate": 8.239486249199713e-06, + "loss": 1.3535, + "step": 19401 + }, + { + "epoch": 0.5696752598508427, + "grad_norm": 0.0, + "learning_rate": 8.238550149868142e-06, + "loss": 1.2754, + "step": 19402 + }, + { + "epoch": 0.5697046215279816, + "grad_norm": 0.0, + "learning_rate": 8.237614066465814e-06, + "loss": 1.1885, + "step": 19403 + }, + { + "epoch": 0.5697339832051207, + "grad_norm": 0.0, + "learning_rate": 8.236677999001192e-06, + "loss": 1.2827, + "step": 19404 + }, + { + "epoch": 0.5697633448822597, + "grad_norm": 0.0, + "learning_rate": 8.23574194748274e-06, + "loss": 1.1489, + "step": 19405 + }, + { + "epoch": 0.5697927065593986, + "grad_norm": 0.0, + "learning_rate": 8.23480591191893e-06, + "loss": 1.2534, + "step": 19406 + }, + { + "epoch": 0.5698220682365377, + "grad_norm": 0.0, + "learning_rate": 8.23386989231822e-06, + "loss": 1.3223, + "step": 19407 + }, + { + "epoch": 0.5698514299136767, + "grad_norm": 0.0, + "learning_rate": 8.232933888689078e-06, + "loss": 1.332, + "step": 19408 + }, + { + "epoch": 0.5698807915908156, + "grad_norm": 0.0, + "learning_rate": 8.231997901039965e-06, + "loss": 1.3921, + "step": 19409 + }, + { + "epoch": 0.5699101532679547, + "grad_norm": 0.0, + "learning_rate": 8.23106192937935e-06, + "loss": 1.4932, + "step": 19410 + }, + { + "epoch": 0.5699395149450937, + "grad_norm": 0.0, + "learning_rate": 8.230125973715699e-06, + "loss": 1.2358, + "step": 19411 + }, + { + "epoch": 0.5699688766222326, + "grad_norm": 0.0, + "learning_rate": 8.229190034057465e-06, + "loss": 1.2119, + "step": 19412 + }, + { + "epoch": 0.5699982382993717, + "grad_norm": 0.0, + "learning_rate": 8.228254110413123e-06, + "loss": 1.3232, + "step": 19413 + }, + { + "epoch": 0.5700275999765106, + "grad_norm": 0.0, + "learning_rate": 8.22731820279113e-06, + "loss": 1.2852, + "step": 19414 + }, + { + "epoch": 0.5700569616536496, + "grad_norm": 0.0, + "learning_rate": 8.226382311199956e-06, + "loss": 1.3486, + "step": 19415 + }, + { + "epoch": 0.5700863233307887, + "grad_norm": 0.0, + "learning_rate": 8.225446435648057e-06, + "loss": 1.29, + "step": 19416 + }, + { + "epoch": 0.5701156850079276, + "grad_norm": 0.0, + "learning_rate": 8.224510576143902e-06, + "loss": 1.2944, + "step": 19417 + }, + { + "epoch": 0.5701450466850666, + "grad_norm": 0.0, + "learning_rate": 8.223574732695953e-06, + "loss": 1.3047, + "step": 19418 + }, + { + "epoch": 0.5701744083622057, + "grad_norm": 0.0, + "learning_rate": 8.222638905312674e-06, + "loss": 1.1772, + "step": 19419 + }, + { + "epoch": 0.5702037700393446, + "grad_norm": 0.0, + "learning_rate": 8.221703094002522e-06, + "loss": 1.3271, + "step": 19420 + }, + { + "epoch": 0.5702331317164836, + "grad_norm": 0.0, + "learning_rate": 8.220767298773968e-06, + "loss": 1.3271, + "step": 19421 + }, + { + "epoch": 0.5702624933936227, + "grad_norm": 0.0, + "learning_rate": 8.219831519635472e-06, + "loss": 1.3574, + "step": 19422 + }, + { + "epoch": 0.5702918550707616, + "grad_norm": 0.0, + "learning_rate": 8.218895756595493e-06, + "loss": 1.2373, + "step": 19423 + }, + { + "epoch": 0.5703212167479006, + "grad_norm": 0.0, + "learning_rate": 8.217960009662498e-06, + "loss": 1.2422, + "step": 19424 + }, + { + "epoch": 0.5703505784250397, + "grad_norm": 0.0, + "learning_rate": 8.217024278844944e-06, + "loss": 1.2705, + "step": 19425 + }, + { + "epoch": 0.5703799401021786, + "grad_norm": 0.0, + "learning_rate": 8.216088564151298e-06, + "loss": 1.2832, + "step": 19426 + }, + { + "epoch": 0.5704093017793176, + "grad_norm": 0.0, + "learning_rate": 8.21515286559002e-06, + "loss": 1.2856, + "step": 19427 + }, + { + "epoch": 0.5704386634564567, + "grad_norm": 0.0, + "learning_rate": 8.214217183169572e-06, + "loss": 1.167, + "step": 19428 + }, + { + "epoch": 0.5704680251335956, + "grad_norm": 0.0, + "learning_rate": 8.213281516898413e-06, + "loss": 1.2959, + "step": 19429 + }, + { + "epoch": 0.5704973868107346, + "grad_norm": 0.0, + "learning_rate": 8.21234586678501e-06, + "loss": 1.3594, + "step": 19430 + }, + { + "epoch": 0.5705267484878737, + "grad_norm": 0.0, + "learning_rate": 8.211410232837822e-06, + "loss": 1.2969, + "step": 19431 + }, + { + "epoch": 0.5705561101650126, + "grad_norm": 0.0, + "learning_rate": 8.210474615065307e-06, + "loss": 1.25, + "step": 19432 + }, + { + "epoch": 0.5705854718421516, + "grad_norm": 0.0, + "learning_rate": 8.209539013475928e-06, + "loss": 1.3174, + "step": 19433 + }, + { + "epoch": 0.5706148335192907, + "grad_norm": 0.0, + "learning_rate": 8.208603428078145e-06, + "loss": 1.2666, + "step": 19434 + }, + { + "epoch": 0.5706441951964296, + "grad_norm": 0.0, + "learning_rate": 8.207667858880423e-06, + "loss": 1.335, + "step": 19435 + }, + { + "epoch": 0.5706735568735686, + "grad_norm": 0.0, + "learning_rate": 8.206732305891219e-06, + "loss": 1.3896, + "step": 19436 + }, + { + "epoch": 0.5707029185507076, + "grad_norm": 0.0, + "learning_rate": 8.205796769118994e-06, + "loss": 1.3237, + "step": 19437 + }, + { + "epoch": 0.5707322802278466, + "grad_norm": 0.0, + "learning_rate": 8.204861248572205e-06, + "loss": 1.2793, + "step": 19438 + }, + { + "epoch": 0.5707616419049856, + "grad_norm": 0.0, + "learning_rate": 8.20392574425932e-06, + "loss": 1.2837, + "step": 19439 + }, + { + "epoch": 0.5707910035821246, + "grad_norm": 0.0, + "learning_rate": 8.202990256188795e-06, + "loss": 1.1006, + "step": 19440 + }, + { + "epoch": 0.5708203652592636, + "grad_norm": 0.0, + "learning_rate": 8.202054784369085e-06, + "loss": 1.4766, + "step": 19441 + }, + { + "epoch": 0.5708497269364026, + "grad_norm": 0.0, + "learning_rate": 8.201119328808658e-06, + "loss": 1.2539, + "step": 19442 + }, + { + "epoch": 0.5708790886135416, + "grad_norm": 0.0, + "learning_rate": 8.200183889515966e-06, + "loss": 1.3271, + "step": 19443 + }, + { + "epoch": 0.5709084502906806, + "grad_norm": 0.0, + "learning_rate": 8.199248466499475e-06, + "loss": 1.2783, + "step": 19444 + }, + { + "epoch": 0.5709378119678196, + "grad_norm": 0.0, + "learning_rate": 8.198313059767637e-06, + "loss": 1.376, + "step": 19445 + }, + { + "epoch": 0.5709671736449586, + "grad_norm": 0.0, + "learning_rate": 8.19737766932892e-06, + "loss": 1.2393, + "step": 19446 + }, + { + "epoch": 0.5709965353220976, + "grad_norm": 0.0, + "learning_rate": 8.196442295191775e-06, + "loss": 1.1787, + "step": 19447 + }, + { + "epoch": 0.5710258969992366, + "grad_norm": 0.0, + "learning_rate": 8.195506937364669e-06, + "loss": 1.2656, + "step": 19448 + }, + { + "epoch": 0.5710552586763756, + "grad_norm": 0.0, + "learning_rate": 8.194571595856049e-06, + "loss": 1.2432, + "step": 19449 + }, + { + "epoch": 0.5710846203535146, + "grad_norm": 0.0, + "learning_rate": 8.193636270674385e-06, + "loss": 1.3213, + "step": 19450 + }, + { + "epoch": 0.5711139820306536, + "grad_norm": 0.0, + "learning_rate": 8.192700961828133e-06, + "loss": 1.3115, + "step": 19451 + }, + { + "epoch": 0.5711433437077926, + "grad_norm": 0.0, + "learning_rate": 8.191765669325746e-06, + "loss": 1.1899, + "step": 19452 + }, + { + "epoch": 0.5711727053849316, + "grad_norm": 0.0, + "learning_rate": 8.190830393175686e-06, + "loss": 1.2168, + "step": 19453 + }, + { + "epoch": 0.5712020670620706, + "grad_norm": 0.0, + "learning_rate": 8.189895133386406e-06, + "loss": 1.3311, + "step": 19454 + }, + { + "epoch": 0.5712314287392096, + "grad_norm": 0.0, + "learning_rate": 8.188959889966374e-06, + "loss": 1.2324, + "step": 19455 + }, + { + "epoch": 0.5712607904163486, + "grad_norm": 0.0, + "learning_rate": 8.188024662924039e-06, + "loss": 1.3262, + "step": 19456 + }, + { + "epoch": 0.5712901520934875, + "grad_norm": 0.0, + "learning_rate": 8.187089452267862e-06, + "loss": 1.3076, + "step": 19457 + }, + { + "epoch": 0.5713195137706266, + "grad_norm": 0.0, + "learning_rate": 8.186154258006294e-06, + "loss": 1.2783, + "step": 19458 + }, + { + "epoch": 0.5713488754477656, + "grad_norm": 0.0, + "learning_rate": 8.185219080147804e-06, + "loss": 1.2324, + "step": 19459 + }, + { + "epoch": 0.5713782371249045, + "grad_norm": 0.0, + "learning_rate": 8.184283918700841e-06, + "loss": 1.2451, + "step": 19460 + }, + { + "epoch": 0.5714075988020436, + "grad_norm": 0.0, + "learning_rate": 8.183348773673863e-06, + "loss": 1.1533, + "step": 19461 + }, + { + "epoch": 0.5714369604791826, + "grad_norm": 0.0, + "learning_rate": 8.18241364507533e-06, + "loss": 1.1997, + "step": 19462 + }, + { + "epoch": 0.5714663221563215, + "grad_norm": 0.0, + "learning_rate": 8.181478532913688e-06, + "loss": 1.2529, + "step": 19463 + }, + { + "epoch": 0.5714956838334606, + "grad_norm": 0.0, + "learning_rate": 8.180543437197411e-06, + "loss": 1.2988, + "step": 19464 + }, + { + "epoch": 0.5715250455105996, + "grad_norm": 0.0, + "learning_rate": 8.179608357934938e-06, + "loss": 1.2178, + "step": 19465 + }, + { + "epoch": 0.5715544071877385, + "grad_norm": 0.0, + "learning_rate": 8.178673295134739e-06, + "loss": 1.4062, + "step": 19466 + }, + { + "epoch": 0.5715837688648776, + "grad_norm": 0.0, + "learning_rate": 8.17773824880526e-06, + "loss": 1.4082, + "step": 19467 + }, + { + "epoch": 0.5716131305420166, + "grad_norm": 0.0, + "learning_rate": 8.176803218954962e-06, + "loss": 1.4014, + "step": 19468 + }, + { + "epoch": 0.5716424922191555, + "grad_norm": 0.0, + "learning_rate": 8.175868205592296e-06, + "loss": 1.2725, + "step": 19469 + }, + { + "epoch": 0.5716718538962946, + "grad_norm": 0.0, + "learning_rate": 8.174933208725726e-06, + "loss": 1.3613, + "step": 19470 + }, + { + "epoch": 0.5717012155734336, + "grad_norm": 0.0, + "learning_rate": 8.173998228363702e-06, + "loss": 1.3145, + "step": 19471 + }, + { + "epoch": 0.5717305772505725, + "grad_norm": 0.0, + "learning_rate": 8.173063264514676e-06, + "loss": 1.2744, + "step": 19472 + }, + { + "epoch": 0.5717599389277116, + "grad_norm": 0.0, + "learning_rate": 8.17212831718711e-06, + "loss": 1.2578, + "step": 19473 + }, + { + "epoch": 0.5717893006048506, + "grad_norm": 0.0, + "learning_rate": 8.171193386389454e-06, + "loss": 1.2461, + "step": 19474 + }, + { + "epoch": 0.5718186622819895, + "grad_norm": 0.0, + "learning_rate": 8.170258472130167e-06, + "loss": 1.23, + "step": 19475 + }, + { + "epoch": 0.5718480239591286, + "grad_norm": 0.0, + "learning_rate": 8.169323574417697e-06, + "loss": 1.2495, + "step": 19476 + }, + { + "epoch": 0.5718773856362676, + "grad_norm": 0.0, + "learning_rate": 8.168388693260507e-06, + "loss": 1.3682, + "step": 19477 + }, + { + "epoch": 0.5719067473134065, + "grad_norm": 0.0, + "learning_rate": 8.167453828667043e-06, + "loss": 1.3799, + "step": 19478 + }, + { + "epoch": 0.5719361089905456, + "grad_norm": 0.0, + "learning_rate": 8.166518980645766e-06, + "loss": 1.3057, + "step": 19479 + }, + { + "epoch": 0.5719654706676846, + "grad_norm": 0.0, + "learning_rate": 8.16558414920513e-06, + "loss": 1.3027, + "step": 19480 + }, + { + "epoch": 0.5719948323448235, + "grad_norm": 0.0, + "learning_rate": 8.164649334353581e-06, + "loss": 1.3047, + "step": 19481 + }, + { + "epoch": 0.5720241940219626, + "grad_norm": 0.0, + "learning_rate": 8.163714536099583e-06, + "loss": 1.2427, + "step": 19482 + }, + { + "epoch": 0.5720535556991015, + "grad_norm": 0.0, + "learning_rate": 8.162779754451579e-06, + "loss": 1.4004, + "step": 19483 + }, + { + "epoch": 0.5720829173762405, + "grad_norm": 0.0, + "learning_rate": 8.161844989418033e-06, + "loss": 1.2388, + "step": 19484 + }, + { + "epoch": 0.5721122790533796, + "grad_norm": 0.0, + "learning_rate": 8.160910241007391e-06, + "loss": 1.2344, + "step": 19485 + }, + { + "epoch": 0.5721416407305185, + "grad_norm": 0.0, + "learning_rate": 8.15997550922811e-06, + "loss": 1.2549, + "step": 19486 + }, + { + "epoch": 0.5721710024076575, + "grad_norm": 0.0, + "learning_rate": 8.15904079408864e-06, + "loss": 1.2344, + "step": 19487 + }, + { + "epoch": 0.5722003640847966, + "grad_norm": 0.0, + "learning_rate": 8.158106095597438e-06, + "loss": 1.248, + "step": 19488 + }, + { + "epoch": 0.5722297257619355, + "grad_norm": 0.0, + "learning_rate": 8.157171413762951e-06, + "loss": 1.2119, + "step": 19489 + }, + { + "epoch": 0.5722590874390745, + "grad_norm": 0.0, + "learning_rate": 8.15623674859364e-06, + "loss": 1.2383, + "step": 19490 + }, + { + "epoch": 0.5722884491162136, + "grad_norm": 0.0, + "learning_rate": 8.155302100097951e-06, + "loss": 1.2383, + "step": 19491 + }, + { + "epoch": 0.5723178107933525, + "grad_norm": 0.0, + "learning_rate": 8.154367468284334e-06, + "loss": 1.377, + "step": 19492 + }, + { + "epoch": 0.5723471724704915, + "grad_norm": 0.0, + "learning_rate": 8.153432853161249e-06, + "loss": 1.5, + "step": 19493 + }, + { + "epoch": 0.5723765341476306, + "grad_norm": 0.0, + "learning_rate": 8.152498254737137e-06, + "loss": 1.3896, + "step": 19494 + }, + { + "epoch": 0.5724058958247695, + "grad_norm": 0.0, + "learning_rate": 8.151563673020464e-06, + "loss": 1.3086, + "step": 19495 + }, + { + "epoch": 0.5724352575019085, + "grad_norm": 0.0, + "learning_rate": 8.15062910801967e-06, + "loss": 1.2822, + "step": 19496 + }, + { + "epoch": 0.5724646191790476, + "grad_norm": 0.0, + "learning_rate": 8.149694559743212e-06, + "loss": 1.2617, + "step": 19497 + }, + { + "epoch": 0.5724939808561865, + "grad_norm": 0.0, + "learning_rate": 8.148760028199536e-06, + "loss": 1.2207, + "step": 19498 + }, + { + "epoch": 0.5725233425333255, + "grad_norm": 0.0, + "learning_rate": 8.147825513397103e-06, + "loss": 1.1973, + "step": 19499 + }, + { + "epoch": 0.5725527042104644, + "grad_norm": 0.0, + "learning_rate": 8.146891015344356e-06, + "loss": 1.2627, + "step": 19500 + }, + { + "epoch": 0.5725820658876035, + "grad_norm": 0.0, + "learning_rate": 8.145956534049748e-06, + "loss": 1.3701, + "step": 19501 + }, + { + "epoch": 0.5726114275647425, + "grad_norm": 0.0, + "learning_rate": 8.145022069521731e-06, + "loss": 1.1567, + "step": 19502 + }, + { + "epoch": 0.5726407892418814, + "grad_norm": 0.0, + "learning_rate": 8.14408762176875e-06, + "loss": 1.3271, + "step": 19503 + }, + { + "epoch": 0.5726701509190205, + "grad_norm": 0.0, + "learning_rate": 8.143153190799264e-06, + "loss": 1.2363, + "step": 19504 + }, + { + "epoch": 0.5726995125961595, + "grad_norm": 0.0, + "learning_rate": 8.142218776621718e-06, + "loss": 1.3037, + "step": 19505 + }, + { + "epoch": 0.5727288742732984, + "grad_norm": 0.0, + "learning_rate": 8.141284379244566e-06, + "loss": 1.1587, + "step": 19506 + }, + { + "epoch": 0.5727582359504375, + "grad_norm": 0.0, + "learning_rate": 8.140349998676251e-06, + "loss": 1.3975, + "step": 19507 + }, + { + "epoch": 0.5727875976275765, + "grad_norm": 0.0, + "learning_rate": 8.13941563492523e-06, + "loss": 1.2666, + "step": 19508 + }, + { + "epoch": 0.5728169593047154, + "grad_norm": 0.0, + "learning_rate": 8.138481287999952e-06, + "loss": 1.4736, + "step": 19509 + }, + { + "epoch": 0.5728463209818545, + "grad_norm": 0.0, + "learning_rate": 8.137546957908859e-06, + "loss": 1.2803, + "step": 19510 + }, + { + "epoch": 0.5728756826589935, + "grad_norm": 0.0, + "learning_rate": 8.13661264466041e-06, + "loss": 1.1772, + "step": 19511 + }, + { + "epoch": 0.5729050443361324, + "grad_norm": 0.0, + "learning_rate": 8.135678348263047e-06, + "loss": 1.3535, + "step": 19512 + }, + { + "epoch": 0.5729344060132715, + "grad_norm": 0.0, + "learning_rate": 8.134744068725225e-06, + "loss": 1.3008, + "step": 19513 + }, + { + "epoch": 0.5729637676904105, + "grad_norm": 0.0, + "learning_rate": 8.133809806055388e-06, + "loss": 1.2144, + "step": 19514 + }, + { + "epoch": 0.5729931293675494, + "grad_norm": 0.0, + "learning_rate": 8.132875560261989e-06, + "loss": 1.2676, + "step": 19515 + }, + { + "epoch": 0.5730224910446885, + "grad_norm": 0.0, + "learning_rate": 8.13194133135347e-06, + "loss": 1.3145, + "step": 19516 + }, + { + "epoch": 0.5730518527218275, + "grad_norm": 0.0, + "learning_rate": 8.131007119338292e-06, + "loss": 1.2744, + "step": 19517 + }, + { + "epoch": 0.5730812143989664, + "grad_norm": 0.0, + "learning_rate": 8.130072924224888e-06, + "loss": 1.248, + "step": 19518 + }, + { + "epoch": 0.5731105760761055, + "grad_norm": 0.0, + "learning_rate": 8.129138746021716e-06, + "loss": 1.2944, + "step": 19519 + }, + { + "epoch": 0.5731399377532445, + "grad_norm": 0.0, + "learning_rate": 8.128204584737224e-06, + "loss": 1.2559, + "step": 19520 + }, + { + "epoch": 0.5731692994303834, + "grad_norm": 0.0, + "learning_rate": 8.127270440379853e-06, + "loss": 1.2812, + "step": 19521 + }, + { + "epoch": 0.5731986611075225, + "grad_norm": 0.0, + "learning_rate": 8.126336312958058e-06, + "loss": 1.2207, + "step": 19522 + }, + { + "epoch": 0.5732280227846615, + "grad_norm": 0.0, + "learning_rate": 8.12540220248028e-06, + "loss": 1.2319, + "step": 19523 + }, + { + "epoch": 0.5732573844618004, + "grad_norm": 0.0, + "learning_rate": 8.124468108954974e-06, + "loss": 1.3086, + "step": 19524 + }, + { + "epoch": 0.5732867461389395, + "grad_norm": 0.0, + "learning_rate": 8.123534032390581e-06, + "loss": 1.3843, + "step": 19525 + }, + { + "epoch": 0.5733161078160784, + "grad_norm": 0.0, + "learning_rate": 8.122599972795552e-06, + "loss": 1.3408, + "step": 19526 + }, + { + "epoch": 0.5733454694932174, + "grad_norm": 0.0, + "learning_rate": 8.121665930178328e-06, + "loss": 1.2129, + "step": 19527 + }, + { + "epoch": 0.5733748311703565, + "grad_norm": 0.0, + "learning_rate": 8.120731904547365e-06, + "loss": 1.2754, + "step": 19528 + }, + { + "epoch": 0.5734041928474954, + "grad_norm": 0.0, + "learning_rate": 8.119797895911104e-06, + "loss": 1.3076, + "step": 19529 + }, + { + "epoch": 0.5734335545246344, + "grad_norm": 0.0, + "learning_rate": 8.11886390427799e-06, + "loss": 1.2285, + "step": 19530 + }, + { + "epoch": 0.5734629162017735, + "grad_norm": 0.0, + "learning_rate": 8.117929929656475e-06, + "loss": 1.1514, + "step": 19531 + }, + { + "epoch": 0.5734922778789124, + "grad_norm": 0.0, + "learning_rate": 8.116995972054995e-06, + "loss": 1.3701, + "step": 19532 + }, + { + "epoch": 0.5735216395560514, + "grad_norm": 0.0, + "learning_rate": 8.116062031482008e-06, + "loss": 1.3223, + "step": 19533 + }, + { + "epoch": 0.5735510012331905, + "grad_norm": 0.0, + "learning_rate": 8.115128107945953e-06, + "loss": 1.3047, + "step": 19534 + }, + { + "epoch": 0.5735803629103294, + "grad_norm": 0.0, + "learning_rate": 8.114194201455278e-06, + "loss": 1.1797, + "step": 19535 + }, + { + "epoch": 0.5736097245874684, + "grad_norm": 0.0, + "learning_rate": 8.113260312018425e-06, + "loss": 1.2666, + "step": 19536 + }, + { + "epoch": 0.5736390862646075, + "grad_norm": 0.0, + "learning_rate": 8.112326439643847e-06, + "loss": 1.4922, + "step": 19537 + }, + { + "epoch": 0.5736684479417464, + "grad_norm": 0.0, + "learning_rate": 8.111392584339981e-06, + "loss": 1.167, + "step": 19538 + }, + { + "epoch": 0.5736978096188854, + "grad_norm": 0.0, + "learning_rate": 8.110458746115277e-06, + "loss": 1.2485, + "step": 19539 + }, + { + "epoch": 0.5737271712960245, + "grad_norm": 0.0, + "learning_rate": 8.109524924978181e-06, + "loss": 1.2212, + "step": 19540 + }, + { + "epoch": 0.5737565329731634, + "grad_norm": 0.0, + "learning_rate": 8.108591120937129e-06, + "loss": 1.2188, + "step": 19541 + }, + { + "epoch": 0.5737858946503024, + "grad_norm": 0.0, + "learning_rate": 8.107657334000578e-06, + "loss": 1.248, + "step": 19542 + }, + { + "epoch": 0.5738152563274415, + "grad_norm": 0.0, + "learning_rate": 8.106723564176961e-06, + "loss": 1.2012, + "step": 19543 + }, + { + "epoch": 0.5738446180045804, + "grad_norm": 0.0, + "learning_rate": 8.10578981147473e-06, + "loss": 1.3516, + "step": 19544 + }, + { + "epoch": 0.5738739796817194, + "grad_norm": 0.0, + "learning_rate": 8.104856075902328e-06, + "loss": 1.2021, + "step": 19545 + }, + { + "epoch": 0.5739033413588585, + "grad_norm": 0.0, + "learning_rate": 8.103922357468198e-06, + "loss": 1.2686, + "step": 19546 + }, + { + "epoch": 0.5739327030359974, + "grad_norm": 0.0, + "learning_rate": 8.10298865618078e-06, + "loss": 1.2778, + "step": 19547 + }, + { + "epoch": 0.5739620647131364, + "grad_norm": 0.0, + "learning_rate": 8.102054972048526e-06, + "loss": 1.2969, + "step": 19548 + }, + { + "epoch": 0.5739914263902754, + "grad_norm": 0.0, + "learning_rate": 8.101121305079873e-06, + "loss": 1.335, + "step": 19549 + }, + { + "epoch": 0.5740207880674144, + "grad_norm": 0.0, + "learning_rate": 8.100187655283267e-06, + "loss": 1.3125, + "step": 19550 + }, + { + "epoch": 0.5740501497445534, + "grad_norm": 0.0, + "learning_rate": 8.099254022667152e-06, + "loss": 1.3994, + "step": 19551 + }, + { + "epoch": 0.5740795114216924, + "grad_norm": 0.0, + "learning_rate": 8.098320407239963e-06, + "loss": 1.2529, + "step": 19552 + }, + { + "epoch": 0.5741088730988314, + "grad_norm": 0.0, + "learning_rate": 8.097386809010156e-06, + "loss": 1.2109, + "step": 19553 + }, + { + "epoch": 0.5741382347759704, + "grad_norm": 0.0, + "learning_rate": 8.096453227986166e-06, + "loss": 1.2363, + "step": 19554 + }, + { + "epoch": 0.5741675964531094, + "grad_norm": 0.0, + "learning_rate": 8.095519664176436e-06, + "loss": 1.3037, + "step": 19555 + }, + { + "epoch": 0.5741969581302484, + "grad_norm": 0.0, + "learning_rate": 8.094586117589409e-06, + "loss": 1.3057, + "step": 19556 + }, + { + "epoch": 0.5742263198073874, + "grad_norm": 0.0, + "learning_rate": 8.09365258823353e-06, + "loss": 1.2349, + "step": 19557 + }, + { + "epoch": 0.5742556814845264, + "grad_norm": 0.0, + "learning_rate": 8.092719076117238e-06, + "loss": 1.418, + "step": 19558 + }, + { + "epoch": 0.5742850431616654, + "grad_norm": 0.0, + "learning_rate": 8.091785581248974e-06, + "loss": 1.3057, + "step": 19559 + }, + { + "epoch": 0.5743144048388044, + "grad_norm": 0.0, + "learning_rate": 8.090852103637186e-06, + "loss": 1.2529, + "step": 19560 + }, + { + "epoch": 0.5743437665159434, + "grad_norm": 0.0, + "learning_rate": 8.089918643290305e-06, + "loss": 1.3447, + "step": 19561 + }, + { + "epoch": 0.5743731281930824, + "grad_norm": 0.0, + "learning_rate": 8.088985200216783e-06, + "loss": 1.1616, + "step": 19562 + }, + { + "epoch": 0.5744024898702214, + "grad_norm": 0.0, + "learning_rate": 8.088051774425057e-06, + "loss": 1.2988, + "step": 19563 + }, + { + "epoch": 0.5744318515473604, + "grad_norm": 0.0, + "learning_rate": 8.087118365923568e-06, + "loss": 1.2471, + "step": 19564 + }, + { + "epoch": 0.5744612132244994, + "grad_norm": 0.0, + "learning_rate": 8.086184974720755e-06, + "loss": 1.3281, + "step": 19565 + }, + { + "epoch": 0.5744905749016384, + "grad_norm": 0.0, + "learning_rate": 8.085251600825069e-06, + "loss": 1.3359, + "step": 19566 + }, + { + "epoch": 0.5745199365787774, + "grad_norm": 0.0, + "learning_rate": 8.084318244244934e-06, + "loss": 1.2681, + "step": 19567 + }, + { + "epoch": 0.5745492982559164, + "grad_norm": 0.0, + "learning_rate": 8.083384904988806e-06, + "loss": 1.2324, + "step": 19568 + }, + { + "epoch": 0.5745786599330553, + "grad_norm": 0.0, + "learning_rate": 8.082451583065118e-06, + "loss": 1.2295, + "step": 19569 + }, + { + "epoch": 0.5746080216101944, + "grad_norm": 0.0, + "learning_rate": 8.08151827848231e-06, + "loss": 1.3271, + "step": 19570 + }, + { + "epoch": 0.5746373832873334, + "grad_norm": 0.0, + "learning_rate": 8.080584991248826e-06, + "loss": 1.3257, + "step": 19571 + }, + { + "epoch": 0.5746667449644723, + "grad_norm": 0.0, + "learning_rate": 8.079651721373098e-06, + "loss": 1.3359, + "step": 19572 + }, + { + "epoch": 0.5746961066416114, + "grad_norm": 0.0, + "learning_rate": 8.078718468863575e-06, + "loss": 1.3291, + "step": 19573 + }, + { + "epoch": 0.5747254683187504, + "grad_norm": 0.0, + "learning_rate": 8.077785233728692e-06, + "loss": 1.2861, + "step": 19574 + }, + { + "epoch": 0.5747548299958893, + "grad_norm": 0.0, + "learning_rate": 8.076852015976892e-06, + "loss": 1.3389, + "step": 19575 + }, + { + "epoch": 0.5747841916730284, + "grad_norm": 0.0, + "learning_rate": 8.075918815616606e-06, + "loss": 1.4307, + "step": 19576 + }, + { + "epoch": 0.5748135533501674, + "grad_norm": 0.0, + "learning_rate": 8.074985632656285e-06, + "loss": 1.3213, + "step": 19577 + }, + { + "epoch": 0.5748429150273063, + "grad_norm": 0.0, + "learning_rate": 8.07405246710436e-06, + "loss": 1.252, + "step": 19578 + }, + { + "epoch": 0.5748722767044454, + "grad_norm": 0.0, + "learning_rate": 8.073119318969271e-06, + "loss": 1.3545, + "step": 19579 + }, + { + "epoch": 0.5749016383815844, + "grad_norm": 0.0, + "learning_rate": 8.072186188259461e-06, + "loss": 1.4014, + "step": 19580 + }, + { + "epoch": 0.5749310000587233, + "grad_norm": 0.0, + "learning_rate": 8.071253074983358e-06, + "loss": 1.3623, + "step": 19581 + }, + { + "epoch": 0.5749603617358624, + "grad_norm": 0.0, + "learning_rate": 8.070319979149414e-06, + "loss": 1.2607, + "step": 19582 + }, + { + "epoch": 0.5749897234130014, + "grad_norm": 0.0, + "learning_rate": 8.069386900766057e-06, + "loss": 1.3418, + "step": 19583 + }, + { + "epoch": 0.5750190850901403, + "grad_norm": 0.0, + "learning_rate": 8.06845383984173e-06, + "loss": 1.3701, + "step": 19584 + }, + { + "epoch": 0.5750484467672794, + "grad_norm": 0.0, + "learning_rate": 8.067520796384866e-06, + "loss": 1.3281, + "step": 19585 + }, + { + "epoch": 0.5750778084444184, + "grad_norm": 0.0, + "learning_rate": 8.066587770403912e-06, + "loss": 1.2202, + "step": 19586 + }, + { + "epoch": 0.5751071701215573, + "grad_norm": 0.0, + "learning_rate": 8.065654761907297e-06, + "loss": 1.3779, + "step": 19587 + }, + { + "epoch": 0.5751365317986964, + "grad_norm": 0.0, + "learning_rate": 8.064721770903462e-06, + "loss": 1.2041, + "step": 19588 + }, + { + "epoch": 0.5751658934758354, + "grad_norm": 0.0, + "learning_rate": 8.063788797400846e-06, + "loss": 1.3066, + "step": 19589 + }, + { + "epoch": 0.5751952551529743, + "grad_norm": 0.0, + "learning_rate": 8.06285584140788e-06, + "loss": 1.3223, + "step": 19590 + }, + { + "epoch": 0.5752246168301134, + "grad_norm": 0.0, + "learning_rate": 8.061922902933009e-06, + "loss": 1.416, + "step": 19591 + }, + { + "epoch": 0.5752539785072524, + "grad_norm": 0.0, + "learning_rate": 8.06098998198466e-06, + "loss": 1.2998, + "step": 19592 + }, + { + "epoch": 0.5752833401843913, + "grad_norm": 0.0, + "learning_rate": 8.060057078571278e-06, + "loss": 1.3047, + "step": 19593 + }, + { + "epoch": 0.5753127018615304, + "grad_norm": 0.0, + "learning_rate": 8.059124192701296e-06, + "loss": 1.106, + "step": 19594 + }, + { + "epoch": 0.5753420635386693, + "grad_norm": 0.0, + "learning_rate": 8.058191324383153e-06, + "loss": 1.3105, + "step": 19595 + }, + { + "epoch": 0.5753714252158083, + "grad_norm": 0.0, + "learning_rate": 8.05725847362528e-06, + "loss": 1.3896, + "step": 19596 + }, + { + "epoch": 0.5754007868929474, + "grad_norm": 0.0, + "learning_rate": 8.056325640436117e-06, + "loss": 1.3838, + "step": 19597 + }, + { + "epoch": 0.5754301485700863, + "grad_norm": 0.0, + "learning_rate": 8.055392824824103e-06, + "loss": 1.3711, + "step": 19598 + }, + { + "epoch": 0.5754595102472253, + "grad_norm": 0.0, + "learning_rate": 8.054460026797666e-06, + "loss": 1.2422, + "step": 19599 + }, + { + "epoch": 0.5754888719243643, + "grad_norm": 0.0, + "learning_rate": 8.053527246365247e-06, + "loss": 1.2891, + "step": 19600 + }, + { + "epoch": 0.5755182336015033, + "grad_norm": 0.0, + "learning_rate": 8.052594483535275e-06, + "loss": 1.2363, + "step": 19601 + }, + { + "epoch": 0.5755475952786423, + "grad_norm": 0.0, + "learning_rate": 8.051661738316195e-06, + "loss": 1.3428, + "step": 19602 + }, + { + "epoch": 0.5755769569557813, + "grad_norm": 0.0, + "learning_rate": 8.050729010716434e-06, + "loss": 1.3818, + "step": 19603 + }, + { + "epoch": 0.5756063186329203, + "grad_norm": 0.0, + "learning_rate": 8.049796300744433e-06, + "loss": 1.3818, + "step": 19604 + }, + { + "epoch": 0.5756356803100593, + "grad_norm": 0.0, + "learning_rate": 8.048863608408618e-06, + "loss": 1.2822, + "step": 19605 + }, + { + "epoch": 0.5756650419871983, + "grad_norm": 0.0, + "learning_rate": 8.047930933717433e-06, + "loss": 1.04, + "step": 19606 + }, + { + "epoch": 0.5756944036643373, + "grad_norm": 0.0, + "learning_rate": 8.046998276679308e-06, + "loss": 1.3203, + "step": 19607 + }, + { + "epoch": 0.5757237653414763, + "grad_norm": 0.0, + "learning_rate": 8.046065637302678e-06, + "loss": 1.1455, + "step": 19608 + }, + { + "epoch": 0.5757531270186153, + "grad_norm": 0.0, + "learning_rate": 8.04513301559598e-06, + "loss": 1.3164, + "step": 19609 + }, + { + "epoch": 0.5757824886957543, + "grad_norm": 0.0, + "learning_rate": 8.044200411567637e-06, + "loss": 1.3438, + "step": 19610 + }, + { + "epoch": 0.5758118503728933, + "grad_norm": 0.0, + "learning_rate": 8.043267825226097e-06, + "loss": 1.3198, + "step": 19611 + }, + { + "epoch": 0.5758412120500322, + "grad_norm": 0.0, + "learning_rate": 8.042335256579785e-06, + "loss": 1.46, + "step": 19612 + }, + { + "epoch": 0.5758705737271713, + "grad_norm": 0.0, + "learning_rate": 8.041402705637139e-06, + "loss": 1.3105, + "step": 19613 + }, + { + "epoch": 0.5758999354043103, + "grad_norm": 0.0, + "learning_rate": 8.040470172406584e-06, + "loss": 1.2588, + "step": 19614 + }, + { + "epoch": 0.5759292970814492, + "grad_norm": 0.0, + "learning_rate": 8.039537656896564e-06, + "loss": 1.1221, + "step": 19615 + }, + { + "epoch": 0.5759586587585883, + "grad_norm": 0.0, + "learning_rate": 8.038605159115505e-06, + "loss": 1.4619, + "step": 19616 + }, + { + "epoch": 0.5759880204357273, + "grad_norm": 0.0, + "learning_rate": 8.037672679071844e-06, + "loss": 1.2568, + "step": 19617 + }, + { + "epoch": 0.5760173821128662, + "grad_norm": 0.0, + "learning_rate": 8.036740216774012e-06, + "loss": 1.3887, + "step": 19618 + }, + { + "epoch": 0.5760467437900053, + "grad_norm": 0.0, + "learning_rate": 8.035807772230439e-06, + "loss": 1.4268, + "step": 19619 + }, + { + "epoch": 0.5760761054671443, + "grad_norm": 0.0, + "learning_rate": 8.03487534544956e-06, + "loss": 1.2754, + "step": 19620 + }, + { + "epoch": 0.5761054671442832, + "grad_norm": 0.0, + "learning_rate": 8.033942936439803e-06, + "loss": 1.2402, + "step": 19621 + }, + { + "epoch": 0.5761348288214223, + "grad_norm": 0.0, + "learning_rate": 8.033010545209609e-06, + "loss": 1.3213, + "step": 19622 + }, + { + "epoch": 0.5761641904985613, + "grad_norm": 0.0, + "learning_rate": 8.032078171767401e-06, + "loss": 1.3135, + "step": 19623 + }, + { + "epoch": 0.5761935521757002, + "grad_norm": 0.0, + "learning_rate": 8.031145816121616e-06, + "loss": 1.2295, + "step": 19624 + }, + { + "epoch": 0.5762229138528393, + "grad_norm": 0.0, + "learning_rate": 8.03021347828068e-06, + "loss": 1.4023, + "step": 19625 + }, + { + "epoch": 0.5762522755299783, + "grad_norm": 0.0, + "learning_rate": 8.029281158253033e-06, + "loss": 1.3018, + "step": 19626 + }, + { + "epoch": 0.5762816372071172, + "grad_norm": 0.0, + "learning_rate": 8.0283488560471e-06, + "loss": 1.3066, + "step": 19627 + }, + { + "epoch": 0.5763109988842563, + "grad_norm": 0.0, + "learning_rate": 8.027416571671312e-06, + "loss": 1.2949, + "step": 19628 + }, + { + "epoch": 0.5763403605613953, + "grad_norm": 0.0, + "learning_rate": 8.026484305134102e-06, + "loss": 1.2764, + "step": 19629 + }, + { + "epoch": 0.5763697222385342, + "grad_norm": 0.0, + "learning_rate": 8.025552056443896e-06, + "loss": 1.2144, + "step": 19630 + }, + { + "epoch": 0.5763990839156733, + "grad_norm": 0.0, + "learning_rate": 8.024619825609134e-06, + "loss": 1.3936, + "step": 19631 + }, + { + "epoch": 0.5764284455928123, + "grad_norm": 0.0, + "learning_rate": 8.023687612638236e-06, + "loss": 1.3223, + "step": 19632 + }, + { + "epoch": 0.5764578072699512, + "grad_norm": 0.0, + "learning_rate": 8.022755417539642e-06, + "loss": 1.272, + "step": 19633 + }, + { + "epoch": 0.5764871689470903, + "grad_norm": 0.0, + "learning_rate": 8.021823240321771e-06, + "loss": 1.208, + "step": 19634 + }, + { + "epoch": 0.5765165306242293, + "grad_norm": 0.0, + "learning_rate": 8.020891080993064e-06, + "loss": 1.2461, + "step": 19635 + }, + { + "epoch": 0.5765458923013682, + "grad_norm": 0.0, + "learning_rate": 8.019958939561943e-06, + "loss": 1.2896, + "step": 19636 + }, + { + "epoch": 0.5765752539785073, + "grad_norm": 0.0, + "learning_rate": 8.019026816036844e-06, + "loss": 1.2715, + "step": 19637 + }, + { + "epoch": 0.5766046156556462, + "grad_norm": 0.0, + "learning_rate": 8.018094710426191e-06, + "loss": 1.3008, + "step": 19638 + }, + { + "epoch": 0.5766339773327852, + "grad_norm": 0.0, + "learning_rate": 8.017162622738414e-06, + "loss": 1.3623, + "step": 19639 + }, + { + "epoch": 0.5766633390099243, + "grad_norm": 0.0, + "learning_rate": 8.016230552981944e-06, + "loss": 1.1611, + "step": 19640 + }, + { + "epoch": 0.5766927006870632, + "grad_norm": 0.0, + "learning_rate": 8.01529850116521e-06, + "loss": 1.4189, + "step": 19641 + }, + { + "epoch": 0.5767220623642022, + "grad_norm": 0.0, + "learning_rate": 8.01436646729664e-06, + "loss": 1.1841, + "step": 19642 + }, + { + "epoch": 0.5767514240413413, + "grad_norm": 0.0, + "learning_rate": 8.013434451384659e-06, + "loss": 1.4004, + "step": 19643 + }, + { + "epoch": 0.5767807857184802, + "grad_norm": 0.0, + "learning_rate": 8.012502453437705e-06, + "loss": 1.2407, + "step": 19644 + }, + { + "epoch": 0.5768101473956192, + "grad_norm": 0.0, + "learning_rate": 8.011570473464195e-06, + "loss": 1.2388, + "step": 19645 + }, + { + "epoch": 0.5768395090727583, + "grad_norm": 0.0, + "learning_rate": 8.010638511472564e-06, + "loss": 1.2188, + "step": 19646 + }, + { + "epoch": 0.5768688707498972, + "grad_norm": 0.0, + "learning_rate": 8.009706567471241e-06, + "loss": 1.2334, + "step": 19647 + }, + { + "epoch": 0.5768982324270362, + "grad_norm": 0.0, + "learning_rate": 8.008774641468648e-06, + "loss": 1.2803, + "step": 19648 + }, + { + "epoch": 0.5769275941041753, + "grad_norm": 0.0, + "learning_rate": 8.007842733473218e-06, + "loss": 1.2251, + "step": 19649 + }, + { + "epoch": 0.5769569557813142, + "grad_norm": 0.0, + "learning_rate": 8.006910843493375e-06, + "loss": 1.2266, + "step": 19650 + }, + { + "epoch": 0.5769863174584532, + "grad_norm": 0.0, + "learning_rate": 8.005978971537548e-06, + "loss": 1.2529, + "step": 19651 + }, + { + "epoch": 0.5770156791355923, + "grad_norm": 0.0, + "learning_rate": 8.005047117614163e-06, + "loss": 1.2837, + "step": 19652 + }, + { + "epoch": 0.5770450408127312, + "grad_norm": 0.0, + "learning_rate": 8.00411528173165e-06, + "loss": 1.1758, + "step": 19653 + }, + { + "epoch": 0.5770744024898702, + "grad_norm": 0.0, + "learning_rate": 8.003183463898429e-06, + "loss": 1.2969, + "step": 19654 + }, + { + "epoch": 0.5771037641670093, + "grad_norm": 0.0, + "learning_rate": 8.002251664122937e-06, + "loss": 1.145, + "step": 19655 + }, + { + "epoch": 0.5771331258441482, + "grad_norm": 0.0, + "learning_rate": 8.001319882413592e-06, + "loss": 1.2676, + "step": 19656 + }, + { + "epoch": 0.5771624875212872, + "grad_norm": 0.0, + "learning_rate": 8.000388118778825e-06, + "loss": 1.2891, + "step": 19657 + }, + { + "epoch": 0.5771918491984263, + "grad_norm": 0.0, + "learning_rate": 7.99945637322706e-06, + "loss": 1.1772, + "step": 19658 + }, + { + "epoch": 0.5772212108755652, + "grad_norm": 0.0, + "learning_rate": 7.99852464576672e-06, + "loss": 1.3506, + "step": 19659 + }, + { + "epoch": 0.5772505725527042, + "grad_norm": 0.0, + "learning_rate": 7.997592936406237e-06, + "loss": 1.2417, + "step": 19660 + }, + { + "epoch": 0.5772799342298433, + "grad_norm": 0.0, + "learning_rate": 7.996661245154035e-06, + "loss": 1.3887, + "step": 19661 + }, + { + "epoch": 0.5773092959069822, + "grad_norm": 0.0, + "learning_rate": 7.995729572018539e-06, + "loss": 1.1782, + "step": 19662 + }, + { + "epoch": 0.5773386575841212, + "grad_norm": 0.0, + "learning_rate": 7.994797917008167e-06, + "loss": 1.1743, + "step": 19663 + }, + { + "epoch": 0.5773680192612602, + "grad_norm": 0.0, + "learning_rate": 7.993866280131356e-06, + "loss": 1.2793, + "step": 19664 + }, + { + "epoch": 0.5773973809383992, + "grad_norm": 0.0, + "learning_rate": 7.992934661396526e-06, + "loss": 1.417, + "step": 19665 + }, + { + "epoch": 0.5774267426155382, + "grad_norm": 0.0, + "learning_rate": 7.992003060812104e-06, + "loss": 1.251, + "step": 19666 + }, + { + "epoch": 0.5774561042926772, + "grad_norm": 0.0, + "learning_rate": 7.99107147838651e-06, + "loss": 1.3369, + "step": 19667 + }, + { + "epoch": 0.5774854659698162, + "grad_norm": 0.0, + "learning_rate": 7.99013991412817e-06, + "loss": 1.2998, + "step": 19668 + }, + { + "epoch": 0.5775148276469552, + "grad_norm": 0.0, + "learning_rate": 7.989208368045514e-06, + "loss": 1.2393, + "step": 19669 + }, + { + "epoch": 0.5775441893240942, + "grad_norm": 0.0, + "learning_rate": 7.988276840146956e-06, + "loss": 1.2441, + "step": 19670 + }, + { + "epoch": 0.5775735510012332, + "grad_norm": 0.0, + "learning_rate": 7.98734533044093e-06, + "loss": 1.3027, + "step": 19671 + }, + { + "epoch": 0.5776029126783722, + "grad_norm": 0.0, + "learning_rate": 7.986413838935852e-06, + "loss": 1.2578, + "step": 19672 + }, + { + "epoch": 0.5776322743555112, + "grad_norm": 0.0, + "learning_rate": 7.985482365640153e-06, + "loss": 1.1553, + "step": 19673 + }, + { + "epoch": 0.5776616360326502, + "grad_norm": 0.0, + "learning_rate": 7.984550910562248e-06, + "loss": 1.3477, + "step": 19674 + }, + { + "epoch": 0.5776909977097892, + "grad_norm": 0.0, + "learning_rate": 7.98361947371057e-06, + "loss": 1.2852, + "step": 19675 + }, + { + "epoch": 0.5777203593869282, + "grad_norm": 0.0, + "learning_rate": 7.982688055093535e-06, + "loss": 1.3218, + "step": 19676 + }, + { + "epoch": 0.5777497210640672, + "grad_norm": 0.0, + "learning_rate": 7.98175665471957e-06, + "loss": 1.2598, + "step": 19677 + }, + { + "epoch": 0.5777790827412062, + "grad_norm": 0.0, + "learning_rate": 7.980825272597098e-06, + "loss": 1.3105, + "step": 19678 + }, + { + "epoch": 0.5778084444183452, + "grad_norm": 0.0, + "learning_rate": 7.979893908734534e-06, + "loss": 1.2158, + "step": 19679 + }, + { + "epoch": 0.5778378060954842, + "grad_norm": 0.0, + "learning_rate": 7.978962563140313e-06, + "loss": 1.3359, + "step": 19680 + }, + { + "epoch": 0.5778671677726231, + "grad_norm": 0.0, + "learning_rate": 7.978031235822848e-06, + "loss": 1.2676, + "step": 19681 + }, + { + "epoch": 0.5778965294497622, + "grad_norm": 0.0, + "learning_rate": 7.977099926790568e-06, + "loss": 1.2817, + "step": 19682 + }, + { + "epoch": 0.5779258911269012, + "grad_norm": 0.0, + "learning_rate": 7.976168636051886e-06, + "loss": 1.3193, + "step": 19683 + }, + { + "epoch": 0.5779552528040401, + "grad_norm": 0.0, + "learning_rate": 7.975237363615233e-06, + "loss": 1.3184, + "step": 19684 + }, + { + "epoch": 0.5779846144811792, + "grad_norm": 0.0, + "learning_rate": 7.974306109489024e-06, + "loss": 1.1396, + "step": 19685 + }, + { + "epoch": 0.5780139761583182, + "grad_norm": 0.0, + "learning_rate": 7.973374873681689e-06, + "loss": 1.3154, + "step": 19686 + }, + { + "epoch": 0.5780433378354571, + "grad_norm": 0.0, + "learning_rate": 7.972443656201641e-06, + "loss": 1.2451, + "step": 19687 + }, + { + "epoch": 0.5780726995125962, + "grad_norm": 0.0, + "learning_rate": 7.971512457057302e-06, + "loss": 1.1543, + "step": 19688 + }, + { + "epoch": 0.5781020611897352, + "grad_norm": 0.0, + "learning_rate": 7.9705812762571e-06, + "loss": 1.2114, + "step": 19689 + }, + { + "epoch": 0.5781314228668741, + "grad_norm": 0.0, + "learning_rate": 7.969650113809447e-06, + "loss": 1.4473, + "step": 19690 + }, + { + "epoch": 0.5781607845440132, + "grad_norm": 0.0, + "learning_rate": 7.96871896972277e-06, + "loss": 1.1958, + "step": 19691 + }, + { + "epoch": 0.5781901462211522, + "grad_norm": 0.0, + "learning_rate": 7.967787844005485e-06, + "loss": 1.0771, + "step": 19692 + }, + { + "epoch": 0.5782195078982911, + "grad_norm": 0.0, + "learning_rate": 7.966856736666019e-06, + "loss": 1.4629, + "step": 19693 + }, + { + "epoch": 0.5782488695754302, + "grad_norm": 0.0, + "learning_rate": 7.965925647712786e-06, + "loss": 1.3926, + "step": 19694 + }, + { + "epoch": 0.5782782312525692, + "grad_norm": 0.0, + "learning_rate": 7.964994577154211e-06, + "loss": 1.3604, + "step": 19695 + }, + { + "epoch": 0.5783075929297081, + "grad_norm": 0.0, + "learning_rate": 7.96406352499871e-06, + "loss": 1.2422, + "step": 19696 + }, + { + "epoch": 0.5783369546068472, + "grad_norm": 0.0, + "learning_rate": 7.963132491254702e-06, + "loss": 1.3496, + "step": 19697 + }, + { + "epoch": 0.5783663162839862, + "grad_norm": 0.0, + "learning_rate": 7.96220147593061e-06, + "loss": 1.271, + "step": 19698 + }, + { + "epoch": 0.5783956779611251, + "grad_norm": 0.0, + "learning_rate": 7.961270479034849e-06, + "loss": 1.2236, + "step": 19699 + }, + { + "epoch": 0.5784250396382641, + "grad_norm": 0.0, + "learning_rate": 7.960339500575846e-06, + "loss": 1.3184, + "step": 19700 + }, + { + "epoch": 0.5784544013154032, + "grad_norm": 0.0, + "learning_rate": 7.959408540562011e-06, + "loss": 1.1865, + "step": 19701 + }, + { + "epoch": 0.5784837629925421, + "grad_norm": 0.0, + "learning_rate": 7.95847759900177e-06, + "loss": 1.3105, + "step": 19702 + }, + { + "epoch": 0.5785131246696811, + "grad_norm": 0.0, + "learning_rate": 7.957546675903536e-06, + "loss": 1.3223, + "step": 19703 + }, + { + "epoch": 0.5785424863468202, + "grad_norm": 0.0, + "learning_rate": 7.956615771275734e-06, + "loss": 1.3291, + "step": 19704 + }, + { + "epoch": 0.5785718480239591, + "grad_norm": 0.0, + "learning_rate": 7.955684885126777e-06, + "loss": 1.2402, + "step": 19705 + }, + { + "epoch": 0.5786012097010981, + "grad_norm": 0.0, + "learning_rate": 7.954754017465087e-06, + "loss": 1.375, + "step": 19706 + }, + { + "epoch": 0.5786305713782371, + "grad_norm": 0.0, + "learning_rate": 7.95382316829908e-06, + "loss": 1.3223, + "step": 19707 + }, + { + "epoch": 0.5786599330553761, + "grad_norm": 0.0, + "learning_rate": 7.95289233763717e-06, + "loss": 1.2217, + "step": 19708 + }, + { + "epoch": 0.5786892947325151, + "grad_norm": 0.0, + "learning_rate": 7.951961525487783e-06, + "loss": 1.2139, + "step": 19709 + }, + { + "epoch": 0.5787186564096541, + "grad_norm": 0.0, + "learning_rate": 7.95103073185933e-06, + "loss": 1.209, + "step": 19710 + }, + { + "epoch": 0.5787480180867931, + "grad_norm": 0.0, + "learning_rate": 7.950099956760234e-06, + "loss": 1.2666, + "step": 19711 + }, + { + "epoch": 0.5787773797639321, + "grad_norm": 0.0, + "learning_rate": 7.949169200198904e-06, + "loss": 1.3438, + "step": 19712 + }, + { + "epoch": 0.5788067414410711, + "grad_norm": 0.0, + "learning_rate": 7.948238462183767e-06, + "loss": 1.3076, + "step": 19713 + }, + { + "epoch": 0.5788361031182101, + "grad_norm": 0.0, + "learning_rate": 7.947307742723232e-06, + "loss": 1.187, + "step": 19714 + }, + { + "epoch": 0.5788654647953491, + "grad_norm": 0.0, + "learning_rate": 7.94637704182572e-06, + "loss": 1.2988, + "step": 19715 + }, + { + "epoch": 0.5788948264724881, + "grad_norm": 0.0, + "learning_rate": 7.945446359499649e-06, + "loss": 1.4219, + "step": 19716 + }, + { + "epoch": 0.5789241881496271, + "grad_norm": 0.0, + "learning_rate": 7.944515695753428e-06, + "loss": 1.25, + "step": 19717 + }, + { + "epoch": 0.5789535498267661, + "grad_norm": 0.0, + "learning_rate": 7.943585050595482e-06, + "loss": 1.252, + "step": 19718 + }, + { + "epoch": 0.5789829115039051, + "grad_norm": 0.0, + "learning_rate": 7.94265442403422e-06, + "loss": 1.3447, + "step": 19719 + }, + { + "epoch": 0.5790122731810441, + "grad_norm": 0.0, + "learning_rate": 7.941723816078062e-06, + "loss": 1.2109, + "step": 19720 + }, + { + "epoch": 0.579041634858183, + "grad_norm": 0.0, + "learning_rate": 7.940793226735422e-06, + "loss": 1.25, + "step": 19721 + }, + { + "epoch": 0.5790709965353221, + "grad_norm": 0.0, + "learning_rate": 7.93986265601472e-06, + "loss": 1.3574, + "step": 19722 + }, + { + "epoch": 0.5791003582124611, + "grad_norm": 0.0, + "learning_rate": 7.938932103924362e-06, + "loss": 1.2637, + "step": 19723 + }, + { + "epoch": 0.5791297198896, + "grad_norm": 0.0, + "learning_rate": 7.938001570472772e-06, + "loss": 1.3193, + "step": 19724 + }, + { + "epoch": 0.5791590815667391, + "grad_norm": 0.0, + "learning_rate": 7.93707105566836e-06, + "loss": 1.2383, + "step": 19725 + }, + { + "epoch": 0.5791884432438781, + "grad_norm": 0.0, + "learning_rate": 7.936140559519546e-06, + "loss": 1.2705, + "step": 19726 + }, + { + "epoch": 0.579217804921017, + "grad_norm": 0.0, + "learning_rate": 7.935210082034742e-06, + "loss": 1.374, + "step": 19727 + }, + { + "epoch": 0.5792471665981561, + "grad_norm": 0.0, + "learning_rate": 7.934279623222357e-06, + "loss": 1.2988, + "step": 19728 + }, + { + "epoch": 0.5792765282752951, + "grad_norm": 0.0, + "learning_rate": 7.933349183090813e-06, + "loss": 1.2441, + "step": 19729 + }, + { + "epoch": 0.579305889952434, + "grad_norm": 0.0, + "learning_rate": 7.932418761648521e-06, + "loss": 1.2002, + "step": 19730 + }, + { + "epoch": 0.5793352516295731, + "grad_norm": 0.0, + "learning_rate": 7.931488358903898e-06, + "loss": 1.2305, + "step": 19731 + }, + { + "epoch": 0.5793646133067121, + "grad_norm": 0.0, + "learning_rate": 7.930557974865351e-06, + "loss": 1.3682, + "step": 19732 + }, + { + "epoch": 0.579393974983851, + "grad_norm": 0.0, + "learning_rate": 7.929627609541302e-06, + "loss": 1.2358, + "step": 19733 + }, + { + "epoch": 0.5794233366609901, + "grad_norm": 0.0, + "learning_rate": 7.92869726294016e-06, + "loss": 1.2632, + "step": 19734 + }, + { + "epoch": 0.5794526983381291, + "grad_norm": 0.0, + "learning_rate": 7.92776693507034e-06, + "loss": 1.3457, + "step": 19735 + }, + { + "epoch": 0.579482060015268, + "grad_norm": 0.0, + "learning_rate": 7.926836625940255e-06, + "loss": 1.1953, + "step": 19736 + }, + { + "epoch": 0.5795114216924071, + "grad_norm": 0.0, + "learning_rate": 7.925906335558312e-06, + "loss": 1.1328, + "step": 19737 + }, + { + "epoch": 0.5795407833695461, + "grad_norm": 0.0, + "learning_rate": 7.924976063932934e-06, + "loss": 1.1968, + "step": 19738 + }, + { + "epoch": 0.579570145046685, + "grad_norm": 0.0, + "learning_rate": 7.924045811072526e-06, + "loss": 1.2617, + "step": 19739 + }, + { + "epoch": 0.5795995067238241, + "grad_norm": 0.0, + "learning_rate": 7.923115576985508e-06, + "loss": 1.502, + "step": 19740 + }, + { + "epoch": 0.5796288684009631, + "grad_norm": 0.0, + "learning_rate": 7.92218536168028e-06, + "loss": 1.3066, + "step": 19741 + }, + { + "epoch": 0.579658230078102, + "grad_norm": 0.0, + "learning_rate": 7.921255165165269e-06, + "loss": 1.2764, + "step": 19742 + }, + { + "epoch": 0.5796875917552411, + "grad_norm": 0.0, + "learning_rate": 7.920324987448877e-06, + "loss": 1.2842, + "step": 19743 + }, + { + "epoch": 0.57971695343238, + "grad_norm": 0.0, + "learning_rate": 7.919394828539522e-06, + "loss": 1.2354, + "step": 19744 + }, + { + "epoch": 0.579746315109519, + "grad_norm": 0.0, + "learning_rate": 7.918464688445607e-06, + "loss": 1.2734, + "step": 19745 + }, + { + "epoch": 0.5797756767866581, + "grad_norm": 0.0, + "learning_rate": 7.917534567175555e-06, + "loss": 1.3223, + "step": 19746 + }, + { + "epoch": 0.579805038463797, + "grad_norm": 0.0, + "learning_rate": 7.91660446473777e-06, + "loss": 1.4287, + "step": 19747 + }, + { + "epoch": 0.579834400140936, + "grad_norm": 0.0, + "learning_rate": 7.915674381140659e-06, + "loss": 1.3799, + "step": 19748 + }, + { + "epoch": 0.5798637618180751, + "grad_norm": 0.0, + "learning_rate": 7.914744316392641e-06, + "loss": 1.1904, + "step": 19749 + }, + { + "epoch": 0.579893123495214, + "grad_norm": 0.0, + "learning_rate": 7.913814270502125e-06, + "loss": 1.335, + "step": 19750 + }, + { + "epoch": 0.579922485172353, + "grad_norm": 0.0, + "learning_rate": 7.912884243477521e-06, + "loss": 1.2812, + "step": 19751 + }, + { + "epoch": 0.5799518468494921, + "grad_norm": 0.0, + "learning_rate": 7.911954235327238e-06, + "loss": 1.2861, + "step": 19752 + }, + { + "epoch": 0.579981208526631, + "grad_norm": 0.0, + "learning_rate": 7.91102424605969e-06, + "loss": 1.1704, + "step": 19753 + }, + { + "epoch": 0.58001057020377, + "grad_norm": 0.0, + "learning_rate": 7.910094275683282e-06, + "loss": 1.3398, + "step": 19754 + }, + { + "epoch": 0.5800399318809091, + "grad_norm": 0.0, + "learning_rate": 7.90916432420643e-06, + "loss": 1.3311, + "step": 19755 + }, + { + "epoch": 0.580069293558048, + "grad_norm": 0.0, + "learning_rate": 7.90823439163754e-06, + "loss": 1.1279, + "step": 19756 + }, + { + "epoch": 0.580098655235187, + "grad_norm": 0.0, + "learning_rate": 7.907304477985018e-06, + "loss": 1.2148, + "step": 19757 + }, + { + "epoch": 0.5801280169123261, + "grad_norm": 0.0, + "learning_rate": 7.906374583257282e-06, + "loss": 1.2314, + "step": 19758 + }, + { + "epoch": 0.580157378589465, + "grad_norm": 0.0, + "learning_rate": 7.905444707462733e-06, + "loss": 1.1836, + "step": 19759 + }, + { + "epoch": 0.580186740266604, + "grad_norm": 0.0, + "learning_rate": 7.904514850609788e-06, + "loss": 1.3164, + "step": 19760 + }, + { + "epoch": 0.5802161019437431, + "grad_norm": 0.0, + "learning_rate": 7.903585012706847e-06, + "loss": 1.2339, + "step": 19761 + }, + { + "epoch": 0.580245463620882, + "grad_norm": 0.0, + "learning_rate": 7.902655193762327e-06, + "loss": 1.2744, + "step": 19762 + }, + { + "epoch": 0.580274825298021, + "grad_norm": 0.0, + "learning_rate": 7.901725393784632e-06, + "loss": 1.2432, + "step": 19763 + }, + { + "epoch": 0.5803041869751601, + "grad_norm": 0.0, + "learning_rate": 7.900795612782174e-06, + "loss": 1.3594, + "step": 19764 + }, + { + "epoch": 0.580333548652299, + "grad_norm": 0.0, + "learning_rate": 7.899865850763358e-06, + "loss": 1.252, + "step": 19765 + }, + { + "epoch": 0.580362910329438, + "grad_norm": 0.0, + "learning_rate": 7.89893610773659e-06, + "loss": 1.3252, + "step": 19766 + }, + { + "epoch": 0.5803922720065771, + "grad_norm": 0.0, + "learning_rate": 7.898006383710283e-06, + "loss": 1.3584, + "step": 19767 + }, + { + "epoch": 0.580421633683716, + "grad_norm": 0.0, + "learning_rate": 7.897076678692844e-06, + "loss": 1.2139, + "step": 19768 + }, + { + "epoch": 0.580450995360855, + "grad_norm": 0.0, + "learning_rate": 7.896146992692678e-06, + "loss": 1.4014, + "step": 19769 + }, + { + "epoch": 0.580480357037994, + "grad_norm": 0.0, + "learning_rate": 7.89521732571819e-06, + "loss": 1.3613, + "step": 19770 + }, + { + "epoch": 0.580509718715133, + "grad_norm": 0.0, + "learning_rate": 7.894287677777798e-06, + "loss": 1.2686, + "step": 19771 + }, + { + "epoch": 0.580539080392272, + "grad_norm": 0.0, + "learning_rate": 7.893358048879896e-06, + "loss": 1.23, + "step": 19772 + }, + { + "epoch": 0.580568442069411, + "grad_norm": 0.0, + "learning_rate": 7.8924284390329e-06, + "loss": 1.2046, + "step": 19773 + }, + { + "epoch": 0.58059780374655, + "grad_norm": 0.0, + "learning_rate": 7.89149884824521e-06, + "loss": 1.3525, + "step": 19774 + }, + { + "epoch": 0.580627165423689, + "grad_norm": 0.0, + "learning_rate": 7.89056927652524e-06, + "loss": 1.2812, + "step": 19775 + }, + { + "epoch": 0.580656527100828, + "grad_norm": 0.0, + "learning_rate": 7.889639723881392e-06, + "loss": 1.2988, + "step": 19776 + }, + { + "epoch": 0.580685888777967, + "grad_norm": 0.0, + "learning_rate": 7.888710190322068e-06, + "loss": 1.3433, + "step": 19777 + }, + { + "epoch": 0.580715250455106, + "grad_norm": 0.0, + "learning_rate": 7.887780675855682e-06, + "loss": 1.2637, + "step": 19778 + }, + { + "epoch": 0.580744612132245, + "grad_norm": 0.0, + "learning_rate": 7.886851180490634e-06, + "loss": 1.1675, + "step": 19779 + }, + { + "epoch": 0.580773973809384, + "grad_norm": 0.0, + "learning_rate": 7.885921704235335e-06, + "loss": 1.2139, + "step": 19780 + }, + { + "epoch": 0.580803335486523, + "grad_norm": 0.0, + "learning_rate": 7.884992247098184e-06, + "loss": 1.2285, + "step": 19781 + }, + { + "epoch": 0.580832697163662, + "grad_norm": 0.0, + "learning_rate": 7.884062809087591e-06, + "loss": 1.2344, + "step": 19782 + }, + { + "epoch": 0.580862058840801, + "grad_norm": 0.0, + "learning_rate": 7.88313339021196e-06, + "loss": 1.291, + "step": 19783 + }, + { + "epoch": 0.58089142051794, + "grad_norm": 0.0, + "learning_rate": 7.882203990479698e-06, + "loss": 1.3662, + "step": 19784 + }, + { + "epoch": 0.580920782195079, + "grad_norm": 0.0, + "learning_rate": 7.881274609899208e-06, + "loss": 1.377, + "step": 19785 + }, + { + "epoch": 0.580950143872218, + "grad_norm": 0.0, + "learning_rate": 7.880345248478888e-06, + "loss": 1.1943, + "step": 19786 + }, + { + "epoch": 0.580979505549357, + "grad_norm": 0.0, + "learning_rate": 7.879415906227156e-06, + "loss": 1.21, + "step": 19787 + }, + { + "epoch": 0.581008867226496, + "grad_norm": 0.0, + "learning_rate": 7.878486583152405e-06, + "loss": 1.168, + "step": 19788 + }, + { + "epoch": 0.581038228903635, + "grad_norm": 0.0, + "learning_rate": 7.877557279263046e-06, + "loss": 1.29, + "step": 19789 + }, + { + "epoch": 0.581067590580774, + "grad_norm": 0.0, + "learning_rate": 7.876627994567476e-06, + "loss": 1.3809, + "step": 19790 + }, + { + "epoch": 0.581096952257913, + "grad_norm": 0.0, + "learning_rate": 7.875698729074107e-06, + "loss": 1.2852, + "step": 19791 + }, + { + "epoch": 0.581126313935052, + "grad_norm": 0.0, + "learning_rate": 7.874769482791336e-06, + "loss": 1.2822, + "step": 19792 + }, + { + "epoch": 0.581155675612191, + "grad_norm": 0.0, + "learning_rate": 7.87384025572757e-06, + "loss": 1.2344, + "step": 19793 + }, + { + "epoch": 0.58118503728933, + "grad_norm": 0.0, + "learning_rate": 7.872911047891208e-06, + "loss": 1.3008, + "step": 19794 + }, + { + "epoch": 0.581214398966469, + "grad_norm": 0.0, + "learning_rate": 7.87198185929066e-06, + "loss": 1.2842, + "step": 19795 + }, + { + "epoch": 0.5812437606436079, + "grad_norm": 0.0, + "learning_rate": 7.871052689934328e-06, + "loss": 1.3008, + "step": 19796 + }, + { + "epoch": 0.581273122320747, + "grad_norm": 0.0, + "learning_rate": 7.870123539830605e-06, + "loss": 1.2256, + "step": 19797 + }, + { + "epoch": 0.581302483997886, + "grad_norm": 0.0, + "learning_rate": 7.869194408987903e-06, + "loss": 1.3311, + "step": 19798 + }, + { + "epoch": 0.5813318456750249, + "grad_norm": 0.0, + "learning_rate": 7.868265297414621e-06, + "loss": 1.2871, + "step": 19799 + }, + { + "epoch": 0.5813612073521639, + "grad_norm": 0.0, + "learning_rate": 7.867336205119166e-06, + "loss": 1.335, + "step": 19800 + }, + { + "epoch": 0.581390569029303, + "grad_norm": 0.0, + "learning_rate": 7.866407132109929e-06, + "loss": 1.1328, + "step": 19801 + }, + { + "epoch": 0.5814199307064419, + "grad_norm": 0.0, + "learning_rate": 7.865478078395323e-06, + "loss": 1.2837, + "step": 19802 + }, + { + "epoch": 0.5814492923835809, + "grad_norm": 0.0, + "learning_rate": 7.864549043983742e-06, + "loss": 1.1577, + "step": 19803 + }, + { + "epoch": 0.58147865406072, + "grad_norm": 0.0, + "learning_rate": 7.863620028883597e-06, + "loss": 1.2197, + "step": 19804 + }, + { + "epoch": 0.5815080157378589, + "grad_norm": 0.0, + "learning_rate": 7.86269103310328e-06, + "loss": 1.146, + "step": 19805 + }, + { + "epoch": 0.5815373774149979, + "grad_norm": 0.0, + "learning_rate": 7.861762056651192e-06, + "loss": 1.2646, + "step": 19806 + }, + { + "epoch": 0.581566739092137, + "grad_norm": 0.0, + "learning_rate": 7.86083309953574e-06, + "loss": 1.25, + "step": 19807 + }, + { + "epoch": 0.5815961007692759, + "grad_norm": 0.0, + "learning_rate": 7.859904161765322e-06, + "loss": 1.4639, + "step": 19808 + }, + { + "epoch": 0.5816254624464149, + "grad_norm": 0.0, + "learning_rate": 7.858975243348341e-06, + "loss": 1.291, + "step": 19809 + }, + { + "epoch": 0.581654824123554, + "grad_norm": 0.0, + "learning_rate": 7.858046344293192e-06, + "loss": 1.3135, + "step": 19810 + }, + { + "epoch": 0.5816841858006929, + "grad_norm": 0.0, + "learning_rate": 7.85711746460828e-06, + "loss": 1.3784, + "step": 19811 + }, + { + "epoch": 0.5817135474778319, + "grad_norm": 0.0, + "learning_rate": 7.856188604302002e-06, + "loss": 1.291, + "step": 19812 + }, + { + "epoch": 0.581742909154971, + "grad_norm": 0.0, + "learning_rate": 7.855259763382764e-06, + "loss": 1.3037, + "step": 19813 + }, + { + "epoch": 0.5817722708321099, + "grad_norm": 0.0, + "learning_rate": 7.854330941858959e-06, + "loss": 1.2432, + "step": 19814 + }, + { + "epoch": 0.5818016325092489, + "grad_norm": 0.0, + "learning_rate": 7.853402139738983e-06, + "loss": 1.3799, + "step": 19815 + }, + { + "epoch": 0.581830994186388, + "grad_norm": 0.0, + "learning_rate": 7.85247335703125e-06, + "loss": 1.3643, + "step": 19816 + }, + { + "epoch": 0.5818603558635269, + "grad_norm": 0.0, + "learning_rate": 7.851544593744144e-06, + "loss": 1.3779, + "step": 19817 + }, + { + "epoch": 0.5818897175406659, + "grad_norm": 0.0, + "learning_rate": 7.850615849886074e-06, + "loss": 1.4014, + "step": 19818 + }, + { + "epoch": 0.581919079217805, + "grad_norm": 0.0, + "learning_rate": 7.849687125465432e-06, + "loss": 1.2275, + "step": 19819 + }, + { + "epoch": 0.5819484408949439, + "grad_norm": 0.0, + "learning_rate": 7.848758420490625e-06, + "loss": 1.3589, + "step": 19820 + }, + { + "epoch": 0.5819778025720829, + "grad_norm": 0.0, + "learning_rate": 7.847829734970042e-06, + "loss": 1.21, + "step": 19821 + }, + { + "epoch": 0.5820071642492219, + "grad_norm": 0.0, + "learning_rate": 7.846901068912089e-06, + "loss": 1.2959, + "step": 19822 + }, + { + "epoch": 0.5820365259263609, + "grad_norm": 0.0, + "learning_rate": 7.845972422325157e-06, + "loss": 1.4023, + "step": 19823 + }, + { + "epoch": 0.5820658876034999, + "grad_norm": 0.0, + "learning_rate": 7.845043795217654e-06, + "loss": 1.2549, + "step": 19824 + }, + { + "epoch": 0.5820952492806389, + "grad_norm": 0.0, + "learning_rate": 7.84411518759797e-06, + "loss": 1.2529, + "step": 19825 + }, + { + "epoch": 0.5821246109577779, + "grad_norm": 0.0, + "learning_rate": 7.8431865994745e-06, + "loss": 1.3467, + "step": 19826 + }, + { + "epoch": 0.5821539726349169, + "grad_norm": 0.0, + "learning_rate": 7.842258030855651e-06, + "loss": 1.1704, + "step": 19827 + }, + { + "epoch": 0.5821833343120559, + "grad_norm": 0.0, + "learning_rate": 7.841329481749813e-06, + "loss": 1.2446, + "step": 19828 + }, + { + "epoch": 0.5822126959891949, + "grad_norm": 0.0, + "learning_rate": 7.840400952165386e-06, + "loss": 1.2266, + "step": 19829 + }, + { + "epoch": 0.5822420576663339, + "grad_norm": 0.0, + "learning_rate": 7.839472442110765e-06, + "loss": 1.3809, + "step": 19830 + }, + { + "epoch": 0.5822714193434729, + "grad_norm": 0.0, + "learning_rate": 7.838543951594349e-06, + "loss": 1.1846, + "step": 19831 + }, + { + "epoch": 0.5823007810206119, + "grad_norm": 0.0, + "learning_rate": 7.837615480624535e-06, + "loss": 1.1934, + "step": 19832 + }, + { + "epoch": 0.5823301426977509, + "grad_norm": 0.0, + "learning_rate": 7.836687029209716e-06, + "loss": 1.1958, + "step": 19833 + }, + { + "epoch": 0.5823595043748899, + "grad_norm": 0.0, + "learning_rate": 7.835758597358292e-06, + "loss": 1.3594, + "step": 19834 + }, + { + "epoch": 0.5823888660520289, + "grad_norm": 0.0, + "learning_rate": 7.834830185078654e-06, + "loss": 1.3652, + "step": 19835 + }, + { + "epoch": 0.5824182277291678, + "grad_norm": 0.0, + "learning_rate": 7.833901792379204e-06, + "loss": 1.1904, + "step": 19836 + }, + { + "epoch": 0.5824475894063069, + "grad_norm": 0.0, + "learning_rate": 7.832973419268335e-06, + "loss": 1.2949, + "step": 19837 + }, + { + "epoch": 0.5824769510834459, + "grad_norm": 0.0, + "learning_rate": 7.832045065754443e-06, + "loss": 1.2852, + "step": 19838 + }, + { + "epoch": 0.5825063127605848, + "grad_norm": 0.0, + "learning_rate": 7.831116731845917e-06, + "loss": 1.29, + "step": 19839 + }, + { + "epoch": 0.5825356744377239, + "grad_norm": 0.0, + "learning_rate": 7.830188417551163e-06, + "loss": 1.2832, + "step": 19840 + }, + { + "epoch": 0.5825650361148629, + "grad_norm": 0.0, + "learning_rate": 7.82926012287857e-06, + "loss": 1.2471, + "step": 19841 + }, + { + "epoch": 0.5825943977920018, + "grad_norm": 0.0, + "learning_rate": 7.828331847836535e-06, + "loss": 1.1943, + "step": 19842 + }, + { + "epoch": 0.5826237594691409, + "grad_norm": 0.0, + "learning_rate": 7.827403592433446e-06, + "loss": 1.2656, + "step": 19843 + }, + { + "epoch": 0.5826531211462799, + "grad_norm": 0.0, + "learning_rate": 7.826475356677709e-06, + "loss": 1.2061, + "step": 19844 + }, + { + "epoch": 0.5826824828234188, + "grad_norm": 0.0, + "learning_rate": 7.82554714057771e-06, + "loss": 1.1729, + "step": 19845 + }, + { + "epoch": 0.5827118445005579, + "grad_norm": 0.0, + "learning_rate": 7.824618944141845e-06, + "loss": 1.2773, + "step": 19846 + }, + { + "epoch": 0.5827412061776969, + "grad_norm": 0.0, + "learning_rate": 7.823690767378508e-06, + "loss": 1.3066, + "step": 19847 + }, + { + "epoch": 0.5827705678548358, + "grad_norm": 0.0, + "learning_rate": 7.82276261029609e-06, + "loss": 1.2856, + "step": 19848 + }, + { + "epoch": 0.5827999295319749, + "grad_norm": 0.0, + "learning_rate": 7.821834472902992e-06, + "loss": 1.3262, + "step": 19849 + }, + { + "epoch": 0.5828292912091139, + "grad_norm": 0.0, + "learning_rate": 7.820906355207598e-06, + "loss": 1.332, + "step": 19850 + }, + { + "epoch": 0.5828586528862528, + "grad_norm": 0.0, + "learning_rate": 7.81997825721831e-06, + "loss": 1.165, + "step": 19851 + }, + { + "epoch": 0.5828880145633919, + "grad_norm": 0.0, + "learning_rate": 7.819050178943514e-06, + "loss": 1.2949, + "step": 19852 + }, + { + "epoch": 0.5829173762405309, + "grad_norm": 0.0, + "learning_rate": 7.818122120391609e-06, + "loss": 1.2979, + "step": 19853 + }, + { + "epoch": 0.5829467379176698, + "grad_norm": 0.0, + "learning_rate": 7.817194081570983e-06, + "loss": 1.2915, + "step": 19854 + }, + { + "epoch": 0.5829760995948089, + "grad_norm": 0.0, + "learning_rate": 7.816266062490027e-06, + "loss": 1.2969, + "step": 19855 + }, + { + "epoch": 0.5830054612719479, + "grad_norm": 0.0, + "learning_rate": 7.81533806315714e-06, + "loss": 1.3643, + "step": 19856 + }, + { + "epoch": 0.5830348229490868, + "grad_norm": 0.0, + "learning_rate": 7.814410083580708e-06, + "loss": 1.3115, + "step": 19857 + }, + { + "epoch": 0.5830641846262259, + "grad_norm": 0.0, + "learning_rate": 7.813482123769127e-06, + "loss": 1.249, + "step": 19858 + }, + { + "epoch": 0.5830935463033649, + "grad_norm": 0.0, + "learning_rate": 7.812554183730785e-06, + "loss": 1.3086, + "step": 19859 + }, + { + "epoch": 0.5831229079805038, + "grad_norm": 0.0, + "learning_rate": 7.811626263474078e-06, + "loss": 1.3623, + "step": 19860 + }, + { + "epoch": 0.5831522696576429, + "grad_norm": 0.0, + "learning_rate": 7.810698363007393e-06, + "loss": 1.2261, + "step": 19861 + }, + { + "epoch": 0.5831816313347818, + "grad_norm": 0.0, + "learning_rate": 7.809770482339128e-06, + "loss": 1.1934, + "step": 19862 + }, + { + "epoch": 0.5832109930119208, + "grad_norm": 0.0, + "learning_rate": 7.808842621477662e-06, + "loss": 1.2686, + "step": 19863 + }, + { + "epoch": 0.5832403546890599, + "grad_norm": 0.0, + "learning_rate": 7.807914780431399e-06, + "loss": 1.2412, + "step": 19864 + }, + { + "epoch": 0.5832697163661988, + "grad_norm": 0.0, + "learning_rate": 7.806986959208724e-06, + "loss": 1.1895, + "step": 19865 + }, + { + "epoch": 0.5832990780433378, + "grad_norm": 0.0, + "learning_rate": 7.806059157818026e-06, + "loss": 1.2241, + "step": 19866 + }, + { + "epoch": 0.5833284397204769, + "grad_norm": 0.0, + "learning_rate": 7.8051313762677e-06, + "loss": 1.2207, + "step": 19867 + }, + { + "epoch": 0.5833578013976158, + "grad_norm": 0.0, + "learning_rate": 7.804203614566124e-06, + "loss": 1.2529, + "step": 19868 + }, + { + "epoch": 0.5833871630747548, + "grad_norm": 0.0, + "learning_rate": 7.803275872721707e-06, + "loss": 1.249, + "step": 19869 + }, + { + "epoch": 0.5834165247518939, + "grad_norm": 0.0, + "learning_rate": 7.802348150742823e-06, + "loss": 1.2461, + "step": 19870 + }, + { + "epoch": 0.5834458864290328, + "grad_norm": 0.0, + "learning_rate": 7.801420448637871e-06, + "loss": 1.252, + "step": 19871 + }, + { + "epoch": 0.5834752481061718, + "grad_norm": 0.0, + "learning_rate": 7.800492766415234e-06, + "loss": 1.3203, + "step": 19872 + }, + { + "epoch": 0.5835046097833109, + "grad_norm": 0.0, + "learning_rate": 7.799565104083307e-06, + "loss": 1.1987, + "step": 19873 + }, + { + "epoch": 0.5835339714604498, + "grad_norm": 0.0, + "learning_rate": 7.798637461650478e-06, + "loss": 1.293, + "step": 19874 + }, + { + "epoch": 0.5835633331375888, + "grad_norm": 0.0, + "learning_rate": 7.797709839125128e-06, + "loss": 1.251, + "step": 19875 + }, + { + "epoch": 0.5835926948147279, + "grad_norm": 0.0, + "learning_rate": 7.796782236515657e-06, + "loss": 1.2715, + "step": 19876 + }, + { + "epoch": 0.5836220564918668, + "grad_norm": 0.0, + "learning_rate": 7.795854653830446e-06, + "loss": 1.2964, + "step": 19877 + }, + { + "epoch": 0.5836514181690058, + "grad_norm": 0.0, + "learning_rate": 7.794927091077888e-06, + "loss": 1.3379, + "step": 19878 + }, + { + "epoch": 0.5836807798461449, + "grad_norm": 0.0, + "learning_rate": 7.793999548266364e-06, + "loss": 1.3955, + "step": 19879 + }, + { + "epoch": 0.5837101415232838, + "grad_norm": 0.0, + "learning_rate": 7.793072025404273e-06, + "loss": 1.2241, + "step": 19880 + }, + { + "epoch": 0.5837395032004228, + "grad_norm": 0.0, + "learning_rate": 7.792144522499993e-06, + "loss": 1.2007, + "step": 19881 + }, + { + "epoch": 0.5837688648775619, + "grad_norm": 0.0, + "learning_rate": 7.791217039561918e-06, + "loss": 1.252, + "step": 19882 + }, + { + "epoch": 0.5837982265547008, + "grad_norm": 0.0, + "learning_rate": 7.790289576598435e-06, + "loss": 1.2017, + "step": 19883 + }, + { + "epoch": 0.5838275882318398, + "grad_norm": 0.0, + "learning_rate": 7.789362133617922e-06, + "loss": 1.3467, + "step": 19884 + }, + { + "epoch": 0.5838569499089789, + "grad_norm": 0.0, + "learning_rate": 7.78843471062878e-06, + "loss": 1.2515, + "step": 19885 + }, + { + "epoch": 0.5838863115861178, + "grad_norm": 0.0, + "learning_rate": 7.787507307639385e-06, + "loss": 1.2354, + "step": 19886 + }, + { + "epoch": 0.5839156732632568, + "grad_norm": 0.0, + "learning_rate": 7.786579924658132e-06, + "loss": 1.1118, + "step": 19887 + }, + { + "epoch": 0.5839450349403958, + "grad_norm": 0.0, + "learning_rate": 7.785652561693398e-06, + "loss": 1.3887, + "step": 19888 + }, + { + "epoch": 0.5839743966175348, + "grad_norm": 0.0, + "learning_rate": 7.784725218753582e-06, + "loss": 1.3291, + "step": 19889 + }, + { + "epoch": 0.5840037582946738, + "grad_norm": 0.0, + "learning_rate": 7.783797895847058e-06, + "loss": 1.3057, + "step": 19890 + }, + { + "epoch": 0.5840331199718128, + "grad_norm": 0.0, + "learning_rate": 7.78287059298222e-06, + "loss": 1.3359, + "step": 19891 + }, + { + "epoch": 0.5840624816489518, + "grad_norm": 0.0, + "learning_rate": 7.781943310167448e-06, + "loss": 1.3037, + "step": 19892 + }, + { + "epoch": 0.5840918433260908, + "grad_norm": 0.0, + "learning_rate": 7.781016047411134e-06, + "loss": 1.2715, + "step": 19893 + }, + { + "epoch": 0.5841212050032298, + "grad_norm": 0.0, + "learning_rate": 7.78008880472166e-06, + "loss": 1.2236, + "step": 19894 + }, + { + "epoch": 0.5841505666803688, + "grad_norm": 0.0, + "learning_rate": 7.77916158210741e-06, + "loss": 1.3232, + "step": 19895 + }, + { + "epoch": 0.5841799283575078, + "grad_norm": 0.0, + "learning_rate": 7.778234379576772e-06, + "loss": 1.2227, + "step": 19896 + }, + { + "epoch": 0.5842092900346468, + "grad_norm": 0.0, + "learning_rate": 7.777307197138126e-06, + "loss": 1.2529, + "step": 19897 + }, + { + "epoch": 0.5842386517117858, + "grad_norm": 0.0, + "learning_rate": 7.776380034799863e-06, + "loss": 1.2798, + "step": 19898 + }, + { + "epoch": 0.5842680133889248, + "grad_norm": 0.0, + "learning_rate": 7.775452892570366e-06, + "loss": 1.4551, + "step": 19899 + }, + { + "epoch": 0.5842973750660638, + "grad_norm": 0.0, + "learning_rate": 7.774525770458016e-06, + "loss": 1.252, + "step": 19900 + }, + { + "epoch": 0.5843267367432028, + "grad_norm": 0.0, + "learning_rate": 7.773598668471198e-06, + "loss": 1.189, + "step": 19901 + }, + { + "epoch": 0.5843560984203418, + "grad_norm": 0.0, + "learning_rate": 7.772671586618304e-06, + "loss": 1.1895, + "step": 19902 + }, + { + "epoch": 0.5843854600974807, + "grad_norm": 0.0, + "learning_rate": 7.771744524907707e-06, + "loss": 1.3086, + "step": 19903 + }, + { + "epoch": 0.5844148217746198, + "grad_norm": 0.0, + "learning_rate": 7.770817483347793e-06, + "loss": 1.2852, + "step": 19904 + }, + { + "epoch": 0.5844441834517587, + "grad_norm": 0.0, + "learning_rate": 7.76989046194695e-06, + "loss": 1.2373, + "step": 19905 + }, + { + "epoch": 0.5844735451288977, + "grad_norm": 0.0, + "learning_rate": 7.768963460713558e-06, + "loss": 1.2822, + "step": 19906 + }, + { + "epoch": 0.5845029068060368, + "grad_norm": 0.0, + "learning_rate": 7.768036479656002e-06, + "loss": 1.2256, + "step": 19907 + }, + { + "epoch": 0.5845322684831757, + "grad_norm": 0.0, + "learning_rate": 7.767109518782659e-06, + "loss": 1.3491, + "step": 19908 + }, + { + "epoch": 0.5845616301603147, + "grad_norm": 0.0, + "learning_rate": 7.766182578101922e-06, + "loss": 1.335, + "step": 19909 + }, + { + "epoch": 0.5845909918374538, + "grad_norm": 0.0, + "learning_rate": 7.765255657622166e-06, + "loss": 1.2881, + "step": 19910 + }, + { + "epoch": 0.5846203535145927, + "grad_norm": 0.0, + "learning_rate": 7.764328757351775e-06, + "loss": 1.4385, + "step": 19911 + }, + { + "epoch": 0.5846497151917317, + "grad_norm": 0.0, + "learning_rate": 7.763401877299129e-06, + "loss": 1.2461, + "step": 19912 + }, + { + "epoch": 0.5846790768688708, + "grad_norm": 0.0, + "learning_rate": 7.762475017472617e-06, + "loss": 1.4541, + "step": 19913 + }, + { + "epoch": 0.5847084385460097, + "grad_norm": 0.0, + "learning_rate": 7.761548177880617e-06, + "loss": 1.4004, + "step": 19914 + }, + { + "epoch": 0.5847378002231487, + "grad_norm": 0.0, + "learning_rate": 7.760621358531509e-06, + "loss": 1.2764, + "step": 19915 + }, + { + "epoch": 0.5847671619002878, + "grad_norm": 0.0, + "learning_rate": 7.759694559433674e-06, + "loss": 1.4248, + "step": 19916 + }, + { + "epoch": 0.5847965235774267, + "grad_norm": 0.0, + "learning_rate": 7.758767780595494e-06, + "loss": 1.2002, + "step": 19917 + }, + { + "epoch": 0.5848258852545657, + "grad_norm": 0.0, + "learning_rate": 7.757841022025353e-06, + "loss": 1.2939, + "step": 19918 + }, + { + "epoch": 0.5848552469317048, + "grad_norm": 0.0, + "learning_rate": 7.75691428373163e-06, + "loss": 1.2363, + "step": 19919 + }, + { + "epoch": 0.5848846086088437, + "grad_norm": 0.0, + "learning_rate": 7.755987565722705e-06, + "loss": 1.4004, + "step": 19920 + }, + { + "epoch": 0.5849139702859827, + "grad_norm": 0.0, + "learning_rate": 7.755060868006955e-06, + "loss": 1.2773, + "step": 19921 + }, + { + "epoch": 0.5849433319631218, + "grad_norm": 0.0, + "learning_rate": 7.754134190592771e-06, + "loss": 1.1616, + "step": 19922 + }, + { + "epoch": 0.5849726936402607, + "grad_norm": 0.0, + "learning_rate": 7.753207533488526e-06, + "loss": 1.3564, + "step": 19923 + }, + { + "epoch": 0.5850020553173997, + "grad_norm": 0.0, + "learning_rate": 7.752280896702599e-06, + "loss": 1.2681, + "step": 19924 + }, + { + "epoch": 0.5850314169945388, + "grad_norm": 0.0, + "learning_rate": 7.751354280243372e-06, + "loss": 1.3262, + "step": 19925 + }, + { + "epoch": 0.5850607786716777, + "grad_norm": 0.0, + "learning_rate": 7.750427684119221e-06, + "loss": 0.9951, + "step": 19926 + }, + { + "epoch": 0.5850901403488167, + "grad_norm": 0.0, + "learning_rate": 7.749501108338535e-06, + "loss": 1.3608, + "step": 19927 + }, + { + "epoch": 0.5851195020259558, + "grad_norm": 0.0, + "learning_rate": 7.74857455290968e-06, + "loss": 1.3438, + "step": 19928 + }, + { + "epoch": 0.5851488637030947, + "grad_norm": 0.0, + "learning_rate": 7.747648017841046e-06, + "loss": 1.21, + "step": 19929 + }, + { + "epoch": 0.5851782253802337, + "grad_norm": 0.0, + "learning_rate": 7.746721503141007e-06, + "loss": 1.2314, + "step": 19930 + }, + { + "epoch": 0.5852075870573727, + "grad_norm": 0.0, + "learning_rate": 7.745795008817944e-06, + "loss": 1.1616, + "step": 19931 + }, + { + "epoch": 0.5852369487345117, + "grad_norm": 0.0, + "learning_rate": 7.744868534880229e-06, + "loss": 1.3555, + "step": 19932 + }, + { + "epoch": 0.5852663104116507, + "grad_norm": 0.0, + "learning_rate": 7.74394208133625e-06, + "loss": 1.3418, + "step": 19933 + }, + { + "epoch": 0.5852956720887897, + "grad_norm": 0.0, + "learning_rate": 7.743015648194382e-06, + "loss": 1.2866, + "step": 19934 + }, + { + "epoch": 0.5853250337659287, + "grad_norm": 0.0, + "learning_rate": 7.742089235462998e-06, + "loss": 1.3652, + "step": 19935 + }, + { + "epoch": 0.5853543954430677, + "grad_norm": 0.0, + "learning_rate": 7.741162843150481e-06, + "loss": 1.3945, + "step": 19936 + }, + { + "epoch": 0.5853837571202067, + "grad_norm": 0.0, + "learning_rate": 7.740236471265202e-06, + "loss": 1.1572, + "step": 19937 + }, + { + "epoch": 0.5854131187973457, + "grad_norm": 0.0, + "learning_rate": 7.73931011981555e-06, + "loss": 1.3647, + "step": 19938 + }, + { + "epoch": 0.5854424804744847, + "grad_norm": 0.0, + "learning_rate": 7.738383788809892e-06, + "loss": 1.291, + "step": 19939 + }, + { + "epoch": 0.5854718421516237, + "grad_norm": 0.0, + "learning_rate": 7.73745747825661e-06, + "loss": 1.1597, + "step": 19940 + }, + { + "epoch": 0.5855012038287627, + "grad_norm": 0.0, + "learning_rate": 7.736531188164076e-06, + "loss": 1.252, + "step": 19941 + }, + { + "epoch": 0.5855305655059017, + "grad_norm": 0.0, + "learning_rate": 7.735604918540675e-06, + "loss": 1.3398, + "step": 19942 + }, + { + "epoch": 0.5855599271830407, + "grad_norm": 0.0, + "learning_rate": 7.734678669394777e-06, + "loss": 1.3799, + "step": 19943 + }, + { + "epoch": 0.5855892888601797, + "grad_norm": 0.0, + "learning_rate": 7.73375244073476e-06, + "loss": 1.231, + "step": 19944 + }, + { + "epoch": 0.5856186505373187, + "grad_norm": 0.0, + "learning_rate": 7.732826232569002e-06, + "loss": 1.3008, + "step": 19945 + }, + { + "epoch": 0.5856480122144577, + "grad_norm": 0.0, + "learning_rate": 7.731900044905871e-06, + "loss": 1.3164, + "step": 19946 + }, + { + "epoch": 0.5856773738915967, + "grad_norm": 0.0, + "learning_rate": 7.730973877753754e-06, + "loss": 1.2256, + "step": 19947 + }, + { + "epoch": 0.5857067355687356, + "grad_norm": 0.0, + "learning_rate": 7.730047731121019e-06, + "loss": 1.3066, + "step": 19948 + }, + { + "epoch": 0.5857360972458747, + "grad_norm": 0.0, + "learning_rate": 7.729121605016045e-06, + "loss": 1.3447, + "step": 19949 + }, + { + "epoch": 0.5857654589230137, + "grad_norm": 0.0, + "learning_rate": 7.728195499447204e-06, + "loss": 1.2891, + "step": 19950 + }, + { + "epoch": 0.5857948206001526, + "grad_norm": 0.0, + "learning_rate": 7.727269414422877e-06, + "loss": 1.207, + "step": 19951 + }, + { + "epoch": 0.5858241822772917, + "grad_norm": 0.0, + "learning_rate": 7.726343349951434e-06, + "loss": 1.3682, + "step": 19952 + }, + { + "epoch": 0.5858535439544307, + "grad_norm": 0.0, + "learning_rate": 7.725417306041246e-06, + "loss": 1.0347, + "step": 19953 + }, + { + "epoch": 0.5858829056315696, + "grad_norm": 0.0, + "learning_rate": 7.724491282700696e-06, + "loss": 1.2764, + "step": 19954 + }, + { + "epoch": 0.5859122673087087, + "grad_norm": 0.0, + "learning_rate": 7.72356527993815e-06, + "loss": 1.376, + "step": 19955 + }, + { + "epoch": 0.5859416289858477, + "grad_norm": 0.0, + "learning_rate": 7.722639297761991e-06, + "loss": 1.2544, + "step": 19956 + }, + { + "epoch": 0.5859709906629866, + "grad_norm": 0.0, + "learning_rate": 7.721713336180583e-06, + "loss": 1.1943, + "step": 19957 + }, + { + "epoch": 0.5860003523401257, + "grad_norm": 0.0, + "learning_rate": 7.720787395202309e-06, + "loss": 1.2178, + "step": 19958 + }, + { + "epoch": 0.5860297140172647, + "grad_norm": 0.0, + "learning_rate": 7.719861474835536e-06, + "loss": 1.2451, + "step": 19959 + }, + { + "epoch": 0.5860590756944036, + "grad_norm": 0.0, + "learning_rate": 7.718935575088641e-06, + "loss": 1.3281, + "step": 19960 + }, + { + "epoch": 0.5860884373715427, + "grad_norm": 0.0, + "learning_rate": 7.718009695969994e-06, + "loss": 1.3037, + "step": 19961 + }, + { + "epoch": 0.5861177990486817, + "grad_norm": 0.0, + "learning_rate": 7.717083837487971e-06, + "loss": 1.3867, + "step": 19962 + }, + { + "epoch": 0.5861471607258206, + "grad_norm": 0.0, + "learning_rate": 7.716157999650945e-06, + "loss": 1.1538, + "step": 19963 + }, + { + "epoch": 0.5861765224029597, + "grad_norm": 0.0, + "learning_rate": 7.715232182467285e-06, + "loss": 1.3066, + "step": 19964 + }, + { + "epoch": 0.5862058840800987, + "grad_norm": 0.0, + "learning_rate": 7.714306385945368e-06, + "loss": 1.3838, + "step": 19965 + }, + { + "epoch": 0.5862352457572376, + "grad_norm": 0.0, + "learning_rate": 7.71338061009356e-06, + "loss": 1.2852, + "step": 19966 + }, + { + "epoch": 0.5862646074343767, + "grad_norm": 0.0, + "learning_rate": 7.712454854920239e-06, + "loss": 1.2002, + "step": 19967 + }, + { + "epoch": 0.5862939691115157, + "grad_norm": 0.0, + "learning_rate": 7.711529120433774e-06, + "loss": 1.2651, + "step": 19968 + }, + { + "epoch": 0.5863233307886546, + "grad_norm": 0.0, + "learning_rate": 7.710603406642539e-06, + "loss": 1.3467, + "step": 19969 + }, + { + "epoch": 0.5863526924657937, + "grad_norm": 0.0, + "learning_rate": 7.709677713554899e-06, + "loss": 1.3096, + "step": 19970 + }, + { + "epoch": 0.5863820541429327, + "grad_norm": 0.0, + "learning_rate": 7.708752041179234e-06, + "loss": 1.4082, + "step": 19971 + }, + { + "epoch": 0.5864114158200716, + "grad_norm": 0.0, + "learning_rate": 7.707826389523912e-06, + "loss": 1.2773, + "step": 19972 + }, + { + "epoch": 0.5864407774972107, + "grad_norm": 0.0, + "learning_rate": 7.706900758597299e-06, + "loss": 1.3037, + "step": 19973 + }, + { + "epoch": 0.5864701391743496, + "grad_norm": 0.0, + "learning_rate": 7.705975148407774e-06, + "loss": 1.2817, + "step": 19974 + }, + { + "epoch": 0.5864995008514886, + "grad_norm": 0.0, + "learning_rate": 7.705049558963699e-06, + "loss": 1.2275, + "step": 19975 + }, + { + "epoch": 0.5865288625286277, + "grad_norm": 0.0, + "learning_rate": 7.704123990273455e-06, + "loss": 1.2402, + "step": 19976 + }, + { + "epoch": 0.5865582242057666, + "grad_norm": 0.0, + "learning_rate": 7.703198442345398e-06, + "loss": 1.3711, + "step": 19977 + }, + { + "epoch": 0.5865875858829056, + "grad_norm": 0.0, + "learning_rate": 7.702272915187911e-06, + "loss": 1.2363, + "step": 19978 + }, + { + "epoch": 0.5866169475600447, + "grad_norm": 0.0, + "learning_rate": 7.701347408809355e-06, + "loss": 1.2656, + "step": 19979 + }, + { + "epoch": 0.5866463092371836, + "grad_norm": 0.0, + "learning_rate": 7.700421923218107e-06, + "loss": 1.208, + "step": 19980 + }, + { + "epoch": 0.5866756709143226, + "grad_norm": 0.0, + "learning_rate": 7.699496458422526e-06, + "loss": 1.0347, + "step": 19981 + }, + { + "epoch": 0.5867050325914617, + "grad_norm": 0.0, + "learning_rate": 7.698571014430994e-06, + "loss": 1.3232, + "step": 19982 + }, + { + "epoch": 0.5867343942686006, + "grad_norm": 0.0, + "learning_rate": 7.697645591251873e-06, + "loss": 1.2705, + "step": 19983 + }, + { + "epoch": 0.5867637559457396, + "grad_norm": 0.0, + "learning_rate": 7.696720188893531e-06, + "loss": 1.2788, + "step": 19984 + }, + { + "epoch": 0.5867931176228787, + "grad_norm": 0.0, + "learning_rate": 7.69579480736434e-06, + "loss": 1.3994, + "step": 19985 + }, + { + "epoch": 0.5868224793000176, + "grad_norm": 0.0, + "learning_rate": 7.694869446672662e-06, + "loss": 1.2168, + "step": 19986 + }, + { + "epoch": 0.5868518409771566, + "grad_norm": 0.0, + "learning_rate": 7.693944106826873e-06, + "loss": 1.3457, + "step": 19987 + }, + { + "epoch": 0.5868812026542957, + "grad_norm": 0.0, + "learning_rate": 7.693018787835337e-06, + "loss": 1.4121, + "step": 19988 + }, + { + "epoch": 0.5869105643314346, + "grad_norm": 0.0, + "learning_rate": 7.692093489706426e-06, + "loss": 1.3135, + "step": 19989 + }, + { + "epoch": 0.5869399260085736, + "grad_norm": 0.0, + "learning_rate": 7.6911682124485e-06, + "loss": 1.2715, + "step": 19990 + }, + { + "epoch": 0.5869692876857127, + "grad_norm": 0.0, + "learning_rate": 7.690242956069936e-06, + "loss": 1.3418, + "step": 19991 + }, + { + "epoch": 0.5869986493628516, + "grad_norm": 0.0, + "learning_rate": 7.689317720579096e-06, + "loss": 1.207, + "step": 19992 + }, + { + "epoch": 0.5870280110399906, + "grad_norm": 0.0, + "learning_rate": 7.688392505984344e-06, + "loss": 1.2549, + "step": 19993 + }, + { + "epoch": 0.5870573727171297, + "grad_norm": 0.0, + "learning_rate": 7.687467312294055e-06, + "loss": 1.3223, + "step": 19994 + }, + { + "epoch": 0.5870867343942686, + "grad_norm": 0.0, + "learning_rate": 7.686542139516586e-06, + "loss": 1.2383, + "step": 19995 + }, + { + "epoch": 0.5871160960714076, + "grad_norm": 0.0, + "learning_rate": 7.685616987660312e-06, + "loss": 1.1758, + "step": 19996 + }, + { + "epoch": 0.5871454577485467, + "grad_norm": 0.0, + "learning_rate": 7.684691856733595e-06, + "loss": 1.1924, + "step": 19997 + }, + { + "epoch": 0.5871748194256856, + "grad_norm": 0.0, + "learning_rate": 7.683766746744807e-06, + "loss": 1.3438, + "step": 19998 + }, + { + "epoch": 0.5872041811028246, + "grad_norm": 0.0, + "learning_rate": 7.682841657702302e-06, + "loss": 1.1841, + "step": 19999 + }, + { + "epoch": 0.5872335427799636, + "grad_norm": 0.0, + "learning_rate": 7.681916589614458e-06, + "loss": 1.2871, + "step": 20000 + }, + { + "epoch": 0.5872629044571026, + "grad_norm": 0.0, + "learning_rate": 7.680991542489639e-06, + "loss": 1.2534, + "step": 20001 + }, + { + "epoch": 0.5872922661342416, + "grad_norm": 0.0, + "learning_rate": 7.680066516336202e-06, + "loss": 1.3096, + "step": 20002 + }, + { + "epoch": 0.5873216278113805, + "grad_norm": 0.0, + "learning_rate": 7.67914151116252e-06, + "loss": 1.251, + "step": 20003 + }, + { + "epoch": 0.5873509894885196, + "grad_norm": 0.0, + "learning_rate": 7.678216526976955e-06, + "loss": 1.3311, + "step": 20004 + }, + { + "epoch": 0.5873803511656586, + "grad_norm": 0.0, + "learning_rate": 7.677291563787875e-06, + "loss": 1.2681, + "step": 20005 + }, + { + "epoch": 0.5874097128427975, + "grad_norm": 0.0, + "learning_rate": 7.676366621603636e-06, + "loss": 1.3145, + "step": 20006 + }, + { + "epoch": 0.5874390745199366, + "grad_norm": 0.0, + "learning_rate": 7.675441700432615e-06, + "loss": 1.1729, + "step": 20007 + }, + { + "epoch": 0.5874684361970756, + "grad_norm": 0.0, + "learning_rate": 7.674516800283166e-06, + "loss": 1.1733, + "step": 20008 + }, + { + "epoch": 0.5874977978742145, + "grad_norm": 0.0, + "learning_rate": 7.67359192116366e-06, + "loss": 1.2812, + "step": 20009 + }, + { + "epoch": 0.5875271595513536, + "grad_norm": 0.0, + "learning_rate": 7.672667063082453e-06, + "loss": 1.2734, + "step": 20010 + }, + { + "epoch": 0.5875565212284926, + "grad_norm": 0.0, + "learning_rate": 7.67174222604792e-06, + "loss": 1.2949, + "step": 20011 + }, + { + "epoch": 0.5875858829056315, + "grad_norm": 0.0, + "learning_rate": 7.670817410068417e-06, + "loss": 1.3418, + "step": 20012 + }, + { + "epoch": 0.5876152445827706, + "grad_norm": 0.0, + "learning_rate": 7.669892615152307e-06, + "loss": 1.2441, + "step": 20013 + }, + { + "epoch": 0.5876446062599096, + "grad_norm": 0.0, + "learning_rate": 7.668967841307957e-06, + "loss": 1.1758, + "step": 20014 + }, + { + "epoch": 0.5876739679370485, + "grad_norm": 0.0, + "learning_rate": 7.668043088543725e-06, + "loss": 1.3237, + "step": 20015 + }, + { + "epoch": 0.5877033296141876, + "grad_norm": 0.0, + "learning_rate": 7.667118356867978e-06, + "loss": 1.291, + "step": 20016 + }, + { + "epoch": 0.5877326912913265, + "grad_norm": 0.0, + "learning_rate": 7.666193646289075e-06, + "loss": 1.2832, + "step": 20017 + }, + { + "epoch": 0.5877620529684655, + "grad_norm": 0.0, + "learning_rate": 7.665268956815385e-06, + "loss": 1.2173, + "step": 20018 + }, + { + "epoch": 0.5877914146456046, + "grad_norm": 0.0, + "learning_rate": 7.66434428845526e-06, + "loss": 1.3115, + "step": 20019 + }, + { + "epoch": 0.5878207763227435, + "grad_norm": 0.0, + "learning_rate": 7.663419641217072e-06, + "loss": 1.2495, + "step": 20020 + }, + { + "epoch": 0.5878501379998825, + "grad_norm": 0.0, + "learning_rate": 7.662495015109178e-06, + "loss": 1.165, + "step": 20021 + }, + { + "epoch": 0.5878794996770216, + "grad_norm": 0.0, + "learning_rate": 7.66157041013994e-06, + "loss": 1.2139, + "step": 20022 + }, + { + "epoch": 0.5879088613541605, + "grad_norm": 0.0, + "learning_rate": 7.660645826317722e-06, + "loss": 1.3032, + "step": 20023 + }, + { + "epoch": 0.5879382230312995, + "grad_norm": 0.0, + "learning_rate": 7.659721263650876e-06, + "loss": 1.1343, + "step": 20024 + }, + { + "epoch": 0.5879675847084386, + "grad_norm": 0.0, + "learning_rate": 7.658796722147776e-06, + "loss": 1.2959, + "step": 20025 + }, + { + "epoch": 0.5879969463855775, + "grad_norm": 0.0, + "learning_rate": 7.657872201816775e-06, + "loss": 1.2617, + "step": 20026 + }, + { + "epoch": 0.5880263080627165, + "grad_norm": 0.0, + "learning_rate": 7.656947702666237e-06, + "loss": 1.2344, + "step": 20027 + }, + { + "epoch": 0.5880556697398556, + "grad_norm": 0.0, + "learning_rate": 7.656023224704518e-06, + "loss": 1.3301, + "step": 20028 + }, + { + "epoch": 0.5880850314169945, + "grad_norm": 0.0, + "learning_rate": 7.655098767939986e-06, + "loss": 1.3008, + "step": 20029 + }, + { + "epoch": 0.5881143930941335, + "grad_norm": 0.0, + "learning_rate": 7.65417433238099e-06, + "loss": 1.27, + "step": 20030 + }, + { + "epoch": 0.5881437547712726, + "grad_norm": 0.0, + "learning_rate": 7.653249918035901e-06, + "loss": 1.1602, + "step": 20031 + }, + { + "epoch": 0.5881731164484115, + "grad_norm": 0.0, + "learning_rate": 7.652325524913075e-06, + "loss": 1.1665, + "step": 20032 + }, + { + "epoch": 0.5882024781255505, + "grad_norm": 0.0, + "learning_rate": 7.651401153020868e-06, + "loss": 1.2871, + "step": 20033 + }, + { + "epoch": 0.5882318398026896, + "grad_norm": 0.0, + "learning_rate": 7.650476802367643e-06, + "loss": 1.3818, + "step": 20034 + }, + { + "epoch": 0.5882612014798285, + "grad_norm": 0.0, + "learning_rate": 7.649552472961755e-06, + "loss": 1.3193, + "step": 20035 + }, + { + "epoch": 0.5882905631569675, + "grad_norm": 0.0, + "learning_rate": 7.64862816481157e-06, + "loss": 1.4062, + "step": 20036 + }, + { + "epoch": 0.5883199248341066, + "grad_norm": 0.0, + "learning_rate": 7.64770387792544e-06, + "loss": 1.2842, + "step": 20037 + }, + { + "epoch": 0.5883492865112455, + "grad_norm": 0.0, + "learning_rate": 7.646779612311727e-06, + "loss": 1.146, + "step": 20038 + }, + { + "epoch": 0.5883786481883845, + "grad_norm": 0.0, + "learning_rate": 7.645855367978786e-06, + "loss": 1.1777, + "step": 20039 + }, + { + "epoch": 0.5884080098655236, + "grad_norm": 0.0, + "learning_rate": 7.644931144934981e-06, + "loss": 1.3027, + "step": 20040 + }, + { + "epoch": 0.5884373715426625, + "grad_norm": 0.0, + "learning_rate": 7.644006943188668e-06, + "loss": 1.3076, + "step": 20041 + }, + { + "epoch": 0.5884667332198015, + "grad_norm": 0.0, + "learning_rate": 7.6430827627482e-06, + "loss": 1.3408, + "step": 20042 + }, + { + "epoch": 0.5884960948969405, + "grad_norm": 0.0, + "learning_rate": 7.642158603621943e-06, + "loss": 1.0459, + "step": 20043 + }, + { + "epoch": 0.5885254565740795, + "grad_norm": 0.0, + "learning_rate": 7.641234465818243e-06, + "loss": 1.3525, + "step": 20044 + }, + { + "epoch": 0.5885548182512185, + "grad_norm": 0.0, + "learning_rate": 7.640310349345469e-06, + "loss": 1.2949, + "step": 20045 + }, + { + "epoch": 0.5885841799283575, + "grad_norm": 0.0, + "learning_rate": 7.63938625421197e-06, + "loss": 1.2896, + "step": 20046 + }, + { + "epoch": 0.5886135416054965, + "grad_norm": 0.0, + "learning_rate": 7.63846218042611e-06, + "loss": 1.3809, + "step": 20047 + }, + { + "epoch": 0.5886429032826355, + "grad_norm": 0.0, + "learning_rate": 7.637538127996234e-06, + "loss": 1.249, + "step": 20048 + }, + { + "epoch": 0.5886722649597745, + "grad_norm": 0.0, + "learning_rate": 7.636614096930712e-06, + "loss": 1.3701, + "step": 20049 + }, + { + "epoch": 0.5887016266369135, + "grad_norm": 0.0, + "learning_rate": 7.63569008723789e-06, + "loss": 1.3379, + "step": 20050 + }, + { + "epoch": 0.5887309883140525, + "grad_norm": 0.0, + "learning_rate": 7.634766098926132e-06, + "loss": 1.2354, + "step": 20051 + }, + { + "epoch": 0.5887603499911915, + "grad_norm": 0.0, + "learning_rate": 7.633842132003791e-06, + "loss": 1.2559, + "step": 20052 + }, + { + "epoch": 0.5887897116683305, + "grad_norm": 0.0, + "learning_rate": 7.632918186479215e-06, + "loss": 1.2212, + "step": 20053 + }, + { + "epoch": 0.5888190733454695, + "grad_norm": 0.0, + "learning_rate": 7.631994262360772e-06, + "loss": 1.1274, + "step": 20054 + }, + { + "epoch": 0.5888484350226085, + "grad_norm": 0.0, + "learning_rate": 7.631070359656807e-06, + "loss": 1.3018, + "step": 20055 + }, + { + "epoch": 0.5888777966997475, + "grad_norm": 0.0, + "learning_rate": 7.630146478375681e-06, + "loss": 1.2378, + "step": 20056 + }, + { + "epoch": 0.5889071583768865, + "grad_norm": 0.0, + "learning_rate": 7.629222618525748e-06, + "loss": 1.123, + "step": 20057 + }, + { + "epoch": 0.5889365200540255, + "grad_norm": 0.0, + "learning_rate": 7.628298780115363e-06, + "loss": 1.292, + "step": 20058 + }, + { + "epoch": 0.5889658817311645, + "grad_norm": 0.0, + "learning_rate": 7.627374963152876e-06, + "loss": 1.3613, + "step": 20059 + }, + { + "epoch": 0.5889952434083034, + "grad_norm": 0.0, + "learning_rate": 7.626451167646648e-06, + "loss": 1.2939, + "step": 20060 + }, + { + "epoch": 0.5890246050854425, + "grad_norm": 0.0, + "learning_rate": 7.62552739360503e-06, + "loss": 1.3203, + "step": 20061 + }, + { + "epoch": 0.5890539667625815, + "grad_norm": 0.0, + "learning_rate": 7.624603641036375e-06, + "loss": 1.373, + "step": 20062 + }, + { + "epoch": 0.5890833284397204, + "grad_norm": 0.0, + "learning_rate": 7.623679909949039e-06, + "loss": 1.2852, + "step": 20063 + }, + { + "epoch": 0.5891126901168595, + "grad_norm": 0.0, + "learning_rate": 7.622756200351369e-06, + "loss": 1.2568, + "step": 20064 + }, + { + "epoch": 0.5891420517939985, + "grad_norm": 0.0, + "learning_rate": 7.621832512251729e-06, + "loss": 1.2979, + "step": 20065 + }, + { + "epoch": 0.5891714134711374, + "grad_norm": 0.0, + "learning_rate": 7.620908845658465e-06, + "loss": 1.2275, + "step": 20066 + }, + { + "epoch": 0.5892007751482765, + "grad_norm": 0.0, + "learning_rate": 7.6199852005799325e-06, + "loss": 1.2871, + "step": 20067 + }, + { + "epoch": 0.5892301368254155, + "grad_norm": 0.0, + "learning_rate": 7.619061577024479e-06, + "loss": 1.3027, + "step": 20068 + }, + { + "epoch": 0.5892594985025544, + "grad_norm": 0.0, + "learning_rate": 7.618137975000466e-06, + "loss": 1.2734, + "step": 20069 + }, + { + "epoch": 0.5892888601796935, + "grad_norm": 0.0, + "learning_rate": 7.617214394516241e-06, + "loss": 1.2549, + "step": 20070 + }, + { + "epoch": 0.5893182218568325, + "grad_norm": 0.0, + "learning_rate": 7.616290835580156e-06, + "loss": 1.2588, + "step": 20071 + }, + { + "epoch": 0.5893475835339714, + "grad_norm": 0.0, + "learning_rate": 7.615367298200564e-06, + "loss": 1.3135, + "step": 20072 + }, + { + "epoch": 0.5893769452111105, + "grad_norm": 0.0, + "learning_rate": 7.614443782385812e-06, + "loss": 1.292, + "step": 20073 + }, + { + "epoch": 0.5894063068882495, + "grad_norm": 0.0, + "learning_rate": 7.613520288144261e-06, + "loss": 1.2725, + "step": 20074 + }, + { + "epoch": 0.5894356685653884, + "grad_norm": 0.0, + "learning_rate": 7.612596815484254e-06, + "loss": 1.188, + "step": 20075 + }, + { + "epoch": 0.5894650302425275, + "grad_norm": 0.0, + "learning_rate": 7.611673364414148e-06, + "loss": 1.2363, + "step": 20076 + }, + { + "epoch": 0.5894943919196665, + "grad_norm": 0.0, + "learning_rate": 7.610749934942288e-06, + "loss": 1.207, + "step": 20077 + }, + { + "epoch": 0.5895237535968054, + "grad_norm": 0.0, + "learning_rate": 7.6098265270770315e-06, + "loss": 1.2412, + "step": 20078 + }, + { + "epoch": 0.5895531152739445, + "grad_norm": 0.0, + "learning_rate": 7.608903140826724e-06, + "loss": 1.3574, + "step": 20079 + }, + { + "epoch": 0.5895824769510835, + "grad_norm": 0.0, + "learning_rate": 7.607979776199719e-06, + "loss": 1.2607, + "step": 20080 + }, + { + "epoch": 0.5896118386282224, + "grad_norm": 0.0, + "learning_rate": 7.607056433204367e-06, + "loss": 1.2793, + "step": 20081 + }, + { + "epoch": 0.5896412003053615, + "grad_norm": 0.0, + "learning_rate": 7.6061331118490136e-06, + "loss": 1.3301, + "step": 20082 + }, + { + "epoch": 0.5896705619825005, + "grad_norm": 0.0, + "learning_rate": 7.605209812142014e-06, + "loss": 1.3857, + "step": 20083 + }, + { + "epoch": 0.5896999236596394, + "grad_norm": 0.0, + "learning_rate": 7.604286534091711e-06, + "loss": 1.3037, + "step": 20084 + }, + { + "epoch": 0.5897292853367785, + "grad_norm": 0.0, + "learning_rate": 7.603363277706462e-06, + "loss": 1.2559, + "step": 20085 + }, + { + "epoch": 0.5897586470139174, + "grad_norm": 0.0, + "learning_rate": 7.602440042994611e-06, + "loss": 1.207, + "step": 20086 + }, + { + "epoch": 0.5897880086910564, + "grad_norm": 0.0, + "learning_rate": 7.601516829964511e-06, + "loss": 1.2666, + "step": 20087 + }, + { + "epoch": 0.5898173703681955, + "grad_norm": 0.0, + "learning_rate": 7.600593638624505e-06, + "loss": 1.3198, + "step": 20088 + }, + { + "epoch": 0.5898467320453344, + "grad_norm": 0.0, + "learning_rate": 7.599670468982948e-06, + "loss": 1.2598, + "step": 20089 + }, + { + "epoch": 0.5898760937224734, + "grad_norm": 0.0, + "learning_rate": 7.598747321048187e-06, + "loss": 1.4111, + "step": 20090 + }, + { + "epoch": 0.5899054553996125, + "grad_norm": 0.0, + "learning_rate": 7.597824194828566e-06, + "loss": 1.3096, + "step": 20091 + }, + { + "epoch": 0.5899348170767514, + "grad_norm": 0.0, + "learning_rate": 7.596901090332438e-06, + "loss": 1.1689, + "step": 20092 + }, + { + "epoch": 0.5899641787538904, + "grad_norm": 0.0, + "learning_rate": 7.595978007568146e-06, + "loss": 1.2485, + "step": 20093 + }, + { + "epoch": 0.5899935404310295, + "grad_norm": 0.0, + "learning_rate": 7.5950549465440435e-06, + "loss": 1.2236, + "step": 20094 + }, + { + "epoch": 0.5900229021081684, + "grad_norm": 0.0, + "learning_rate": 7.5941319072684735e-06, + "loss": 1.2734, + "step": 20095 + }, + { + "epoch": 0.5900522637853074, + "grad_norm": 0.0, + "learning_rate": 7.593208889749786e-06, + "loss": 1.2725, + "step": 20096 + }, + { + "epoch": 0.5900816254624465, + "grad_norm": 0.0, + "learning_rate": 7.592285893996324e-06, + "loss": 1.3076, + "step": 20097 + }, + { + "epoch": 0.5901109871395854, + "grad_norm": 0.0, + "learning_rate": 7.591362920016441e-06, + "loss": 1.1846, + "step": 20098 + }, + { + "epoch": 0.5901403488167244, + "grad_norm": 0.0, + "learning_rate": 7.590439967818477e-06, + "loss": 1.2305, + "step": 20099 + }, + { + "epoch": 0.5901697104938635, + "grad_norm": 0.0, + "learning_rate": 7.589517037410785e-06, + "loss": 1.2183, + "step": 20100 + }, + { + "epoch": 0.5901990721710024, + "grad_norm": 0.0, + "learning_rate": 7.588594128801708e-06, + "loss": 1.2979, + "step": 20101 + }, + { + "epoch": 0.5902284338481414, + "grad_norm": 0.0, + "learning_rate": 7.587671241999586e-06, + "loss": 1.3154, + "step": 20102 + }, + { + "epoch": 0.5902577955252803, + "grad_norm": 0.0, + "learning_rate": 7.586748377012776e-06, + "loss": 1.2593, + "step": 20103 + }, + { + "epoch": 0.5902871572024194, + "grad_norm": 0.0, + "learning_rate": 7.585825533849616e-06, + "loss": 1.2822, + "step": 20104 + }, + { + "epoch": 0.5903165188795584, + "grad_norm": 0.0, + "learning_rate": 7.584902712518456e-06, + "loss": 1.377, + "step": 20105 + }, + { + "epoch": 0.5903458805566973, + "grad_norm": 0.0, + "learning_rate": 7.5839799130276345e-06, + "loss": 1.25, + "step": 20106 + }, + { + "epoch": 0.5903752422338364, + "grad_norm": 0.0, + "learning_rate": 7.583057135385509e-06, + "loss": 1.4316, + "step": 20107 + }, + { + "epoch": 0.5904046039109754, + "grad_norm": 0.0, + "learning_rate": 7.58213437960041e-06, + "loss": 1.2686, + "step": 20108 + }, + { + "epoch": 0.5904339655881143, + "grad_norm": 0.0, + "learning_rate": 7.581211645680693e-06, + "loss": 1.3682, + "step": 20109 + }, + { + "epoch": 0.5904633272652534, + "grad_norm": 0.0, + "learning_rate": 7.580288933634698e-06, + "loss": 1.3809, + "step": 20110 + }, + { + "epoch": 0.5904926889423924, + "grad_norm": 0.0, + "learning_rate": 7.579366243470769e-06, + "loss": 1.3682, + "step": 20111 + }, + { + "epoch": 0.5905220506195313, + "grad_norm": 0.0, + "learning_rate": 7.578443575197253e-06, + "loss": 1.1919, + "step": 20112 + }, + { + "epoch": 0.5905514122966704, + "grad_norm": 0.0, + "learning_rate": 7.577520928822487e-06, + "loss": 1.248, + "step": 20113 + }, + { + "epoch": 0.5905807739738094, + "grad_norm": 0.0, + "learning_rate": 7.576598304354824e-06, + "loss": 1.2852, + "step": 20114 + }, + { + "epoch": 0.5906101356509483, + "grad_norm": 0.0, + "learning_rate": 7.575675701802602e-06, + "loss": 1.3818, + "step": 20115 + }, + { + "epoch": 0.5906394973280874, + "grad_norm": 0.0, + "learning_rate": 7.5747531211741665e-06, + "loss": 1.2402, + "step": 20116 + }, + { + "epoch": 0.5906688590052264, + "grad_norm": 0.0, + "learning_rate": 7.573830562477855e-06, + "loss": 1.4043, + "step": 20117 + }, + { + "epoch": 0.5906982206823653, + "grad_norm": 0.0, + "learning_rate": 7.572908025722021e-06, + "loss": 1.3086, + "step": 20118 + }, + { + "epoch": 0.5907275823595044, + "grad_norm": 0.0, + "learning_rate": 7.571985510914998e-06, + "loss": 1.249, + "step": 20119 + }, + { + "epoch": 0.5907569440366434, + "grad_norm": 0.0, + "learning_rate": 7.571063018065135e-06, + "loss": 1.3691, + "step": 20120 + }, + { + "epoch": 0.5907863057137823, + "grad_norm": 0.0, + "learning_rate": 7.57014054718077e-06, + "loss": 1.1943, + "step": 20121 + }, + { + "epoch": 0.5908156673909214, + "grad_norm": 0.0, + "learning_rate": 7.569218098270242e-06, + "loss": 1.2344, + "step": 20122 + }, + { + "epoch": 0.5908450290680604, + "grad_norm": 0.0, + "learning_rate": 7.568295671341902e-06, + "loss": 1.2461, + "step": 20123 + }, + { + "epoch": 0.5908743907451993, + "grad_norm": 0.0, + "learning_rate": 7.5673732664040855e-06, + "loss": 1.061, + "step": 20124 + }, + { + "epoch": 0.5909037524223384, + "grad_norm": 0.0, + "learning_rate": 7.566450883465136e-06, + "loss": 1.373, + "step": 20125 + }, + { + "epoch": 0.5909331140994774, + "grad_norm": 0.0, + "learning_rate": 7.565528522533392e-06, + "loss": 1.313, + "step": 20126 + }, + { + "epoch": 0.5909624757766163, + "grad_norm": 0.0, + "learning_rate": 7.5646061836172e-06, + "loss": 1.3936, + "step": 20127 + }, + { + "epoch": 0.5909918374537554, + "grad_norm": 0.0, + "learning_rate": 7.5636838667248965e-06, + "loss": 1.2783, + "step": 20128 + }, + { + "epoch": 0.5910211991308943, + "grad_norm": 0.0, + "learning_rate": 7.562761571864826e-06, + "loss": 1.2588, + "step": 20129 + }, + { + "epoch": 0.5910505608080333, + "grad_norm": 0.0, + "learning_rate": 7.561839299045326e-06, + "loss": 1.3008, + "step": 20130 + }, + { + "epoch": 0.5910799224851724, + "grad_norm": 0.0, + "learning_rate": 7.560917048274736e-06, + "loss": 1.3125, + "step": 20131 + }, + { + "epoch": 0.5911092841623113, + "grad_norm": 0.0, + "learning_rate": 7.5599948195614e-06, + "loss": 1.2656, + "step": 20132 + }, + { + "epoch": 0.5911386458394503, + "grad_norm": 0.0, + "learning_rate": 7.559072612913652e-06, + "loss": 1.1562, + "step": 20133 + }, + { + "epoch": 0.5911680075165894, + "grad_norm": 0.0, + "learning_rate": 7.55815042833984e-06, + "loss": 1.2217, + "step": 20134 + }, + { + "epoch": 0.5911973691937283, + "grad_norm": 0.0, + "learning_rate": 7.557228265848296e-06, + "loss": 1.1392, + "step": 20135 + }, + { + "epoch": 0.5912267308708673, + "grad_norm": 0.0, + "learning_rate": 7.556306125447365e-06, + "loss": 1.2939, + "step": 20136 + }, + { + "epoch": 0.5912560925480064, + "grad_norm": 0.0, + "learning_rate": 7.555384007145379e-06, + "loss": 1.3779, + "step": 20137 + }, + { + "epoch": 0.5912854542251453, + "grad_norm": 0.0, + "learning_rate": 7.554461910950686e-06, + "loss": 1.1562, + "step": 20138 + }, + { + "epoch": 0.5913148159022843, + "grad_norm": 0.0, + "learning_rate": 7.55353983687162e-06, + "loss": 1.1831, + "step": 20139 + }, + { + "epoch": 0.5913441775794234, + "grad_norm": 0.0, + "learning_rate": 7.552617784916516e-06, + "loss": 1.2124, + "step": 20140 + }, + { + "epoch": 0.5913735392565623, + "grad_norm": 0.0, + "learning_rate": 7.551695755093721e-06, + "loss": 1.2495, + "step": 20141 + }, + { + "epoch": 0.5914029009337013, + "grad_norm": 0.0, + "learning_rate": 7.550773747411563e-06, + "loss": 1.2056, + "step": 20142 + }, + { + "epoch": 0.5914322626108404, + "grad_norm": 0.0, + "learning_rate": 7.549851761878389e-06, + "loss": 1.2705, + "step": 20143 + }, + { + "epoch": 0.5914616242879793, + "grad_norm": 0.0, + "learning_rate": 7.548929798502531e-06, + "loss": 1.1899, + "step": 20144 + }, + { + "epoch": 0.5914909859651183, + "grad_norm": 0.0, + "learning_rate": 7.548007857292332e-06, + "loss": 1.2734, + "step": 20145 + }, + { + "epoch": 0.5915203476422574, + "grad_norm": 0.0, + "learning_rate": 7.5470859382561205e-06, + "loss": 1.2891, + "step": 20146 + }, + { + "epoch": 0.5915497093193963, + "grad_norm": 0.0, + "learning_rate": 7.546164041402243e-06, + "loss": 1.3438, + "step": 20147 + }, + { + "epoch": 0.5915790709965353, + "grad_norm": 0.0, + "learning_rate": 7.54524216673903e-06, + "loss": 1.2422, + "step": 20148 + }, + { + "epoch": 0.5916084326736744, + "grad_norm": 0.0, + "learning_rate": 7.544320314274825e-06, + "loss": 1.21, + "step": 20149 + }, + { + "epoch": 0.5916377943508133, + "grad_norm": 0.0, + "learning_rate": 7.5433984840179584e-06, + "loss": 1.2134, + "step": 20150 + }, + { + "epoch": 0.5916671560279523, + "grad_norm": 0.0, + "learning_rate": 7.542476675976766e-06, + "loss": 1.291, + "step": 20151 + }, + { + "epoch": 0.5916965177050914, + "grad_norm": 0.0, + "learning_rate": 7.541554890159589e-06, + "loss": 1.3047, + "step": 20152 + }, + { + "epoch": 0.5917258793822303, + "grad_norm": 0.0, + "learning_rate": 7.540633126574759e-06, + "loss": 1.3213, + "step": 20153 + }, + { + "epoch": 0.5917552410593693, + "grad_norm": 0.0, + "learning_rate": 7.539711385230617e-06, + "loss": 1.3447, + "step": 20154 + }, + { + "epoch": 0.5917846027365083, + "grad_norm": 0.0, + "learning_rate": 7.538789666135489e-06, + "loss": 1.1255, + "step": 20155 + }, + { + "epoch": 0.5918139644136473, + "grad_norm": 0.0, + "learning_rate": 7.537867969297723e-06, + "loss": 1.2441, + "step": 20156 + }, + { + "epoch": 0.5918433260907863, + "grad_norm": 0.0, + "learning_rate": 7.536946294725642e-06, + "loss": 1.375, + "step": 20157 + }, + { + "epoch": 0.5918726877679253, + "grad_norm": 0.0, + "learning_rate": 7.53602464242759e-06, + "loss": 1.3398, + "step": 20158 + }, + { + "epoch": 0.5919020494450643, + "grad_norm": 0.0, + "learning_rate": 7.535103012411898e-06, + "loss": 1.3096, + "step": 20159 + }, + { + "epoch": 0.5919314111222033, + "grad_norm": 0.0, + "learning_rate": 7.5341814046869e-06, + "loss": 1.2168, + "step": 20160 + }, + { + "epoch": 0.5919607727993423, + "grad_norm": 0.0, + "learning_rate": 7.5332598192609314e-06, + "loss": 1.2358, + "step": 20161 + }, + { + "epoch": 0.5919901344764813, + "grad_norm": 0.0, + "learning_rate": 7.532338256142322e-06, + "loss": 1.3652, + "step": 20162 + }, + { + "epoch": 0.5920194961536203, + "grad_norm": 0.0, + "learning_rate": 7.531416715339415e-06, + "loss": 1.2432, + "step": 20163 + }, + { + "epoch": 0.5920488578307593, + "grad_norm": 0.0, + "learning_rate": 7.5304951968605364e-06, + "loss": 1.2876, + "step": 20164 + }, + { + "epoch": 0.5920782195078983, + "grad_norm": 0.0, + "learning_rate": 7.529573700714023e-06, + "loss": 1.3848, + "step": 20165 + }, + { + "epoch": 0.5921075811850373, + "grad_norm": 0.0, + "learning_rate": 7.528652226908205e-06, + "loss": 1.2129, + "step": 20166 + }, + { + "epoch": 0.5921369428621763, + "grad_norm": 0.0, + "learning_rate": 7.5277307754514214e-06, + "loss": 1.2129, + "step": 20167 + }, + { + "epoch": 0.5921663045393153, + "grad_norm": 0.0, + "learning_rate": 7.526809346352e-06, + "loss": 1.3887, + "step": 20168 + }, + { + "epoch": 0.5921956662164543, + "grad_norm": 0.0, + "learning_rate": 7.525887939618276e-06, + "loss": 1.1904, + "step": 20169 + }, + { + "epoch": 0.5922250278935933, + "grad_norm": 0.0, + "learning_rate": 7.524966555258581e-06, + "loss": 1.3091, + "step": 20170 + }, + { + "epoch": 0.5922543895707323, + "grad_norm": 0.0, + "learning_rate": 7.524045193281244e-06, + "loss": 1.2485, + "step": 20171 + }, + { + "epoch": 0.5922837512478712, + "grad_norm": 0.0, + "learning_rate": 7.5231238536946035e-06, + "loss": 1.2002, + "step": 20172 + }, + { + "epoch": 0.5923131129250103, + "grad_norm": 0.0, + "learning_rate": 7.522202536506987e-06, + "loss": 1.2881, + "step": 20173 + }, + { + "epoch": 0.5923424746021493, + "grad_norm": 0.0, + "learning_rate": 7.52128124172673e-06, + "loss": 1.1421, + "step": 20174 + }, + { + "epoch": 0.5923718362792882, + "grad_norm": 0.0, + "learning_rate": 7.5203599693621565e-06, + "loss": 1.3149, + "step": 20175 + }, + { + "epoch": 0.5924011979564273, + "grad_norm": 0.0, + "learning_rate": 7.519438719421608e-06, + "loss": 1.4482, + "step": 20176 + }, + { + "epoch": 0.5924305596335663, + "grad_norm": 0.0, + "learning_rate": 7.518517491913408e-06, + "loss": 1.3555, + "step": 20177 + }, + { + "epoch": 0.5924599213107052, + "grad_norm": 0.0, + "learning_rate": 7.517596286845892e-06, + "loss": 1.3232, + "step": 20178 + }, + { + "epoch": 0.5924892829878443, + "grad_norm": 0.0, + "learning_rate": 7.5166751042273876e-06, + "loss": 1.3271, + "step": 20179 + }, + { + "epoch": 0.5925186446649833, + "grad_norm": 0.0, + "learning_rate": 7.515753944066223e-06, + "loss": 1.375, + "step": 20180 + }, + { + "epoch": 0.5925480063421222, + "grad_norm": 0.0, + "learning_rate": 7.514832806370737e-06, + "loss": 1.1235, + "step": 20181 + }, + { + "epoch": 0.5925773680192613, + "grad_norm": 0.0, + "learning_rate": 7.513911691149249e-06, + "loss": 1.2373, + "step": 20182 + }, + { + "epoch": 0.5926067296964003, + "grad_norm": 0.0, + "learning_rate": 7.512990598410099e-06, + "loss": 1.3828, + "step": 20183 + }, + { + "epoch": 0.5926360913735392, + "grad_norm": 0.0, + "learning_rate": 7.512069528161609e-06, + "loss": 1.2568, + "step": 20184 + }, + { + "epoch": 0.5926654530506783, + "grad_norm": 0.0, + "learning_rate": 7.511148480412113e-06, + "loss": 1.3452, + "step": 20185 + }, + { + "epoch": 0.5926948147278173, + "grad_norm": 0.0, + "learning_rate": 7.510227455169936e-06, + "loss": 1.2256, + "step": 20186 + }, + { + "epoch": 0.5927241764049562, + "grad_norm": 0.0, + "learning_rate": 7.509306452443412e-06, + "loss": 1.3057, + "step": 20187 + }, + { + "epoch": 0.5927535380820953, + "grad_norm": 0.0, + "learning_rate": 7.508385472240869e-06, + "loss": 1.3081, + "step": 20188 + }, + { + "epoch": 0.5927828997592343, + "grad_norm": 0.0, + "learning_rate": 7.507464514570633e-06, + "loss": 1.4229, + "step": 20189 + }, + { + "epoch": 0.5928122614363732, + "grad_norm": 0.0, + "learning_rate": 7.506543579441035e-06, + "loss": 1.23, + "step": 20190 + }, + { + "epoch": 0.5928416231135123, + "grad_norm": 0.0, + "learning_rate": 7.505622666860397e-06, + "loss": 1.2119, + "step": 20191 + }, + { + "epoch": 0.5928709847906513, + "grad_norm": 0.0, + "learning_rate": 7.504701776837057e-06, + "loss": 1.3838, + "step": 20192 + }, + { + "epoch": 0.5929003464677902, + "grad_norm": 0.0, + "learning_rate": 7.5037809093793345e-06, + "loss": 1.3867, + "step": 20193 + }, + { + "epoch": 0.5929297081449293, + "grad_norm": 0.0, + "learning_rate": 7.502860064495564e-06, + "loss": 1.2539, + "step": 20194 + }, + { + "epoch": 0.5929590698220683, + "grad_norm": 0.0, + "learning_rate": 7.501939242194065e-06, + "loss": 1.3516, + "step": 20195 + }, + { + "epoch": 0.5929884314992072, + "grad_norm": 0.0, + "learning_rate": 7.5010184424831735e-06, + "loss": 1.2803, + "step": 20196 + }, + { + "epoch": 0.5930177931763463, + "grad_norm": 0.0, + "learning_rate": 7.5000976653712095e-06, + "loss": 1.0923, + "step": 20197 + }, + { + "epoch": 0.5930471548534852, + "grad_norm": 0.0, + "learning_rate": 7.499176910866506e-06, + "loss": 1.2075, + "step": 20198 + }, + { + "epoch": 0.5930765165306242, + "grad_norm": 0.0, + "learning_rate": 7.498256178977385e-06, + "loss": 1.2729, + "step": 20199 + }, + { + "epoch": 0.5931058782077633, + "grad_norm": 0.0, + "learning_rate": 7.49733546971217e-06, + "loss": 1.1729, + "step": 20200 + }, + { + "epoch": 0.5931352398849022, + "grad_norm": 0.0, + "learning_rate": 7.496414783079196e-06, + "loss": 1.3984, + "step": 20201 + }, + { + "epoch": 0.5931646015620412, + "grad_norm": 0.0, + "learning_rate": 7.495494119086781e-06, + "loss": 1.2842, + "step": 20202 + }, + { + "epoch": 0.5931939632391803, + "grad_norm": 0.0, + "learning_rate": 7.4945734777432575e-06, + "loss": 1.332, + "step": 20203 + }, + { + "epoch": 0.5932233249163192, + "grad_norm": 0.0, + "learning_rate": 7.493652859056944e-06, + "loss": 1.3379, + "step": 20204 + }, + { + "epoch": 0.5932526865934582, + "grad_norm": 0.0, + "learning_rate": 7.492732263036172e-06, + "loss": 1.3369, + "step": 20205 + }, + { + "epoch": 0.5932820482705972, + "grad_norm": 0.0, + "learning_rate": 7.491811689689263e-06, + "loss": 1.3516, + "step": 20206 + }, + { + "epoch": 0.5933114099477362, + "grad_norm": 0.0, + "learning_rate": 7.490891139024545e-06, + "loss": 1.2686, + "step": 20207 + }, + { + "epoch": 0.5933407716248752, + "grad_norm": 0.0, + "learning_rate": 7.489970611050341e-06, + "loss": 1.0972, + "step": 20208 + }, + { + "epoch": 0.5933701333020142, + "grad_norm": 0.0, + "learning_rate": 7.489050105774976e-06, + "loss": 1.2744, + "step": 20209 + }, + { + "epoch": 0.5933994949791532, + "grad_norm": 0.0, + "learning_rate": 7.4881296232067725e-06, + "loss": 1.3789, + "step": 20210 + }, + { + "epoch": 0.5934288566562922, + "grad_norm": 0.0, + "learning_rate": 7.487209163354053e-06, + "loss": 1.3301, + "step": 20211 + }, + { + "epoch": 0.5934582183334312, + "grad_norm": 0.0, + "learning_rate": 7.4862887262251504e-06, + "loss": 1.2705, + "step": 20212 + }, + { + "epoch": 0.5934875800105702, + "grad_norm": 0.0, + "learning_rate": 7.485368311828378e-06, + "loss": 1.3525, + "step": 20213 + }, + { + "epoch": 0.5935169416877092, + "grad_norm": 0.0, + "learning_rate": 7.484447920172068e-06, + "loss": 1.3457, + "step": 20214 + }, + { + "epoch": 0.5935463033648481, + "grad_norm": 0.0, + "learning_rate": 7.4835275512645344e-06, + "loss": 1.293, + "step": 20215 + }, + { + "epoch": 0.5935756650419872, + "grad_norm": 0.0, + "learning_rate": 7.48260720511411e-06, + "loss": 1.2822, + "step": 20216 + }, + { + "epoch": 0.5936050267191262, + "grad_norm": 0.0, + "learning_rate": 7.481686881729111e-06, + "loss": 1.1934, + "step": 20217 + }, + { + "epoch": 0.5936343883962651, + "grad_norm": 0.0, + "learning_rate": 7.4807665811178666e-06, + "loss": 1.3008, + "step": 20218 + }, + { + "epoch": 0.5936637500734042, + "grad_norm": 0.0, + "learning_rate": 7.479846303288693e-06, + "loss": 1.1919, + "step": 20219 + }, + { + "epoch": 0.5936931117505432, + "grad_norm": 0.0, + "learning_rate": 7.4789260482499104e-06, + "loss": 1.334, + "step": 20220 + }, + { + "epoch": 0.5937224734276821, + "grad_norm": 0.0, + "learning_rate": 7.47800581600985e-06, + "loss": 1.1855, + "step": 20221 + }, + { + "epoch": 0.5937518351048212, + "grad_norm": 0.0, + "learning_rate": 7.477085606576828e-06, + "loss": 1.3135, + "step": 20222 + }, + { + "epoch": 0.5937811967819602, + "grad_norm": 0.0, + "learning_rate": 7.476165419959167e-06, + "loss": 1.3062, + "step": 20223 + }, + { + "epoch": 0.5938105584590991, + "grad_norm": 0.0, + "learning_rate": 7.4752452561651865e-06, + "loss": 1.2725, + "step": 20224 + }, + { + "epoch": 0.5938399201362382, + "grad_norm": 0.0, + "learning_rate": 7.474325115203213e-06, + "loss": 1.3633, + "step": 20225 + }, + { + "epoch": 0.5938692818133772, + "grad_norm": 0.0, + "learning_rate": 7.473404997081561e-06, + "loss": 1.2476, + "step": 20226 + }, + { + "epoch": 0.5938986434905161, + "grad_norm": 0.0, + "learning_rate": 7.472484901808558e-06, + "loss": 1.3125, + "step": 20227 + }, + { + "epoch": 0.5939280051676552, + "grad_norm": 0.0, + "learning_rate": 7.471564829392521e-06, + "loss": 1.2754, + "step": 20228 + }, + { + "epoch": 0.5939573668447942, + "grad_norm": 0.0, + "learning_rate": 7.470644779841767e-06, + "loss": 1.208, + "step": 20229 + }, + { + "epoch": 0.5939867285219331, + "grad_norm": 0.0, + "learning_rate": 7.469724753164624e-06, + "loss": 1.2759, + "step": 20230 + }, + { + "epoch": 0.5940160901990722, + "grad_norm": 0.0, + "learning_rate": 7.468804749369406e-06, + "loss": 1.2725, + "step": 20231 + }, + { + "epoch": 0.5940454518762112, + "grad_norm": 0.0, + "learning_rate": 7.467884768464437e-06, + "loss": 1.2861, + "step": 20232 + }, + { + "epoch": 0.5940748135533501, + "grad_norm": 0.0, + "learning_rate": 7.46696481045803e-06, + "loss": 1.335, + "step": 20233 + }, + { + "epoch": 0.5941041752304892, + "grad_norm": 0.0, + "learning_rate": 7.466044875358516e-06, + "loss": 1.2861, + "step": 20234 + }, + { + "epoch": 0.5941335369076282, + "grad_norm": 0.0, + "learning_rate": 7.4651249631742e-06, + "loss": 1.3711, + "step": 20235 + }, + { + "epoch": 0.5941628985847671, + "grad_norm": 0.0, + "learning_rate": 7.464205073913413e-06, + "loss": 1.1494, + "step": 20236 + }, + { + "epoch": 0.5941922602619062, + "grad_norm": 0.0, + "learning_rate": 7.463285207584465e-06, + "loss": 1.2676, + "step": 20237 + }, + { + "epoch": 0.5942216219390452, + "grad_norm": 0.0, + "learning_rate": 7.4623653641956824e-06, + "loss": 1.1973, + "step": 20238 + }, + { + "epoch": 0.5942509836161841, + "grad_norm": 0.0, + "learning_rate": 7.46144554375538e-06, + "loss": 1.4424, + "step": 20239 + }, + { + "epoch": 0.5942803452933232, + "grad_norm": 0.0, + "learning_rate": 7.460525746271871e-06, + "loss": 1.3145, + "step": 20240 + }, + { + "epoch": 0.5943097069704621, + "grad_norm": 0.0, + "learning_rate": 7.459605971753482e-06, + "loss": 1.2002, + "step": 20241 + }, + { + "epoch": 0.5943390686476011, + "grad_norm": 0.0, + "learning_rate": 7.458686220208524e-06, + "loss": 1.2471, + "step": 20242 + }, + { + "epoch": 0.5943684303247402, + "grad_norm": 0.0, + "learning_rate": 7.457766491645322e-06, + "loss": 1.2188, + "step": 20243 + }, + { + "epoch": 0.5943977920018791, + "grad_norm": 0.0, + "learning_rate": 7.456846786072183e-06, + "loss": 1.3232, + "step": 20244 + }, + { + "epoch": 0.5944271536790181, + "grad_norm": 0.0, + "learning_rate": 7.4559271034974334e-06, + "loss": 1.2305, + "step": 20245 + }, + { + "epoch": 0.5944565153561572, + "grad_norm": 0.0, + "learning_rate": 7.4550074439293855e-06, + "loss": 1.2012, + "step": 20246 + }, + { + "epoch": 0.5944858770332961, + "grad_norm": 0.0, + "learning_rate": 7.454087807376358e-06, + "loss": 1.2295, + "step": 20247 + }, + { + "epoch": 0.5945152387104351, + "grad_norm": 0.0, + "learning_rate": 7.453168193846667e-06, + "loss": 1.3496, + "step": 20248 + }, + { + "epoch": 0.5945446003875742, + "grad_norm": 0.0, + "learning_rate": 7.452248603348624e-06, + "loss": 1.124, + "step": 20249 + }, + { + "epoch": 0.5945739620647131, + "grad_norm": 0.0, + "learning_rate": 7.451329035890553e-06, + "loss": 1.3242, + "step": 20250 + }, + { + "epoch": 0.5946033237418521, + "grad_norm": 0.0, + "learning_rate": 7.450409491480765e-06, + "loss": 1.2373, + "step": 20251 + }, + { + "epoch": 0.5946326854189912, + "grad_norm": 0.0, + "learning_rate": 7.4494899701275795e-06, + "loss": 1.1855, + "step": 20252 + }, + { + "epoch": 0.5946620470961301, + "grad_norm": 0.0, + "learning_rate": 7.448570471839304e-06, + "loss": 1.3164, + "step": 20253 + }, + { + "epoch": 0.5946914087732691, + "grad_norm": 0.0, + "learning_rate": 7.447650996624264e-06, + "loss": 1.3613, + "step": 20254 + }, + { + "epoch": 0.5947207704504082, + "grad_norm": 0.0, + "learning_rate": 7.446731544490767e-06, + "loss": 1.4004, + "step": 20255 + }, + { + "epoch": 0.5947501321275471, + "grad_norm": 0.0, + "learning_rate": 7.445812115447133e-06, + "loss": 1.3809, + "step": 20256 + }, + { + "epoch": 0.5947794938046861, + "grad_norm": 0.0, + "learning_rate": 7.444892709501674e-06, + "loss": 1.3076, + "step": 20257 + }, + { + "epoch": 0.5948088554818252, + "grad_norm": 0.0, + "learning_rate": 7.443973326662701e-06, + "loss": 1.292, + "step": 20258 + }, + { + "epoch": 0.5948382171589641, + "grad_norm": 0.0, + "learning_rate": 7.443053966938538e-06, + "loss": 1.3486, + "step": 20259 + }, + { + "epoch": 0.5948675788361031, + "grad_norm": 0.0, + "learning_rate": 7.4421346303374875e-06, + "loss": 1.3281, + "step": 20260 + }, + { + "epoch": 0.5948969405132422, + "grad_norm": 0.0, + "learning_rate": 7.441215316867872e-06, + "loss": 1.2441, + "step": 20261 + }, + { + "epoch": 0.5949263021903811, + "grad_norm": 0.0, + "learning_rate": 7.440296026537998e-06, + "loss": 1.2832, + "step": 20262 + }, + { + "epoch": 0.5949556638675201, + "grad_norm": 0.0, + "learning_rate": 7.439376759356186e-06, + "loss": 1.3179, + "step": 20263 + }, + { + "epoch": 0.5949850255446592, + "grad_norm": 0.0, + "learning_rate": 7.438457515330741e-06, + "loss": 1.126, + "step": 20264 + }, + { + "epoch": 0.5950143872217981, + "grad_norm": 0.0, + "learning_rate": 7.437538294469986e-06, + "loss": 1.3506, + "step": 20265 + }, + { + "epoch": 0.5950437488989371, + "grad_norm": 0.0, + "learning_rate": 7.436619096782226e-06, + "loss": 1.2612, + "step": 20266 + }, + { + "epoch": 0.5950731105760761, + "grad_norm": 0.0, + "learning_rate": 7.435699922275777e-06, + "loss": 1.3008, + "step": 20267 + }, + { + "epoch": 0.5951024722532151, + "grad_norm": 0.0, + "learning_rate": 7.434780770958951e-06, + "loss": 1.2012, + "step": 20268 + }, + { + "epoch": 0.5951318339303541, + "grad_norm": 0.0, + "learning_rate": 7.4338616428400564e-06, + "loss": 1.3564, + "step": 20269 + }, + { + "epoch": 0.5951611956074931, + "grad_norm": 0.0, + "learning_rate": 7.432942537927412e-06, + "loss": 1.2793, + "step": 20270 + }, + { + "epoch": 0.5951905572846321, + "grad_norm": 0.0, + "learning_rate": 7.432023456229323e-06, + "loss": 1.2998, + "step": 20271 + }, + { + "epoch": 0.5952199189617711, + "grad_norm": 0.0, + "learning_rate": 7.4311043977541054e-06, + "loss": 1.1982, + "step": 20272 + }, + { + "epoch": 0.5952492806389101, + "grad_norm": 0.0, + "learning_rate": 7.430185362510065e-06, + "loss": 1.126, + "step": 20273 + }, + { + "epoch": 0.5952786423160491, + "grad_norm": 0.0, + "learning_rate": 7.42926635050552e-06, + "loss": 1.1758, + "step": 20274 + }, + { + "epoch": 0.5953080039931881, + "grad_norm": 0.0, + "learning_rate": 7.428347361748775e-06, + "loss": 1.2554, + "step": 20275 + }, + { + "epoch": 0.5953373656703271, + "grad_norm": 0.0, + "learning_rate": 7.427428396248147e-06, + "loss": 1.21, + "step": 20276 + }, + { + "epoch": 0.5953667273474661, + "grad_norm": 0.0, + "learning_rate": 7.426509454011943e-06, + "loss": 1.2363, + "step": 20277 + }, + { + "epoch": 0.5953960890246051, + "grad_norm": 0.0, + "learning_rate": 7.425590535048468e-06, + "loss": 1.2998, + "step": 20278 + }, + { + "epoch": 0.5954254507017441, + "grad_norm": 0.0, + "learning_rate": 7.4246716393660414e-06, + "loss": 1.3398, + "step": 20279 + }, + { + "epoch": 0.5954548123788831, + "grad_norm": 0.0, + "learning_rate": 7.423752766972967e-06, + "loss": 1.3135, + "step": 20280 + }, + { + "epoch": 0.595484174056022, + "grad_norm": 0.0, + "learning_rate": 7.422833917877559e-06, + "loss": 1.2627, + "step": 20281 + }, + { + "epoch": 0.5955135357331611, + "grad_norm": 0.0, + "learning_rate": 7.421915092088118e-06, + "loss": 1.2412, + "step": 20282 + }, + { + "epoch": 0.5955428974103001, + "grad_norm": 0.0, + "learning_rate": 7.420996289612964e-06, + "loss": 1.29, + "step": 20283 + }, + { + "epoch": 0.595572259087439, + "grad_norm": 0.0, + "learning_rate": 7.420077510460399e-06, + "loss": 1.2832, + "step": 20284 + }, + { + "epoch": 0.5956016207645781, + "grad_norm": 0.0, + "learning_rate": 7.419158754638736e-06, + "loss": 1.1665, + "step": 20285 + }, + { + "epoch": 0.5956309824417171, + "grad_norm": 0.0, + "learning_rate": 7.418240022156277e-06, + "loss": 1.252, + "step": 20286 + }, + { + "epoch": 0.595660344118856, + "grad_norm": 0.0, + "learning_rate": 7.417321313021342e-06, + "loss": 1.2432, + "step": 20287 + }, + { + "epoch": 0.5956897057959951, + "grad_norm": 0.0, + "learning_rate": 7.416402627242227e-06, + "loss": 1.4258, + "step": 20288 + }, + { + "epoch": 0.5957190674731341, + "grad_norm": 0.0, + "learning_rate": 7.415483964827244e-06, + "loss": 1.3633, + "step": 20289 + }, + { + "epoch": 0.595748429150273, + "grad_norm": 0.0, + "learning_rate": 7.414565325784703e-06, + "loss": 1.2842, + "step": 20290 + }, + { + "epoch": 0.5957777908274121, + "grad_norm": 0.0, + "learning_rate": 7.413646710122908e-06, + "loss": 1.106, + "step": 20291 + }, + { + "epoch": 0.5958071525045511, + "grad_norm": 0.0, + "learning_rate": 7.412728117850172e-06, + "loss": 1.4043, + "step": 20292 + }, + { + "epoch": 0.59583651418169, + "grad_norm": 0.0, + "learning_rate": 7.411809548974792e-06, + "loss": 1.3652, + "step": 20293 + }, + { + "epoch": 0.5958658758588291, + "grad_norm": 0.0, + "learning_rate": 7.410891003505086e-06, + "loss": 1.0952, + "step": 20294 + }, + { + "epoch": 0.5958952375359681, + "grad_norm": 0.0, + "learning_rate": 7.409972481449354e-06, + "loss": 1.3564, + "step": 20295 + }, + { + "epoch": 0.595924599213107, + "grad_norm": 0.0, + "learning_rate": 7.409053982815905e-06, + "loss": 1.2432, + "step": 20296 + }, + { + "epoch": 0.5959539608902461, + "grad_norm": 0.0, + "learning_rate": 7.408135507613048e-06, + "loss": 1.3574, + "step": 20297 + }, + { + "epoch": 0.5959833225673851, + "grad_norm": 0.0, + "learning_rate": 7.407217055849078e-06, + "loss": 1.2256, + "step": 20298 + }, + { + "epoch": 0.596012684244524, + "grad_norm": 0.0, + "learning_rate": 7.4062986275323135e-06, + "loss": 1.2168, + "step": 20299 + }, + { + "epoch": 0.5960420459216631, + "grad_norm": 0.0, + "learning_rate": 7.405380222671052e-06, + "loss": 1.3535, + "step": 20300 + }, + { + "epoch": 0.5960714075988021, + "grad_norm": 0.0, + "learning_rate": 7.4044618412736035e-06, + "loss": 1.3398, + "step": 20301 + }, + { + "epoch": 0.596100769275941, + "grad_norm": 0.0, + "learning_rate": 7.4035434833482676e-06, + "loss": 1.3936, + "step": 20302 + }, + { + "epoch": 0.5961301309530801, + "grad_norm": 0.0, + "learning_rate": 7.4026251489033575e-06, + "loss": 1.2622, + "step": 20303 + }, + { + "epoch": 0.5961594926302191, + "grad_norm": 0.0, + "learning_rate": 7.401706837947172e-06, + "loss": 1.2461, + "step": 20304 + }, + { + "epoch": 0.596188854307358, + "grad_norm": 0.0, + "learning_rate": 7.400788550488016e-06, + "loss": 1.3086, + "step": 20305 + }, + { + "epoch": 0.596218215984497, + "grad_norm": 0.0, + "learning_rate": 7.399870286534193e-06, + "loss": 1.2378, + "step": 20306 + }, + { + "epoch": 0.596247577661636, + "grad_norm": 0.0, + "learning_rate": 7.398952046094014e-06, + "loss": 1.3057, + "step": 20307 + }, + { + "epoch": 0.596276939338775, + "grad_norm": 0.0, + "learning_rate": 7.398033829175776e-06, + "loss": 1.1162, + "step": 20308 + }, + { + "epoch": 0.596306301015914, + "grad_norm": 0.0, + "learning_rate": 7.397115635787783e-06, + "loss": 1.3359, + "step": 20309 + }, + { + "epoch": 0.596335662693053, + "grad_norm": 0.0, + "learning_rate": 7.3961974659383414e-06, + "loss": 1.2646, + "step": 20310 + }, + { + "epoch": 0.596365024370192, + "grad_norm": 0.0, + "learning_rate": 7.395279319635751e-06, + "loss": 1.3662, + "step": 20311 + }, + { + "epoch": 0.596394386047331, + "grad_norm": 0.0, + "learning_rate": 7.39436119688832e-06, + "loss": 1.1929, + "step": 20312 + }, + { + "epoch": 0.59642374772447, + "grad_norm": 0.0, + "learning_rate": 7.393443097704343e-06, + "loss": 1.2832, + "step": 20313 + }, + { + "epoch": 0.596453109401609, + "grad_norm": 0.0, + "learning_rate": 7.392525022092132e-06, + "loss": 1.2236, + "step": 20314 + }, + { + "epoch": 0.596482471078748, + "grad_norm": 0.0, + "learning_rate": 7.391606970059983e-06, + "loss": 1.335, + "step": 20315 + }, + { + "epoch": 0.596511832755887, + "grad_norm": 0.0, + "learning_rate": 7.390688941616202e-06, + "loss": 1.3477, + "step": 20316 + }, + { + "epoch": 0.596541194433026, + "grad_norm": 0.0, + "learning_rate": 7.3897709367690885e-06, + "loss": 1.2788, + "step": 20317 + }, + { + "epoch": 0.596570556110165, + "grad_norm": 0.0, + "learning_rate": 7.388852955526942e-06, + "loss": 1.229, + "step": 20318 + }, + { + "epoch": 0.596599917787304, + "grad_norm": 0.0, + "learning_rate": 7.38793499789807e-06, + "loss": 1.1787, + "step": 20319 + }, + { + "epoch": 0.596629279464443, + "grad_norm": 0.0, + "learning_rate": 7.387017063890769e-06, + "loss": 1.2168, + "step": 20320 + }, + { + "epoch": 0.596658641141582, + "grad_norm": 0.0, + "learning_rate": 7.386099153513344e-06, + "loss": 1.3457, + "step": 20321 + }, + { + "epoch": 0.596688002818721, + "grad_norm": 0.0, + "learning_rate": 7.38518126677409e-06, + "loss": 1.2432, + "step": 20322 + }, + { + "epoch": 0.59671736449586, + "grad_norm": 0.0, + "learning_rate": 7.384263403681315e-06, + "loss": 1.2095, + "step": 20323 + }, + { + "epoch": 0.596746726172999, + "grad_norm": 0.0, + "learning_rate": 7.383345564243316e-06, + "loss": 1.2236, + "step": 20324 + }, + { + "epoch": 0.596776087850138, + "grad_norm": 0.0, + "learning_rate": 7.382427748468392e-06, + "loss": 1.2656, + "step": 20325 + }, + { + "epoch": 0.596805449527277, + "grad_norm": 0.0, + "learning_rate": 7.381509956364847e-06, + "loss": 1.3374, + "step": 20326 + }, + { + "epoch": 0.596834811204416, + "grad_norm": 0.0, + "learning_rate": 7.380592187940972e-06, + "loss": 1.3115, + "step": 20327 + }, + { + "epoch": 0.596864172881555, + "grad_norm": 0.0, + "learning_rate": 7.379674443205078e-06, + "loss": 1.1772, + "step": 20328 + }, + { + "epoch": 0.596893534558694, + "grad_norm": 0.0, + "learning_rate": 7.378756722165457e-06, + "loss": 1.4072, + "step": 20329 + }, + { + "epoch": 0.596922896235833, + "grad_norm": 0.0, + "learning_rate": 7.377839024830411e-06, + "loss": 1.2764, + "step": 20330 + }, + { + "epoch": 0.596952257912972, + "grad_norm": 0.0, + "learning_rate": 7.376921351208235e-06, + "loss": 1.2607, + "step": 20331 + }, + { + "epoch": 0.596981619590111, + "grad_norm": 0.0, + "learning_rate": 7.376003701307236e-06, + "loss": 1.1914, + "step": 20332 + }, + { + "epoch": 0.5970109812672499, + "grad_norm": 0.0, + "learning_rate": 7.375086075135704e-06, + "loss": 1.1831, + "step": 20333 + }, + { + "epoch": 0.597040342944389, + "grad_norm": 0.0, + "learning_rate": 7.3741684727019434e-06, + "loss": 1.3799, + "step": 20334 + }, + { + "epoch": 0.597069704621528, + "grad_norm": 0.0, + "learning_rate": 7.373250894014246e-06, + "loss": 1.4131, + "step": 20335 + }, + { + "epoch": 0.5970990662986669, + "grad_norm": 0.0, + "learning_rate": 7.37233333908092e-06, + "loss": 1.3857, + "step": 20336 + }, + { + "epoch": 0.597128427975806, + "grad_norm": 0.0, + "learning_rate": 7.3714158079102535e-06, + "loss": 1.2129, + "step": 20337 + }, + { + "epoch": 0.597157789652945, + "grad_norm": 0.0, + "learning_rate": 7.370498300510544e-06, + "loss": 1.2793, + "step": 20338 + }, + { + "epoch": 0.5971871513300839, + "grad_norm": 0.0, + "learning_rate": 7.3695808168900965e-06, + "loss": 1.3086, + "step": 20339 + }, + { + "epoch": 0.597216513007223, + "grad_norm": 0.0, + "learning_rate": 7.3686633570571995e-06, + "loss": 1.3428, + "step": 20340 + }, + { + "epoch": 0.597245874684362, + "grad_norm": 0.0, + "learning_rate": 7.367745921020157e-06, + "loss": 1.2339, + "step": 20341 + }, + { + "epoch": 0.5972752363615009, + "grad_norm": 0.0, + "learning_rate": 7.366828508787257e-06, + "loss": 1.1865, + "step": 20342 + }, + { + "epoch": 0.59730459803864, + "grad_norm": 0.0, + "learning_rate": 7.365911120366806e-06, + "loss": 1.2422, + "step": 20343 + }, + { + "epoch": 0.597333959715779, + "grad_norm": 0.0, + "learning_rate": 7.364993755767093e-06, + "loss": 1.2812, + "step": 20344 + }, + { + "epoch": 0.5973633213929179, + "grad_norm": 0.0, + "learning_rate": 7.364076414996418e-06, + "loss": 1.3213, + "step": 20345 + }, + { + "epoch": 0.597392683070057, + "grad_norm": 0.0, + "learning_rate": 7.363159098063075e-06, + "loss": 1.3232, + "step": 20346 + }, + { + "epoch": 0.597422044747196, + "grad_norm": 0.0, + "learning_rate": 7.362241804975356e-06, + "loss": 1.2383, + "step": 20347 + }, + { + "epoch": 0.5974514064243349, + "grad_norm": 0.0, + "learning_rate": 7.361324535741565e-06, + "loss": 1.1719, + "step": 20348 + }, + { + "epoch": 0.597480768101474, + "grad_norm": 0.0, + "learning_rate": 7.360407290369989e-06, + "loss": 1.2861, + "step": 20349 + }, + { + "epoch": 0.597510129778613, + "grad_norm": 0.0, + "learning_rate": 7.3594900688689275e-06, + "loss": 1.188, + "step": 20350 + }, + { + "epoch": 0.5975394914557519, + "grad_norm": 0.0, + "learning_rate": 7.35857287124667e-06, + "loss": 1.2393, + "step": 20351 + }, + { + "epoch": 0.597568853132891, + "grad_norm": 0.0, + "learning_rate": 7.3576556975115185e-06, + "loss": 1.2905, + "step": 20352 + }, + { + "epoch": 0.59759821481003, + "grad_norm": 0.0, + "learning_rate": 7.35673854767176e-06, + "loss": 1.2393, + "step": 20353 + }, + { + "epoch": 0.5976275764871689, + "grad_norm": 0.0, + "learning_rate": 7.355821421735696e-06, + "loss": 1.2393, + "step": 20354 + }, + { + "epoch": 0.597656938164308, + "grad_norm": 0.0, + "learning_rate": 7.354904319711611e-06, + "loss": 1.3237, + "step": 20355 + }, + { + "epoch": 0.5976862998414469, + "grad_norm": 0.0, + "learning_rate": 7.353987241607807e-06, + "loss": 1.3525, + "step": 20356 + }, + { + "epoch": 0.5977156615185859, + "grad_norm": 0.0, + "learning_rate": 7.353070187432575e-06, + "loss": 1.248, + "step": 20357 + }, + { + "epoch": 0.597745023195725, + "grad_norm": 0.0, + "learning_rate": 7.352153157194204e-06, + "loss": 1.125, + "step": 20358 + }, + { + "epoch": 0.5977743848728639, + "grad_norm": 0.0, + "learning_rate": 7.351236150900993e-06, + "loss": 1.146, + "step": 20359 + }, + { + "epoch": 0.5978037465500029, + "grad_norm": 0.0, + "learning_rate": 7.350319168561229e-06, + "loss": 1.3701, + "step": 20360 + }, + { + "epoch": 0.597833108227142, + "grad_norm": 0.0, + "learning_rate": 7.349402210183212e-06, + "loss": 1.2852, + "step": 20361 + }, + { + "epoch": 0.5978624699042809, + "grad_norm": 0.0, + "learning_rate": 7.348485275775223e-06, + "loss": 1.2153, + "step": 20362 + }, + { + "epoch": 0.5978918315814199, + "grad_norm": 0.0, + "learning_rate": 7.347568365345566e-06, + "loss": 1.2041, + "step": 20363 + }, + { + "epoch": 0.597921193258559, + "grad_norm": 0.0, + "learning_rate": 7.3466514789025245e-06, + "loss": 1.2837, + "step": 20364 + }, + { + "epoch": 0.5979505549356979, + "grad_norm": 0.0, + "learning_rate": 7.345734616454396e-06, + "loss": 1.2734, + "step": 20365 + }, + { + "epoch": 0.5979799166128369, + "grad_norm": 0.0, + "learning_rate": 7.34481777800947e-06, + "loss": 1.3447, + "step": 20366 + }, + { + "epoch": 0.598009278289976, + "grad_norm": 0.0, + "learning_rate": 7.343900963576032e-06, + "loss": 1.2129, + "step": 20367 + }, + { + "epoch": 0.5980386399671149, + "grad_norm": 0.0, + "learning_rate": 7.34298417316238e-06, + "loss": 1.4111, + "step": 20368 + }, + { + "epoch": 0.5980680016442539, + "grad_norm": 0.0, + "learning_rate": 7.342067406776803e-06, + "loss": 1.3145, + "step": 20369 + }, + { + "epoch": 0.598097363321393, + "grad_norm": 0.0, + "learning_rate": 7.341150664427592e-06, + "loss": 1.2441, + "step": 20370 + }, + { + "epoch": 0.5981267249985319, + "grad_norm": 0.0, + "learning_rate": 7.340233946123032e-06, + "loss": 1.3926, + "step": 20371 + }, + { + "epoch": 0.5981560866756709, + "grad_norm": 0.0, + "learning_rate": 7.339317251871423e-06, + "loss": 1.3555, + "step": 20372 + }, + { + "epoch": 0.59818544835281, + "grad_norm": 0.0, + "learning_rate": 7.338400581681047e-06, + "loss": 1.2451, + "step": 20373 + }, + { + "epoch": 0.5982148100299489, + "grad_norm": 0.0, + "learning_rate": 7.337483935560199e-06, + "loss": 1.2627, + "step": 20374 + }, + { + "epoch": 0.5982441717070879, + "grad_norm": 0.0, + "learning_rate": 7.33656731351716e-06, + "loss": 1.1001, + "step": 20375 + }, + { + "epoch": 0.598273533384227, + "grad_norm": 0.0, + "learning_rate": 7.335650715560229e-06, + "loss": 1.2559, + "step": 20376 + }, + { + "epoch": 0.5983028950613659, + "grad_norm": 0.0, + "learning_rate": 7.334734141697693e-06, + "loss": 1.3369, + "step": 20377 + }, + { + "epoch": 0.5983322567385049, + "grad_norm": 0.0, + "learning_rate": 7.333817591937835e-06, + "loss": 1.2686, + "step": 20378 + }, + { + "epoch": 0.598361618415644, + "grad_norm": 0.0, + "learning_rate": 7.332901066288949e-06, + "loss": 1.293, + "step": 20379 + }, + { + "epoch": 0.5983909800927829, + "grad_norm": 0.0, + "learning_rate": 7.331984564759318e-06, + "loss": 1.1831, + "step": 20380 + }, + { + "epoch": 0.5984203417699219, + "grad_norm": 0.0, + "learning_rate": 7.331068087357238e-06, + "loss": 1.2212, + "step": 20381 + }, + { + "epoch": 0.5984497034470609, + "grad_norm": 0.0, + "learning_rate": 7.330151634090992e-06, + "loss": 1.2451, + "step": 20382 + }, + { + "epoch": 0.5984790651241999, + "grad_norm": 0.0, + "learning_rate": 7.32923520496887e-06, + "loss": 1.3311, + "step": 20383 + }, + { + "epoch": 0.5985084268013389, + "grad_norm": 0.0, + "learning_rate": 7.328318799999152e-06, + "loss": 1.3701, + "step": 20384 + }, + { + "epoch": 0.5985377884784779, + "grad_norm": 0.0, + "learning_rate": 7.327402419190137e-06, + "loss": 1.1987, + "step": 20385 + }, + { + "epoch": 0.5985671501556169, + "grad_norm": 0.0, + "learning_rate": 7.326486062550109e-06, + "loss": 1.334, + "step": 20386 + }, + { + "epoch": 0.5985965118327559, + "grad_norm": 0.0, + "learning_rate": 7.325569730087345e-06, + "loss": 1.1812, + "step": 20387 + }, + { + "epoch": 0.5986258735098949, + "grad_norm": 0.0, + "learning_rate": 7.324653421810144e-06, + "loss": 1.2686, + "step": 20388 + }, + { + "epoch": 0.5986552351870339, + "grad_norm": 0.0, + "learning_rate": 7.323737137726785e-06, + "loss": 1.2334, + "step": 20389 + }, + { + "epoch": 0.5986845968641729, + "grad_norm": 0.0, + "learning_rate": 7.322820877845558e-06, + "loss": 1.252, + "step": 20390 + }, + { + "epoch": 0.5987139585413119, + "grad_norm": 0.0, + "learning_rate": 7.321904642174743e-06, + "loss": 1.293, + "step": 20391 + }, + { + "epoch": 0.5987433202184509, + "grad_norm": 0.0, + "learning_rate": 7.320988430722636e-06, + "loss": 1.2988, + "step": 20392 + }, + { + "epoch": 0.5987726818955899, + "grad_norm": 0.0, + "learning_rate": 7.320072243497512e-06, + "loss": 1.2212, + "step": 20393 + }, + { + "epoch": 0.5988020435727289, + "grad_norm": 0.0, + "learning_rate": 7.319156080507666e-06, + "loss": 1.1436, + "step": 20394 + }, + { + "epoch": 0.5988314052498679, + "grad_norm": 0.0, + "learning_rate": 7.3182399417613755e-06, + "loss": 1.3672, + "step": 20395 + }, + { + "epoch": 0.5988607669270068, + "grad_norm": 0.0, + "learning_rate": 7.317323827266926e-06, + "loss": 1.3633, + "step": 20396 + }, + { + "epoch": 0.5988901286041459, + "grad_norm": 0.0, + "learning_rate": 7.316407737032607e-06, + "loss": 1.2246, + "step": 20397 + }, + { + "epoch": 0.5989194902812849, + "grad_norm": 0.0, + "learning_rate": 7.315491671066699e-06, + "loss": 1.3545, + "step": 20398 + }, + { + "epoch": 0.5989488519584238, + "grad_norm": 0.0, + "learning_rate": 7.3145756293774885e-06, + "loss": 1.2803, + "step": 20399 + }, + { + "epoch": 0.5989782136355629, + "grad_norm": 0.0, + "learning_rate": 7.313659611973254e-06, + "loss": 1.3721, + "step": 20400 + }, + { + "epoch": 0.5990075753127019, + "grad_norm": 0.0, + "learning_rate": 7.312743618862288e-06, + "loss": 1.3135, + "step": 20401 + }, + { + "epoch": 0.5990369369898408, + "grad_norm": 0.0, + "learning_rate": 7.3118276500528686e-06, + "loss": 1.2559, + "step": 20402 + }, + { + "epoch": 0.5990662986669799, + "grad_norm": 0.0, + "learning_rate": 7.31091170555328e-06, + "loss": 1.3623, + "step": 20403 + }, + { + "epoch": 0.5990956603441189, + "grad_norm": 0.0, + "learning_rate": 7.309995785371804e-06, + "loss": 1.2139, + "step": 20404 + }, + { + "epoch": 0.5991250220212578, + "grad_norm": 0.0, + "learning_rate": 7.3090798895167285e-06, + "loss": 1.272, + "step": 20405 + }, + { + "epoch": 0.5991543836983968, + "grad_norm": 0.0, + "learning_rate": 7.3081640179963335e-06, + "loss": 1.2134, + "step": 20406 + }, + { + "epoch": 0.5991837453755359, + "grad_norm": 0.0, + "learning_rate": 7.307248170818897e-06, + "loss": 1.2969, + "step": 20407 + }, + { + "epoch": 0.5992131070526748, + "grad_norm": 0.0, + "learning_rate": 7.306332347992706e-06, + "loss": 1.373, + "step": 20408 + }, + { + "epoch": 0.5992424687298138, + "grad_norm": 0.0, + "learning_rate": 7.305416549526039e-06, + "loss": 1.3013, + "step": 20409 + }, + { + "epoch": 0.5992718304069529, + "grad_norm": 0.0, + "learning_rate": 7.304500775427184e-06, + "loss": 1.2461, + "step": 20410 + }, + { + "epoch": 0.5993011920840918, + "grad_norm": 0.0, + "learning_rate": 7.303585025704418e-06, + "loss": 1.3203, + "step": 20411 + }, + { + "epoch": 0.5993305537612308, + "grad_norm": 0.0, + "learning_rate": 7.302669300366023e-06, + "loss": 1.2148, + "step": 20412 + }, + { + "epoch": 0.5993599154383699, + "grad_norm": 0.0, + "learning_rate": 7.301753599420278e-06, + "loss": 1.2705, + "step": 20413 + }, + { + "epoch": 0.5993892771155088, + "grad_norm": 0.0, + "learning_rate": 7.300837922875472e-06, + "loss": 1.4043, + "step": 20414 + }, + { + "epoch": 0.5994186387926478, + "grad_norm": 0.0, + "learning_rate": 7.299922270739877e-06, + "loss": 1.186, + "step": 20415 + }, + { + "epoch": 0.5994480004697869, + "grad_norm": 0.0, + "learning_rate": 7.299006643021773e-06, + "loss": 1.2334, + "step": 20416 + }, + { + "epoch": 0.5994773621469258, + "grad_norm": 0.0, + "learning_rate": 7.298091039729448e-06, + "loss": 1.3174, + "step": 20417 + }, + { + "epoch": 0.5995067238240648, + "grad_norm": 0.0, + "learning_rate": 7.297175460871176e-06, + "loss": 1.1699, + "step": 20418 + }, + { + "epoch": 0.5995360855012039, + "grad_norm": 0.0, + "learning_rate": 7.296259906455239e-06, + "loss": 1.2305, + "step": 20419 + }, + { + "epoch": 0.5995654471783428, + "grad_norm": 0.0, + "learning_rate": 7.295344376489912e-06, + "loss": 1.4004, + "step": 20420 + }, + { + "epoch": 0.5995948088554818, + "grad_norm": 0.0, + "learning_rate": 7.294428870983484e-06, + "loss": 1.269, + "step": 20421 + }, + { + "epoch": 0.5996241705326208, + "grad_norm": 0.0, + "learning_rate": 7.2935133899442265e-06, + "loss": 1.2622, + "step": 20422 + }, + { + "epoch": 0.5996535322097598, + "grad_norm": 0.0, + "learning_rate": 7.292597933380422e-06, + "loss": 1.2979, + "step": 20423 + }, + { + "epoch": 0.5996828938868988, + "grad_norm": 0.0, + "learning_rate": 7.2916825013003434e-06, + "loss": 1.3975, + "step": 20424 + }, + { + "epoch": 0.5997122555640378, + "grad_norm": 0.0, + "learning_rate": 7.290767093712279e-06, + "loss": 1.4092, + "step": 20425 + }, + { + "epoch": 0.5997416172411768, + "grad_norm": 0.0, + "learning_rate": 7.2898517106245e-06, + "loss": 1.2061, + "step": 20426 + }, + { + "epoch": 0.5997709789183158, + "grad_norm": 0.0, + "learning_rate": 7.288936352045284e-06, + "loss": 1.3223, + "step": 20427 + }, + { + "epoch": 0.5998003405954548, + "grad_norm": 0.0, + "learning_rate": 7.288021017982914e-06, + "loss": 1.144, + "step": 20428 + }, + { + "epoch": 0.5998297022725938, + "grad_norm": 0.0, + "learning_rate": 7.28710570844566e-06, + "loss": 1.2002, + "step": 20429 + }, + { + "epoch": 0.5998590639497328, + "grad_norm": 0.0, + "learning_rate": 7.286190423441808e-06, + "loss": 1.2979, + "step": 20430 + }, + { + "epoch": 0.5998884256268718, + "grad_norm": 0.0, + "learning_rate": 7.285275162979628e-06, + "loss": 1.4102, + "step": 20431 + }, + { + "epoch": 0.5999177873040108, + "grad_norm": 0.0, + "learning_rate": 7.284359927067404e-06, + "loss": 1.3242, + "step": 20432 + }, + { + "epoch": 0.5999471489811498, + "grad_norm": 0.0, + "learning_rate": 7.2834447157134034e-06, + "loss": 1.2661, + "step": 20433 + }, + { + "epoch": 0.5999765106582888, + "grad_norm": 0.0, + "learning_rate": 7.282529528925913e-06, + "loss": 1.3057, + "step": 20434 + }, + { + "epoch": 0.6000058723354278, + "grad_norm": 0.0, + "learning_rate": 7.281614366713202e-06, + "loss": 1.3853, + "step": 20435 + }, + { + "epoch": 0.6000352340125668, + "grad_norm": 0.0, + "learning_rate": 7.280699229083548e-06, + "loss": 1.0112, + "step": 20436 + }, + { + "epoch": 0.6000645956897058, + "grad_norm": 0.0, + "learning_rate": 7.279784116045229e-06, + "loss": 1.0669, + "step": 20437 + }, + { + "epoch": 0.6000939573668448, + "grad_norm": 0.0, + "learning_rate": 7.278869027606514e-06, + "loss": 1.3867, + "step": 20438 + }, + { + "epoch": 0.6001233190439837, + "grad_norm": 0.0, + "learning_rate": 7.27795396377569e-06, + "loss": 1.3726, + "step": 20439 + }, + { + "epoch": 0.6001526807211228, + "grad_norm": 0.0, + "learning_rate": 7.27703892456102e-06, + "loss": 1.2949, + "step": 20440 + }, + { + "epoch": 0.6001820423982618, + "grad_norm": 0.0, + "learning_rate": 7.276123909970787e-06, + "loss": 1.2988, + "step": 20441 + }, + { + "epoch": 0.6002114040754007, + "grad_norm": 0.0, + "learning_rate": 7.275208920013262e-06, + "loss": 1.3115, + "step": 20442 + }, + { + "epoch": 0.6002407657525398, + "grad_norm": 0.0, + "learning_rate": 7.2742939546967226e-06, + "loss": 1.2266, + "step": 20443 + }, + { + "epoch": 0.6002701274296788, + "grad_norm": 0.0, + "learning_rate": 7.273379014029443e-06, + "loss": 1.377, + "step": 20444 + }, + { + "epoch": 0.6002994891068177, + "grad_norm": 0.0, + "learning_rate": 7.272464098019687e-06, + "loss": 1.4072, + "step": 20445 + }, + { + "epoch": 0.6003288507839568, + "grad_norm": 0.0, + "learning_rate": 7.271549206675743e-06, + "loss": 1.3457, + "step": 20446 + }, + { + "epoch": 0.6003582124610958, + "grad_norm": 0.0, + "learning_rate": 7.270634340005877e-06, + "loss": 1.2295, + "step": 20447 + }, + { + "epoch": 0.6003875741382347, + "grad_norm": 0.0, + "learning_rate": 7.269719498018365e-06, + "loss": 1.2114, + "step": 20448 + }, + { + "epoch": 0.6004169358153738, + "grad_norm": 0.0, + "learning_rate": 7.268804680721474e-06, + "loss": 1.2979, + "step": 20449 + }, + { + "epoch": 0.6004462974925128, + "grad_norm": 0.0, + "learning_rate": 7.267889888123487e-06, + "loss": 1.2793, + "step": 20450 + }, + { + "epoch": 0.6004756591696517, + "grad_norm": 0.0, + "learning_rate": 7.2669751202326685e-06, + "loss": 1.3535, + "step": 20451 + }, + { + "epoch": 0.6005050208467908, + "grad_norm": 0.0, + "learning_rate": 7.266060377057295e-06, + "loss": 1.1704, + "step": 20452 + }, + { + "epoch": 0.6005343825239298, + "grad_norm": 0.0, + "learning_rate": 7.2651456586056365e-06, + "loss": 1.3691, + "step": 20453 + }, + { + "epoch": 0.6005637442010687, + "grad_norm": 0.0, + "learning_rate": 7.264230964885968e-06, + "loss": 1.1919, + "step": 20454 + }, + { + "epoch": 0.6005931058782078, + "grad_norm": 0.0, + "learning_rate": 7.263316295906561e-06, + "loss": 1.3047, + "step": 20455 + }, + { + "epoch": 0.6006224675553468, + "grad_norm": 0.0, + "learning_rate": 7.262401651675683e-06, + "loss": 1.3467, + "step": 20456 + }, + { + "epoch": 0.6006518292324857, + "grad_norm": 0.0, + "learning_rate": 7.261487032201609e-06, + "loss": 1.2441, + "step": 20457 + }, + { + "epoch": 0.6006811909096248, + "grad_norm": 0.0, + "learning_rate": 7.260572437492606e-06, + "loss": 1.3066, + "step": 20458 + }, + { + "epoch": 0.6007105525867638, + "grad_norm": 0.0, + "learning_rate": 7.259657867556953e-06, + "loss": 1.3271, + "step": 20459 + }, + { + "epoch": 0.6007399142639027, + "grad_norm": 0.0, + "learning_rate": 7.258743322402912e-06, + "loss": 1.3623, + "step": 20460 + }, + { + "epoch": 0.6007692759410418, + "grad_norm": 0.0, + "learning_rate": 7.25782880203876e-06, + "loss": 1.2412, + "step": 20461 + }, + { + "epoch": 0.6007986376181808, + "grad_norm": 0.0, + "learning_rate": 7.2569143064727595e-06, + "loss": 1.3184, + "step": 20462 + }, + { + "epoch": 0.6008279992953197, + "grad_norm": 0.0, + "learning_rate": 7.25599983571319e-06, + "loss": 1.335, + "step": 20463 + }, + { + "epoch": 0.6008573609724588, + "grad_norm": 0.0, + "learning_rate": 7.2550853897683195e-06, + "loss": 1.2344, + "step": 20464 + }, + { + "epoch": 0.6008867226495977, + "grad_norm": 0.0, + "learning_rate": 7.254170968646408e-06, + "loss": 1.1953, + "step": 20465 + }, + { + "epoch": 0.6009160843267367, + "grad_norm": 0.0, + "learning_rate": 7.253256572355736e-06, + "loss": 1.2891, + "step": 20466 + }, + { + "epoch": 0.6009454460038758, + "grad_norm": 0.0, + "learning_rate": 7.252342200904567e-06, + "loss": 1.2432, + "step": 20467 + }, + { + "epoch": 0.6009748076810147, + "grad_norm": 0.0, + "learning_rate": 7.251427854301172e-06, + "loss": 1.3818, + "step": 20468 + }, + { + "epoch": 0.6010041693581537, + "grad_norm": 0.0, + "learning_rate": 7.2505135325538145e-06, + "loss": 1.3271, + "step": 20469 + }, + { + "epoch": 0.6010335310352928, + "grad_norm": 0.0, + "learning_rate": 7.249599235670772e-06, + "loss": 1.3525, + "step": 20470 + }, + { + "epoch": 0.6010628927124317, + "grad_norm": 0.0, + "learning_rate": 7.248684963660307e-06, + "loss": 1.2705, + "step": 20471 + }, + { + "epoch": 0.6010922543895707, + "grad_norm": 0.0, + "learning_rate": 7.2477707165306885e-06, + "loss": 1.2515, + "step": 20472 + }, + { + "epoch": 0.6011216160667098, + "grad_norm": 0.0, + "learning_rate": 7.246856494290181e-06, + "loss": 1.335, + "step": 20473 + }, + { + "epoch": 0.6011509777438487, + "grad_norm": 0.0, + "learning_rate": 7.2459422969470596e-06, + "loss": 1.2476, + "step": 20474 + }, + { + "epoch": 0.6011803394209877, + "grad_norm": 0.0, + "learning_rate": 7.245028124509588e-06, + "loss": 1.2178, + "step": 20475 + }, + { + "epoch": 0.6012097010981268, + "grad_norm": 0.0, + "learning_rate": 7.244113976986029e-06, + "loss": 1.1973, + "step": 20476 + }, + { + "epoch": 0.6012390627752657, + "grad_norm": 0.0, + "learning_rate": 7.243199854384656e-06, + "loss": 1.2583, + "step": 20477 + }, + { + "epoch": 0.6012684244524047, + "grad_norm": 0.0, + "learning_rate": 7.242285756713728e-06, + "loss": 1.3457, + "step": 20478 + }, + { + "epoch": 0.6012977861295438, + "grad_norm": 0.0, + "learning_rate": 7.241371683981522e-06, + "loss": 1.3105, + "step": 20479 + }, + { + "epoch": 0.6013271478066827, + "grad_norm": 0.0, + "learning_rate": 7.240457636196294e-06, + "loss": 1.2227, + "step": 20480 + }, + { + "epoch": 0.6013565094838217, + "grad_norm": 0.0, + "learning_rate": 7.239543613366318e-06, + "loss": 1.166, + "step": 20481 + }, + { + "epoch": 0.6013858711609608, + "grad_norm": 0.0, + "learning_rate": 7.238629615499851e-06, + "loss": 1.3633, + "step": 20482 + }, + { + "epoch": 0.6014152328380997, + "grad_norm": 0.0, + "learning_rate": 7.2377156426051686e-06, + "loss": 1.4043, + "step": 20483 + }, + { + "epoch": 0.6014445945152387, + "grad_norm": 0.0, + "learning_rate": 7.236801694690532e-06, + "loss": 1.5664, + "step": 20484 + }, + { + "epoch": 0.6014739561923778, + "grad_norm": 0.0, + "learning_rate": 7.235887771764201e-06, + "loss": 1.2617, + "step": 20485 + }, + { + "epoch": 0.6015033178695167, + "grad_norm": 0.0, + "learning_rate": 7.2349738738344475e-06, + "loss": 1.3691, + "step": 20486 + }, + { + "epoch": 0.6015326795466557, + "grad_norm": 0.0, + "learning_rate": 7.234060000909531e-06, + "loss": 1.2412, + "step": 20487 + }, + { + "epoch": 0.6015620412237948, + "grad_norm": 0.0, + "learning_rate": 7.23314615299772e-06, + "loss": 1.1797, + "step": 20488 + }, + { + "epoch": 0.6015914029009337, + "grad_norm": 0.0, + "learning_rate": 7.232232330107276e-06, + "loss": 1.1538, + "step": 20489 + }, + { + "epoch": 0.6016207645780727, + "grad_norm": 0.0, + "learning_rate": 7.231318532246465e-06, + "loss": 1.25, + "step": 20490 + }, + { + "epoch": 0.6016501262552117, + "grad_norm": 0.0, + "learning_rate": 7.230404759423547e-06, + "loss": 1.2803, + "step": 20491 + }, + { + "epoch": 0.6016794879323507, + "grad_norm": 0.0, + "learning_rate": 7.229491011646794e-06, + "loss": 1.1655, + "step": 20492 + }, + { + "epoch": 0.6017088496094897, + "grad_norm": 0.0, + "learning_rate": 7.228577288924456e-06, + "loss": 1.3662, + "step": 20493 + }, + { + "epoch": 0.6017382112866287, + "grad_norm": 0.0, + "learning_rate": 7.227663591264809e-06, + "loss": 1.2227, + "step": 20494 + }, + { + "epoch": 0.6017675729637677, + "grad_norm": 0.0, + "learning_rate": 7.226749918676108e-06, + "loss": 1.4307, + "step": 20495 + }, + { + "epoch": 0.6017969346409067, + "grad_norm": 0.0, + "learning_rate": 7.225836271166618e-06, + "loss": 1.2012, + "step": 20496 + }, + { + "epoch": 0.6018262963180457, + "grad_norm": 0.0, + "learning_rate": 7.224922648744601e-06, + "loss": 1.3477, + "step": 20497 + }, + { + "epoch": 0.6018556579951847, + "grad_norm": 0.0, + "learning_rate": 7.224009051418317e-06, + "loss": 1.2153, + "step": 20498 + }, + { + "epoch": 0.6018850196723237, + "grad_norm": 0.0, + "learning_rate": 7.223095479196034e-06, + "loss": 1.3799, + "step": 20499 + }, + { + "epoch": 0.6019143813494627, + "grad_norm": 0.0, + "learning_rate": 7.222181932086005e-06, + "loss": 1.5195, + "step": 20500 + }, + { + "epoch": 0.6019437430266017, + "grad_norm": 0.0, + "learning_rate": 7.221268410096501e-06, + "loss": 1.2939, + "step": 20501 + }, + { + "epoch": 0.6019731047037407, + "grad_norm": 0.0, + "learning_rate": 7.220354913235774e-06, + "loss": 1.252, + "step": 20502 + }, + { + "epoch": 0.6020024663808797, + "grad_norm": 0.0, + "learning_rate": 7.219441441512094e-06, + "loss": 1.2793, + "step": 20503 + }, + { + "epoch": 0.6020318280580187, + "grad_norm": 0.0, + "learning_rate": 7.218527994933715e-06, + "loss": 1.3867, + "step": 20504 + }, + { + "epoch": 0.6020611897351577, + "grad_norm": 0.0, + "learning_rate": 7.2176145735088995e-06, + "loss": 1.3047, + "step": 20505 + }, + { + "epoch": 0.6020905514122966, + "grad_norm": 0.0, + "learning_rate": 7.216701177245909e-06, + "loss": 1.334, + "step": 20506 + }, + { + "epoch": 0.6021199130894357, + "grad_norm": 0.0, + "learning_rate": 7.215787806152999e-06, + "loss": 1.2646, + "step": 20507 + }, + { + "epoch": 0.6021492747665746, + "grad_norm": 0.0, + "learning_rate": 7.214874460238437e-06, + "loss": 1.2998, + "step": 20508 + }, + { + "epoch": 0.6021786364437136, + "grad_norm": 0.0, + "learning_rate": 7.213961139510477e-06, + "loss": 1.1211, + "step": 20509 + }, + { + "epoch": 0.6022079981208527, + "grad_norm": 0.0, + "learning_rate": 7.213047843977381e-06, + "loss": 1.2891, + "step": 20510 + }, + { + "epoch": 0.6022373597979916, + "grad_norm": 0.0, + "learning_rate": 7.212134573647403e-06, + "loss": 1.2329, + "step": 20511 + }, + { + "epoch": 0.6022667214751306, + "grad_norm": 0.0, + "learning_rate": 7.211221328528811e-06, + "loss": 1.2954, + "step": 20512 + }, + { + "epoch": 0.6022960831522697, + "grad_norm": 0.0, + "learning_rate": 7.2103081086298575e-06, + "loss": 1.2666, + "step": 20513 + }, + { + "epoch": 0.6023254448294086, + "grad_norm": 0.0, + "learning_rate": 7.209394913958803e-06, + "loss": 1.458, + "step": 20514 + }, + { + "epoch": 0.6023548065065476, + "grad_norm": 0.0, + "learning_rate": 7.208481744523903e-06, + "loss": 1.2891, + "step": 20515 + }, + { + "epoch": 0.6023841681836867, + "grad_norm": 0.0, + "learning_rate": 7.207568600333419e-06, + "loss": 1.3516, + "step": 20516 + }, + { + "epoch": 0.6024135298608256, + "grad_norm": 0.0, + "learning_rate": 7.206655481395607e-06, + "loss": 1.2051, + "step": 20517 + }, + { + "epoch": 0.6024428915379646, + "grad_norm": 0.0, + "learning_rate": 7.205742387718722e-06, + "loss": 1.4551, + "step": 20518 + }, + { + "epoch": 0.6024722532151037, + "grad_norm": 0.0, + "learning_rate": 7.204829319311027e-06, + "loss": 1.2969, + "step": 20519 + }, + { + "epoch": 0.6025016148922426, + "grad_norm": 0.0, + "learning_rate": 7.203916276180775e-06, + "loss": 1.1641, + "step": 20520 + }, + { + "epoch": 0.6025309765693816, + "grad_norm": 0.0, + "learning_rate": 7.203003258336227e-06, + "loss": 1.3457, + "step": 20521 + }, + { + "epoch": 0.6025603382465207, + "grad_norm": 0.0, + "learning_rate": 7.202090265785633e-06, + "loss": 1.2314, + "step": 20522 + }, + { + "epoch": 0.6025896999236596, + "grad_norm": 0.0, + "learning_rate": 7.201177298537256e-06, + "loss": 1.335, + "step": 20523 + }, + { + "epoch": 0.6026190616007986, + "grad_norm": 0.0, + "learning_rate": 7.200264356599349e-06, + "loss": 1.3047, + "step": 20524 + }, + { + "epoch": 0.6026484232779377, + "grad_norm": 0.0, + "learning_rate": 7.199351439980168e-06, + "loss": 1.1953, + "step": 20525 + }, + { + "epoch": 0.6026777849550766, + "grad_norm": 0.0, + "learning_rate": 7.198438548687971e-06, + "loss": 1.2368, + "step": 20526 + }, + { + "epoch": 0.6027071466322156, + "grad_norm": 0.0, + "learning_rate": 7.197525682731007e-06, + "loss": 1.2998, + "step": 20527 + }, + { + "epoch": 0.6027365083093547, + "grad_norm": 0.0, + "learning_rate": 7.196612842117541e-06, + "loss": 1.1816, + "step": 20528 + }, + { + "epoch": 0.6027658699864936, + "grad_norm": 0.0, + "learning_rate": 7.1957000268558195e-06, + "loss": 1.3525, + "step": 20529 + }, + { + "epoch": 0.6027952316636326, + "grad_norm": 0.0, + "learning_rate": 7.194787236954104e-06, + "loss": 1.3018, + "step": 20530 + }, + { + "epoch": 0.6028245933407717, + "grad_norm": 0.0, + "learning_rate": 7.193874472420641e-06, + "loss": 1.2183, + "step": 20531 + }, + { + "epoch": 0.6028539550179106, + "grad_norm": 0.0, + "learning_rate": 7.1929617332636945e-06, + "loss": 1.2578, + "step": 20532 + }, + { + "epoch": 0.6028833166950496, + "grad_norm": 0.0, + "learning_rate": 7.192049019491514e-06, + "loss": 1.2129, + "step": 20533 + }, + { + "epoch": 0.6029126783721886, + "grad_norm": 0.0, + "learning_rate": 7.1911363311123515e-06, + "loss": 1.2275, + "step": 20534 + }, + { + "epoch": 0.6029420400493276, + "grad_norm": 0.0, + "learning_rate": 7.190223668134464e-06, + "loss": 1.3037, + "step": 20535 + }, + { + "epoch": 0.6029714017264666, + "grad_norm": 0.0, + "learning_rate": 7.1893110305660996e-06, + "loss": 1.2842, + "step": 20536 + }, + { + "epoch": 0.6030007634036056, + "grad_norm": 0.0, + "learning_rate": 7.188398418415518e-06, + "loss": 1.3447, + "step": 20537 + }, + { + "epoch": 0.6030301250807446, + "grad_norm": 0.0, + "learning_rate": 7.18748583169097e-06, + "loss": 1.3691, + "step": 20538 + }, + { + "epoch": 0.6030594867578836, + "grad_norm": 0.0, + "learning_rate": 7.18657327040071e-06, + "loss": 1.2188, + "step": 20539 + }, + { + "epoch": 0.6030888484350226, + "grad_norm": 0.0, + "learning_rate": 7.185660734552983e-06, + "loss": 1.3984, + "step": 20540 + }, + { + "epoch": 0.6031182101121616, + "grad_norm": 0.0, + "learning_rate": 7.184748224156053e-06, + "loss": 1.3389, + "step": 20541 + }, + { + "epoch": 0.6031475717893006, + "grad_norm": 0.0, + "learning_rate": 7.183835739218159e-06, + "loss": 1.2734, + "step": 20542 + }, + { + "epoch": 0.6031769334664396, + "grad_norm": 0.0, + "learning_rate": 7.182923279747565e-06, + "loss": 1.187, + "step": 20543 + }, + { + "epoch": 0.6032062951435786, + "grad_norm": 0.0, + "learning_rate": 7.182010845752517e-06, + "loss": 1.2578, + "step": 20544 + }, + { + "epoch": 0.6032356568207176, + "grad_norm": 0.0, + "learning_rate": 7.181098437241265e-06, + "loss": 1.187, + "step": 20545 + }, + { + "epoch": 0.6032650184978566, + "grad_norm": 0.0, + "learning_rate": 7.180186054222062e-06, + "loss": 1.2036, + "step": 20546 + }, + { + "epoch": 0.6032943801749956, + "grad_norm": 0.0, + "learning_rate": 7.179273696703156e-06, + "loss": 1.3232, + "step": 20547 + }, + { + "epoch": 0.6033237418521346, + "grad_norm": 0.0, + "learning_rate": 7.178361364692805e-06, + "loss": 1.3115, + "step": 20548 + }, + { + "epoch": 0.6033531035292736, + "grad_norm": 0.0, + "learning_rate": 7.177449058199252e-06, + "loss": 1.332, + "step": 20549 + }, + { + "epoch": 0.6033824652064126, + "grad_norm": 0.0, + "learning_rate": 7.176536777230752e-06, + "loss": 1.2476, + "step": 20550 + }, + { + "epoch": 0.6034118268835515, + "grad_norm": 0.0, + "learning_rate": 7.175624521795549e-06, + "loss": 1.3389, + "step": 20551 + }, + { + "epoch": 0.6034411885606906, + "grad_norm": 0.0, + "learning_rate": 7.174712291901901e-06, + "loss": 1.1455, + "step": 20552 + }, + { + "epoch": 0.6034705502378296, + "grad_norm": 0.0, + "learning_rate": 7.173800087558054e-06, + "loss": 1.2588, + "step": 20553 + }, + { + "epoch": 0.6034999119149685, + "grad_norm": 0.0, + "learning_rate": 7.172887908772253e-06, + "loss": 1.415, + "step": 20554 + }, + { + "epoch": 0.6035292735921076, + "grad_norm": 0.0, + "learning_rate": 7.171975755552754e-06, + "loss": 1.3301, + "step": 20555 + }, + { + "epoch": 0.6035586352692466, + "grad_norm": 0.0, + "learning_rate": 7.1710636279077975e-06, + "loss": 1.3169, + "step": 20556 + }, + { + "epoch": 0.6035879969463855, + "grad_norm": 0.0, + "learning_rate": 7.170151525845641e-06, + "loss": 1.3916, + "step": 20557 + }, + { + "epoch": 0.6036173586235246, + "grad_norm": 0.0, + "learning_rate": 7.169239449374528e-06, + "loss": 1.3027, + "step": 20558 + }, + { + "epoch": 0.6036467203006636, + "grad_norm": 0.0, + "learning_rate": 7.168327398502709e-06, + "loss": 1.2305, + "step": 20559 + }, + { + "epoch": 0.6036760819778025, + "grad_norm": 0.0, + "learning_rate": 7.167415373238427e-06, + "loss": 1.2388, + "step": 20560 + }, + { + "epoch": 0.6037054436549416, + "grad_norm": 0.0, + "learning_rate": 7.166503373589937e-06, + "loss": 1.334, + "step": 20561 + }, + { + "epoch": 0.6037348053320806, + "grad_norm": 0.0, + "learning_rate": 7.16559139956548e-06, + "loss": 1.416, + "step": 20562 + }, + { + "epoch": 0.6037641670092195, + "grad_norm": 0.0, + "learning_rate": 7.164679451173308e-06, + "loss": 1.3105, + "step": 20563 + }, + { + "epoch": 0.6037935286863586, + "grad_norm": 0.0, + "learning_rate": 7.163767528421668e-06, + "loss": 1.3779, + "step": 20564 + }, + { + "epoch": 0.6038228903634976, + "grad_norm": 0.0, + "learning_rate": 7.162855631318798e-06, + "loss": 1.2568, + "step": 20565 + }, + { + "epoch": 0.6038522520406365, + "grad_norm": 0.0, + "learning_rate": 7.161943759872958e-06, + "loss": 1.165, + "step": 20566 + }, + { + "epoch": 0.6038816137177756, + "grad_norm": 0.0, + "learning_rate": 7.1610319140923826e-06, + "loss": 1.4268, + "step": 20567 + }, + { + "epoch": 0.6039109753949146, + "grad_norm": 0.0, + "learning_rate": 7.160120093985326e-06, + "loss": 1.3052, + "step": 20568 + }, + { + "epoch": 0.6039403370720535, + "grad_norm": 0.0, + "learning_rate": 7.159208299560027e-06, + "loss": 1.2598, + "step": 20569 + }, + { + "epoch": 0.6039696987491926, + "grad_norm": 0.0, + "learning_rate": 7.158296530824738e-06, + "loss": 1.1758, + "step": 20570 + }, + { + "epoch": 0.6039990604263316, + "grad_norm": 0.0, + "learning_rate": 7.157384787787698e-06, + "loss": 1.2236, + "step": 20571 + }, + { + "epoch": 0.6040284221034705, + "grad_norm": 0.0, + "learning_rate": 7.156473070457158e-06, + "loss": 1.4287, + "step": 20572 + }, + { + "epoch": 0.6040577837806096, + "grad_norm": 0.0, + "learning_rate": 7.155561378841361e-06, + "loss": 1.3418, + "step": 20573 + }, + { + "epoch": 0.6040871454577486, + "grad_norm": 0.0, + "learning_rate": 7.154649712948549e-06, + "loss": 1.3291, + "step": 20574 + }, + { + "epoch": 0.6041165071348875, + "grad_norm": 0.0, + "learning_rate": 7.153738072786971e-06, + "loss": 1.3071, + "step": 20575 + }, + { + "epoch": 0.6041458688120266, + "grad_norm": 0.0, + "learning_rate": 7.152826458364863e-06, + "loss": 1.2705, + "step": 20576 + }, + { + "epoch": 0.6041752304891655, + "grad_norm": 0.0, + "learning_rate": 7.15191486969048e-06, + "loss": 1.3281, + "step": 20577 + }, + { + "epoch": 0.6042045921663045, + "grad_norm": 0.0, + "learning_rate": 7.1510033067720576e-06, + "loss": 1.2559, + "step": 20578 + }, + { + "epoch": 0.6042339538434436, + "grad_norm": 0.0, + "learning_rate": 7.150091769617844e-06, + "loss": 1.2705, + "step": 20579 + }, + { + "epoch": 0.6042633155205825, + "grad_norm": 0.0, + "learning_rate": 7.149180258236077e-06, + "loss": 1.418, + "step": 20580 + }, + { + "epoch": 0.6042926771977215, + "grad_norm": 0.0, + "learning_rate": 7.148268772635005e-06, + "loss": 1.1611, + "step": 20581 + }, + { + "epoch": 0.6043220388748606, + "grad_norm": 0.0, + "learning_rate": 7.147357312822871e-06, + "loss": 1.2715, + "step": 20582 + }, + { + "epoch": 0.6043514005519995, + "grad_norm": 0.0, + "learning_rate": 7.146445878807912e-06, + "loss": 1.4102, + "step": 20583 + }, + { + "epoch": 0.6043807622291385, + "grad_norm": 0.0, + "learning_rate": 7.145534470598377e-06, + "loss": 1.3984, + "step": 20584 + }, + { + "epoch": 0.6044101239062776, + "grad_norm": 0.0, + "learning_rate": 7.1446230882025e-06, + "loss": 1.2686, + "step": 20585 + }, + { + "epoch": 0.6044394855834165, + "grad_norm": 0.0, + "learning_rate": 7.143711731628532e-06, + "loss": 1.272, + "step": 20586 + }, + { + "epoch": 0.6044688472605555, + "grad_norm": 0.0, + "learning_rate": 7.142800400884708e-06, + "loss": 1.3145, + "step": 20587 + }, + { + "epoch": 0.6044982089376946, + "grad_norm": 0.0, + "learning_rate": 7.141889095979273e-06, + "loss": 1.2559, + "step": 20588 + }, + { + "epoch": 0.6045275706148335, + "grad_norm": 0.0, + "learning_rate": 7.140977816920465e-06, + "loss": 1.3154, + "step": 20589 + }, + { + "epoch": 0.6045569322919725, + "grad_norm": 0.0, + "learning_rate": 7.140066563716529e-06, + "loss": 1.1196, + "step": 20590 + }, + { + "epoch": 0.6045862939691116, + "grad_norm": 0.0, + "learning_rate": 7.139155336375702e-06, + "loss": 1.2334, + "step": 20591 + }, + { + "epoch": 0.6046156556462505, + "grad_norm": 0.0, + "learning_rate": 7.138244134906228e-06, + "loss": 1.377, + "step": 20592 + }, + { + "epoch": 0.6046450173233895, + "grad_norm": 0.0, + "learning_rate": 7.137332959316346e-06, + "loss": 1.3584, + "step": 20593 + }, + { + "epoch": 0.6046743790005286, + "grad_norm": 0.0, + "learning_rate": 7.136421809614291e-06, + "loss": 1.207, + "step": 20594 + }, + { + "epoch": 0.6047037406776675, + "grad_norm": 0.0, + "learning_rate": 7.135510685808311e-06, + "loss": 1.207, + "step": 20595 + }, + { + "epoch": 0.6047331023548065, + "grad_norm": 0.0, + "learning_rate": 7.134599587906637e-06, + "loss": 1.2524, + "step": 20596 + }, + { + "epoch": 0.6047624640319456, + "grad_norm": 0.0, + "learning_rate": 7.133688515917516e-06, + "loss": 1.2471, + "step": 20597 + }, + { + "epoch": 0.6047918257090845, + "grad_norm": 0.0, + "learning_rate": 7.132777469849184e-06, + "loss": 1.2568, + "step": 20598 + }, + { + "epoch": 0.6048211873862235, + "grad_norm": 0.0, + "learning_rate": 7.131866449709879e-06, + "loss": 1.3252, + "step": 20599 + }, + { + "epoch": 0.6048505490633626, + "grad_norm": 0.0, + "learning_rate": 7.130955455507838e-06, + "loss": 1.167, + "step": 20600 + }, + { + "epoch": 0.6048799107405015, + "grad_norm": 0.0, + "learning_rate": 7.130044487251304e-06, + "loss": 1.4473, + "step": 20601 + }, + { + "epoch": 0.6049092724176405, + "grad_norm": 0.0, + "learning_rate": 7.129133544948514e-06, + "loss": 1.1431, + "step": 20602 + }, + { + "epoch": 0.6049386340947795, + "grad_norm": 0.0, + "learning_rate": 7.128222628607701e-06, + "loss": 1.2881, + "step": 20603 + }, + { + "epoch": 0.6049679957719185, + "grad_norm": 0.0, + "learning_rate": 7.12731173823711e-06, + "loss": 1.373, + "step": 20604 + }, + { + "epoch": 0.6049973574490575, + "grad_norm": 0.0, + "learning_rate": 7.12640087384497e-06, + "loss": 1.3105, + "step": 20605 + }, + { + "epoch": 0.6050267191261965, + "grad_norm": 0.0, + "learning_rate": 7.125490035439527e-06, + "loss": 1.2598, + "step": 20606 + }, + { + "epoch": 0.6050560808033355, + "grad_norm": 0.0, + "learning_rate": 7.124579223029013e-06, + "loss": 1.2734, + "step": 20607 + }, + { + "epoch": 0.6050854424804745, + "grad_norm": 0.0, + "learning_rate": 7.123668436621665e-06, + "loss": 1.3374, + "step": 20608 + }, + { + "epoch": 0.6051148041576134, + "grad_norm": 0.0, + "learning_rate": 7.122757676225718e-06, + "loss": 1.2412, + "step": 20609 + }, + { + "epoch": 0.6051441658347525, + "grad_norm": 0.0, + "learning_rate": 7.121846941849413e-06, + "loss": 1.2793, + "step": 20610 + }, + { + "epoch": 0.6051735275118915, + "grad_norm": 0.0, + "learning_rate": 7.120936233500981e-06, + "loss": 1.2114, + "step": 20611 + }, + { + "epoch": 0.6052028891890304, + "grad_norm": 0.0, + "learning_rate": 7.120025551188663e-06, + "loss": 1.3955, + "step": 20612 + }, + { + "epoch": 0.6052322508661695, + "grad_norm": 0.0, + "learning_rate": 7.119114894920691e-06, + "loss": 1.1934, + "step": 20613 + }, + { + "epoch": 0.6052616125433085, + "grad_norm": 0.0, + "learning_rate": 7.118204264705297e-06, + "loss": 1.1313, + "step": 20614 + }, + { + "epoch": 0.6052909742204474, + "grad_norm": 0.0, + "learning_rate": 7.117293660550724e-06, + "loss": 1.3564, + "step": 20615 + }, + { + "epoch": 0.6053203358975865, + "grad_norm": 0.0, + "learning_rate": 7.1163830824652e-06, + "loss": 1.2637, + "step": 20616 + }, + { + "epoch": 0.6053496975747255, + "grad_norm": 0.0, + "learning_rate": 7.115472530456965e-06, + "loss": 1.25, + "step": 20617 + }, + { + "epoch": 0.6053790592518644, + "grad_norm": 0.0, + "learning_rate": 7.114562004534247e-06, + "loss": 1.3193, + "step": 20618 + }, + { + "epoch": 0.6054084209290035, + "grad_norm": 0.0, + "learning_rate": 7.1136515047052875e-06, + "loss": 1.2178, + "step": 20619 + }, + { + "epoch": 0.6054377826061424, + "grad_norm": 0.0, + "learning_rate": 7.112741030978314e-06, + "loss": 1.3213, + "step": 20620 + }, + { + "epoch": 0.6054671442832814, + "grad_norm": 0.0, + "learning_rate": 7.111830583361565e-06, + "loss": 1.2104, + "step": 20621 + }, + { + "epoch": 0.6054965059604205, + "grad_norm": 0.0, + "learning_rate": 7.11092016186327e-06, + "loss": 1.3096, + "step": 20622 + }, + { + "epoch": 0.6055258676375594, + "grad_norm": 0.0, + "learning_rate": 7.110009766491666e-06, + "loss": 1.2588, + "step": 20623 + }, + { + "epoch": 0.6055552293146984, + "grad_norm": 0.0, + "learning_rate": 7.109099397254983e-06, + "loss": 1.3105, + "step": 20624 + }, + { + "epoch": 0.6055845909918375, + "grad_norm": 0.0, + "learning_rate": 7.108189054161451e-06, + "loss": 1.2324, + "step": 20625 + }, + { + "epoch": 0.6056139526689764, + "grad_norm": 0.0, + "learning_rate": 7.1072787372193095e-06, + "loss": 1.292, + "step": 20626 + }, + { + "epoch": 0.6056433143461154, + "grad_norm": 0.0, + "learning_rate": 7.1063684464367865e-06, + "loss": 1.2217, + "step": 20627 + }, + { + "epoch": 0.6056726760232545, + "grad_norm": 0.0, + "learning_rate": 7.105458181822116e-06, + "loss": 1.3271, + "step": 20628 + }, + { + "epoch": 0.6057020377003934, + "grad_norm": 0.0, + "learning_rate": 7.104547943383524e-06, + "loss": 1.2515, + "step": 20629 + }, + { + "epoch": 0.6057313993775324, + "grad_norm": 0.0, + "learning_rate": 7.10363773112925e-06, + "loss": 1.2529, + "step": 20630 + }, + { + "epoch": 0.6057607610546715, + "grad_norm": 0.0, + "learning_rate": 7.102727545067523e-06, + "loss": 1.2676, + "step": 20631 + }, + { + "epoch": 0.6057901227318104, + "grad_norm": 0.0, + "learning_rate": 7.101817385206571e-06, + "loss": 1.2158, + "step": 20632 + }, + { + "epoch": 0.6058194844089494, + "grad_norm": 0.0, + "learning_rate": 7.100907251554627e-06, + "loss": 1.3047, + "step": 20633 + }, + { + "epoch": 0.6058488460860885, + "grad_norm": 0.0, + "learning_rate": 7.099997144119919e-06, + "loss": 1.2979, + "step": 20634 + }, + { + "epoch": 0.6058782077632274, + "grad_norm": 0.0, + "learning_rate": 7.0990870629106835e-06, + "loss": 1.2432, + "step": 20635 + }, + { + "epoch": 0.6059075694403664, + "grad_norm": 0.0, + "learning_rate": 7.098177007935143e-06, + "loss": 1.2617, + "step": 20636 + }, + { + "epoch": 0.6059369311175055, + "grad_norm": 0.0, + "learning_rate": 7.097266979201533e-06, + "loss": 1.208, + "step": 20637 + }, + { + "epoch": 0.6059662927946444, + "grad_norm": 0.0, + "learning_rate": 7.09635697671808e-06, + "loss": 1.2754, + "step": 20638 + }, + { + "epoch": 0.6059956544717834, + "grad_norm": 0.0, + "learning_rate": 7.0954470004930165e-06, + "loss": 1.2734, + "step": 20639 + }, + { + "epoch": 0.6060250161489225, + "grad_norm": 0.0, + "learning_rate": 7.094537050534567e-06, + "loss": 1.1099, + "step": 20640 + }, + { + "epoch": 0.6060543778260614, + "grad_norm": 0.0, + "learning_rate": 7.093627126850966e-06, + "loss": 1.1758, + "step": 20641 + }, + { + "epoch": 0.6060837395032004, + "grad_norm": 0.0, + "learning_rate": 7.092717229450439e-06, + "loss": 1.2852, + "step": 20642 + }, + { + "epoch": 0.6061131011803395, + "grad_norm": 0.0, + "learning_rate": 7.09180735834121e-06, + "loss": 1.2544, + "step": 20643 + }, + { + "epoch": 0.6061424628574784, + "grad_norm": 0.0, + "learning_rate": 7.090897513531518e-06, + "loss": 1.3232, + "step": 20644 + }, + { + "epoch": 0.6061718245346174, + "grad_norm": 0.0, + "learning_rate": 7.08998769502958e-06, + "loss": 1.1738, + "step": 20645 + }, + { + "epoch": 0.6062011862117564, + "grad_norm": 0.0, + "learning_rate": 7.089077902843632e-06, + "loss": 1.4121, + "step": 20646 + }, + { + "epoch": 0.6062305478888954, + "grad_norm": 0.0, + "learning_rate": 7.088168136981896e-06, + "loss": 1.3721, + "step": 20647 + }, + { + "epoch": 0.6062599095660344, + "grad_norm": 0.0, + "learning_rate": 7.087258397452603e-06, + "loss": 1.2461, + "step": 20648 + }, + { + "epoch": 0.6062892712431734, + "grad_norm": 0.0, + "learning_rate": 7.086348684263974e-06, + "loss": 1.2998, + "step": 20649 + }, + { + "epoch": 0.6063186329203124, + "grad_norm": 0.0, + "learning_rate": 7.085438997424246e-06, + "loss": 1.2246, + "step": 20650 + }, + { + "epoch": 0.6063479945974514, + "grad_norm": 0.0, + "learning_rate": 7.084529336941637e-06, + "loss": 1.2173, + "step": 20651 + }, + { + "epoch": 0.6063773562745904, + "grad_norm": 0.0, + "learning_rate": 7.083619702824375e-06, + "loss": 1.2246, + "step": 20652 + }, + { + "epoch": 0.6064067179517294, + "grad_norm": 0.0, + "learning_rate": 7.082710095080688e-06, + "loss": 1.2876, + "step": 20653 + }, + { + "epoch": 0.6064360796288684, + "grad_norm": 0.0, + "learning_rate": 7.081800513718797e-06, + "loss": 1.2705, + "step": 20654 + }, + { + "epoch": 0.6064654413060074, + "grad_norm": 0.0, + "learning_rate": 7.080890958746936e-06, + "loss": 1.3564, + "step": 20655 + }, + { + "epoch": 0.6064948029831464, + "grad_norm": 0.0, + "learning_rate": 7.079981430173324e-06, + "loss": 1.2451, + "step": 20656 + }, + { + "epoch": 0.6065241646602854, + "grad_norm": 0.0, + "learning_rate": 7.07907192800619e-06, + "loss": 1.271, + "step": 20657 + }, + { + "epoch": 0.6065535263374244, + "grad_norm": 0.0, + "learning_rate": 7.078162452253752e-06, + "loss": 1.2324, + "step": 20658 + }, + { + "epoch": 0.6065828880145634, + "grad_norm": 0.0, + "learning_rate": 7.077253002924243e-06, + "loss": 1.3491, + "step": 20659 + }, + { + "epoch": 0.6066122496917024, + "grad_norm": 0.0, + "learning_rate": 7.0763435800258815e-06, + "loss": 1.2256, + "step": 20660 + }, + { + "epoch": 0.6066416113688414, + "grad_norm": 0.0, + "learning_rate": 7.075434183566896e-06, + "loss": 1.3955, + "step": 20661 + }, + { + "epoch": 0.6066709730459804, + "grad_norm": 0.0, + "learning_rate": 7.074524813555508e-06, + "loss": 1.2441, + "step": 20662 + }, + { + "epoch": 0.6067003347231193, + "grad_norm": 0.0, + "learning_rate": 7.073615469999937e-06, + "loss": 1.3555, + "step": 20663 + }, + { + "epoch": 0.6067296964002584, + "grad_norm": 0.0, + "learning_rate": 7.072706152908415e-06, + "loss": 1.3145, + "step": 20664 + }, + { + "epoch": 0.6067590580773974, + "grad_norm": 0.0, + "learning_rate": 7.07179686228916e-06, + "loss": 1.208, + "step": 20665 + }, + { + "epoch": 0.6067884197545363, + "grad_norm": 0.0, + "learning_rate": 7.070887598150398e-06, + "loss": 1.2139, + "step": 20666 + }, + { + "epoch": 0.6068177814316754, + "grad_norm": 0.0, + "learning_rate": 7.0699783605003436e-06, + "loss": 1.3467, + "step": 20667 + }, + { + "epoch": 0.6068471431088144, + "grad_norm": 0.0, + "learning_rate": 7.0690691493472306e-06, + "loss": 1.2451, + "step": 20668 + }, + { + "epoch": 0.6068765047859533, + "grad_norm": 0.0, + "learning_rate": 7.0681599646992735e-06, + "loss": 1.1431, + "step": 20669 + }, + { + "epoch": 0.6069058664630924, + "grad_norm": 0.0, + "learning_rate": 7.0672508065647e-06, + "loss": 1.2612, + "step": 20670 + }, + { + "epoch": 0.6069352281402314, + "grad_norm": 0.0, + "learning_rate": 7.066341674951727e-06, + "loss": 1.2383, + "step": 20671 + }, + { + "epoch": 0.6069645898173703, + "grad_norm": 0.0, + "learning_rate": 7.0654325698685756e-06, + "loss": 1.2935, + "step": 20672 + }, + { + "epoch": 0.6069939514945094, + "grad_norm": 0.0, + "learning_rate": 7.064523491323471e-06, + "loss": 1.2158, + "step": 20673 + }, + { + "epoch": 0.6070233131716484, + "grad_norm": 0.0, + "learning_rate": 7.063614439324629e-06, + "loss": 1.3809, + "step": 20674 + }, + { + "epoch": 0.6070526748487873, + "grad_norm": 0.0, + "learning_rate": 7.0627054138802775e-06, + "loss": 1.3379, + "step": 20675 + }, + { + "epoch": 0.6070820365259264, + "grad_norm": 0.0, + "learning_rate": 7.061796414998632e-06, + "loss": 1.2539, + "step": 20676 + }, + { + "epoch": 0.6071113982030654, + "grad_norm": 0.0, + "learning_rate": 7.060887442687914e-06, + "loss": 1.2744, + "step": 20677 + }, + { + "epoch": 0.6071407598802043, + "grad_norm": 0.0, + "learning_rate": 7.059978496956341e-06, + "loss": 1.2241, + "step": 20678 + }, + { + "epoch": 0.6071701215573434, + "grad_norm": 0.0, + "learning_rate": 7.0590695778121385e-06, + "loss": 1.2007, + "step": 20679 + }, + { + "epoch": 0.6071994832344824, + "grad_norm": 0.0, + "learning_rate": 7.058160685263522e-06, + "loss": 1.3438, + "step": 20680 + }, + { + "epoch": 0.6072288449116213, + "grad_norm": 0.0, + "learning_rate": 7.057251819318713e-06, + "loss": 1.21, + "step": 20681 + }, + { + "epoch": 0.6072582065887604, + "grad_norm": 0.0, + "learning_rate": 7.056342979985931e-06, + "loss": 1.2612, + "step": 20682 + }, + { + "epoch": 0.6072875682658994, + "grad_norm": 0.0, + "learning_rate": 7.055434167273387e-06, + "loss": 1.2241, + "step": 20683 + }, + { + "epoch": 0.6073169299430383, + "grad_norm": 0.0, + "learning_rate": 7.0545253811893115e-06, + "loss": 1.4092, + "step": 20684 + }, + { + "epoch": 0.6073462916201774, + "grad_norm": 0.0, + "learning_rate": 7.053616621741914e-06, + "loss": 1.4209, + "step": 20685 + }, + { + "epoch": 0.6073756532973164, + "grad_norm": 0.0, + "learning_rate": 7.05270788893942e-06, + "loss": 1.3145, + "step": 20686 + }, + { + "epoch": 0.6074050149744553, + "grad_norm": 0.0, + "learning_rate": 7.0517991827900376e-06, + "loss": 1.2207, + "step": 20687 + }, + { + "epoch": 0.6074343766515944, + "grad_norm": 0.0, + "learning_rate": 7.050890503301995e-06, + "loss": 1.2817, + "step": 20688 + }, + { + "epoch": 0.6074637383287333, + "grad_norm": 0.0, + "learning_rate": 7.049981850483503e-06, + "loss": 1.2451, + "step": 20689 + }, + { + "epoch": 0.6074931000058723, + "grad_norm": 0.0, + "learning_rate": 7.049073224342782e-06, + "loss": 1.1445, + "step": 20690 + }, + { + "epoch": 0.6075224616830114, + "grad_norm": 0.0, + "learning_rate": 7.048164624888047e-06, + "loss": 1.3291, + "step": 20691 + }, + { + "epoch": 0.6075518233601503, + "grad_norm": 0.0, + "learning_rate": 7.047256052127514e-06, + "loss": 1.3662, + "step": 20692 + }, + { + "epoch": 0.6075811850372893, + "grad_norm": 0.0, + "learning_rate": 7.046347506069402e-06, + "loss": 1.2261, + "step": 20693 + }, + { + "epoch": 0.6076105467144284, + "grad_norm": 0.0, + "learning_rate": 7.045438986721925e-06, + "loss": 1.2686, + "step": 20694 + }, + { + "epoch": 0.6076399083915673, + "grad_norm": 0.0, + "learning_rate": 7.044530494093302e-06, + "loss": 1.2275, + "step": 20695 + }, + { + "epoch": 0.6076692700687063, + "grad_norm": 0.0, + "learning_rate": 7.043622028191743e-06, + "loss": 1.3379, + "step": 20696 + }, + { + "epoch": 0.6076986317458454, + "grad_norm": 0.0, + "learning_rate": 7.0427135890254715e-06, + "loss": 1.167, + "step": 20697 + }, + { + "epoch": 0.6077279934229843, + "grad_norm": 0.0, + "learning_rate": 7.041805176602694e-06, + "loss": 1.3174, + "step": 20698 + }, + { + "epoch": 0.6077573551001233, + "grad_norm": 0.0, + "learning_rate": 7.040896790931633e-06, + "loss": 1.2246, + "step": 20699 + }, + { + "epoch": 0.6077867167772624, + "grad_norm": 0.0, + "learning_rate": 7.039988432020501e-06, + "loss": 1.2988, + "step": 20700 + }, + { + "epoch": 0.6078160784544013, + "grad_norm": 0.0, + "learning_rate": 7.039080099877509e-06, + "loss": 1.3242, + "step": 20701 + }, + { + "epoch": 0.6078454401315403, + "grad_norm": 0.0, + "learning_rate": 7.038171794510875e-06, + "loss": 1.1968, + "step": 20702 + }, + { + "epoch": 0.6078748018086794, + "grad_norm": 0.0, + "learning_rate": 7.037263515928809e-06, + "loss": 1.2173, + "step": 20703 + }, + { + "epoch": 0.6079041634858183, + "grad_norm": 0.0, + "learning_rate": 7.036355264139533e-06, + "loss": 1.2969, + "step": 20704 + }, + { + "epoch": 0.6079335251629573, + "grad_norm": 0.0, + "learning_rate": 7.035447039151252e-06, + "loss": 1.3384, + "step": 20705 + }, + { + "epoch": 0.6079628868400964, + "grad_norm": 0.0, + "learning_rate": 7.034538840972185e-06, + "loss": 1.2852, + "step": 20706 + }, + { + "epoch": 0.6079922485172353, + "grad_norm": 0.0, + "learning_rate": 7.033630669610538e-06, + "loss": 1.2451, + "step": 20707 + }, + { + "epoch": 0.6080216101943743, + "grad_norm": 0.0, + "learning_rate": 7.032722525074534e-06, + "loss": 1.1709, + "step": 20708 + }, + { + "epoch": 0.6080509718715132, + "grad_norm": 0.0, + "learning_rate": 7.031814407372377e-06, + "loss": 1.2236, + "step": 20709 + }, + { + "epoch": 0.6080803335486523, + "grad_norm": 0.0, + "learning_rate": 7.030906316512285e-06, + "loss": 1.1904, + "step": 20710 + }, + { + "epoch": 0.6081096952257913, + "grad_norm": 0.0, + "learning_rate": 7.029998252502468e-06, + "loss": 1.1807, + "step": 20711 + }, + { + "epoch": 0.6081390569029302, + "grad_norm": 0.0, + "learning_rate": 7.029090215351133e-06, + "loss": 1.2241, + "step": 20712 + }, + { + "epoch": 0.6081684185800693, + "grad_norm": 0.0, + "learning_rate": 7.028182205066499e-06, + "loss": 1.377, + "step": 20713 + }, + { + "epoch": 0.6081977802572083, + "grad_norm": 0.0, + "learning_rate": 7.027274221656773e-06, + "loss": 1.2539, + "step": 20714 + }, + { + "epoch": 0.6082271419343472, + "grad_norm": 0.0, + "learning_rate": 7.026366265130171e-06, + "loss": 1.251, + "step": 20715 + }, + { + "epoch": 0.6082565036114863, + "grad_norm": 0.0, + "learning_rate": 7.025458335494894e-06, + "loss": 1.2676, + "step": 20716 + }, + { + "epoch": 0.6082858652886253, + "grad_norm": 0.0, + "learning_rate": 7.024550432759165e-06, + "loss": 1.2725, + "step": 20717 + }, + { + "epoch": 0.6083152269657642, + "grad_norm": 0.0, + "learning_rate": 7.0236425569311854e-06, + "loss": 1.2949, + "step": 20718 + }, + { + "epoch": 0.6083445886429033, + "grad_norm": 0.0, + "learning_rate": 7.022734708019171e-06, + "loss": 1.1021, + "step": 20719 + }, + { + "epoch": 0.6083739503200423, + "grad_norm": 0.0, + "learning_rate": 7.021826886031329e-06, + "loss": 1.3311, + "step": 20720 + }, + { + "epoch": 0.6084033119971812, + "grad_norm": 0.0, + "learning_rate": 7.020919090975867e-06, + "loss": 1.2827, + "step": 20721 + }, + { + "epoch": 0.6084326736743203, + "grad_norm": 0.0, + "learning_rate": 7.020011322860999e-06, + "loss": 1.3955, + "step": 20722 + }, + { + "epoch": 0.6084620353514593, + "grad_norm": 0.0, + "learning_rate": 7.019103581694928e-06, + "loss": 1.3486, + "step": 20723 + }, + { + "epoch": 0.6084913970285982, + "grad_norm": 0.0, + "learning_rate": 7.0181958674858695e-06, + "loss": 1.1982, + "step": 20724 + }, + { + "epoch": 0.6085207587057373, + "grad_norm": 0.0, + "learning_rate": 7.017288180242028e-06, + "loss": 1.4028, + "step": 20725 + }, + { + "epoch": 0.6085501203828763, + "grad_norm": 0.0, + "learning_rate": 7.016380519971616e-06, + "loss": 1.2446, + "step": 20726 + }, + { + "epoch": 0.6085794820600152, + "grad_norm": 0.0, + "learning_rate": 7.015472886682835e-06, + "loss": 1.3848, + "step": 20727 + }, + { + "epoch": 0.6086088437371543, + "grad_norm": 0.0, + "learning_rate": 7.014565280383901e-06, + "loss": 1.2656, + "step": 20728 + }, + { + "epoch": 0.6086382054142933, + "grad_norm": 0.0, + "learning_rate": 7.013657701083014e-06, + "loss": 1.1646, + "step": 20729 + }, + { + "epoch": 0.6086675670914322, + "grad_norm": 0.0, + "learning_rate": 7.0127501487883895e-06, + "loss": 1.3594, + "step": 20730 + }, + { + "epoch": 0.6086969287685713, + "grad_norm": 0.0, + "learning_rate": 7.01184262350823e-06, + "loss": 1.4453, + "step": 20731 + }, + { + "epoch": 0.6087262904457102, + "grad_norm": 0.0, + "learning_rate": 7.010935125250737e-06, + "loss": 1.1885, + "step": 20732 + }, + { + "epoch": 0.6087556521228492, + "grad_norm": 0.0, + "learning_rate": 7.01002765402413e-06, + "loss": 1.3398, + "step": 20733 + }, + { + "epoch": 0.6087850137999883, + "grad_norm": 0.0, + "learning_rate": 7.0091202098366054e-06, + "loss": 1.252, + "step": 20734 + }, + { + "epoch": 0.6088143754771272, + "grad_norm": 0.0, + "learning_rate": 7.008212792696374e-06, + "loss": 1.1943, + "step": 20735 + }, + { + "epoch": 0.6088437371542662, + "grad_norm": 0.0, + "learning_rate": 7.007305402611637e-06, + "loss": 1.3662, + "step": 20736 + }, + { + "epoch": 0.6088730988314053, + "grad_norm": 0.0, + "learning_rate": 7.006398039590609e-06, + "loss": 1.4111, + "step": 20737 + }, + { + "epoch": 0.6089024605085442, + "grad_norm": 0.0, + "learning_rate": 7.005490703641487e-06, + "loss": 1.3818, + "step": 20738 + }, + { + "epoch": 0.6089318221856832, + "grad_norm": 0.0, + "learning_rate": 7.004583394772482e-06, + "loss": 1.1943, + "step": 20739 + }, + { + "epoch": 0.6089611838628223, + "grad_norm": 0.0, + "learning_rate": 7.003676112991795e-06, + "loss": 1.1729, + "step": 20740 + }, + { + "epoch": 0.6089905455399612, + "grad_norm": 0.0, + "learning_rate": 7.002768858307629e-06, + "loss": 1.2529, + "step": 20741 + }, + { + "epoch": 0.6090199072171002, + "grad_norm": 0.0, + "learning_rate": 7.001861630728197e-06, + "loss": 1.2148, + "step": 20742 + }, + { + "epoch": 0.6090492688942393, + "grad_norm": 0.0, + "learning_rate": 7.000954430261695e-06, + "loss": 1.2666, + "step": 20743 + }, + { + "epoch": 0.6090786305713782, + "grad_norm": 0.0, + "learning_rate": 7.000047256916332e-06, + "loss": 1.1885, + "step": 20744 + }, + { + "epoch": 0.6091079922485172, + "grad_norm": 0.0, + "learning_rate": 6.999140110700306e-06, + "loss": 1.229, + "step": 20745 + }, + { + "epoch": 0.6091373539256563, + "grad_norm": 0.0, + "learning_rate": 6.9982329916218316e-06, + "loss": 1.3105, + "step": 20746 + }, + { + "epoch": 0.6091667156027952, + "grad_norm": 0.0, + "learning_rate": 6.997325899689098e-06, + "loss": 1.2471, + "step": 20747 + }, + { + "epoch": 0.6091960772799342, + "grad_norm": 0.0, + "learning_rate": 6.99641883491032e-06, + "loss": 1.1216, + "step": 20748 + }, + { + "epoch": 0.6092254389570733, + "grad_norm": 0.0, + "learning_rate": 6.995511797293692e-06, + "loss": 1.167, + "step": 20749 + }, + { + "epoch": 0.6092548006342122, + "grad_norm": 0.0, + "learning_rate": 6.994604786847425e-06, + "loss": 1.1924, + "step": 20750 + }, + { + "epoch": 0.6092841623113512, + "grad_norm": 0.0, + "learning_rate": 6.9936978035797155e-06, + "loss": 1.2275, + "step": 20751 + }, + { + "epoch": 0.6093135239884903, + "grad_norm": 0.0, + "learning_rate": 6.9927908474987615e-06, + "loss": 1.2119, + "step": 20752 + }, + { + "epoch": 0.6093428856656292, + "grad_norm": 0.0, + "learning_rate": 6.991883918612775e-06, + "loss": 1.3037, + "step": 20753 + }, + { + "epoch": 0.6093722473427682, + "grad_norm": 0.0, + "learning_rate": 6.990977016929951e-06, + "loss": 1.2305, + "step": 20754 + }, + { + "epoch": 0.6094016090199073, + "grad_norm": 0.0, + "learning_rate": 6.9900701424584935e-06, + "loss": 1.3242, + "step": 20755 + }, + { + "epoch": 0.6094309706970462, + "grad_norm": 0.0, + "learning_rate": 6.989163295206599e-06, + "loss": 1.2153, + "step": 20756 + }, + { + "epoch": 0.6094603323741852, + "grad_norm": 0.0, + "learning_rate": 6.988256475182476e-06, + "loss": 1.4229, + "step": 20757 + }, + { + "epoch": 0.6094896940513242, + "grad_norm": 0.0, + "learning_rate": 6.98734968239432e-06, + "loss": 1.2188, + "step": 20758 + }, + { + "epoch": 0.6095190557284632, + "grad_norm": 0.0, + "learning_rate": 6.986442916850332e-06, + "loss": 1.252, + "step": 20759 + }, + { + "epoch": 0.6095484174056022, + "grad_norm": 0.0, + "learning_rate": 6.985536178558714e-06, + "loss": 1.3438, + "step": 20760 + }, + { + "epoch": 0.6095777790827412, + "grad_norm": 0.0, + "learning_rate": 6.98462946752766e-06, + "loss": 1.3721, + "step": 20761 + }, + { + "epoch": 0.6096071407598802, + "grad_norm": 0.0, + "learning_rate": 6.983722783765379e-06, + "loss": 1.3584, + "step": 20762 + }, + { + "epoch": 0.6096365024370192, + "grad_norm": 0.0, + "learning_rate": 6.982816127280062e-06, + "loss": 1.2637, + "step": 20763 + }, + { + "epoch": 0.6096658641141582, + "grad_norm": 0.0, + "learning_rate": 6.981909498079914e-06, + "loss": 1.334, + "step": 20764 + }, + { + "epoch": 0.6096952257912972, + "grad_norm": 0.0, + "learning_rate": 6.981002896173128e-06, + "loss": 1.3389, + "step": 20765 + }, + { + "epoch": 0.6097245874684362, + "grad_norm": 0.0, + "learning_rate": 6.9800963215679085e-06, + "loss": 1.2812, + "step": 20766 + }, + { + "epoch": 0.6097539491455752, + "grad_norm": 0.0, + "learning_rate": 6.97918977427245e-06, + "loss": 1.3721, + "step": 20767 + }, + { + "epoch": 0.6097833108227142, + "grad_norm": 0.0, + "learning_rate": 6.978283254294955e-06, + "loss": 1.3442, + "step": 20768 + }, + { + "epoch": 0.6098126724998532, + "grad_norm": 0.0, + "learning_rate": 6.977376761643617e-06, + "loss": 1.2808, + "step": 20769 + }, + { + "epoch": 0.6098420341769922, + "grad_norm": 0.0, + "learning_rate": 6.976470296326632e-06, + "loss": 1.3232, + "step": 20770 + }, + { + "epoch": 0.6098713958541312, + "grad_norm": 0.0, + "learning_rate": 6.975563858352205e-06, + "loss": 1.333, + "step": 20771 + }, + { + "epoch": 0.6099007575312702, + "grad_norm": 0.0, + "learning_rate": 6.974657447728524e-06, + "loss": 1.2314, + "step": 20772 + }, + { + "epoch": 0.6099301192084092, + "grad_norm": 0.0, + "learning_rate": 6.973751064463794e-06, + "loss": 1.3984, + "step": 20773 + }, + { + "epoch": 0.6099594808855482, + "grad_norm": 0.0, + "learning_rate": 6.972844708566206e-06, + "loss": 1.3643, + "step": 20774 + }, + { + "epoch": 0.6099888425626872, + "grad_norm": 0.0, + "learning_rate": 6.971938380043962e-06, + "loss": 1.2412, + "step": 20775 + }, + { + "epoch": 0.6100182042398262, + "grad_norm": 0.0, + "learning_rate": 6.971032078905249e-06, + "loss": 1.165, + "step": 20776 + }, + { + "epoch": 0.6100475659169652, + "grad_norm": 0.0, + "learning_rate": 6.970125805158274e-06, + "loss": 1.2871, + "step": 20777 + }, + { + "epoch": 0.6100769275941041, + "grad_norm": 0.0, + "learning_rate": 6.969219558811225e-06, + "loss": 1.0615, + "step": 20778 + }, + { + "epoch": 0.6101062892712432, + "grad_norm": 0.0, + "learning_rate": 6.968313339872302e-06, + "loss": 1.2451, + "step": 20779 + }, + { + "epoch": 0.6101356509483822, + "grad_norm": 0.0, + "learning_rate": 6.967407148349696e-06, + "loss": 1.3369, + "step": 20780 + }, + { + "epoch": 0.6101650126255211, + "grad_norm": 0.0, + "learning_rate": 6.9665009842516004e-06, + "loss": 1.3447, + "step": 20781 + }, + { + "epoch": 0.6101943743026602, + "grad_norm": 0.0, + "learning_rate": 6.965594847586219e-06, + "loss": 1.416, + "step": 20782 + }, + { + "epoch": 0.6102237359797992, + "grad_norm": 0.0, + "learning_rate": 6.9646887383617364e-06, + "loss": 1.2046, + "step": 20783 + }, + { + "epoch": 0.6102530976569381, + "grad_norm": 0.0, + "learning_rate": 6.963782656586354e-06, + "loss": 1.3154, + "step": 20784 + }, + { + "epoch": 0.6102824593340772, + "grad_norm": 0.0, + "learning_rate": 6.962876602268258e-06, + "loss": 1.3398, + "step": 20785 + }, + { + "epoch": 0.6103118210112162, + "grad_norm": 0.0, + "learning_rate": 6.961970575415652e-06, + "loss": 1.0186, + "step": 20786 + }, + { + "epoch": 0.6103411826883551, + "grad_norm": 0.0, + "learning_rate": 6.961064576036721e-06, + "loss": 1.2109, + "step": 20787 + }, + { + "epoch": 0.6103705443654942, + "grad_norm": 0.0, + "learning_rate": 6.960158604139664e-06, + "loss": 1.2363, + "step": 20788 + }, + { + "epoch": 0.6103999060426332, + "grad_norm": 0.0, + "learning_rate": 6.9592526597326716e-06, + "loss": 1.1387, + "step": 20789 + }, + { + "epoch": 0.6104292677197721, + "grad_norm": 0.0, + "learning_rate": 6.958346742823931e-06, + "loss": 1.3174, + "step": 20790 + }, + { + "epoch": 0.6104586293969112, + "grad_norm": 0.0, + "learning_rate": 6.957440853421644e-06, + "loss": 1.2568, + "step": 20791 + }, + { + "epoch": 0.6104879910740502, + "grad_norm": 0.0, + "learning_rate": 6.9565349915339965e-06, + "loss": 1.2954, + "step": 20792 + }, + { + "epoch": 0.6105173527511891, + "grad_norm": 0.0, + "learning_rate": 6.955629157169186e-06, + "loss": 1.3477, + "step": 20793 + }, + { + "epoch": 0.6105467144283282, + "grad_norm": 0.0, + "learning_rate": 6.954723350335395e-06, + "loss": 1.2891, + "step": 20794 + }, + { + "epoch": 0.6105760761054672, + "grad_norm": 0.0, + "learning_rate": 6.953817571040825e-06, + "loss": 1.3779, + "step": 20795 + }, + { + "epoch": 0.6106054377826061, + "grad_norm": 0.0, + "learning_rate": 6.952911819293662e-06, + "loss": 1.3975, + "step": 20796 + }, + { + "epoch": 0.6106347994597452, + "grad_norm": 0.0, + "learning_rate": 6.952006095102098e-06, + "loss": 1.3721, + "step": 20797 + }, + { + "epoch": 0.6106641611368842, + "grad_norm": 0.0, + "learning_rate": 6.95110039847432e-06, + "loss": 1.2744, + "step": 20798 + }, + { + "epoch": 0.6106935228140231, + "grad_norm": 0.0, + "learning_rate": 6.950194729418529e-06, + "loss": 1.3584, + "step": 20799 + }, + { + "epoch": 0.6107228844911622, + "grad_norm": 0.0, + "learning_rate": 6.949289087942906e-06, + "loss": 1.3301, + "step": 20800 + }, + { + "epoch": 0.6107522461683011, + "grad_norm": 0.0, + "learning_rate": 6.948383474055638e-06, + "loss": 1.1875, + "step": 20801 + }, + { + "epoch": 0.6107816078454401, + "grad_norm": 0.0, + "learning_rate": 6.947477887764924e-06, + "loss": 1.3408, + "step": 20802 + }, + { + "epoch": 0.6108109695225792, + "grad_norm": 0.0, + "learning_rate": 6.946572329078948e-06, + "loss": 1.2319, + "step": 20803 + }, + { + "epoch": 0.6108403311997181, + "grad_norm": 0.0, + "learning_rate": 6.945666798005901e-06, + "loss": 1.2305, + "step": 20804 + }, + { + "epoch": 0.6108696928768571, + "grad_norm": 0.0, + "learning_rate": 6.944761294553969e-06, + "loss": 1.2612, + "step": 20805 + }, + { + "epoch": 0.6108990545539962, + "grad_norm": 0.0, + "learning_rate": 6.943855818731346e-06, + "loss": 1.2988, + "step": 20806 + }, + { + "epoch": 0.6109284162311351, + "grad_norm": 0.0, + "learning_rate": 6.9429503705462156e-06, + "loss": 1.3486, + "step": 20807 + }, + { + "epoch": 0.6109577779082741, + "grad_norm": 0.0, + "learning_rate": 6.94204495000677e-06, + "loss": 1.2734, + "step": 20808 + }, + { + "epoch": 0.6109871395854131, + "grad_norm": 0.0, + "learning_rate": 6.941139557121196e-06, + "loss": 1.3369, + "step": 20809 + }, + { + "epoch": 0.6110165012625521, + "grad_norm": 0.0, + "learning_rate": 6.9402341918976755e-06, + "loss": 1.166, + "step": 20810 + }, + { + "epoch": 0.6110458629396911, + "grad_norm": 0.0, + "learning_rate": 6.939328854344405e-06, + "loss": 1.2563, + "step": 20811 + }, + { + "epoch": 0.6110752246168301, + "grad_norm": 0.0, + "learning_rate": 6.938423544469566e-06, + "loss": 1.2773, + "step": 20812 + }, + { + "epoch": 0.6111045862939691, + "grad_norm": 0.0, + "learning_rate": 6.937518262281348e-06, + "loss": 1.3809, + "step": 20813 + }, + { + "epoch": 0.6111339479711081, + "grad_norm": 0.0, + "learning_rate": 6.936613007787934e-06, + "loss": 1.2754, + "step": 20814 + }, + { + "epoch": 0.611163309648247, + "grad_norm": 0.0, + "learning_rate": 6.935707780997516e-06, + "loss": 1.3164, + "step": 20815 + }, + { + "epoch": 0.6111926713253861, + "grad_norm": 0.0, + "learning_rate": 6.934802581918274e-06, + "loss": 1.1904, + "step": 20816 + }, + { + "epoch": 0.6112220330025251, + "grad_norm": 0.0, + "learning_rate": 6.933897410558401e-06, + "loss": 1.1797, + "step": 20817 + }, + { + "epoch": 0.611251394679664, + "grad_norm": 0.0, + "learning_rate": 6.9329922669260756e-06, + "loss": 1.3594, + "step": 20818 + }, + { + "epoch": 0.6112807563568031, + "grad_norm": 0.0, + "learning_rate": 6.932087151029489e-06, + "loss": 1.1938, + "step": 20819 + }, + { + "epoch": 0.6113101180339421, + "grad_norm": 0.0, + "learning_rate": 6.931182062876824e-06, + "loss": 1.2646, + "step": 20820 + }, + { + "epoch": 0.611339479711081, + "grad_norm": 0.0, + "learning_rate": 6.930277002476265e-06, + "loss": 1.2393, + "step": 20821 + }, + { + "epoch": 0.6113688413882201, + "grad_norm": 0.0, + "learning_rate": 6.929371969835998e-06, + "loss": 1.3418, + "step": 20822 + }, + { + "epoch": 0.6113982030653591, + "grad_norm": 0.0, + "learning_rate": 6.9284669649642045e-06, + "loss": 1.3008, + "step": 20823 + }, + { + "epoch": 0.611427564742498, + "grad_norm": 0.0, + "learning_rate": 6.927561987869076e-06, + "loss": 1.249, + "step": 20824 + }, + { + "epoch": 0.6114569264196371, + "grad_norm": 0.0, + "learning_rate": 6.926657038558784e-06, + "loss": 1.188, + "step": 20825 + }, + { + "epoch": 0.6114862880967761, + "grad_norm": 0.0, + "learning_rate": 6.925752117041525e-06, + "loss": 1.2461, + "step": 20826 + }, + { + "epoch": 0.611515649773915, + "grad_norm": 0.0, + "learning_rate": 6.924847223325474e-06, + "loss": 1.3242, + "step": 20827 + }, + { + "epoch": 0.6115450114510541, + "grad_norm": 0.0, + "learning_rate": 6.923942357418821e-06, + "loss": 1.1777, + "step": 20828 + }, + { + "epoch": 0.6115743731281931, + "grad_norm": 0.0, + "learning_rate": 6.923037519329743e-06, + "loss": 1.311, + "step": 20829 + }, + { + "epoch": 0.611603734805332, + "grad_norm": 0.0, + "learning_rate": 6.92213270906642e-06, + "loss": 1.1919, + "step": 20830 + }, + { + "epoch": 0.6116330964824711, + "grad_norm": 0.0, + "learning_rate": 6.921227926637044e-06, + "loss": 1.2666, + "step": 20831 + }, + { + "epoch": 0.6116624581596101, + "grad_norm": 0.0, + "learning_rate": 6.920323172049791e-06, + "loss": 1.3965, + "step": 20832 + }, + { + "epoch": 0.611691819836749, + "grad_norm": 0.0, + "learning_rate": 6.919418445312847e-06, + "loss": 1.3037, + "step": 20833 + }, + { + "epoch": 0.6117211815138881, + "grad_norm": 0.0, + "learning_rate": 6.918513746434385e-06, + "loss": 1.1631, + "step": 20834 + }, + { + "epoch": 0.6117505431910271, + "grad_norm": 0.0, + "learning_rate": 6.917609075422598e-06, + "loss": 1.3701, + "step": 20835 + }, + { + "epoch": 0.611779904868166, + "grad_norm": 0.0, + "learning_rate": 6.9167044322856595e-06, + "loss": 1.3091, + "step": 20836 + }, + { + "epoch": 0.6118092665453051, + "grad_norm": 0.0, + "learning_rate": 6.915799817031752e-06, + "loss": 1.2158, + "step": 20837 + }, + { + "epoch": 0.6118386282224441, + "grad_norm": 0.0, + "learning_rate": 6.914895229669059e-06, + "loss": 1.3174, + "step": 20838 + }, + { + "epoch": 0.611867989899583, + "grad_norm": 0.0, + "learning_rate": 6.913990670205755e-06, + "loss": 1.1846, + "step": 20839 + }, + { + "epoch": 0.6118973515767221, + "grad_norm": 0.0, + "learning_rate": 6.913086138650026e-06, + "loss": 1.4277, + "step": 20840 + }, + { + "epoch": 0.611926713253861, + "grad_norm": 0.0, + "learning_rate": 6.912181635010049e-06, + "loss": 1.2261, + "step": 20841 + }, + { + "epoch": 0.611956074931, + "grad_norm": 0.0, + "learning_rate": 6.911277159294005e-06, + "loss": 1.2695, + "step": 20842 + }, + { + "epoch": 0.6119854366081391, + "grad_norm": 0.0, + "learning_rate": 6.910372711510069e-06, + "loss": 1.333, + "step": 20843 + }, + { + "epoch": 0.612014798285278, + "grad_norm": 0.0, + "learning_rate": 6.909468291666428e-06, + "loss": 1.1216, + "step": 20844 + }, + { + "epoch": 0.612044159962417, + "grad_norm": 0.0, + "learning_rate": 6.908563899771255e-06, + "loss": 1.3369, + "step": 20845 + }, + { + "epoch": 0.6120735216395561, + "grad_norm": 0.0, + "learning_rate": 6.907659535832733e-06, + "loss": 1.1816, + "step": 20846 + }, + { + "epoch": 0.612102883316695, + "grad_norm": 0.0, + "learning_rate": 6.906755199859032e-06, + "loss": 1.3125, + "step": 20847 + }, + { + "epoch": 0.612132244993834, + "grad_norm": 0.0, + "learning_rate": 6.905850891858341e-06, + "loss": 1.2109, + "step": 20848 + }, + { + "epoch": 0.6121616066709731, + "grad_norm": 0.0, + "learning_rate": 6.904946611838835e-06, + "loss": 1.2744, + "step": 20849 + }, + { + "epoch": 0.612190968348112, + "grad_norm": 0.0, + "learning_rate": 6.904042359808682e-06, + "loss": 1.3838, + "step": 20850 + }, + { + "epoch": 0.612220330025251, + "grad_norm": 0.0, + "learning_rate": 6.903138135776073e-06, + "loss": 1.2061, + "step": 20851 + }, + { + "epoch": 0.6122496917023901, + "grad_norm": 0.0, + "learning_rate": 6.902233939749177e-06, + "loss": 1.3154, + "step": 20852 + }, + { + "epoch": 0.612279053379529, + "grad_norm": 0.0, + "learning_rate": 6.901329771736174e-06, + "loss": 1.2666, + "step": 20853 + }, + { + "epoch": 0.612308415056668, + "grad_norm": 0.0, + "learning_rate": 6.900425631745234e-06, + "loss": 1.293, + "step": 20854 + }, + { + "epoch": 0.6123377767338071, + "grad_norm": 0.0, + "learning_rate": 6.899521519784544e-06, + "loss": 1.3242, + "step": 20855 + }, + { + "epoch": 0.612367138410946, + "grad_norm": 0.0, + "learning_rate": 6.898617435862274e-06, + "loss": 1.1216, + "step": 20856 + }, + { + "epoch": 0.612396500088085, + "grad_norm": 0.0, + "learning_rate": 6.8977133799866015e-06, + "loss": 1.2358, + "step": 20857 + }, + { + "epoch": 0.6124258617652241, + "grad_norm": 0.0, + "learning_rate": 6.896809352165701e-06, + "loss": 1.2207, + "step": 20858 + }, + { + "epoch": 0.612455223442363, + "grad_norm": 0.0, + "learning_rate": 6.895905352407746e-06, + "loss": 1.2764, + "step": 20859 + }, + { + "epoch": 0.612484585119502, + "grad_norm": 0.0, + "learning_rate": 6.895001380720916e-06, + "loss": 1.1416, + "step": 20860 + }, + { + "epoch": 0.6125139467966411, + "grad_norm": 0.0, + "learning_rate": 6.894097437113383e-06, + "loss": 1.2559, + "step": 20861 + }, + { + "epoch": 0.61254330847378, + "grad_norm": 0.0, + "learning_rate": 6.893193521593325e-06, + "loss": 1.333, + "step": 20862 + }, + { + "epoch": 0.612572670150919, + "grad_norm": 0.0, + "learning_rate": 6.892289634168907e-06, + "loss": 1.0791, + "step": 20863 + }, + { + "epoch": 0.6126020318280581, + "grad_norm": 0.0, + "learning_rate": 6.891385774848316e-06, + "loss": 1.2329, + "step": 20864 + }, + { + "epoch": 0.612631393505197, + "grad_norm": 0.0, + "learning_rate": 6.890481943639716e-06, + "loss": 1.29, + "step": 20865 + }, + { + "epoch": 0.612660755182336, + "grad_norm": 0.0, + "learning_rate": 6.889578140551287e-06, + "loss": 1.3828, + "step": 20866 + }, + { + "epoch": 0.612690116859475, + "grad_norm": 0.0, + "learning_rate": 6.888674365591196e-06, + "loss": 1.1904, + "step": 20867 + }, + { + "epoch": 0.612719478536614, + "grad_norm": 0.0, + "learning_rate": 6.887770618767623e-06, + "loss": 1.1675, + "step": 20868 + }, + { + "epoch": 0.612748840213753, + "grad_norm": 0.0, + "learning_rate": 6.886866900088738e-06, + "loss": 1.2637, + "step": 20869 + }, + { + "epoch": 0.612778201890892, + "grad_norm": 0.0, + "learning_rate": 6.885963209562709e-06, + "loss": 1.2969, + "step": 20870 + }, + { + "epoch": 0.612807563568031, + "grad_norm": 0.0, + "learning_rate": 6.885059547197716e-06, + "loss": 1.2246, + "step": 20871 + }, + { + "epoch": 0.61283692524517, + "grad_norm": 0.0, + "learning_rate": 6.884155913001923e-06, + "loss": 1.2148, + "step": 20872 + }, + { + "epoch": 0.612866286922309, + "grad_norm": 0.0, + "learning_rate": 6.88325230698351e-06, + "loss": 1.417, + "step": 20873 + }, + { + "epoch": 0.612895648599448, + "grad_norm": 0.0, + "learning_rate": 6.882348729150642e-06, + "loss": 1.3203, + "step": 20874 + }, + { + "epoch": 0.612925010276587, + "grad_norm": 0.0, + "learning_rate": 6.881445179511496e-06, + "loss": 1.2959, + "step": 20875 + }, + { + "epoch": 0.612954371953726, + "grad_norm": 0.0, + "learning_rate": 6.880541658074237e-06, + "loss": 1.2759, + "step": 20876 + }, + { + "epoch": 0.612983733630865, + "grad_norm": 0.0, + "learning_rate": 6.87963816484704e-06, + "loss": 1.3105, + "step": 20877 + }, + { + "epoch": 0.613013095308004, + "grad_norm": 0.0, + "learning_rate": 6.878734699838075e-06, + "loss": 1.2539, + "step": 20878 + }, + { + "epoch": 0.613042456985143, + "grad_norm": 0.0, + "learning_rate": 6.877831263055508e-06, + "loss": 1.2715, + "step": 20879 + }, + { + "epoch": 0.613071818662282, + "grad_norm": 0.0, + "learning_rate": 6.876927854507516e-06, + "loss": 1.3271, + "step": 20880 + }, + { + "epoch": 0.613101180339421, + "grad_norm": 0.0, + "learning_rate": 6.876024474202263e-06, + "loss": 1.2559, + "step": 20881 + }, + { + "epoch": 0.61313054201656, + "grad_norm": 0.0, + "learning_rate": 6.875121122147923e-06, + "loss": 1.0796, + "step": 20882 + }, + { + "epoch": 0.613159903693699, + "grad_norm": 0.0, + "learning_rate": 6.874217798352659e-06, + "loss": 1.2822, + "step": 20883 + }, + { + "epoch": 0.613189265370838, + "grad_norm": 0.0, + "learning_rate": 6.873314502824649e-06, + "loss": 1.3281, + "step": 20884 + }, + { + "epoch": 0.613218627047977, + "grad_norm": 0.0, + "learning_rate": 6.872411235572054e-06, + "loss": 1.2153, + "step": 20885 + }, + { + "epoch": 0.613247988725116, + "grad_norm": 0.0, + "learning_rate": 6.871507996603046e-06, + "loss": 1.2578, + "step": 20886 + }, + { + "epoch": 0.613277350402255, + "grad_norm": 0.0, + "learning_rate": 6.870604785925794e-06, + "loss": 1.2051, + "step": 20887 + }, + { + "epoch": 0.613306712079394, + "grad_norm": 0.0, + "learning_rate": 6.86970160354846e-06, + "loss": 1.2979, + "step": 20888 + }, + { + "epoch": 0.613336073756533, + "grad_norm": 0.0, + "learning_rate": 6.868798449479219e-06, + "loss": 1.2861, + "step": 20889 + }, + { + "epoch": 0.613365435433672, + "grad_norm": 0.0, + "learning_rate": 6.867895323726235e-06, + "loss": 1.1592, + "step": 20890 + }, + { + "epoch": 0.613394797110811, + "grad_norm": 0.0, + "learning_rate": 6.866992226297676e-06, + "loss": 1.3926, + "step": 20891 + }, + { + "epoch": 0.61342415878795, + "grad_norm": 0.0, + "learning_rate": 6.866089157201705e-06, + "loss": 1.1846, + "step": 20892 + }, + { + "epoch": 0.6134535204650889, + "grad_norm": 0.0, + "learning_rate": 6.865186116446498e-06, + "loss": 1.2744, + "step": 20893 + }, + { + "epoch": 0.613482882142228, + "grad_norm": 0.0, + "learning_rate": 6.864283104040212e-06, + "loss": 1.1377, + "step": 20894 + }, + { + "epoch": 0.613512243819367, + "grad_norm": 0.0, + "learning_rate": 6.863380119991021e-06, + "loss": 1.2783, + "step": 20895 + }, + { + "epoch": 0.6135416054965059, + "grad_norm": 0.0, + "learning_rate": 6.8624771643070805e-06, + "loss": 1.3545, + "step": 20896 + }, + { + "epoch": 0.613570967173645, + "grad_norm": 0.0, + "learning_rate": 6.861574236996569e-06, + "loss": 1.2627, + "step": 20897 + }, + { + "epoch": 0.613600328850784, + "grad_norm": 0.0, + "learning_rate": 6.8606713380676434e-06, + "loss": 1.2812, + "step": 20898 + }, + { + "epoch": 0.6136296905279229, + "grad_norm": 0.0, + "learning_rate": 6.859768467528469e-06, + "loss": 1.2139, + "step": 20899 + }, + { + "epoch": 0.613659052205062, + "grad_norm": 0.0, + "learning_rate": 6.858865625387217e-06, + "loss": 1.3506, + "step": 20900 + }, + { + "epoch": 0.613688413882201, + "grad_norm": 0.0, + "learning_rate": 6.8579628116520435e-06, + "loss": 1.3701, + "step": 20901 + }, + { + "epoch": 0.6137177755593399, + "grad_norm": 0.0, + "learning_rate": 6.857060026331119e-06, + "loss": 1.4189, + "step": 20902 + }, + { + "epoch": 0.613747137236479, + "grad_norm": 0.0, + "learning_rate": 6.856157269432601e-06, + "loss": 1.3076, + "step": 20903 + }, + { + "epoch": 0.613776498913618, + "grad_norm": 0.0, + "learning_rate": 6.855254540964663e-06, + "loss": 1.2988, + "step": 20904 + }, + { + "epoch": 0.6138058605907569, + "grad_norm": 0.0, + "learning_rate": 6.854351840935463e-06, + "loss": 1.2935, + "step": 20905 + }, + { + "epoch": 0.613835222267896, + "grad_norm": 0.0, + "learning_rate": 6.853449169353164e-06, + "loss": 1.3164, + "step": 20906 + }, + { + "epoch": 0.613864583945035, + "grad_norm": 0.0, + "learning_rate": 6.852546526225931e-06, + "loss": 1.2236, + "step": 20907 + }, + { + "epoch": 0.6138939456221739, + "grad_norm": 0.0, + "learning_rate": 6.851643911561923e-06, + "loss": 1.3809, + "step": 20908 + }, + { + "epoch": 0.6139233072993129, + "grad_norm": 0.0, + "learning_rate": 6.850741325369307e-06, + "loss": 1.1963, + "step": 20909 + }, + { + "epoch": 0.613952668976452, + "grad_norm": 0.0, + "learning_rate": 6.849838767656245e-06, + "loss": 1.2197, + "step": 20910 + }, + { + "epoch": 0.6139820306535909, + "grad_norm": 0.0, + "learning_rate": 6.848936238430898e-06, + "loss": 1.312, + "step": 20911 + }, + { + "epoch": 0.6140113923307299, + "grad_norm": 0.0, + "learning_rate": 6.848033737701423e-06, + "loss": 1.291, + "step": 20912 + }, + { + "epoch": 0.614040754007869, + "grad_norm": 0.0, + "learning_rate": 6.847131265475991e-06, + "loss": 1.4248, + "step": 20913 + }, + { + "epoch": 0.6140701156850079, + "grad_norm": 0.0, + "learning_rate": 6.8462288217627574e-06, + "loss": 1.1465, + "step": 20914 + }, + { + "epoch": 0.6140994773621469, + "grad_norm": 0.0, + "learning_rate": 6.845326406569884e-06, + "loss": 1.249, + "step": 20915 + }, + { + "epoch": 0.614128839039286, + "grad_norm": 0.0, + "learning_rate": 6.844424019905529e-06, + "loss": 1.2734, + "step": 20916 + }, + { + "epoch": 0.6141582007164249, + "grad_norm": 0.0, + "learning_rate": 6.84352166177786e-06, + "loss": 1.2734, + "step": 20917 + }, + { + "epoch": 0.6141875623935639, + "grad_norm": 0.0, + "learning_rate": 6.842619332195033e-06, + "loss": 1.3877, + "step": 20918 + }, + { + "epoch": 0.6142169240707029, + "grad_norm": 0.0, + "learning_rate": 6.841717031165205e-06, + "loss": 1.3066, + "step": 20919 + }, + { + "epoch": 0.6142462857478419, + "grad_norm": 0.0, + "learning_rate": 6.840814758696542e-06, + "loss": 1.2891, + "step": 20920 + }, + { + "epoch": 0.6142756474249809, + "grad_norm": 0.0, + "learning_rate": 6.839912514797196e-06, + "loss": 1.3711, + "step": 20921 + }, + { + "epoch": 0.6143050091021199, + "grad_norm": 0.0, + "learning_rate": 6.839010299475334e-06, + "loss": 1.3101, + "step": 20922 + }, + { + "epoch": 0.6143343707792589, + "grad_norm": 0.0, + "learning_rate": 6.8381081127391095e-06, + "loss": 1.1899, + "step": 20923 + }, + { + "epoch": 0.6143637324563979, + "grad_norm": 0.0, + "learning_rate": 6.837205954596684e-06, + "loss": 1.2041, + "step": 20924 + }, + { + "epoch": 0.6143930941335369, + "grad_norm": 0.0, + "learning_rate": 6.836303825056213e-06, + "loss": 1.3057, + "step": 20925 + }, + { + "epoch": 0.6144224558106759, + "grad_norm": 0.0, + "learning_rate": 6.835401724125862e-06, + "loss": 1.3096, + "step": 20926 + }, + { + "epoch": 0.6144518174878149, + "grad_norm": 0.0, + "learning_rate": 6.834499651813782e-06, + "loss": 1.2871, + "step": 20927 + }, + { + "epoch": 0.6144811791649539, + "grad_norm": 0.0, + "learning_rate": 6.833597608128127e-06, + "loss": 1.2637, + "step": 20928 + }, + { + "epoch": 0.6145105408420929, + "grad_norm": 0.0, + "learning_rate": 6.832695593077066e-06, + "loss": 1.2974, + "step": 20929 + }, + { + "epoch": 0.6145399025192319, + "grad_norm": 0.0, + "learning_rate": 6.831793606668746e-06, + "loss": 1.1504, + "step": 20930 + }, + { + "epoch": 0.6145692641963709, + "grad_norm": 0.0, + "learning_rate": 6.830891648911331e-06, + "loss": 1.2793, + "step": 20931 + }, + { + "epoch": 0.6145986258735099, + "grad_norm": 0.0, + "learning_rate": 6.829989719812968e-06, + "loss": 1.3105, + "step": 20932 + }, + { + "epoch": 0.6146279875506488, + "grad_norm": 0.0, + "learning_rate": 6.829087819381827e-06, + "loss": 1.2417, + "step": 20933 + }, + { + "epoch": 0.6146573492277879, + "grad_norm": 0.0, + "learning_rate": 6.828185947626054e-06, + "loss": 1.2969, + "step": 20934 + }, + { + "epoch": 0.6146867109049269, + "grad_norm": 0.0, + "learning_rate": 6.8272841045538085e-06, + "loss": 1.3154, + "step": 20935 + }, + { + "epoch": 0.6147160725820658, + "grad_norm": 0.0, + "learning_rate": 6.826382290173242e-06, + "loss": 1.2979, + "step": 20936 + }, + { + "epoch": 0.6147454342592049, + "grad_norm": 0.0, + "learning_rate": 6.825480504492518e-06, + "loss": 1.291, + "step": 20937 + }, + { + "epoch": 0.6147747959363439, + "grad_norm": 0.0, + "learning_rate": 6.824578747519786e-06, + "loss": 1.2651, + "step": 20938 + }, + { + "epoch": 0.6148041576134828, + "grad_norm": 0.0, + "learning_rate": 6.8236770192631996e-06, + "loss": 1.2471, + "step": 20939 + }, + { + "epoch": 0.6148335192906219, + "grad_norm": 0.0, + "learning_rate": 6.822775319730918e-06, + "loss": 1.4277, + "step": 20940 + }, + { + "epoch": 0.6148628809677609, + "grad_norm": 0.0, + "learning_rate": 6.821873648931089e-06, + "loss": 1.1982, + "step": 20941 + }, + { + "epoch": 0.6148922426448998, + "grad_norm": 0.0, + "learning_rate": 6.820972006871873e-06, + "loss": 1.2949, + "step": 20942 + }, + { + "epoch": 0.6149216043220389, + "grad_norm": 0.0, + "learning_rate": 6.820070393561421e-06, + "loss": 1.1094, + "step": 20943 + }, + { + "epoch": 0.6149509659991779, + "grad_norm": 0.0, + "learning_rate": 6.819168809007888e-06, + "loss": 1.1216, + "step": 20944 + }, + { + "epoch": 0.6149803276763168, + "grad_norm": 0.0, + "learning_rate": 6.818267253219422e-06, + "loss": 1.1982, + "step": 20945 + }, + { + "epoch": 0.6150096893534559, + "grad_norm": 0.0, + "learning_rate": 6.817365726204186e-06, + "loss": 1.2803, + "step": 20946 + }, + { + "epoch": 0.6150390510305949, + "grad_norm": 0.0, + "learning_rate": 6.816464227970325e-06, + "loss": 1.3457, + "step": 20947 + }, + { + "epoch": 0.6150684127077338, + "grad_norm": 0.0, + "learning_rate": 6.815562758525991e-06, + "loss": 1.271, + "step": 20948 + }, + { + "epoch": 0.6150977743848729, + "grad_norm": 0.0, + "learning_rate": 6.814661317879341e-06, + "loss": 1.4189, + "step": 20949 + }, + { + "epoch": 0.6151271360620119, + "grad_norm": 0.0, + "learning_rate": 6.8137599060385204e-06, + "loss": 1.2437, + "step": 20950 + }, + { + "epoch": 0.6151564977391508, + "grad_norm": 0.0, + "learning_rate": 6.812858523011692e-06, + "loss": 1.1699, + "step": 20951 + }, + { + "epoch": 0.6151858594162899, + "grad_norm": 0.0, + "learning_rate": 6.811957168806993e-06, + "loss": 1.252, + "step": 20952 + }, + { + "epoch": 0.6152152210934289, + "grad_norm": 0.0, + "learning_rate": 6.8110558434325856e-06, + "loss": 1.124, + "step": 20953 + }, + { + "epoch": 0.6152445827705678, + "grad_norm": 0.0, + "learning_rate": 6.810154546896615e-06, + "loss": 1.2402, + "step": 20954 + }, + { + "epoch": 0.6152739444477069, + "grad_norm": 0.0, + "learning_rate": 6.809253279207237e-06, + "loss": 1.2461, + "step": 20955 + }, + { + "epoch": 0.6153033061248459, + "grad_norm": 0.0, + "learning_rate": 6.8083520403725975e-06, + "loss": 1.0718, + "step": 20956 + }, + { + "epoch": 0.6153326678019848, + "grad_norm": 0.0, + "learning_rate": 6.807450830400845e-06, + "loss": 1.2832, + "step": 20957 + }, + { + "epoch": 0.6153620294791239, + "grad_norm": 0.0, + "learning_rate": 6.806549649300136e-06, + "loss": 1.2969, + "step": 20958 + }, + { + "epoch": 0.6153913911562628, + "grad_norm": 0.0, + "learning_rate": 6.805648497078613e-06, + "loss": 1.2808, + "step": 20959 + }, + { + "epoch": 0.6154207528334018, + "grad_norm": 0.0, + "learning_rate": 6.804747373744431e-06, + "loss": 1.3398, + "step": 20960 + }, + { + "epoch": 0.6154501145105409, + "grad_norm": 0.0, + "learning_rate": 6.803846279305735e-06, + "loss": 1.3076, + "step": 20961 + }, + { + "epoch": 0.6154794761876798, + "grad_norm": 0.0, + "learning_rate": 6.802945213770677e-06, + "loss": 1.3662, + "step": 20962 + }, + { + "epoch": 0.6155088378648188, + "grad_norm": 0.0, + "learning_rate": 6.802044177147405e-06, + "loss": 1.27, + "step": 20963 + }, + { + "epoch": 0.6155381995419579, + "grad_norm": 0.0, + "learning_rate": 6.8011431694440665e-06, + "loss": 1.3193, + "step": 20964 + }, + { + "epoch": 0.6155675612190968, + "grad_norm": 0.0, + "learning_rate": 6.800242190668805e-06, + "loss": 1.3076, + "step": 20965 + }, + { + "epoch": 0.6155969228962358, + "grad_norm": 0.0, + "learning_rate": 6.799341240829778e-06, + "loss": 1.3115, + "step": 20966 + }, + { + "epoch": 0.6156262845733749, + "grad_norm": 0.0, + "learning_rate": 6.798440319935128e-06, + "loss": 1.2891, + "step": 20967 + }, + { + "epoch": 0.6156556462505138, + "grad_norm": 0.0, + "learning_rate": 6.797539427992999e-06, + "loss": 1.2783, + "step": 20968 + }, + { + "epoch": 0.6156850079276528, + "grad_norm": 0.0, + "learning_rate": 6.796638565011544e-06, + "loss": 1.0508, + "step": 20969 + }, + { + "epoch": 0.6157143696047919, + "grad_norm": 0.0, + "learning_rate": 6.795737730998904e-06, + "loss": 1.2432, + "step": 20970 + }, + { + "epoch": 0.6157437312819308, + "grad_norm": 0.0, + "learning_rate": 6.79483692596323e-06, + "loss": 1.249, + "step": 20971 + }, + { + "epoch": 0.6157730929590698, + "grad_norm": 0.0, + "learning_rate": 6.793936149912665e-06, + "loss": 1.3271, + "step": 20972 + }, + { + "epoch": 0.6158024546362089, + "grad_norm": 0.0, + "learning_rate": 6.793035402855359e-06, + "loss": 1.3613, + "step": 20973 + }, + { + "epoch": 0.6158318163133478, + "grad_norm": 0.0, + "learning_rate": 6.7921346847994515e-06, + "loss": 1.2715, + "step": 20974 + }, + { + "epoch": 0.6158611779904868, + "grad_norm": 0.0, + "learning_rate": 6.791233995753096e-06, + "loss": 1.2485, + "step": 20975 + }, + { + "epoch": 0.6158905396676259, + "grad_norm": 0.0, + "learning_rate": 6.790333335724435e-06, + "loss": 1.1182, + "step": 20976 + }, + { + "epoch": 0.6159199013447648, + "grad_norm": 0.0, + "learning_rate": 6.789432704721605e-06, + "loss": 1.3379, + "step": 20977 + }, + { + "epoch": 0.6159492630219038, + "grad_norm": 0.0, + "learning_rate": 6.788532102752761e-06, + "loss": 1.1094, + "step": 20978 + }, + { + "epoch": 0.6159786246990429, + "grad_norm": 0.0, + "learning_rate": 6.787631529826042e-06, + "loss": 1.3145, + "step": 20979 + }, + { + "epoch": 0.6160079863761818, + "grad_norm": 0.0, + "learning_rate": 6.786730985949597e-06, + "loss": 1.27, + "step": 20980 + }, + { + "epoch": 0.6160373480533208, + "grad_norm": 0.0, + "learning_rate": 6.785830471131561e-06, + "loss": 1.2803, + "step": 20981 + }, + { + "epoch": 0.6160667097304598, + "grad_norm": 0.0, + "learning_rate": 6.784929985380087e-06, + "loss": 1.1924, + "step": 20982 + }, + { + "epoch": 0.6160960714075988, + "grad_norm": 0.0, + "learning_rate": 6.7840295287033145e-06, + "loss": 1.2881, + "step": 20983 + }, + { + "epoch": 0.6161254330847378, + "grad_norm": 0.0, + "learning_rate": 6.783129101109387e-06, + "loss": 1.2388, + "step": 20984 + }, + { + "epoch": 0.6161547947618768, + "grad_norm": 0.0, + "learning_rate": 6.782228702606444e-06, + "loss": 1.1265, + "step": 20985 + }, + { + "epoch": 0.6161841564390158, + "grad_norm": 0.0, + "learning_rate": 6.781328333202634e-06, + "loss": 1.292, + "step": 20986 + }, + { + "epoch": 0.6162135181161548, + "grad_norm": 0.0, + "learning_rate": 6.780427992906096e-06, + "loss": 1.272, + "step": 20987 + }, + { + "epoch": 0.6162428797932938, + "grad_norm": 0.0, + "learning_rate": 6.779527681724971e-06, + "loss": 1.2422, + "step": 20988 + }, + { + "epoch": 0.6162722414704328, + "grad_norm": 0.0, + "learning_rate": 6.778627399667403e-06, + "loss": 1.3711, + "step": 20989 + }, + { + "epoch": 0.6163016031475718, + "grad_norm": 0.0, + "learning_rate": 6.777727146741529e-06, + "loss": 1.1689, + "step": 20990 + }, + { + "epoch": 0.6163309648247108, + "grad_norm": 0.0, + "learning_rate": 6.776826922955497e-06, + "loss": 1.2754, + "step": 20991 + }, + { + "epoch": 0.6163603265018498, + "grad_norm": 0.0, + "learning_rate": 6.775926728317444e-06, + "loss": 1.2559, + "step": 20992 + }, + { + "epoch": 0.6163896881789888, + "grad_norm": 0.0, + "learning_rate": 6.775026562835512e-06, + "loss": 1.1953, + "step": 20993 + }, + { + "epoch": 0.6164190498561278, + "grad_norm": 0.0, + "learning_rate": 6.774126426517838e-06, + "loss": 1.2676, + "step": 20994 + }, + { + "epoch": 0.6164484115332668, + "grad_norm": 0.0, + "learning_rate": 6.7732263193725675e-06, + "loss": 1.3311, + "step": 20995 + }, + { + "epoch": 0.6164777732104058, + "grad_norm": 0.0, + "learning_rate": 6.77232624140784e-06, + "loss": 1.3438, + "step": 20996 + }, + { + "epoch": 0.6165071348875448, + "grad_norm": 0.0, + "learning_rate": 6.771426192631789e-06, + "loss": 1.3066, + "step": 20997 + }, + { + "epoch": 0.6165364965646838, + "grad_norm": 0.0, + "learning_rate": 6.770526173052561e-06, + "loss": 1.3633, + "step": 20998 + }, + { + "epoch": 0.6165658582418228, + "grad_norm": 0.0, + "learning_rate": 6.769626182678286e-06, + "loss": 1.3101, + "step": 20999 + }, + { + "epoch": 0.6165952199189618, + "grad_norm": 0.0, + "learning_rate": 6.768726221517115e-06, + "loss": 1.231, + "step": 21000 + }, + { + "epoch": 0.6166245815961008, + "grad_norm": 0.0, + "learning_rate": 6.767826289577175e-06, + "loss": 1.3066, + "step": 21001 + }, + { + "epoch": 0.6166539432732397, + "grad_norm": 0.0, + "learning_rate": 6.766926386866615e-06, + "loss": 1.3267, + "step": 21002 + }, + { + "epoch": 0.6166833049503788, + "grad_norm": 0.0, + "learning_rate": 6.766026513393562e-06, + "loss": 1.3076, + "step": 21003 + }, + { + "epoch": 0.6167126666275178, + "grad_norm": 0.0, + "learning_rate": 6.765126669166167e-06, + "loss": 1.188, + "step": 21004 + }, + { + "epoch": 0.6167420283046567, + "grad_norm": 0.0, + "learning_rate": 6.764226854192552e-06, + "loss": 1.2871, + "step": 21005 + }, + { + "epoch": 0.6167713899817958, + "grad_norm": 0.0, + "learning_rate": 6.763327068480867e-06, + "loss": 1.3418, + "step": 21006 + }, + { + "epoch": 0.6168007516589348, + "grad_norm": 0.0, + "learning_rate": 6.7624273120392445e-06, + "loss": 1.2046, + "step": 21007 + }, + { + "epoch": 0.6168301133360737, + "grad_norm": 0.0, + "learning_rate": 6.761527584875819e-06, + "loss": 1.3066, + "step": 21008 + }, + { + "epoch": 0.6168594750132128, + "grad_norm": 0.0, + "learning_rate": 6.7606278869987305e-06, + "loss": 1.2915, + "step": 21009 + }, + { + "epoch": 0.6168888366903518, + "grad_norm": 0.0, + "learning_rate": 6.75972821841611e-06, + "loss": 1.2432, + "step": 21010 + }, + { + "epoch": 0.6169181983674907, + "grad_norm": 0.0, + "learning_rate": 6.7588285791361e-06, + "loss": 1.3027, + "step": 21011 + }, + { + "epoch": 0.6169475600446297, + "grad_norm": 0.0, + "learning_rate": 6.757928969166834e-06, + "loss": 1.1406, + "step": 21012 + }, + { + "epoch": 0.6169769217217688, + "grad_norm": 0.0, + "learning_rate": 6.757029388516447e-06, + "loss": 1.3545, + "step": 21013 + }, + { + "epoch": 0.6170062833989077, + "grad_norm": 0.0, + "learning_rate": 6.756129837193069e-06, + "loss": 1.2812, + "step": 21014 + }, + { + "epoch": 0.6170356450760467, + "grad_norm": 0.0, + "learning_rate": 6.755230315204844e-06, + "loss": 1.3027, + "step": 21015 + }, + { + "epoch": 0.6170650067531858, + "grad_norm": 0.0, + "learning_rate": 6.754330822559902e-06, + "loss": 1.2969, + "step": 21016 + }, + { + "epoch": 0.6170943684303247, + "grad_norm": 0.0, + "learning_rate": 6.753431359266378e-06, + "loss": 1.2461, + "step": 21017 + }, + { + "epoch": 0.6171237301074637, + "grad_norm": 0.0, + "learning_rate": 6.752531925332405e-06, + "loss": 1.3213, + "step": 21018 + }, + { + "epoch": 0.6171530917846028, + "grad_norm": 0.0, + "learning_rate": 6.7516325207661134e-06, + "loss": 1.2627, + "step": 21019 + }, + { + "epoch": 0.6171824534617417, + "grad_norm": 0.0, + "learning_rate": 6.750733145575648e-06, + "loss": 1.2432, + "step": 21020 + }, + { + "epoch": 0.6172118151388807, + "grad_norm": 0.0, + "learning_rate": 6.74983379976913e-06, + "loss": 1.4316, + "step": 21021 + }, + { + "epoch": 0.6172411768160198, + "grad_norm": 0.0, + "learning_rate": 6.748934483354699e-06, + "loss": 1.3857, + "step": 21022 + }, + { + "epoch": 0.6172705384931587, + "grad_norm": 0.0, + "learning_rate": 6.748035196340483e-06, + "loss": 1.1445, + "step": 21023 + }, + { + "epoch": 0.6172999001702977, + "grad_norm": 0.0, + "learning_rate": 6.747135938734622e-06, + "loss": 1.3174, + "step": 21024 + }, + { + "epoch": 0.6173292618474367, + "grad_norm": 0.0, + "learning_rate": 6.746236710545244e-06, + "loss": 1.2559, + "step": 21025 + }, + { + "epoch": 0.6173586235245757, + "grad_norm": 0.0, + "learning_rate": 6.745337511780479e-06, + "loss": 1.3311, + "step": 21026 + }, + { + "epoch": 0.6173879852017147, + "grad_norm": 0.0, + "learning_rate": 6.7444383424484605e-06, + "loss": 1.2715, + "step": 21027 + }, + { + "epoch": 0.6174173468788537, + "grad_norm": 0.0, + "learning_rate": 6.743539202557317e-06, + "loss": 1.2959, + "step": 21028 + }, + { + "epoch": 0.6174467085559927, + "grad_norm": 0.0, + "learning_rate": 6.7426400921151895e-06, + "loss": 1.3193, + "step": 21029 + }, + { + "epoch": 0.6174760702331317, + "grad_norm": 0.0, + "learning_rate": 6.741741011130194e-06, + "loss": 1.3496, + "step": 21030 + }, + { + "epoch": 0.6175054319102707, + "grad_norm": 0.0, + "learning_rate": 6.740841959610474e-06, + "loss": 1.3477, + "step": 21031 + }, + { + "epoch": 0.6175347935874097, + "grad_norm": 0.0, + "learning_rate": 6.739942937564153e-06, + "loss": 1.2266, + "step": 21032 + }, + { + "epoch": 0.6175641552645487, + "grad_norm": 0.0, + "learning_rate": 6.739043944999364e-06, + "loss": 1.3262, + "step": 21033 + }, + { + "epoch": 0.6175935169416877, + "grad_norm": 0.0, + "learning_rate": 6.738144981924232e-06, + "loss": 1.2725, + "step": 21034 + }, + { + "epoch": 0.6176228786188267, + "grad_norm": 0.0, + "learning_rate": 6.7372460483468945e-06, + "loss": 1.2568, + "step": 21035 + }, + { + "epoch": 0.6176522402959657, + "grad_norm": 0.0, + "learning_rate": 6.736347144275477e-06, + "loss": 1.2119, + "step": 21036 + }, + { + "epoch": 0.6176816019731047, + "grad_norm": 0.0, + "learning_rate": 6.735448269718106e-06, + "loss": 1.3066, + "step": 21037 + }, + { + "epoch": 0.6177109636502437, + "grad_norm": 0.0, + "learning_rate": 6.734549424682914e-06, + "loss": 1.2148, + "step": 21038 + }, + { + "epoch": 0.6177403253273827, + "grad_norm": 0.0, + "learning_rate": 6.733650609178024e-06, + "loss": 1.377, + "step": 21039 + }, + { + "epoch": 0.6177696870045217, + "grad_norm": 0.0, + "learning_rate": 6.7327518232115714e-06, + "loss": 1.3623, + "step": 21040 + }, + { + "epoch": 0.6177990486816607, + "grad_norm": 0.0, + "learning_rate": 6.731853066791679e-06, + "loss": 1.3984, + "step": 21041 + }, + { + "epoch": 0.6178284103587997, + "grad_norm": 0.0, + "learning_rate": 6.730954339926479e-06, + "loss": 1.2598, + "step": 21042 + }, + { + "epoch": 0.6178577720359387, + "grad_norm": 0.0, + "learning_rate": 6.730055642624091e-06, + "loss": 1.083, + "step": 21043 + }, + { + "epoch": 0.6178871337130777, + "grad_norm": 0.0, + "learning_rate": 6.729156974892653e-06, + "loss": 1.2539, + "step": 21044 + }, + { + "epoch": 0.6179164953902166, + "grad_norm": 0.0, + "learning_rate": 6.728258336740286e-06, + "loss": 1.1777, + "step": 21045 + }, + { + "epoch": 0.6179458570673557, + "grad_norm": 0.0, + "learning_rate": 6.727359728175114e-06, + "loss": 1.2568, + "step": 21046 + }, + { + "epoch": 0.6179752187444947, + "grad_norm": 0.0, + "learning_rate": 6.726461149205268e-06, + "loss": 1.1982, + "step": 21047 + }, + { + "epoch": 0.6180045804216336, + "grad_norm": 0.0, + "learning_rate": 6.725562599838868e-06, + "loss": 1.292, + "step": 21048 + }, + { + "epoch": 0.6180339420987727, + "grad_norm": 0.0, + "learning_rate": 6.724664080084049e-06, + "loss": 1.2246, + "step": 21049 + }, + { + "epoch": 0.6180633037759117, + "grad_norm": 0.0, + "learning_rate": 6.723765589948929e-06, + "loss": 1.2539, + "step": 21050 + }, + { + "epoch": 0.6180926654530506, + "grad_norm": 0.0, + "learning_rate": 6.722867129441637e-06, + "loss": 1.335, + "step": 21051 + }, + { + "epoch": 0.6181220271301897, + "grad_norm": 0.0, + "learning_rate": 6.721968698570294e-06, + "loss": 1.3105, + "step": 21052 + }, + { + "epoch": 0.6181513888073287, + "grad_norm": 0.0, + "learning_rate": 6.721070297343031e-06, + "loss": 1.1743, + "step": 21053 + }, + { + "epoch": 0.6181807504844676, + "grad_norm": 0.0, + "learning_rate": 6.720171925767967e-06, + "loss": 1.2754, + "step": 21054 + }, + { + "epoch": 0.6182101121616067, + "grad_norm": 0.0, + "learning_rate": 6.7192735838532305e-06, + "loss": 1.2031, + "step": 21055 + }, + { + "epoch": 0.6182394738387457, + "grad_norm": 0.0, + "learning_rate": 6.718375271606943e-06, + "loss": 1.3594, + "step": 21056 + }, + { + "epoch": 0.6182688355158846, + "grad_norm": 0.0, + "learning_rate": 6.717476989037227e-06, + "loss": 1.2979, + "step": 21057 + }, + { + "epoch": 0.6182981971930237, + "grad_norm": 0.0, + "learning_rate": 6.716578736152209e-06, + "loss": 1.2007, + "step": 21058 + }, + { + "epoch": 0.6183275588701627, + "grad_norm": 0.0, + "learning_rate": 6.7156805129600055e-06, + "loss": 1.2422, + "step": 21059 + }, + { + "epoch": 0.6183569205473016, + "grad_norm": 0.0, + "learning_rate": 6.714782319468749e-06, + "loss": 1.1841, + "step": 21060 + }, + { + "epoch": 0.6183862822244407, + "grad_norm": 0.0, + "learning_rate": 6.713884155686555e-06, + "loss": 1.3525, + "step": 21061 + }, + { + "epoch": 0.6184156439015797, + "grad_norm": 0.0, + "learning_rate": 6.71298602162155e-06, + "loss": 1.2505, + "step": 21062 + }, + { + "epoch": 0.6184450055787186, + "grad_norm": 0.0, + "learning_rate": 6.712087917281851e-06, + "loss": 1.29, + "step": 21063 + }, + { + "epoch": 0.6184743672558577, + "grad_norm": 0.0, + "learning_rate": 6.711189842675587e-06, + "loss": 1.3164, + "step": 21064 + }, + { + "epoch": 0.6185037289329967, + "grad_norm": 0.0, + "learning_rate": 6.710291797810875e-06, + "loss": 1.2393, + "step": 21065 + }, + { + "epoch": 0.6185330906101356, + "grad_norm": 0.0, + "learning_rate": 6.709393782695835e-06, + "loss": 1.1582, + "step": 21066 + }, + { + "epoch": 0.6185624522872747, + "grad_norm": 0.0, + "learning_rate": 6.708495797338592e-06, + "loss": 1.1787, + "step": 21067 + }, + { + "epoch": 0.6185918139644137, + "grad_norm": 0.0, + "learning_rate": 6.70759784174726e-06, + "loss": 1.3003, + "step": 21068 + }, + { + "epoch": 0.6186211756415526, + "grad_norm": 0.0, + "learning_rate": 6.706699915929969e-06, + "loss": 1.1392, + "step": 21069 + }, + { + "epoch": 0.6186505373186917, + "grad_norm": 0.0, + "learning_rate": 6.70580201989483e-06, + "loss": 1.2578, + "step": 21070 + }, + { + "epoch": 0.6186798989958306, + "grad_norm": 0.0, + "learning_rate": 6.704904153649972e-06, + "loss": 1.2603, + "step": 21071 + }, + { + "epoch": 0.6187092606729696, + "grad_norm": 0.0, + "learning_rate": 6.704006317203502e-06, + "loss": 1.332, + "step": 21072 + }, + { + "epoch": 0.6187386223501087, + "grad_norm": 0.0, + "learning_rate": 6.703108510563554e-06, + "loss": 1.1895, + "step": 21073 + }, + { + "epoch": 0.6187679840272476, + "grad_norm": 0.0, + "learning_rate": 6.702210733738238e-06, + "loss": 1.2734, + "step": 21074 + }, + { + "epoch": 0.6187973457043866, + "grad_norm": 0.0, + "learning_rate": 6.701312986735674e-06, + "loss": 1.1899, + "step": 21075 + }, + { + "epoch": 0.6188267073815257, + "grad_norm": 0.0, + "learning_rate": 6.700415269563984e-06, + "loss": 1.3145, + "step": 21076 + }, + { + "epoch": 0.6188560690586646, + "grad_norm": 0.0, + "learning_rate": 6.6995175822312786e-06, + "loss": 1.1377, + "step": 21077 + }, + { + "epoch": 0.6188854307358036, + "grad_norm": 0.0, + "learning_rate": 6.698619924745685e-06, + "loss": 1.2031, + "step": 21078 + }, + { + "epoch": 0.6189147924129427, + "grad_norm": 0.0, + "learning_rate": 6.697722297115315e-06, + "loss": 1.2734, + "step": 21079 + }, + { + "epoch": 0.6189441540900816, + "grad_norm": 0.0, + "learning_rate": 6.69682469934829e-06, + "loss": 1.2988, + "step": 21080 + }, + { + "epoch": 0.6189735157672206, + "grad_norm": 0.0, + "learning_rate": 6.695927131452724e-06, + "loss": 1.1792, + "step": 21081 + }, + { + "epoch": 0.6190028774443597, + "grad_norm": 0.0, + "learning_rate": 6.695029593436737e-06, + "loss": 1.2529, + "step": 21082 + }, + { + "epoch": 0.6190322391214986, + "grad_norm": 0.0, + "learning_rate": 6.694132085308439e-06, + "loss": 1.2466, + "step": 21083 + }, + { + "epoch": 0.6190616007986376, + "grad_norm": 0.0, + "learning_rate": 6.6932346070759556e-06, + "loss": 1.1973, + "step": 21084 + }, + { + "epoch": 0.6190909624757767, + "grad_norm": 0.0, + "learning_rate": 6.6923371587474e-06, + "loss": 1.2852, + "step": 21085 + }, + { + "epoch": 0.6191203241529156, + "grad_norm": 0.0, + "learning_rate": 6.691439740330882e-06, + "loss": 1.2461, + "step": 21086 + }, + { + "epoch": 0.6191496858300546, + "grad_norm": 0.0, + "learning_rate": 6.6905423518345245e-06, + "loss": 1.2656, + "step": 21087 + }, + { + "epoch": 0.6191790475071937, + "grad_norm": 0.0, + "learning_rate": 6.689644993266436e-06, + "loss": 1.332, + "step": 21088 + }, + { + "epoch": 0.6192084091843326, + "grad_norm": 0.0, + "learning_rate": 6.68874766463474e-06, + "loss": 1.3228, + "step": 21089 + }, + { + "epoch": 0.6192377708614716, + "grad_norm": 0.0, + "learning_rate": 6.6878503659475455e-06, + "loss": 1.1572, + "step": 21090 + }, + { + "epoch": 0.6192671325386107, + "grad_norm": 0.0, + "learning_rate": 6.6869530972129704e-06, + "loss": 1.1987, + "step": 21091 + }, + { + "epoch": 0.6192964942157496, + "grad_norm": 0.0, + "learning_rate": 6.686055858439122e-06, + "loss": 1.2349, + "step": 21092 + }, + { + "epoch": 0.6193258558928886, + "grad_norm": 0.0, + "learning_rate": 6.685158649634124e-06, + "loss": 1.1489, + "step": 21093 + }, + { + "epoch": 0.6193552175700276, + "grad_norm": 0.0, + "learning_rate": 6.684261470806085e-06, + "loss": 1.252, + "step": 21094 + }, + { + "epoch": 0.6193845792471666, + "grad_norm": 0.0, + "learning_rate": 6.683364321963116e-06, + "loss": 1.2744, + "step": 21095 + }, + { + "epoch": 0.6194139409243056, + "grad_norm": 0.0, + "learning_rate": 6.682467203113336e-06, + "loss": 1.3271, + "step": 21096 + }, + { + "epoch": 0.6194433026014446, + "grad_norm": 0.0, + "learning_rate": 6.681570114264851e-06, + "loss": 1.3105, + "step": 21097 + }, + { + "epoch": 0.6194726642785836, + "grad_norm": 0.0, + "learning_rate": 6.680673055425779e-06, + "loss": 1.2949, + "step": 21098 + }, + { + "epoch": 0.6195020259557226, + "grad_norm": 0.0, + "learning_rate": 6.67977602660423e-06, + "loss": 1.1587, + "step": 21099 + }, + { + "epoch": 0.6195313876328616, + "grad_norm": 0.0, + "learning_rate": 6.678879027808319e-06, + "loss": 1.2676, + "step": 21100 + }, + { + "epoch": 0.6195607493100006, + "grad_norm": 0.0, + "learning_rate": 6.677982059046151e-06, + "loss": 1.3018, + "step": 21101 + }, + { + "epoch": 0.6195901109871396, + "grad_norm": 0.0, + "learning_rate": 6.677085120325847e-06, + "loss": 1.2617, + "step": 21102 + }, + { + "epoch": 0.6196194726642786, + "grad_norm": 0.0, + "learning_rate": 6.67618821165551e-06, + "loss": 1.3223, + "step": 21103 + }, + { + "epoch": 0.6196488343414176, + "grad_norm": 0.0, + "learning_rate": 6.675291333043256e-06, + "loss": 1.1953, + "step": 21104 + }, + { + "epoch": 0.6196781960185566, + "grad_norm": 0.0, + "learning_rate": 6.6743944844971955e-06, + "loss": 1.2227, + "step": 21105 + }, + { + "epoch": 0.6197075576956956, + "grad_norm": 0.0, + "learning_rate": 6.673497666025435e-06, + "loss": 1.1963, + "step": 21106 + }, + { + "epoch": 0.6197369193728346, + "grad_norm": 0.0, + "learning_rate": 6.672600877636089e-06, + "loss": 1.3604, + "step": 21107 + }, + { + "epoch": 0.6197662810499736, + "grad_norm": 0.0, + "learning_rate": 6.671704119337261e-06, + "loss": 1.1963, + "step": 21108 + }, + { + "epoch": 0.6197956427271126, + "grad_norm": 0.0, + "learning_rate": 6.6708073911370685e-06, + "loss": 1.3027, + "step": 21109 + }, + { + "epoch": 0.6198250044042516, + "grad_norm": 0.0, + "learning_rate": 6.669910693043616e-06, + "loss": 1.2393, + "step": 21110 + }, + { + "epoch": 0.6198543660813906, + "grad_norm": 0.0, + "learning_rate": 6.669014025065017e-06, + "loss": 1.3008, + "step": 21111 + }, + { + "epoch": 0.6198837277585295, + "grad_norm": 0.0, + "learning_rate": 6.6681173872093715e-06, + "loss": 1.2998, + "step": 21112 + }, + { + "epoch": 0.6199130894356686, + "grad_norm": 0.0, + "learning_rate": 6.667220779484799e-06, + "loss": 1.1777, + "step": 21113 + }, + { + "epoch": 0.6199424511128075, + "grad_norm": 0.0, + "learning_rate": 6.666324201899401e-06, + "loss": 1.1704, + "step": 21114 + }, + { + "epoch": 0.6199718127899465, + "grad_norm": 0.0, + "learning_rate": 6.665427654461287e-06, + "loss": 1.3389, + "step": 21115 + }, + { + "epoch": 0.6200011744670856, + "grad_norm": 0.0, + "learning_rate": 6.664531137178566e-06, + "loss": 1.2061, + "step": 21116 + }, + { + "epoch": 0.6200305361442245, + "grad_norm": 0.0, + "learning_rate": 6.66363465005934e-06, + "loss": 1.3457, + "step": 21117 + }, + { + "epoch": 0.6200598978213635, + "grad_norm": 0.0, + "learning_rate": 6.662738193111724e-06, + "loss": 1.2578, + "step": 21118 + }, + { + "epoch": 0.6200892594985026, + "grad_norm": 0.0, + "learning_rate": 6.6618417663438196e-06, + "loss": 1.2822, + "step": 21119 + }, + { + "epoch": 0.6201186211756415, + "grad_norm": 0.0, + "learning_rate": 6.660945369763736e-06, + "loss": 1.2734, + "step": 21120 + }, + { + "epoch": 0.6201479828527805, + "grad_norm": 0.0, + "learning_rate": 6.660049003379576e-06, + "loss": 1.3369, + "step": 21121 + }, + { + "epoch": 0.6201773445299196, + "grad_norm": 0.0, + "learning_rate": 6.659152667199451e-06, + "loss": 1.2891, + "step": 21122 + }, + { + "epoch": 0.6202067062070585, + "grad_norm": 0.0, + "learning_rate": 6.6582563612314634e-06, + "loss": 1.1626, + "step": 21123 + }, + { + "epoch": 0.6202360678841975, + "grad_norm": 0.0, + "learning_rate": 6.657360085483721e-06, + "loss": 1.2373, + "step": 21124 + }, + { + "epoch": 0.6202654295613366, + "grad_norm": 0.0, + "learning_rate": 6.656463839964327e-06, + "loss": 1.1323, + "step": 21125 + }, + { + "epoch": 0.6202947912384755, + "grad_norm": 0.0, + "learning_rate": 6.655567624681383e-06, + "loss": 1.2627, + "step": 21126 + }, + { + "epoch": 0.6203241529156145, + "grad_norm": 0.0, + "learning_rate": 6.654671439643002e-06, + "loss": 1.3267, + "step": 21127 + }, + { + "epoch": 0.6203535145927536, + "grad_norm": 0.0, + "learning_rate": 6.65377528485728e-06, + "loss": 1.1929, + "step": 21128 + }, + { + "epoch": 0.6203828762698925, + "grad_norm": 0.0, + "learning_rate": 6.652879160332328e-06, + "loss": 1.2012, + "step": 21129 + }, + { + "epoch": 0.6204122379470315, + "grad_norm": 0.0, + "learning_rate": 6.651983066076244e-06, + "loss": 1.2153, + "step": 21130 + }, + { + "epoch": 0.6204415996241706, + "grad_norm": 0.0, + "learning_rate": 6.651087002097139e-06, + "loss": 1.271, + "step": 21131 + }, + { + "epoch": 0.6204709613013095, + "grad_norm": 0.0, + "learning_rate": 6.650190968403108e-06, + "loss": 1.2598, + "step": 21132 + }, + { + "epoch": 0.6205003229784485, + "grad_norm": 0.0, + "learning_rate": 6.64929496500226e-06, + "loss": 1.1597, + "step": 21133 + }, + { + "epoch": 0.6205296846555876, + "grad_norm": 0.0, + "learning_rate": 6.648398991902697e-06, + "loss": 1.334, + "step": 21134 + }, + { + "epoch": 0.6205590463327265, + "grad_norm": 0.0, + "learning_rate": 6.647503049112518e-06, + "loss": 1.251, + "step": 21135 + }, + { + "epoch": 0.6205884080098655, + "grad_norm": 0.0, + "learning_rate": 6.646607136639829e-06, + "loss": 1.3213, + "step": 21136 + }, + { + "epoch": 0.6206177696870046, + "grad_norm": 0.0, + "learning_rate": 6.645711254492726e-06, + "loss": 1.2451, + "step": 21137 + }, + { + "epoch": 0.6206471313641435, + "grad_norm": 0.0, + "learning_rate": 6.644815402679321e-06, + "loss": 1.3867, + "step": 21138 + }, + { + "epoch": 0.6206764930412825, + "grad_norm": 0.0, + "learning_rate": 6.643919581207706e-06, + "loss": 1.1802, + "step": 21139 + }, + { + "epoch": 0.6207058547184215, + "grad_norm": 0.0, + "learning_rate": 6.643023790085988e-06, + "loss": 1.2666, + "step": 21140 + }, + { + "epoch": 0.6207352163955605, + "grad_norm": 0.0, + "learning_rate": 6.642128029322262e-06, + "loss": 1.2158, + "step": 21141 + }, + { + "epoch": 0.6207645780726995, + "grad_norm": 0.0, + "learning_rate": 6.641232298924636e-06, + "loss": 1.3887, + "step": 21142 + }, + { + "epoch": 0.6207939397498385, + "grad_norm": 0.0, + "learning_rate": 6.640336598901207e-06, + "loss": 1.2793, + "step": 21143 + }, + { + "epoch": 0.6208233014269775, + "grad_norm": 0.0, + "learning_rate": 6.639440929260072e-06, + "loss": 1.1401, + "step": 21144 + }, + { + "epoch": 0.6208526631041165, + "grad_norm": 0.0, + "learning_rate": 6.638545290009336e-06, + "loss": 1.2666, + "step": 21145 + }, + { + "epoch": 0.6208820247812555, + "grad_norm": 0.0, + "learning_rate": 6.6376496811570915e-06, + "loss": 1.0708, + "step": 21146 + }, + { + "epoch": 0.6209113864583945, + "grad_norm": 0.0, + "learning_rate": 6.636754102711445e-06, + "loss": 1.2612, + "step": 21147 + }, + { + "epoch": 0.6209407481355335, + "grad_norm": 0.0, + "learning_rate": 6.635858554680492e-06, + "loss": 1.3867, + "step": 21148 + }, + { + "epoch": 0.6209701098126725, + "grad_norm": 0.0, + "learning_rate": 6.634963037072333e-06, + "loss": 1.1963, + "step": 21149 + }, + { + "epoch": 0.6209994714898115, + "grad_norm": 0.0, + "learning_rate": 6.63406754989506e-06, + "loss": 1.3125, + "step": 21150 + }, + { + "epoch": 0.6210288331669505, + "grad_norm": 0.0, + "learning_rate": 6.633172093156782e-06, + "loss": 1.3467, + "step": 21151 + }, + { + "epoch": 0.6210581948440895, + "grad_norm": 0.0, + "learning_rate": 6.632276666865589e-06, + "loss": 1.3262, + "step": 21152 + }, + { + "epoch": 0.6210875565212285, + "grad_norm": 0.0, + "learning_rate": 6.631381271029581e-06, + "loss": 1.2471, + "step": 21153 + }, + { + "epoch": 0.6211169181983675, + "grad_norm": 0.0, + "learning_rate": 6.630485905656856e-06, + "loss": 1.2129, + "step": 21154 + }, + { + "epoch": 0.6211462798755065, + "grad_norm": 0.0, + "learning_rate": 6.629590570755506e-06, + "loss": 1.2441, + "step": 21155 + }, + { + "epoch": 0.6211756415526455, + "grad_norm": 0.0, + "learning_rate": 6.628695266333637e-06, + "loss": 1.2959, + "step": 21156 + }, + { + "epoch": 0.6212050032297844, + "grad_norm": 0.0, + "learning_rate": 6.627799992399334e-06, + "loss": 1.2715, + "step": 21157 + }, + { + "epoch": 0.6212343649069235, + "grad_norm": 0.0, + "learning_rate": 6.626904748960704e-06, + "loss": 1.3916, + "step": 21158 + }, + { + "epoch": 0.6212637265840625, + "grad_norm": 0.0, + "learning_rate": 6.626009536025836e-06, + "loss": 1.1826, + "step": 21159 + }, + { + "epoch": 0.6212930882612014, + "grad_norm": 0.0, + "learning_rate": 6.625114353602829e-06, + "loss": 1.3691, + "step": 21160 + }, + { + "epoch": 0.6213224499383405, + "grad_norm": 0.0, + "learning_rate": 6.624219201699775e-06, + "loss": 1.29, + "step": 21161 + }, + { + "epoch": 0.6213518116154795, + "grad_norm": 0.0, + "learning_rate": 6.6233240803247735e-06, + "loss": 1.1646, + "step": 21162 + }, + { + "epoch": 0.6213811732926184, + "grad_norm": 0.0, + "learning_rate": 6.622428989485919e-06, + "loss": 1.2637, + "step": 21163 + }, + { + "epoch": 0.6214105349697575, + "grad_norm": 0.0, + "learning_rate": 6.6215339291913005e-06, + "loss": 1.2832, + "step": 21164 + }, + { + "epoch": 0.6214398966468965, + "grad_norm": 0.0, + "learning_rate": 6.620638899449019e-06, + "loss": 1.2666, + "step": 21165 + }, + { + "epoch": 0.6214692583240354, + "grad_norm": 0.0, + "learning_rate": 6.61974390026716e-06, + "loss": 1.1143, + "step": 21166 + }, + { + "epoch": 0.6214986200011745, + "grad_norm": 0.0, + "learning_rate": 6.618848931653827e-06, + "loss": 1.3145, + "step": 21167 + }, + { + "epoch": 0.6215279816783135, + "grad_norm": 0.0, + "learning_rate": 6.617953993617108e-06, + "loss": 1.2832, + "step": 21168 + }, + { + "epoch": 0.6215573433554524, + "grad_norm": 0.0, + "learning_rate": 6.617059086165098e-06, + "loss": 1.3408, + "step": 21169 + }, + { + "epoch": 0.6215867050325915, + "grad_norm": 0.0, + "learning_rate": 6.616164209305887e-06, + "loss": 1.2217, + "step": 21170 + }, + { + "epoch": 0.6216160667097305, + "grad_norm": 0.0, + "learning_rate": 6.615269363047572e-06, + "loss": 1.1851, + "step": 21171 + }, + { + "epoch": 0.6216454283868694, + "grad_norm": 0.0, + "learning_rate": 6.614374547398241e-06, + "loss": 1.3027, + "step": 21172 + }, + { + "epoch": 0.6216747900640085, + "grad_norm": 0.0, + "learning_rate": 6.613479762365991e-06, + "loss": 1.2432, + "step": 21173 + }, + { + "epoch": 0.6217041517411475, + "grad_norm": 0.0, + "learning_rate": 6.61258500795891e-06, + "loss": 1.2695, + "step": 21174 + }, + { + "epoch": 0.6217335134182864, + "grad_norm": 0.0, + "learning_rate": 6.611690284185087e-06, + "loss": 1.1602, + "step": 21175 + }, + { + "epoch": 0.6217628750954255, + "grad_norm": 0.0, + "learning_rate": 6.61079559105262e-06, + "loss": 1.1528, + "step": 21176 + }, + { + "epoch": 0.6217922367725645, + "grad_norm": 0.0, + "learning_rate": 6.609900928569595e-06, + "loss": 1.292, + "step": 21177 + }, + { + "epoch": 0.6218215984497034, + "grad_norm": 0.0, + "learning_rate": 6.609006296744105e-06, + "loss": 1.2744, + "step": 21178 + }, + { + "epoch": 0.6218509601268425, + "grad_norm": 0.0, + "learning_rate": 6.608111695584236e-06, + "loss": 1.2798, + "step": 21179 + }, + { + "epoch": 0.6218803218039815, + "grad_norm": 0.0, + "learning_rate": 6.607217125098087e-06, + "loss": 1.3438, + "step": 21180 + }, + { + "epoch": 0.6219096834811204, + "grad_norm": 0.0, + "learning_rate": 6.606322585293738e-06, + "loss": 1.3066, + "step": 21181 + }, + { + "epoch": 0.6219390451582595, + "grad_norm": 0.0, + "learning_rate": 6.605428076179287e-06, + "loss": 1.2793, + "step": 21182 + }, + { + "epoch": 0.6219684068353984, + "grad_norm": 0.0, + "learning_rate": 6.604533597762819e-06, + "loss": 1.2314, + "step": 21183 + }, + { + "epoch": 0.6219977685125374, + "grad_norm": 0.0, + "learning_rate": 6.603639150052421e-06, + "loss": 1.2056, + "step": 21184 + }, + { + "epoch": 0.6220271301896765, + "grad_norm": 0.0, + "learning_rate": 6.602744733056185e-06, + "loss": 1.2256, + "step": 21185 + }, + { + "epoch": 0.6220564918668154, + "grad_norm": 0.0, + "learning_rate": 6.601850346782195e-06, + "loss": 1.231, + "step": 21186 + }, + { + "epoch": 0.6220858535439544, + "grad_norm": 0.0, + "learning_rate": 6.600955991238547e-06, + "loss": 1.3179, + "step": 21187 + }, + { + "epoch": 0.6221152152210935, + "grad_norm": 0.0, + "learning_rate": 6.600061666433322e-06, + "loss": 1.3398, + "step": 21188 + }, + { + "epoch": 0.6221445768982324, + "grad_norm": 0.0, + "learning_rate": 6.599167372374613e-06, + "loss": 1.1768, + "step": 21189 + }, + { + "epoch": 0.6221739385753714, + "grad_norm": 0.0, + "learning_rate": 6.598273109070499e-06, + "loss": 1.251, + "step": 21190 + }, + { + "epoch": 0.6222033002525105, + "grad_norm": 0.0, + "learning_rate": 6.597378876529078e-06, + "loss": 1.3252, + "step": 21191 + }, + { + "epoch": 0.6222326619296494, + "grad_norm": 0.0, + "learning_rate": 6.596484674758428e-06, + "loss": 1.2803, + "step": 21192 + }, + { + "epoch": 0.6222620236067884, + "grad_norm": 0.0, + "learning_rate": 6.595590503766643e-06, + "loss": 1.3057, + "step": 21193 + }, + { + "epoch": 0.6222913852839275, + "grad_norm": 0.0, + "learning_rate": 6.594696363561801e-06, + "loss": 1.3281, + "step": 21194 + }, + { + "epoch": 0.6223207469610664, + "grad_norm": 0.0, + "learning_rate": 6.593802254151991e-06, + "loss": 1.3291, + "step": 21195 + }, + { + "epoch": 0.6223501086382054, + "grad_norm": 0.0, + "learning_rate": 6.592908175545303e-06, + "loss": 1.209, + "step": 21196 + }, + { + "epoch": 0.6223794703153445, + "grad_norm": 0.0, + "learning_rate": 6.592014127749816e-06, + "loss": 1.2573, + "step": 21197 + }, + { + "epoch": 0.6224088319924834, + "grad_norm": 0.0, + "learning_rate": 6.591120110773621e-06, + "loss": 1.2402, + "step": 21198 + }, + { + "epoch": 0.6224381936696224, + "grad_norm": 0.0, + "learning_rate": 6.590226124624795e-06, + "loss": 1.208, + "step": 21199 + }, + { + "epoch": 0.6224675553467615, + "grad_norm": 0.0, + "learning_rate": 6.589332169311432e-06, + "loss": 1.2051, + "step": 21200 + }, + { + "epoch": 0.6224969170239004, + "grad_norm": 0.0, + "learning_rate": 6.58843824484161e-06, + "loss": 1.2617, + "step": 21201 + }, + { + "epoch": 0.6225262787010394, + "grad_norm": 0.0, + "learning_rate": 6.5875443512234165e-06, + "loss": 1.3271, + "step": 21202 + }, + { + "epoch": 0.6225556403781785, + "grad_norm": 0.0, + "learning_rate": 6.586650488464932e-06, + "loss": 1.1768, + "step": 21203 + }, + { + "epoch": 0.6225850020553174, + "grad_norm": 0.0, + "learning_rate": 6.585756656574238e-06, + "loss": 1.3965, + "step": 21204 + }, + { + "epoch": 0.6226143637324564, + "grad_norm": 0.0, + "learning_rate": 6.584862855559426e-06, + "loss": 1.2876, + "step": 21205 + }, + { + "epoch": 0.6226437254095954, + "grad_norm": 0.0, + "learning_rate": 6.583969085428572e-06, + "loss": 1.1777, + "step": 21206 + }, + { + "epoch": 0.6226730870867344, + "grad_norm": 0.0, + "learning_rate": 6.5830753461897615e-06, + "loss": 1.3105, + "step": 21207 + }, + { + "epoch": 0.6227024487638734, + "grad_norm": 0.0, + "learning_rate": 6.582181637851072e-06, + "loss": 1.1143, + "step": 21208 + }, + { + "epoch": 0.6227318104410124, + "grad_norm": 0.0, + "learning_rate": 6.5812879604205946e-06, + "loss": 1.2153, + "step": 21209 + }, + { + "epoch": 0.6227611721181514, + "grad_norm": 0.0, + "learning_rate": 6.5803943139063995e-06, + "loss": 1.2305, + "step": 21210 + }, + { + "epoch": 0.6227905337952904, + "grad_norm": 0.0, + "learning_rate": 6.57950069831658e-06, + "loss": 1.2686, + "step": 21211 + }, + { + "epoch": 0.6228198954724293, + "grad_norm": 0.0, + "learning_rate": 6.578607113659211e-06, + "loss": 1.2207, + "step": 21212 + }, + { + "epoch": 0.6228492571495684, + "grad_norm": 0.0, + "learning_rate": 6.577713559942371e-06, + "loss": 1.3115, + "step": 21213 + }, + { + "epoch": 0.6228786188267074, + "grad_norm": 0.0, + "learning_rate": 6.5768200371741475e-06, + "loss": 1.2695, + "step": 21214 + }, + { + "epoch": 0.6229079805038463, + "grad_norm": 0.0, + "learning_rate": 6.575926545362612e-06, + "loss": 1.2578, + "step": 21215 + }, + { + "epoch": 0.6229373421809854, + "grad_norm": 0.0, + "learning_rate": 6.575033084515854e-06, + "loss": 1.2715, + "step": 21216 + }, + { + "epoch": 0.6229667038581244, + "grad_norm": 0.0, + "learning_rate": 6.574139654641946e-06, + "loss": 1.2764, + "step": 21217 + }, + { + "epoch": 0.6229960655352633, + "grad_norm": 0.0, + "learning_rate": 6.573246255748975e-06, + "loss": 1.3262, + "step": 21218 + }, + { + "epoch": 0.6230254272124024, + "grad_norm": 0.0, + "learning_rate": 6.572352887845009e-06, + "loss": 1.3174, + "step": 21219 + }, + { + "epoch": 0.6230547888895414, + "grad_norm": 0.0, + "learning_rate": 6.5714595509381395e-06, + "loss": 1.2217, + "step": 21220 + }, + { + "epoch": 0.6230841505666803, + "grad_norm": 0.0, + "learning_rate": 6.570566245036436e-06, + "loss": 1.2676, + "step": 21221 + }, + { + "epoch": 0.6231135122438194, + "grad_norm": 0.0, + "learning_rate": 6.569672970147983e-06, + "loss": 1.2334, + "step": 21222 + }, + { + "epoch": 0.6231428739209584, + "grad_norm": 0.0, + "learning_rate": 6.568779726280856e-06, + "loss": 1.3027, + "step": 21223 + }, + { + "epoch": 0.6231722355980973, + "grad_norm": 0.0, + "learning_rate": 6.567886513443128e-06, + "loss": 1.2871, + "step": 21224 + }, + { + "epoch": 0.6232015972752364, + "grad_norm": 0.0, + "learning_rate": 6.566993331642886e-06, + "loss": 1.2031, + "step": 21225 + }, + { + "epoch": 0.6232309589523753, + "grad_norm": 0.0, + "learning_rate": 6.5661001808881995e-06, + "loss": 1.2842, + "step": 21226 + }, + { + "epoch": 0.6232603206295143, + "grad_norm": 0.0, + "learning_rate": 6.565207061187151e-06, + "loss": 1.2471, + "step": 21227 + }, + { + "epoch": 0.6232896823066534, + "grad_norm": 0.0, + "learning_rate": 6.564313972547811e-06, + "loss": 1.3408, + "step": 21228 + }, + { + "epoch": 0.6233190439837923, + "grad_norm": 0.0, + "learning_rate": 6.563420914978265e-06, + "loss": 1.1826, + "step": 21229 + }, + { + "epoch": 0.6233484056609313, + "grad_norm": 0.0, + "learning_rate": 6.562527888486579e-06, + "loss": 1.3477, + "step": 21230 + }, + { + "epoch": 0.6233777673380704, + "grad_norm": 0.0, + "learning_rate": 6.561634893080837e-06, + "loss": 1.2842, + "step": 21231 + }, + { + "epoch": 0.6234071290152093, + "grad_norm": 0.0, + "learning_rate": 6.560741928769111e-06, + "loss": 1.2183, + "step": 21232 + }, + { + "epoch": 0.6234364906923483, + "grad_norm": 0.0, + "learning_rate": 6.559848995559472e-06, + "loss": 1.3066, + "step": 21233 + }, + { + "epoch": 0.6234658523694874, + "grad_norm": 0.0, + "learning_rate": 6.558956093460008e-06, + "loss": 1.249, + "step": 21234 + }, + { + "epoch": 0.6234952140466263, + "grad_norm": 0.0, + "learning_rate": 6.558063222478777e-06, + "loss": 1.2939, + "step": 21235 + }, + { + "epoch": 0.6235245757237653, + "grad_norm": 0.0, + "learning_rate": 6.557170382623865e-06, + "loss": 1.2988, + "step": 21236 + }, + { + "epoch": 0.6235539374009044, + "grad_norm": 0.0, + "learning_rate": 6.556277573903344e-06, + "loss": 1.3042, + "step": 21237 + }, + { + "epoch": 0.6235832990780433, + "grad_norm": 0.0, + "learning_rate": 6.5553847963252855e-06, + "loss": 1.1323, + "step": 21238 + }, + { + "epoch": 0.6236126607551823, + "grad_norm": 0.0, + "learning_rate": 6.5544920498977625e-06, + "loss": 1.0469, + "step": 21239 + }, + { + "epoch": 0.6236420224323214, + "grad_norm": 0.0, + "learning_rate": 6.553599334628855e-06, + "loss": 1.2749, + "step": 21240 + }, + { + "epoch": 0.6236713841094603, + "grad_norm": 0.0, + "learning_rate": 6.552706650526628e-06, + "loss": 1.2192, + "step": 21241 + }, + { + "epoch": 0.6237007457865993, + "grad_norm": 0.0, + "learning_rate": 6.551813997599159e-06, + "loss": 1.2866, + "step": 21242 + }, + { + "epoch": 0.6237301074637384, + "grad_norm": 0.0, + "learning_rate": 6.550921375854521e-06, + "loss": 1.3311, + "step": 21243 + }, + { + "epoch": 0.6237594691408773, + "grad_norm": 0.0, + "learning_rate": 6.550028785300779e-06, + "loss": 1.187, + "step": 21244 + }, + { + "epoch": 0.6237888308180163, + "grad_norm": 0.0, + "learning_rate": 6.549136225946015e-06, + "loss": 1.2988, + "step": 21245 + }, + { + "epoch": 0.6238181924951554, + "grad_norm": 0.0, + "learning_rate": 6.548243697798294e-06, + "loss": 1.3193, + "step": 21246 + }, + { + "epoch": 0.6238475541722943, + "grad_norm": 0.0, + "learning_rate": 6.54735120086569e-06, + "loss": 1.2227, + "step": 21247 + }, + { + "epoch": 0.6238769158494333, + "grad_norm": 0.0, + "learning_rate": 6.546458735156269e-06, + "loss": 1.2363, + "step": 21248 + }, + { + "epoch": 0.6239062775265724, + "grad_norm": 0.0, + "learning_rate": 6.545566300678112e-06, + "loss": 1.1479, + "step": 21249 + }, + { + "epoch": 0.6239356392037113, + "grad_norm": 0.0, + "learning_rate": 6.54467389743928e-06, + "loss": 1.2832, + "step": 21250 + }, + { + "epoch": 0.6239650008808503, + "grad_norm": 0.0, + "learning_rate": 6.54378152544785e-06, + "loss": 1.2598, + "step": 21251 + }, + { + "epoch": 0.6239943625579893, + "grad_norm": 0.0, + "learning_rate": 6.542889184711889e-06, + "loss": 1.3247, + "step": 21252 + }, + { + "epoch": 0.6240237242351283, + "grad_norm": 0.0, + "learning_rate": 6.541996875239462e-06, + "loss": 1.2188, + "step": 21253 + }, + { + "epoch": 0.6240530859122673, + "grad_norm": 0.0, + "learning_rate": 6.541104597038646e-06, + "loss": 1.2207, + "step": 21254 + }, + { + "epoch": 0.6240824475894063, + "grad_norm": 0.0, + "learning_rate": 6.540212350117506e-06, + "loss": 1.333, + "step": 21255 + }, + { + "epoch": 0.6241118092665453, + "grad_norm": 0.0, + "learning_rate": 6.539320134484114e-06, + "loss": 1.2803, + "step": 21256 + }, + { + "epoch": 0.6241411709436843, + "grad_norm": 0.0, + "learning_rate": 6.538427950146532e-06, + "loss": 1.1406, + "step": 21257 + }, + { + "epoch": 0.6241705326208233, + "grad_norm": 0.0, + "learning_rate": 6.5375357971128365e-06, + "loss": 1.3633, + "step": 21258 + }, + { + "epoch": 0.6241998942979623, + "grad_norm": 0.0, + "learning_rate": 6.53664367539109e-06, + "loss": 1.1655, + "step": 21259 + }, + { + "epoch": 0.6242292559751013, + "grad_norm": 0.0, + "learning_rate": 6.535751584989362e-06, + "loss": 1.2437, + "step": 21260 + }, + { + "epoch": 0.6242586176522403, + "grad_norm": 0.0, + "learning_rate": 6.534859525915722e-06, + "loss": 1.2393, + "step": 21261 + }, + { + "epoch": 0.6242879793293793, + "grad_norm": 0.0, + "learning_rate": 6.533967498178231e-06, + "loss": 1.2832, + "step": 21262 + }, + { + "epoch": 0.6243173410065183, + "grad_norm": 0.0, + "learning_rate": 6.5330755017849625e-06, + "loss": 1.2358, + "step": 21263 + }, + { + "epoch": 0.6243467026836573, + "grad_norm": 0.0, + "learning_rate": 6.532183536743975e-06, + "loss": 1.333, + "step": 21264 + }, + { + "epoch": 0.6243760643607963, + "grad_norm": 0.0, + "learning_rate": 6.531291603063345e-06, + "loss": 1.2881, + "step": 21265 + }, + { + "epoch": 0.6244054260379353, + "grad_norm": 0.0, + "learning_rate": 6.530399700751131e-06, + "loss": 1.2583, + "step": 21266 + }, + { + "epoch": 0.6244347877150743, + "grad_norm": 0.0, + "learning_rate": 6.529507829815403e-06, + "loss": 1.2451, + "step": 21267 + }, + { + "epoch": 0.6244641493922133, + "grad_norm": 0.0, + "learning_rate": 6.528615990264221e-06, + "loss": 1.2705, + "step": 21268 + }, + { + "epoch": 0.6244935110693522, + "grad_norm": 0.0, + "learning_rate": 6.527724182105657e-06, + "loss": 1.2861, + "step": 21269 + }, + { + "epoch": 0.6245228727464913, + "grad_norm": 0.0, + "learning_rate": 6.526832405347772e-06, + "loss": 1.2422, + "step": 21270 + }, + { + "epoch": 0.6245522344236303, + "grad_norm": 0.0, + "learning_rate": 6.5259406599986305e-06, + "loss": 1.3174, + "step": 21271 + }, + { + "epoch": 0.6245815961007692, + "grad_norm": 0.0, + "learning_rate": 6.5250489460662995e-06, + "loss": 1.2627, + "step": 21272 + }, + { + "epoch": 0.6246109577779083, + "grad_norm": 0.0, + "learning_rate": 6.524157263558836e-06, + "loss": 1.3096, + "step": 21273 + }, + { + "epoch": 0.6246403194550473, + "grad_norm": 0.0, + "learning_rate": 6.5232656124843115e-06, + "loss": 1.231, + "step": 21274 + }, + { + "epoch": 0.6246696811321862, + "grad_norm": 0.0, + "learning_rate": 6.522373992850785e-06, + "loss": 1.1484, + "step": 21275 + }, + { + "epoch": 0.6246990428093253, + "grad_norm": 0.0, + "learning_rate": 6.521482404666325e-06, + "loss": 1.1655, + "step": 21276 + }, + { + "epoch": 0.6247284044864643, + "grad_norm": 0.0, + "learning_rate": 6.5205908479389845e-06, + "loss": 1.1836, + "step": 21277 + }, + { + "epoch": 0.6247577661636032, + "grad_norm": 0.0, + "learning_rate": 6.519699322676836e-06, + "loss": 1.1816, + "step": 21278 + }, + { + "epoch": 0.6247871278407423, + "grad_norm": 0.0, + "learning_rate": 6.518807828887937e-06, + "loss": 1.2261, + "step": 21279 + }, + { + "epoch": 0.6248164895178813, + "grad_norm": 0.0, + "learning_rate": 6.517916366580352e-06, + "loss": 1.2988, + "step": 21280 + }, + { + "epoch": 0.6248458511950202, + "grad_norm": 0.0, + "learning_rate": 6.5170249357621416e-06, + "loss": 1.0786, + "step": 21281 + }, + { + "epoch": 0.6248752128721593, + "grad_norm": 0.0, + "learning_rate": 6.516133536441362e-06, + "loss": 1.3584, + "step": 21282 + }, + { + "epoch": 0.6249045745492983, + "grad_norm": 0.0, + "learning_rate": 6.515242168626082e-06, + "loss": 1.3145, + "step": 21283 + }, + { + "epoch": 0.6249339362264372, + "grad_norm": 0.0, + "learning_rate": 6.514350832324359e-06, + "loss": 1.3711, + "step": 21284 + }, + { + "epoch": 0.6249632979035763, + "grad_norm": 0.0, + "learning_rate": 6.513459527544256e-06, + "loss": 1.2642, + "step": 21285 + }, + { + "epoch": 0.6249926595807153, + "grad_norm": 0.0, + "learning_rate": 6.512568254293829e-06, + "loss": 1.3076, + "step": 21286 + }, + { + "epoch": 0.6250220212578542, + "grad_norm": 0.0, + "learning_rate": 6.511677012581143e-06, + "loss": 1.209, + "step": 21287 + }, + { + "epoch": 0.6250513829349933, + "grad_norm": 0.0, + "learning_rate": 6.51078580241425e-06, + "loss": 1.2388, + "step": 21288 + }, + { + "epoch": 0.6250807446121323, + "grad_norm": 0.0, + "learning_rate": 6.509894623801219e-06, + "loss": 1.1484, + "step": 21289 + }, + { + "epoch": 0.6251101062892712, + "grad_norm": 0.0, + "learning_rate": 6.509003476750105e-06, + "loss": 1.2725, + "step": 21290 + }, + { + "epoch": 0.6251394679664103, + "grad_norm": 0.0, + "learning_rate": 6.508112361268966e-06, + "loss": 1.3042, + "step": 21291 + }, + { + "epoch": 0.6251688296435493, + "grad_norm": 0.0, + "learning_rate": 6.507221277365861e-06, + "loss": 1.374, + "step": 21292 + }, + { + "epoch": 0.6251981913206882, + "grad_norm": 0.0, + "learning_rate": 6.5063302250488456e-06, + "loss": 1.3184, + "step": 21293 + }, + { + "epoch": 0.6252275529978273, + "grad_norm": 0.0, + "learning_rate": 6.505439204325985e-06, + "loss": 1.1846, + "step": 21294 + }, + { + "epoch": 0.6252569146749662, + "grad_norm": 0.0, + "learning_rate": 6.5045482152053295e-06, + "loss": 1.1719, + "step": 21295 + }, + { + "epoch": 0.6252862763521052, + "grad_norm": 0.0, + "learning_rate": 6.503657257694942e-06, + "loss": 1.2715, + "step": 21296 + }, + { + "epoch": 0.6253156380292443, + "grad_norm": 0.0, + "learning_rate": 6.502766331802874e-06, + "loss": 1.2412, + "step": 21297 + }, + { + "epoch": 0.6253449997063832, + "grad_norm": 0.0, + "learning_rate": 6.501875437537189e-06, + "loss": 1.2441, + "step": 21298 + }, + { + "epoch": 0.6253743613835222, + "grad_norm": 0.0, + "learning_rate": 6.50098457490594e-06, + "loss": 1.209, + "step": 21299 + }, + { + "epoch": 0.6254037230606613, + "grad_norm": 0.0, + "learning_rate": 6.500093743917184e-06, + "loss": 1.1963, + "step": 21300 + }, + { + "epoch": 0.6254330847378002, + "grad_norm": 0.0, + "learning_rate": 6.499202944578976e-06, + "loss": 1.2031, + "step": 21301 + }, + { + "epoch": 0.6254624464149392, + "grad_norm": 0.0, + "learning_rate": 6.49831217689937e-06, + "loss": 1.2051, + "step": 21302 + }, + { + "epoch": 0.6254918080920783, + "grad_norm": 0.0, + "learning_rate": 6.4974214408864264e-06, + "loss": 1.292, + "step": 21303 + }, + { + "epoch": 0.6255211697692172, + "grad_norm": 0.0, + "learning_rate": 6.496530736548196e-06, + "loss": 1.2236, + "step": 21304 + }, + { + "epoch": 0.6255505314463562, + "grad_norm": 0.0, + "learning_rate": 6.495640063892738e-06, + "loss": 1.2617, + "step": 21305 + }, + { + "epoch": 0.6255798931234953, + "grad_norm": 0.0, + "learning_rate": 6.4947494229281005e-06, + "loss": 1.1963, + "step": 21306 + }, + { + "epoch": 0.6256092548006342, + "grad_norm": 0.0, + "learning_rate": 6.493858813662346e-06, + "loss": 1.249, + "step": 21307 + }, + { + "epoch": 0.6256386164777732, + "grad_norm": 0.0, + "learning_rate": 6.492968236103521e-06, + "loss": 1.1748, + "step": 21308 + }, + { + "epoch": 0.6256679781549123, + "grad_norm": 0.0, + "learning_rate": 6.492077690259684e-06, + "loss": 1.2881, + "step": 21309 + }, + { + "epoch": 0.6256973398320512, + "grad_norm": 0.0, + "learning_rate": 6.4911871761388845e-06, + "loss": 1.2217, + "step": 21310 + }, + { + "epoch": 0.6257267015091902, + "grad_norm": 0.0, + "learning_rate": 6.490296693749183e-06, + "loss": 1.2153, + "step": 21311 + }, + { + "epoch": 0.6257560631863291, + "grad_norm": 0.0, + "learning_rate": 6.489406243098626e-06, + "loss": 1.2549, + "step": 21312 + }, + { + "epoch": 0.6257854248634682, + "grad_norm": 0.0, + "learning_rate": 6.488515824195263e-06, + "loss": 1.2754, + "step": 21313 + }, + { + "epoch": 0.6258147865406072, + "grad_norm": 0.0, + "learning_rate": 6.487625437047154e-06, + "loss": 1.1953, + "step": 21314 + }, + { + "epoch": 0.6258441482177461, + "grad_norm": 0.0, + "learning_rate": 6.4867350816623465e-06, + "loss": 1.2002, + "step": 21315 + }, + { + "epoch": 0.6258735098948852, + "grad_norm": 0.0, + "learning_rate": 6.4858447580488956e-06, + "loss": 1.4375, + "step": 21316 + }, + { + "epoch": 0.6259028715720242, + "grad_norm": 0.0, + "learning_rate": 6.484954466214845e-06, + "loss": 1.292, + "step": 21317 + }, + { + "epoch": 0.6259322332491631, + "grad_norm": 0.0, + "learning_rate": 6.484064206168258e-06, + "loss": 1.2021, + "step": 21318 + }, + { + "epoch": 0.6259615949263022, + "grad_norm": 0.0, + "learning_rate": 6.483173977917176e-06, + "loss": 1.2163, + "step": 21319 + }, + { + "epoch": 0.6259909566034412, + "grad_norm": 0.0, + "learning_rate": 6.482283781469654e-06, + "loss": 1.4277, + "step": 21320 + }, + { + "epoch": 0.6260203182805801, + "grad_norm": 0.0, + "learning_rate": 6.48139361683374e-06, + "loss": 1.3887, + "step": 21321 + }, + { + "epoch": 0.6260496799577192, + "grad_norm": 0.0, + "learning_rate": 6.480503484017481e-06, + "loss": 1.2842, + "step": 21322 + }, + { + "epoch": 0.6260790416348582, + "grad_norm": 0.0, + "learning_rate": 6.479613383028934e-06, + "loss": 1.1035, + "step": 21323 + }, + { + "epoch": 0.6261084033119971, + "grad_norm": 0.0, + "learning_rate": 6.478723313876143e-06, + "loss": 1.168, + "step": 21324 + }, + { + "epoch": 0.6261377649891362, + "grad_norm": 0.0, + "learning_rate": 6.477833276567162e-06, + "loss": 1.1606, + "step": 21325 + }, + { + "epoch": 0.6261671266662752, + "grad_norm": 0.0, + "learning_rate": 6.476943271110032e-06, + "loss": 1.2109, + "step": 21326 + }, + { + "epoch": 0.6261964883434141, + "grad_norm": 0.0, + "learning_rate": 6.47605329751281e-06, + "loss": 1.209, + "step": 21327 + }, + { + "epoch": 0.6262258500205532, + "grad_norm": 0.0, + "learning_rate": 6.475163355783538e-06, + "loss": 1.2568, + "step": 21328 + }, + { + "epoch": 0.6262552116976922, + "grad_norm": 0.0, + "learning_rate": 6.47427344593027e-06, + "loss": 1.2842, + "step": 21329 + }, + { + "epoch": 0.6262845733748311, + "grad_norm": 0.0, + "learning_rate": 6.4733835679610494e-06, + "loss": 1.2861, + "step": 21330 + }, + { + "epoch": 0.6263139350519702, + "grad_norm": 0.0, + "learning_rate": 6.47249372188392e-06, + "loss": 1.2871, + "step": 21331 + }, + { + "epoch": 0.6263432967291092, + "grad_norm": 0.0, + "learning_rate": 6.471603907706938e-06, + "loss": 1.208, + "step": 21332 + }, + { + "epoch": 0.6263726584062481, + "grad_norm": 0.0, + "learning_rate": 6.470714125438142e-06, + "loss": 1.0835, + "step": 21333 + }, + { + "epoch": 0.6264020200833872, + "grad_norm": 0.0, + "learning_rate": 6.469824375085586e-06, + "loss": 1.2783, + "step": 21334 + }, + { + "epoch": 0.6264313817605262, + "grad_norm": 0.0, + "learning_rate": 6.468934656657307e-06, + "loss": 1.2393, + "step": 21335 + }, + { + "epoch": 0.6264607434376651, + "grad_norm": 0.0, + "learning_rate": 6.468044970161362e-06, + "loss": 1.2402, + "step": 21336 + }, + { + "epoch": 0.6264901051148042, + "grad_norm": 0.0, + "learning_rate": 6.4671553156057864e-06, + "loss": 1.2007, + "step": 21337 + }, + { + "epoch": 0.6265194667919431, + "grad_norm": 0.0, + "learning_rate": 6.466265692998633e-06, + "loss": 1.2939, + "step": 21338 + }, + { + "epoch": 0.6265488284690821, + "grad_norm": 0.0, + "learning_rate": 6.465376102347942e-06, + "loss": 1.3262, + "step": 21339 + }, + { + "epoch": 0.6265781901462212, + "grad_norm": 0.0, + "learning_rate": 6.464486543661763e-06, + "loss": 1.3193, + "step": 21340 + }, + { + "epoch": 0.6266075518233601, + "grad_norm": 0.0, + "learning_rate": 6.463597016948137e-06, + "loss": 1.2822, + "step": 21341 + }, + { + "epoch": 0.6266369135004991, + "grad_norm": 0.0, + "learning_rate": 6.462707522215104e-06, + "loss": 1.3223, + "step": 21342 + }, + { + "epoch": 0.6266662751776382, + "grad_norm": 0.0, + "learning_rate": 6.461818059470718e-06, + "loss": 1.3057, + "step": 21343 + }, + { + "epoch": 0.6266956368547771, + "grad_norm": 0.0, + "learning_rate": 6.460928628723016e-06, + "loss": 1.1284, + "step": 21344 + }, + { + "epoch": 0.6267249985319161, + "grad_norm": 0.0, + "learning_rate": 6.4600392299800416e-06, + "loss": 1.1436, + "step": 21345 + }, + { + "epoch": 0.6267543602090552, + "grad_norm": 0.0, + "learning_rate": 6.459149863249838e-06, + "loss": 1.167, + "step": 21346 + }, + { + "epoch": 0.6267837218861941, + "grad_norm": 0.0, + "learning_rate": 6.458260528540452e-06, + "loss": 1.2578, + "step": 21347 + }, + { + "epoch": 0.6268130835633331, + "grad_norm": 0.0, + "learning_rate": 6.457371225859921e-06, + "loss": 1.3018, + "step": 21348 + }, + { + "epoch": 0.6268424452404722, + "grad_norm": 0.0, + "learning_rate": 6.456481955216291e-06, + "loss": 1.3496, + "step": 21349 + }, + { + "epoch": 0.6268718069176111, + "grad_norm": 0.0, + "learning_rate": 6.455592716617605e-06, + "loss": 1.4053, + "step": 21350 + }, + { + "epoch": 0.6269011685947501, + "grad_norm": 0.0, + "learning_rate": 6.454703510071895e-06, + "loss": 1.2812, + "step": 21351 + }, + { + "epoch": 0.6269305302718892, + "grad_norm": 0.0, + "learning_rate": 6.453814335587214e-06, + "loss": 1.3965, + "step": 21352 + }, + { + "epoch": 0.6269598919490281, + "grad_norm": 0.0, + "learning_rate": 6.452925193171595e-06, + "loss": 1.4209, + "step": 21353 + }, + { + "epoch": 0.6269892536261671, + "grad_norm": 0.0, + "learning_rate": 6.4520360828330845e-06, + "loss": 1.2422, + "step": 21354 + }, + { + "epoch": 0.6270186153033062, + "grad_norm": 0.0, + "learning_rate": 6.451147004579717e-06, + "loss": 1.2217, + "step": 21355 + }, + { + "epoch": 0.6270479769804451, + "grad_norm": 0.0, + "learning_rate": 6.450257958419539e-06, + "loss": 1.3008, + "step": 21356 + }, + { + "epoch": 0.6270773386575841, + "grad_norm": 0.0, + "learning_rate": 6.449368944360587e-06, + "loss": 1.2402, + "step": 21357 + }, + { + "epoch": 0.6271067003347232, + "grad_norm": 0.0, + "learning_rate": 6.448479962410902e-06, + "loss": 1.3311, + "step": 21358 + }, + { + "epoch": 0.6271360620118621, + "grad_norm": 0.0, + "learning_rate": 6.447591012578518e-06, + "loss": 1.2559, + "step": 21359 + }, + { + "epoch": 0.6271654236890011, + "grad_norm": 0.0, + "learning_rate": 6.446702094871482e-06, + "loss": 1.2744, + "step": 21360 + }, + { + "epoch": 0.6271947853661402, + "grad_norm": 0.0, + "learning_rate": 6.445813209297831e-06, + "loss": 1.3135, + "step": 21361 + }, + { + "epoch": 0.6272241470432791, + "grad_norm": 0.0, + "learning_rate": 6.444924355865596e-06, + "loss": 1.3701, + "step": 21362 + }, + { + "epoch": 0.6272535087204181, + "grad_norm": 0.0, + "learning_rate": 6.444035534582824e-06, + "loss": 1.3574, + "step": 21363 + }, + { + "epoch": 0.6272828703975571, + "grad_norm": 0.0, + "learning_rate": 6.443146745457549e-06, + "loss": 1.1787, + "step": 21364 + }, + { + "epoch": 0.6273122320746961, + "grad_norm": 0.0, + "learning_rate": 6.442257988497809e-06, + "loss": 1.1963, + "step": 21365 + }, + { + "epoch": 0.6273415937518351, + "grad_norm": 0.0, + "learning_rate": 6.441369263711639e-06, + "loss": 1.2246, + "step": 21366 + }, + { + "epoch": 0.6273709554289741, + "grad_norm": 0.0, + "learning_rate": 6.44048057110708e-06, + "loss": 1.2061, + "step": 21367 + }, + { + "epoch": 0.6274003171061131, + "grad_norm": 0.0, + "learning_rate": 6.4395919106921655e-06, + "loss": 1.1724, + "step": 21368 + }, + { + "epoch": 0.6274296787832521, + "grad_norm": 0.0, + "learning_rate": 6.438703282474936e-06, + "loss": 1.3818, + "step": 21369 + }, + { + "epoch": 0.6274590404603911, + "grad_norm": 0.0, + "learning_rate": 6.437814686463424e-06, + "loss": 1.2744, + "step": 21370 + }, + { + "epoch": 0.6274884021375301, + "grad_norm": 0.0, + "learning_rate": 6.436926122665662e-06, + "loss": 1.1855, + "step": 21371 + }, + { + "epoch": 0.6275177638146691, + "grad_norm": 0.0, + "learning_rate": 6.436037591089694e-06, + "loss": 1.3379, + "step": 21372 + }, + { + "epoch": 0.6275471254918081, + "grad_norm": 0.0, + "learning_rate": 6.435149091743549e-06, + "loss": 1.2627, + "step": 21373 + }, + { + "epoch": 0.6275764871689471, + "grad_norm": 0.0, + "learning_rate": 6.434260624635264e-06, + "loss": 1.3047, + "step": 21374 + }, + { + "epoch": 0.6276058488460861, + "grad_norm": 0.0, + "learning_rate": 6.433372189772871e-06, + "loss": 1.1304, + "step": 21375 + }, + { + "epoch": 0.6276352105232251, + "grad_norm": 0.0, + "learning_rate": 6.432483787164411e-06, + "loss": 1.2764, + "step": 21376 + }, + { + "epoch": 0.6276645722003641, + "grad_norm": 0.0, + "learning_rate": 6.431595416817911e-06, + "loss": 1.292, + "step": 21377 + }, + { + "epoch": 0.627693933877503, + "grad_norm": 0.0, + "learning_rate": 6.430707078741409e-06, + "loss": 1.2539, + "step": 21378 + }, + { + "epoch": 0.6277232955546421, + "grad_norm": 0.0, + "learning_rate": 6.4298187729429355e-06, + "loss": 1.1431, + "step": 21379 + }, + { + "epoch": 0.6277526572317811, + "grad_norm": 0.0, + "learning_rate": 6.428930499430527e-06, + "loss": 1.1748, + "step": 21380 + }, + { + "epoch": 0.62778201890892, + "grad_norm": 0.0, + "learning_rate": 6.428042258212215e-06, + "loss": 1.3223, + "step": 21381 + }, + { + "epoch": 0.6278113805860591, + "grad_norm": 0.0, + "learning_rate": 6.427154049296029e-06, + "loss": 1.1729, + "step": 21382 + }, + { + "epoch": 0.6278407422631981, + "grad_norm": 0.0, + "learning_rate": 6.426265872690007e-06, + "loss": 1.4287, + "step": 21383 + }, + { + "epoch": 0.627870103940337, + "grad_norm": 0.0, + "learning_rate": 6.425377728402174e-06, + "loss": 1.2676, + "step": 21384 + }, + { + "epoch": 0.6278994656174761, + "grad_norm": 0.0, + "learning_rate": 6.424489616440569e-06, + "loss": 1.3066, + "step": 21385 + }, + { + "epoch": 0.6279288272946151, + "grad_norm": 0.0, + "learning_rate": 6.423601536813218e-06, + "loss": 1.248, + "step": 21386 + }, + { + "epoch": 0.627958188971754, + "grad_norm": 0.0, + "learning_rate": 6.422713489528156e-06, + "loss": 1.2373, + "step": 21387 + }, + { + "epoch": 0.6279875506488931, + "grad_norm": 0.0, + "learning_rate": 6.421825474593409e-06, + "loss": 1.3623, + "step": 21388 + }, + { + "epoch": 0.6280169123260321, + "grad_norm": 0.0, + "learning_rate": 6.420937492017017e-06, + "loss": 1.166, + "step": 21389 + }, + { + "epoch": 0.628046274003171, + "grad_norm": 0.0, + "learning_rate": 6.420049541807001e-06, + "loss": 1.2578, + "step": 21390 + }, + { + "epoch": 0.6280756356803101, + "grad_norm": 0.0, + "learning_rate": 6.41916162397139e-06, + "loss": 1.3789, + "step": 21391 + }, + { + "epoch": 0.6281049973574491, + "grad_norm": 0.0, + "learning_rate": 6.418273738518222e-06, + "loss": 1.3799, + "step": 21392 + }, + { + "epoch": 0.628134359034588, + "grad_norm": 0.0, + "learning_rate": 6.417385885455518e-06, + "loss": 1.1802, + "step": 21393 + }, + { + "epoch": 0.6281637207117271, + "grad_norm": 0.0, + "learning_rate": 6.416498064791314e-06, + "loss": 1.1084, + "step": 21394 + }, + { + "epoch": 0.6281930823888661, + "grad_norm": 0.0, + "learning_rate": 6.415610276533633e-06, + "loss": 1.2568, + "step": 21395 + }, + { + "epoch": 0.628222444066005, + "grad_norm": 0.0, + "learning_rate": 6.414722520690508e-06, + "loss": 1.2095, + "step": 21396 + }, + { + "epoch": 0.6282518057431441, + "grad_norm": 0.0, + "learning_rate": 6.413834797269967e-06, + "loss": 1.126, + "step": 21397 + }, + { + "epoch": 0.6282811674202831, + "grad_norm": 0.0, + "learning_rate": 6.4129471062800355e-06, + "loss": 1.2275, + "step": 21398 + }, + { + "epoch": 0.628310529097422, + "grad_norm": 0.0, + "learning_rate": 6.412059447728742e-06, + "loss": 1.29, + "step": 21399 + }, + { + "epoch": 0.6283398907745611, + "grad_norm": 0.0, + "learning_rate": 6.411171821624111e-06, + "loss": 1.1362, + "step": 21400 + }, + { + "epoch": 0.6283692524517, + "grad_norm": 0.0, + "learning_rate": 6.410284227974175e-06, + "loss": 1.2578, + "step": 21401 + }, + { + "epoch": 0.628398614128839, + "grad_norm": 0.0, + "learning_rate": 6.409396666786957e-06, + "loss": 1.2939, + "step": 21402 + }, + { + "epoch": 0.6284279758059781, + "grad_norm": 0.0, + "learning_rate": 6.408509138070486e-06, + "loss": 1.2676, + "step": 21403 + }, + { + "epoch": 0.628457337483117, + "grad_norm": 0.0, + "learning_rate": 6.407621641832783e-06, + "loss": 1.1875, + "step": 21404 + }, + { + "epoch": 0.628486699160256, + "grad_norm": 0.0, + "learning_rate": 6.406734178081881e-06, + "loss": 1.3398, + "step": 21405 + }, + { + "epoch": 0.6285160608373951, + "grad_norm": 0.0, + "learning_rate": 6.405846746825801e-06, + "loss": 1.0332, + "step": 21406 + }, + { + "epoch": 0.628545422514534, + "grad_norm": 0.0, + "learning_rate": 6.40495934807257e-06, + "loss": 1.1152, + "step": 21407 + }, + { + "epoch": 0.628574784191673, + "grad_norm": 0.0, + "learning_rate": 6.40407198183021e-06, + "loss": 1.2012, + "step": 21408 + }, + { + "epoch": 0.6286041458688121, + "grad_norm": 0.0, + "learning_rate": 6.403184648106751e-06, + "loss": 1.1357, + "step": 21409 + }, + { + "epoch": 0.628633507545951, + "grad_norm": 0.0, + "learning_rate": 6.402297346910216e-06, + "loss": 1.2441, + "step": 21410 + }, + { + "epoch": 0.62866286922309, + "grad_norm": 0.0, + "learning_rate": 6.401410078248623e-06, + "loss": 1.4219, + "step": 21411 + }, + { + "epoch": 0.6286922309002291, + "grad_norm": 0.0, + "learning_rate": 6.400522842130004e-06, + "loss": 1.1948, + "step": 21412 + }, + { + "epoch": 0.628721592577368, + "grad_norm": 0.0, + "learning_rate": 6.3996356385623735e-06, + "loss": 1.2949, + "step": 21413 + }, + { + "epoch": 0.628750954254507, + "grad_norm": 0.0, + "learning_rate": 6.3987484675537656e-06, + "loss": 1.1816, + "step": 21414 + }, + { + "epoch": 0.628780315931646, + "grad_norm": 0.0, + "learning_rate": 6.397861329112194e-06, + "loss": 1.3384, + "step": 21415 + }, + { + "epoch": 0.628809677608785, + "grad_norm": 0.0, + "learning_rate": 6.396974223245686e-06, + "loss": 1.3081, + "step": 21416 + }, + { + "epoch": 0.628839039285924, + "grad_norm": 0.0, + "learning_rate": 6.396087149962264e-06, + "loss": 1.1284, + "step": 21417 + }, + { + "epoch": 0.628868400963063, + "grad_norm": 0.0, + "learning_rate": 6.395200109269949e-06, + "loss": 1.2358, + "step": 21418 + }, + { + "epoch": 0.628897762640202, + "grad_norm": 0.0, + "learning_rate": 6.394313101176764e-06, + "loss": 1.3535, + "step": 21419 + }, + { + "epoch": 0.628927124317341, + "grad_norm": 0.0, + "learning_rate": 6.393426125690723e-06, + "loss": 1.2539, + "step": 21420 + }, + { + "epoch": 0.62895648599448, + "grad_norm": 0.0, + "learning_rate": 6.392539182819857e-06, + "loss": 1.3477, + "step": 21421 + }, + { + "epoch": 0.628985847671619, + "grad_norm": 0.0, + "learning_rate": 6.391652272572183e-06, + "loss": 1.332, + "step": 21422 + }, + { + "epoch": 0.629015209348758, + "grad_norm": 0.0, + "learning_rate": 6.390765394955723e-06, + "loss": 1.2705, + "step": 21423 + }, + { + "epoch": 0.629044571025897, + "grad_norm": 0.0, + "learning_rate": 6.389878549978491e-06, + "loss": 1.2422, + "step": 21424 + }, + { + "epoch": 0.629073932703036, + "grad_norm": 0.0, + "learning_rate": 6.3889917376485165e-06, + "loss": 1.2217, + "step": 21425 + }, + { + "epoch": 0.629103294380175, + "grad_norm": 0.0, + "learning_rate": 6.388104957973813e-06, + "loss": 1.2422, + "step": 21426 + }, + { + "epoch": 0.6291326560573139, + "grad_norm": 0.0, + "learning_rate": 6.3872182109624024e-06, + "loss": 1.1274, + "step": 21427 + }, + { + "epoch": 0.629162017734453, + "grad_norm": 0.0, + "learning_rate": 6.3863314966223e-06, + "loss": 1.2148, + "step": 21428 + }, + { + "epoch": 0.629191379411592, + "grad_norm": 0.0, + "learning_rate": 6.38544481496153e-06, + "loss": 1.2959, + "step": 21429 + }, + { + "epoch": 0.6292207410887309, + "grad_norm": 0.0, + "learning_rate": 6.384558165988108e-06, + "loss": 1.1934, + "step": 21430 + }, + { + "epoch": 0.62925010276587, + "grad_norm": 0.0, + "learning_rate": 6.383671549710052e-06, + "loss": 1.1963, + "step": 21431 + }, + { + "epoch": 0.629279464443009, + "grad_norm": 0.0, + "learning_rate": 6.382784966135382e-06, + "loss": 1.2178, + "step": 21432 + }, + { + "epoch": 0.6293088261201479, + "grad_norm": 0.0, + "learning_rate": 6.38189841527211e-06, + "loss": 1.2461, + "step": 21433 + }, + { + "epoch": 0.629338187797287, + "grad_norm": 0.0, + "learning_rate": 6.38101189712826e-06, + "loss": 1.2607, + "step": 21434 + }, + { + "epoch": 0.629367549474426, + "grad_norm": 0.0, + "learning_rate": 6.380125411711846e-06, + "loss": 1.3379, + "step": 21435 + }, + { + "epoch": 0.6293969111515649, + "grad_norm": 0.0, + "learning_rate": 6.379238959030885e-06, + "loss": 1.145, + "step": 21436 + }, + { + "epoch": 0.629426272828704, + "grad_norm": 0.0, + "learning_rate": 6.378352539093391e-06, + "loss": 1.2471, + "step": 21437 + }, + { + "epoch": 0.629455634505843, + "grad_norm": 0.0, + "learning_rate": 6.377466151907386e-06, + "loss": 1.2178, + "step": 21438 + }, + { + "epoch": 0.6294849961829819, + "grad_norm": 0.0, + "learning_rate": 6.376579797480884e-06, + "loss": 1.229, + "step": 21439 + }, + { + "epoch": 0.629514357860121, + "grad_norm": 0.0, + "learning_rate": 6.375693475821894e-06, + "loss": 1.1484, + "step": 21440 + }, + { + "epoch": 0.62954371953726, + "grad_norm": 0.0, + "learning_rate": 6.374807186938439e-06, + "loss": 1.2656, + "step": 21441 + }, + { + "epoch": 0.6295730812143989, + "grad_norm": 0.0, + "learning_rate": 6.373920930838528e-06, + "loss": 1.2656, + "step": 21442 + }, + { + "epoch": 0.629602442891538, + "grad_norm": 0.0, + "learning_rate": 6.373034707530182e-06, + "loss": 1.209, + "step": 21443 + }, + { + "epoch": 0.629631804568677, + "grad_norm": 0.0, + "learning_rate": 6.372148517021408e-06, + "loss": 1.2886, + "step": 21444 + }, + { + "epoch": 0.6296611662458159, + "grad_norm": 0.0, + "learning_rate": 6.371262359320228e-06, + "loss": 1.251, + "step": 21445 + }, + { + "epoch": 0.629690527922955, + "grad_norm": 0.0, + "learning_rate": 6.37037623443465e-06, + "loss": 1.2637, + "step": 21446 + }, + { + "epoch": 0.629719889600094, + "grad_norm": 0.0, + "learning_rate": 6.369490142372692e-06, + "loss": 1.1729, + "step": 21447 + }, + { + "epoch": 0.6297492512772329, + "grad_norm": 0.0, + "learning_rate": 6.368604083142358e-06, + "loss": 1.269, + "step": 21448 + }, + { + "epoch": 0.629778612954372, + "grad_norm": 0.0, + "learning_rate": 6.367718056751673e-06, + "loss": 1.2861, + "step": 21449 + }, + { + "epoch": 0.629807974631511, + "grad_norm": 0.0, + "learning_rate": 6.366832063208643e-06, + "loss": 1.2422, + "step": 21450 + }, + { + "epoch": 0.6298373363086499, + "grad_norm": 0.0, + "learning_rate": 6.365946102521281e-06, + "loss": 1.2207, + "step": 21451 + }, + { + "epoch": 0.629866697985789, + "grad_norm": 0.0, + "learning_rate": 6.365060174697599e-06, + "loss": 1.3164, + "step": 21452 + }, + { + "epoch": 0.6298960596629279, + "grad_norm": 0.0, + "learning_rate": 6.364174279745606e-06, + "loss": 1.3965, + "step": 21453 + }, + { + "epoch": 0.6299254213400669, + "grad_norm": 0.0, + "learning_rate": 6.36328841767332e-06, + "loss": 1.3135, + "step": 21454 + }, + { + "epoch": 0.629954783017206, + "grad_norm": 0.0, + "learning_rate": 6.3624025884887456e-06, + "loss": 1.3154, + "step": 21455 + }, + { + "epoch": 0.6299841446943449, + "grad_norm": 0.0, + "learning_rate": 6.361516792199899e-06, + "loss": 1.3877, + "step": 21456 + }, + { + "epoch": 0.6300135063714839, + "grad_norm": 0.0, + "learning_rate": 6.360631028814783e-06, + "loss": 1.3271, + "step": 21457 + }, + { + "epoch": 0.630042868048623, + "grad_norm": 0.0, + "learning_rate": 6.359745298341418e-06, + "loss": 1.207, + "step": 21458 + }, + { + "epoch": 0.6300722297257619, + "grad_norm": 0.0, + "learning_rate": 6.358859600787808e-06, + "loss": 1.2925, + "step": 21459 + }, + { + "epoch": 0.6301015914029009, + "grad_norm": 0.0, + "learning_rate": 6.357973936161961e-06, + "loss": 1.29, + "step": 21460 + }, + { + "epoch": 0.63013095308004, + "grad_norm": 0.0, + "learning_rate": 6.35708830447189e-06, + "loss": 1.2285, + "step": 21461 + }, + { + "epoch": 0.6301603147571789, + "grad_norm": 0.0, + "learning_rate": 6.356202705725598e-06, + "loss": 1.2344, + "step": 21462 + }, + { + "epoch": 0.6301896764343179, + "grad_norm": 0.0, + "learning_rate": 6.355317139931103e-06, + "loss": 1.1924, + "step": 21463 + }, + { + "epoch": 0.630219038111457, + "grad_norm": 0.0, + "learning_rate": 6.3544316070964056e-06, + "loss": 1.2227, + "step": 21464 + }, + { + "epoch": 0.6302483997885959, + "grad_norm": 0.0, + "learning_rate": 6.35354610722952e-06, + "loss": 1.3457, + "step": 21465 + }, + { + "epoch": 0.6302777614657349, + "grad_norm": 0.0, + "learning_rate": 6.352660640338449e-06, + "loss": 1.3076, + "step": 21466 + }, + { + "epoch": 0.630307123142874, + "grad_norm": 0.0, + "learning_rate": 6.351775206431203e-06, + "loss": 1.2217, + "step": 21467 + }, + { + "epoch": 0.6303364848200129, + "grad_norm": 0.0, + "learning_rate": 6.350889805515788e-06, + "loss": 1.3496, + "step": 21468 + }, + { + "epoch": 0.6303658464971519, + "grad_norm": 0.0, + "learning_rate": 6.350004437600208e-06, + "loss": 1.2642, + "step": 21469 + }, + { + "epoch": 0.630395208174291, + "grad_norm": 0.0, + "learning_rate": 6.349119102692476e-06, + "loss": 1.2407, + "step": 21470 + }, + { + "epoch": 0.6304245698514299, + "grad_norm": 0.0, + "learning_rate": 6.348233800800593e-06, + "loss": 1.2119, + "step": 21471 + }, + { + "epoch": 0.6304539315285689, + "grad_norm": 0.0, + "learning_rate": 6.347348531932569e-06, + "loss": 1.1816, + "step": 21472 + }, + { + "epoch": 0.630483293205708, + "grad_norm": 0.0, + "learning_rate": 6.346463296096403e-06, + "loss": 1.332, + "step": 21473 + }, + { + "epoch": 0.6305126548828469, + "grad_norm": 0.0, + "learning_rate": 6.345578093300111e-06, + "loss": 1.2095, + "step": 21474 + }, + { + "epoch": 0.6305420165599859, + "grad_norm": 0.0, + "learning_rate": 6.344692923551687e-06, + "loss": 1.2949, + "step": 21475 + }, + { + "epoch": 0.630571378237125, + "grad_norm": 0.0, + "learning_rate": 6.343807786859147e-06, + "loss": 1.2188, + "step": 21476 + }, + { + "epoch": 0.6306007399142639, + "grad_norm": 0.0, + "learning_rate": 6.342922683230484e-06, + "loss": 1.1738, + "step": 21477 + }, + { + "epoch": 0.6306301015914029, + "grad_norm": 0.0, + "learning_rate": 6.342037612673711e-06, + "loss": 1.3071, + "step": 21478 + }, + { + "epoch": 0.6306594632685419, + "grad_norm": 0.0, + "learning_rate": 6.341152575196828e-06, + "loss": 1.4033, + "step": 21479 + }, + { + "epoch": 0.6306888249456809, + "grad_norm": 0.0, + "learning_rate": 6.340267570807839e-06, + "loss": 1.4375, + "step": 21480 + }, + { + "epoch": 0.6307181866228199, + "grad_norm": 0.0, + "learning_rate": 6.339382599514749e-06, + "loss": 1.3315, + "step": 21481 + }, + { + "epoch": 0.6307475482999589, + "grad_norm": 0.0, + "learning_rate": 6.338497661325556e-06, + "loss": 1.1816, + "step": 21482 + }, + { + "epoch": 0.6307769099770979, + "grad_norm": 0.0, + "learning_rate": 6.337612756248269e-06, + "loss": 1.3857, + "step": 21483 + }, + { + "epoch": 0.6308062716542369, + "grad_norm": 0.0, + "learning_rate": 6.336727884290888e-06, + "loss": 1.4033, + "step": 21484 + }, + { + "epoch": 0.6308356333313759, + "grad_norm": 0.0, + "learning_rate": 6.335843045461415e-06, + "loss": 1.3037, + "step": 21485 + }, + { + "epoch": 0.6308649950085149, + "grad_norm": 0.0, + "learning_rate": 6.334958239767847e-06, + "loss": 1.3232, + "step": 21486 + }, + { + "epoch": 0.6308943566856539, + "grad_norm": 0.0, + "learning_rate": 6.334073467218197e-06, + "loss": 1.3008, + "step": 21487 + }, + { + "epoch": 0.6309237183627929, + "grad_norm": 0.0, + "learning_rate": 6.333188727820458e-06, + "loss": 1.2158, + "step": 21488 + }, + { + "epoch": 0.6309530800399319, + "grad_norm": 0.0, + "learning_rate": 6.332304021582631e-06, + "loss": 1.3066, + "step": 21489 + }, + { + "epoch": 0.6309824417170709, + "grad_norm": 0.0, + "learning_rate": 6.3314193485127196e-06, + "loss": 1.3452, + "step": 21490 + }, + { + "epoch": 0.6310118033942099, + "grad_norm": 0.0, + "learning_rate": 6.330534708618721e-06, + "loss": 1.1899, + "step": 21491 + }, + { + "epoch": 0.6310411650713489, + "grad_norm": 0.0, + "learning_rate": 6.3296501019086375e-06, + "loss": 1.2246, + "step": 21492 + }, + { + "epoch": 0.6310705267484878, + "grad_norm": 0.0, + "learning_rate": 6.328765528390466e-06, + "loss": 1.1812, + "step": 21493 + }, + { + "epoch": 0.6310998884256269, + "grad_norm": 0.0, + "learning_rate": 6.3278809880722105e-06, + "loss": 1.2261, + "step": 21494 + }, + { + "epoch": 0.6311292501027659, + "grad_norm": 0.0, + "learning_rate": 6.3269964809618666e-06, + "loss": 1.2646, + "step": 21495 + }, + { + "epoch": 0.6311586117799048, + "grad_norm": 0.0, + "learning_rate": 6.326112007067435e-06, + "loss": 1.2349, + "step": 21496 + }, + { + "epoch": 0.6311879734570439, + "grad_norm": 0.0, + "learning_rate": 6.325227566396912e-06, + "loss": 1.1748, + "step": 21497 + }, + { + "epoch": 0.6312173351341829, + "grad_norm": 0.0, + "learning_rate": 6.324343158958299e-06, + "loss": 1.2168, + "step": 21498 + }, + { + "epoch": 0.6312466968113218, + "grad_norm": 0.0, + "learning_rate": 6.323458784759593e-06, + "loss": 1.291, + "step": 21499 + }, + { + "epoch": 0.6312760584884609, + "grad_norm": 0.0, + "learning_rate": 6.3225744438087885e-06, + "loss": 1.0923, + "step": 21500 + }, + { + "epoch": 0.6313054201655999, + "grad_norm": 0.0, + "learning_rate": 6.321690136113888e-06, + "loss": 1.2217, + "step": 21501 + }, + { + "epoch": 0.6313347818427388, + "grad_norm": 0.0, + "learning_rate": 6.320805861682879e-06, + "loss": 1.2568, + "step": 21502 + }, + { + "epoch": 0.6313641435198779, + "grad_norm": 0.0, + "learning_rate": 6.319921620523772e-06, + "loss": 1.3877, + "step": 21503 + }, + { + "epoch": 0.6313935051970169, + "grad_norm": 0.0, + "learning_rate": 6.319037412644552e-06, + "loss": 1.2891, + "step": 21504 + }, + { + "epoch": 0.6314228668741558, + "grad_norm": 0.0, + "learning_rate": 6.3181532380532215e-06, + "loss": 1.3193, + "step": 21505 + }, + { + "epoch": 0.6314522285512949, + "grad_norm": 0.0, + "learning_rate": 6.317269096757772e-06, + "loss": 1.3018, + "step": 21506 + }, + { + "epoch": 0.6314815902284339, + "grad_norm": 0.0, + "learning_rate": 6.316384988766204e-06, + "loss": 1.2891, + "step": 21507 + }, + { + "epoch": 0.6315109519055728, + "grad_norm": 0.0, + "learning_rate": 6.31550091408651e-06, + "loss": 1.2773, + "step": 21508 + }, + { + "epoch": 0.6315403135827119, + "grad_norm": 0.0, + "learning_rate": 6.314616872726682e-06, + "loss": 1.3647, + "step": 21509 + }, + { + "epoch": 0.6315696752598509, + "grad_norm": 0.0, + "learning_rate": 6.313732864694721e-06, + "loss": 1.1826, + "step": 21510 + }, + { + "epoch": 0.6315990369369898, + "grad_norm": 0.0, + "learning_rate": 6.312848889998613e-06, + "loss": 1.4141, + "step": 21511 + }, + { + "epoch": 0.6316283986141289, + "grad_norm": 0.0, + "learning_rate": 6.311964948646361e-06, + "loss": 1.2646, + "step": 21512 + }, + { + "epoch": 0.6316577602912679, + "grad_norm": 0.0, + "learning_rate": 6.311081040645952e-06, + "loss": 1.2324, + "step": 21513 + }, + { + "epoch": 0.6316871219684068, + "grad_norm": 0.0, + "learning_rate": 6.310197166005385e-06, + "loss": 1.3359, + "step": 21514 + }, + { + "epoch": 0.6317164836455458, + "grad_norm": 0.0, + "learning_rate": 6.309313324732646e-06, + "loss": 1.3232, + "step": 21515 + }, + { + "epoch": 0.6317458453226849, + "grad_norm": 0.0, + "learning_rate": 6.3084295168357364e-06, + "loss": 1.186, + "step": 21516 + }, + { + "epoch": 0.6317752069998238, + "grad_norm": 0.0, + "learning_rate": 6.307545742322643e-06, + "loss": 1.2324, + "step": 21517 + }, + { + "epoch": 0.6318045686769628, + "grad_norm": 0.0, + "learning_rate": 6.306662001201356e-06, + "loss": 1.2183, + "step": 21518 + }, + { + "epoch": 0.6318339303541018, + "grad_norm": 0.0, + "learning_rate": 6.305778293479872e-06, + "loss": 1.3037, + "step": 21519 + }, + { + "epoch": 0.6318632920312408, + "grad_norm": 0.0, + "learning_rate": 6.304894619166181e-06, + "loss": 1.2725, + "step": 21520 + }, + { + "epoch": 0.6318926537083798, + "grad_norm": 0.0, + "learning_rate": 6.304010978268276e-06, + "loss": 1.3232, + "step": 21521 + }, + { + "epoch": 0.6319220153855188, + "grad_norm": 0.0, + "learning_rate": 6.303127370794142e-06, + "loss": 1.2148, + "step": 21522 + }, + { + "epoch": 0.6319513770626578, + "grad_norm": 0.0, + "learning_rate": 6.302243796751777e-06, + "loss": 1.2383, + "step": 21523 + }, + { + "epoch": 0.6319807387397968, + "grad_norm": 0.0, + "learning_rate": 6.301360256149167e-06, + "loss": 1.2764, + "step": 21524 + }, + { + "epoch": 0.6320101004169358, + "grad_norm": 0.0, + "learning_rate": 6.3004767489943066e-06, + "loss": 1.2441, + "step": 21525 + }, + { + "epoch": 0.6320394620940748, + "grad_norm": 0.0, + "learning_rate": 6.2995932752951775e-06, + "loss": 1.208, + "step": 21526 + }, + { + "epoch": 0.6320688237712138, + "grad_norm": 0.0, + "learning_rate": 6.298709835059777e-06, + "loss": 1.3223, + "step": 21527 + }, + { + "epoch": 0.6320981854483528, + "grad_norm": 0.0, + "learning_rate": 6.297826428296093e-06, + "loss": 1.1846, + "step": 21528 + }, + { + "epoch": 0.6321275471254918, + "grad_norm": 0.0, + "learning_rate": 6.29694305501211e-06, + "loss": 1.3408, + "step": 21529 + }, + { + "epoch": 0.6321569088026308, + "grad_norm": 0.0, + "learning_rate": 6.296059715215822e-06, + "loss": 1.2783, + "step": 21530 + }, + { + "epoch": 0.6321862704797698, + "grad_norm": 0.0, + "learning_rate": 6.29517640891521e-06, + "loss": 1.2236, + "step": 21531 + }, + { + "epoch": 0.6322156321569088, + "grad_norm": 0.0, + "learning_rate": 6.29429313611827e-06, + "loss": 1.3633, + "step": 21532 + }, + { + "epoch": 0.6322449938340478, + "grad_norm": 0.0, + "learning_rate": 6.293409896832986e-06, + "loss": 1.3682, + "step": 21533 + }, + { + "epoch": 0.6322743555111868, + "grad_norm": 0.0, + "learning_rate": 6.292526691067347e-06, + "loss": 1.3135, + "step": 21534 + }, + { + "epoch": 0.6323037171883258, + "grad_norm": 0.0, + "learning_rate": 6.291643518829333e-06, + "loss": 1.3193, + "step": 21535 + }, + { + "epoch": 0.6323330788654647, + "grad_norm": 0.0, + "learning_rate": 6.290760380126941e-06, + "loss": 1.1987, + "step": 21536 + }, + { + "epoch": 0.6323624405426038, + "grad_norm": 0.0, + "learning_rate": 6.2898772749681545e-06, + "loss": 1.1963, + "step": 21537 + }, + { + "epoch": 0.6323918022197428, + "grad_norm": 0.0, + "learning_rate": 6.288994203360954e-06, + "loss": 1.2217, + "step": 21538 + }, + { + "epoch": 0.6324211638968817, + "grad_norm": 0.0, + "learning_rate": 6.288111165313333e-06, + "loss": 1.2217, + "step": 21539 + }, + { + "epoch": 0.6324505255740208, + "grad_norm": 0.0, + "learning_rate": 6.287228160833267e-06, + "loss": 1.1577, + "step": 21540 + }, + { + "epoch": 0.6324798872511598, + "grad_norm": 0.0, + "learning_rate": 6.286345189928756e-06, + "loss": 1.2812, + "step": 21541 + }, + { + "epoch": 0.6325092489282987, + "grad_norm": 0.0, + "learning_rate": 6.28546225260777e-06, + "loss": 1.207, + "step": 21542 + }, + { + "epoch": 0.6325386106054378, + "grad_norm": 0.0, + "learning_rate": 6.284579348878304e-06, + "loss": 1.2998, + "step": 21543 + }, + { + "epoch": 0.6325679722825768, + "grad_norm": 0.0, + "learning_rate": 6.283696478748336e-06, + "loss": 1.1885, + "step": 21544 + }, + { + "epoch": 0.6325973339597157, + "grad_norm": 0.0, + "learning_rate": 6.282813642225855e-06, + "loss": 1.1685, + "step": 21545 + }, + { + "epoch": 0.6326266956368548, + "grad_norm": 0.0, + "learning_rate": 6.281930839318837e-06, + "loss": 1.2495, + "step": 21546 + }, + { + "epoch": 0.6326560573139938, + "grad_norm": 0.0, + "learning_rate": 6.281048070035276e-06, + "loss": 1.2378, + "step": 21547 + }, + { + "epoch": 0.6326854189911327, + "grad_norm": 0.0, + "learning_rate": 6.2801653343831505e-06, + "loss": 1.2832, + "step": 21548 + }, + { + "epoch": 0.6327147806682718, + "grad_norm": 0.0, + "learning_rate": 6.279282632370438e-06, + "loss": 1.2686, + "step": 21549 + }, + { + "epoch": 0.6327441423454108, + "grad_norm": 0.0, + "learning_rate": 6.27839996400513e-06, + "loss": 1.3154, + "step": 21550 + }, + { + "epoch": 0.6327735040225497, + "grad_norm": 0.0, + "learning_rate": 6.277517329295199e-06, + "loss": 1.2803, + "step": 21551 + }, + { + "epoch": 0.6328028656996888, + "grad_norm": 0.0, + "learning_rate": 6.276634728248636e-06, + "loss": 1.2344, + "step": 21552 + }, + { + "epoch": 0.6328322273768278, + "grad_norm": 0.0, + "learning_rate": 6.2757521608734166e-06, + "loss": 1.2949, + "step": 21553 + }, + { + "epoch": 0.6328615890539667, + "grad_norm": 0.0, + "learning_rate": 6.274869627177527e-06, + "loss": 1.3584, + "step": 21554 + }, + { + "epoch": 0.6328909507311058, + "grad_norm": 0.0, + "learning_rate": 6.2739871271689415e-06, + "loss": 1.3027, + "step": 21555 + }, + { + "epoch": 0.6329203124082448, + "grad_norm": 0.0, + "learning_rate": 6.273104660855648e-06, + "loss": 1.2119, + "step": 21556 + }, + { + "epoch": 0.6329496740853837, + "grad_norm": 0.0, + "learning_rate": 6.272222228245624e-06, + "loss": 1.3828, + "step": 21557 + }, + { + "epoch": 0.6329790357625228, + "grad_norm": 0.0, + "learning_rate": 6.271339829346846e-06, + "loss": 1.1807, + "step": 21558 + }, + { + "epoch": 0.6330083974396618, + "grad_norm": 0.0, + "learning_rate": 6.2704574641673e-06, + "loss": 1.293, + "step": 21559 + }, + { + "epoch": 0.6330377591168007, + "grad_norm": 0.0, + "learning_rate": 6.269575132714958e-06, + "loss": 1.2075, + "step": 21560 + }, + { + "epoch": 0.6330671207939398, + "grad_norm": 0.0, + "learning_rate": 6.268692834997807e-06, + "loss": 1.2734, + "step": 21561 + }, + { + "epoch": 0.6330964824710787, + "grad_norm": 0.0, + "learning_rate": 6.26781057102382e-06, + "loss": 1.3525, + "step": 21562 + }, + { + "epoch": 0.6331258441482177, + "grad_norm": 0.0, + "learning_rate": 6.2669283408009814e-06, + "loss": 1.3564, + "step": 21563 + }, + { + "epoch": 0.6331552058253568, + "grad_norm": 0.0, + "learning_rate": 6.266046144337261e-06, + "loss": 1.332, + "step": 21564 + }, + { + "epoch": 0.6331845675024957, + "grad_norm": 0.0, + "learning_rate": 6.265163981640645e-06, + "loss": 1.2754, + "step": 21565 + }, + { + "epoch": 0.6332139291796347, + "grad_norm": 0.0, + "learning_rate": 6.264281852719107e-06, + "loss": 1.3398, + "step": 21566 + }, + { + "epoch": 0.6332432908567738, + "grad_norm": 0.0, + "learning_rate": 6.263399757580626e-06, + "loss": 1.1934, + "step": 21567 + }, + { + "epoch": 0.6332726525339127, + "grad_norm": 0.0, + "learning_rate": 6.262517696233178e-06, + "loss": 1.3281, + "step": 21568 + }, + { + "epoch": 0.6333020142110517, + "grad_norm": 0.0, + "learning_rate": 6.261635668684738e-06, + "loss": 1.2871, + "step": 21569 + }, + { + "epoch": 0.6333313758881908, + "grad_norm": 0.0, + "learning_rate": 6.260753674943285e-06, + "loss": 1.3955, + "step": 21570 + }, + { + "epoch": 0.6333607375653297, + "grad_norm": 0.0, + "learning_rate": 6.259871715016792e-06, + "loss": 1.2305, + "step": 21571 + }, + { + "epoch": 0.6333900992424687, + "grad_norm": 0.0, + "learning_rate": 6.258989788913239e-06, + "loss": 1.1064, + "step": 21572 + }, + { + "epoch": 0.6334194609196078, + "grad_norm": 0.0, + "learning_rate": 6.258107896640599e-06, + "loss": 1.2866, + "step": 21573 + }, + { + "epoch": 0.6334488225967467, + "grad_norm": 0.0, + "learning_rate": 6.2572260382068495e-06, + "loss": 1.186, + "step": 21574 + }, + { + "epoch": 0.6334781842738857, + "grad_norm": 0.0, + "learning_rate": 6.2563442136199586e-06, + "loss": 1.3174, + "step": 21575 + }, + { + "epoch": 0.6335075459510248, + "grad_norm": 0.0, + "learning_rate": 6.25546242288791e-06, + "loss": 1.2759, + "step": 21576 + }, + { + "epoch": 0.6335369076281637, + "grad_norm": 0.0, + "learning_rate": 6.254580666018674e-06, + "loss": 1.165, + "step": 21577 + }, + { + "epoch": 0.6335662693053027, + "grad_norm": 0.0, + "learning_rate": 6.253698943020221e-06, + "loss": 1.2764, + "step": 21578 + }, + { + "epoch": 0.6335956309824418, + "grad_norm": 0.0, + "learning_rate": 6.252817253900532e-06, + "loss": 1.207, + "step": 21579 + }, + { + "epoch": 0.6336249926595807, + "grad_norm": 0.0, + "learning_rate": 6.251935598667571e-06, + "loss": 1.1831, + "step": 21580 + }, + { + "epoch": 0.6336543543367197, + "grad_norm": 0.0, + "learning_rate": 6.25105397732932e-06, + "loss": 1.3105, + "step": 21581 + }, + { + "epoch": 0.6336837160138588, + "grad_norm": 0.0, + "learning_rate": 6.250172389893746e-06, + "loss": 1.1802, + "step": 21582 + }, + { + "epoch": 0.6337130776909977, + "grad_norm": 0.0, + "learning_rate": 6.249290836368827e-06, + "loss": 1.1357, + "step": 21583 + }, + { + "epoch": 0.6337424393681367, + "grad_norm": 0.0, + "learning_rate": 6.248409316762525e-06, + "loss": 1.2568, + "step": 21584 + }, + { + "epoch": 0.6337718010452758, + "grad_norm": 0.0, + "learning_rate": 6.247527831082824e-06, + "loss": 1.2637, + "step": 21585 + }, + { + "epoch": 0.6338011627224147, + "grad_norm": 0.0, + "learning_rate": 6.246646379337689e-06, + "loss": 1.2686, + "step": 21586 + }, + { + "epoch": 0.6338305243995537, + "grad_norm": 0.0, + "learning_rate": 6.245764961535091e-06, + "loss": 1.332, + "step": 21587 + }, + { + "epoch": 0.6338598860766927, + "grad_norm": 0.0, + "learning_rate": 6.244883577683004e-06, + "loss": 1.3125, + "step": 21588 + }, + { + "epoch": 0.6338892477538317, + "grad_norm": 0.0, + "learning_rate": 6.244002227789392e-06, + "loss": 1.3027, + "step": 21589 + }, + { + "epoch": 0.6339186094309707, + "grad_norm": 0.0, + "learning_rate": 6.2431209118622335e-06, + "loss": 1.3564, + "step": 21590 + }, + { + "epoch": 0.6339479711081097, + "grad_norm": 0.0, + "learning_rate": 6.242239629909494e-06, + "loss": 1.2812, + "step": 21591 + }, + { + "epoch": 0.6339773327852487, + "grad_norm": 0.0, + "learning_rate": 6.241358381939144e-06, + "loss": 1.3184, + "step": 21592 + }, + { + "epoch": 0.6340066944623877, + "grad_norm": 0.0, + "learning_rate": 6.24047716795915e-06, + "loss": 1.1274, + "step": 21593 + }, + { + "epoch": 0.6340360561395267, + "grad_norm": 0.0, + "learning_rate": 6.239595987977489e-06, + "loss": 1.2729, + "step": 21594 + }, + { + "epoch": 0.6340654178166657, + "grad_norm": 0.0, + "learning_rate": 6.2387148420021205e-06, + "loss": 1.2891, + "step": 21595 + }, + { + "epoch": 0.6340947794938047, + "grad_norm": 0.0, + "learning_rate": 6.237833730041018e-06, + "loss": 1.249, + "step": 21596 + }, + { + "epoch": 0.6341241411709437, + "grad_norm": 0.0, + "learning_rate": 6.236952652102151e-06, + "loss": 1.2012, + "step": 21597 + }, + { + "epoch": 0.6341535028480827, + "grad_norm": 0.0, + "learning_rate": 6.236071608193482e-06, + "loss": 1.3223, + "step": 21598 + }, + { + "epoch": 0.6341828645252217, + "grad_norm": 0.0, + "learning_rate": 6.235190598322983e-06, + "loss": 1.2998, + "step": 21599 + }, + { + "epoch": 0.6342122262023607, + "grad_norm": 0.0, + "learning_rate": 6.234309622498616e-06, + "loss": 1.2085, + "step": 21600 + }, + { + "epoch": 0.6342415878794997, + "grad_norm": 0.0, + "learning_rate": 6.233428680728356e-06, + "loss": 1.1934, + "step": 21601 + }, + { + "epoch": 0.6342709495566387, + "grad_norm": 0.0, + "learning_rate": 6.232547773020163e-06, + "loss": 1.3115, + "step": 21602 + }, + { + "epoch": 0.6343003112337777, + "grad_norm": 0.0, + "learning_rate": 6.231666899382007e-06, + "loss": 1.2266, + "step": 21603 + }, + { + "epoch": 0.6343296729109167, + "grad_norm": 0.0, + "learning_rate": 6.230786059821847e-06, + "loss": 1.1104, + "step": 21604 + }, + { + "epoch": 0.6343590345880556, + "grad_norm": 0.0, + "learning_rate": 6.2299052543476605e-06, + "loss": 1.1978, + "step": 21605 + }, + { + "epoch": 0.6343883962651947, + "grad_norm": 0.0, + "learning_rate": 6.229024482967405e-06, + "loss": 1.2007, + "step": 21606 + }, + { + "epoch": 0.6344177579423337, + "grad_norm": 0.0, + "learning_rate": 6.228143745689044e-06, + "loss": 1.2451, + "step": 21607 + }, + { + "epoch": 0.6344471196194726, + "grad_norm": 0.0, + "learning_rate": 6.227263042520548e-06, + "loss": 1.2056, + "step": 21608 + }, + { + "epoch": 0.6344764812966117, + "grad_norm": 0.0, + "learning_rate": 6.226382373469875e-06, + "loss": 1.1797, + "step": 21609 + }, + { + "epoch": 0.6345058429737507, + "grad_norm": 0.0, + "learning_rate": 6.225501738544996e-06, + "loss": 1.2471, + "step": 21610 + }, + { + "epoch": 0.6345352046508896, + "grad_norm": 0.0, + "learning_rate": 6.22462113775387e-06, + "loss": 1.3633, + "step": 21611 + }, + { + "epoch": 0.6345645663280287, + "grad_norm": 0.0, + "learning_rate": 6.223740571104463e-06, + "loss": 1.1392, + "step": 21612 + }, + { + "epoch": 0.6345939280051677, + "grad_norm": 0.0, + "learning_rate": 6.222860038604734e-06, + "loss": 1.1719, + "step": 21613 + }, + { + "epoch": 0.6346232896823066, + "grad_norm": 0.0, + "learning_rate": 6.221979540262653e-06, + "loss": 1.2412, + "step": 21614 + }, + { + "epoch": 0.6346526513594456, + "grad_norm": 0.0, + "learning_rate": 6.2210990760861755e-06, + "loss": 1.1768, + "step": 21615 + }, + { + "epoch": 0.6346820130365847, + "grad_norm": 0.0, + "learning_rate": 6.2202186460832695e-06, + "loss": 1.3701, + "step": 21616 + }, + { + "epoch": 0.6347113747137236, + "grad_norm": 0.0, + "learning_rate": 6.2193382502618945e-06, + "loss": 1.2588, + "step": 21617 + }, + { + "epoch": 0.6347407363908626, + "grad_norm": 0.0, + "learning_rate": 6.218457888630007e-06, + "loss": 1.2734, + "step": 21618 + }, + { + "epoch": 0.6347700980680017, + "grad_norm": 0.0, + "learning_rate": 6.217577561195581e-06, + "loss": 1.3096, + "step": 21619 + }, + { + "epoch": 0.6347994597451406, + "grad_norm": 0.0, + "learning_rate": 6.2166972679665625e-06, + "loss": 1.2705, + "step": 21620 + }, + { + "epoch": 0.6348288214222796, + "grad_norm": 0.0, + "learning_rate": 6.215817008950924e-06, + "loss": 1.0703, + "step": 21621 + }, + { + "epoch": 0.6348581830994187, + "grad_norm": 0.0, + "learning_rate": 6.21493678415662e-06, + "loss": 1.4131, + "step": 21622 + }, + { + "epoch": 0.6348875447765576, + "grad_norm": 0.0, + "learning_rate": 6.2140565935916135e-06, + "loss": 1.2559, + "step": 21623 + }, + { + "epoch": 0.6349169064536966, + "grad_norm": 0.0, + "learning_rate": 6.213176437263859e-06, + "loss": 1.3535, + "step": 21624 + }, + { + "epoch": 0.6349462681308357, + "grad_norm": 0.0, + "learning_rate": 6.212296315181323e-06, + "loss": 1.4258, + "step": 21625 + }, + { + "epoch": 0.6349756298079746, + "grad_norm": 0.0, + "learning_rate": 6.2114162273519626e-06, + "loss": 1.1729, + "step": 21626 + }, + { + "epoch": 0.6350049914851136, + "grad_norm": 0.0, + "learning_rate": 6.210536173783732e-06, + "loss": 1.3408, + "step": 21627 + }, + { + "epoch": 0.6350343531622527, + "grad_norm": 0.0, + "learning_rate": 6.209656154484597e-06, + "loss": 1.1797, + "step": 21628 + }, + { + "epoch": 0.6350637148393916, + "grad_norm": 0.0, + "learning_rate": 6.208776169462507e-06, + "loss": 1.144, + "step": 21629 + }, + { + "epoch": 0.6350930765165306, + "grad_norm": 0.0, + "learning_rate": 6.207896218725429e-06, + "loss": 1.2578, + "step": 21630 + }, + { + "epoch": 0.6351224381936696, + "grad_norm": 0.0, + "learning_rate": 6.207016302281315e-06, + "loss": 1.1704, + "step": 21631 + }, + { + "epoch": 0.6351517998708086, + "grad_norm": 0.0, + "learning_rate": 6.206136420138127e-06, + "loss": 1.2637, + "step": 21632 + }, + { + "epoch": 0.6351811615479476, + "grad_norm": 0.0, + "learning_rate": 6.205256572303813e-06, + "loss": 1.2275, + "step": 21633 + }, + { + "epoch": 0.6352105232250866, + "grad_norm": 0.0, + "learning_rate": 6.204376758786341e-06, + "loss": 1.2422, + "step": 21634 + }, + { + "epoch": 0.6352398849022256, + "grad_norm": 0.0, + "learning_rate": 6.203496979593659e-06, + "loss": 1.2822, + "step": 21635 + }, + { + "epoch": 0.6352692465793646, + "grad_norm": 0.0, + "learning_rate": 6.20261723473373e-06, + "loss": 1.2046, + "step": 21636 + }, + { + "epoch": 0.6352986082565036, + "grad_norm": 0.0, + "learning_rate": 6.201737524214504e-06, + "loss": 1.334, + "step": 21637 + }, + { + "epoch": 0.6353279699336426, + "grad_norm": 0.0, + "learning_rate": 6.200857848043935e-06, + "loss": 1.2446, + "step": 21638 + }, + { + "epoch": 0.6353573316107816, + "grad_norm": 0.0, + "learning_rate": 6.199978206229986e-06, + "loss": 1.2031, + "step": 21639 + }, + { + "epoch": 0.6353866932879206, + "grad_norm": 0.0, + "learning_rate": 6.199098598780604e-06, + "loss": 1.2617, + "step": 21640 + }, + { + "epoch": 0.6354160549650596, + "grad_norm": 0.0, + "learning_rate": 6.19821902570375e-06, + "loss": 1.1846, + "step": 21641 + }, + { + "epoch": 0.6354454166421986, + "grad_norm": 0.0, + "learning_rate": 6.19733948700737e-06, + "loss": 1.3223, + "step": 21642 + }, + { + "epoch": 0.6354747783193376, + "grad_norm": 0.0, + "learning_rate": 6.196459982699428e-06, + "loss": 1.2334, + "step": 21643 + }, + { + "epoch": 0.6355041399964766, + "grad_norm": 0.0, + "learning_rate": 6.1955805127878694e-06, + "loss": 1.3301, + "step": 21644 + }, + { + "epoch": 0.6355335016736156, + "grad_norm": 0.0, + "learning_rate": 6.194701077280654e-06, + "loss": 1.2363, + "step": 21645 + }, + { + "epoch": 0.6355628633507546, + "grad_norm": 0.0, + "learning_rate": 6.193821676185729e-06, + "loss": 1.1816, + "step": 21646 + }, + { + "epoch": 0.6355922250278936, + "grad_norm": 0.0, + "learning_rate": 6.192942309511049e-06, + "loss": 1.2188, + "step": 21647 + }, + { + "epoch": 0.6356215867050325, + "grad_norm": 0.0, + "learning_rate": 6.192062977264569e-06, + "loss": 1.1968, + "step": 21648 + }, + { + "epoch": 0.6356509483821716, + "grad_norm": 0.0, + "learning_rate": 6.191183679454233e-06, + "loss": 1.2832, + "step": 21649 + }, + { + "epoch": 0.6356803100593106, + "grad_norm": 0.0, + "learning_rate": 6.190304416088004e-06, + "loss": 1.2744, + "step": 21650 + }, + { + "epoch": 0.6357096717364495, + "grad_norm": 0.0, + "learning_rate": 6.189425187173825e-06, + "loss": 1.124, + "step": 21651 + }, + { + "epoch": 0.6357390334135886, + "grad_norm": 0.0, + "learning_rate": 6.188545992719654e-06, + "loss": 1.3018, + "step": 21652 + }, + { + "epoch": 0.6357683950907276, + "grad_norm": 0.0, + "learning_rate": 6.187666832733432e-06, + "loss": 1.3213, + "step": 21653 + }, + { + "epoch": 0.6357977567678665, + "grad_norm": 0.0, + "learning_rate": 6.186787707223119e-06, + "loss": 1.3252, + "step": 21654 + }, + { + "epoch": 0.6358271184450056, + "grad_norm": 0.0, + "learning_rate": 6.185908616196661e-06, + "loss": 1.3516, + "step": 21655 + }, + { + "epoch": 0.6358564801221446, + "grad_norm": 0.0, + "learning_rate": 6.185029559662008e-06, + "loss": 1.2627, + "step": 21656 + }, + { + "epoch": 0.6358858417992835, + "grad_norm": 0.0, + "learning_rate": 6.18415053762711e-06, + "loss": 1.2402, + "step": 21657 + }, + { + "epoch": 0.6359152034764226, + "grad_norm": 0.0, + "learning_rate": 6.183271550099912e-06, + "loss": 1.375, + "step": 21658 + }, + { + "epoch": 0.6359445651535616, + "grad_norm": 0.0, + "learning_rate": 6.182392597088371e-06, + "loss": 1.2588, + "step": 21659 + }, + { + "epoch": 0.6359739268307005, + "grad_norm": 0.0, + "learning_rate": 6.18151367860043e-06, + "loss": 1.21, + "step": 21660 + }, + { + "epoch": 0.6360032885078396, + "grad_norm": 0.0, + "learning_rate": 6.18063479464404e-06, + "loss": 1.1626, + "step": 21661 + }, + { + "epoch": 0.6360326501849786, + "grad_norm": 0.0, + "learning_rate": 6.179755945227145e-06, + "loss": 1.2324, + "step": 21662 + }, + { + "epoch": 0.6360620118621175, + "grad_norm": 0.0, + "learning_rate": 6.178877130357699e-06, + "loss": 1.1641, + "step": 21663 + }, + { + "epoch": 0.6360913735392566, + "grad_norm": 0.0, + "learning_rate": 6.1779983500436435e-06, + "loss": 1.2012, + "step": 21664 + }, + { + "epoch": 0.6361207352163956, + "grad_norm": 0.0, + "learning_rate": 6.1771196042929295e-06, + "loss": 1.1821, + "step": 21665 + }, + { + "epoch": 0.6361500968935345, + "grad_norm": 0.0, + "learning_rate": 6.176240893113501e-06, + "loss": 1.1582, + "step": 21666 + }, + { + "epoch": 0.6361794585706736, + "grad_norm": 0.0, + "learning_rate": 6.175362216513303e-06, + "loss": 1.1406, + "step": 21667 + }, + { + "epoch": 0.6362088202478126, + "grad_norm": 0.0, + "learning_rate": 6.174483574500287e-06, + "loss": 1.2476, + "step": 21668 + }, + { + "epoch": 0.6362381819249515, + "grad_norm": 0.0, + "learning_rate": 6.173604967082394e-06, + "loss": 1.3018, + "step": 21669 + }, + { + "epoch": 0.6362675436020906, + "grad_norm": 0.0, + "learning_rate": 6.172726394267574e-06, + "loss": 1.1387, + "step": 21670 + }, + { + "epoch": 0.6362969052792296, + "grad_norm": 0.0, + "learning_rate": 6.171847856063766e-06, + "loss": 1.231, + "step": 21671 + }, + { + "epoch": 0.6363262669563685, + "grad_norm": 0.0, + "learning_rate": 6.170969352478921e-06, + "loss": 1.2563, + "step": 21672 + }, + { + "epoch": 0.6363556286335076, + "grad_norm": 0.0, + "learning_rate": 6.1700908835209774e-06, + "loss": 1.1963, + "step": 21673 + }, + { + "epoch": 0.6363849903106465, + "grad_norm": 0.0, + "learning_rate": 6.1692124491978854e-06, + "loss": 1.3057, + "step": 21674 + }, + { + "epoch": 0.6364143519877855, + "grad_norm": 0.0, + "learning_rate": 6.168334049517588e-06, + "loss": 1.2261, + "step": 21675 + }, + { + "epoch": 0.6364437136649246, + "grad_norm": 0.0, + "learning_rate": 6.167455684488024e-06, + "loss": 1.334, + "step": 21676 + }, + { + "epoch": 0.6364730753420635, + "grad_norm": 0.0, + "learning_rate": 6.1665773541171415e-06, + "loss": 1.3047, + "step": 21677 + }, + { + "epoch": 0.6365024370192025, + "grad_norm": 0.0, + "learning_rate": 6.1656990584128785e-06, + "loss": 1.1807, + "step": 21678 + }, + { + "epoch": 0.6365317986963416, + "grad_norm": 0.0, + "learning_rate": 6.164820797383184e-06, + "loss": 1.2168, + "step": 21679 + }, + { + "epoch": 0.6365611603734805, + "grad_norm": 0.0, + "learning_rate": 6.163942571035997e-06, + "loss": 1.2334, + "step": 21680 + }, + { + "epoch": 0.6365905220506195, + "grad_norm": 0.0, + "learning_rate": 6.16306437937926e-06, + "loss": 1.252, + "step": 21681 + }, + { + "epoch": 0.6366198837277586, + "grad_norm": 0.0, + "learning_rate": 6.16218622242091e-06, + "loss": 1.2598, + "step": 21682 + }, + { + "epoch": 0.6366492454048975, + "grad_norm": 0.0, + "learning_rate": 6.161308100168899e-06, + "loss": 1.2524, + "step": 21683 + }, + { + "epoch": 0.6366786070820365, + "grad_norm": 0.0, + "learning_rate": 6.160430012631158e-06, + "loss": 1.1943, + "step": 21684 + }, + { + "epoch": 0.6367079687591756, + "grad_norm": 0.0, + "learning_rate": 6.159551959815635e-06, + "loss": 1.1782, + "step": 21685 + }, + { + "epoch": 0.6367373304363145, + "grad_norm": 0.0, + "learning_rate": 6.158673941730266e-06, + "loss": 1.2578, + "step": 21686 + }, + { + "epoch": 0.6367666921134535, + "grad_norm": 0.0, + "learning_rate": 6.157795958382991e-06, + "loss": 1.0547, + "step": 21687 + }, + { + "epoch": 0.6367960537905926, + "grad_norm": 0.0, + "learning_rate": 6.156918009781752e-06, + "loss": 1.1792, + "step": 21688 + }, + { + "epoch": 0.6368254154677315, + "grad_norm": 0.0, + "learning_rate": 6.156040095934488e-06, + "loss": 1.21, + "step": 21689 + }, + { + "epoch": 0.6368547771448705, + "grad_norm": 0.0, + "learning_rate": 6.155162216849138e-06, + "loss": 1.3008, + "step": 21690 + }, + { + "epoch": 0.6368841388220096, + "grad_norm": 0.0, + "learning_rate": 6.154284372533638e-06, + "loss": 1.3428, + "step": 21691 + }, + { + "epoch": 0.6369135004991485, + "grad_norm": 0.0, + "learning_rate": 6.153406562995931e-06, + "loss": 1.3037, + "step": 21692 + }, + { + "epoch": 0.6369428621762875, + "grad_norm": 0.0, + "learning_rate": 6.152528788243955e-06, + "loss": 1.1631, + "step": 21693 + }, + { + "epoch": 0.6369722238534266, + "grad_norm": 0.0, + "learning_rate": 6.151651048285648e-06, + "loss": 1.2266, + "step": 21694 + }, + { + "epoch": 0.6370015855305655, + "grad_norm": 0.0, + "learning_rate": 6.150773343128944e-06, + "loss": 1.334, + "step": 21695 + }, + { + "epoch": 0.6370309472077045, + "grad_norm": 0.0, + "learning_rate": 6.1498956727817815e-06, + "loss": 1.2656, + "step": 21696 + }, + { + "epoch": 0.6370603088848436, + "grad_norm": 0.0, + "learning_rate": 6.1490180372521e-06, + "loss": 1.3013, + "step": 21697 + }, + { + "epoch": 0.6370896705619825, + "grad_norm": 0.0, + "learning_rate": 6.1481404365478315e-06, + "loss": 1.2432, + "step": 21698 + }, + { + "epoch": 0.6371190322391215, + "grad_norm": 0.0, + "learning_rate": 6.14726287067692e-06, + "loss": 1.1782, + "step": 21699 + }, + { + "epoch": 0.6371483939162605, + "grad_norm": 0.0, + "learning_rate": 6.146385339647294e-06, + "loss": 1.2891, + "step": 21700 + }, + { + "epoch": 0.6371777555933995, + "grad_norm": 0.0, + "learning_rate": 6.1455078434668935e-06, + "loss": 1.3086, + "step": 21701 + }, + { + "epoch": 0.6372071172705385, + "grad_norm": 0.0, + "learning_rate": 6.144630382143651e-06, + "loss": 1.2021, + "step": 21702 + }, + { + "epoch": 0.6372364789476775, + "grad_norm": 0.0, + "learning_rate": 6.143752955685505e-06, + "loss": 1.188, + "step": 21703 + }, + { + "epoch": 0.6372658406248165, + "grad_norm": 0.0, + "learning_rate": 6.14287556410039e-06, + "loss": 1.1621, + "step": 21704 + }, + { + "epoch": 0.6372952023019555, + "grad_norm": 0.0, + "learning_rate": 6.141998207396237e-06, + "loss": 1.3672, + "step": 21705 + }, + { + "epoch": 0.6373245639790945, + "grad_norm": 0.0, + "learning_rate": 6.141120885580985e-06, + "loss": 1.4111, + "step": 21706 + }, + { + "epoch": 0.6373539256562335, + "grad_norm": 0.0, + "learning_rate": 6.1402435986625586e-06, + "loss": 1.2363, + "step": 21707 + }, + { + "epoch": 0.6373832873333725, + "grad_norm": 0.0, + "learning_rate": 6.139366346648904e-06, + "loss": 1.2021, + "step": 21708 + }, + { + "epoch": 0.6374126490105115, + "grad_norm": 0.0, + "learning_rate": 6.138489129547945e-06, + "loss": 1.2402, + "step": 21709 + }, + { + "epoch": 0.6374420106876505, + "grad_norm": 0.0, + "learning_rate": 6.137611947367621e-06, + "loss": 1.2314, + "step": 21710 + }, + { + "epoch": 0.6374713723647895, + "grad_norm": 0.0, + "learning_rate": 6.136734800115856e-06, + "loss": 1.3618, + "step": 21711 + }, + { + "epoch": 0.6375007340419285, + "grad_norm": 0.0, + "learning_rate": 6.135857687800593e-06, + "loss": 1.1855, + "step": 21712 + }, + { + "epoch": 0.6375300957190675, + "grad_norm": 0.0, + "learning_rate": 6.134980610429755e-06, + "loss": 1.2568, + "step": 21713 + }, + { + "epoch": 0.6375594573962065, + "grad_norm": 0.0, + "learning_rate": 6.1341035680112804e-06, + "loss": 0.9932, + "step": 21714 + }, + { + "epoch": 0.6375888190733454, + "grad_norm": 0.0, + "learning_rate": 6.133226560553097e-06, + "loss": 1.1421, + "step": 21715 + }, + { + "epoch": 0.6376181807504845, + "grad_norm": 0.0, + "learning_rate": 6.132349588063132e-06, + "loss": 1.2529, + "step": 21716 + }, + { + "epoch": 0.6376475424276234, + "grad_norm": 0.0, + "learning_rate": 6.131472650549324e-06, + "loss": 1.3818, + "step": 21717 + }, + { + "epoch": 0.6376769041047624, + "grad_norm": 0.0, + "learning_rate": 6.130595748019599e-06, + "loss": 1.1328, + "step": 21718 + }, + { + "epoch": 0.6377062657819015, + "grad_norm": 0.0, + "learning_rate": 6.129718880481888e-06, + "loss": 1.3311, + "step": 21719 + }, + { + "epoch": 0.6377356274590404, + "grad_norm": 0.0, + "learning_rate": 6.128842047944117e-06, + "loss": 1.2441, + "step": 21720 + }, + { + "epoch": 0.6377649891361794, + "grad_norm": 0.0, + "learning_rate": 6.127965250414225e-06, + "loss": 1.2002, + "step": 21721 + }, + { + "epoch": 0.6377943508133185, + "grad_norm": 0.0, + "learning_rate": 6.127088487900128e-06, + "loss": 1.251, + "step": 21722 + }, + { + "epoch": 0.6378237124904574, + "grad_norm": 0.0, + "learning_rate": 6.126211760409766e-06, + "loss": 1.2539, + "step": 21723 + }, + { + "epoch": 0.6378530741675964, + "grad_norm": 0.0, + "learning_rate": 6.125335067951064e-06, + "loss": 1.3008, + "step": 21724 + }, + { + "epoch": 0.6378824358447355, + "grad_norm": 0.0, + "learning_rate": 6.124458410531946e-06, + "loss": 1.2808, + "step": 21725 + }, + { + "epoch": 0.6379117975218744, + "grad_norm": 0.0, + "learning_rate": 6.123581788160346e-06, + "loss": 1.1987, + "step": 21726 + }, + { + "epoch": 0.6379411591990134, + "grad_norm": 0.0, + "learning_rate": 6.122705200844184e-06, + "loss": 1.187, + "step": 21727 + }, + { + "epoch": 0.6379705208761525, + "grad_norm": 0.0, + "learning_rate": 6.121828648591397e-06, + "loss": 1.3516, + "step": 21728 + }, + { + "epoch": 0.6379998825532914, + "grad_norm": 0.0, + "learning_rate": 6.1209521314099026e-06, + "loss": 1.1372, + "step": 21729 + }, + { + "epoch": 0.6380292442304304, + "grad_norm": 0.0, + "learning_rate": 6.120075649307635e-06, + "loss": 1.147, + "step": 21730 + }, + { + "epoch": 0.6380586059075695, + "grad_norm": 0.0, + "learning_rate": 6.119199202292514e-06, + "loss": 1.2554, + "step": 21731 + }, + { + "epoch": 0.6380879675847084, + "grad_norm": 0.0, + "learning_rate": 6.11832279037247e-06, + "loss": 1.3643, + "step": 21732 + }, + { + "epoch": 0.6381173292618474, + "grad_norm": 0.0, + "learning_rate": 6.117446413555428e-06, + "loss": 1.1582, + "step": 21733 + }, + { + "epoch": 0.6381466909389865, + "grad_norm": 0.0, + "learning_rate": 6.116570071849313e-06, + "loss": 1.2969, + "step": 21734 + }, + { + "epoch": 0.6381760526161254, + "grad_norm": 0.0, + "learning_rate": 6.115693765262048e-06, + "loss": 1.2998, + "step": 21735 + }, + { + "epoch": 0.6382054142932644, + "grad_norm": 0.0, + "learning_rate": 6.114817493801557e-06, + "loss": 1.1885, + "step": 21736 + }, + { + "epoch": 0.6382347759704035, + "grad_norm": 0.0, + "learning_rate": 6.113941257475769e-06, + "loss": 1.1348, + "step": 21737 + }, + { + "epoch": 0.6382641376475424, + "grad_norm": 0.0, + "learning_rate": 6.113065056292604e-06, + "loss": 1.2393, + "step": 21738 + }, + { + "epoch": 0.6382934993246814, + "grad_norm": 0.0, + "learning_rate": 6.11218889025999e-06, + "loss": 1.3379, + "step": 21739 + }, + { + "epoch": 0.6383228610018205, + "grad_norm": 0.0, + "learning_rate": 6.1113127593858435e-06, + "loss": 1.208, + "step": 21740 + }, + { + "epoch": 0.6383522226789594, + "grad_norm": 0.0, + "learning_rate": 6.1104366636780944e-06, + "loss": 1.2607, + "step": 21741 + }, + { + "epoch": 0.6383815843560984, + "grad_norm": 0.0, + "learning_rate": 6.109560603144662e-06, + "loss": 1.3672, + "step": 21742 + }, + { + "epoch": 0.6384109460332374, + "grad_norm": 0.0, + "learning_rate": 6.10868457779347e-06, + "loss": 1.2017, + "step": 21743 + }, + { + "epoch": 0.6384403077103764, + "grad_norm": 0.0, + "learning_rate": 6.1078085876324415e-06, + "loss": 1.1729, + "step": 21744 + }, + { + "epoch": 0.6384696693875154, + "grad_norm": 0.0, + "learning_rate": 6.106932632669491e-06, + "loss": 1.3486, + "step": 21745 + }, + { + "epoch": 0.6384990310646544, + "grad_norm": 0.0, + "learning_rate": 6.106056712912554e-06, + "loss": 1.2549, + "step": 21746 + }, + { + "epoch": 0.6385283927417934, + "grad_norm": 0.0, + "learning_rate": 6.105180828369535e-06, + "loss": 1.1826, + "step": 21747 + }, + { + "epoch": 0.6385577544189324, + "grad_norm": 0.0, + "learning_rate": 6.104304979048368e-06, + "loss": 1.2607, + "step": 21748 + }, + { + "epoch": 0.6385871160960714, + "grad_norm": 0.0, + "learning_rate": 6.103429164956968e-06, + "loss": 1.1772, + "step": 21749 + }, + { + "epoch": 0.6386164777732104, + "grad_norm": 0.0, + "learning_rate": 6.102553386103256e-06, + "loss": 1.2139, + "step": 21750 + }, + { + "epoch": 0.6386458394503494, + "grad_norm": 0.0, + "learning_rate": 6.10167764249515e-06, + "loss": 1.1294, + "step": 21751 + }, + { + "epoch": 0.6386752011274884, + "grad_norm": 0.0, + "learning_rate": 6.100801934140574e-06, + "loss": 1.1753, + "step": 21752 + }, + { + "epoch": 0.6387045628046274, + "grad_norm": 0.0, + "learning_rate": 6.099926261047443e-06, + "loss": 1.0698, + "step": 21753 + }, + { + "epoch": 0.6387339244817664, + "grad_norm": 0.0, + "learning_rate": 6.09905062322368e-06, + "loss": 1.25, + "step": 21754 + }, + { + "epoch": 0.6387632861589054, + "grad_norm": 0.0, + "learning_rate": 6.098175020677202e-06, + "loss": 1.2598, + "step": 21755 + }, + { + "epoch": 0.6387926478360444, + "grad_norm": 0.0, + "learning_rate": 6.097299453415922e-06, + "loss": 1.2666, + "step": 21756 + }, + { + "epoch": 0.6388220095131834, + "grad_norm": 0.0, + "learning_rate": 6.096423921447765e-06, + "loss": 1.3271, + "step": 21757 + }, + { + "epoch": 0.6388513711903224, + "grad_norm": 0.0, + "learning_rate": 6.095548424780645e-06, + "loss": 1.2031, + "step": 21758 + }, + { + "epoch": 0.6388807328674614, + "grad_norm": 0.0, + "learning_rate": 6.094672963422484e-06, + "loss": 1.2617, + "step": 21759 + }, + { + "epoch": 0.6389100945446003, + "grad_norm": 0.0, + "learning_rate": 6.09379753738119e-06, + "loss": 1.1602, + "step": 21760 + }, + { + "epoch": 0.6389394562217394, + "grad_norm": 0.0, + "learning_rate": 6.092922146664689e-06, + "loss": 1.1455, + "step": 21761 + }, + { + "epoch": 0.6389688178988784, + "grad_norm": 0.0, + "learning_rate": 6.0920467912808925e-06, + "loss": 1.2705, + "step": 21762 + }, + { + "epoch": 0.6389981795760173, + "grad_norm": 0.0, + "learning_rate": 6.09117147123772e-06, + "loss": 1.2734, + "step": 21763 + }, + { + "epoch": 0.6390275412531564, + "grad_norm": 0.0, + "learning_rate": 6.090296186543084e-06, + "loss": 1.1738, + "step": 21764 + }, + { + "epoch": 0.6390569029302954, + "grad_norm": 0.0, + "learning_rate": 6.089420937204897e-06, + "loss": 1.2197, + "step": 21765 + }, + { + "epoch": 0.6390862646074343, + "grad_norm": 0.0, + "learning_rate": 6.088545723231083e-06, + "loss": 1.2759, + "step": 21766 + }, + { + "epoch": 0.6391156262845734, + "grad_norm": 0.0, + "learning_rate": 6.087670544629549e-06, + "loss": 1.3027, + "step": 21767 + }, + { + "epoch": 0.6391449879617124, + "grad_norm": 0.0, + "learning_rate": 6.0867954014082144e-06, + "loss": 1.29, + "step": 21768 + }, + { + "epoch": 0.6391743496388513, + "grad_norm": 0.0, + "learning_rate": 6.085920293574987e-06, + "loss": 1.2842, + "step": 21769 + }, + { + "epoch": 0.6392037113159904, + "grad_norm": 0.0, + "learning_rate": 6.085045221137788e-06, + "loss": 1.3154, + "step": 21770 + }, + { + "epoch": 0.6392330729931294, + "grad_norm": 0.0, + "learning_rate": 6.084170184104528e-06, + "loss": 1.333, + "step": 21771 + }, + { + "epoch": 0.6392624346702683, + "grad_norm": 0.0, + "learning_rate": 6.08329518248312e-06, + "loss": 1.2354, + "step": 21772 + }, + { + "epoch": 0.6392917963474074, + "grad_norm": 0.0, + "learning_rate": 6.0824202162814776e-06, + "loss": 1.2563, + "step": 21773 + }, + { + "epoch": 0.6393211580245464, + "grad_norm": 0.0, + "learning_rate": 6.081545285507511e-06, + "loss": 1.4395, + "step": 21774 + }, + { + "epoch": 0.6393505197016853, + "grad_norm": 0.0, + "learning_rate": 6.080670390169134e-06, + "loss": 1.3838, + "step": 21775 + }, + { + "epoch": 0.6393798813788244, + "grad_norm": 0.0, + "learning_rate": 6.079795530274255e-06, + "loss": 1.2236, + "step": 21776 + }, + { + "epoch": 0.6394092430559634, + "grad_norm": 0.0, + "learning_rate": 6.078920705830794e-06, + "loss": 1.0801, + "step": 21777 + }, + { + "epoch": 0.6394386047331023, + "grad_norm": 0.0, + "learning_rate": 6.078045916846655e-06, + "loss": 1.1978, + "step": 21778 + }, + { + "epoch": 0.6394679664102414, + "grad_norm": 0.0, + "learning_rate": 6.077171163329752e-06, + "loss": 1.3047, + "step": 21779 + }, + { + "epoch": 0.6394973280873804, + "grad_norm": 0.0, + "learning_rate": 6.076296445287992e-06, + "loss": 1.376, + "step": 21780 + }, + { + "epoch": 0.6395266897645193, + "grad_norm": 0.0, + "learning_rate": 6.075421762729292e-06, + "loss": 1.2764, + "step": 21781 + }, + { + "epoch": 0.6395560514416584, + "grad_norm": 0.0, + "learning_rate": 6.074547115661556e-06, + "loss": 1.3672, + "step": 21782 + }, + { + "epoch": 0.6395854131187974, + "grad_norm": 0.0, + "learning_rate": 6.073672504092698e-06, + "loss": 1.2344, + "step": 21783 + }, + { + "epoch": 0.6396147747959363, + "grad_norm": 0.0, + "learning_rate": 6.072797928030625e-06, + "loss": 1.3457, + "step": 21784 + }, + { + "epoch": 0.6396441364730754, + "grad_norm": 0.0, + "learning_rate": 6.071923387483242e-06, + "loss": 1.1846, + "step": 21785 + }, + { + "epoch": 0.6396734981502143, + "grad_norm": 0.0, + "learning_rate": 6.071048882458466e-06, + "loss": 1.3389, + "step": 21786 + }, + { + "epoch": 0.6397028598273533, + "grad_norm": 0.0, + "learning_rate": 6.070174412964198e-06, + "loss": 1.3379, + "step": 21787 + }, + { + "epoch": 0.6397322215044924, + "grad_norm": 0.0, + "learning_rate": 6.069299979008353e-06, + "loss": 1.3545, + "step": 21788 + }, + { + "epoch": 0.6397615831816313, + "grad_norm": 0.0, + "learning_rate": 6.068425580598829e-06, + "loss": 1.2549, + "step": 21789 + }, + { + "epoch": 0.6397909448587703, + "grad_norm": 0.0, + "learning_rate": 6.0675512177435455e-06, + "loss": 1.2773, + "step": 21790 + }, + { + "epoch": 0.6398203065359094, + "grad_norm": 0.0, + "learning_rate": 6.0666768904504e-06, + "loss": 1.374, + "step": 21791 + }, + { + "epoch": 0.6398496682130483, + "grad_norm": 0.0, + "learning_rate": 6.0658025987273064e-06, + "loss": 1.1797, + "step": 21792 + }, + { + "epoch": 0.6398790298901873, + "grad_norm": 0.0, + "learning_rate": 6.064928342582166e-06, + "loss": 1.21, + "step": 21793 + }, + { + "epoch": 0.6399083915673264, + "grad_norm": 0.0, + "learning_rate": 6.064054122022882e-06, + "loss": 1.3145, + "step": 21794 + }, + { + "epoch": 0.6399377532444653, + "grad_norm": 0.0, + "learning_rate": 6.063179937057369e-06, + "loss": 1.0488, + "step": 21795 + }, + { + "epoch": 0.6399671149216043, + "grad_norm": 0.0, + "learning_rate": 6.0623057876935255e-06, + "loss": 1.3799, + "step": 21796 + }, + { + "epoch": 0.6399964765987434, + "grad_norm": 0.0, + "learning_rate": 6.061431673939262e-06, + "loss": 1.1992, + "step": 21797 + }, + { + "epoch": 0.6400258382758823, + "grad_norm": 0.0, + "learning_rate": 6.060557595802476e-06, + "loss": 1.2988, + "step": 21798 + }, + { + "epoch": 0.6400551999530213, + "grad_norm": 0.0, + "learning_rate": 6.059683553291085e-06, + "loss": 1.25, + "step": 21799 + }, + { + "epoch": 0.6400845616301604, + "grad_norm": 0.0, + "learning_rate": 6.058809546412976e-06, + "loss": 1.2715, + "step": 21800 + }, + { + "epoch": 0.6401139233072993, + "grad_norm": 0.0, + "learning_rate": 6.057935575176066e-06, + "loss": 1.2441, + "step": 21801 + }, + { + "epoch": 0.6401432849844383, + "grad_norm": 0.0, + "learning_rate": 6.057061639588253e-06, + "loss": 1.3398, + "step": 21802 + }, + { + "epoch": 0.6401726466615774, + "grad_norm": 0.0, + "learning_rate": 6.0561877396574435e-06, + "loss": 1.293, + "step": 21803 + }, + { + "epoch": 0.6402020083387163, + "grad_norm": 0.0, + "learning_rate": 6.0553138753915375e-06, + "loss": 1.3154, + "step": 21804 + }, + { + "epoch": 0.6402313700158553, + "grad_norm": 0.0, + "learning_rate": 6.054440046798435e-06, + "loss": 1.2852, + "step": 21805 + }, + { + "epoch": 0.6402607316929944, + "grad_norm": 0.0, + "learning_rate": 6.053566253886045e-06, + "loss": 1.2861, + "step": 21806 + }, + { + "epoch": 0.6402900933701333, + "grad_norm": 0.0, + "learning_rate": 6.052692496662265e-06, + "loss": 1.1484, + "step": 21807 + }, + { + "epoch": 0.6403194550472723, + "grad_norm": 0.0, + "learning_rate": 6.051818775134999e-06, + "loss": 1.415, + "step": 21808 + }, + { + "epoch": 0.6403488167244114, + "grad_norm": 0.0, + "learning_rate": 6.050945089312142e-06, + "loss": 1.2729, + "step": 21809 + }, + { + "epoch": 0.6403781784015503, + "grad_norm": 0.0, + "learning_rate": 6.050071439201606e-06, + "loss": 1.0674, + "step": 21810 + }, + { + "epoch": 0.6404075400786893, + "grad_norm": 0.0, + "learning_rate": 6.0491978248112816e-06, + "loss": 1.2588, + "step": 21811 + }, + { + "epoch": 0.6404369017558283, + "grad_norm": 0.0, + "learning_rate": 6.048324246149076e-06, + "loss": 1.3145, + "step": 21812 + }, + { + "epoch": 0.6404662634329673, + "grad_norm": 0.0, + "learning_rate": 6.047450703222887e-06, + "loss": 1.2568, + "step": 21813 + }, + { + "epoch": 0.6404956251101063, + "grad_norm": 0.0, + "learning_rate": 6.046577196040608e-06, + "loss": 1.3027, + "step": 21814 + }, + { + "epoch": 0.6405249867872453, + "grad_norm": 0.0, + "learning_rate": 6.04570372461015e-06, + "loss": 1.2168, + "step": 21815 + }, + { + "epoch": 0.6405543484643843, + "grad_norm": 0.0, + "learning_rate": 6.044830288939403e-06, + "loss": 1.3926, + "step": 21816 + }, + { + "epoch": 0.6405837101415233, + "grad_norm": 0.0, + "learning_rate": 6.043956889036269e-06, + "loss": 1.1978, + "step": 21817 + }, + { + "epoch": 0.6406130718186622, + "grad_norm": 0.0, + "learning_rate": 6.043083524908644e-06, + "loss": 1.3018, + "step": 21818 + }, + { + "epoch": 0.6406424334958013, + "grad_norm": 0.0, + "learning_rate": 6.042210196564431e-06, + "loss": 1.2568, + "step": 21819 + }, + { + "epoch": 0.6406717951729403, + "grad_norm": 0.0, + "learning_rate": 6.041336904011523e-06, + "loss": 1.3291, + "step": 21820 + }, + { + "epoch": 0.6407011568500792, + "grad_norm": 0.0, + "learning_rate": 6.0404636472578216e-06, + "loss": 1.2588, + "step": 21821 + }, + { + "epoch": 0.6407305185272183, + "grad_norm": 0.0, + "learning_rate": 6.039590426311217e-06, + "loss": 1.2461, + "step": 21822 + }, + { + "epoch": 0.6407598802043573, + "grad_norm": 0.0, + "learning_rate": 6.038717241179615e-06, + "loss": 1.2695, + "step": 21823 + }, + { + "epoch": 0.6407892418814962, + "grad_norm": 0.0, + "learning_rate": 6.037844091870909e-06, + "loss": 1.2627, + "step": 21824 + }, + { + "epoch": 0.6408186035586353, + "grad_norm": 0.0, + "learning_rate": 6.036970978392989e-06, + "loss": 1.2939, + "step": 21825 + }, + { + "epoch": 0.6408479652357743, + "grad_norm": 0.0, + "learning_rate": 6.036097900753757e-06, + "loss": 1.2134, + "step": 21826 + }, + { + "epoch": 0.6408773269129132, + "grad_norm": 0.0, + "learning_rate": 6.035224858961106e-06, + "loss": 1.3066, + "step": 21827 + }, + { + "epoch": 0.6409066885900523, + "grad_norm": 0.0, + "learning_rate": 6.034351853022935e-06, + "loss": 1.2158, + "step": 21828 + }, + { + "epoch": 0.6409360502671912, + "grad_norm": 0.0, + "learning_rate": 6.03347888294713e-06, + "loss": 1.2026, + "step": 21829 + }, + { + "epoch": 0.6409654119443302, + "grad_norm": 0.0, + "learning_rate": 6.0326059487415964e-06, + "loss": 1.2656, + "step": 21830 + }, + { + "epoch": 0.6409947736214693, + "grad_norm": 0.0, + "learning_rate": 6.031733050414222e-06, + "loss": 1.3037, + "step": 21831 + }, + { + "epoch": 0.6410241352986082, + "grad_norm": 0.0, + "learning_rate": 6.030860187972903e-06, + "loss": 1.1763, + "step": 21832 + }, + { + "epoch": 0.6410534969757472, + "grad_norm": 0.0, + "learning_rate": 6.029987361425531e-06, + "loss": 1.1128, + "step": 21833 + }, + { + "epoch": 0.6410828586528863, + "grad_norm": 0.0, + "learning_rate": 6.029114570779997e-06, + "loss": 1.144, + "step": 21834 + }, + { + "epoch": 0.6411122203300252, + "grad_norm": 0.0, + "learning_rate": 6.0282418160442e-06, + "loss": 1.0576, + "step": 21835 + }, + { + "epoch": 0.6411415820071642, + "grad_norm": 0.0, + "learning_rate": 6.027369097226028e-06, + "loss": 1.2446, + "step": 21836 + }, + { + "epoch": 0.6411709436843033, + "grad_norm": 0.0, + "learning_rate": 6.026496414333377e-06, + "loss": 1.2617, + "step": 21837 + }, + { + "epoch": 0.6412003053614422, + "grad_norm": 0.0, + "learning_rate": 6.025623767374133e-06, + "loss": 1.3584, + "step": 21838 + }, + { + "epoch": 0.6412296670385812, + "grad_norm": 0.0, + "learning_rate": 6.024751156356194e-06, + "loss": 1.2461, + "step": 21839 + }, + { + "epoch": 0.6412590287157203, + "grad_norm": 0.0, + "learning_rate": 6.023878581287447e-06, + "loss": 1.311, + "step": 21840 + }, + { + "epoch": 0.6412883903928592, + "grad_norm": 0.0, + "learning_rate": 6.023006042175787e-06, + "loss": 1.231, + "step": 21841 + }, + { + "epoch": 0.6413177520699982, + "grad_norm": 0.0, + "learning_rate": 6.0221335390291e-06, + "loss": 1.3281, + "step": 21842 + }, + { + "epoch": 0.6413471137471373, + "grad_norm": 0.0, + "learning_rate": 6.021261071855276e-06, + "loss": 1.2188, + "step": 21843 + }, + { + "epoch": 0.6413764754242762, + "grad_norm": 0.0, + "learning_rate": 6.020388640662209e-06, + "loss": 1.1992, + "step": 21844 + }, + { + "epoch": 0.6414058371014152, + "grad_norm": 0.0, + "learning_rate": 6.019516245457786e-06, + "loss": 1.3867, + "step": 21845 + }, + { + "epoch": 0.6414351987785543, + "grad_norm": 0.0, + "learning_rate": 6.0186438862499e-06, + "loss": 1.1748, + "step": 21846 + }, + { + "epoch": 0.6414645604556932, + "grad_norm": 0.0, + "learning_rate": 6.0177715630464315e-06, + "loss": 1.2598, + "step": 21847 + }, + { + "epoch": 0.6414939221328322, + "grad_norm": 0.0, + "learning_rate": 6.016899275855279e-06, + "loss": 1.2314, + "step": 21848 + }, + { + "epoch": 0.6415232838099713, + "grad_norm": 0.0, + "learning_rate": 6.016027024684326e-06, + "loss": 1.3984, + "step": 21849 + }, + { + "epoch": 0.6415526454871102, + "grad_norm": 0.0, + "learning_rate": 6.01515480954146e-06, + "loss": 1.1885, + "step": 21850 + }, + { + "epoch": 0.6415820071642492, + "grad_norm": 0.0, + "learning_rate": 6.0142826304345695e-06, + "loss": 1.1904, + "step": 21851 + }, + { + "epoch": 0.6416113688413883, + "grad_norm": 0.0, + "learning_rate": 6.013410487371543e-06, + "loss": 1.291, + "step": 21852 + }, + { + "epoch": 0.6416407305185272, + "grad_norm": 0.0, + "learning_rate": 6.012538380360266e-06, + "loss": 1.269, + "step": 21853 + }, + { + "epoch": 0.6416700921956662, + "grad_norm": 0.0, + "learning_rate": 6.011666309408624e-06, + "loss": 1.188, + "step": 21854 + }, + { + "epoch": 0.6416994538728052, + "grad_norm": 0.0, + "learning_rate": 6.010794274524508e-06, + "loss": 1.251, + "step": 21855 + }, + { + "epoch": 0.6417288155499442, + "grad_norm": 0.0, + "learning_rate": 6.009922275715798e-06, + "loss": 1.3008, + "step": 21856 + }, + { + "epoch": 0.6417581772270832, + "grad_norm": 0.0, + "learning_rate": 6.009050312990386e-06, + "loss": 1.1396, + "step": 21857 + }, + { + "epoch": 0.6417875389042222, + "grad_norm": 0.0, + "learning_rate": 6.008178386356149e-06, + "loss": 1.2266, + "step": 21858 + }, + { + "epoch": 0.6418169005813612, + "grad_norm": 0.0, + "learning_rate": 6.007306495820981e-06, + "loss": 1.3301, + "step": 21859 + }, + { + "epoch": 0.6418462622585002, + "grad_norm": 0.0, + "learning_rate": 6.006434641392762e-06, + "loss": 1.3359, + "step": 21860 + }, + { + "epoch": 0.6418756239356392, + "grad_norm": 0.0, + "learning_rate": 6.005562823079378e-06, + "loss": 1.1953, + "step": 21861 + }, + { + "epoch": 0.6419049856127782, + "grad_norm": 0.0, + "learning_rate": 6.004691040888714e-06, + "loss": 1.1987, + "step": 21862 + }, + { + "epoch": 0.6419343472899172, + "grad_norm": 0.0, + "learning_rate": 6.003819294828648e-06, + "loss": 1.189, + "step": 21863 + }, + { + "epoch": 0.6419637089670562, + "grad_norm": 0.0, + "learning_rate": 6.002947584907071e-06, + "loss": 1.2559, + "step": 21864 + }, + { + "epoch": 0.6419930706441952, + "grad_norm": 0.0, + "learning_rate": 6.002075911131859e-06, + "loss": 1.334, + "step": 21865 + }, + { + "epoch": 0.6420224323213342, + "grad_norm": 0.0, + "learning_rate": 6.001204273510903e-06, + "loss": 1.4258, + "step": 21866 + }, + { + "epoch": 0.6420517939984732, + "grad_norm": 0.0, + "learning_rate": 6.000332672052076e-06, + "loss": 1.1855, + "step": 21867 + }, + { + "epoch": 0.6420811556756122, + "grad_norm": 0.0, + "learning_rate": 5.999461106763268e-06, + "loss": 1.1797, + "step": 21868 + }, + { + "epoch": 0.6421105173527512, + "grad_norm": 0.0, + "learning_rate": 5.998589577652356e-06, + "loss": 1.0884, + "step": 21869 + }, + { + "epoch": 0.6421398790298902, + "grad_norm": 0.0, + "learning_rate": 5.997718084727226e-06, + "loss": 1.2754, + "step": 21870 + }, + { + "epoch": 0.6421692407070292, + "grad_norm": 0.0, + "learning_rate": 5.996846627995753e-06, + "loss": 1.3506, + "step": 21871 + }, + { + "epoch": 0.6421986023841681, + "grad_norm": 0.0, + "learning_rate": 5.995975207465824e-06, + "loss": 1.4629, + "step": 21872 + }, + { + "epoch": 0.6422279640613072, + "grad_norm": 0.0, + "learning_rate": 5.9951038231453165e-06, + "loss": 1.1704, + "step": 21873 + }, + { + "epoch": 0.6422573257384462, + "grad_norm": 0.0, + "learning_rate": 5.99423247504211e-06, + "loss": 1.4404, + "step": 21874 + }, + { + "epoch": 0.6422866874155851, + "grad_norm": 0.0, + "learning_rate": 5.993361163164086e-06, + "loss": 1.1372, + "step": 21875 + }, + { + "epoch": 0.6423160490927242, + "grad_norm": 0.0, + "learning_rate": 5.992489887519122e-06, + "loss": 1.2588, + "step": 21876 + }, + { + "epoch": 0.6423454107698632, + "grad_norm": 0.0, + "learning_rate": 5.9916186481150985e-06, + "loss": 1.1562, + "step": 21877 + }, + { + "epoch": 0.6423747724470021, + "grad_norm": 0.0, + "learning_rate": 5.990747444959892e-06, + "loss": 1.0825, + "step": 21878 + }, + { + "epoch": 0.6424041341241412, + "grad_norm": 0.0, + "learning_rate": 5.989876278061386e-06, + "loss": 1.1675, + "step": 21879 + }, + { + "epoch": 0.6424334958012802, + "grad_norm": 0.0, + "learning_rate": 5.989005147427455e-06, + "loss": 1.2251, + "step": 21880 + }, + { + "epoch": 0.6424628574784191, + "grad_norm": 0.0, + "learning_rate": 5.98813405306598e-06, + "loss": 1.2109, + "step": 21881 + }, + { + "epoch": 0.6424922191555582, + "grad_norm": 0.0, + "learning_rate": 5.987262994984835e-06, + "loss": 1.2812, + "step": 21882 + }, + { + "epoch": 0.6425215808326972, + "grad_norm": 0.0, + "learning_rate": 5.986391973191894e-06, + "loss": 1.2891, + "step": 21883 + }, + { + "epoch": 0.6425509425098361, + "grad_norm": 0.0, + "learning_rate": 5.985520987695043e-06, + "loss": 1.2422, + "step": 21884 + }, + { + "epoch": 0.6425803041869752, + "grad_norm": 0.0, + "learning_rate": 5.9846500385021515e-06, + "loss": 1.2744, + "step": 21885 + }, + { + "epoch": 0.6426096658641142, + "grad_norm": 0.0, + "learning_rate": 5.983779125621101e-06, + "loss": 1.1592, + "step": 21886 + }, + { + "epoch": 0.6426390275412531, + "grad_norm": 0.0, + "learning_rate": 5.982908249059759e-06, + "loss": 0.938, + "step": 21887 + }, + { + "epoch": 0.6426683892183922, + "grad_norm": 0.0, + "learning_rate": 5.982037408826011e-06, + "loss": 1.3584, + "step": 21888 + }, + { + "epoch": 0.6426977508955312, + "grad_norm": 0.0, + "learning_rate": 5.981166604927726e-06, + "loss": 1.292, + "step": 21889 + }, + { + "epoch": 0.6427271125726701, + "grad_norm": 0.0, + "learning_rate": 5.980295837372783e-06, + "loss": 1.3252, + "step": 21890 + }, + { + "epoch": 0.6427564742498092, + "grad_norm": 0.0, + "learning_rate": 5.979425106169054e-06, + "loss": 1.208, + "step": 21891 + }, + { + "epoch": 0.6427858359269482, + "grad_norm": 0.0, + "learning_rate": 5.978554411324409e-06, + "loss": 1.2446, + "step": 21892 + }, + { + "epoch": 0.6428151976040871, + "grad_norm": 0.0, + "learning_rate": 5.9776837528467304e-06, + "loss": 1.2231, + "step": 21893 + }, + { + "epoch": 0.6428445592812262, + "grad_norm": 0.0, + "learning_rate": 5.976813130743886e-06, + "loss": 1.3477, + "step": 21894 + }, + { + "epoch": 0.6428739209583652, + "grad_norm": 0.0, + "learning_rate": 5.975942545023754e-06, + "loss": 1.2944, + "step": 21895 + }, + { + "epoch": 0.6429032826355041, + "grad_norm": 0.0, + "learning_rate": 5.9750719956941985e-06, + "loss": 1.251, + "step": 21896 + }, + { + "epoch": 0.6429326443126432, + "grad_norm": 0.0, + "learning_rate": 5.9742014827631026e-06, + "loss": 1.2266, + "step": 21897 + }, + { + "epoch": 0.6429620059897821, + "grad_norm": 0.0, + "learning_rate": 5.973331006238331e-06, + "loss": 1.2871, + "step": 21898 + }, + { + "epoch": 0.6429913676669211, + "grad_norm": 0.0, + "learning_rate": 5.97246056612776e-06, + "loss": 1.2578, + "step": 21899 + }, + { + "epoch": 0.6430207293440602, + "grad_norm": 0.0, + "learning_rate": 5.9715901624392554e-06, + "loss": 1.3115, + "step": 21900 + }, + { + "epoch": 0.6430500910211991, + "grad_norm": 0.0, + "learning_rate": 5.9707197951807e-06, + "loss": 1.2393, + "step": 21901 + }, + { + "epoch": 0.6430794526983381, + "grad_norm": 0.0, + "learning_rate": 5.969849464359956e-06, + "loss": 1.1699, + "step": 21902 + }, + { + "epoch": 0.6431088143754772, + "grad_norm": 0.0, + "learning_rate": 5.96897916998489e-06, + "loss": 1.2236, + "step": 21903 + }, + { + "epoch": 0.6431381760526161, + "grad_norm": 0.0, + "learning_rate": 5.968108912063382e-06, + "loss": 1.4453, + "step": 21904 + }, + { + "epoch": 0.6431675377297551, + "grad_norm": 0.0, + "learning_rate": 5.9672386906032964e-06, + "loss": 1.333, + "step": 21905 + }, + { + "epoch": 0.6431968994068942, + "grad_norm": 0.0, + "learning_rate": 5.966368505612505e-06, + "loss": 1.2529, + "step": 21906 + }, + { + "epoch": 0.6432262610840331, + "grad_norm": 0.0, + "learning_rate": 5.965498357098872e-06, + "loss": 1.2363, + "step": 21907 + }, + { + "epoch": 0.6432556227611721, + "grad_norm": 0.0, + "learning_rate": 5.964628245070276e-06, + "loss": 1.1958, + "step": 21908 + }, + { + "epoch": 0.6432849844383112, + "grad_norm": 0.0, + "learning_rate": 5.963758169534577e-06, + "loss": 1.2891, + "step": 21909 + }, + { + "epoch": 0.6433143461154501, + "grad_norm": 0.0, + "learning_rate": 5.962888130499649e-06, + "loss": 1.3613, + "step": 21910 + }, + { + "epoch": 0.6433437077925891, + "grad_norm": 0.0, + "learning_rate": 5.962018127973358e-06, + "loss": 1.1982, + "step": 21911 + }, + { + "epoch": 0.6433730694697282, + "grad_norm": 0.0, + "learning_rate": 5.961148161963567e-06, + "loss": 1.3799, + "step": 21912 + }, + { + "epoch": 0.6434024311468671, + "grad_norm": 0.0, + "learning_rate": 5.960278232478152e-06, + "loss": 1.2393, + "step": 21913 + }, + { + "epoch": 0.6434317928240061, + "grad_norm": 0.0, + "learning_rate": 5.959408339524972e-06, + "loss": 1.2529, + "step": 21914 + }, + { + "epoch": 0.6434611545011452, + "grad_norm": 0.0, + "learning_rate": 5.958538483111901e-06, + "loss": 1.3828, + "step": 21915 + }, + { + "epoch": 0.6434905161782841, + "grad_norm": 0.0, + "learning_rate": 5.957668663246796e-06, + "loss": 1.1934, + "step": 21916 + }, + { + "epoch": 0.6435198778554231, + "grad_norm": 0.0, + "learning_rate": 5.9567988799375335e-06, + "loss": 1.1699, + "step": 21917 + }, + { + "epoch": 0.643549239532562, + "grad_norm": 0.0, + "learning_rate": 5.955929133191972e-06, + "loss": 1.271, + "step": 21918 + }, + { + "epoch": 0.6435786012097011, + "grad_norm": 0.0, + "learning_rate": 5.9550594230179815e-06, + "loss": 1.2852, + "step": 21919 + }, + { + "epoch": 0.6436079628868401, + "grad_norm": 0.0, + "learning_rate": 5.95418974942342e-06, + "loss": 1.2183, + "step": 21920 + }, + { + "epoch": 0.643637324563979, + "grad_norm": 0.0, + "learning_rate": 5.953320112416161e-06, + "loss": 1.252, + "step": 21921 + }, + { + "epoch": 0.6436666862411181, + "grad_norm": 0.0, + "learning_rate": 5.952450512004064e-06, + "loss": 1.1416, + "step": 21922 + }, + { + "epoch": 0.6436960479182571, + "grad_norm": 0.0, + "learning_rate": 5.951580948194993e-06, + "loss": 1.2202, + "step": 21923 + }, + { + "epoch": 0.643725409595396, + "grad_norm": 0.0, + "learning_rate": 5.950711420996812e-06, + "loss": 1.2656, + "step": 21924 + }, + { + "epoch": 0.6437547712725351, + "grad_norm": 0.0, + "learning_rate": 5.949841930417382e-06, + "loss": 1.1294, + "step": 21925 + }, + { + "epoch": 0.6437841329496741, + "grad_norm": 0.0, + "learning_rate": 5.948972476464574e-06, + "loss": 1.2637, + "step": 21926 + }, + { + "epoch": 0.643813494626813, + "grad_norm": 0.0, + "learning_rate": 5.948103059146241e-06, + "loss": 1.2607, + "step": 21927 + }, + { + "epoch": 0.6438428563039521, + "grad_norm": 0.0, + "learning_rate": 5.947233678470252e-06, + "loss": 1.2593, + "step": 21928 + }, + { + "epoch": 0.6438722179810911, + "grad_norm": 0.0, + "learning_rate": 5.946364334444465e-06, + "loss": 1.1943, + "step": 21929 + }, + { + "epoch": 0.64390157965823, + "grad_norm": 0.0, + "learning_rate": 5.945495027076746e-06, + "loss": 1.1025, + "step": 21930 + }, + { + "epoch": 0.6439309413353691, + "grad_norm": 0.0, + "learning_rate": 5.944625756374956e-06, + "loss": 1.3008, + "step": 21931 + }, + { + "epoch": 0.6439603030125081, + "grad_norm": 0.0, + "learning_rate": 5.943756522346947e-06, + "loss": 1.2749, + "step": 21932 + }, + { + "epoch": 0.643989664689647, + "grad_norm": 0.0, + "learning_rate": 5.94288732500059e-06, + "loss": 1.3154, + "step": 21933 + }, + { + "epoch": 0.6440190263667861, + "grad_norm": 0.0, + "learning_rate": 5.942018164343742e-06, + "loss": 1.3408, + "step": 21934 + }, + { + "epoch": 0.6440483880439251, + "grad_norm": 0.0, + "learning_rate": 5.941149040384264e-06, + "loss": 1.2217, + "step": 21935 + }, + { + "epoch": 0.644077749721064, + "grad_norm": 0.0, + "learning_rate": 5.940279953130011e-06, + "loss": 1.2261, + "step": 21936 + }, + { + "epoch": 0.6441071113982031, + "grad_norm": 0.0, + "learning_rate": 5.939410902588852e-06, + "loss": 1.2725, + "step": 21937 + }, + { + "epoch": 0.644136473075342, + "grad_norm": 0.0, + "learning_rate": 5.938541888768635e-06, + "loss": 1.2056, + "step": 21938 + }, + { + "epoch": 0.644165834752481, + "grad_norm": 0.0, + "learning_rate": 5.937672911677227e-06, + "loss": 1.3721, + "step": 21939 + }, + { + "epoch": 0.6441951964296201, + "grad_norm": 0.0, + "learning_rate": 5.9368039713224804e-06, + "loss": 1.3184, + "step": 21940 + }, + { + "epoch": 0.644224558106759, + "grad_norm": 0.0, + "learning_rate": 5.935935067712258e-06, + "loss": 1.269, + "step": 21941 + }, + { + "epoch": 0.644253919783898, + "grad_norm": 0.0, + "learning_rate": 5.935066200854417e-06, + "loss": 1.2705, + "step": 21942 + }, + { + "epoch": 0.6442832814610371, + "grad_norm": 0.0, + "learning_rate": 5.9341973707568115e-06, + "loss": 1.25, + "step": 21943 + }, + { + "epoch": 0.644312643138176, + "grad_norm": 0.0, + "learning_rate": 5.933328577427302e-06, + "loss": 1.3008, + "step": 21944 + }, + { + "epoch": 0.644342004815315, + "grad_norm": 0.0, + "learning_rate": 5.93245982087374e-06, + "loss": 1.3594, + "step": 21945 + }, + { + "epoch": 0.6443713664924541, + "grad_norm": 0.0, + "learning_rate": 5.9315911011039884e-06, + "loss": 1.376, + "step": 21946 + }, + { + "epoch": 0.644400728169593, + "grad_norm": 0.0, + "learning_rate": 5.930722418125898e-06, + "loss": 1.1577, + "step": 21947 + }, + { + "epoch": 0.644430089846732, + "grad_norm": 0.0, + "learning_rate": 5.929853771947332e-06, + "loss": 1.2852, + "step": 21948 + }, + { + "epoch": 0.6444594515238711, + "grad_norm": 0.0, + "learning_rate": 5.9289851625761345e-06, + "loss": 1.3242, + "step": 21949 + }, + { + "epoch": 0.64448881320101, + "grad_norm": 0.0, + "learning_rate": 5.92811659002017e-06, + "loss": 1.2212, + "step": 21950 + }, + { + "epoch": 0.644518174878149, + "grad_norm": 0.0, + "learning_rate": 5.927248054287293e-06, + "loss": 1.3047, + "step": 21951 + }, + { + "epoch": 0.6445475365552881, + "grad_norm": 0.0, + "learning_rate": 5.92637955538535e-06, + "loss": 1.1489, + "step": 21952 + }, + { + "epoch": 0.644576898232427, + "grad_norm": 0.0, + "learning_rate": 5.925511093322203e-06, + "loss": 1.2251, + "step": 21953 + }, + { + "epoch": 0.644606259909566, + "grad_norm": 0.0, + "learning_rate": 5.9246426681057e-06, + "loss": 1.2305, + "step": 21954 + }, + { + "epoch": 0.6446356215867051, + "grad_norm": 0.0, + "learning_rate": 5.9237742797436995e-06, + "loss": 1.1792, + "step": 21955 + }, + { + "epoch": 0.644664983263844, + "grad_norm": 0.0, + "learning_rate": 5.92290592824405e-06, + "loss": 1.2676, + "step": 21956 + }, + { + "epoch": 0.644694344940983, + "grad_norm": 0.0, + "learning_rate": 5.922037613614607e-06, + "loss": 1.3789, + "step": 21957 + }, + { + "epoch": 0.6447237066181221, + "grad_norm": 0.0, + "learning_rate": 5.921169335863222e-06, + "loss": 1.3086, + "step": 21958 + }, + { + "epoch": 0.644753068295261, + "grad_norm": 0.0, + "learning_rate": 5.920301094997749e-06, + "loss": 1.2319, + "step": 21959 + }, + { + "epoch": 0.6447824299724, + "grad_norm": 0.0, + "learning_rate": 5.919432891026039e-06, + "loss": 1.2173, + "step": 21960 + }, + { + "epoch": 0.6448117916495391, + "grad_norm": 0.0, + "learning_rate": 5.918564723955937e-06, + "loss": 1.189, + "step": 21961 + }, + { + "epoch": 0.644841153326678, + "grad_norm": 0.0, + "learning_rate": 5.9176965937953046e-06, + "loss": 1.397, + "step": 21962 + }, + { + "epoch": 0.644870515003817, + "grad_norm": 0.0, + "learning_rate": 5.916828500551985e-06, + "loss": 1.1255, + "step": 21963 + }, + { + "epoch": 0.644899876680956, + "grad_norm": 0.0, + "learning_rate": 5.915960444233833e-06, + "loss": 1.2959, + "step": 21964 + }, + { + "epoch": 0.644929238358095, + "grad_norm": 0.0, + "learning_rate": 5.915092424848692e-06, + "loss": 1.2969, + "step": 21965 + }, + { + "epoch": 0.644958600035234, + "grad_norm": 0.0, + "learning_rate": 5.914224442404421e-06, + "loss": 1.3477, + "step": 21966 + }, + { + "epoch": 0.644987961712373, + "grad_norm": 0.0, + "learning_rate": 5.913356496908863e-06, + "loss": 1.1895, + "step": 21967 + }, + { + "epoch": 0.645017323389512, + "grad_norm": 0.0, + "learning_rate": 5.912488588369872e-06, + "loss": 1.1528, + "step": 21968 + }, + { + "epoch": 0.645046685066651, + "grad_norm": 0.0, + "learning_rate": 5.9116207167952865e-06, + "loss": 1.3535, + "step": 21969 + }, + { + "epoch": 0.64507604674379, + "grad_norm": 0.0, + "learning_rate": 5.910752882192967e-06, + "loss": 1.2764, + "step": 21970 + }, + { + "epoch": 0.645105408420929, + "grad_norm": 0.0, + "learning_rate": 5.909885084570757e-06, + "loss": 1.2676, + "step": 21971 + }, + { + "epoch": 0.645134770098068, + "grad_norm": 0.0, + "learning_rate": 5.909017323936502e-06, + "loss": 1.1904, + "step": 21972 + }, + { + "epoch": 0.645164131775207, + "grad_norm": 0.0, + "learning_rate": 5.908149600298052e-06, + "loss": 1.3467, + "step": 21973 + }, + { + "epoch": 0.645193493452346, + "grad_norm": 0.0, + "learning_rate": 5.90728191366325e-06, + "loss": 1.2085, + "step": 21974 + }, + { + "epoch": 0.645222855129485, + "grad_norm": 0.0, + "learning_rate": 5.90641426403995e-06, + "loss": 1.3193, + "step": 21975 + }, + { + "epoch": 0.645252216806624, + "grad_norm": 0.0, + "learning_rate": 5.905546651435992e-06, + "loss": 1.2358, + "step": 21976 + }, + { + "epoch": 0.645281578483763, + "grad_norm": 0.0, + "learning_rate": 5.904679075859225e-06, + "loss": 1.2949, + "step": 21977 + }, + { + "epoch": 0.645310940160902, + "grad_norm": 0.0, + "learning_rate": 5.903811537317492e-06, + "loss": 1.1553, + "step": 21978 + }, + { + "epoch": 0.645340301838041, + "grad_norm": 0.0, + "learning_rate": 5.902944035818645e-06, + "loss": 1.2461, + "step": 21979 + }, + { + "epoch": 0.64536966351518, + "grad_norm": 0.0, + "learning_rate": 5.902076571370522e-06, + "loss": 1.3086, + "step": 21980 + }, + { + "epoch": 0.645399025192319, + "grad_norm": 0.0, + "learning_rate": 5.901209143980966e-06, + "loss": 1.2764, + "step": 21981 + }, + { + "epoch": 0.645428386869458, + "grad_norm": 0.0, + "learning_rate": 5.90034175365783e-06, + "loss": 1.332, + "step": 21982 + }, + { + "epoch": 0.645457748546597, + "grad_norm": 0.0, + "learning_rate": 5.899474400408952e-06, + "loss": 1.2192, + "step": 21983 + }, + { + "epoch": 0.645487110223736, + "grad_norm": 0.0, + "learning_rate": 5.898607084242178e-06, + "loss": 1.293, + "step": 21984 + }, + { + "epoch": 0.645516471900875, + "grad_norm": 0.0, + "learning_rate": 5.897739805165348e-06, + "loss": 1.2871, + "step": 21985 + }, + { + "epoch": 0.645545833578014, + "grad_norm": 0.0, + "learning_rate": 5.896872563186311e-06, + "loss": 1.1982, + "step": 21986 + }, + { + "epoch": 0.645575195255153, + "grad_norm": 0.0, + "learning_rate": 5.896005358312904e-06, + "loss": 1.3008, + "step": 21987 + }, + { + "epoch": 0.645604556932292, + "grad_norm": 0.0, + "learning_rate": 5.895138190552974e-06, + "loss": 1.1865, + "step": 21988 + }, + { + "epoch": 0.645633918609431, + "grad_norm": 0.0, + "learning_rate": 5.894271059914356e-06, + "loss": 1.3916, + "step": 21989 + }, + { + "epoch": 0.6456632802865699, + "grad_norm": 0.0, + "learning_rate": 5.8934039664049e-06, + "loss": 1.1836, + "step": 21990 + }, + { + "epoch": 0.645692641963709, + "grad_norm": 0.0, + "learning_rate": 5.892536910032445e-06, + "loss": 1.188, + "step": 21991 + }, + { + "epoch": 0.645722003640848, + "grad_norm": 0.0, + "learning_rate": 5.8916698908048275e-06, + "loss": 1.2881, + "step": 21992 + }, + { + "epoch": 0.6457513653179869, + "grad_norm": 0.0, + "learning_rate": 5.890802908729894e-06, + "loss": 1.2422, + "step": 21993 + }, + { + "epoch": 0.645780726995126, + "grad_norm": 0.0, + "learning_rate": 5.889935963815476e-06, + "loss": 1.1875, + "step": 21994 + }, + { + "epoch": 0.645810088672265, + "grad_norm": 0.0, + "learning_rate": 5.889069056069427e-06, + "loss": 1.2686, + "step": 21995 + }, + { + "epoch": 0.6458394503494039, + "grad_norm": 0.0, + "learning_rate": 5.888202185499575e-06, + "loss": 1.3418, + "step": 21996 + }, + { + "epoch": 0.645868812026543, + "grad_norm": 0.0, + "learning_rate": 5.8873353521137665e-06, + "loss": 1.3096, + "step": 21997 + }, + { + "epoch": 0.645898173703682, + "grad_norm": 0.0, + "learning_rate": 5.886468555919833e-06, + "loss": 1.3379, + "step": 21998 + }, + { + "epoch": 0.6459275353808209, + "grad_norm": 0.0, + "learning_rate": 5.885601796925624e-06, + "loss": 1.2441, + "step": 21999 + }, + { + "epoch": 0.64595689705796, + "grad_norm": 0.0, + "learning_rate": 5.88473507513897e-06, + "loss": 1.186, + "step": 22000 + }, + { + "epoch": 0.645986258735099, + "grad_norm": 0.0, + "learning_rate": 5.883868390567708e-06, + "loss": 1.2705, + "step": 22001 + }, + { + "epoch": 0.6460156204122379, + "grad_norm": 0.0, + "learning_rate": 5.883001743219682e-06, + "loss": 1.2002, + "step": 22002 + }, + { + "epoch": 0.646044982089377, + "grad_norm": 0.0, + "learning_rate": 5.88213513310272e-06, + "loss": 1.3037, + "step": 22003 + }, + { + "epoch": 0.646074343766516, + "grad_norm": 0.0, + "learning_rate": 5.8812685602246715e-06, + "loss": 1.2319, + "step": 22004 + }, + { + "epoch": 0.6461037054436549, + "grad_norm": 0.0, + "learning_rate": 5.88040202459336e-06, + "loss": 1.3789, + "step": 22005 + }, + { + "epoch": 0.646133067120794, + "grad_norm": 0.0, + "learning_rate": 5.8795355262166334e-06, + "loss": 1.3105, + "step": 22006 + }, + { + "epoch": 0.646162428797933, + "grad_norm": 0.0, + "learning_rate": 5.878669065102318e-06, + "loss": 1.2549, + "step": 22007 + }, + { + "epoch": 0.6461917904750719, + "grad_norm": 0.0, + "learning_rate": 5.8778026412582566e-06, + "loss": 1.2148, + "step": 22008 + }, + { + "epoch": 0.646221152152211, + "grad_norm": 0.0, + "learning_rate": 5.876936254692278e-06, + "loss": 1.1445, + "step": 22009 + }, + { + "epoch": 0.64625051382935, + "grad_norm": 0.0, + "learning_rate": 5.876069905412225e-06, + "loss": 1.2861, + "step": 22010 + }, + { + "epoch": 0.6462798755064889, + "grad_norm": 0.0, + "learning_rate": 5.875203593425927e-06, + "loss": 1.2129, + "step": 22011 + }, + { + "epoch": 0.646309237183628, + "grad_norm": 0.0, + "learning_rate": 5.874337318741219e-06, + "loss": 1.292, + "step": 22012 + }, + { + "epoch": 0.6463385988607669, + "grad_norm": 0.0, + "learning_rate": 5.873471081365935e-06, + "loss": 1.3242, + "step": 22013 + }, + { + "epoch": 0.6463679605379059, + "grad_norm": 0.0, + "learning_rate": 5.872604881307906e-06, + "loss": 1.1895, + "step": 22014 + }, + { + "epoch": 0.646397322215045, + "grad_norm": 0.0, + "learning_rate": 5.871738718574972e-06, + "loss": 1.2529, + "step": 22015 + }, + { + "epoch": 0.6464266838921839, + "grad_norm": 0.0, + "learning_rate": 5.87087259317496e-06, + "loss": 1.1621, + "step": 22016 + }, + { + "epoch": 0.6464560455693229, + "grad_norm": 0.0, + "learning_rate": 5.870006505115706e-06, + "loss": 1.2725, + "step": 22017 + }, + { + "epoch": 0.6464854072464619, + "grad_norm": 0.0, + "learning_rate": 5.869140454405038e-06, + "loss": 1.2881, + "step": 22018 + }, + { + "epoch": 0.6465147689236009, + "grad_norm": 0.0, + "learning_rate": 5.868274441050794e-06, + "loss": 1.2178, + "step": 22019 + }, + { + "epoch": 0.6465441306007399, + "grad_norm": 0.0, + "learning_rate": 5.867408465060805e-06, + "loss": 1.1968, + "step": 22020 + }, + { + "epoch": 0.6465734922778789, + "grad_norm": 0.0, + "learning_rate": 5.866542526442893e-06, + "loss": 1.2617, + "step": 22021 + }, + { + "epoch": 0.6466028539550179, + "grad_norm": 0.0, + "learning_rate": 5.865676625204901e-06, + "loss": 1.3232, + "step": 22022 + }, + { + "epoch": 0.6466322156321569, + "grad_norm": 0.0, + "learning_rate": 5.864810761354649e-06, + "loss": 1.1733, + "step": 22023 + }, + { + "epoch": 0.6466615773092959, + "grad_norm": 0.0, + "learning_rate": 5.863944934899976e-06, + "loss": 1.2471, + "step": 22024 + }, + { + "epoch": 0.6466909389864349, + "grad_norm": 0.0, + "learning_rate": 5.8630791458487066e-06, + "loss": 1.2983, + "step": 22025 + }, + { + "epoch": 0.6467203006635739, + "grad_norm": 0.0, + "learning_rate": 5.862213394208674e-06, + "loss": 1.3408, + "step": 22026 + }, + { + "epoch": 0.6467496623407128, + "grad_norm": 0.0, + "learning_rate": 5.861347679987701e-06, + "loss": 1.3037, + "step": 22027 + }, + { + "epoch": 0.6467790240178519, + "grad_norm": 0.0, + "learning_rate": 5.860482003193625e-06, + "loss": 1.3203, + "step": 22028 + }, + { + "epoch": 0.6468083856949909, + "grad_norm": 0.0, + "learning_rate": 5.859616363834273e-06, + "loss": 1.1738, + "step": 22029 + }, + { + "epoch": 0.6468377473721298, + "grad_norm": 0.0, + "learning_rate": 5.858750761917465e-06, + "loss": 1.2998, + "step": 22030 + }, + { + "epoch": 0.6468671090492689, + "grad_norm": 0.0, + "learning_rate": 5.857885197451036e-06, + "loss": 1.2383, + "step": 22031 + }, + { + "epoch": 0.6468964707264079, + "grad_norm": 0.0, + "learning_rate": 5.8570196704428114e-06, + "loss": 1.3428, + "step": 22032 + }, + { + "epoch": 0.6469258324035468, + "grad_norm": 0.0, + "learning_rate": 5.8561541809006215e-06, + "loss": 1.2979, + "step": 22033 + }, + { + "epoch": 0.6469551940806859, + "grad_norm": 0.0, + "learning_rate": 5.855288728832287e-06, + "loss": 1.293, + "step": 22034 + }, + { + "epoch": 0.6469845557578249, + "grad_norm": 0.0, + "learning_rate": 5.854423314245641e-06, + "loss": 1.2446, + "step": 22035 + }, + { + "epoch": 0.6470139174349638, + "grad_norm": 0.0, + "learning_rate": 5.853557937148504e-06, + "loss": 1.3252, + "step": 22036 + }, + { + "epoch": 0.6470432791121029, + "grad_norm": 0.0, + "learning_rate": 5.852692597548708e-06, + "loss": 1.189, + "step": 22037 + }, + { + "epoch": 0.6470726407892419, + "grad_norm": 0.0, + "learning_rate": 5.851827295454069e-06, + "loss": 1.2134, + "step": 22038 + }, + { + "epoch": 0.6471020024663808, + "grad_norm": 0.0, + "learning_rate": 5.8509620308724245e-06, + "loss": 1.2266, + "step": 22039 + }, + { + "epoch": 0.6471313641435199, + "grad_norm": 0.0, + "learning_rate": 5.850096803811591e-06, + "loss": 1.2373, + "step": 22040 + }, + { + "epoch": 0.6471607258206589, + "grad_norm": 0.0, + "learning_rate": 5.849231614279393e-06, + "loss": 1.3047, + "step": 22041 + }, + { + "epoch": 0.6471900874977978, + "grad_norm": 0.0, + "learning_rate": 5.8483664622836615e-06, + "loss": 1.1582, + "step": 22042 + }, + { + "epoch": 0.6472194491749369, + "grad_norm": 0.0, + "learning_rate": 5.8475013478322084e-06, + "loss": 1.3398, + "step": 22043 + }, + { + "epoch": 0.6472488108520759, + "grad_norm": 0.0, + "learning_rate": 5.8466362709328684e-06, + "loss": 1.3057, + "step": 22044 + }, + { + "epoch": 0.6472781725292148, + "grad_norm": 0.0, + "learning_rate": 5.845771231593459e-06, + "loss": 1.3545, + "step": 22045 + }, + { + "epoch": 0.6473075342063539, + "grad_norm": 0.0, + "learning_rate": 5.844906229821806e-06, + "loss": 1.3662, + "step": 22046 + }, + { + "epoch": 0.6473368958834929, + "grad_norm": 0.0, + "learning_rate": 5.8440412656257265e-06, + "loss": 1.2183, + "step": 22047 + }, + { + "epoch": 0.6473662575606318, + "grad_norm": 0.0, + "learning_rate": 5.84317633901305e-06, + "loss": 1.3047, + "step": 22048 + }, + { + "epoch": 0.6473956192377709, + "grad_norm": 0.0, + "learning_rate": 5.842311449991593e-06, + "loss": 1.2905, + "step": 22049 + }, + { + "epoch": 0.6474249809149099, + "grad_norm": 0.0, + "learning_rate": 5.8414465985691785e-06, + "loss": 1.2256, + "step": 22050 + }, + { + "epoch": 0.6474543425920488, + "grad_norm": 0.0, + "learning_rate": 5.8405817847536285e-06, + "loss": 1.1572, + "step": 22051 + }, + { + "epoch": 0.6474837042691879, + "grad_norm": 0.0, + "learning_rate": 5.8397170085527585e-06, + "loss": 1.3086, + "step": 22052 + }, + { + "epoch": 0.6475130659463268, + "grad_norm": 0.0, + "learning_rate": 5.8388522699743976e-06, + "loss": 1.3027, + "step": 22053 + }, + { + "epoch": 0.6475424276234658, + "grad_norm": 0.0, + "learning_rate": 5.837987569026359e-06, + "loss": 1.1899, + "step": 22054 + }, + { + "epoch": 0.6475717893006049, + "grad_norm": 0.0, + "learning_rate": 5.837122905716466e-06, + "loss": 1.3096, + "step": 22055 + }, + { + "epoch": 0.6476011509777438, + "grad_norm": 0.0, + "learning_rate": 5.836258280052534e-06, + "loss": 1.2485, + "step": 22056 + }, + { + "epoch": 0.6476305126548828, + "grad_norm": 0.0, + "learning_rate": 5.8353936920423885e-06, + "loss": 1.2129, + "step": 22057 + }, + { + "epoch": 0.6476598743320219, + "grad_norm": 0.0, + "learning_rate": 5.834529141693843e-06, + "loss": 1.2441, + "step": 22058 + }, + { + "epoch": 0.6476892360091608, + "grad_norm": 0.0, + "learning_rate": 5.833664629014719e-06, + "loss": 1.2207, + "step": 22059 + }, + { + "epoch": 0.6477185976862998, + "grad_norm": 0.0, + "learning_rate": 5.832800154012832e-06, + "loss": 1.248, + "step": 22060 + }, + { + "epoch": 0.6477479593634389, + "grad_norm": 0.0, + "learning_rate": 5.8319357166959955e-06, + "loss": 1.2402, + "step": 22061 + }, + { + "epoch": 0.6477773210405778, + "grad_norm": 0.0, + "learning_rate": 5.831071317072037e-06, + "loss": 1.1973, + "step": 22062 + }, + { + "epoch": 0.6478066827177168, + "grad_norm": 0.0, + "learning_rate": 5.830206955148765e-06, + "loss": 1.271, + "step": 22063 + }, + { + "epoch": 0.6478360443948559, + "grad_norm": 0.0, + "learning_rate": 5.829342630934002e-06, + "loss": 1.2744, + "step": 22064 + }, + { + "epoch": 0.6478654060719948, + "grad_norm": 0.0, + "learning_rate": 5.8284783444355574e-06, + "loss": 1.0547, + "step": 22065 + }, + { + "epoch": 0.6478947677491338, + "grad_norm": 0.0, + "learning_rate": 5.827614095661256e-06, + "loss": 1.2822, + "step": 22066 + }, + { + "epoch": 0.6479241294262729, + "grad_norm": 0.0, + "learning_rate": 5.82674988461891e-06, + "loss": 1.2803, + "step": 22067 + }, + { + "epoch": 0.6479534911034118, + "grad_norm": 0.0, + "learning_rate": 5.825885711316333e-06, + "loss": 1.2764, + "step": 22068 + }, + { + "epoch": 0.6479828527805508, + "grad_norm": 0.0, + "learning_rate": 5.8250215757613405e-06, + "loss": 1.4316, + "step": 22069 + }, + { + "epoch": 0.6480122144576899, + "grad_norm": 0.0, + "learning_rate": 5.824157477961743e-06, + "loss": 1.3848, + "step": 22070 + }, + { + "epoch": 0.6480415761348288, + "grad_norm": 0.0, + "learning_rate": 5.823293417925362e-06, + "loss": 1.0986, + "step": 22071 + }, + { + "epoch": 0.6480709378119678, + "grad_norm": 0.0, + "learning_rate": 5.822429395660005e-06, + "loss": 1.3281, + "step": 22072 + }, + { + "epoch": 0.6481002994891069, + "grad_norm": 0.0, + "learning_rate": 5.821565411173493e-06, + "loss": 1.2192, + "step": 22073 + }, + { + "epoch": 0.6481296611662458, + "grad_norm": 0.0, + "learning_rate": 5.820701464473631e-06, + "loss": 1.3477, + "step": 22074 + }, + { + "epoch": 0.6481590228433848, + "grad_norm": 0.0, + "learning_rate": 5.819837555568239e-06, + "loss": 1.1934, + "step": 22075 + }, + { + "epoch": 0.6481883845205239, + "grad_norm": 0.0, + "learning_rate": 5.818973684465128e-06, + "loss": 1.2285, + "step": 22076 + }, + { + "epoch": 0.6482177461976628, + "grad_norm": 0.0, + "learning_rate": 5.818109851172108e-06, + "loss": 1.3193, + "step": 22077 + }, + { + "epoch": 0.6482471078748018, + "grad_norm": 0.0, + "learning_rate": 5.817246055696988e-06, + "loss": 1.2725, + "step": 22078 + }, + { + "epoch": 0.6482764695519408, + "grad_norm": 0.0, + "learning_rate": 5.816382298047587e-06, + "loss": 1.166, + "step": 22079 + }, + { + "epoch": 0.6483058312290798, + "grad_norm": 0.0, + "learning_rate": 5.8155185782317125e-06, + "loss": 1.293, + "step": 22080 + }, + { + "epoch": 0.6483351929062188, + "grad_norm": 0.0, + "learning_rate": 5.8146548962571705e-06, + "loss": 1.3115, + "step": 22081 + }, + { + "epoch": 0.6483645545833578, + "grad_norm": 0.0, + "learning_rate": 5.813791252131781e-06, + "loss": 1.248, + "step": 22082 + }, + { + "epoch": 0.6483939162604968, + "grad_norm": 0.0, + "learning_rate": 5.812927645863349e-06, + "loss": 1.1895, + "step": 22083 + }, + { + "epoch": 0.6484232779376358, + "grad_norm": 0.0, + "learning_rate": 5.812064077459685e-06, + "loss": 1.1919, + "step": 22084 + }, + { + "epoch": 0.6484526396147748, + "grad_norm": 0.0, + "learning_rate": 5.8112005469285925e-06, + "loss": 1.1094, + "step": 22085 + }, + { + "epoch": 0.6484820012919138, + "grad_norm": 0.0, + "learning_rate": 5.810337054277893e-06, + "loss": 1.2803, + "step": 22086 + }, + { + "epoch": 0.6485113629690528, + "grad_norm": 0.0, + "learning_rate": 5.809473599515382e-06, + "loss": 1.1763, + "step": 22087 + }, + { + "epoch": 0.6485407246461918, + "grad_norm": 0.0, + "learning_rate": 5.808610182648879e-06, + "loss": 1.2734, + "step": 22088 + }, + { + "epoch": 0.6485700863233308, + "grad_norm": 0.0, + "learning_rate": 5.807746803686187e-06, + "loss": 1.2354, + "step": 22089 + }, + { + "epoch": 0.6485994480004698, + "grad_norm": 0.0, + "learning_rate": 5.806883462635112e-06, + "loss": 1.2939, + "step": 22090 + }, + { + "epoch": 0.6486288096776088, + "grad_norm": 0.0, + "learning_rate": 5.806020159503467e-06, + "loss": 1.3154, + "step": 22091 + }, + { + "epoch": 0.6486581713547478, + "grad_norm": 0.0, + "learning_rate": 5.805156894299056e-06, + "loss": 1.3218, + "step": 22092 + }, + { + "epoch": 0.6486875330318868, + "grad_norm": 0.0, + "learning_rate": 5.804293667029686e-06, + "loss": 1.2861, + "step": 22093 + }, + { + "epoch": 0.6487168947090258, + "grad_norm": 0.0, + "learning_rate": 5.803430477703157e-06, + "loss": 1.3926, + "step": 22094 + }, + { + "epoch": 0.6487462563861648, + "grad_norm": 0.0, + "learning_rate": 5.802567326327286e-06, + "loss": 1.1821, + "step": 22095 + }, + { + "epoch": 0.6487756180633037, + "grad_norm": 0.0, + "learning_rate": 5.8017042129098696e-06, + "loss": 1.3232, + "step": 22096 + }, + { + "epoch": 0.6488049797404428, + "grad_norm": 0.0, + "learning_rate": 5.800841137458722e-06, + "loss": 1.2266, + "step": 22097 + }, + { + "epoch": 0.6488343414175818, + "grad_norm": 0.0, + "learning_rate": 5.799978099981642e-06, + "loss": 1.0405, + "step": 22098 + }, + { + "epoch": 0.6488637030947207, + "grad_norm": 0.0, + "learning_rate": 5.799115100486432e-06, + "loss": 1.2676, + "step": 22099 + }, + { + "epoch": 0.6488930647718598, + "grad_norm": 0.0, + "learning_rate": 5.798252138980905e-06, + "loss": 1.2725, + "step": 22100 + }, + { + "epoch": 0.6489224264489988, + "grad_norm": 0.0, + "learning_rate": 5.79738921547286e-06, + "loss": 1.1968, + "step": 22101 + }, + { + "epoch": 0.6489517881261377, + "grad_norm": 0.0, + "learning_rate": 5.7965263299701005e-06, + "loss": 1.1543, + "step": 22102 + }, + { + "epoch": 0.6489811498032768, + "grad_norm": 0.0, + "learning_rate": 5.795663482480425e-06, + "loss": 1.3721, + "step": 22103 + }, + { + "epoch": 0.6490105114804158, + "grad_norm": 0.0, + "learning_rate": 5.794800673011644e-06, + "loss": 1.2739, + "step": 22104 + }, + { + "epoch": 0.6490398731575547, + "grad_norm": 0.0, + "learning_rate": 5.7939379015715565e-06, + "loss": 1.2861, + "step": 22105 + }, + { + "epoch": 0.6490692348346938, + "grad_norm": 0.0, + "learning_rate": 5.7930751681679685e-06, + "loss": 1.3525, + "step": 22106 + }, + { + "epoch": 0.6490985965118328, + "grad_norm": 0.0, + "learning_rate": 5.792212472808676e-06, + "loss": 1.3848, + "step": 22107 + }, + { + "epoch": 0.6491279581889717, + "grad_norm": 0.0, + "learning_rate": 5.79134981550149e-06, + "loss": 1.2349, + "step": 22108 + }, + { + "epoch": 0.6491573198661108, + "grad_norm": 0.0, + "learning_rate": 5.7904871962542035e-06, + "loss": 1.2588, + "step": 22109 + }, + { + "epoch": 0.6491866815432498, + "grad_norm": 0.0, + "learning_rate": 5.789624615074614e-06, + "loss": 1.2949, + "step": 22110 + }, + { + "epoch": 0.6492160432203887, + "grad_norm": 0.0, + "learning_rate": 5.788762071970531e-06, + "loss": 1.3579, + "step": 22111 + }, + { + "epoch": 0.6492454048975278, + "grad_norm": 0.0, + "learning_rate": 5.787899566949749e-06, + "loss": 1.2998, + "step": 22112 + }, + { + "epoch": 0.6492747665746668, + "grad_norm": 0.0, + "learning_rate": 5.787037100020072e-06, + "loss": 1.3447, + "step": 22113 + }, + { + "epoch": 0.6493041282518057, + "grad_norm": 0.0, + "learning_rate": 5.786174671189294e-06, + "loss": 1.2471, + "step": 22114 + }, + { + "epoch": 0.6493334899289448, + "grad_norm": 0.0, + "learning_rate": 5.785312280465222e-06, + "loss": 1.0781, + "step": 22115 + }, + { + "epoch": 0.6493628516060838, + "grad_norm": 0.0, + "learning_rate": 5.784449927855651e-06, + "loss": 1.2744, + "step": 22116 + }, + { + "epoch": 0.6493922132832227, + "grad_norm": 0.0, + "learning_rate": 5.783587613368378e-06, + "loss": 1.1382, + "step": 22117 + }, + { + "epoch": 0.6494215749603617, + "grad_norm": 0.0, + "learning_rate": 5.782725337011202e-06, + "loss": 1.2427, + "step": 22118 + }, + { + "epoch": 0.6494509366375008, + "grad_norm": 0.0, + "learning_rate": 5.781863098791917e-06, + "loss": 1.2461, + "step": 22119 + }, + { + "epoch": 0.6494802983146397, + "grad_norm": 0.0, + "learning_rate": 5.781000898718328e-06, + "loss": 1.2603, + "step": 22120 + }, + { + "epoch": 0.6495096599917787, + "grad_norm": 0.0, + "learning_rate": 5.780138736798224e-06, + "loss": 1.2285, + "step": 22121 + }, + { + "epoch": 0.6495390216689177, + "grad_norm": 0.0, + "learning_rate": 5.779276613039412e-06, + "loss": 1.1938, + "step": 22122 + }, + { + "epoch": 0.6495683833460567, + "grad_norm": 0.0, + "learning_rate": 5.778414527449677e-06, + "loss": 1.3867, + "step": 22123 + }, + { + "epoch": 0.6495977450231957, + "grad_norm": 0.0, + "learning_rate": 5.7775524800368256e-06, + "loss": 1.1738, + "step": 22124 + }, + { + "epoch": 0.6496271067003347, + "grad_norm": 0.0, + "learning_rate": 5.776690470808648e-06, + "loss": 1.1714, + "step": 22125 + }, + { + "epoch": 0.6496564683774737, + "grad_norm": 0.0, + "learning_rate": 5.7758284997729396e-06, + "loss": 1.3662, + "step": 22126 + }, + { + "epoch": 0.6496858300546127, + "grad_norm": 0.0, + "learning_rate": 5.7749665669374925e-06, + "loss": 1.2354, + "step": 22127 + }, + { + "epoch": 0.6497151917317517, + "grad_norm": 0.0, + "learning_rate": 5.7741046723101076e-06, + "loss": 1.3213, + "step": 22128 + }, + { + "epoch": 0.6497445534088907, + "grad_norm": 0.0, + "learning_rate": 5.773242815898578e-06, + "loss": 1.3018, + "step": 22129 + }, + { + "epoch": 0.6497739150860297, + "grad_norm": 0.0, + "learning_rate": 5.7723809977106915e-06, + "loss": 1.3691, + "step": 22130 + }, + { + "epoch": 0.6498032767631687, + "grad_norm": 0.0, + "learning_rate": 5.77151921775425e-06, + "loss": 1.1855, + "step": 22131 + }, + { + "epoch": 0.6498326384403077, + "grad_norm": 0.0, + "learning_rate": 5.770657476037041e-06, + "loss": 1.1294, + "step": 22132 + }, + { + "epoch": 0.6498620001174467, + "grad_norm": 0.0, + "learning_rate": 5.769795772566864e-06, + "loss": 1.2676, + "step": 22133 + }, + { + "epoch": 0.6498913617945857, + "grad_norm": 0.0, + "learning_rate": 5.768934107351503e-06, + "loss": 1.2012, + "step": 22134 + }, + { + "epoch": 0.6499207234717247, + "grad_norm": 0.0, + "learning_rate": 5.768072480398757e-06, + "loss": 1.3301, + "step": 22135 + }, + { + "epoch": 0.6499500851488637, + "grad_norm": 0.0, + "learning_rate": 5.7672108917164106e-06, + "loss": 1.144, + "step": 22136 + }, + { + "epoch": 0.6499794468260027, + "grad_norm": 0.0, + "learning_rate": 5.766349341312264e-06, + "loss": 1.2832, + "step": 22137 + }, + { + "epoch": 0.6500088085031417, + "grad_norm": 0.0, + "learning_rate": 5.765487829194105e-06, + "loss": 1.3506, + "step": 22138 + }, + { + "epoch": 0.6500381701802807, + "grad_norm": 0.0, + "learning_rate": 5.76462635536972e-06, + "loss": 1.2363, + "step": 22139 + }, + { + "epoch": 0.6500675318574197, + "grad_norm": 0.0, + "learning_rate": 5.7637649198469085e-06, + "loss": 1.2764, + "step": 22140 + }, + { + "epoch": 0.6500968935345587, + "grad_norm": 0.0, + "learning_rate": 5.762903522633454e-06, + "loss": 1.2627, + "step": 22141 + }, + { + "epoch": 0.6501262552116976, + "grad_norm": 0.0, + "learning_rate": 5.76204216373715e-06, + "loss": 1.2632, + "step": 22142 + }, + { + "epoch": 0.6501556168888367, + "grad_norm": 0.0, + "learning_rate": 5.761180843165779e-06, + "loss": 1.0879, + "step": 22143 + }, + { + "epoch": 0.6501849785659757, + "grad_norm": 0.0, + "learning_rate": 5.760319560927139e-06, + "loss": 1.0654, + "step": 22144 + }, + { + "epoch": 0.6502143402431146, + "grad_norm": 0.0, + "learning_rate": 5.759458317029012e-06, + "loss": 1.3701, + "step": 22145 + }, + { + "epoch": 0.6502437019202537, + "grad_norm": 0.0, + "learning_rate": 5.758597111479192e-06, + "loss": 1.1504, + "step": 22146 + }, + { + "epoch": 0.6502730635973927, + "grad_norm": 0.0, + "learning_rate": 5.757735944285466e-06, + "loss": 1.3262, + "step": 22147 + }, + { + "epoch": 0.6503024252745316, + "grad_norm": 0.0, + "learning_rate": 5.756874815455615e-06, + "loss": 1.2061, + "step": 22148 + }, + { + "epoch": 0.6503317869516707, + "grad_norm": 0.0, + "learning_rate": 5.756013724997438e-06, + "loss": 1.1099, + "step": 22149 + }, + { + "epoch": 0.6503611486288097, + "grad_norm": 0.0, + "learning_rate": 5.755152672918714e-06, + "loss": 1.3994, + "step": 22150 + }, + { + "epoch": 0.6503905103059486, + "grad_norm": 0.0, + "learning_rate": 5.7542916592272314e-06, + "loss": 1.2676, + "step": 22151 + }, + { + "epoch": 0.6504198719830877, + "grad_norm": 0.0, + "learning_rate": 5.753430683930773e-06, + "loss": 1.3545, + "step": 22152 + }, + { + "epoch": 0.6504492336602267, + "grad_norm": 0.0, + "learning_rate": 5.752569747037133e-06, + "loss": 1.3037, + "step": 22153 + }, + { + "epoch": 0.6504785953373656, + "grad_norm": 0.0, + "learning_rate": 5.751708848554088e-06, + "loss": 1.2881, + "step": 22154 + }, + { + "epoch": 0.6505079570145047, + "grad_norm": 0.0, + "learning_rate": 5.750847988489432e-06, + "loss": 1.2622, + "step": 22155 + }, + { + "epoch": 0.6505373186916437, + "grad_norm": 0.0, + "learning_rate": 5.749987166850941e-06, + "loss": 1.166, + "step": 22156 + }, + { + "epoch": 0.6505666803687826, + "grad_norm": 0.0, + "learning_rate": 5.749126383646414e-06, + "loss": 1.2539, + "step": 22157 + }, + { + "epoch": 0.6505960420459217, + "grad_norm": 0.0, + "learning_rate": 5.748265638883621e-06, + "loss": 1.4131, + "step": 22158 + }, + { + "epoch": 0.6506254037230607, + "grad_norm": 0.0, + "learning_rate": 5.747404932570346e-06, + "loss": 1.2305, + "step": 22159 + }, + { + "epoch": 0.6506547654001996, + "grad_norm": 0.0, + "learning_rate": 5.746544264714383e-06, + "loss": 1.376, + "step": 22160 + }, + { + "epoch": 0.6506841270773387, + "grad_norm": 0.0, + "learning_rate": 5.745683635323505e-06, + "loss": 1.2817, + "step": 22161 + }, + { + "epoch": 0.6507134887544777, + "grad_norm": 0.0, + "learning_rate": 5.744823044405503e-06, + "loss": 1.0874, + "step": 22162 + }, + { + "epoch": 0.6507428504316166, + "grad_norm": 0.0, + "learning_rate": 5.743962491968151e-06, + "loss": 1.2871, + "step": 22163 + }, + { + "epoch": 0.6507722121087557, + "grad_norm": 0.0, + "learning_rate": 5.743101978019242e-06, + "loss": 1.3223, + "step": 22164 + }, + { + "epoch": 0.6508015737858946, + "grad_norm": 0.0, + "learning_rate": 5.742241502566551e-06, + "loss": 1.3574, + "step": 22165 + }, + { + "epoch": 0.6508309354630336, + "grad_norm": 0.0, + "learning_rate": 5.7413810656178614e-06, + "loss": 1.1841, + "step": 22166 + }, + { + "epoch": 0.6508602971401727, + "grad_norm": 0.0, + "learning_rate": 5.740520667180953e-06, + "loss": 1.2676, + "step": 22167 + }, + { + "epoch": 0.6508896588173116, + "grad_norm": 0.0, + "learning_rate": 5.739660307263603e-06, + "loss": 1.3164, + "step": 22168 + }, + { + "epoch": 0.6509190204944506, + "grad_norm": 0.0, + "learning_rate": 5.7387999858735996e-06, + "loss": 1.332, + "step": 22169 + }, + { + "epoch": 0.6509483821715897, + "grad_norm": 0.0, + "learning_rate": 5.7379397030187155e-06, + "loss": 1.248, + "step": 22170 + }, + { + "epoch": 0.6509777438487286, + "grad_norm": 0.0, + "learning_rate": 5.737079458706738e-06, + "loss": 1.2656, + "step": 22171 + }, + { + "epoch": 0.6510071055258676, + "grad_norm": 0.0, + "learning_rate": 5.736219252945439e-06, + "loss": 1.2329, + "step": 22172 + }, + { + "epoch": 0.6510364672030067, + "grad_norm": 0.0, + "learning_rate": 5.735359085742605e-06, + "loss": 1.332, + "step": 22173 + }, + { + "epoch": 0.6510658288801456, + "grad_norm": 0.0, + "learning_rate": 5.7344989571060115e-06, + "loss": 1.2075, + "step": 22174 + }, + { + "epoch": 0.6510951905572846, + "grad_norm": 0.0, + "learning_rate": 5.733638867043436e-06, + "loss": 1.1475, + "step": 22175 + }, + { + "epoch": 0.6511245522344237, + "grad_norm": 0.0, + "learning_rate": 5.732778815562652e-06, + "loss": 1.1572, + "step": 22176 + }, + { + "epoch": 0.6511539139115626, + "grad_norm": 0.0, + "learning_rate": 5.731918802671448e-06, + "loss": 1.3223, + "step": 22177 + }, + { + "epoch": 0.6511832755887016, + "grad_norm": 0.0, + "learning_rate": 5.731058828377594e-06, + "loss": 1.3223, + "step": 22178 + }, + { + "epoch": 0.6512126372658407, + "grad_norm": 0.0, + "learning_rate": 5.730198892688864e-06, + "loss": 1.2354, + "step": 22179 + }, + { + "epoch": 0.6512419989429796, + "grad_norm": 0.0, + "learning_rate": 5.729338995613043e-06, + "loss": 1.1719, + "step": 22180 + }, + { + "epoch": 0.6512713606201186, + "grad_norm": 0.0, + "learning_rate": 5.728479137157901e-06, + "loss": 1.2676, + "step": 22181 + }, + { + "epoch": 0.6513007222972577, + "grad_norm": 0.0, + "learning_rate": 5.727619317331223e-06, + "loss": 1.3262, + "step": 22182 + }, + { + "epoch": 0.6513300839743966, + "grad_norm": 0.0, + "learning_rate": 5.726759536140769e-06, + "loss": 1.2695, + "step": 22183 + }, + { + "epoch": 0.6513594456515356, + "grad_norm": 0.0, + "learning_rate": 5.7258997935943275e-06, + "loss": 1.1777, + "step": 22184 + }, + { + "epoch": 0.6513888073286747, + "grad_norm": 0.0, + "learning_rate": 5.725040089699664e-06, + "loss": 1.2832, + "step": 22185 + }, + { + "epoch": 0.6514181690058136, + "grad_norm": 0.0, + "learning_rate": 5.724180424464562e-06, + "loss": 1.2344, + "step": 22186 + }, + { + "epoch": 0.6514475306829526, + "grad_norm": 0.0, + "learning_rate": 5.723320797896792e-06, + "loss": 1.208, + "step": 22187 + }, + { + "epoch": 0.6514768923600917, + "grad_norm": 0.0, + "learning_rate": 5.722461210004123e-06, + "loss": 1.3848, + "step": 22188 + }, + { + "epoch": 0.6515062540372306, + "grad_norm": 0.0, + "learning_rate": 5.721601660794338e-06, + "loss": 1.3779, + "step": 22189 + }, + { + "epoch": 0.6515356157143696, + "grad_norm": 0.0, + "learning_rate": 5.7207421502752035e-06, + "loss": 1.3809, + "step": 22190 + }, + { + "epoch": 0.6515649773915086, + "grad_norm": 0.0, + "learning_rate": 5.7198826784544934e-06, + "loss": 1.2549, + "step": 22191 + }, + { + "epoch": 0.6515943390686476, + "grad_norm": 0.0, + "learning_rate": 5.719023245339977e-06, + "loss": 1.1479, + "step": 22192 + }, + { + "epoch": 0.6516237007457866, + "grad_norm": 0.0, + "learning_rate": 5.718163850939435e-06, + "loss": 1.2734, + "step": 22193 + }, + { + "epoch": 0.6516530624229256, + "grad_norm": 0.0, + "learning_rate": 5.717304495260628e-06, + "loss": 1.2559, + "step": 22194 + }, + { + "epoch": 0.6516824241000646, + "grad_norm": 0.0, + "learning_rate": 5.716445178311338e-06, + "loss": 1.3291, + "step": 22195 + }, + { + "epoch": 0.6517117857772036, + "grad_norm": 0.0, + "learning_rate": 5.715585900099327e-06, + "loss": 1.4258, + "step": 22196 + }, + { + "epoch": 0.6517411474543426, + "grad_norm": 0.0, + "learning_rate": 5.714726660632375e-06, + "loss": 1.3232, + "step": 22197 + }, + { + "epoch": 0.6517705091314816, + "grad_norm": 0.0, + "learning_rate": 5.713867459918245e-06, + "loss": 1.3242, + "step": 22198 + }, + { + "epoch": 0.6517998708086206, + "grad_norm": 0.0, + "learning_rate": 5.713008297964711e-06, + "loss": 1.3193, + "step": 22199 + }, + { + "epoch": 0.6518292324857596, + "grad_norm": 0.0, + "learning_rate": 5.712149174779542e-06, + "loss": 1.167, + "step": 22200 + }, + { + "epoch": 0.6518585941628986, + "grad_norm": 0.0, + "learning_rate": 5.711290090370501e-06, + "loss": 1.165, + "step": 22201 + }, + { + "epoch": 0.6518879558400376, + "grad_norm": 0.0, + "learning_rate": 5.710431044745365e-06, + "loss": 1.2275, + "step": 22202 + }, + { + "epoch": 0.6519173175171766, + "grad_norm": 0.0, + "learning_rate": 5.709572037911897e-06, + "loss": 1.3232, + "step": 22203 + }, + { + "epoch": 0.6519466791943156, + "grad_norm": 0.0, + "learning_rate": 5.708713069877872e-06, + "loss": 1.3711, + "step": 22204 + }, + { + "epoch": 0.6519760408714546, + "grad_norm": 0.0, + "learning_rate": 5.707854140651048e-06, + "loss": 1.0645, + "step": 22205 + }, + { + "epoch": 0.6520054025485936, + "grad_norm": 0.0, + "learning_rate": 5.706995250239202e-06, + "loss": 1.3057, + "step": 22206 + }, + { + "epoch": 0.6520347642257326, + "grad_norm": 0.0, + "learning_rate": 5.706136398650104e-06, + "loss": 1.3428, + "step": 22207 + }, + { + "epoch": 0.6520641259028715, + "grad_norm": 0.0, + "learning_rate": 5.7052775858915044e-06, + "loss": 1.3516, + "step": 22208 + }, + { + "epoch": 0.6520934875800106, + "grad_norm": 0.0, + "learning_rate": 5.704418811971184e-06, + "loss": 1.166, + "step": 22209 + }, + { + "epoch": 0.6521228492571496, + "grad_norm": 0.0, + "learning_rate": 5.7035600768969015e-06, + "loss": 1.252, + "step": 22210 + }, + { + "epoch": 0.6521522109342885, + "grad_norm": 0.0, + "learning_rate": 5.702701380676428e-06, + "loss": 1.3213, + "step": 22211 + }, + { + "epoch": 0.6521815726114276, + "grad_norm": 0.0, + "learning_rate": 5.701842723317523e-06, + "loss": 1.3574, + "step": 22212 + }, + { + "epoch": 0.6522109342885666, + "grad_norm": 0.0, + "learning_rate": 5.700984104827958e-06, + "loss": 1.1636, + "step": 22213 + }, + { + "epoch": 0.6522402959657055, + "grad_norm": 0.0, + "learning_rate": 5.700125525215496e-06, + "loss": 1.3262, + "step": 22214 + }, + { + "epoch": 0.6522696576428446, + "grad_norm": 0.0, + "learning_rate": 5.6992669844879e-06, + "loss": 1.2568, + "step": 22215 + }, + { + "epoch": 0.6522990193199836, + "grad_norm": 0.0, + "learning_rate": 5.698408482652934e-06, + "loss": 1.2739, + "step": 22216 + }, + { + "epoch": 0.6523283809971225, + "grad_norm": 0.0, + "learning_rate": 5.697550019718357e-06, + "loss": 1.2598, + "step": 22217 + }, + { + "epoch": 0.6523577426742616, + "grad_norm": 0.0, + "learning_rate": 5.696691595691942e-06, + "loss": 1.2251, + "step": 22218 + }, + { + "epoch": 0.6523871043514006, + "grad_norm": 0.0, + "learning_rate": 5.695833210581443e-06, + "loss": 1.1616, + "step": 22219 + }, + { + "epoch": 0.6524164660285395, + "grad_norm": 0.0, + "learning_rate": 5.69497486439463e-06, + "loss": 1.2173, + "step": 22220 + }, + { + "epoch": 0.6524458277056785, + "grad_norm": 0.0, + "learning_rate": 5.694116557139257e-06, + "loss": 1.3477, + "step": 22221 + }, + { + "epoch": 0.6524751893828176, + "grad_norm": 0.0, + "learning_rate": 5.693258288823097e-06, + "loss": 1.2969, + "step": 22222 + }, + { + "epoch": 0.6525045510599565, + "grad_norm": 0.0, + "learning_rate": 5.692400059453904e-06, + "loss": 1.2402, + "step": 22223 + }, + { + "epoch": 0.6525339127370955, + "grad_norm": 0.0, + "learning_rate": 5.691541869039441e-06, + "loss": 1.209, + "step": 22224 + }, + { + "epoch": 0.6525632744142346, + "grad_norm": 0.0, + "learning_rate": 5.6906837175874645e-06, + "loss": 1.2539, + "step": 22225 + }, + { + "epoch": 0.6525926360913735, + "grad_norm": 0.0, + "learning_rate": 5.689825605105741e-06, + "loss": 1.2856, + "step": 22226 + }, + { + "epoch": 0.6526219977685125, + "grad_norm": 0.0, + "learning_rate": 5.68896753160203e-06, + "loss": 1.2168, + "step": 22227 + }, + { + "epoch": 0.6526513594456516, + "grad_norm": 0.0, + "learning_rate": 5.688109497084085e-06, + "loss": 1.2793, + "step": 22228 + }, + { + "epoch": 0.6526807211227905, + "grad_norm": 0.0, + "learning_rate": 5.687251501559674e-06, + "loss": 1.2637, + "step": 22229 + }, + { + "epoch": 0.6527100827999295, + "grad_norm": 0.0, + "learning_rate": 5.686393545036548e-06, + "loss": 1.3828, + "step": 22230 + }, + { + "epoch": 0.6527394444770686, + "grad_norm": 0.0, + "learning_rate": 5.685535627522475e-06, + "loss": 1.2656, + "step": 22231 + }, + { + "epoch": 0.6527688061542075, + "grad_norm": 0.0, + "learning_rate": 5.684677749025207e-06, + "loss": 1.3506, + "step": 22232 + }, + { + "epoch": 0.6527981678313465, + "grad_norm": 0.0, + "learning_rate": 5.683819909552505e-06, + "loss": 1.2578, + "step": 22233 + }, + { + "epoch": 0.6528275295084855, + "grad_norm": 0.0, + "learning_rate": 5.68296210911212e-06, + "loss": 1.2363, + "step": 22234 + }, + { + "epoch": 0.6528568911856245, + "grad_norm": 0.0, + "learning_rate": 5.682104347711818e-06, + "loss": 1.3115, + "step": 22235 + }, + { + "epoch": 0.6528862528627635, + "grad_norm": 0.0, + "learning_rate": 5.681246625359351e-06, + "loss": 1.2969, + "step": 22236 + }, + { + "epoch": 0.6529156145399025, + "grad_norm": 0.0, + "learning_rate": 5.680388942062476e-06, + "loss": 1.3516, + "step": 22237 + }, + { + "epoch": 0.6529449762170415, + "grad_norm": 0.0, + "learning_rate": 5.67953129782895e-06, + "loss": 1.3301, + "step": 22238 + }, + { + "epoch": 0.6529743378941805, + "grad_norm": 0.0, + "learning_rate": 5.678673692666532e-06, + "loss": 1.0137, + "step": 22239 + }, + { + "epoch": 0.6530036995713195, + "grad_norm": 0.0, + "learning_rate": 5.677816126582974e-06, + "loss": 1.3105, + "step": 22240 + }, + { + "epoch": 0.6530330612484585, + "grad_norm": 0.0, + "learning_rate": 5.676958599586028e-06, + "loss": 1.2471, + "step": 22241 + }, + { + "epoch": 0.6530624229255975, + "grad_norm": 0.0, + "learning_rate": 5.6761011116834566e-06, + "loss": 1.334, + "step": 22242 + }, + { + "epoch": 0.6530917846027365, + "grad_norm": 0.0, + "learning_rate": 5.675243662883005e-06, + "loss": 1.2461, + "step": 22243 + }, + { + "epoch": 0.6531211462798755, + "grad_norm": 0.0, + "learning_rate": 5.674386253192437e-06, + "loss": 1.2144, + "step": 22244 + }, + { + "epoch": 0.6531505079570145, + "grad_norm": 0.0, + "learning_rate": 5.673528882619498e-06, + "loss": 1.2441, + "step": 22245 + }, + { + "epoch": 0.6531798696341535, + "grad_norm": 0.0, + "learning_rate": 5.672671551171949e-06, + "loss": 1.2275, + "step": 22246 + }, + { + "epoch": 0.6532092313112925, + "grad_norm": 0.0, + "learning_rate": 5.67181425885754e-06, + "loss": 1.064, + "step": 22247 + }, + { + "epoch": 0.6532385929884315, + "grad_norm": 0.0, + "learning_rate": 5.670957005684023e-06, + "loss": 1.1748, + "step": 22248 + }, + { + "epoch": 0.6532679546655705, + "grad_norm": 0.0, + "learning_rate": 5.670099791659151e-06, + "loss": 1.2627, + "step": 22249 + }, + { + "epoch": 0.6532973163427095, + "grad_norm": 0.0, + "learning_rate": 5.66924261679067e-06, + "loss": 1.2944, + "step": 22250 + }, + { + "epoch": 0.6533266780198485, + "grad_norm": 0.0, + "learning_rate": 5.668385481086342e-06, + "loss": 1.2949, + "step": 22251 + }, + { + "epoch": 0.6533560396969875, + "grad_norm": 0.0, + "learning_rate": 5.6675283845539096e-06, + "loss": 1.3877, + "step": 22252 + }, + { + "epoch": 0.6533854013741265, + "grad_norm": 0.0, + "learning_rate": 5.666671327201132e-06, + "loss": 1.3447, + "step": 22253 + }, + { + "epoch": 0.6534147630512654, + "grad_norm": 0.0, + "learning_rate": 5.665814309035751e-06, + "loss": 1.2832, + "step": 22254 + }, + { + "epoch": 0.6534441247284045, + "grad_norm": 0.0, + "learning_rate": 5.664957330065524e-06, + "loss": 1.2676, + "step": 22255 + }, + { + "epoch": 0.6534734864055435, + "grad_norm": 0.0, + "learning_rate": 5.664100390298199e-06, + "loss": 1.2812, + "step": 22256 + }, + { + "epoch": 0.6535028480826824, + "grad_norm": 0.0, + "learning_rate": 5.663243489741526e-06, + "loss": 1.2451, + "step": 22257 + }, + { + "epoch": 0.6535322097598215, + "grad_norm": 0.0, + "learning_rate": 5.66238662840325e-06, + "loss": 1.2041, + "step": 22258 + }, + { + "epoch": 0.6535615714369605, + "grad_norm": 0.0, + "learning_rate": 5.661529806291121e-06, + "loss": 1.2373, + "step": 22259 + }, + { + "epoch": 0.6535909331140994, + "grad_norm": 0.0, + "learning_rate": 5.660673023412891e-06, + "loss": 1.1855, + "step": 22260 + }, + { + "epoch": 0.6536202947912385, + "grad_norm": 0.0, + "learning_rate": 5.659816279776305e-06, + "loss": 1.249, + "step": 22261 + }, + { + "epoch": 0.6536496564683775, + "grad_norm": 0.0, + "learning_rate": 5.658959575389113e-06, + "loss": 1.4199, + "step": 22262 + }, + { + "epoch": 0.6536790181455164, + "grad_norm": 0.0, + "learning_rate": 5.658102910259062e-06, + "loss": 1.1689, + "step": 22263 + }, + { + "epoch": 0.6537083798226555, + "grad_norm": 0.0, + "learning_rate": 5.6572462843939e-06, + "loss": 1.207, + "step": 22264 + }, + { + "epoch": 0.6537377414997945, + "grad_norm": 0.0, + "learning_rate": 5.656389697801367e-06, + "loss": 1.2314, + "step": 22265 + }, + { + "epoch": 0.6537671031769334, + "grad_norm": 0.0, + "learning_rate": 5.655533150489219e-06, + "loss": 1.2847, + "step": 22266 + }, + { + "epoch": 0.6537964648540725, + "grad_norm": 0.0, + "learning_rate": 5.654676642465196e-06, + "loss": 1.2783, + "step": 22267 + }, + { + "epoch": 0.6538258265312115, + "grad_norm": 0.0, + "learning_rate": 5.653820173737042e-06, + "loss": 1.2871, + "step": 22268 + }, + { + "epoch": 0.6538551882083504, + "grad_norm": 0.0, + "learning_rate": 5.652963744312507e-06, + "loss": 1.0962, + "step": 22269 + }, + { + "epoch": 0.6538845498854895, + "grad_norm": 0.0, + "learning_rate": 5.652107354199333e-06, + "loss": 1.2505, + "step": 22270 + }, + { + "epoch": 0.6539139115626285, + "grad_norm": 0.0, + "learning_rate": 5.6512510034052695e-06, + "loss": 1.1982, + "step": 22271 + }, + { + "epoch": 0.6539432732397674, + "grad_norm": 0.0, + "learning_rate": 5.650394691938055e-06, + "loss": 1.2051, + "step": 22272 + }, + { + "epoch": 0.6539726349169065, + "grad_norm": 0.0, + "learning_rate": 5.649538419805437e-06, + "loss": 1.3633, + "step": 22273 + }, + { + "epoch": 0.6540019965940455, + "grad_norm": 0.0, + "learning_rate": 5.648682187015153e-06, + "loss": 1.3086, + "step": 22274 + }, + { + "epoch": 0.6540313582711844, + "grad_norm": 0.0, + "learning_rate": 5.647825993574953e-06, + "loss": 1.3955, + "step": 22275 + }, + { + "epoch": 0.6540607199483235, + "grad_norm": 0.0, + "learning_rate": 5.646969839492578e-06, + "loss": 1.2207, + "step": 22276 + }, + { + "epoch": 0.6540900816254624, + "grad_norm": 0.0, + "learning_rate": 5.6461137247757656e-06, + "loss": 1.3281, + "step": 22277 + }, + { + "epoch": 0.6541194433026014, + "grad_norm": 0.0, + "learning_rate": 5.645257649432267e-06, + "loss": 1.4111, + "step": 22278 + }, + { + "epoch": 0.6541488049797405, + "grad_norm": 0.0, + "learning_rate": 5.644401613469814e-06, + "loss": 1.1372, + "step": 22279 + }, + { + "epoch": 0.6541781666568794, + "grad_norm": 0.0, + "learning_rate": 5.6435456168961555e-06, + "loss": 1.2178, + "step": 22280 + }, + { + "epoch": 0.6542075283340184, + "grad_norm": 0.0, + "learning_rate": 5.642689659719032e-06, + "loss": 1.2168, + "step": 22281 + }, + { + "epoch": 0.6542368900111575, + "grad_norm": 0.0, + "learning_rate": 5.641833741946179e-06, + "loss": 1.2041, + "step": 22282 + }, + { + "epoch": 0.6542662516882964, + "grad_norm": 0.0, + "learning_rate": 5.640977863585338e-06, + "loss": 1.2803, + "step": 22283 + }, + { + "epoch": 0.6542956133654354, + "grad_norm": 0.0, + "learning_rate": 5.640122024644253e-06, + "loss": 1.3516, + "step": 22284 + }, + { + "epoch": 0.6543249750425745, + "grad_norm": 0.0, + "learning_rate": 5.639266225130662e-06, + "loss": 1.2632, + "step": 22285 + }, + { + "epoch": 0.6543543367197134, + "grad_norm": 0.0, + "learning_rate": 5.6384104650522995e-06, + "loss": 1.0991, + "step": 22286 + }, + { + "epoch": 0.6543836983968524, + "grad_norm": 0.0, + "learning_rate": 5.637554744416911e-06, + "loss": 1.2354, + "step": 22287 + }, + { + "epoch": 0.6544130600739915, + "grad_norm": 0.0, + "learning_rate": 5.636699063232234e-06, + "loss": 1.3062, + "step": 22288 + }, + { + "epoch": 0.6544424217511304, + "grad_norm": 0.0, + "learning_rate": 5.635843421506003e-06, + "loss": 1.2598, + "step": 22289 + }, + { + "epoch": 0.6544717834282694, + "grad_norm": 0.0, + "learning_rate": 5.634987819245954e-06, + "loss": 1.2051, + "step": 22290 + }, + { + "epoch": 0.6545011451054085, + "grad_norm": 0.0, + "learning_rate": 5.634132256459832e-06, + "loss": 1.3486, + "step": 22291 + }, + { + "epoch": 0.6545305067825474, + "grad_norm": 0.0, + "learning_rate": 5.633276733155367e-06, + "loss": 1.1406, + "step": 22292 + }, + { + "epoch": 0.6545598684596864, + "grad_norm": 0.0, + "learning_rate": 5.632421249340303e-06, + "loss": 1.2671, + "step": 22293 + }, + { + "epoch": 0.6545892301368255, + "grad_norm": 0.0, + "learning_rate": 5.6315658050223675e-06, + "loss": 1.335, + "step": 22294 + }, + { + "epoch": 0.6546185918139644, + "grad_norm": 0.0, + "learning_rate": 5.630710400209305e-06, + "loss": 1.2783, + "step": 22295 + }, + { + "epoch": 0.6546479534911034, + "grad_norm": 0.0, + "learning_rate": 5.629855034908848e-06, + "loss": 1.2861, + "step": 22296 + }, + { + "epoch": 0.6546773151682425, + "grad_norm": 0.0, + "learning_rate": 5.628999709128731e-06, + "loss": 1.2764, + "step": 22297 + }, + { + "epoch": 0.6547066768453814, + "grad_norm": 0.0, + "learning_rate": 5.628144422876688e-06, + "loss": 1.251, + "step": 22298 + }, + { + "epoch": 0.6547360385225204, + "grad_norm": 0.0, + "learning_rate": 5.627289176160451e-06, + "loss": 1.2686, + "step": 22299 + }, + { + "epoch": 0.6547654001996595, + "grad_norm": 0.0, + "learning_rate": 5.626433968987764e-06, + "loss": 1.2388, + "step": 22300 + }, + { + "epoch": 0.6547947618767984, + "grad_norm": 0.0, + "learning_rate": 5.625578801366347e-06, + "loss": 1.2295, + "step": 22301 + }, + { + "epoch": 0.6548241235539374, + "grad_norm": 0.0, + "learning_rate": 5.624723673303948e-06, + "loss": 1.2979, + "step": 22302 + }, + { + "epoch": 0.6548534852310764, + "grad_norm": 0.0, + "learning_rate": 5.623868584808287e-06, + "loss": 1.189, + "step": 22303 + }, + { + "epoch": 0.6548828469082154, + "grad_norm": 0.0, + "learning_rate": 5.623013535887109e-06, + "loss": 1.2871, + "step": 22304 + }, + { + "epoch": 0.6549122085853544, + "grad_norm": 0.0, + "learning_rate": 5.62215852654814e-06, + "loss": 1.2803, + "step": 22305 + }, + { + "epoch": 0.6549415702624934, + "grad_norm": 0.0, + "learning_rate": 5.621303556799113e-06, + "loss": 1.3994, + "step": 22306 + }, + { + "epoch": 0.6549709319396324, + "grad_norm": 0.0, + "learning_rate": 5.620448626647761e-06, + "loss": 1.3154, + "step": 22307 + }, + { + "epoch": 0.6550002936167714, + "grad_norm": 0.0, + "learning_rate": 5.619593736101807e-06, + "loss": 1.2759, + "step": 22308 + }, + { + "epoch": 0.6550296552939104, + "grad_norm": 0.0, + "learning_rate": 5.618738885168993e-06, + "loss": 1.3145, + "step": 22309 + }, + { + "epoch": 0.6550590169710494, + "grad_norm": 0.0, + "learning_rate": 5.6178840738570425e-06, + "loss": 1.1162, + "step": 22310 + }, + { + "epoch": 0.6550883786481884, + "grad_norm": 0.0, + "learning_rate": 5.617029302173691e-06, + "loss": 1.1963, + "step": 22311 + }, + { + "epoch": 0.6551177403253274, + "grad_norm": 0.0, + "learning_rate": 5.616174570126665e-06, + "loss": 1.2017, + "step": 22312 + }, + { + "epoch": 0.6551471020024664, + "grad_norm": 0.0, + "learning_rate": 5.615319877723699e-06, + "loss": 1.2275, + "step": 22313 + }, + { + "epoch": 0.6551764636796054, + "grad_norm": 0.0, + "learning_rate": 5.614465224972514e-06, + "loss": 1.1797, + "step": 22314 + }, + { + "epoch": 0.6552058253567444, + "grad_norm": 0.0, + "learning_rate": 5.613610611880844e-06, + "loss": 1.2622, + "step": 22315 + }, + { + "epoch": 0.6552351870338834, + "grad_norm": 0.0, + "learning_rate": 5.612756038456419e-06, + "loss": 1.0552, + "step": 22316 + }, + { + "epoch": 0.6552645487110224, + "grad_norm": 0.0, + "learning_rate": 5.61190150470696e-06, + "loss": 1.249, + "step": 22317 + }, + { + "epoch": 0.6552939103881614, + "grad_norm": 0.0, + "learning_rate": 5.611047010640203e-06, + "loss": 1.3369, + "step": 22318 + }, + { + "epoch": 0.6553232720653004, + "grad_norm": 0.0, + "learning_rate": 5.610192556263868e-06, + "loss": 1.2261, + "step": 22319 + }, + { + "epoch": 0.6553526337424393, + "grad_norm": 0.0, + "learning_rate": 5.609338141585689e-06, + "loss": 1.3018, + "step": 22320 + }, + { + "epoch": 0.6553819954195783, + "grad_norm": 0.0, + "learning_rate": 5.608483766613392e-06, + "loss": 1.2559, + "step": 22321 + }, + { + "epoch": 0.6554113570967174, + "grad_norm": 0.0, + "learning_rate": 5.607629431354699e-06, + "loss": 1.3184, + "step": 22322 + }, + { + "epoch": 0.6554407187738563, + "grad_norm": 0.0, + "learning_rate": 5.606775135817335e-06, + "loss": 1.2153, + "step": 22323 + }, + { + "epoch": 0.6554700804509953, + "grad_norm": 0.0, + "learning_rate": 5.605920880009032e-06, + "loss": 1.2734, + "step": 22324 + }, + { + "epoch": 0.6554994421281344, + "grad_norm": 0.0, + "learning_rate": 5.605066663937513e-06, + "loss": 1.147, + "step": 22325 + }, + { + "epoch": 0.6555288038052733, + "grad_norm": 0.0, + "learning_rate": 5.604212487610496e-06, + "loss": 1.209, + "step": 22326 + }, + { + "epoch": 0.6555581654824123, + "grad_norm": 0.0, + "learning_rate": 5.603358351035717e-06, + "loss": 1.3105, + "step": 22327 + }, + { + "epoch": 0.6555875271595514, + "grad_norm": 0.0, + "learning_rate": 5.60250425422089e-06, + "loss": 1.249, + "step": 22328 + }, + { + "epoch": 0.6556168888366903, + "grad_norm": 0.0, + "learning_rate": 5.6016501971737465e-06, + "loss": 1.2354, + "step": 22329 + }, + { + "epoch": 0.6556462505138293, + "grad_norm": 0.0, + "learning_rate": 5.6007961799020075e-06, + "loss": 1.2021, + "step": 22330 + }, + { + "epoch": 0.6556756121909684, + "grad_norm": 0.0, + "learning_rate": 5.599942202413394e-06, + "loss": 1.1606, + "step": 22331 + }, + { + "epoch": 0.6557049738681073, + "grad_norm": 0.0, + "learning_rate": 5.599088264715626e-06, + "loss": 1.2549, + "step": 22332 + }, + { + "epoch": 0.6557343355452463, + "grad_norm": 0.0, + "learning_rate": 5.598234366816435e-06, + "loss": 1.2715, + "step": 22333 + }, + { + "epoch": 0.6557636972223854, + "grad_norm": 0.0, + "learning_rate": 5.597380508723538e-06, + "loss": 1.2637, + "step": 22334 + }, + { + "epoch": 0.6557930588995243, + "grad_norm": 0.0, + "learning_rate": 5.596526690444652e-06, + "loss": 1.2031, + "step": 22335 + }, + { + "epoch": 0.6558224205766633, + "grad_norm": 0.0, + "learning_rate": 5.5956729119875084e-06, + "loss": 1.2183, + "step": 22336 + }, + { + "epoch": 0.6558517822538024, + "grad_norm": 0.0, + "learning_rate": 5.59481917335982e-06, + "loss": 1.1885, + "step": 22337 + }, + { + "epoch": 0.6558811439309413, + "grad_norm": 0.0, + "learning_rate": 5.593965474569312e-06, + "loss": 1.3184, + "step": 22338 + }, + { + "epoch": 0.6559105056080803, + "grad_norm": 0.0, + "learning_rate": 5.5931118156236975e-06, + "loss": 1.2959, + "step": 22339 + }, + { + "epoch": 0.6559398672852194, + "grad_norm": 0.0, + "learning_rate": 5.592258196530706e-06, + "loss": 1.1548, + "step": 22340 + }, + { + "epoch": 0.6559692289623583, + "grad_norm": 0.0, + "learning_rate": 5.5914046172980485e-06, + "loss": 1.2373, + "step": 22341 + }, + { + "epoch": 0.6559985906394973, + "grad_norm": 0.0, + "learning_rate": 5.590551077933453e-06, + "loss": 1.2632, + "step": 22342 + }, + { + "epoch": 0.6560279523166364, + "grad_norm": 0.0, + "learning_rate": 5.589697578444628e-06, + "loss": 1.1875, + "step": 22343 + }, + { + "epoch": 0.6560573139937753, + "grad_norm": 0.0, + "learning_rate": 5.588844118839302e-06, + "loss": 1.0962, + "step": 22344 + }, + { + "epoch": 0.6560866756709143, + "grad_norm": 0.0, + "learning_rate": 5.587990699125187e-06, + "loss": 1.3809, + "step": 22345 + }, + { + "epoch": 0.6561160373480533, + "grad_norm": 0.0, + "learning_rate": 5.587137319310003e-06, + "loss": 1.2056, + "step": 22346 + }, + { + "epoch": 0.6561453990251923, + "grad_norm": 0.0, + "learning_rate": 5.586283979401468e-06, + "loss": 1.2993, + "step": 22347 + }, + { + "epoch": 0.6561747607023313, + "grad_norm": 0.0, + "learning_rate": 5.585430679407291e-06, + "loss": 1.2876, + "step": 22348 + }, + { + "epoch": 0.6562041223794703, + "grad_norm": 0.0, + "learning_rate": 5.5845774193352e-06, + "loss": 1.375, + "step": 22349 + }, + { + "epoch": 0.6562334840566093, + "grad_norm": 0.0, + "learning_rate": 5.583724199192901e-06, + "loss": 1.2739, + "step": 22350 + }, + { + "epoch": 0.6562628457337483, + "grad_norm": 0.0, + "learning_rate": 5.582871018988121e-06, + "loss": 1.3594, + "step": 22351 + }, + { + "epoch": 0.6562922074108873, + "grad_norm": 0.0, + "learning_rate": 5.582017878728565e-06, + "loss": 0.9966, + "step": 22352 + }, + { + "epoch": 0.6563215690880263, + "grad_norm": 0.0, + "learning_rate": 5.581164778421956e-06, + "loss": 1.209, + "step": 22353 + }, + { + "epoch": 0.6563509307651653, + "grad_norm": 0.0, + "learning_rate": 5.580311718076006e-06, + "loss": 1.2852, + "step": 22354 + }, + { + "epoch": 0.6563802924423043, + "grad_norm": 0.0, + "learning_rate": 5.57945869769843e-06, + "loss": 1.2178, + "step": 22355 + }, + { + "epoch": 0.6564096541194433, + "grad_norm": 0.0, + "learning_rate": 5.578605717296939e-06, + "loss": 1.2236, + "step": 22356 + }, + { + "epoch": 0.6564390157965823, + "grad_norm": 0.0, + "learning_rate": 5.5777527768792464e-06, + "loss": 1.0854, + "step": 22357 + }, + { + "epoch": 0.6564683774737213, + "grad_norm": 0.0, + "learning_rate": 5.576899876453073e-06, + "loss": 1.1309, + "step": 22358 + }, + { + "epoch": 0.6564977391508603, + "grad_norm": 0.0, + "learning_rate": 5.576047016026121e-06, + "loss": 1.248, + "step": 22359 + }, + { + "epoch": 0.6565271008279993, + "grad_norm": 0.0, + "learning_rate": 5.575194195606113e-06, + "loss": 1.2773, + "step": 22360 + }, + { + "epoch": 0.6565564625051383, + "grad_norm": 0.0, + "learning_rate": 5.574341415200752e-06, + "loss": 1.1973, + "step": 22361 + }, + { + "epoch": 0.6565858241822773, + "grad_norm": 0.0, + "learning_rate": 5.573488674817764e-06, + "loss": 1.2314, + "step": 22362 + }, + { + "epoch": 0.6566151858594163, + "grad_norm": 0.0, + "learning_rate": 5.572635974464845e-06, + "loss": 1.1699, + "step": 22363 + }, + { + "epoch": 0.6566445475365553, + "grad_norm": 0.0, + "learning_rate": 5.571783314149716e-06, + "loss": 1.2241, + "step": 22364 + }, + { + "epoch": 0.6566739092136943, + "grad_norm": 0.0, + "learning_rate": 5.570930693880083e-06, + "loss": 1.1162, + "step": 22365 + }, + { + "epoch": 0.6567032708908332, + "grad_norm": 0.0, + "learning_rate": 5.570078113663656e-06, + "loss": 1.2529, + "step": 22366 + }, + { + "epoch": 0.6567326325679723, + "grad_norm": 0.0, + "learning_rate": 5.56922557350815e-06, + "loss": 1.3379, + "step": 22367 + }, + { + "epoch": 0.6567619942451113, + "grad_norm": 0.0, + "learning_rate": 5.568373073421269e-06, + "loss": 1.2246, + "step": 22368 + }, + { + "epoch": 0.6567913559222502, + "grad_norm": 0.0, + "learning_rate": 5.567520613410729e-06, + "loss": 1.249, + "step": 22369 + }, + { + "epoch": 0.6568207175993893, + "grad_norm": 0.0, + "learning_rate": 5.566668193484235e-06, + "loss": 1.2583, + "step": 22370 + }, + { + "epoch": 0.6568500792765283, + "grad_norm": 0.0, + "learning_rate": 5.565815813649495e-06, + "loss": 1.335, + "step": 22371 + }, + { + "epoch": 0.6568794409536672, + "grad_norm": 0.0, + "learning_rate": 5.5649634739142146e-06, + "loss": 1.2705, + "step": 22372 + }, + { + "epoch": 0.6569088026308063, + "grad_norm": 0.0, + "learning_rate": 5.56411117428611e-06, + "loss": 1.3867, + "step": 22373 + }, + { + "epoch": 0.6569381643079453, + "grad_norm": 0.0, + "learning_rate": 5.563258914772883e-06, + "loss": 1.2773, + "step": 22374 + }, + { + "epoch": 0.6569675259850842, + "grad_norm": 0.0, + "learning_rate": 5.562406695382239e-06, + "loss": 1.2627, + "step": 22375 + }, + { + "epoch": 0.6569968876622233, + "grad_norm": 0.0, + "learning_rate": 5.561554516121892e-06, + "loss": 1.2627, + "step": 22376 + }, + { + "epoch": 0.6570262493393623, + "grad_norm": 0.0, + "learning_rate": 5.56070237699954e-06, + "loss": 1.2915, + "step": 22377 + }, + { + "epoch": 0.6570556110165012, + "grad_norm": 0.0, + "learning_rate": 5.5598502780228955e-06, + "loss": 1.2803, + "step": 22378 + }, + { + "epoch": 0.6570849726936403, + "grad_norm": 0.0, + "learning_rate": 5.5589982191996635e-06, + "loss": 1.3691, + "step": 22379 + }, + { + "epoch": 0.6571143343707793, + "grad_norm": 0.0, + "learning_rate": 5.558146200537548e-06, + "loss": 1.2412, + "step": 22380 + }, + { + "epoch": 0.6571436960479182, + "grad_norm": 0.0, + "learning_rate": 5.55729422204425e-06, + "loss": 1.2949, + "step": 22381 + }, + { + "epoch": 0.6571730577250573, + "grad_norm": 0.0, + "learning_rate": 5.556442283727484e-06, + "loss": 1.3291, + "step": 22382 + }, + { + "epoch": 0.6572024194021963, + "grad_norm": 0.0, + "learning_rate": 5.5555903855949415e-06, + "loss": 1.1899, + "step": 22383 + }, + { + "epoch": 0.6572317810793352, + "grad_norm": 0.0, + "learning_rate": 5.554738527654337e-06, + "loss": 1.2061, + "step": 22384 + }, + { + "epoch": 0.6572611427564743, + "grad_norm": 0.0, + "learning_rate": 5.553886709913373e-06, + "loss": 1.0771, + "step": 22385 + }, + { + "epoch": 0.6572905044336133, + "grad_norm": 0.0, + "learning_rate": 5.553034932379745e-06, + "loss": 1.0859, + "step": 22386 + }, + { + "epoch": 0.6573198661107522, + "grad_norm": 0.0, + "learning_rate": 5.5521831950611694e-06, + "loss": 1.2344, + "step": 22387 + }, + { + "epoch": 0.6573492277878913, + "grad_norm": 0.0, + "learning_rate": 5.551331497965331e-06, + "loss": 1.2188, + "step": 22388 + }, + { + "epoch": 0.6573785894650302, + "grad_norm": 0.0, + "learning_rate": 5.550479841099947e-06, + "loss": 1.2026, + "step": 22389 + }, + { + "epoch": 0.6574079511421692, + "grad_norm": 0.0, + "learning_rate": 5.549628224472707e-06, + "loss": 1.1816, + "step": 22390 + }, + { + "epoch": 0.6574373128193083, + "grad_norm": 0.0, + "learning_rate": 5.5487766480913255e-06, + "loss": 1.2666, + "step": 22391 + }, + { + "epoch": 0.6574666744964472, + "grad_norm": 0.0, + "learning_rate": 5.547925111963491e-06, + "loss": 1.2734, + "step": 22392 + }, + { + "epoch": 0.6574960361735862, + "grad_norm": 0.0, + "learning_rate": 5.547073616096914e-06, + "loss": 1.1953, + "step": 22393 + }, + { + "epoch": 0.6575253978507253, + "grad_norm": 0.0, + "learning_rate": 5.546222160499291e-06, + "loss": 1.1309, + "step": 22394 + }, + { + "epoch": 0.6575547595278642, + "grad_norm": 0.0, + "learning_rate": 5.545370745178322e-06, + "loss": 1.2764, + "step": 22395 + }, + { + "epoch": 0.6575841212050032, + "grad_norm": 0.0, + "learning_rate": 5.544519370141706e-06, + "loss": 1.3428, + "step": 22396 + }, + { + "epoch": 0.6576134828821423, + "grad_norm": 0.0, + "learning_rate": 5.543668035397138e-06, + "loss": 1.3462, + "step": 22397 + }, + { + "epoch": 0.6576428445592812, + "grad_norm": 0.0, + "learning_rate": 5.542816740952326e-06, + "loss": 1.3584, + "step": 22398 + }, + { + "epoch": 0.6576722062364202, + "grad_norm": 0.0, + "learning_rate": 5.541965486814958e-06, + "loss": 1.1479, + "step": 22399 + }, + { + "epoch": 0.6577015679135593, + "grad_norm": 0.0, + "learning_rate": 5.541114272992743e-06, + "loss": 1.4258, + "step": 22400 + }, + { + "epoch": 0.6577309295906982, + "grad_norm": 0.0, + "learning_rate": 5.540263099493371e-06, + "loss": 1.2236, + "step": 22401 + }, + { + "epoch": 0.6577602912678372, + "grad_norm": 0.0, + "learning_rate": 5.5394119663245435e-06, + "loss": 1.3062, + "step": 22402 + }, + { + "epoch": 0.6577896529449763, + "grad_norm": 0.0, + "learning_rate": 5.538560873493957e-06, + "loss": 1.332, + "step": 22403 + }, + { + "epoch": 0.6578190146221152, + "grad_norm": 0.0, + "learning_rate": 5.5377098210093075e-06, + "loss": 1.1816, + "step": 22404 + }, + { + "epoch": 0.6578483762992542, + "grad_norm": 0.0, + "learning_rate": 5.536858808878289e-06, + "loss": 1.2808, + "step": 22405 + }, + { + "epoch": 0.6578777379763933, + "grad_norm": 0.0, + "learning_rate": 5.536007837108598e-06, + "loss": 1.1865, + "step": 22406 + }, + { + "epoch": 0.6579070996535322, + "grad_norm": 0.0, + "learning_rate": 5.535156905707935e-06, + "loss": 1.2217, + "step": 22407 + }, + { + "epoch": 0.6579364613306712, + "grad_norm": 0.0, + "learning_rate": 5.5343060146839856e-06, + "loss": 1.3271, + "step": 22408 + }, + { + "epoch": 0.6579658230078103, + "grad_norm": 0.0, + "learning_rate": 5.533455164044456e-06, + "loss": 1.2363, + "step": 22409 + }, + { + "epoch": 0.6579951846849492, + "grad_norm": 0.0, + "learning_rate": 5.532604353797031e-06, + "loss": 1.2344, + "step": 22410 + }, + { + "epoch": 0.6580245463620882, + "grad_norm": 0.0, + "learning_rate": 5.531753583949413e-06, + "loss": 1.3398, + "step": 22411 + }, + { + "epoch": 0.6580539080392273, + "grad_norm": 0.0, + "learning_rate": 5.5309028545092915e-06, + "loss": 1.3828, + "step": 22412 + }, + { + "epoch": 0.6580832697163662, + "grad_norm": 0.0, + "learning_rate": 5.5300521654843595e-06, + "loss": 1.2139, + "step": 22413 + }, + { + "epoch": 0.6581126313935052, + "grad_norm": 0.0, + "learning_rate": 5.529201516882312e-06, + "loss": 1.2949, + "step": 22414 + }, + { + "epoch": 0.6581419930706442, + "grad_norm": 0.0, + "learning_rate": 5.528350908710836e-06, + "loss": 1.2949, + "step": 22415 + }, + { + "epoch": 0.6581713547477832, + "grad_norm": 0.0, + "learning_rate": 5.527500340977629e-06, + "loss": 1.3789, + "step": 22416 + }, + { + "epoch": 0.6582007164249222, + "grad_norm": 0.0, + "learning_rate": 5.526649813690381e-06, + "loss": 1.3198, + "step": 22417 + }, + { + "epoch": 0.6582300781020612, + "grad_norm": 0.0, + "learning_rate": 5.525799326856787e-06, + "loss": 1.0801, + "step": 22418 + }, + { + "epoch": 0.6582594397792002, + "grad_norm": 0.0, + "learning_rate": 5.524948880484537e-06, + "loss": 1.2842, + "step": 22419 + }, + { + "epoch": 0.6582888014563392, + "grad_norm": 0.0, + "learning_rate": 5.524098474581318e-06, + "loss": 1.2559, + "step": 22420 + }, + { + "epoch": 0.6583181631334781, + "grad_norm": 0.0, + "learning_rate": 5.523248109154822e-06, + "loss": 1.2017, + "step": 22421 + }, + { + "epoch": 0.6583475248106172, + "grad_norm": 0.0, + "learning_rate": 5.522397784212741e-06, + "loss": 1.1924, + "step": 22422 + }, + { + "epoch": 0.6583768864877562, + "grad_norm": 0.0, + "learning_rate": 5.521547499762765e-06, + "loss": 1.1206, + "step": 22423 + }, + { + "epoch": 0.6584062481648951, + "grad_norm": 0.0, + "learning_rate": 5.520697255812577e-06, + "loss": 1.2988, + "step": 22424 + }, + { + "epoch": 0.6584356098420342, + "grad_norm": 0.0, + "learning_rate": 5.519847052369876e-06, + "loss": 1.3506, + "step": 22425 + }, + { + "epoch": 0.6584649715191732, + "grad_norm": 0.0, + "learning_rate": 5.518996889442341e-06, + "loss": 1.249, + "step": 22426 + }, + { + "epoch": 0.6584943331963121, + "grad_norm": 0.0, + "learning_rate": 5.518146767037669e-06, + "loss": 1.1777, + "step": 22427 + }, + { + "epoch": 0.6585236948734512, + "grad_norm": 0.0, + "learning_rate": 5.517296685163542e-06, + "loss": 1.1392, + "step": 22428 + }, + { + "epoch": 0.6585530565505902, + "grad_norm": 0.0, + "learning_rate": 5.516446643827651e-06, + "loss": 1.0986, + "step": 22429 + }, + { + "epoch": 0.6585824182277291, + "grad_norm": 0.0, + "learning_rate": 5.515596643037678e-06, + "loss": 1.2266, + "step": 22430 + }, + { + "epoch": 0.6586117799048682, + "grad_norm": 0.0, + "learning_rate": 5.514746682801314e-06, + "loss": 1.251, + "step": 22431 + }, + { + "epoch": 0.6586411415820072, + "grad_norm": 0.0, + "learning_rate": 5.513896763126243e-06, + "loss": 1.2729, + "step": 22432 + }, + { + "epoch": 0.6586705032591461, + "grad_norm": 0.0, + "learning_rate": 5.513046884020155e-06, + "loss": 1.2085, + "step": 22433 + }, + { + "epoch": 0.6586998649362852, + "grad_norm": 0.0, + "learning_rate": 5.512197045490733e-06, + "loss": 1.1221, + "step": 22434 + }, + { + "epoch": 0.6587292266134241, + "grad_norm": 0.0, + "learning_rate": 5.5113472475456595e-06, + "loss": 1.3516, + "step": 22435 + }, + { + "epoch": 0.6587585882905631, + "grad_norm": 0.0, + "learning_rate": 5.510497490192626e-06, + "loss": 1.1592, + "step": 22436 + }, + { + "epoch": 0.6587879499677022, + "grad_norm": 0.0, + "learning_rate": 5.509647773439314e-06, + "loss": 1.2402, + "step": 22437 + }, + { + "epoch": 0.6588173116448411, + "grad_norm": 0.0, + "learning_rate": 5.508798097293406e-06, + "loss": 1.3135, + "step": 22438 + }, + { + "epoch": 0.6588466733219801, + "grad_norm": 0.0, + "learning_rate": 5.507948461762584e-06, + "loss": 1.2441, + "step": 22439 + }, + { + "epoch": 0.6588760349991192, + "grad_norm": 0.0, + "learning_rate": 5.507098866854539e-06, + "loss": 1.3179, + "step": 22440 + }, + { + "epoch": 0.6589053966762581, + "grad_norm": 0.0, + "learning_rate": 5.506249312576945e-06, + "loss": 1.2793, + "step": 22441 + }, + { + "epoch": 0.6589347583533971, + "grad_norm": 0.0, + "learning_rate": 5.505399798937492e-06, + "loss": 1.3027, + "step": 22442 + }, + { + "epoch": 0.6589641200305362, + "grad_norm": 0.0, + "learning_rate": 5.504550325943861e-06, + "loss": 1.3047, + "step": 22443 + }, + { + "epoch": 0.6589934817076751, + "grad_norm": 0.0, + "learning_rate": 5.503700893603733e-06, + "loss": 1.2998, + "step": 22444 + }, + { + "epoch": 0.6590228433848141, + "grad_norm": 0.0, + "learning_rate": 5.5028515019247885e-06, + "loss": 1.2568, + "step": 22445 + }, + { + "epoch": 0.6590522050619532, + "grad_norm": 0.0, + "learning_rate": 5.502002150914707e-06, + "loss": 1.209, + "step": 22446 + }, + { + "epoch": 0.6590815667390921, + "grad_norm": 0.0, + "learning_rate": 5.501152840581175e-06, + "loss": 1.2305, + "step": 22447 + }, + { + "epoch": 0.6591109284162311, + "grad_norm": 0.0, + "learning_rate": 5.500303570931866e-06, + "loss": 1.2622, + "step": 22448 + }, + { + "epoch": 0.6591402900933702, + "grad_norm": 0.0, + "learning_rate": 5.499454341974468e-06, + "loss": 1.1963, + "step": 22449 + }, + { + "epoch": 0.6591696517705091, + "grad_norm": 0.0, + "learning_rate": 5.498605153716653e-06, + "loss": 1.1792, + "step": 22450 + }, + { + "epoch": 0.6591990134476481, + "grad_norm": 0.0, + "learning_rate": 5.497756006166108e-06, + "loss": 1.1953, + "step": 22451 + }, + { + "epoch": 0.6592283751247872, + "grad_norm": 0.0, + "learning_rate": 5.496906899330509e-06, + "loss": 1.2349, + "step": 22452 + }, + { + "epoch": 0.6592577368019261, + "grad_norm": 0.0, + "learning_rate": 5.4960578332175354e-06, + "loss": 1.2549, + "step": 22453 + }, + { + "epoch": 0.6592870984790651, + "grad_norm": 0.0, + "learning_rate": 5.495208807834863e-06, + "loss": 1.3076, + "step": 22454 + }, + { + "epoch": 0.6593164601562042, + "grad_norm": 0.0, + "learning_rate": 5.494359823190167e-06, + "loss": 1.2783, + "step": 22455 + }, + { + "epoch": 0.6593458218333431, + "grad_norm": 0.0, + "learning_rate": 5.493510879291133e-06, + "loss": 1.2666, + "step": 22456 + }, + { + "epoch": 0.6593751835104821, + "grad_norm": 0.0, + "learning_rate": 5.4926619761454306e-06, + "loss": 1.2002, + "step": 22457 + }, + { + "epoch": 0.6594045451876211, + "grad_norm": 0.0, + "learning_rate": 5.491813113760744e-06, + "loss": 1.208, + "step": 22458 + }, + { + "epoch": 0.6594339068647601, + "grad_norm": 0.0, + "learning_rate": 5.490964292144743e-06, + "loss": 1.3574, + "step": 22459 + }, + { + "epoch": 0.6594632685418991, + "grad_norm": 0.0, + "learning_rate": 5.49011551130511e-06, + "loss": 1.1865, + "step": 22460 + }, + { + "epoch": 0.6594926302190381, + "grad_norm": 0.0, + "learning_rate": 5.489266771249517e-06, + "loss": 1.3066, + "step": 22461 + }, + { + "epoch": 0.6595219918961771, + "grad_norm": 0.0, + "learning_rate": 5.4884180719856396e-06, + "loss": 1.1543, + "step": 22462 + }, + { + "epoch": 0.6595513535733161, + "grad_norm": 0.0, + "learning_rate": 5.4875694135211545e-06, + "loss": 1.2202, + "step": 22463 + }, + { + "epoch": 0.6595807152504551, + "grad_norm": 0.0, + "learning_rate": 5.486720795863729e-06, + "loss": 1.1416, + "step": 22464 + }, + { + "epoch": 0.6596100769275941, + "grad_norm": 0.0, + "learning_rate": 5.485872219021049e-06, + "loss": 1.1899, + "step": 22465 + }, + { + "epoch": 0.6596394386047331, + "grad_norm": 0.0, + "learning_rate": 5.485023683000777e-06, + "loss": 1.1709, + "step": 22466 + }, + { + "epoch": 0.6596688002818721, + "grad_norm": 0.0, + "learning_rate": 5.484175187810597e-06, + "loss": 1.2344, + "step": 22467 + }, + { + "epoch": 0.6596981619590111, + "grad_norm": 0.0, + "learning_rate": 5.483326733458178e-06, + "loss": 1.2295, + "step": 22468 + }, + { + "epoch": 0.6597275236361501, + "grad_norm": 0.0, + "learning_rate": 5.48247831995119e-06, + "loss": 1.2349, + "step": 22469 + }, + { + "epoch": 0.6597568853132891, + "grad_norm": 0.0, + "learning_rate": 5.481629947297305e-06, + "loss": 1.2217, + "step": 22470 + }, + { + "epoch": 0.6597862469904281, + "grad_norm": 0.0, + "learning_rate": 5.4807816155042015e-06, + "loss": 1.2744, + "step": 22471 + }, + { + "epoch": 0.659815608667567, + "grad_norm": 0.0, + "learning_rate": 5.4799333245795476e-06, + "loss": 1.1387, + "step": 22472 + }, + { + "epoch": 0.6598449703447061, + "grad_norm": 0.0, + "learning_rate": 5.47908507453101e-06, + "loss": 1.332, + "step": 22473 + }, + { + "epoch": 0.6598743320218451, + "grad_norm": 0.0, + "learning_rate": 5.478236865366267e-06, + "loss": 1.1914, + "step": 22474 + }, + { + "epoch": 0.659903693698984, + "grad_norm": 0.0, + "learning_rate": 5.4773886970929824e-06, + "loss": 1.2705, + "step": 22475 + }, + { + "epoch": 0.6599330553761231, + "grad_norm": 0.0, + "learning_rate": 5.476540569718834e-06, + "loss": 1.3389, + "step": 22476 + }, + { + "epoch": 0.6599624170532621, + "grad_norm": 0.0, + "learning_rate": 5.475692483251488e-06, + "loss": 1.1865, + "step": 22477 + }, + { + "epoch": 0.659991778730401, + "grad_norm": 0.0, + "learning_rate": 5.474844437698614e-06, + "loss": 1.1582, + "step": 22478 + }, + { + "epoch": 0.6600211404075401, + "grad_norm": 0.0, + "learning_rate": 5.473996433067877e-06, + "loss": 1.1489, + "step": 22479 + }, + { + "epoch": 0.6600505020846791, + "grad_norm": 0.0, + "learning_rate": 5.473148469366951e-06, + "loss": 1.2158, + "step": 22480 + }, + { + "epoch": 0.660079863761818, + "grad_norm": 0.0, + "learning_rate": 5.472300546603502e-06, + "loss": 1.2676, + "step": 22481 + }, + { + "epoch": 0.6601092254389571, + "grad_norm": 0.0, + "learning_rate": 5.4714526647852e-06, + "loss": 1.208, + "step": 22482 + }, + { + "epoch": 0.6601385871160961, + "grad_norm": 0.0, + "learning_rate": 5.470604823919712e-06, + "loss": 1.2446, + "step": 22483 + }, + { + "epoch": 0.660167948793235, + "grad_norm": 0.0, + "learning_rate": 5.469757024014702e-06, + "loss": 1.2627, + "step": 22484 + }, + { + "epoch": 0.6601973104703741, + "grad_norm": 0.0, + "learning_rate": 5.468909265077842e-06, + "loss": 1.249, + "step": 22485 + }, + { + "epoch": 0.6602266721475131, + "grad_norm": 0.0, + "learning_rate": 5.468061547116796e-06, + "loss": 1.1914, + "step": 22486 + }, + { + "epoch": 0.660256033824652, + "grad_norm": 0.0, + "learning_rate": 5.467213870139231e-06, + "loss": 1.3584, + "step": 22487 + }, + { + "epoch": 0.6602853955017911, + "grad_norm": 0.0, + "learning_rate": 5.4663662341528064e-06, + "loss": 1.2227, + "step": 22488 + }, + { + "epoch": 0.6603147571789301, + "grad_norm": 0.0, + "learning_rate": 5.4655186391651985e-06, + "loss": 1.1616, + "step": 22489 + }, + { + "epoch": 0.660344118856069, + "grad_norm": 0.0, + "learning_rate": 5.464671085184062e-06, + "loss": 1.2783, + "step": 22490 + }, + { + "epoch": 0.6603734805332081, + "grad_norm": 0.0, + "learning_rate": 5.4638235722170705e-06, + "loss": 1.1504, + "step": 22491 + }, + { + "epoch": 0.6604028422103471, + "grad_norm": 0.0, + "learning_rate": 5.462976100271884e-06, + "loss": 1.2881, + "step": 22492 + }, + { + "epoch": 0.660432203887486, + "grad_norm": 0.0, + "learning_rate": 5.462128669356168e-06, + "loss": 1.207, + "step": 22493 + }, + { + "epoch": 0.6604615655646251, + "grad_norm": 0.0, + "learning_rate": 5.461281279477583e-06, + "loss": 1.2881, + "step": 22494 + }, + { + "epoch": 0.6604909272417641, + "grad_norm": 0.0, + "learning_rate": 5.46043393064379e-06, + "loss": 1.2842, + "step": 22495 + }, + { + "epoch": 0.660520288918903, + "grad_norm": 0.0, + "learning_rate": 5.45958662286246e-06, + "loss": 1.2383, + "step": 22496 + }, + { + "epoch": 0.6605496505960421, + "grad_norm": 0.0, + "learning_rate": 5.458739356141247e-06, + "loss": 1.2383, + "step": 22497 + }, + { + "epoch": 0.660579012273181, + "grad_norm": 0.0, + "learning_rate": 5.457892130487821e-06, + "loss": 1.2915, + "step": 22498 + }, + { + "epoch": 0.66060837395032, + "grad_norm": 0.0, + "learning_rate": 5.457044945909835e-06, + "loss": 1.3398, + "step": 22499 + }, + { + "epoch": 0.6606377356274591, + "grad_norm": 0.0, + "learning_rate": 5.4561978024149596e-06, + "loss": 1.3057, + "step": 22500 + }, + { + "epoch": 0.660667097304598, + "grad_norm": 0.0, + "learning_rate": 5.455350700010851e-06, + "loss": 1.1963, + "step": 22501 + }, + { + "epoch": 0.660696458981737, + "grad_norm": 0.0, + "learning_rate": 5.454503638705171e-06, + "loss": 1.2759, + "step": 22502 + }, + { + "epoch": 0.6607258206588761, + "grad_norm": 0.0, + "learning_rate": 5.453656618505577e-06, + "loss": 1.2544, + "step": 22503 + }, + { + "epoch": 0.660755182336015, + "grad_norm": 0.0, + "learning_rate": 5.452809639419727e-06, + "loss": 1.333, + "step": 22504 + }, + { + "epoch": 0.660784544013154, + "grad_norm": 0.0, + "learning_rate": 5.451962701455289e-06, + "loss": 1.3174, + "step": 22505 + }, + { + "epoch": 0.6608139056902931, + "grad_norm": 0.0, + "learning_rate": 5.451115804619913e-06, + "loss": 1.2734, + "step": 22506 + }, + { + "epoch": 0.660843267367432, + "grad_norm": 0.0, + "learning_rate": 5.450268948921265e-06, + "loss": 1.3096, + "step": 22507 + }, + { + "epoch": 0.660872629044571, + "grad_norm": 0.0, + "learning_rate": 5.449422134366995e-06, + "loss": 1.2939, + "step": 22508 + }, + { + "epoch": 0.6609019907217101, + "grad_norm": 0.0, + "learning_rate": 5.448575360964772e-06, + "loss": 1.29, + "step": 22509 + }, + { + "epoch": 0.660931352398849, + "grad_norm": 0.0, + "learning_rate": 5.447728628722246e-06, + "loss": 1.1875, + "step": 22510 + }, + { + "epoch": 0.660960714075988, + "grad_norm": 0.0, + "learning_rate": 5.446881937647078e-06, + "loss": 1.1934, + "step": 22511 + }, + { + "epoch": 0.6609900757531271, + "grad_norm": 0.0, + "learning_rate": 5.446035287746921e-06, + "loss": 1.2744, + "step": 22512 + }, + { + "epoch": 0.661019437430266, + "grad_norm": 0.0, + "learning_rate": 5.445188679029429e-06, + "loss": 1.2021, + "step": 22513 + }, + { + "epoch": 0.661048799107405, + "grad_norm": 0.0, + "learning_rate": 5.444342111502267e-06, + "loss": 1.1699, + "step": 22514 + }, + { + "epoch": 0.6610781607845441, + "grad_norm": 0.0, + "learning_rate": 5.443495585173083e-06, + "loss": 1.248, + "step": 22515 + }, + { + "epoch": 0.661107522461683, + "grad_norm": 0.0, + "learning_rate": 5.442649100049537e-06, + "loss": 1.1802, + "step": 22516 + }, + { + "epoch": 0.661136884138822, + "grad_norm": 0.0, + "learning_rate": 5.441802656139286e-06, + "loss": 1.1982, + "step": 22517 + }, + { + "epoch": 0.6611662458159611, + "grad_norm": 0.0, + "learning_rate": 5.440956253449978e-06, + "loss": 1.3936, + "step": 22518 + }, + { + "epoch": 0.6611956074931, + "grad_norm": 0.0, + "learning_rate": 5.440109891989267e-06, + "loss": 1.3164, + "step": 22519 + }, + { + "epoch": 0.661224969170239, + "grad_norm": 0.0, + "learning_rate": 5.439263571764812e-06, + "loss": 1.3027, + "step": 22520 + }, + { + "epoch": 0.661254330847378, + "grad_norm": 0.0, + "learning_rate": 5.438417292784269e-06, + "loss": 1.251, + "step": 22521 + }, + { + "epoch": 0.661283692524517, + "grad_norm": 0.0, + "learning_rate": 5.437571055055278e-06, + "loss": 1.0977, + "step": 22522 + }, + { + "epoch": 0.661313054201656, + "grad_norm": 0.0, + "learning_rate": 5.4367248585855074e-06, + "loss": 1.3057, + "step": 22523 + }, + { + "epoch": 0.6613424158787949, + "grad_norm": 0.0, + "learning_rate": 5.4358787033825974e-06, + "loss": 1.3643, + "step": 22524 + }, + { + "epoch": 0.661371777555934, + "grad_norm": 0.0, + "learning_rate": 5.4350325894542096e-06, + "loss": 1.3291, + "step": 22525 + }, + { + "epoch": 0.661401139233073, + "grad_norm": 0.0, + "learning_rate": 5.434186516807991e-06, + "loss": 1.186, + "step": 22526 + }, + { + "epoch": 0.6614305009102119, + "grad_norm": 0.0, + "learning_rate": 5.433340485451593e-06, + "loss": 1.2061, + "step": 22527 + }, + { + "epoch": 0.661459862587351, + "grad_norm": 0.0, + "learning_rate": 5.4324944953926626e-06, + "loss": 1.1689, + "step": 22528 + }, + { + "epoch": 0.66148922426449, + "grad_norm": 0.0, + "learning_rate": 5.431648546638858e-06, + "loss": 1.1401, + "step": 22529 + }, + { + "epoch": 0.6615185859416289, + "grad_norm": 0.0, + "learning_rate": 5.430802639197822e-06, + "loss": 1.2891, + "step": 22530 + }, + { + "epoch": 0.661547947618768, + "grad_norm": 0.0, + "learning_rate": 5.429956773077212e-06, + "loss": 1.3018, + "step": 22531 + }, + { + "epoch": 0.661577309295907, + "grad_norm": 0.0, + "learning_rate": 5.429110948284672e-06, + "loss": 1.3672, + "step": 22532 + }, + { + "epoch": 0.6616066709730459, + "grad_norm": 0.0, + "learning_rate": 5.428265164827851e-06, + "loss": 1.1709, + "step": 22533 + }, + { + "epoch": 0.661636032650185, + "grad_norm": 0.0, + "learning_rate": 5.4274194227144006e-06, + "loss": 1.4072, + "step": 22534 + }, + { + "epoch": 0.661665394327324, + "grad_norm": 0.0, + "learning_rate": 5.426573721951969e-06, + "loss": 1.2764, + "step": 22535 + }, + { + "epoch": 0.6616947560044629, + "grad_norm": 0.0, + "learning_rate": 5.425728062548201e-06, + "loss": 1.3135, + "step": 22536 + }, + { + "epoch": 0.661724117681602, + "grad_norm": 0.0, + "learning_rate": 5.424882444510742e-06, + "loss": 1.2783, + "step": 22537 + }, + { + "epoch": 0.661753479358741, + "grad_norm": 0.0, + "learning_rate": 5.4240368678472465e-06, + "loss": 1.2471, + "step": 22538 + }, + { + "epoch": 0.6617828410358799, + "grad_norm": 0.0, + "learning_rate": 5.423191332565354e-06, + "loss": 1.2075, + "step": 22539 + }, + { + "epoch": 0.661812202713019, + "grad_norm": 0.0, + "learning_rate": 5.422345838672719e-06, + "loss": 1.2393, + "step": 22540 + }, + { + "epoch": 0.661841564390158, + "grad_norm": 0.0, + "learning_rate": 5.4215003861769825e-06, + "loss": 1.3057, + "step": 22541 + }, + { + "epoch": 0.6618709260672969, + "grad_norm": 0.0, + "learning_rate": 5.42065497508579e-06, + "loss": 1.3281, + "step": 22542 + }, + { + "epoch": 0.661900287744436, + "grad_norm": 0.0, + "learning_rate": 5.419809605406787e-06, + "loss": 1.335, + "step": 22543 + }, + { + "epoch": 0.661929649421575, + "grad_norm": 0.0, + "learning_rate": 5.418964277147616e-06, + "loss": 1.2305, + "step": 22544 + }, + { + "epoch": 0.6619590110987139, + "grad_norm": 0.0, + "learning_rate": 5.418118990315928e-06, + "loss": 1.3799, + "step": 22545 + }, + { + "epoch": 0.661988372775853, + "grad_norm": 0.0, + "learning_rate": 5.4172737449193605e-06, + "loss": 1.2705, + "step": 22546 + }, + { + "epoch": 0.662017734452992, + "grad_norm": 0.0, + "learning_rate": 5.416428540965562e-06, + "loss": 1.2056, + "step": 22547 + }, + { + "epoch": 0.6620470961301309, + "grad_norm": 0.0, + "learning_rate": 5.415583378462172e-06, + "loss": 1.2236, + "step": 22548 + }, + { + "epoch": 0.66207645780727, + "grad_norm": 0.0, + "learning_rate": 5.414738257416839e-06, + "loss": 1.3193, + "step": 22549 + }, + { + "epoch": 0.6621058194844089, + "grad_norm": 0.0, + "learning_rate": 5.413893177837201e-06, + "loss": 1.1064, + "step": 22550 + }, + { + "epoch": 0.6621351811615479, + "grad_norm": 0.0, + "learning_rate": 5.413048139730902e-06, + "loss": 1.2129, + "step": 22551 + }, + { + "epoch": 0.662164542838687, + "grad_norm": 0.0, + "learning_rate": 5.412203143105583e-06, + "loss": 1.2441, + "step": 22552 + }, + { + "epoch": 0.6621939045158259, + "grad_norm": 0.0, + "learning_rate": 5.4113581879688825e-06, + "loss": 1.335, + "step": 22553 + }, + { + "epoch": 0.6622232661929649, + "grad_norm": 0.0, + "learning_rate": 5.41051327432845e-06, + "loss": 1.1885, + "step": 22554 + }, + { + "epoch": 0.662252627870104, + "grad_norm": 0.0, + "learning_rate": 5.409668402191917e-06, + "loss": 1.189, + "step": 22555 + }, + { + "epoch": 0.6622819895472429, + "grad_norm": 0.0, + "learning_rate": 5.408823571566932e-06, + "loss": 1.373, + "step": 22556 + }, + { + "epoch": 0.6623113512243819, + "grad_norm": 0.0, + "learning_rate": 5.4079787824611265e-06, + "loss": 1.2412, + "step": 22557 + }, + { + "epoch": 0.662340712901521, + "grad_norm": 0.0, + "learning_rate": 5.407134034882149e-06, + "loss": 1.373, + "step": 22558 + }, + { + "epoch": 0.6623700745786599, + "grad_norm": 0.0, + "learning_rate": 5.406289328837635e-06, + "loss": 1.2393, + "step": 22559 + }, + { + "epoch": 0.6623994362557989, + "grad_norm": 0.0, + "learning_rate": 5.4054446643352245e-06, + "loss": 1.252, + "step": 22560 + }, + { + "epoch": 0.662428797932938, + "grad_norm": 0.0, + "learning_rate": 5.404600041382553e-06, + "loss": 1.0894, + "step": 22561 + }, + { + "epoch": 0.6624581596100769, + "grad_norm": 0.0, + "learning_rate": 5.403755459987257e-06, + "loss": 1.124, + "step": 22562 + }, + { + "epoch": 0.6624875212872159, + "grad_norm": 0.0, + "learning_rate": 5.402910920156981e-06, + "loss": 1.2441, + "step": 22563 + }, + { + "epoch": 0.662516882964355, + "grad_norm": 0.0, + "learning_rate": 5.4020664218993546e-06, + "loss": 1.3301, + "step": 22564 + }, + { + "epoch": 0.6625462446414939, + "grad_norm": 0.0, + "learning_rate": 5.4012219652220234e-06, + "loss": 1.1855, + "step": 22565 + }, + { + "epoch": 0.6625756063186329, + "grad_norm": 0.0, + "learning_rate": 5.400377550132616e-06, + "loss": 1.3682, + "step": 22566 + }, + { + "epoch": 0.662604967995772, + "grad_norm": 0.0, + "learning_rate": 5.3995331766387795e-06, + "loss": 1.2168, + "step": 22567 + }, + { + "epoch": 0.6626343296729109, + "grad_norm": 0.0, + "learning_rate": 5.398688844748135e-06, + "loss": 1.291, + "step": 22568 + }, + { + "epoch": 0.6626636913500499, + "grad_norm": 0.0, + "learning_rate": 5.39784455446833e-06, + "loss": 1.2754, + "step": 22569 + }, + { + "epoch": 0.662693053027189, + "grad_norm": 0.0, + "learning_rate": 5.397000305806992e-06, + "loss": 1.2871, + "step": 22570 + }, + { + "epoch": 0.6627224147043279, + "grad_norm": 0.0, + "learning_rate": 5.396156098771762e-06, + "loss": 1.1958, + "step": 22571 + }, + { + "epoch": 0.6627517763814669, + "grad_norm": 0.0, + "learning_rate": 5.395311933370272e-06, + "loss": 1.3086, + "step": 22572 + }, + { + "epoch": 0.662781138058606, + "grad_norm": 0.0, + "learning_rate": 5.394467809610151e-06, + "loss": 1.209, + "step": 22573 + }, + { + "epoch": 0.6628104997357449, + "grad_norm": 0.0, + "learning_rate": 5.393623727499042e-06, + "loss": 1.2598, + "step": 22574 + }, + { + "epoch": 0.6628398614128839, + "grad_norm": 0.0, + "learning_rate": 5.3927796870445725e-06, + "loss": 1.3135, + "step": 22575 + }, + { + "epoch": 0.6628692230900229, + "grad_norm": 0.0, + "learning_rate": 5.391935688254377e-06, + "loss": 1.2871, + "step": 22576 + }, + { + "epoch": 0.6628985847671619, + "grad_norm": 0.0, + "learning_rate": 5.391091731136082e-06, + "loss": 1.2197, + "step": 22577 + }, + { + "epoch": 0.6629279464443009, + "grad_norm": 0.0, + "learning_rate": 5.390247815697329e-06, + "loss": 1.3442, + "step": 22578 + }, + { + "epoch": 0.6629573081214399, + "grad_norm": 0.0, + "learning_rate": 5.389403941945743e-06, + "loss": 1.1353, + "step": 22579 + }, + { + "epoch": 0.6629866697985789, + "grad_norm": 0.0, + "learning_rate": 5.388560109888961e-06, + "loss": 1.334, + "step": 22580 + }, + { + "epoch": 0.6630160314757179, + "grad_norm": 0.0, + "learning_rate": 5.3877163195346105e-06, + "loss": 1.3721, + "step": 22581 + }, + { + "epoch": 0.6630453931528569, + "grad_norm": 0.0, + "learning_rate": 5.38687257089032e-06, + "loss": 1.415, + "step": 22582 + }, + { + "epoch": 0.6630747548299959, + "grad_norm": 0.0, + "learning_rate": 5.386028863963724e-06, + "loss": 1.2861, + "step": 22583 + }, + { + "epoch": 0.6631041165071349, + "grad_norm": 0.0, + "learning_rate": 5.385185198762453e-06, + "loss": 1.1582, + "step": 22584 + }, + { + "epoch": 0.6631334781842739, + "grad_norm": 0.0, + "learning_rate": 5.384341575294133e-06, + "loss": 1.3691, + "step": 22585 + }, + { + "epoch": 0.6631628398614129, + "grad_norm": 0.0, + "learning_rate": 5.38349799356639e-06, + "loss": 1.2383, + "step": 22586 + }, + { + "epoch": 0.6631922015385519, + "grad_norm": 0.0, + "learning_rate": 5.382654453586862e-06, + "loss": 1.2549, + "step": 22587 + }, + { + "epoch": 0.6632215632156909, + "grad_norm": 0.0, + "learning_rate": 5.381810955363167e-06, + "loss": 1.3906, + "step": 22588 + }, + { + "epoch": 0.6632509248928299, + "grad_norm": 0.0, + "learning_rate": 5.3809674989029425e-06, + "loss": 1.1045, + "step": 22589 + }, + { + "epoch": 0.6632802865699688, + "grad_norm": 0.0, + "learning_rate": 5.380124084213812e-06, + "loss": 1.3213, + "step": 22590 + }, + { + "epoch": 0.6633096482471079, + "grad_norm": 0.0, + "learning_rate": 5.3792807113034e-06, + "loss": 1.144, + "step": 22591 + }, + { + "epoch": 0.6633390099242469, + "grad_norm": 0.0, + "learning_rate": 5.378437380179342e-06, + "loss": 1.166, + "step": 22592 + }, + { + "epoch": 0.6633683716013858, + "grad_norm": 0.0, + "learning_rate": 5.377594090849251e-06, + "loss": 1.251, + "step": 22593 + }, + { + "epoch": 0.6633977332785249, + "grad_norm": 0.0, + "learning_rate": 5.376750843320764e-06, + "loss": 1.2666, + "step": 22594 + }, + { + "epoch": 0.6634270949556639, + "grad_norm": 0.0, + "learning_rate": 5.375907637601501e-06, + "loss": 1.2744, + "step": 22595 + }, + { + "epoch": 0.6634564566328028, + "grad_norm": 0.0, + "learning_rate": 5.375064473699092e-06, + "loss": 1.2832, + "step": 22596 + }, + { + "epoch": 0.6634858183099419, + "grad_norm": 0.0, + "learning_rate": 5.374221351621155e-06, + "loss": 1.1499, + "step": 22597 + }, + { + "epoch": 0.6635151799870809, + "grad_norm": 0.0, + "learning_rate": 5.373378271375323e-06, + "loss": 1.2734, + "step": 22598 + }, + { + "epoch": 0.6635445416642198, + "grad_norm": 0.0, + "learning_rate": 5.372535232969217e-06, + "loss": 1.2891, + "step": 22599 + }, + { + "epoch": 0.6635739033413589, + "grad_norm": 0.0, + "learning_rate": 5.371692236410459e-06, + "loss": 1.1929, + "step": 22600 + }, + { + "epoch": 0.6636032650184979, + "grad_norm": 0.0, + "learning_rate": 5.370849281706675e-06, + "loss": 1.1318, + "step": 22601 + }, + { + "epoch": 0.6636326266956368, + "grad_norm": 0.0, + "learning_rate": 5.370006368865481e-06, + "loss": 1.1318, + "step": 22602 + }, + { + "epoch": 0.6636619883727759, + "grad_norm": 0.0, + "learning_rate": 5.36916349789451e-06, + "loss": 1.3066, + "step": 22603 + }, + { + "epoch": 0.6636913500499149, + "grad_norm": 0.0, + "learning_rate": 5.368320668801375e-06, + "loss": 1.2725, + "step": 22604 + }, + { + "epoch": 0.6637207117270538, + "grad_norm": 0.0, + "learning_rate": 5.367477881593708e-06, + "loss": 1.2773, + "step": 22605 + }, + { + "epoch": 0.6637500734041929, + "grad_norm": 0.0, + "learning_rate": 5.366635136279119e-06, + "loss": 1.251, + "step": 22606 + }, + { + "epoch": 0.6637794350813319, + "grad_norm": 0.0, + "learning_rate": 5.36579243286524e-06, + "loss": 1.3105, + "step": 22607 + }, + { + "epoch": 0.6638087967584708, + "grad_norm": 0.0, + "learning_rate": 5.364949771359687e-06, + "loss": 1.1807, + "step": 22608 + }, + { + "epoch": 0.6638381584356099, + "grad_norm": 0.0, + "learning_rate": 5.364107151770079e-06, + "loss": 1.3574, + "step": 22609 + }, + { + "epoch": 0.6638675201127489, + "grad_norm": 0.0, + "learning_rate": 5.363264574104038e-06, + "loss": 1.2734, + "step": 22610 + }, + { + "epoch": 0.6638968817898878, + "grad_norm": 0.0, + "learning_rate": 5.3624220383691795e-06, + "loss": 1.2217, + "step": 22611 + }, + { + "epoch": 0.6639262434670269, + "grad_norm": 0.0, + "learning_rate": 5.36157954457313e-06, + "loss": 1.1914, + "step": 22612 + }, + { + "epoch": 0.6639556051441659, + "grad_norm": 0.0, + "learning_rate": 5.3607370927235005e-06, + "loss": 1.2998, + "step": 22613 + }, + { + "epoch": 0.6639849668213048, + "grad_norm": 0.0, + "learning_rate": 5.359894682827917e-06, + "loss": 1.0898, + "step": 22614 + }, + { + "epoch": 0.6640143284984439, + "grad_norm": 0.0, + "learning_rate": 5.359052314893991e-06, + "loss": 1.2017, + "step": 22615 + }, + { + "epoch": 0.6640436901755828, + "grad_norm": 0.0, + "learning_rate": 5.358209988929346e-06, + "loss": 1.2793, + "step": 22616 + }, + { + "epoch": 0.6640730518527218, + "grad_norm": 0.0, + "learning_rate": 5.3573677049415986e-06, + "loss": 1.187, + "step": 22617 + }, + { + "epoch": 0.6641024135298609, + "grad_norm": 0.0, + "learning_rate": 5.356525462938363e-06, + "loss": 1.249, + "step": 22618 + }, + { + "epoch": 0.6641317752069998, + "grad_norm": 0.0, + "learning_rate": 5.355683262927254e-06, + "loss": 1.2676, + "step": 22619 + }, + { + "epoch": 0.6641611368841388, + "grad_norm": 0.0, + "learning_rate": 5.354841104915893e-06, + "loss": 1.1558, + "step": 22620 + }, + { + "epoch": 0.6641904985612779, + "grad_norm": 0.0, + "learning_rate": 5.3539989889118945e-06, + "loss": 1.2217, + "step": 22621 + }, + { + "epoch": 0.6642198602384168, + "grad_norm": 0.0, + "learning_rate": 5.35315691492287e-06, + "loss": 1.2114, + "step": 22622 + }, + { + "epoch": 0.6642492219155558, + "grad_norm": 0.0, + "learning_rate": 5.352314882956441e-06, + "loss": 1.3594, + "step": 22623 + }, + { + "epoch": 0.6642785835926948, + "grad_norm": 0.0, + "learning_rate": 5.351472893020217e-06, + "loss": 1.1953, + "step": 22624 + }, + { + "epoch": 0.6643079452698338, + "grad_norm": 0.0, + "learning_rate": 5.350630945121816e-06, + "loss": 1.2031, + "step": 22625 + }, + { + "epoch": 0.6643373069469728, + "grad_norm": 0.0, + "learning_rate": 5.349789039268847e-06, + "loss": 1.2686, + "step": 22626 + }, + { + "epoch": 0.6643666686241118, + "grad_norm": 0.0, + "learning_rate": 5.348947175468928e-06, + "loss": 1.3003, + "step": 22627 + }, + { + "epoch": 0.6643960303012508, + "grad_norm": 0.0, + "learning_rate": 5.348105353729669e-06, + "loss": 1.0972, + "step": 22628 + }, + { + "epoch": 0.6644253919783898, + "grad_norm": 0.0, + "learning_rate": 5.347263574058687e-06, + "loss": 1.2568, + "step": 22629 + }, + { + "epoch": 0.6644547536555288, + "grad_norm": 0.0, + "learning_rate": 5.346421836463593e-06, + "loss": 1.3477, + "step": 22630 + }, + { + "epoch": 0.6644841153326678, + "grad_norm": 0.0, + "learning_rate": 5.3455801409519935e-06, + "loss": 1.3682, + "step": 22631 + }, + { + "epoch": 0.6645134770098068, + "grad_norm": 0.0, + "learning_rate": 5.34473848753151e-06, + "loss": 1.1226, + "step": 22632 + }, + { + "epoch": 0.6645428386869457, + "grad_norm": 0.0, + "learning_rate": 5.343896876209749e-06, + "loss": 1.2588, + "step": 22633 + }, + { + "epoch": 0.6645722003640848, + "grad_norm": 0.0, + "learning_rate": 5.343055306994319e-06, + "loss": 1.2363, + "step": 22634 + }, + { + "epoch": 0.6646015620412238, + "grad_norm": 0.0, + "learning_rate": 5.342213779892829e-06, + "loss": 1.25, + "step": 22635 + }, + { + "epoch": 0.6646309237183627, + "grad_norm": 0.0, + "learning_rate": 5.341372294912897e-06, + "loss": 1.248, + "step": 22636 + }, + { + "epoch": 0.6646602853955018, + "grad_norm": 0.0, + "learning_rate": 5.340530852062124e-06, + "loss": 1.2559, + "step": 22637 + }, + { + "epoch": 0.6646896470726408, + "grad_norm": 0.0, + "learning_rate": 5.339689451348129e-06, + "loss": 1.2256, + "step": 22638 + }, + { + "epoch": 0.6647190087497797, + "grad_norm": 0.0, + "learning_rate": 5.338848092778511e-06, + "loss": 1.1724, + "step": 22639 + }, + { + "epoch": 0.6647483704269188, + "grad_norm": 0.0, + "learning_rate": 5.338006776360889e-06, + "loss": 1.1592, + "step": 22640 + }, + { + "epoch": 0.6647777321040578, + "grad_norm": 0.0, + "learning_rate": 5.337165502102864e-06, + "loss": 1.2139, + "step": 22641 + }, + { + "epoch": 0.6648070937811967, + "grad_norm": 0.0, + "learning_rate": 5.336324270012046e-06, + "loss": 1.2725, + "step": 22642 + }, + { + "epoch": 0.6648364554583358, + "grad_norm": 0.0, + "learning_rate": 5.335483080096041e-06, + "loss": 1.3623, + "step": 22643 + }, + { + "epoch": 0.6648658171354748, + "grad_norm": 0.0, + "learning_rate": 5.3346419323624545e-06, + "loss": 1.2373, + "step": 22644 + }, + { + "epoch": 0.6648951788126137, + "grad_norm": 0.0, + "learning_rate": 5.333800826818898e-06, + "loss": 1.2598, + "step": 22645 + }, + { + "epoch": 0.6649245404897528, + "grad_norm": 0.0, + "learning_rate": 5.332959763472973e-06, + "loss": 1.0698, + "step": 22646 + }, + { + "epoch": 0.6649539021668918, + "grad_norm": 0.0, + "learning_rate": 5.3321187423322925e-06, + "loss": 1.2285, + "step": 22647 + }, + { + "epoch": 0.6649832638440307, + "grad_norm": 0.0, + "learning_rate": 5.3312777634044565e-06, + "loss": 1.3506, + "step": 22648 + }, + { + "epoch": 0.6650126255211698, + "grad_norm": 0.0, + "learning_rate": 5.330436826697072e-06, + "loss": 1.1606, + "step": 22649 + }, + { + "epoch": 0.6650419871983088, + "grad_norm": 0.0, + "learning_rate": 5.329595932217744e-06, + "loss": 1.3428, + "step": 22650 + }, + { + "epoch": 0.6650713488754477, + "grad_norm": 0.0, + "learning_rate": 5.328755079974071e-06, + "loss": 1.0156, + "step": 22651 + }, + { + "epoch": 0.6651007105525868, + "grad_norm": 0.0, + "learning_rate": 5.327914269973666e-06, + "loss": 1.3311, + "step": 22652 + }, + { + "epoch": 0.6651300722297258, + "grad_norm": 0.0, + "learning_rate": 5.327073502224126e-06, + "loss": 1.2139, + "step": 22653 + }, + { + "epoch": 0.6651594339068647, + "grad_norm": 0.0, + "learning_rate": 5.32623277673306e-06, + "loss": 1.2695, + "step": 22654 + }, + { + "epoch": 0.6651887955840038, + "grad_norm": 0.0, + "learning_rate": 5.325392093508064e-06, + "loss": 1.167, + "step": 22655 + }, + { + "epoch": 0.6652181572611428, + "grad_norm": 0.0, + "learning_rate": 5.32455145255675e-06, + "loss": 1.3164, + "step": 22656 + }, + { + "epoch": 0.6652475189382817, + "grad_norm": 0.0, + "learning_rate": 5.323710853886712e-06, + "loss": 1.2422, + "step": 22657 + }, + { + "epoch": 0.6652768806154208, + "grad_norm": 0.0, + "learning_rate": 5.3228702975055555e-06, + "loss": 1.147, + "step": 22658 + }, + { + "epoch": 0.6653062422925597, + "grad_norm": 0.0, + "learning_rate": 5.3220297834208815e-06, + "loss": 1.1504, + "step": 22659 + }, + { + "epoch": 0.6653356039696987, + "grad_norm": 0.0, + "learning_rate": 5.321189311640286e-06, + "loss": 1.1865, + "step": 22660 + }, + { + "epoch": 0.6653649656468378, + "grad_norm": 0.0, + "learning_rate": 5.320348882171377e-06, + "loss": 1.3877, + "step": 22661 + }, + { + "epoch": 0.6653943273239767, + "grad_norm": 0.0, + "learning_rate": 5.319508495021748e-06, + "loss": 1.2422, + "step": 22662 + }, + { + "epoch": 0.6654236890011157, + "grad_norm": 0.0, + "learning_rate": 5.3186681501990065e-06, + "loss": 1.2021, + "step": 22663 + }, + { + "epoch": 0.6654530506782548, + "grad_norm": 0.0, + "learning_rate": 5.317827847710744e-06, + "loss": 1.2295, + "step": 22664 + }, + { + "epoch": 0.6654824123553937, + "grad_norm": 0.0, + "learning_rate": 5.316987587564567e-06, + "loss": 1.2803, + "step": 22665 + }, + { + "epoch": 0.6655117740325327, + "grad_norm": 0.0, + "learning_rate": 5.316147369768071e-06, + "loss": 1.209, + "step": 22666 + }, + { + "epoch": 0.6655411357096718, + "grad_norm": 0.0, + "learning_rate": 5.315307194328854e-06, + "loss": 1.2266, + "step": 22667 + }, + { + "epoch": 0.6655704973868107, + "grad_norm": 0.0, + "learning_rate": 5.314467061254509e-06, + "loss": 1.2881, + "step": 22668 + }, + { + "epoch": 0.6655998590639497, + "grad_norm": 0.0, + "learning_rate": 5.313626970552644e-06, + "loss": 1.3154, + "step": 22669 + }, + { + "epoch": 0.6656292207410888, + "grad_norm": 0.0, + "learning_rate": 5.312786922230848e-06, + "loss": 1.2891, + "step": 22670 + }, + { + "epoch": 0.6656585824182277, + "grad_norm": 0.0, + "learning_rate": 5.311946916296718e-06, + "loss": 1.3174, + "step": 22671 + }, + { + "epoch": 0.6656879440953667, + "grad_norm": 0.0, + "learning_rate": 5.311106952757856e-06, + "loss": 1.2949, + "step": 22672 + }, + { + "epoch": 0.6657173057725058, + "grad_norm": 0.0, + "learning_rate": 5.3102670316218545e-06, + "loss": 1.2246, + "step": 22673 + }, + { + "epoch": 0.6657466674496447, + "grad_norm": 0.0, + "learning_rate": 5.30942715289631e-06, + "loss": 1.2334, + "step": 22674 + }, + { + "epoch": 0.6657760291267837, + "grad_norm": 0.0, + "learning_rate": 5.308587316588812e-06, + "loss": 1.2549, + "step": 22675 + }, + { + "epoch": 0.6658053908039228, + "grad_norm": 0.0, + "learning_rate": 5.307747522706965e-06, + "loss": 1.2373, + "step": 22676 + }, + { + "epoch": 0.6658347524810617, + "grad_norm": 0.0, + "learning_rate": 5.306907771258355e-06, + "loss": 1.2959, + "step": 22677 + }, + { + "epoch": 0.6658641141582007, + "grad_norm": 0.0, + "learning_rate": 5.306068062250585e-06, + "loss": 1.3135, + "step": 22678 + }, + { + "epoch": 0.6658934758353398, + "grad_norm": 0.0, + "learning_rate": 5.305228395691242e-06, + "loss": 1.207, + "step": 22679 + }, + { + "epoch": 0.6659228375124787, + "grad_norm": 0.0, + "learning_rate": 5.3043887715879175e-06, + "loss": 1.21, + "step": 22680 + }, + { + "epoch": 0.6659521991896177, + "grad_norm": 0.0, + "learning_rate": 5.303549189948212e-06, + "loss": 1.2539, + "step": 22681 + }, + { + "epoch": 0.6659815608667567, + "grad_norm": 0.0, + "learning_rate": 5.3027096507797134e-06, + "loss": 1.1494, + "step": 22682 + }, + { + "epoch": 0.6660109225438957, + "grad_norm": 0.0, + "learning_rate": 5.301870154090014e-06, + "loss": 1.4131, + "step": 22683 + }, + { + "epoch": 0.6660402842210347, + "grad_norm": 0.0, + "learning_rate": 5.301030699886703e-06, + "loss": 1.2158, + "step": 22684 + }, + { + "epoch": 0.6660696458981737, + "grad_norm": 0.0, + "learning_rate": 5.300191288177379e-06, + "loss": 1.1421, + "step": 22685 + }, + { + "epoch": 0.6660990075753127, + "grad_norm": 0.0, + "learning_rate": 5.299351918969624e-06, + "loss": 1.2319, + "step": 22686 + }, + { + "epoch": 0.6661283692524517, + "grad_norm": 0.0, + "learning_rate": 5.298512592271039e-06, + "loss": 1.2354, + "step": 22687 + }, + { + "epoch": 0.6661577309295907, + "grad_norm": 0.0, + "learning_rate": 5.297673308089204e-06, + "loss": 1.2471, + "step": 22688 + }, + { + "epoch": 0.6661870926067297, + "grad_norm": 0.0, + "learning_rate": 5.296834066431716e-06, + "loss": 1.2529, + "step": 22689 + }, + { + "epoch": 0.6662164542838687, + "grad_norm": 0.0, + "learning_rate": 5.295994867306164e-06, + "loss": 1.1729, + "step": 22690 + }, + { + "epoch": 0.6662458159610077, + "grad_norm": 0.0, + "learning_rate": 5.295155710720135e-06, + "loss": 1.2969, + "step": 22691 + }, + { + "epoch": 0.6662751776381467, + "grad_norm": 0.0, + "learning_rate": 5.294316596681216e-06, + "loss": 1.2432, + "step": 22692 + }, + { + "epoch": 0.6663045393152857, + "grad_norm": 0.0, + "learning_rate": 5.293477525196995e-06, + "loss": 1.2588, + "step": 22693 + }, + { + "epoch": 0.6663339009924247, + "grad_norm": 0.0, + "learning_rate": 5.2926384962750645e-06, + "loss": 1.1924, + "step": 22694 + }, + { + "epoch": 0.6663632626695637, + "grad_norm": 0.0, + "learning_rate": 5.291799509923008e-06, + "loss": 1.2607, + "step": 22695 + }, + { + "epoch": 0.6663926243467027, + "grad_norm": 0.0, + "learning_rate": 5.2909605661484156e-06, + "loss": 1.1924, + "step": 22696 + }, + { + "epoch": 0.6664219860238417, + "grad_norm": 0.0, + "learning_rate": 5.29012166495887e-06, + "loss": 1.2607, + "step": 22697 + }, + { + "epoch": 0.6664513477009807, + "grad_norm": 0.0, + "learning_rate": 5.289282806361969e-06, + "loss": 1.168, + "step": 22698 + }, + { + "epoch": 0.6664807093781197, + "grad_norm": 0.0, + "learning_rate": 5.288443990365286e-06, + "loss": 1.1665, + "step": 22699 + }, + { + "epoch": 0.6665100710552587, + "grad_norm": 0.0, + "learning_rate": 5.287605216976406e-06, + "loss": 1.2461, + "step": 22700 + }, + { + "epoch": 0.6665394327323977, + "grad_norm": 0.0, + "learning_rate": 5.286766486202923e-06, + "loss": 1.1338, + "step": 22701 + }, + { + "epoch": 0.6665687944095366, + "grad_norm": 0.0, + "learning_rate": 5.2859277980524135e-06, + "loss": 1.2471, + "step": 22702 + }, + { + "epoch": 0.6665981560866757, + "grad_norm": 0.0, + "learning_rate": 5.285089152532471e-06, + "loss": 1.2871, + "step": 22703 + }, + { + "epoch": 0.6666275177638147, + "grad_norm": 0.0, + "learning_rate": 5.28425054965067e-06, + "loss": 1.209, + "step": 22704 + }, + { + "epoch": 0.6666568794409536, + "grad_norm": 0.0, + "learning_rate": 5.283411989414604e-06, + "loss": 1.2021, + "step": 22705 + }, + { + "epoch": 0.6666862411180927, + "grad_norm": 0.0, + "learning_rate": 5.28257347183185e-06, + "loss": 1.2627, + "step": 22706 + }, + { + "epoch": 0.6667156027952317, + "grad_norm": 0.0, + "learning_rate": 5.281734996909993e-06, + "loss": 1.1416, + "step": 22707 + }, + { + "epoch": 0.6667449644723706, + "grad_norm": 0.0, + "learning_rate": 5.28089656465661e-06, + "loss": 1.2314, + "step": 22708 + }, + { + "epoch": 0.6667743261495097, + "grad_norm": 0.0, + "learning_rate": 5.280058175079293e-06, + "loss": 1.1572, + "step": 22709 + }, + { + "epoch": 0.6668036878266487, + "grad_norm": 0.0, + "learning_rate": 5.2792198281856185e-06, + "loss": 1.2178, + "step": 22710 + }, + { + "epoch": 0.6668330495037876, + "grad_norm": 0.0, + "learning_rate": 5.278381523983165e-06, + "loss": 1.1465, + "step": 22711 + }, + { + "epoch": 0.6668624111809267, + "grad_norm": 0.0, + "learning_rate": 5.277543262479518e-06, + "loss": 1.29, + "step": 22712 + }, + { + "epoch": 0.6668917728580657, + "grad_norm": 0.0, + "learning_rate": 5.276705043682255e-06, + "loss": 1.1973, + "step": 22713 + }, + { + "epoch": 0.6669211345352046, + "grad_norm": 0.0, + "learning_rate": 5.275866867598961e-06, + "loss": 1.333, + "step": 22714 + }, + { + "epoch": 0.6669504962123437, + "grad_norm": 0.0, + "learning_rate": 5.275028734237213e-06, + "loss": 1.145, + "step": 22715 + }, + { + "epoch": 0.6669798578894827, + "grad_norm": 0.0, + "learning_rate": 5.274190643604591e-06, + "loss": 1.3555, + "step": 22716 + }, + { + "epoch": 0.6670092195666216, + "grad_norm": 0.0, + "learning_rate": 5.273352595708667e-06, + "loss": 1.1763, + "step": 22717 + }, + { + "epoch": 0.6670385812437607, + "grad_norm": 0.0, + "learning_rate": 5.272514590557032e-06, + "loss": 1.1514, + "step": 22718 + }, + { + "epoch": 0.6670679429208997, + "grad_norm": 0.0, + "learning_rate": 5.271676628157259e-06, + "loss": 1.2783, + "step": 22719 + }, + { + "epoch": 0.6670973045980386, + "grad_norm": 0.0, + "learning_rate": 5.270838708516919e-06, + "loss": 1.1943, + "step": 22720 + }, + { + "epoch": 0.6671266662751777, + "grad_norm": 0.0, + "learning_rate": 5.2700008316436016e-06, + "loss": 1.1348, + "step": 22721 + }, + { + "epoch": 0.6671560279523167, + "grad_norm": 0.0, + "learning_rate": 5.269162997544877e-06, + "loss": 1.2471, + "step": 22722 + }, + { + "epoch": 0.6671853896294556, + "grad_norm": 0.0, + "learning_rate": 5.268325206228323e-06, + "loss": 1.2617, + "step": 22723 + }, + { + "epoch": 0.6672147513065946, + "grad_norm": 0.0, + "learning_rate": 5.267487457701512e-06, + "loss": 1.2129, + "step": 22724 + }, + { + "epoch": 0.6672441129837337, + "grad_norm": 0.0, + "learning_rate": 5.266649751972028e-06, + "loss": 1.168, + "step": 22725 + }, + { + "epoch": 0.6672734746608726, + "grad_norm": 0.0, + "learning_rate": 5.2658120890474395e-06, + "loss": 1.3428, + "step": 22726 + }, + { + "epoch": 0.6673028363380116, + "grad_norm": 0.0, + "learning_rate": 5.2649744689353265e-06, + "loss": 1.2676, + "step": 22727 + }, + { + "epoch": 0.6673321980151506, + "grad_norm": 0.0, + "learning_rate": 5.264136891643263e-06, + "loss": 1.2197, + "step": 22728 + }, + { + "epoch": 0.6673615596922896, + "grad_norm": 0.0, + "learning_rate": 5.263299357178819e-06, + "loss": 1.209, + "step": 22729 + }, + { + "epoch": 0.6673909213694286, + "grad_norm": 0.0, + "learning_rate": 5.262461865549576e-06, + "loss": 1.2871, + "step": 22730 + }, + { + "epoch": 0.6674202830465676, + "grad_norm": 0.0, + "learning_rate": 5.261624416763105e-06, + "loss": 1.3008, + "step": 22731 + }, + { + "epoch": 0.6674496447237066, + "grad_norm": 0.0, + "learning_rate": 5.260787010826975e-06, + "loss": 1.1138, + "step": 22732 + }, + { + "epoch": 0.6674790064008456, + "grad_norm": 0.0, + "learning_rate": 5.259949647748759e-06, + "loss": 1.2246, + "step": 22733 + }, + { + "epoch": 0.6675083680779846, + "grad_norm": 0.0, + "learning_rate": 5.259112327536037e-06, + "loss": 1.125, + "step": 22734 + }, + { + "epoch": 0.6675377297551236, + "grad_norm": 0.0, + "learning_rate": 5.258275050196372e-06, + "loss": 1.2842, + "step": 22735 + }, + { + "epoch": 0.6675670914322626, + "grad_norm": 0.0, + "learning_rate": 5.257437815737344e-06, + "loss": 1.2871, + "step": 22736 + }, + { + "epoch": 0.6675964531094016, + "grad_norm": 0.0, + "learning_rate": 5.256600624166517e-06, + "loss": 1.2744, + "step": 22737 + }, + { + "epoch": 0.6676258147865406, + "grad_norm": 0.0, + "learning_rate": 5.2557634754914675e-06, + "loss": 1.1753, + "step": 22738 + }, + { + "epoch": 0.6676551764636796, + "grad_norm": 0.0, + "learning_rate": 5.254926369719766e-06, + "loss": 1.2114, + "step": 22739 + }, + { + "epoch": 0.6676845381408186, + "grad_norm": 0.0, + "learning_rate": 5.25408930685898e-06, + "loss": 1.2266, + "step": 22740 + }, + { + "epoch": 0.6677138998179576, + "grad_norm": 0.0, + "learning_rate": 5.2532522869166795e-06, + "loss": 1.4385, + "step": 22741 + }, + { + "epoch": 0.6677432614950966, + "grad_norm": 0.0, + "learning_rate": 5.252415309900429e-06, + "loss": 1.0923, + "step": 22742 + }, + { + "epoch": 0.6677726231722356, + "grad_norm": 0.0, + "learning_rate": 5.2515783758178085e-06, + "loss": 1.1758, + "step": 22743 + }, + { + "epoch": 0.6678019848493746, + "grad_norm": 0.0, + "learning_rate": 5.250741484676376e-06, + "loss": 1.1865, + "step": 22744 + }, + { + "epoch": 0.6678313465265135, + "grad_norm": 0.0, + "learning_rate": 5.249904636483709e-06, + "loss": 1.3193, + "step": 22745 + }, + { + "epoch": 0.6678607082036526, + "grad_norm": 0.0, + "learning_rate": 5.249067831247368e-06, + "loss": 1.2192, + "step": 22746 + }, + { + "epoch": 0.6678900698807916, + "grad_norm": 0.0, + "learning_rate": 5.2482310689749295e-06, + "loss": 1.1572, + "step": 22747 + }, + { + "epoch": 0.6679194315579305, + "grad_norm": 0.0, + "learning_rate": 5.247394349673951e-06, + "loss": 1.2505, + "step": 22748 + }, + { + "epoch": 0.6679487932350696, + "grad_norm": 0.0, + "learning_rate": 5.246557673351997e-06, + "loss": 1.1968, + "step": 22749 + }, + { + "epoch": 0.6679781549122086, + "grad_norm": 0.0, + "learning_rate": 5.245721040016646e-06, + "loss": 1.3213, + "step": 22750 + }, + { + "epoch": 0.6680075165893475, + "grad_norm": 0.0, + "learning_rate": 5.244884449675451e-06, + "loss": 1.1265, + "step": 22751 + }, + { + "epoch": 0.6680368782664866, + "grad_norm": 0.0, + "learning_rate": 5.244047902335989e-06, + "loss": 1.2158, + "step": 22752 + }, + { + "epoch": 0.6680662399436256, + "grad_norm": 0.0, + "learning_rate": 5.243211398005817e-06, + "loss": 1.1289, + "step": 22753 + }, + { + "epoch": 0.6680956016207645, + "grad_norm": 0.0, + "learning_rate": 5.242374936692505e-06, + "loss": 1.165, + "step": 22754 + }, + { + "epoch": 0.6681249632979036, + "grad_norm": 0.0, + "learning_rate": 5.241538518403615e-06, + "loss": 1.3496, + "step": 22755 + }, + { + "epoch": 0.6681543249750426, + "grad_norm": 0.0, + "learning_rate": 5.240702143146711e-06, + "loss": 1.2422, + "step": 22756 + }, + { + "epoch": 0.6681836866521815, + "grad_norm": 0.0, + "learning_rate": 5.239865810929352e-06, + "loss": 1.1611, + "step": 22757 + }, + { + "epoch": 0.6682130483293206, + "grad_norm": 0.0, + "learning_rate": 5.23902952175911e-06, + "loss": 1.2979, + "step": 22758 + }, + { + "epoch": 0.6682424100064596, + "grad_norm": 0.0, + "learning_rate": 5.238193275643544e-06, + "loss": 1.2764, + "step": 22759 + }, + { + "epoch": 0.6682717716835985, + "grad_norm": 0.0, + "learning_rate": 5.23735707259021e-06, + "loss": 1.2451, + "step": 22760 + }, + { + "epoch": 0.6683011333607376, + "grad_norm": 0.0, + "learning_rate": 5.236520912606682e-06, + "loss": 1.3564, + "step": 22761 + }, + { + "epoch": 0.6683304950378766, + "grad_norm": 0.0, + "learning_rate": 5.235684795700509e-06, + "loss": 1.2227, + "step": 22762 + }, + { + "epoch": 0.6683598567150155, + "grad_norm": 0.0, + "learning_rate": 5.2348487218792645e-06, + "loss": 1.3174, + "step": 22763 + }, + { + "epoch": 0.6683892183921546, + "grad_norm": 0.0, + "learning_rate": 5.234012691150501e-06, + "loss": 1.3047, + "step": 22764 + }, + { + "epoch": 0.6684185800692936, + "grad_norm": 0.0, + "learning_rate": 5.2331767035217825e-06, + "loss": 1.1562, + "step": 22765 + }, + { + "epoch": 0.6684479417464325, + "grad_norm": 0.0, + "learning_rate": 5.232340759000665e-06, + "loss": 1.2866, + "step": 22766 + }, + { + "epoch": 0.6684773034235716, + "grad_norm": 0.0, + "learning_rate": 5.2315048575947145e-06, + "loss": 1.2617, + "step": 22767 + }, + { + "epoch": 0.6685066651007106, + "grad_norm": 0.0, + "learning_rate": 5.230668999311487e-06, + "loss": 1.1938, + "step": 22768 + }, + { + "epoch": 0.6685360267778495, + "grad_norm": 0.0, + "learning_rate": 5.229833184158537e-06, + "loss": 1.3486, + "step": 22769 + }, + { + "epoch": 0.6685653884549886, + "grad_norm": 0.0, + "learning_rate": 5.22899741214343e-06, + "loss": 1.2344, + "step": 22770 + }, + { + "epoch": 0.6685947501321275, + "grad_norm": 0.0, + "learning_rate": 5.228161683273718e-06, + "loss": 1.3579, + "step": 22771 + }, + { + "epoch": 0.6686241118092665, + "grad_norm": 0.0, + "learning_rate": 5.22732599755697e-06, + "loss": 1.1733, + "step": 22772 + }, + { + "epoch": 0.6686534734864056, + "grad_norm": 0.0, + "learning_rate": 5.226490355000728e-06, + "loss": 1.3003, + "step": 22773 + }, + { + "epoch": 0.6686828351635445, + "grad_norm": 0.0, + "learning_rate": 5.225654755612559e-06, + "loss": 1.0, + "step": 22774 + }, + { + "epoch": 0.6687121968406835, + "grad_norm": 0.0, + "learning_rate": 5.224819199400014e-06, + "loss": 1.2607, + "step": 22775 + }, + { + "epoch": 0.6687415585178226, + "grad_norm": 0.0, + "learning_rate": 5.223983686370655e-06, + "loss": 1.2808, + "step": 22776 + }, + { + "epoch": 0.6687709201949615, + "grad_norm": 0.0, + "learning_rate": 5.2231482165320345e-06, + "loss": 1.2227, + "step": 22777 + }, + { + "epoch": 0.6688002818721005, + "grad_norm": 0.0, + "learning_rate": 5.222312789891706e-06, + "loss": 1.2202, + "step": 22778 + }, + { + "epoch": 0.6688296435492396, + "grad_norm": 0.0, + "learning_rate": 5.221477406457228e-06, + "loss": 1.1133, + "step": 22779 + }, + { + "epoch": 0.6688590052263785, + "grad_norm": 0.0, + "learning_rate": 5.220642066236155e-06, + "loss": 1.2148, + "step": 22780 + }, + { + "epoch": 0.6688883669035175, + "grad_norm": 0.0, + "learning_rate": 5.21980676923604e-06, + "loss": 1.252, + "step": 22781 + }, + { + "epoch": 0.6689177285806566, + "grad_norm": 0.0, + "learning_rate": 5.218971515464432e-06, + "loss": 1.2637, + "step": 22782 + }, + { + "epoch": 0.6689470902577955, + "grad_norm": 0.0, + "learning_rate": 5.218136304928893e-06, + "loss": 1.2598, + "step": 22783 + }, + { + "epoch": 0.6689764519349345, + "grad_norm": 0.0, + "learning_rate": 5.217301137636967e-06, + "loss": 1.1426, + "step": 22784 + }, + { + "epoch": 0.6690058136120736, + "grad_norm": 0.0, + "learning_rate": 5.216466013596217e-06, + "loss": 1.2642, + "step": 22785 + }, + { + "epoch": 0.6690351752892125, + "grad_norm": 0.0, + "learning_rate": 5.215630932814186e-06, + "loss": 1.2734, + "step": 22786 + }, + { + "epoch": 0.6690645369663515, + "grad_norm": 0.0, + "learning_rate": 5.214795895298435e-06, + "loss": 1.3008, + "step": 22787 + }, + { + "epoch": 0.6690938986434906, + "grad_norm": 0.0, + "learning_rate": 5.213960901056508e-06, + "loss": 1.2646, + "step": 22788 + }, + { + "epoch": 0.6691232603206295, + "grad_norm": 0.0, + "learning_rate": 5.213125950095958e-06, + "loss": 1.1826, + "step": 22789 + }, + { + "epoch": 0.6691526219977685, + "grad_norm": 0.0, + "learning_rate": 5.212291042424338e-06, + "loss": 1.21, + "step": 22790 + }, + { + "epoch": 0.6691819836749076, + "grad_norm": 0.0, + "learning_rate": 5.211456178049192e-06, + "loss": 1.27, + "step": 22791 + }, + { + "epoch": 0.6692113453520465, + "grad_norm": 0.0, + "learning_rate": 5.210621356978077e-06, + "loss": 1.1558, + "step": 22792 + }, + { + "epoch": 0.6692407070291855, + "grad_norm": 0.0, + "learning_rate": 5.209786579218536e-06, + "loss": 1.2363, + "step": 22793 + }, + { + "epoch": 0.6692700687063246, + "grad_norm": 0.0, + "learning_rate": 5.208951844778126e-06, + "loss": 1.2344, + "step": 22794 + }, + { + "epoch": 0.6692994303834635, + "grad_norm": 0.0, + "learning_rate": 5.208117153664387e-06, + "loss": 1.1812, + "step": 22795 + }, + { + "epoch": 0.6693287920606025, + "grad_norm": 0.0, + "learning_rate": 5.207282505884876e-06, + "loss": 1.2988, + "step": 22796 + }, + { + "epoch": 0.6693581537377415, + "grad_norm": 0.0, + "learning_rate": 5.206447901447142e-06, + "loss": 1.3369, + "step": 22797 + }, + { + "epoch": 0.6693875154148805, + "grad_norm": 0.0, + "learning_rate": 5.205613340358717e-06, + "loss": 1.2002, + "step": 22798 + }, + { + "epoch": 0.6694168770920195, + "grad_norm": 0.0, + "learning_rate": 5.2047788226271645e-06, + "loss": 1.1006, + "step": 22799 + }, + { + "epoch": 0.6694462387691585, + "grad_norm": 0.0, + "learning_rate": 5.203944348260019e-06, + "loss": 1.2021, + "step": 22800 + }, + { + "epoch": 0.6694756004462975, + "grad_norm": 0.0, + "learning_rate": 5.203109917264839e-06, + "loss": 1.2183, + "step": 22801 + }, + { + "epoch": 0.6695049621234365, + "grad_norm": 0.0, + "learning_rate": 5.20227552964916e-06, + "loss": 1.23, + "step": 22802 + }, + { + "epoch": 0.6695343238005755, + "grad_norm": 0.0, + "learning_rate": 5.201441185420536e-06, + "loss": 1.2266, + "step": 22803 + }, + { + "epoch": 0.6695636854777145, + "grad_norm": 0.0, + "learning_rate": 5.2006068845865074e-06, + "loss": 1.1914, + "step": 22804 + }, + { + "epoch": 0.6695930471548535, + "grad_norm": 0.0, + "learning_rate": 5.199772627154622e-06, + "loss": 1.3828, + "step": 22805 + }, + { + "epoch": 0.6696224088319925, + "grad_norm": 0.0, + "learning_rate": 5.198938413132417e-06, + "loss": 1.2144, + "step": 22806 + }, + { + "epoch": 0.6696517705091315, + "grad_norm": 0.0, + "learning_rate": 5.198104242527444e-06, + "loss": 1.4062, + "step": 22807 + }, + { + "epoch": 0.6696811321862705, + "grad_norm": 0.0, + "learning_rate": 5.197270115347246e-06, + "loss": 1.1421, + "step": 22808 + }, + { + "epoch": 0.6697104938634095, + "grad_norm": 0.0, + "learning_rate": 5.19643603159936e-06, + "loss": 1.2695, + "step": 22809 + }, + { + "epoch": 0.6697398555405485, + "grad_norm": 0.0, + "learning_rate": 5.195601991291337e-06, + "loss": 1.2373, + "step": 22810 + }, + { + "epoch": 0.6697692172176875, + "grad_norm": 0.0, + "learning_rate": 5.1947679944307115e-06, + "loss": 1.2041, + "step": 22811 + }, + { + "epoch": 0.6697985788948265, + "grad_norm": 0.0, + "learning_rate": 5.193934041025034e-06, + "loss": 1.1929, + "step": 22812 + }, + { + "epoch": 0.6698279405719655, + "grad_norm": 0.0, + "learning_rate": 5.19310013108184e-06, + "loss": 1.1367, + "step": 22813 + }, + { + "epoch": 0.6698573022491044, + "grad_norm": 0.0, + "learning_rate": 5.192266264608675e-06, + "loss": 1.1299, + "step": 22814 + }, + { + "epoch": 0.6698866639262435, + "grad_norm": 0.0, + "learning_rate": 5.191432441613073e-06, + "loss": 1.061, + "step": 22815 + }, + { + "epoch": 0.6699160256033825, + "grad_norm": 0.0, + "learning_rate": 5.190598662102582e-06, + "loss": 1.3496, + "step": 22816 + }, + { + "epoch": 0.6699453872805214, + "grad_norm": 0.0, + "learning_rate": 5.18976492608474e-06, + "loss": 1.2002, + "step": 22817 + }, + { + "epoch": 0.6699747489576605, + "grad_norm": 0.0, + "learning_rate": 5.188931233567081e-06, + "loss": 1.2363, + "step": 22818 + }, + { + "epoch": 0.6700041106347995, + "grad_norm": 0.0, + "learning_rate": 5.188097584557154e-06, + "loss": 1.3203, + "step": 22819 + }, + { + "epoch": 0.6700334723119384, + "grad_norm": 0.0, + "learning_rate": 5.187263979062487e-06, + "loss": 1.1846, + "step": 22820 + }, + { + "epoch": 0.6700628339890775, + "grad_norm": 0.0, + "learning_rate": 5.18643041709063e-06, + "loss": 1.2095, + "step": 22821 + }, + { + "epoch": 0.6700921956662165, + "grad_norm": 0.0, + "learning_rate": 5.185596898649115e-06, + "loss": 1.1968, + "step": 22822 + }, + { + "epoch": 0.6701215573433554, + "grad_norm": 0.0, + "learning_rate": 5.18476342374548e-06, + "loss": 1.2168, + "step": 22823 + }, + { + "epoch": 0.6701509190204944, + "grad_norm": 0.0, + "learning_rate": 5.183929992387259e-06, + "loss": 1.1445, + "step": 22824 + }, + { + "epoch": 0.6701802806976335, + "grad_norm": 0.0, + "learning_rate": 5.183096604581997e-06, + "loss": 1.335, + "step": 22825 + }, + { + "epoch": 0.6702096423747724, + "grad_norm": 0.0, + "learning_rate": 5.18226326033722e-06, + "loss": 1.3564, + "step": 22826 + }, + { + "epoch": 0.6702390040519114, + "grad_norm": 0.0, + "learning_rate": 5.181429959660476e-06, + "loss": 1.3315, + "step": 22827 + }, + { + "epoch": 0.6702683657290505, + "grad_norm": 0.0, + "learning_rate": 5.180596702559294e-06, + "loss": 1.2671, + "step": 22828 + }, + { + "epoch": 0.6702977274061894, + "grad_norm": 0.0, + "learning_rate": 5.179763489041211e-06, + "loss": 1.248, + "step": 22829 + }, + { + "epoch": 0.6703270890833284, + "grad_norm": 0.0, + "learning_rate": 5.178930319113761e-06, + "loss": 1.2646, + "step": 22830 + }, + { + "epoch": 0.6703564507604675, + "grad_norm": 0.0, + "learning_rate": 5.178097192784475e-06, + "loss": 0.979, + "step": 22831 + }, + { + "epoch": 0.6703858124376064, + "grad_norm": 0.0, + "learning_rate": 5.177264110060894e-06, + "loss": 1.2539, + "step": 22832 + }, + { + "epoch": 0.6704151741147454, + "grad_norm": 0.0, + "learning_rate": 5.176431070950546e-06, + "loss": 1.2227, + "step": 22833 + }, + { + "epoch": 0.6704445357918845, + "grad_norm": 0.0, + "learning_rate": 5.1755980754609705e-06, + "loss": 1.2725, + "step": 22834 + }, + { + "epoch": 0.6704738974690234, + "grad_norm": 0.0, + "learning_rate": 5.174765123599693e-06, + "loss": 1.3799, + "step": 22835 + }, + { + "epoch": 0.6705032591461624, + "grad_norm": 0.0, + "learning_rate": 5.173932215374256e-06, + "loss": 1.1646, + "step": 22836 + }, + { + "epoch": 0.6705326208233015, + "grad_norm": 0.0, + "learning_rate": 5.173099350792183e-06, + "loss": 1.1885, + "step": 22837 + }, + { + "epoch": 0.6705619825004404, + "grad_norm": 0.0, + "learning_rate": 5.172266529861011e-06, + "loss": 1.1147, + "step": 22838 + }, + { + "epoch": 0.6705913441775794, + "grad_norm": 0.0, + "learning_rate": 5.171433752588268e-06, + "loss": 1.2354, + "step": 22839 + }, + { + "epoch": 0.6706207058547184, + "grad_norm": 0.0, + "learning_rate": 5.170601018981483e-06, + "loss": 1.2827, + "step": 22840 + }, + { + "epoch": 0.6706500675318574, + "grad_norm": 0.0, + "learning_rate": 5.169768329048193e-06, + "loss": 1.2783, + "step": 22841 + }, + { + "epoch": 0.6706794292089964, + "grad_norm": 0.0, + "learning_rate": 5.1689356827959215e-06, + "loss": 1.2656, + "step": 22842 + }, + { + "epoch": 0.6707087908861354, + "grad_norm": 0.0, + "learning_rate": 5.168103080232206e-06, + "loss": 1.3188, + "step": 22843 + }, + { + "epoch": 0.6707381525632744, + "grad_norm": 0.0, + "learning_rate": 5.167270521364568e-06, + "loss": 1.3691, + "step": 22844 + }, + { + "epoch": 0.6707675142404134, + "grad_norm": 0.0, + "learning_rate": 5.166438006200545e-06, + "loss": 1.126, + "step": 22845 + }, + { + "epoch": 0.6707968759175524, + "grad_norm": 0.0, + "learning_rate": 5.16560553474766e-06, + "loss": 0.9766, + "step": 22846 + }, + { + "epoch": 0.6708262375946914, + "grad_norm": 0.0, + "learning_rate": 5.164773107013441e-06, + "loss": 1.1689, + "step": 22847 + }, + { + "epoch": 0.6708555992718304, + "grad_norm": 0.0, + "learning_rate": 5.163940723005419e-06, + "loss": 1.2168, + "step": 22848 + }, + { + "epoch": 0.6708849609489694, + "grad_norm": 0.0, + "learning_rate": 5.163108382731115e-06, + "loss": 1.3125, + "step": 22849 + }, + { + "epoch": 0.6709143226261084, + "grad_norm": 0.0, + "learning_rate": 5.162276086198064e-06, + "loss": 1.2065, + "step": 22850 + }, + { + "epoch": 0.6709436843032474, + "grad_norm": 0.0, + "learning_rate": 5.161443833413786e-06, + "loss": 1.2168, + "step": 22851 + }, + { + "epoch": 0.6709730459803864, + "grad_norm": 0.0, + "learning_rate": 5.160611624385815e-06, + "loss": 1.2559, + "step": 22852 + }, + { + "epoch": 0.6710024076575254, + "grad_norm": 0.0, + "learning_rate": 5.159779459121672e-06, + "loss": 1.3262, + "step": 22853 + }, + { + "epoch": 0.6710317693346644, + "grad_norm": 0.0, + "learning_rate": 5.158947337628884e-06, + "loss": 1.3252, + "step": 22854 + }, + { + "epoch": 0.6710611310118034, + "grad_norm": 0.0, + "learning_rate": 5.1581152599149686e-06, + "loss": 1.3438, + "step": 22855 + }, + { + "epoch": 0.6710904926889424, + "grad_norm": 0.0, + "learning_rate": 5.157283225987463e-06, + "loss": 1.2715, + "step": 22856 + }, + { + "epoch": 0.6711198543660813, + "grad_norm": 0.0, + "learning_rate": 5.1564512358538845e-06, + "loss": 1.2637, + "step": 22857 + }, + { + "epoch": 0.6711492160432204, + "grad_norm": 0.0, + "learning_rate": 5.155619289521754e-06, + "loss": 1.4287, + "step": 22858 + }, + { + "epoch": 0.6711785777203594, + "grad_norm": 0.0, + "learning_rate": 5.1547873869986035e-06, + "loss": 1.374, + "step": 22859 + }, + { + "epoch": 0.6712079393974983, + "grad_norm": 0.0, + "learning_rate": 5.1539555282919474e-06, + "loss": 1.2559, + "step": 22860 + }, + { + "epoch": 0.6712373010746374, + "grad_norm": 0.0, + "learning_rate": 5.153123713409316e-06, + "loss": 1.3066, + "step": 22861 + }, + { + "epoch": 0.6712666627517764, + "grad_norm": 0.0, + "learning_rate": 5.152291942358228e-06, + "loss": 1.4395, + "step": 22862 + }, + { + "epoch": 0.6712960244289153, + "grad_norm": 0.0, + "learning_rate": 5.151460215146207e-06, + "loss": 1.2197, + "step": 22863 + }, + { + "epoch": 0.6713253861060544, + "grad_norm": 0.0, + "learning_rate": 5.150628531780767e-06, + "loss": 1.1509, + "step": 22864 + }, + { + "epoch": 0.6713547477831934, + "grad_norm": 0.0, + "learning_rate": 5.149796892269441e-06, + "loss": 1.2559, + "step": 22865 + }, + { + "epoch": 0.6713841094603323, + "grad_norm": 0.0, + "learning_rate": 5.1489652966197415e-06, + "loss": 1.2744, + "step": 22866 + }, + { + "epoch": 0.6714134711374714, + "grad_norm": 0.0, + "learning_rate": 5.148133744839189e-06, + "loss": 1.1167, + "step": 22867 + }, + { + "epoch": 0.6714428328146104, + "grad_norm": 0.0, + "learning_rate": 5.14730223693531e-06, + "loss": 1.2871, + "step": 22868 + }, + { + "epoch": 0.6714721944917493, + "grad_norm": 0.0, + "learning_rate": 5.146470772915615e-06, + "loss": 1.2329, + "step": 22869 + }, + { + "epoch": 0.6715015561688884, + "grad_norm": 0.0, + "learning_rate": 5.1456393527876305e-06, + "loss": 1.2451, + "step": 22870 + }, + { + "epoch": 0.6715309178460274, + "grad_norm": 0.0, + "learning_rate": 5.1448079765588744e-06, + "loss": 1.2734, + "step": 22871 + }, + { + "epoch": 0.6715602795231663, + "grad_norm": 0.0, + "learning_rate": 5.143976644236862e-06, + "loss": 1.2324, + "step": 22872 + }, + { + "epoch": 0.6715896412003054, + "grad_norm": 0.0, + "learning_rate": 5.143145355829109e-06, + "loss": 1.1519, + "step": 22873 + }, + { + "epoch": 0.6716190028774444, + "grad_norm": 0.0, + "learning_rate": 5.142314111343141e-06, + "loss": 1.2607, + "step": 22874 + }, + { + "epoch": 0.6716483645545833, + "grad_norm": 0.0, + "learning_rate": 5.141482910786466e-06, + "loss": 1.1719, + "step": 22875 + }, + { + "epoch": 0.6716777262317224, + "grad_norm": 0.0, + "learning_rate": 5.140651754166609e-06, + "loss": 1.1973, + "step": 22876 + }, + { + "epoch": 0.6717070879088614, + "grad_norm": 0.0, + "learning_rate": 5.1398206414910836e-06, + "loss": 1.25, + "step": 22877 + }, + { + "epoch": 0.6717364495860003, + "grad_norm": 0.0, + "learning_rate": 5.1389895727674035e-06, + "loss": 1.2842, + "step": 22878 + }, + { + "epoch": 0.6717658112631394, + "grad_norm": 0.0, + "learning_rate": 5.138158548003087e-06, + "loss": 1.1914, + "step": 22879 + }, + { + "epoch": 0.6717951729402784, + "grad_norm": 0.0, + "learning_rate": 5.137327567205643e-06, + "loss": 1.2925, + "step": 22880 + }, + { + "epoch": 0.6718245346174173, + "grad_norm": 0.0, + "learning_rate": 5.136496630382594e-06, + "loss": 1.2451, + "step": 22881 + }, + { + "epoch": 0.6718538962945564, + "grad_norm": 0.0, + "learning_rate": 5.135665737541451e-06, + "loss": 1.2266, + "step": 22882 + }, + { + "epoch": 0.6718832579716953, + "grad_norm": 0.0, + "learning_rate": 5.13483488868973e-06, + "loss": 1.2822, + "step": 22883 + }, + { + "epoch": 0.6719126196488343, + "grad_norm": 0.0, + "learning_rate": 5.134004083834939e-06, + "loss": 1.2354, + "step": 22884 + }, + { + "epoch": 0.6719419813259734, + "grad_norm": 0.0, + "learning_rate": 5.1331733229846e-06, + "loss": 1.2397, + "step": 22885 + }, + { + "epoch": 0.6719713430031123, + "grad_norm": 0.0, + "learning_rate": 5.132342606146221e-06, + "loss": 1.2256, + "step": 22886 + }, + { + "epoch": 0.6720007046802513, + "grad_norm": 0.0, + "learning_rate": 5.131511933327315e-06, + "loss": 1.1914, + "step": 22887 + }, + { + "epoch": 0.6720300663573904, + "grad_norm": 0.0, + "learning_rate": 5.130681304535392e-06, + "loss": 1.2939, + "step": 22888 + }, + { + "epoch": 0.6720594280345293, + "grad_norm": 0.0, + "learning_rate": 5.129850719777962e-06, + "loss": 1.1943, + "step": 22889 + }, + { + "epoch": 0.6720887897116683, + "grad_norm": 0.0, + "learning_rate": 5.129020179062542e-06, + "loss": 1.3408, + "step": 22890 + }, + { + "epoch": 0.6721181513888074, + "grad_norm": 0.0, + "learning_rate": 5.128189682396638e-06, + "loss": 1.1821, + "step": 22891 + }, + { + "epoch": 0.6721475130659463, + "grad_norm": 0.0, + "learning_rate": 5.127359229787765e-06, + "loss": 1.3008, + "step": 22892 + }, + { + "epoch": 0.6721768747430853, + "grad_norm": 0.0, + "learning_rate": 5.1265288212434265e-06, + "loss": 1.2197, + "step": 22893 + }, + { + "epoch": 0.6722062364202244, + "grad_norm": 0.0, + "learning_rate": 5.12569845677114e-06, + "loss": 1.2471, + "step": 22894 + }, + { + "epoch": 0.6722355980973633, + "grad_norm": 0.0, + "learning_rate": 5.124868136378411e-06, + "loss": 1.2803, + "step": 22895 + }, + { + "epoch": 0.6722649597745023, + "grad_norm": 0.0, + "learning_rate": 5.124037860072748e-06, + "loss": 1.1851, + "step": 22896 + }, + { + "epoch": 0.6722943214516414, + "grad_norm": 0.0, + "learning_rate": 5.12320762786166e-06, + "loss": 1.1846, + "step": 22897 + }, + { + "epoch": 0.6723236831287803, + "grad_norm": 0.0, + "learning_rate": 5.122377439752649e-06, + "loss": 1.123, + "step": 22898 + }, + { + "epoch": 0.6723530448059193, + "grad_norm": 0.0, + "learning_rate": 5.121547295753233e-06, + "loss": 1.2988, + "step": 22899 + }, + { + "epoch": 0.6723824064830584, + "grad_norm": 0.0, + "learning_rate": 5.12071719587091e-06, + "loss": 1.1177, + "step": 22900 + }, + { + "epoch": 0.6724117681601973, + "grad_norm": 0.0, + "learning_rate": 5.119887140113194e-06, + "loss": 1.2104, + "step": 22901 + }, + { + "epoch": 0.6724411298373363, + "grad_norm": 0.0, + "learning_rate": 5.1190571284875904e-06, + "loss": 1.3447, + "step": 22902 + }, + { + "epoch": 0.6724704915144754, + "grad_norm": 0.0, + "learning_rate": 5.118227161001601e-06, + "loss": 1.1401, + "step": 22903 + }, + { + "epoch": 0.6724998531916143, + "grad_norm": 0.0, + "learning_rate": 5.117397237662731e-06, + "loss": 1.2607, + "step": 22904 + }, + { + "epoch": 0.6725292148687533, + "grad_norm": 0.0, + "learning_rate": 5.116567358478492e-06, + "loss": 1.25, + "step": 22905 + }, + { + "epoch": 0.6725585765458924, + "grad_norm": 0.0, + "learning_rate": 5.1157375234563854e-06, + "loss": 1.1846, + "step": 22906 + }, + { + "epoch": 0.6725879382230313, + "grad_norm": 0.0, + "learning_rate": 5.114907732603911e-06, + "loss": 1.3809, + "step": 22907 + }, + { + "epoch": 0.6726172999001703, + "grad_norm": 0.0, + "learning_rate": 5.11407798592858e-06, + "loss": 1.146, + "step": 22908 + }, + { + "epoch": 0.6726466615773093, + "grad_norm": 0.0, + "learning_rate": 5.11324828343789e-06, + "loss": 1.1548, + "step": 22909 + }, + { + "epoch": 0.6726760232544483, + "grad_norm": 0.0, + "learning_rate": 5.112418625139352e-06, + "loss": 1.2671, + "step": 22910 + }, + { + "epoch": 0.6727053849315873, + "grad_norm": 0.0, + "learning_rate": 5.111589011040463e-06, + "loss": 1.2734, + "step": 22911 + }, + { + "epoch": 0.6727347466087263, + "grad_norm": 0.0, + "learning_rate": 5.110759441148727e-06, + "loss": 1.2051, + "step": 22912 + }, + { + "epoch": 0.6727641082858653, + "grad_norm": 0.0, + "learning_rate": 5.109929915471641e-06, + "loss": 1.2041, + "step": 22913 + }, + { + "epoch": 0.6727934699630043, + "grad_norm": 0.0, + "learning_rate": 5.109100434016716e-06, + "loss": 1.127, + "step": 22914 + }, + { + "epoch": 0.6728228316401433, + "grad_norm": 0.0, + "learning_rate": 5.1082709967914465e-06, + "loss": 1.2695, + "step": 22915 + }, + { + "epoch": 0.6728521933172823, + "grad_norm": 0.0, + "learning_rate": 5.1074416038033335e-06, + "loss": 1.1982, + "step": 22916 + }, + { + "epoch": 0.6728815549944213, + "grad_norm": 0.0, + "learning_rate": 5.1066122550598815e-06, + "loss": 1.332, + "step": 22917 + }, + { + "epoch": 0.6729109166715603, + "grad_norm": 0.0, + "learning_rate": 5.105782950568585e-06, + "loss": 1.2373, + "step": 22918 + }, + { + "epoch": 0.6729402783486993, + "grad_norm": 0.0, + "learning_rate": 5.104953690336951e-06, + "loss": 1.2197, + "step": 22919 + }, + { + "epoch": 0.6729696400258383, + "grad_norm": 0.0, + "learning_rate": 5.104124474372474e-06, + "loss": 1.2632, + "step": 22920 + }, + { + "epoch": 0.6729990017029773, + "grad_norm": 0.0, + "learning_rate": 5.103295302682653e-06, + "loss": 1.1216, + "step": 22921 + }, + { + "epoch": 0.6730283633801163, + "grad_norm": 0.0, + "learning_rate": 5.102466175274983e-06, + "loss": 1.4297, + "step": 22922 + }, + { + "epoch": 0.6730577250572553, + "grad_norm": 0.0, + "learning_rate": 5.1016370921569695e-06, + "loss": 1.2764, + "step": 22923 + }, + { + "epoch": 0.6730870867343942, + "grad_norm": 0.0, + "learning_rate": 5.100808053336102e-06, + "loss": 1.1221, + "step": 22924 + }, + { + "epoch": 0.6731164484115333, + "grad_norm": 0.0, + "learning_rate": 5.099979058819886e-06, + "loss": 1.1611, + "step": 22925 + }, + { + "epoch": 0.6731458100886722, + "grad_norm": 0.0, + "learning_rate": 5.099150108615813e-06, + "loss": 1.3066, + "step": 22926 + }, + { + "epoch": 0.6731751717658112, + "grad_norm": 0.0, + "learning_rate": 5.098321202731384e-06, + "loss": 1.061, + "step": 22927 + }, + { + "epoch": 0.6732045334429503, + "grad_norm": 0.0, + "learning_rate": 5.097492341174088e-06, + "loss": 1.2373, + "step": 22928 + }, + { + "epoch": 0.6732338951200892, + "grad_norm": 0.0, + "learning_rate": 5.096663523951423e-06, + "loss": 1.126, + "step": 22929 + }, + { + "epoch": 0.6732632567972282, + "grad_norm": 0.0, + "learning_rate": 5.095834751070888e-06, + "loss": 1.2295, + "step": 22930 + }, + { + "epoch": 0.6732926184743673, + "grad_norm": 0.0, + "learning_rate": 5.095006022539972e-06, + "loss": 1.1245, + "step": 22931 + }, + { + "epoch": 0.6733219801515062, + "grad_norm": 0.0, + "learning_rate": 5.0941773383661775e-06, + "loss": 1.2441, + "step": 22932 + }, + { + "epoch": 0.6733513418286452, + "grad_norm": 0.0, + "learning_rate": 5.093348698556988e-06, + "loss": 1.1924, + "step": 22933 + }, + { + "epoch": 0.6733807035057843, + "grad_norm": 0.0, + "learning_rate": 5.092520103119909e-06, + "loss": 1.3496, + "step": 22934 + }, + { + "epoch": 0.6734100651829232, + "grad_norm": 0.0, + "learning_rate": 5.091691552062425e-06, + "loss": 1.2764, + "step": 22935 + }, + { + "epoch": 0.6734394268600622, + "grad_norm": 0.0, + "learning_rate": 5.0908630453920335e-06, + "loss": 1.167, + "step": 22936 + }, + { + "epoch": 0.6734687885372013, + "grad_norm": 0.0, + "learning_rate": 5.090034583116223e-06, + "loss": 1.1318, + "step": 22937 + }, + { + "epoch": 0.6734981502143402, + "grad_norm": 0.0, + "learning_rate": 5.089206165242484e-06, + "loss": 1.166, + "step": 22938 + }, + { + "epoch": 0.6735275118914792, + "grad_norm": 0.0, + "learning_rate": 5.088377791778316e-06, + "loss": 1.2715, + "step": 22939 + }, + { + "epoch": 0.6735568735686183, + "grad_norm": 0.0, + "learning_rate": 5.0875494627312e-06, + "loss": 1.2168, + "step": 22940 + }, + { + "epoch": 0.6735862352457572, + "grad_norm": 0.0, + "learning_rate": 5.086721178108638e-06, + "loss": 1.2344, + "step": 22941 + }, + { + "epoch": 0.6736155969228962, + "grad_norm": 0.0, + "learning_rate": 5.08589293791811e-06, + "loss": 1.3223, + "step": 22942 + }, + { + "epoch": 0.6736449586000353, + "grad_norm": 0.0, + "learning_rate": 5.085064742167114e-06, + "loss": 1.3115, + "step": 22943 + }, + { + "epoch": 0.6736743202771742, + "grad_norm": 0.0, + "learning_rate": 5.084236590863137e-06, + "loss": 1.2832, + "step": 22944 + }, + { + "epoch": 0.6737036819543132, + "grad_norm": 0.0, + "learning_rate": 5.083408484013668e-06, + "loss": 1.1924, + "step": 22945 + }, + { + "epoch": 0.6737330436314523, + "grad_norm": 0.0, + "learning_rate": 5.082580421626194e-06, + "loss": 1.2192, + "step": 22946 + }, + { + "epoch": 0.6737624053085912, + "grad_norm": 0.0, + "learning_rate": 5.081752403708201e-06, + "loss": 1.2393, + "step": 22947 + }, + { + "epoch": 0.6737917669857302, + "grad_norm": 0.0, + "learning_rate": 5.0809244302671845e-06, + "loss": 1.3203, + "step": 22948 + }, + { + "epoch": 0.6738211286628693, + "grad_norm": 0.0, + "learning_rate": 5.080096501310625e-06, + "loss": 1.3994, + "step": 22949 + }, + { + "epoch": 0.6738504903400082, + "grad_norm": 0.0, + "learning_rate": 5.079268616846016e-06, + "loss": 1.2559, + "step": 22950 + }, + { + "epoch": 0.6738798520171472, + "grad_norm": 0.0, + "learning_rate": 5.078440776880837e-06, + "loss": 1.0723, + "step": 22951 + }, + { + "epoch": 0.6739092136942862, + "grad_norm": 0.0, + "learning_rate": 5.077612981422587e-06, + "loss": 1.0825, + "step": 22952 + }, + { + "epoch": 0.6739385753714252, + "grad_norm": 0.0, + "learning_rate": 5.076785230478736e-06, + "loss": 1.2852, + "step": 22953 + }, + { + "epoch": 0.6739679370485642, + "grad_norm": 0.0, + "learning_rate": 5.075957524056781e-06, + "loss": 1.2129, + "step": 22954 + }, + { + "epoch": 0.6739972987257032, + "grad_norm": 0.0, + "learning_rate": 5.075129862164201e-06, + "loss": 1.186, + "step": 22955 + }, + { + "epoch": 0.6740266604028422, + "grad_norm": 0.0, + "learning_rate": 5.074302244808481e-06, + "loss": 1.2388, + "step": 22956 + }, + { + "epoch": 0.6740560220799812, + "grad_norm": 0.0, + "learning_rate": 5.073474671997111e-06, + "loss": 1.2729, + "step": 22957 + }, + { + "epoch": 0.6740853837571202, + "grad_norm": 0.0, + "learning_rate": 5.072647143737567e-06, + "loss": 1.1582, + "step": 22958 + }, + { + "epoch": 0.6741147454342592, + "grad_norm": 0.0, + "learning_rate": 5.071819660037341e-06, + "loss": 1.3145, + "step": 22959 + }, + { + "epoch": 0.6741441071113982, + "grad_norm": 0.0, + "learning_rate": 5.070992220903912e-06, + "loss": 1.29, + "step": 22960 + }, + { + "epoch": 0.6741734687885372, + "grad_norm": 0.0, + "learning_rate": 5.070164826344761e-06, + "loss": 1.3545, + "step": 22961 + }, + { + "epoch": 0.6742028304656762, + "grad_norm": 0.0, + "learning_rate": 5.069337476367371e-06, + "loss": 1.1758, + "step": 22962 + }, + { + "epoch": 0.6742321921428152, + "grad_norm": 0.0, + "learning_rate": 5.068510170979226e-06, + "loss": 1.2842, + "step": 22963 + }, + { + "epoch": 0.6742615538199542, + "grad_norm": 0.0, + "learning_rate": 5.067682910187807e-06, + "loss": 1.3047, + "step": 22964 + }, + { + "epoch": 0.6742909154970932, + "grad_norm": 0.0, + "learning_rate": 5.066855694000593e-06, + "loss": 1.1973, + "step": 22965 + }, + { + "epoch": 0.6743202771742322, + "grad_norm": 0.0, + "learning_rate": 5.0660285224250684e-06, + "loss": 1.2803, + "step": 22966 + }, + { + "epoch": 0.6743496388513712, + "grad_norm": 0.0, + "learning_rate": 5.0652013954687085e-06, + "loss": 1.1006, + "step": 22967 + }, + { + "epoch": 0.6743790005285102, + "grad_norm": 0.0, + "learning_rate": 5.064374313139e-06, + "loss": 1.1089, + "step": 22968 + }, + { + "epoch": 0.6744083622056491, + "grad_norm": 0.0, + "learning_rate": 5.063547275443418e-06, + "loss": 1.229, + "step": 22969 + }, + { + "epoch": 0.6744377238827882, + "grad_norm": 0.0, + "learning_rate": 5.0627202823894415e-06, + "loss": 1.3057, + "step": 22970 + }, + { + "epoch": 0.6744670855599272, + "grad_norm": 0.0, + "learning_rate": 5.061893333984548e-06, + "loss": 1.2412, + "step": 22971 + }, + { + "epoch": 0.6744964472370661, + "grad_norm": 0.0, + "learning_rate": 5.06106643023622e-06, + "loss": 1.4551, + "step": 22972 + }, + { + "epoch": 0.6745258089142052, + "grad_norm": 0.0, + "learning_rate": 5.0602395711519305e-06, + "loss": 1.21, + "step": 22973 + }, + { + "epoch": 0.6745551705913442, + "grad_norm": 0.0, + "learning_rate": 5.059412756739163e-06, + "loss": 1.2246, + "step": 22974 + }, + { + "epoch": 0.6745845322684831, + "grad_norm": 0.0, + "learning_rate": 5.058585987005391e-06, + "loss": 1.3232, + "step": 22975 + }, + { + "epoch": 0.6746138939456222, + "grad_norm": 0.0, + "learning_rate": 5.057759261958087e-06, + "loss": 1.2568, + "step": 22976 + }, + { + "epoch": 0.6746432556227612, + "grad_norm": 0.0, + "learning_rate": 5.056932581604741e-06, + "loss": 1.2588, + "step": 22977 + }, + { + "epoch": 0.6746726172999001, + "grad_norm": 0.0, + "learning_rate": 5.056105945952811e-06, + "loss": 1.1904, + "step": 22978 + }, + { + "epoch": 0.6747019789770392, + "grad_norm": 0.0, + "learning_rate": 5.055279355009785e-06, + "loss": 1.1948, + "step": 22979 + }, + { + "epoch": 0.6747313406541782, + "grad_norm": 0.0, + "learning_rate": 5.05445280878313e-06, + "loss": 1.2256, + "step": 22980 + }, + { + "epoch": 0.6747607023313171, + "grad_norm": 0.0, + "learning_rate": 5.053626307280329e-06, + "loss": 1.3311, + "step": 22981 + }, + { + "epoch": 0.6747900640084562, + "grad_norm": 0.0, + "learning_rate": 5.0527998505088485e-06, + "loss": 1.2568, + "step": 22982 + }, + { + "epoch": 0.6748194256855952, + "grad_norm": 0.0, + "learning_rate": 5.05197343847617e-06, + "loss": 1.1963, + "step": 22983 + }, + { + "epoch": 0.6748487873627341, + "grad_norm": 0.0, + "learning_rate": 5.051147071189762e-06, + "loss": 1.292, + "step": 22984 + }, + { + "epoch": 0.6748781490398732, + "grad_norm": 0.0, + "learning_rate": 5.0503207486570986e-06, + "loss": 1.166, + "step": 22985 + }, + { + "epoch": 0.6749075107170122, + "grad_norm": 0.0, + "learning_rate": 5.049494470885653e-06, + "loss": 1.3506, + "step": 22986 + }, + { + "epoch": 0.6749368723941511, + "grad_norm": 0.0, + "learning_rate": 5.0486682378828924e-06, + "loss": 1.2622, + "step": 22987 + }, + { + "epoch": 0.6749662340712902, + "grad_norm": 0.0, + "learning_rate": 5.047842049656298e-06, + "loss": 1.1382, + "step": 22988 + }, + { + "epoch": 0.6749955957484292, + "grad_norm": 0.0, + "learning_rate": 5.04701590621333e-06, + "loss": 1.2637, + "step": 22989 + }, + { + "epoch": 0.6750249574255681, + "grad_norm": 0.0, + "learning_rate": 5.04618980756147e-06, + "loss": 1.1572, + "step": 22990 + }, + { + "epoch": 0.6750543191027072, + "grad_norm": 0.0, + "learning_rate": 5.0453637537081815e-06, + "loss": 1.2314, + "step": 22991 + }, + { + "epoch": 0.6750836807798462, + "grad_norm": 0.0, + "learning_rate": 5.044537744660943e-06, + "loss": 1.3076, + "step": 22992 + }, + { + "epoch": 0.6751130424569851, + "grad_norm": 0.0, + "learning_rate": 5.043711780427215e-06, + "loss": 1.3828, + "step": 22993 + }, + { + "epoch": 0.6751424041341242, + "grad_norm": 0.0, + "learning_rate": 5.042885861014472e-06, + "loss": 1.2822, + "step": 22994 + }, + { + "epoch": 0.6751717658112631, + "grad_norm": 0.0, + "learning_rate": 5.042059986430183e-06, + "loss": 1.2256, + "step": 22995 + }, + { + "epoch": 0.6752011274884021, + "grad_norm": 0.0, + "learning_rate": 5.0412341566818115e-06, + "loss": 1.2646, + "step": 22996 + }, + { + "epoch": 0.6752304891655412, + "grad_norm": 0.0, + "learning_rate": 5.040408371776832e-06, + "loss": 1.291, + "step": 22997 + }, + { + "epoch": 0.6752598508426801, + "grad_norm": 0.0, + "learning_rate": 5.039582631722706e-06, + "loss": 1.1558, + "step": 22998 + }, + { + "epoch": 0.6752892125198191, + "grad_norm": 0.0, + "learning_rate": 5.03875693652691e-06, + "loss": 1.1875, + "step": 22999 + }, + { + "epoch": 0.6753185741969582, + "grad_norm": 0.0, + "learning_rate": 5.0379312861968994e-06, + "loss": 1.3281, + "step": 23000 + }, + { + "epoch": 0.6753479358740971, + "grad_norm": 0.0, + "learning_rate": 5.037105680740152e-06, + "loss": 1.3291, + "step": 23001 + }, + { + "epoch": 0.6753772975512361, + "grad_norm": 0.0, + "learning_rate": 5.03628012016413e-06, + "loss": 1.2168, + "step": 23002 + }, + { + "epoch": 0.6754066592283752, + "grad_norm": 0.0, + "learning_rate": 5.035454604476296e-06, + "loss": 1.165, + "step": 23003 + }, + { + "epoch": 0.6754360209055141, + "grad_norm": 0.0, + "learning_rate": 5.0346291336841195e-06, + "loss": 1.1221, + "step": 23004 + }, + { + "epoch": 0.6754653825826531, + "grad_norm": 0.0, + "learning_rate": 5.033803707795057e-06, + "loss": 1.3291, + "step": 23005 + }, + { + "epoch": 0.6754947442597922, + "grad_norm": 0.0, + "learning_rate": 5.032978326816587e-06, + "loss": 1.2876, + "step": 23006 + }, + { + "epoch": 0.6755241059369311, + "grad_norm": 0.0, + "learning_rate": 5.032152990756158e-06, + "loss": 1.0244, + "step": 23007 + }, + { + "epoch": 0.6755534676140701, + "grad_norm": 0.0, + "learning_rate": 5.031327699621249e-06, + "loss": 1.2007, + "step": 23008 + }, + { + "epoch": 0.6755828292912092, + "grad_norm": 0.0, + "learning_rate": 5.030502453419315e-06, + "loss": 1.2563, + "step": 23009 + }, + { + "epoch": 0.6756121909683481, + "grad_norm": 0.0, + "learning_rate": 5.0296772521578196e-06, + "loss": 1.0952, + "step": 23010 + }, + { + "epoch": 0.6756415526454871, + "grad_norm": 0.0, + "learning_rate": 5.028852095844222e-06, + "loss": 1.2134, + "step": 23011 + }, + { + "epoch": 0.6756709143226262, + "grad_norm": 0.0, + "learning_rate": 5.0280269844859915e-06, + "loss": 1.2607, + "step": 23012 + }, + { + "epoch": 0.6757002759997651, + "grad_norm": 0.0, + "learning_rate": 5.027201918090583e-06, + "loss": 1.1777, + "step": 23013 + }, + { + "epoch": 0.6757296376769041, + "grad_norm": 0.0, + "learning_rate": 5.026376896665465e-06, + "loss": 1.3203, + "step": 23014 + }, + { + "epoch": 0.6757589993540432, + "grad_norm": 0.0, + "learning_rate": 5.025551920218095e-06, + "loss": 1.2793, + "step": 23015 + }, + { + "epoch": 0.6757883610311821, + "grad_norm": 0.0, + "learning_rate": 5.024726988755928e-06, + "loss": 1.3916, + "step": 23016 + }, + { + "epoch": 0.6758177227083211, + "grad_norm": 0.0, + "learning_rate": 5.023902102286433e-06, + "loss": 1.1709, + "step": 23017 + }, + { + "epoch": 0.6758470843854602, + "grad_norm": 0.0, + "learning_rate": 5.023077260817066e-06, + "loss": 1.2002, + "step": 23018 + }, + { + "epoch": 0.6758764460625991, + "grad_norm": 0.0, + "learning_rate": 5.022252464355288e-06, + "loss": 1.2441, + "step": 23019 + }, + { + "epoch": 0.6759058077397381, + "grad_norm": 0.0, + "learning_rate": 5.021427712908548e-06, + "loss": 1.2012, + "step": 23020 + }, + { + "epoch": 0.6759351694168771, + "grad_norm": 0.0, + "learning_rate": 5.020603006484319e-06, + "loss": 1.127, + "step": 23021 + }, + { + "epoch": 0.6759645310940161, + "grad_norm": 0.0, + "learning_rate": 5.0197783450900475e-06, + "loss": 1.0146, + "step": 23022 + }, + { + "epoch": 0.6759938927711551, + "grad_norm": 0.0, + "learning_rate": 5.0189537287332e-06, + "loss": 1.3184, + "step": 23023 + }, + { + "epoch": 0.6760232544482941, + "grad_norm": 0.0, + "learning_rate": 5.0181291574212296e-06, + "loss": 1.2002, + "step": 23024 + }, + { + "epoch": 0.6760526161254331, + "grad_norm": 0.0, + "learning_rate": 5.017304631161588e-06, + "loss": 1.2441, + "step": 23025 + }, + { + "epoch": 0.6760819778025721, + "grad_norm": 0.0, + "learning_rate": 5.0164801499617424e-06, + "loss": 1.2124, + "step": 23026 + }, + { + "epoch": 0.676111339479711, + "grad_norm": 0.0, + "learning_rate": 5.015655713829142e-06, + "loss": 1.3066, + "step": 23027 + }, + { + "epoch": 0.6761407011568501, + "grad_norm": 0.0, + "learning_rate": 5.014831322771243e-06, + "loss": 1.1909, + "step": 23028 + }, + { + "epoch": 0.6761700628339891, + "grad_norm": 0.0, + "learning_rate": 5.014006976795498e-06, + "loss": 1.1929, + "step": 23029 + }, + { + "epoch": 0.676199424511128, + "grad_norm": 0.0, + "learning_rate": 5.013182675909369e-06, + "loss": 1.2051, + "step": 23030 + }, + { + "epoch": 0.6762287861882671, + "grad_norm": 0.0, + "learning_rate": 5.012358420120302e-06, + "loss": 1.1406, + "step": 23031 + }, + { + "epoch": 0.6762581478654061, + "grad_norm": 0.0, + "learning_rate": 5.011534209435759e-06, + "loss": 1.2158, + "step": 23032 + }, + { + "epoch": 0.676287509542545, + "grad_norm": 0.0, + "learning_rate": 5.0107100438631895e-06, + "loss": 1.2305, + "step": 23033 + }, + { + "epoch": 0.6763168712196841, + "grad_norm": 0.0, + "learning_rate": 5.009885923410047e-06, + "loss": 1.1226, + "step": 23034 + }, + { + "epoch": 0.676346232896823, + "grad_norm": 0.0, + "learning_rate": 5.009061848083785e-06, + "loss": 1.2505, + "step": 23035 + }, + { + "epoch": 0.676375594573962, + "grad_norm": 0.0, + "learning_rate": 5.00823781789185e-06, + "loss": 1.3086, + "step": 23036 + }, + { + "epoch": 0.6764049562511011, + "grad_norm": 0.0, + "learning_rate": 5.0074138328417025e-06, + "loss": 1.3271, + "step": 23037 + }, + { + "epoch": 0.67643431792824, + "grad_norm": 0.0, + "learning_rate": 5.006589892940787e-06, + "loss": 1.1899, + "step": 23038 + }, + { + "epoch": 0.676463679605379, + "grad_norm": 0.0, + "learning_rate": 5.005765998196561e-06, + "loss": 1.1831, + "step": 23039 + }, + { + "epoch": 0.6764930412825181, + "grad_norm": 0.0, + "learning_rate": 5.00494214861647e-06, + "loss": 1.2373, + "step": 23040 + }, + { + "epoch": 0.676522402959657, + "grad_norm": 0.0, + "learning_rate": 5.004118344207969e-06, + "loss": 1.3457, + "step": 23041 + }, + { + "epoch": 0.676551764636796, + "grad_norm": 0.0, + "learning_rate": 5.003294584978504e-06, + "loss": 1.1934, + "step": 23042 + }, + { + "epoch": 0.6765811263139351, + "grad_norm": 0.0, + "learning_rate": 5.002470870935527e-06, + "loss": 1.3975, + "step": 23043 + }, + { + "epoch": 0.676610487991074, + "grad_norm": 0.0, + "learning_rate": 5.001647202086487e-06, + "loss": 1.3994, + "step": 23044 + }, + { + "epoch": 0.676639849668213, + "grad_norm": 0.0, + "learning_rate": 5.000823578438827e-06, + "loss": 1.3164, + "step": 23045 + }, + { + "epoch": 0.6766692113453521, + "grad_norm": 0.0, + "learning_rate": 5.000000000000003e-06, + "loss": 1.064, + "step": 23046 + }, + { + "epoch": 0.676698573022491, + "grad_norm": 0.0, + "learning_rate": 4.9991764667774554e-06, + "loss": 1.2285, + "step": 23047 + }, + { + "epoch": 0.67672793469963, + "grad_norm": 0.0, + "learning_rate": 4.99835297877864e-06, + "loss": 1.2158, + "step": 23048 + }, + { + "epoch": 0.6767572963767691, + "grad_norm": 0.0, + "learning_rate": 4.997529536010996e-06, + "loss": 1.2871, + "step": 23049 + }, + { + "epoch": 0.676786658053908, + "grad_norm": 0.0, + "learning_rate": 4.996706138481976e-06, + "loss": 1.2578, + "step": 23050 + }, + { + "epoch": 0.676816019731047, + "grad_norm": 0.0, + "learning_rate": 4.9958827861990256e-06, + "loss": 1.1621, + "step": 23051 + }, + { + "epoch": 0.6768453814081861, + "grad_norm": 0.0, + "learning_rate": 4.995059479169587e-06, + "loss": 1.2354, + "step": 23052 + }, + { + "epoch": 0.676874743085325, + "grad_norm": 0.0, + "learning_rate": 4.994236217401109e-06, + "loss": 1.1182, + "step": 23053 + }, + { + "epoch": 0.676904104762464, + "grad_norm": 0.0, + "learning_rate": 4.9934130009010305e-06, + "loss": 1.3311, + "step": 23054 + }, + { + "epoch": 0.6769334664396031, + "grad_norm": 0.0, + "learning_rate": 4.992589829676804e-06, + "loss": 1.1396, + "step": 23055 + }, + { + "epoch": 0.676962828116742, + "grad_norm": 0.0, + "learning_rate": 4.991766703735866e-06, + "loss": 1.293, + "step": 23056 + }, + { + "epoch": 0.676992189793881, + "grad_norm": 0.0, + "learning_rate": 4.990943623085669e-06, + "loss": 1.1714, + "step": 23057 + }, + { + "epoch": 0.67702155147102, + "grad_norm": 0.0, + "learning_rate": 4.990120587733651e-06, + "loss": 1.1953, + "step": 23058 + }, + { + "epoch": 0.677050913148159, + "grad_norm": 0.0, + "learning_rate": 4.989297597687256e-06, + "loss": 1.1602, + "step": 23059 + }, + { + "epoch": 0.677080274825298, + "grad_norm": 0.0, + "learning_rate": 4.9884746529539214e-06, + "loss": 1.1797, + "step": 23060 + }, + { + "epoch": 0.677109636502437, + "grad_norm": 0.0, + "learning_rate": 4.9876517535410985e-06, + "loss": 1.3135, + "step": 23061 + }, + { + "epoch": 0.677138998179576, + "grad_norm": 0.0, + "learning_rate": 4.986828899456221e-06, + "loss": 1.3379, + "step": 23062 + }, + { + "epoch": 0.677168359856715, + "grad_norm": 0.0, + "learning_rate": 4.986006090706736e-06, + "loss": 1.1963, + "step": 23063 + }, + { + "epoch": 0.677197721533854, + "grad_norm": 0.0, + "learning_rate": 4.985183327300084e-06, + "loss": 1.2236, + "step": 23064 + }, + { + "epoch": 0.677227083210993, + "grad_norm": 0.0, + "learning_rate": 4.984360609243698e-06, + "loss": 1.2842, + "step": 23065 + }, + { + "epoch": 0.677256444888132, + "grad_norm": 0.0, + "learning_rate": 4.983537936545029e-06, + "loss": 1.1646, + "step": 23066 + }, + { + "epoch": 0.677285806565271, + "grad_norm": 0.0, + "learning_rate": 4.98271530921151e-06, + "loss": 1.2822, + "step": 23067 + }, + { + "epoch": 0.67731516824241, + "grad_norm": 0.0, + "learning_rate": 4.981892727250581e-06, + "loss": 1.3008, + "step": 23068 + }, + { + "epoch": 0.677344529919549, + "grad_norm": 0.0, + "learning_rate": 4.981070190669678e-06, + "loss": 1.1616, + "step": 23069 + }, + { + "epoch": 0.677373891596688, + "grad_norm": 0.0, + "learning_rate": 4.980247699476247e-06, + "loss": 1.3105, + "step": 23070 + }, + { + "epoch": 0.677403253273827, + "grad_norm": 0.0, + "learning_rate": 4.979425253677716e-06, + "loss": 1.1543, + "step": 23071 + }, + { + "epoch": 0.677432614950966, + "grad_norm": 0.0, + "learning_rate": 4.978602853281534e-06, + "loss": 1.2993, + "step": 23072 + }, + { + "epoch": 0.677461976628105, + "grad_norm": 0.0, + "learning_rate": 4.977780498295132e-06, + "loss": 1.2891, + "step": 23073 + }, + { + "epoch": 0.677491338305244, + "grad_norm": 0.0, + "learning_rate": 4.976958188725943e-06, + "loss": 1.2002, + "step": 23074 + }, + { + "epoch": 0.677520699982383, + "grad_norm": 0.0, + "learning_rate": 4.976135924581411e-06, + "loss": 1.4268, + "step": 23075 + }, + { + "epoch": 0.677550061659522, + "grad_norm": 0.0, + "learning_rate": 4.975313705868969e-06, + "loss": 1.2251, + "step": 23076 + }, + { + "epoch": 0.677579423336661, + "grad_norm": 0.0, + "learning_rate": 4.974491532596053e-06, + "loss": 1.1738, + "step": 23077 + }, + { + "epoch": 0.6776087850138, + "grad_norm": 0.0, + "learning_rate": 4.973669404770092e-06, + "loss": 1.1885, + "step": 23078 + }, + { + "epoch": 0.677638146690939, + "grad_norm": 0.0, + "learning_rate": 4.972847322398529e-06, + "loss": 1.3052, + "step": 23079 + }, + { + "epoch": 0.677667508368078, + "grad_norm": 0.0, + "learning_rate": 4.972025285488793e-06, + "loss": 1.1875, + "step": 23080 + }, + { + "epoch": 0.677696870045217, + "grad_norm": 0.0, + "learning_rate": 4.971203294048323e-06, + "loss": 1.2261, + "step": 23081 + }, + { + "epoch": 0.677726231722356, + "grad_norm": 0.0, + "learning_rate": 4.9703813480845495e-06, + "loss": 1.3086, + "step": 23082 + }, + { + "epoch": 0.677755593399495, + "grad_norm": 0.0, + "learning_rate": 4.969559447604906e-06, + "loss": 1.1909, + "step": 23083 + }, + { + "epoch": 0.6777849550766339, + "grad_norm": 0.0, + "learning_rate": 4.968737592616823e-06, + "loss": 1.1655, + "step": 23084 + }, + { + "epoch": 0.677814316753773, + "grad_norm": 0.0, + "learning_rate": 4.967915783127732e-06, + "loss": 1.2017, + "step": 23085 + }, + { + "epoch": 0.677843678430912, + "grad_norm": 0.0, + "learning_rate": 4.967094019145072e-06, + "loss": 1.0859, + "step": 23086 + }, + { + "epoch": 0.6778730401080509, + "grad_norm": 0.0, + "learning_rate": 4.9662723006762644e-06, + "loss": 1.1216, + "step": 23087 + }, + { + "epoch": 0.67790240178519, + "grad_norm": 0.0, + "learning_rate": 4.965450627728751e-06, + "loss": 1.1968, + "step": 23088 + }, + { + "epoch": 0.677931763462329, + "grad_norm": 0.0, + "learning_rate": 4.964629000309951e-06, + "loss": 1.2549, + "step": 23089 + }, + { + "epoch": 0.6779611251394679, + "grad_norm": 0.0, + "learning_rate": 4.9638074184273055e-06, + "loss": 1.3447, + "step": 23090 + }, + { + "epoch": 0.677990486816607, + "grad_norm": 0.0, + "learning_rate": 4.96298588208824e-06, + "loss": 1.1572, + "step": 23091 + }, + { + "epoch": 0.678019848493746, + "grad_norm": 0.0, + "learning_rate": 4.962164391300182e-06, + "loss": 1.3193, + "step": 23092 + }, + { + "epoch": 0.6780492101708849, + "grad_norm": 0.0, + "learning_rate": 4.961342946070562e-06, + "loss": 1.3545, + "step": 23093 + }, + { + "epoch": 0.678078571848024, + "grad_norm": 0.0, + "learning_rate": 4.960521546406805e-06, + "loss": 1.2197, + "step": 23094 + }, + { + "epoch": 0.678107933525163, + "grad_norm": 0.0, + "learning_rate": 4.959700192316345e-06, + "loss": 1.2803, + "step": 23095 + }, + { + "epoch": 0.6781372952023019, + "grad_norm": 0.0, + "learning_rate": 4.958878883806604e-06, + "loss": 1.2441, + "step": 23096 + }, + { + "epoch": 0.678166656879441, + "grad_norm": 0.0, + "learning_rate": 4.958057620885015e-06, + "loss": 1.291, + "step": 23097 + }, + { + "epoch": 0.67819601855658, + "grad_norm": 0.0, + "learning_rate": 4.957236403558999e-06, + "loss": 1.3672, + "step": 23098 + }, + { + "epoch": 0.6782253802337189, + "grad_norm": 0.0, + "learning_rate": 4.9564152318359906e-06, + "loss": 1.3086, + "step": 23099 + }, + { + "epoch": 0.678254741910858, + "grad_norm": 0.0, + "learning_rate": 4.95559410572341e-06, + "loss": 1.1953, + "step": 23100 + }, + { + "epoch": 0.678284103587997, + "grad_norm": 0.0, + "learning_rate": 4.954773025228684e-06, + "loss": 1.2686, + "step": 23101 + }, + { + "epoch": 0.6783134652651359, + "grad_norm": 0.0, + "learning_rate": 4.953951990359237e-06, + "loss": 1.0518, + "step": 23102 + }, + { + "epoch": 0.678342826942275, + "grad_norm": 0.0, + "learning_rate": 4.953131001122491e-06, + "loss": 1.1338, + "step": 23103 + }, + { + "epoch": 0.678372188619414, + "grad_norm": 0.0, + "learning_rate": 4.952310057525877e-06, + "loss": 1.2373, + "step": 23104 + }, + { + "epoch": 0.6784015502965529, + "grad_norm": 0.0, + "learning_rate": 4.9514891595768135e-06, + "loss": 1.1738, + "step": 23105 + }, + { + "epoch": 0.678430911973692, + "grad_norm": 0.0, + "learning_rate": 4.95066830728273e-06, + "loss": 1.2158, + "step": 23106 + }, + { + "epoch": 0.678460273650831, + "grad_norm": 0.0, + "learning_rate": 4.949847500651046e-06, + "loss": 1.3262, + "step": 23107 + }, + { + "epoch": 0.6784896353279699, + "grad_norm": 0.0, + "learning_rate": 4.949026739689183e-06, + "loss": 1.3594, + "step": 23108 + }, + { + "epoch": 0.678518997005109, + "grad_norm": 0.0, + "learning_rate": 4.9482060244045626e-06, + "loss": 1.1797, + "step": 23109 + }, + { + "epoch": 0.6785483586822479, + "grad_norm": 0.0, + "learning_rate": 4.947385354804612e-06, + "loss": 1.2305, + "step": 23110 + }, + { + "epoch": 0.6785777203593869, + "grad_norm": 0.0, + "learning_rate": 4.946564730896747e-06, + "loss": 1.2607, + "step": 23111 + }, + { + "epoch": 0.678607082036526, + "grad_norm": 0.0, + "learning_rate": 4.945744152688394e-06, + "loss": 1.1445, + "step": 23112 + }, + { + "epoch": 0.6786364437136649, + "grad_norm": 0.0, + "learning_rate": 4.944923620186971e-06, + "loss": 1.1743, + "step": 23113 + }, + { + "epoch": 0.6786658053908039, + "grad_norm": 0.0, + "learning_rate": 4.9441031333998945e-06, + "loss": 1.248, + "step": 23114 + }, + { + "epoch": 0.678695167067943, + "grad_norm": 0.0, + "learning_rate": 4.9432826923345925e-06, + "loss": 1.3154, + "step": 23115 + }, + { + "epoch": 0.6787245287450819, + "grad_norm": 0.0, + "learning_rate": 4.94246229699848e-06, + "loss": 1.3506, + "step": 23116 + }, + { + "epoch": 0.6787538904222209, + "grad_norm": 0.0, + "learning_rate": 4.941641947398976e-06, + "loss": 1.1826, + "step": 23117 + }, + { + "epoch": 0.67878325209936, + "grad_norm": 0.0, + "learning_rate": 4.940821643543496e-06, + "loss": 1.1816, + "step": 23118 + }, + { + "epoch": 0.6788126137764989, + "grad_norm": 0.0, + "learning_rate": 4.940001385439466e-06, + "loss": 1.3135, + "step": 23119 + }, + { + "epoch": 0.6788419754536379, + "grad_norm": 0.0, + "learning_rate": 4.939181173094294e-06, + "loss": 1.3281, + "step": 23120 + }, + { + "epoch": 0.678871337130777, + "grad_norm": 0.0, + "learning_rate": 4.938361006515406e-06, + "loss": 1.3008, + "step": 23121 + }, + { + "epoch": 0.6789006988079159, + "grad_norm": 0.0, + "learning_rate": 4.937540885710218e-06, + "loss": 1.2197, + "step": 23122 + }, + { + "epoch": 0.6789300604850549, + "grad_norm": 0.0, + "learning_rate": 4.936720810686139e-06, + "loss": 1.1626, + "step": 23123 + }, + { + "epoch": 0.678959422162194, + "grad_norm": 0.0, + "learning_rate": 4.935900781450594e-06, + "loss": 1.1895, + "step": 23124 + }, + { + "epoch": 0.6789887838393329, + "grad_norm": 0.0, + "learning_rate": 4.935080798010996e-06, + "loss": 1.2539, + "step": 23125 + }, + { + "epoch": 0.6790181455164719, + "grad_norm": 0.0, + "learning_rate": 4.93426086037476e-06, + "loss": 1.1763, + "step": 23126 + }, + { + "epoch": 0.6790475071936108, + "grad_norm": 0.0, + "learning_rate": 4.933440968549295e-06, + "loss": 1.2988, + "step": 23127 + }, + { + "epoch": 0.6790768688707499, + "grad_norm": 0.0, + "learning_rate": 4.932621122542026e-06, + "loss": 1.2646, + "step": 23128 + }, + { + "epoch": 0.6791062305478889, + "grad_norm": 0.0, + "learning_rate": 4.931801322360358e-06, + "loss": 1.1543, + "step": 23129 + }, + { + "epoch": 0.6791355922250278, + "grad_norm": 0.0, + "learning_rate": 4.930981568011713e-06, + "loss": 1.2705, + "step": 23130 + }, + { + "epoch": 0.6791649539021669, + "grad_norm": 0.0, + "learning_rate": 4.930161859503494e-06, + "loss": 1.2217, + "step": 23131 + }, + { + "epoch": 0.6791943155793059, + "grad_norm": 0.0, + "learning_rate": 4.929342196843129e-06, + "loss": 1.2627, + "step": 23132 + }, + { + "epoch": 0.6792236772564448, + "grad_norm": 0.0, + "learning_rate": 4.928522580038016e-06, + "loss": 1.2324, + "step": 23133 + }, + { + "epoch": 0.6792530389335839, + "grad_norm": 0.0, + "learning_rate": 4.92770300909557e-06, + "loss": 1.2695, + "step": 23134 + }, + { + "epoch": 0.6792824006107229, + "grad_norm": 0.0, + "learning_rate": 4.926883484023207e-06, + "loss": 1.2036, + "step": 23135 + }, + { + "epoch": 0.6793117622878618, + "grad_norm": 0.0, + "learning_rate": 4.9260640048283334e-06, + "loss": 1.1245, + "step": 23136 + }, + { + "epoch": 0.6793411239650009, + "grad_norm": 0.0, + "learning_rate": 4.925244571518365e-06, + "loss": 1.2842, + "step": 23137 + }, + { + "epoch": 0.6793704856421399, + "grad_norm": 0.0, + "learning_rate": 4.924425184100708e-06, + "loss": 1.3994, + "step": 23138 + }, + { + "epoch": 0.6793998473192788, + "grad_norm": 0.0, + "learning_rate": 4.923605842582775e-06, + "loss": 1.228, + "step": 23139 + }, + { + "epoch": 0.6794292089964179, + "grad_norm": 0.0, + "learning_rate": 4.922786546971976e-06, + "loss": 1.2158, + "step": 23140 + }, + { + "epoch": 0.6794585706735569, + "grad_norm": 0.0, + "learning_rate": 4.9219672972757184e-06, + "loss": 1.3945, + "step": 23141 + }, + { + "epoch": 0.6794879323506958, + "grad_norm": 0.0, + "learning_rate": 4.92114809350141e-06, + "loss": 1.1982, + "step": 23142 + }, + { + "epoch": 0.6795172940278349, + "grad_norm": 0.0, + "learning_rate": 4.920328935656458e-06, + "loss": 1.2476, + "step": 23143 + }, + { + "epoch": 0.6795466557049739, + "grad_norm": 0.0, + "learning_rate": 4.9195098237482755e-06, + "loss": 1.2256, + "step": 23144 + }, + { + "epoch": 0.6795760173821128, + "grad_norm": 0.0, + "learning_rate": 4.918690757784262e-06, + "loss": 1.2378, + "step": 23145 + }, + { + "epoch": 0.6796053790592519, + "grad_norm": 0.0, + "learning_rate": 4.9178717377718345e-06, + "loss": 1.2563, + "step": 23146 + }, + { + "epoch": 0.6796347407363909, + "grad_norm": 0.0, + "learning_rate": 4.9170527637183896e-06, + "loss": 1.2427, + "step": 23147 + }, + { + "epoch": 0.6796641024135298, + "grad_norm": 0.0, + "learning_rate": 4.9162338356313424e-06, + "loss": 1.3145, + "step": 23148 + }, + { + "epoch": 0.6796934640906689, + "grad_norm": 0.0, + "learning_rate": 4.915414953518095e-06, + "loss": 1.2832, + "step": 23149 + }, + { + "epoch": 0.6797228257678078, + "grad_norm": 0.0, + "learning_rate": 4.914596117386053e-06, + "loss": 1.2256, + "step": 23150 + }, + { + "epoch": 0.6797521874449468, + "grad_norm": 0.0, + "learning_rate": 4.913777327242615e-06, + "loss": 1.3008, + "step": 23151 + }, + { + "epoch": 0.6797815491220859, + "grad_norm": 0.0, + "learning_rate": 4.912958583095197e-06, + "loss": 1.2612, + "step": 23152 + }, + { + "epoch": 0.6798109107992248, + "grad_norm": 0.0, + "learning_rate": 4.912139884951196e-06, + "loss": 1.1865, + "step": 23153 + }, + { + "epoch": 0.6798402724763638, + "grad_norm": 0.0, + "learning_rate": 4.9113212328180126e-06, + "loss": 1.2583, + "step": 23154 + }, + { + "epoch": 0.6798696341535029, + "grad_norm": 0.0, + "learning_rate": 4.91050262670306e-06, + "loss": 1.3496, + "step": 23155 + }, + { + "epoch": 0.6798989958306418, + "grad_norm": 0.0, + "learning_rate": 4.9096840666137315e-06, + "loss": 1.2422, + "step": 23156 + }, + { + "epoch": 0.6799283575077808, + "grad_norm": 0.0, + "learning_rate": 4.90886555255744e-06, + "loss": 1.3213, + "step": 23157 + }, + { + "epoch": 0.6799577191849199, + "grad_norm": 0.0, + "learning_rate": 4.9080470845415755e-06, + "loss": 1.2402, + "step": 23158 + }, + { + "epoch": 0.6799870808620588, + "grad_norm": 0.0, + "learning_rate": 4.907228662573547e-06, + "loss": 1.2109, + "step": 23159 + }, + { + "epoch": 0.6800164425391978, + "grad_norm": 0.0, + "learning_rate": 4.906410286660751e-06, + "loss": 1.1919, + "step": 23160 + }, + { + "epoch": 0.6800458042163369, + "grad_norm": 0.0, + "learning_rate": 4.905591956810596e-06, + "loss": 1.2373, + "step": 23161 + }, + { + "epoch": 0.6800751658934758, + "grad_norm": 0.0, + "learning_rate": 4.904773673030476e-06, + "loss": 1.1426, + "step": 23162 + }, + { + "epoch": 0.6801045275706148, + "grad_norm": 0.0, + "learning_rate": 4.903955435327791e-06, + "loss": 1.2725, + "step": 23163 + }, + { + "epoch": 0.6801338892477539, + "grad_norm": 0.0, + "learning_rate": 4.9031372437099434e-06, + "loss": 1.1558, + "step": 23164 + }, + { + "epoch": 0.6801632509248928, + "grad_norm": 0.0, + "learning_rate": 4.902319098184333e-06, + "loss": 1.1826, + "step": 23165 + }, + { + "epoch": 0.6801926126020318, + "grad_norm": 0.0, + "learning_rate": 4.901500998758355e-06, + "loss": 1.2539, + "step": 23166 + }, + { + "epoch": 0.6802219742791709, + "grad_norm": 0.0, + "learning_rate": 4.900682945439407e-06, + "loss": 1.3115, + "step": 23167 + }, + { + "epoch": 0.6802513359563098, + "grad_norm": 0.0, + "learning_rate": 4.8998649382348915e-06, + "loss": 1.2949, + "step": 23168 + }, + { + "epoch": 0.6802806976334488, + "grad_norm": 0.0, + "learning_rate": 4.899046977152201e-06, + "loss": 1.3613, + "step": 23169 + }, + { + "epoch": 0.6803100593105879, + "grad_norm": 0.0, + "learning_rate": 4.898229062198737e-06, + "loss": 1.3145, + "step": 23170 + }, + { + "epoch": 0.6803394209877268, + "grad_norm": 0.0, + "learning_rate": 4.897411193381896e-06, + "loss": 1.3047, + "step": 23171 + }, + { + "epoch": 0.6803687826648658, + "grad_norm": 0.0, + "learning_rate": 4.8965933707090675e-06, + "loss": 1.332, + "step": 23172 + }, + { + "epoch": 0.6803981443420049, + "grad_norm": 0.0, + "learning_rate": 4.895775594187658e-06, + "loss": 1.1943, + "step": 23173 + }, + { + "epoch": 0.6804275060191438, + "grad_norm": 0.0, + "learning_rate": 4.894957863825054e-06, + "loss": 1.2051, + "step": 23174 + }, + { + "epoch": 0.6804568676962828, + "grad_norm": 0.0, + "learning_rate": 4.894140179628655e-06, + "loss": 1.2017, + "step": 23175 + }, + { + "epoch": 0.6804862293734218, + "grad_norm": 0.0, + "learning_rate": 4.893322541605849e-06, + "loss": 1.1475, + "step": 23176 + }, + { + "epoch": 0.6805155910505608, + "grad_norm": 0.0, + "learning_rate": 4.892504949764041e-06, + "loss": 1.292, + "step": 23177 + }, + { + "epoch": 0.6805449527276998, + "grad_norm": 0.0, + "learning_rate": 4.891687404110613e-06, + "loss": 1.2349, + "step": 23178 + }, + { + "epoch": 0.6805743144048388, + "grad_norm": 0.0, + "learning_rate": 4.890869904652969e-06, + "loss": 1.1318, + "step": 23179 + }, + { + "epoch": 0.6806036760819778, + "grad_norm": 0.0, + "learning_rate": 4.890052451398494e-06, + "loss": 1.2666, + "step": 23180 + }, + { + "epoch": 0.6806330377591168, + "grad_norm": 0.0, + "learning_rate": 4.889235044354585e-06, + "loss": 1.2305, + "step": 23181 + }, + { + "epoch": 0.6806623994362558, + "grad_norm": 0.0, + "learning_rate": 4.888417683528638e-06, + "loss": 1.2822, + "step": 23182 + }, + { + "epoch": 0.6806917611133948, + "grad_norm": 0.0, + "learning_rate": 4.8876003689280305e-06, + "loss": 1.208, + "step": 23183 + }, + { + "epoch": 0.6807211227905338, + "grad_norm": 0.0, + "learning_rate": 4.886783100560165e-06, + "loss": 1.4355, + "step": 23184 + }, + { + "epoch": 0.6807504844676728, + "grad_norm": 0.0, + "learning_rate": 4.885965878432427e-06, + "loss": 1.209, + "step": 23185 + }, + { + "epoch": 0.6807798461448118, + "grad_norm": 0.0, + "learning_rate": 4.885148702552213e-06, + "loss": 1.2085, + "step": 23186 + }, + { + "epoch": 0.6808092078219508, + "grad_norm": 0.0, + "learning_rate": 4.8843315729269045e-06, + "loss": 1.0928, + "step": 23187 + }, + { + "epoch": 0.6808385694990898, + "grad_norm": 0.0, + "learning_rate": 4.883514489563901e-06, + "loss": 1.2432, + "step": 23188 + }, + { + "epoch": 0.6808679311762288, + "grad_norm": 0.0, + "learning_rate": 4.882697452470585e-06, + "loss": 1.1992, + "step": 23189 + }, + { + "epoch": 0.6808972928533678, + "grad_norm": 0.0, + "learning_rate": 4.881880461654348e-06, + "loss": 1.2158, + "step": 23190 + }, + { + "epoch": 0.6809266545305068, + "grad_norm": 0.0, + "learning_rate": 4.881063517122575e-06, + "loss": 1.4043, + "step": 23191 + }, + { + "epoch": 0.6809560162076458, + "grad_norm": 0.0, + "learning_rate": 4.880246618882652e-06, + "loss": 1.3066, + "step": 23192 + }, + { + "epoch": 0.6809853778847847, + "grad_norm": 0.0, + "learning_rate": 4.879429766941975e-06, + "loss": 1.2139, + "step": 23193 + }, + { + "epoch": 0.6810147395619238, + "grad_norm": 0.0, + "learning_rate": 4.878612961307921e-06, + "loss": 1.3076, + "step": 23194 + }, + { + "epoch": 0.6810441012390628, + "grad_norm": 0.0, + "learning_rate": 4.877796201987886e-06, + "loss": 1.2246, + "step": 23195 + }, + { + "epoch": 0.6810734629162017, + "grad_norm": 0.0, + "learning_rate": 4.876979488989248e-06, + "loss": 1.2451, + "step": 23196 + }, + { + "epoch": 0.6811028245933408, + "grad_norm": 0.0, + "learning_rate": 4.876162822319401e-06, + "loss": 1.252, + "step": 23197 + }, + { + "epoch": 0.6811321862704798, + "grad_norm": 0.0, + "learning_rate": 4.875346201985725e-06, + "loss": 1.2383, + "step": 23198 + }, + { + "epoch": 0.6811615479476187, + "grad_norm": 0.0, + "learning_rate": 4.8745296279956064e-06, + "loss": 1.2192, + "step": 23199 + }, + { + "epoch": 0.6811909096247578, + "grad_norm": 0.0, + "learning_rate": 4.873713100356424e-06, + "loss": 1.2588, + "step": 23200 + }, + { + "epoch": 0.6812202713018968, + "grad_norm": 0.0, + "learning_rate": 4.872896619075572e-06, + "loss": 1.1738, + "step": 23201 + }, + { + "epoch": 0.6812496329790357, + "grad_norm": 0.0, + "learning_rate": 4.8720801841604285e-06, + "loss": 1.1577, + "step": 23202 + }, + { + "epoch": 0.6812789946561748, + "grad_norm": 0.0, + "learning_rate": 4.8712637956183725e-06, + "loss": 1.2476, + "step": 23203 + }, + { + "epoch": 0.6813083563333138, + "grad_norm": 0.0, + "learning_rate": 4.870447453456796e-06, + "loss": 1.2627, + "step": 23204 + }, + { + "epoch": 0.6813377180104527, + "grad_norm": 0.0, + "learning_rate": 4.869631157683074e-06, + "loss": 1.2676, + "step": 23205 + }, + { + "epoch": 0.6813670796875918, + "grad_norm": 0.0, + "learning_rate": 4.868814908304593e-06, + "loss": 1.3018, + "step": 23206 + }, + { + "epoch": 0.6813964413647308, + "grad_norm": 0.0, + "learning_rate": 4.867998705328735e-06, + "loss": 1.2812, + "step": 23207 + }, + { + "epoch": 0.6814258030418697, + "grad_norm": 0.0, + "learning_rate": 4.867182548762878e-06, + "loss": 1.2666, + "step": 23208 + }, + { + "epoch": 0.6814551647190088, + "grad_norm": 0.0, + "learning_rate": 4.8663664386144e-06, + "loss": 1.2441, + "step": 23209 + }, + { + "epoch": 0.6814845263961478, + "grad_norm": 0.0, + "learning_rate": 4.865550374890688e-06, + "loss": 1.2744, + "step": 23210 + }, + { + "epoch": 0.6815138880732867, + "grad_norm": 0.0, + "learning_rate": 4.86473435759912e-06, + "loss": 1.2402, + "step": 23211 + }, + { + "epoch": 0.6815432497504258, + "grad_norm": 0.0, + "learning_rate": 4.863918386747071e-06, + "loss": 1.1758, + "step": 23212 + }, + { + "epoch": 0.6815726114275648, + "grad_norm": 0.0, + "learning_rate": 4.863102462341927e-06, + "loss": 1.2676, + "step": 23213 + }, + { + "epoch": 0.6816019731047037, + "grad_norm": 0.0, + "learning_rate": 4.862286584391063e-06, + "loss": 1.2021, + "step": 23214 + }, + { + "epoch": 0.6816313347818428, + "grad_norm": 0.0, + "learning_rate": 4.861470752901856e-06, + "loss": 1.1919, + "step": 23215 + }, + { + "epoch": 0.6816606964589818, + "grad_norm": 0.0, + "learning_rate": 4.860654967881681e-06, + "loss": 1.2646, + "step": 23216 + }, + { + "epoch": 0.6816900581361207, + "grad_norm": 0.0, + "learning_rate": 4.859839229337925e-06, + "loss": 1.0742, + "step": 23217 + }, + { + "epoch": 0.6817194198132598, + "grad_norm": 0.0, + "learning_rate": 4.859023537277955e-06, + "loss": 1.3047, + "step": 23218 + }, + { + "epoch": 0.6817487814903987, + "grad_norm": 0.0, + "learning_rate": 4.858207891709155e-06, + "loss": 1.3164, + "step": 23219 + }, + { + "epoch": 0.6817781431675377, + "grad_norm": 0.0, + "learning_rate": 4.857392292638898e-06, + "loss": 1.3301, + "step": 23220 + }, + { + "epoch": 0.6818075048446768, + "grad_norm": 0.0, + "learning_rate": 4.856576740074556e-06, + "loss": 1.3457, + "step": 23221 + }, + { + "epoch": 0.6818368665218157, + "grad_norm": 0.0, + "learning_rate": 4.855761234023512e-06, + "loss": 1.1738, + "step": 23222 + }, + { + "epoch": 0.6818662281989547, + "grad_norm": 0.0, + "learning_rate": 4.854945774493135e-06, + "loss": 1.1787, + "step": 23223 + }, + { + "epoch": 0.6818955898760938, + "grad_norm": 0.0, + "learning_rate": 4.8541303614908015e-06, + "loss": 1.3477, + "step": 23224 + }, + { + "epoch": 0.6819249515532327, + "grad_norm": 0.0, + "learning_rate": 4.853314995023882e-06, + "loss": 1.2637, + "step": 23225 + }, + { + "epoch": 0.6819543132303717, + "grad_norm": 0.0, + "learning_rate": 4.852499675099756e-06, + "loss": 1.2295, + "step": 23226 + }, + { + "epoch": 0.6819836749075107, + "grad_norm": 0.0, + "learning_rate": 4.851684401725791e-06, + "loss": 1.3154, + "step": 23227 + }, + { + "epoch": 0.6820130365846497, + "grad_norm": 0.0, + "learning_rate": 4.850869174909366e-06, + "loss": 1.1104, + "step": 23228 + }, + { + "epoch": 0.6820423982617887, + "grad_norm": 0.0, + "learning_rate": 4.850053994657847e-06, + "loss": 1.1846, + "step": 23229 + }, + { + "epoch": 0.6820717599389277, + "grad_norm": 0.0, + "learning_rate": 4.84923886097861e-06, + "loss": 1.3164, + "step": 23230 + }, + { + "epoch": 0.6821011216160667, + "grad_norm": 0.0, + "learning_rate": 4.848423773879026e-06, + "loss": 1.3438, + "step": 23231 + }, + { + "epoch": 0.6821304832932057, + "grad_norm": 0.0, + "learning_rate": 4.8476087333664665e-06, + "loss": 1.3154, + "step": 23232 + }, + { + "epoch": 0.6821598449703447, + "grad_norm": 0.0, + "learning_rate": 4.8467937394483e-06, + "loss": 1.1514, + "step": 23233 + }, + { + "epoch": 0.6821892066474837, + "grad_norm": 0.0, + "learning_rate": 4.845978792131892e-06, + "loss": 1.2891, + "step": 23234 + }, + { + "epoch": 0.6822185683246227, + "grad_norm": 0.0, + "learning_rate": 4.845163891424624e-06, + "loss": 1.1772, + "step": 23235 + }, + { + "epoch": 0.6822479300017616, + "grad_norm": 0.0, + "learning_rate": 4.844349037333854e-06, + "loss": 1.2666, + "step": 23236 + }, + { + "epoch": 0.6822772916789007, + "grad_norm": 0.0, + "learning_rate": 4.843534229866961e-06, + "loss": 1.1426, + "step": 23237 + }, + { + "epoch": 0.6823066533560397, + "grad_norm": 0.0, + "learning_rate": 4.842719469031308e-06, + "loss": 1.3086, + "step": 23238 + }, + { + "epoch": 0.6823360150331786, + "grad_norm": 0.0, + "learning_rate": 4.8419047548342625e-06, + "loss": 1.0776, + "step": 23239 + }, + { + "epoch": 0.6823653767103177, + "grad_norm": 0.0, + "learning_rate": 4.841090087283193e-06, + "loss": 1.2568, + "step": 23240 + }, + { + "epoch": 0.6823947383874567, + "grad_norm": 0.0, + "learning_rate": 4.840275466385466e-06, + "loss": 1.2344, + "step": 23241 + }, + { + "epoch": 0.6824241000645956, + "grad_norm": 0.0, + "learning_rate": 4.839460892148451e-06, + "loss": 1.2266, + "step": 23242 + }, + { + "epoch": 0.6824534617417347, + "grad_norm": 0.0, + "learning_rate": 4.838646364579509e-06, + "loss": 1.1963, + "step": 23243 + }, + { + "epoch": 0.6824828234188737, + "grad_norm": 0.0, + "learning_rate": 4.837831883686014e-06, + "loss": 1.1611, + "step": 23244 + }, + { + "epoch": 0.6825121850960126, + "grad_norm": 0.0, + "learning_rate": 4.837017449475324e-06, + "loss": 1.1904, + "step": 23245 + }, + { + "epoch": 0.6825415467731517, + "grad_norm": 0.0, + "learning_rate": 4.836203061954811e-06, + "loss": 1.2949, + "step": 23246 + }, + { + "epoch": 0.6825709084502907, + "grad_norm": 0.0, + "learning_rate": 4.835388721131836e-06, + "loss": 1.3525, + "step": 23247 + }, + { + "epoch": 0.6826002701274296, + "grad_norm": 0.0, + "learning_rate": 4.834574427013764e-06, + "loss": 1.1943, + "step": 23248 + }, + { + "epoch": 0.6826296318045687, + "grad_norm": 0.0, + "learning_rate": 4.8337601796079535e-06, + "loss": 1.25, + "step": 23249 + }, + { + "epoch": 0.6826589934817077, + "grad_norm": 0.0, + "learning_rate": 4.832945978921777e-06, + "loss": 1.1797, + "step": 23250 + }, + { + "epoch": 0.6826883551588466, + "grad_norm": 0.0, + "learning_rate": 4.832131824962594e-06, + "loss": 1.1758, + "step": 23251 + }, + { + "epoch": 0.6827177168359857, + "grad_norm": 0.0, + "learning_rate": 4.831317717737763e-06, + "loss": 1.208, + "step": 23252 + }, + { + "epoch": 0.6827470785131247, + "grad_norm": 0.0, + "learning_rate": 4.830503657254652e-06, + "loss": 1.1689, + "step": 23253 + }, + { + "epoch": 0.6827764401902636, + "grad_norm": 0.0, + "learning_rate": 4.8296896435206164e-06, + "loss": 1.1865, + "step": 23254 + }, + { + "epoch": 0.6828058018674027, + "grad_norm": 0.0, + "learning_rate": 4.828875676543026e-06, + "loss": 1.228, + "step": 23255 + }, + { + "epoch": 0.6828351635445417, + "grad_norm": 0.0, + "learning_rate": 4.828061756329238e-06, + "loss": 1.229, + "step": 23256 + }, + { + "epoch": 0.6828645252216806, + "grad_norm": 0.0, + "learning_rate": 4.8272478828866105e-06, + "loss": 1.2949, + "step": 23257 + }, + { + "epoch": 0.6828938868988197, + "grad_norm": 0.0, + "learning_rate": 4.826434056222502e-06, + "loss": 1.1436, + "step": 23258 + }, + { + "epoch": 0.6829232485759587, + "grad_norm": 0.0, + "learning_rate": 4.825620276344279e-06, + "loss": 1.2959, + "step": 23259 + }, + { + "epoch": 0.6829526102530976, + "grad_norm": 0.0, + "learning_rate": 4.824806543259298e-06, + "loss": 1.2207, + "step": 23260 + }, + { + "epoch": 0.6829819719302367, + "grad_norm": 0.0, + "learning_rate": 4.8239928569749114e-06, + "loss": 1.1978, + "step": 23261 + }, + { + "epoch": 0.6830113336073756, + "grad_norm": 0.0, + "learning_rate": 4.823179217498488e-06, + "loss": 1.2173, + "step": 23262 + }, + { + "epoch": 0.6830406952845146, + "grad_norm": 0.0, + "learning_rate": 4.822365624837381e-06, + "loss": 1.2339, + "step": 23263 + }, + { + "epoch": 0.6830700569616537, + "grad_norm": 0.0, + "learning_rate": 4.821552078998946e-06, + "loss": 1.333, + "step": 23264 + }, + { + "epoch": 0.6830994186387926, + "grad_norm": 0.0, + "learning_rate": 4.8207385799905395e-06, + "loss": 1.1216, + "step": 23265 + }, + { + "epoch": 0.6831287803159316, + "grad_norm": 0.0, + "learning_rate": 4.8199251278195234e-06, + "loss": 1.3071, + "step": 23266 + }, + { + "epoch": 0.6831581419930707, + "grad_norm": 0.0, + "learning_rate": 4.819111722493249e-06, + "loss": 1.209, + "step": 23267 + }, + { + "epoch": 0.6831875036702096, + "grad_norm": 0.0, + "learning_rate": 4.818298364019075e-06, + "loss": 1.252, + "step": 23268 + }, + { + "epoch": 0.6832168653473486, + "grad_norm": 0.0, + "learning_rate": 4.817485052404354e-06, + "loss": 1.1953, + "step": 23269 + }, + { + "epoch": 0.6832462270244877, + "grad_norm": 0.0, + "learning_rate": 4.8166717876564465e-06, + "loss": 1.2578, + "step": 23270 + }, + { + "epoch": 0.6832755887016266, + "grad_norm": 0.0, + "learning_rate": 4.815858569782703e-06, + "loss": 1.228, + "step": 23271 + }, + { + "epoch": 0.6833049503787656, + "grad_norm": 0.0, + "learning_rate": 4.8150453987904785e-06, + "loss": 1.3926, + "step": 23272 + }, + { + "epoch": 0.6833343120559047, + "grad_norm": 0.0, + "learning_rate": 4.814232274687126e-06, + "loss": 1.3643, + "step": 23273 + }, + { + "epoch": 0.6833636737330436, + "grad_norm": 0.0, + "learning_rate": 4.813419197479996e-06, + "loss": 1.2559, + "step": 23274 + }, + { + "epoch": 0.6833930354101826, + "grad_norm": 0.0, + "learning_rate": 4.812606167176448e-06, + "loss": 1.0796, + "step": 23275 + }, + { + "epoch": 0.6834223970873217, + "grad_norm": 0.0, + "learning_rate": 4.811793183783827e-06, + "loss": 1.2529, + "step": 23276 + }, + { + "epoch": 0.6834517587644606, + "grad_norm": 0.0, + "learning_rate": 4.810980247309493e-06, + "loss": 1.2959, + "step": 23277 + }, + { + "epoch": 0.6834811204415996, + "grad_norm": 0.0, + "learning_rate": 4.81016735776079e-06, + "loss": 1.2383, + "step": 23278 + }, + { + "epoch": 0.6835104821187387, + "grad_norm": 0.0, + "learning_rate": 4.809354515145076e-06, + "loss": 1.2383, + "step": 23279 + }, + { + "epoch": 0.6835398437958776, + "grad_norm": 0.0, + "learning_rate": 4.808541719469699e-06, + "loss": 1.1514, + "step": 23280 + }, + { + "epoch": 0.6835692054730166, + "grad_norm": 0.0, + "learning_rate": 4.807728970742008e-06, + "loss": 1.2734, + "step": 23281 + }, + { + "epoch": 0.6835985671501557, + "grad_norm": 0.0, + "learning_rate": 4.806916268969354e-06, + "loss": 1.3418, + "step": 23282 + }, + { + "epoch": 0.6836279288272946, + "grad_norm": 0.0, + "learning_rate": 4.806103614159082e-06, + "loss": 1.1348, + "step": 23283 + }, + { + "epoch": 0.6836572905044336, + "grad_norm": 0.0, + "learning_rate": 4.80529100631855e-06, + "loss": 1.3828, + "step": 23284 + }, + { + "epoch": 0.6836866521815727, + "grad_norm": 0.0, + "learning_rate": 4.804478445455097e-06, + "loss": 1.2803, + "step": 23285 + }, + { + "epoch": 0.6837160138587116, + "grad_norm": 0.0, + "learning_rate": 4.80366593157608e-06, + "loss": 1.0132, + "step": 23286 + }, + { + "epoch": 0.6837453755358506, + "grad_norm": 0.0, + "learning_rate": 4.802853464688844e-06, + "loss": 1.208, + "step": 23287 + }, + { + "epoch": 0.6837747372129896, + "grad_norm": 0.0, + "learning_rate": 4.8020410448007334e-06, + "loss": 1.2344, + "step": 23288 + }, + { + "epoch": 0.6838040988901286, + "grad_norm": 0.0, + "learning_rate": 4.801228671919098e-06, + "loss": 1.1963, + "step": 23289 + }, + { + "epoch": 0.6838334605672676, + "grad_norm": 0.0, + "learning_rate": 4.800416346051279e-06, + "loss": 1.2803, + "step": 23290 + }, + { + "epoch": 0.6838628222444066, + "grad_norm": 0.0, + "learning_rate": 4.799604067204631e-06, + "loss": 1.2256, + "step": 23291 + }, + { + "epoch": 0.6838921839215456, + "grad_norm": 0.0, + "learning_rate": 4.798791835386491e-06, + "loss": 1.2812, + "step": 23292 + }, + { + "epoch": 0.6839215455986846, + "grad_norm": 0.0, + "learning_rate": 4.797979650604212e-06, + "loss": 1.416, + "step": 23293 + }, + { + "epoch": 0.6839509072758236, + "grad_norm": 0.0, + "learning_rate": 4.797167512865131e-06, + "loss": 1.2012, + "step": 23294 + }, + { + "epoch": 0.6839802689529626, + "grad_norm": 0.0, + "learning_rate": 4.7963554221766e-06, + "loss": 1.1938, + "step": 23295 + }, + { + "epoch": 0.6840096306301016, + "grad_norm": 0.0, + "learning_rate": 4.7955433785459615e-06, + "loss": 1.1831, + "step": 23296 + }, + { + "epoch": 0.6840389923072406, + "grad_norm": 0.0, + "learning_rate": 4.794731381980556e-06, + "loss": 1.1392, + "step": 23297 + }, + { + "epoch": 0.6840683539843796, + "grad_norm": 0.0, + "learning_rate": 4.793919432487725e-06, + "loss": 1.2476, + "step": 23298 + }, + { + "epoch": 0.6840977156615186, + "grad_norm": 0.0, + "learning_rate": 4.793107530074817e-06, + "loss": 1.2705, + "step": 23299 + }, + { + "epoch": 0.6841270773386576, + "grad_norm": 0.0, + "learning_rate": 4.79229567474917e-06, + "loss": 1.2651, + "step": 23300 + }, + { + "epoch": 0.6841564390157966, + "grad_norm": 0.0, + "learning_rate": 4.791483866518124e-06, + "loss": 1.3691, + "step": 23301 + }, + { + "epoch": 0.6841858006929356, + "grad_norm": 0.0, + "learning_rate": 4.790672105389028e-06, + "loss": 1.2446, + "step": 23302 + }, + { + "epoch": 0.6842151623700746, + "grad_norm": 0.0, + "learning_rate": 4.789860391369213e-06, + "loss": 1.2427, + "step": 23303 + }, + { + "epoch": 0.6842445240472136, + "grad_norm": 0.0, + "learning_rate": 4.78904872446603e-06, + "loss": 1.2075, + "step": 23304 + }, + { + "epoch": 0.6842738857243525, + "grad_norm": 0.0, + "learning_rate": 4.7882371046868125e-06, + "loss": 1.1821, + "step": 23305 + }, + { + "epoch": 0.6843032474014916, + "grad_norm": 0.0, + "learning_rate": 4.7874255320389025e-06, + "loss": 1.2627, + "step": 23306 + }, + { + "epoch": 0.6843326090786306, + "grad_norm": 0.0, + "learning_rate": 4.786614006529634e-06, + "loss": 1.2197, + "step": 23307 + }, + { + "epoch": 0.6843619707557695, + "grad_norm": 0.0, + "learning_rate": 4.7858025281663546e-06, + "loss": 1.0918, + "step": 23308 + }, + { + "epoch": 0.6843913324329086, + "grad_norm": 0.0, + "learning_rate": 4.784991096956398e-06, + "loss": 1.2012, + "step": 23309 + }, + { + "epoch": 0.6844206941100476, + "grad_norm": 0.0, + "learning_rate": 4.7841797129070986e-06, + "loss": 1.29, + "step": 23310 + }, + { + "epoch": 0.6844500557871865, + "grad_norm": 0.0, + "learning_rate": 4.783368376025802e-06, + "loss": 1.2148, + "step": 23311 + }, + { + "epoch": 0.6844794174643256, + "grad_norm": 0.0, + "learning_rate": 4.7825570863198415e-06, + "loss": 1.2178, + "step": 23312 + }, + { + "epoch": 0.6845087791414646, + "grad_norm": 0.0, + "learning_rate": 4.781745843796552e-06, + "loss": 1.2666, + "step": 23313 + }, + { + "epoch": 0.6845381408186035, + "grad_norm": 0.0, + "learning_rate": 4.780934648463269e-06, + "loss": 1.293, + "step": 23314 + }, + { + "epoch": 0.6845675024957426, + "grad_norm": 0.0, + "learning_rate": 4.780123500327334e-06, + "loss": 1.3174, + "step": 23315 + }, + { + "epoch": 0.6845968641728816, + "grad_norm": 0.0, + "learning_rate": 4.779312399396076e-06, + "loss": 1.2134, + "step": 23316 + }, + { + "epoch": 0.6846262258500205, + "grad_norm": 0.0, + "learning_rate": 4.7785013456768346e-06, + "loss": 1.2012, + "step": 23317 + }, + { + "epoch": 0.6846555875271596, + "grad_norm": 0.0, + "learning_rate": 4.777690339176942e-06, + "loss": 1.2725, + "step": 23318 + }, + { + "epoch": 0.6846849492042986, + "grad_norm": 0.0, + "learning_rate": 4.776879379903734e-06, + "loss": 1.3779, + "step": 23319 + }, + { + "epoch": 0.6847143108814375, + "grad_norm": 0.0, + "learning_rate": 4.7760684678645445e-06, + "loss": 1.4473, + "step": 23320 + }, + { + "epoch": 0.6847436725585766, + "grad_norm": 0.0, + "learning_rate": 4.775257603066707e-06, + "loss": 1.2568, + "step": 23321 + }, + { + "epoch": 0.6847730342357156, + "grad_norm": 0.0, + "learning_rate": 4.77444678551755e-06, + "loss": 1.2979, + "step": 23322 + }, + { + "epoch": 0.6848023959128545, + "grad_norm": 0.0, + "learning_rate": 4.773636015224408e-06, + "loss": 1.3228, + "step": 23323 + }, + { + "epoch": 0.6848317575899936, + "grad_norm": 0.0, + "learning_rate": 4.772825292194617e-06, + "loss": 1.2256, + "step": 23324 + }, + { + "epoch": 0.6848611192671326, + "grad_norm": 0.0, + "learning_rate": 4.772014616435502e-06, + "loss": 1.145, + "step": 23325 + }, + { + "epoch": 0.6848904809442715, + "grad_norm": 0.0, + "learning_rate": 4.771203987954401e-06, + "loss": 1.2563, + "step": 23326 + }, + { + "epoch": 0.6849198426214105, + "grad_norm": 0.0, + "learning_rate": 4.7703934067586375e-06, + "loss": 1.3037, + "step": 23327 + }, + { + "epoch": 0.6849492042985496, + "grad_norm": 0.0, + "learning_rate": 4.7695828728555505e-06, + "loss": 1.2271, + "step": 23328 + }, + { + "epoch": 0.6849785659756885, + "grad_norm": 0.0, + "learning_rate": 4.768772386252464e-06, + "loss": 1.2529, + "step": 23329 + }, + { + "epoch": 0.6850079276528275, + "grad_norm": 0.0, + "learning_rate": 4.767961946956708e-06, + "loss": 1.3252, + "step": 23330 + }, + { + "epoch": 0.6850372893299665, + "grad_norm": 0.0, + "learning_rate": 4.767151554975614e-06, + "loss": 1.1992, + "step": 23331 + }, + { + "epoch": 0.6850666510071055, + "grad_norm": 0.0, + "learning_rate": 4.766341210316504e-06, + "loss": 1.3262, + "step": 23332 + }, + { + "epoch": 0.6850960126842445, + "grad_norm": 0.0, + "learning_rate": 4.7655309129867145e-06, + "loss": 1.1602, + "step": 23333 + }, + { + "epoch": 0.6851253743613835, + "grad_norm": 0.0, + "learning_rate": 4.764720662993566e-06, + "loss": 1.415, + "step": 23334 + }, + { + "epoch": 0.6851547360385225, + "grad_norm": 0.0, + "learning_rate": 4.763910460344393e-06, + "loss": 1.3525, + "step": 23335 + }, + { + "epoch": 0.6851840977156615, + "grad_norm": 0.0, + "learning_rate": 4.763100305046514e-06, + "loss": 1.1934, + "step": 23336 + }, + { + "epoch": 0.6852134593928005, + "grad_norm": 0.0, + "learning_rate": 4.762290197107269e-06, + "loss": 1.2715, + "step": 23337 + }, + { + "epoch": 0.6852428210699395, + "grad_norm": 0.0, + "learning_rate": 4.761480136533967e-06, + "loss": 1.1807, + "step": 23338 + }, + { + "epoch": 0.6852721827470785, + "grad_norm": 0.0, + "learning_rate": 4.7606701233339454e-06, + "loss": 1.4131, + "step": 23339 + }, + { + "epoch": 0.6853015444242175, + "grad_norm": 0.0, + "learning_rate": 4.759860157514526e-06, + "loss": 1.2842, + "step": 23340 + }, + { + "epoch": 0.6853309061013565, + "grad_norm": 0.0, + "learning_rate": 4.75905023908303e-06, + "loss": 1.1543, + "step": 23341 + }, + { + "epoch": 0.6853602677784955, + "grad_norm": 0.0, + "learning_rate": 4.758240368046787e-06, + "loss": 1.1382, + "step": 23342 + }, + { + "epoch": 0.6853896294556345, + "grad_norm": 0.0, + "learning_rate": 4.757430544413116e-06, + "loss": 1.1416, + "step": 23343 + }, + { + "epoch": 0.6854189911327735, + "grad_norm": 0.0, + "learning_rate": 4.756620768189347e-06, + "loss": 1.3242, + "step": 23344 + }, + { + "epoch": 0.6854483528099125, + "grad_norm": 0.0, + "learning_rate": 4.7558110393828e-06, + "loss": 1.3223, + "step": 23345 + }, + { + "epoch": 0.6854777144870515, + "grad_norm": 0.0, + "learning_rate": 4.755001358000796e-06, + "loss": 1.293, + "step": 23346 + }, + { + "epoch": 0.6855070761641905, + "grad_norm": 0.0, + "learning_rate": 4.7541917240506545e-06, + "loss": 1.2197, + "step": 23347 + }, + { + "epoch": 0.6855364378413294, + "grad_norm": 0.0, + "learning_rate": 4.753382137539704e-06, + "loss": 1.2441, + "step": 23348 + }, + { + "epoch": 0.6855657995184685, + "grad_norm": 0.0, + "learning_rate": 4.7525725984752625e-06, + "loss": 1.2695, + "step": 23349 + }, + { + "epoch": 0.6855951611956075, + "grad_norm": 0.0, + "learning_rate": 4.751763106864648e-06, + "loss": 1.2119, + "step": 23350 + }, + { + "epoch": 0.6856245228727464, + "grad_norm": 0.0, + "learning_rate": 4.750953662715186e-06, + "loss": 1.1553, + "step": 23351 + }, + { + "epoch": 0.6856538845498855, + "grad_norm": 0.0, + "learning_rate": 4.750144266034192e-06, + "loss": 1.1699, + "step": 23352 + }, + { + "epoch": 0.6856832462270245, + "grad_norm": 0.0, + "learning_rate": 4.749334916828992e-06, + "loss": 1.21, + "step": 23353 + }, + { + "epoch": 0.6857126079041634, + "grad_norm": 0.0, + "learning_rate": 4.7485256151069e-06, + "loss": 1.1792, + "step": 23354 + }, + { + "epoch": 0.6857419695813025, + "grad_norm": 0.0, + "learning_rate": 4.747716360875235e-06, + "loss": 1.1958, + "step": 23355 + }, + { + "epoch": 0.6857713312584415, + "grad_norm": 0.0, + "learning_rate": 4.746907154141314e-06, + "loss": 1.2969, + "step": 23356 + }, + { + "epoch": 0.6858006929355804, + "grad_norm": 0.0, + "learning_rate": 4.74609799491246e-06, + "loss": 1.1382, + "step": 23357 + }, + { + "epoch": 0.6858300546127195, + "grad_norm": 0.0, + "learning_rate": 4.745288883195988e-06, + "loss": 1.1924, + "step": 23358 + }, + { + "epoch": 0.6858594162898585, + "grad_norm": 0.0, + "learning_rate": 4.74447981899921e-06, + "loss": 1.4355, + "step": 23359 + }, + { + "epoch": 0.6858887779669974, + "grad_norm": 0.0, + "learning_rate": 4.743670802329451e-06, + "loss": 1.2168, + "step": 23360 + }, + { + "epoch": 0.6859181396441365, + "grad_norm": 0.0, + "learning_rate": 4.742861833194019e-06, + "loss": 1.2598, + "step": 23361 + }, + { + "epoch": 0.6859475013212755, + "grad_norm": 0.0, + "learning_rate": 4.742052911600241e-06, + "loss": 1.2495, + "step": 23362 + }, + { + "epoch": 0.6859768629984144, + "grad_norm": 0.0, + "learning_rate": 4.741244037555418e-06, + "loss": 1.2778, + "step": 23363 + }, + { + "epoch": 0.6860062246755535, + "grad_norm": 0.0, + "learning_rate": 4.740435211066876e-06, + "loss": 1.1846, + "step": 23364 + }, + { + "epoch": 0.6860355863526925, + "grad_norm": 0.0, + "learning_rate": 4.739626432141922e-06, + "loss": 1.1782, + "step": 23365 + }, + { + "epoch": 0.6860649480298314, + "grad_norm": 0.0, + "learning_rate": 4.738817700787876e-06, + "loss": 1.3145, + "step": 23366 + }, + { + "epoch": 0.6860943097069705, + "grad_norm": 0.0, + "learning_rate": 4.738009017012045e-06, + "loss": 1.25, + "step": 23367 + }, + { + "epoch": 0.6861236713841095, + "grad_norm": 0.0, + "learning_rate": 4.737200380821752e-06, + "loss": 1.3828, + "step": 23368 + }, + { + "epoch": 0.6861530330612484, + "grad_norm": 0.0, + "learning_rate": 4.736391792224302e-06, + "loss": 1.1885, + "step": 23369 + }, + { + "epoch": 0.6861823947383875, + "grad_norm": 0.0, + "learning_rate": 4.7355832512270094e-06, + "loss": 1.2939, + "step": 23370 + }, + { + "epoch": 0.6862117564155265, + "grad_norm": 0.0, + "learning_rate": 4.734774757837185e-06, + "loss": 1.375, + "step": 23371 + }, + { + "epoch": 0.6862411180926654, + "grad_norm": 0.0, + "learning_rate": 4.733966312062138e-06, + "loss": 1.2266, + "step": 23372 + }, + { + "epoch": 0.6862704797698045, + "grad_norm": 0.0, + "learning_rate": 4.733157913909186e-06, + "loss": 1.2598, + "step": 23373 + }, + { + "epoch": 0.6862998414469434, + "grad_norm": 0.0, + "learning_rate": 4.732349563385632e-06, + "loss": 1.3242, + "step": 23374 + }, + { + "epoch": 0.6863292031240824, + "grad_norm": 0.0, + "learning_rate": 4.731541260498793e-06, + "loss": 1.2051, + "step": 23375 + }, + { + "epoch": 0.6863585648012215, + "grad_norm": 0.0, + "learning_rate": 4.730733005255973e-06, + "loss": 1.2031, + "step": 23376 + }, + { + "epoch": 0.6863879264783604, + "grad_norm": 0.0, + "learning_rate": 4.729924797664488e-06, + "loss": 1.2656, + "step": 23377 + }, + { + "epoch": 0.6864172881554994, + "grad_norm": 0.0, + "learning_rate": 4.729116637731641e-06, + "loss": 1.1562, + "step": 23378 + }, + { + "epoch": 0.6864466498326385, + "grad_norm": 0.0, + "learning_rate": 4.728308525464743e-06, + "loss": 1.1631, + "step": 23379 + }, + { + "epoch": 0.6864760115097774, + "grad_norm": 0.0, + "learning_rate": 4.727500460871102e-06, + "loss": 1.2725, + "step": 23380 + }, + { + "epoch": 0.6865053731869164, + "grad_norm": 0.0, + "learning_rate": 4.7266924439580195e-06, + "loss": 1.3174, + "step": 23381 + }, + { + "epoch": 0.6865347348640555, + "grad_norm": 0.0, + "learning_rate": 4.725884474732812e-06, + "loss": 1.1938, + "step": 23382 + }, + { + "epoch": 0.6865640965411944, + "grad_norm": 0.0, + "learning_rate": 4.725076553202779e-06, + "loss": 1.1279, + "step": 23383 + }, + { + "epoch": 0.6865934582183334, + "grad_norm": 0.0, + "learning_rate": 4.7242686793752324e-06, + "loss": 1.2881, + "step": 23384 + }, + { + "epoch": 0.6866228198954725, + "grad_norm": 0.0, + "learning_rate": 4.7234608532574725e-06, + "loss": 1.2168, + "step": 23385 + }, + { + "epoch": 0.6866521815726114, + "grad_norm": 0.0, + "learning_rate": 4.72265307485681e-06, + "loss": 1.2637, + "step": 23386 + }, + { + "epoch": 0.6866815432497504, + "grad_norm": 0.0, + "learning_rate": 4.721845344180548e-06, + "loss": 1.0479, + "step": 23387 + }, + { + "epoch": 0.6867109049268895, + "grad_norm": 0.0, + "learning_rate": 4.72103766123599e-06, + "loss": 1.1636, + "step": 23388 + }, + { + "epoch": 0.6867402666040284, + "grad_norm": 0.0, + "learning_rate": 4.72023002603044e-06, + "loss": 1.1938, + "step": 23389 + }, + { + "epoch": 0.6867696282811674, + "grad_norm": 0.0, + "learning_rate": 4.7194224385712e-06, + "loss": 1.2417, + "step": 23390 + }, + { + "epoch": 0.6867989899583065, + "grad_norm": 0.0, + "learning_rate": 4.718614898865577e-06, + "loss": 1.1953, + "step": 23391 + }, + { + "epoch": 0.6868283516354454, + "grad_norm": 0.0, + "learning_rate": 4.717807406920869e-06, + "loss": 1.2002, + "step": 23392 + }, + { + "epoch": 0.6868577133125844, + "grad_norm": 0.0, + "learning_rate": 4.716999962744385e-06, + "loss": 1.2031, + "step": 23393 + }, + { + "epoch": 0.6868870749897235, + "grad_norm": 0.0, + "learning_rate": 4.716192566343424e-06, + "loss": 1.3154, + "step": 23394 + }, + { + "epoch": 0.6869164366668624, + "grad_norm": 0.0, + "learning_rate": 4.715385217725287e-06, + "loss": 1.2871, + "step": 23395 + }, + { + "epoch": 0.6869457983440014, + "grad_norm": 0.0, + "learning_rate": 4.71457791689727e-06, + "loss": 1.1826, + "step": 23396 + }, + { + "epoch": 0.6869751600211405, + "grad_norm": 0.0, + "learning_rate": 4.713770663866682e-06, + "loss": 1.2251, + "step": 23397 + }, + { + "epoch": 0.6870045216982794, + "grad_norm": 0.0, + "learning_rate": 4.712963458640821e-06, + "loss": 1.3545, + "step": 23398 + }, + { + "epoch": 0.6870338833754184, + "grad_norm": 0.0, + "learning_rate": 4.712156301226981e-06, + "loss": 1.1621, + "step": 23399 + }, + { + "epoch": 0.6870632450525574, + "grad_norm": 0.0, + "learning_rate": 4.7113491916324705e-06, + "loss": 1.2095, + "step": 23400 + }, + { + "epoch": 0.6870926067296964, + "grad_norm": 0.0, + "learning_rate": 4.710542129864579e-06, + "loss": 1.0737, + "step": 23401 + }, + { + "epoch": 0.6871219684068354, + "grad_norm": 0.0, + "learning_rate": 4.7097351159306135e-06, + "loss": 1.2017, + "step": 23402 + }, + { + "epoch": 0.6871513300839744, + "grad_norm": 0.0, + "learning_rate": 4.708928149837869e-06, + "loss": 1.1353, + "step": 23403 + }, + { + "epoch": 0.6871806917611134, + "grad_norm": 0.0, + "learning_rate": 4.708121231593642e-06, + "loss": 1.3516, + "step": 23404 + }, + { + "epoch": 0.6872100534382524, + "grad_norm": 0.0, + "learning_rate": 4.707314361205226e-06, + "loss": 1.1836, + "step": 23405 + }, + { + "epoch": 0.6872394151153914, + "grad_norm": 0.0, + "learning_rate": 4.706507538679925e-06, + "loss": 1.1729, + "step": 23406 + }, + { + "epoch": 0.6872687767925304, + "grad_norm": 0.0, + "learning_rate": 4.705700764025032e-06, + "loss": 1.2344, + "step": 23407 + }, + { + "epoch": 0.6872981384696694, + "grad_norm": 0.0, + "learning_rate": 4.70489403724784e-06, + "loss": 1.2183, + "step": 23408 + }, + { + "epoch": 0.6873275001468084, + "grad_norm": 0.0, + "learning_rate": 4.70408735835565e-06, + "loss": 1.2285, + "step": 23409 + }, + { + "epoch": 0.6873568618239474, + "grad_norm": 0.0, + "learning_rate": 4.703280727355751e-06, + "loss": 1.3076, + "step": 23410 + }, + { + "epoch": 0.6873862235010864, + "grad_norm": 0.0, + "learning_rate": 4.702474144255445e-06, + "loss": 1.2925, + "step": 23411 + }, + { + "epoch": 0.6874155851782254, + "grad_norm": 0.0, + "learning_rate": 4.701667609062021e-06, + "loss": 1.208, + "step": 23412 + }, + { + "epoch": 0.6874449468553644, + "grad_norm": 0.0, + "learning_rate": 4.700861121782776e-06, + "loss": 1.2197, + "step": 23413 + }, + { + "epoch": 0.6874743085325034, + "grad_norm": 0.0, + "learning_rate": 4.700054682424996e-06, + "loss": 1.3057, + "step": 23414 + }, + { + "epoch": 0.6875036702096424, + "grad_norm": 0.0, + "learning_rate": 4.699248290995982e-06, + "loss": 1.2588, + "step": 23415 + }, + { + "epoch": 0.6875330318867814, + "grad_norm": 0.0, + "learning_rate": 4.698441947503019e-06, + "loss": 1.1729, + "step": 23416 + }, + { + "epoch": 0.6875623935639203, + "grad_norm": 0.0, + "learning_rate": 4.697635651953408e-06, + "loss": 1.2012, + "step": 23417 + }, + { + "epoch": 0.6875917552410594, + "grad_norm": 0.0, + "learning_rate": 4.696829404354436e-06, + "loss": 1.2959, + "step": 23418 + }, + { + "epoch": 0.6876211169181984, + "grad_norm": 0.0, + "learning_rate": 4.6960232047133935e-06, + "loss": 1.2881, + "step": 23419 + }, + { + "epoch": 0.6876504785953373, + "grad_norm": 0.0, + "learning_rate": 4.695217053037572e-06, + "loss": 1.3936, + "step": 23420 + }, + { + "epoch": 0.6876798402724764, + "grad_norm": 0.0, + "learning_rate": 4.694410949334257e-06, + "loss": 1.2363, + "step": 23421 + }, + { + "epoch": 0.6877092019496154, + "grad_norm": 0.0, + "learning_rate": 4.693604893610746e-06, + "loss": 1.3379, + "step": 23422 + }, + { + "epoch": 0.6877385636267543, + "grad_norm": 0.0, + "learning_rate": 4.692798885874322e-06, + "loss": 1.1284, + "step": 23423 + }, + { + "epoch": 0.6877679253038934, + "grad_norm": 0.0, + "learning_rate": 4.69199292613228e-06, + "loss": 1.2744, + "step": 23424 + }, + { + "epoch": 0.6877972869810324, + "grad_norm": 0.0, + "learning_rate": 4.691187014391902e-06, + "loss": 1.228, + "step": 23425 + }, + { + "epoch": 0.6878266486581713, + "grad_norm": 0.0, + "learning_rate": 4.690381150660482e-06, + "loss": 1.292, + "step": 23426 + }, + { + "epoch": 0.6878560103353104, + "grad_norm": 0.0, + "learning_rate": 4.689575334945306e-06, + "loss": 1.1846, + "step": 23427 + }, + { + "epoch": 0.6878853720124494, + "grad_norm": 0.0, + "learning_rate": 4.688769567253661e-06, + "loss": 1.1392, + "step": 23428 + }, + { + "epoch": 0.6879147336895883, + "grad_norm": 0.0, + "learning_rate": 4.687963847592832e-06, + "loss": 1.3184, + "step": 23429 + }, + { + "epoch": 0.6879440953667273, + "grad_norm": 0.0, + "learning_rate": 4.687158175970104e-06, + "loss": 1.3364, + "step": 23430 + }, + { + "epoch": 0.6879734570438664, + "grad_norm": 0.0, + "learning_rate": 4.686352552392768e-06, + "loss": 1.2949, + "step": 23431 + }, + { + "epoch": 0.6880028187210053, + "grad_norm": 0.0, + "learning_rate": 4.6855469768681025e-06, + "loss": 1.2002, + "step": 23432 + }, + { + "epoch": 0.6880321803981443, + "grad_norm": 0.0, + "learning_rate": 4.6847414494034e-06, + "loss": 1.2871, + "step": 23433 + }, + { + "epoch": 0.6880615420752834, + "grad_norm": 0.0, + "learning_rate": 4.683935970005939e-06, + "loss": 1.1792, + "step": 23434 + }, + { + "epoch": 0.6880909037524223, + "grad_norm": 0.0, + "learning_rate": 4.683130538683011e-06, + "loss": 1.2637, + "step": 23435 + }, + { + "epoch": 0.6881202654295613, + "grad_norm": 0.0, + "learning_rate": 4.6823251554418945e-06, + "loss": 1.2773, + "step": 23436 + }, + { + "epoch": 0.6881496271067004, + "grad_norm": 0.0, + "learning_rate": 4.681519820289874e-06, + "loss": 1.2334, + "step": 23437 + }, + { + "epoch": 0.6881789887838393, + "grad_norm": 0.0, + "learning_rate": 4.680714533234231e-06, + "loss": 1.3896, + "step": 23438 + }, + { + "epoch": 0.6882083504609783, + "grad_norm": 0.0, + "learning_rate": 4.679909294282246e-06, + "loss": 1.2817, + "step": 23439 + }, + { + "epoch": 0.6882377121381174, + "grad_norm": 0.0, + "learning_rate": 4.679104103441206e-06, + "loss": 1.2705, + "step": 23440 + }, + { + "epoch": 0.6882670738152563, + "grad_norm": 0.0, + "learning_rate": 4.6782989607183875e-06, + "loss": 1.2695, + "step": 23441 + }, + { + "epoch": 0.6882964354923953, + "grad_norm": 0.0, + "learning_rate": 4.6774938661210775e-06, + "loss": 1.1162, + "step": 23442 + }, + { + "epoch": 0.6883257971695343, + "grad_norm": 0.0, + "learning_rate": 4.676688819656554e-06, + "loss": 1.2705, + "step": 23443 + }, + { + "epoch": 0.6883551588466733, + "grad_norm": 0.0, + "learning_rate": 4.675883821332097e-06, + "loss": 1.2344, + "step": 23444 + }, + { + "epoch": 0.6883845205238123, + "grad_norm": 0.0, + "learning_rate": 4.675078871154982e-06, + "loss": 1.1909, + "step": 23445 + }, + { + "epoch": 0.6884138822009513, + "grad_norm": 0.0, + "learning_rate": 4.6742739691324965e-06, + "loss": 1.3418, + "step": 23446 + }, + { + "epoch": 0.6884432438780903, + "grad_norm": 0.0, + "learning_rate": 4.6734691152719155e-06, + "loss": 1.2197, + "step": 23447 + }, + { + "epoch": 0.6884726055552293, + "grad_norm": 0.0, + "learning_rate": 4.672664309580513e-06, + "loss": 1.2402, + "step": 23448 + }, + { + "epoch": 0.6885019672323683, + "grad_norm": 0.0, + "learning_rate": 4.671859552065577e-06, + "loss": 1.1602, + "step": 23449 + }, + { + "epoch": 0.6885313289095073, + "grad_norm": 0.0, + "learning_rate": 4.6710548427343745e-06, + "loss": 1.3398, + "step": 23450 + }, + { + "epoch": 0.6885606905866463, + "grad_norm": 0.0, + "learning_rate": 4.670250181594192e-06, + "loss": 1.3271, + "step": 23451 + }, + { + "epoch": 0.6885900522637853, + "grad_norm": 0.0, + "learning_rate": 4.6694455686523025e-06, + "loss": 1.1353, + "step": 23452 + }, + { + "epoch": 0.6886194139409243, + "grad_norm": 0.0, + "learning_rate": 4.668641003915982e-06, + "loss": 1.2285, + "step": 23453 + }, + { + "epoch": 0.6886487756180633, + "grad_norm": 0.0, + "learning_rate": 4.667836487392501e-06, + "loss": 1.1655, + "step": 23454 + }, + { + "epoch": 0.6886781372952023, + "grad_norm": 0.0, + "learning_rate": 4.667032019089145e-06, + "loss": 1.2246, + "step": 23455 + }, + { + "epoch": 0.6887074989723413, + "grad_norm": 0.0, + "learning_rate": 4.66622759901318e-06, + "loss": 1.2637, + "step": 23456 + }, + { + "epoch": 0.6887368606494803, + "grad_norm": 0.0, + "learning_rate": 4.665423227171889e-06, + "loss": 1.3652, + "step": 23457 + }, + { + "epoch": 0.6887662223266193, + "grad_norm": 0.0, + "learning_rate": 4.664618903572542e-06, + "loss": 1.3291, + "step": 23458 + }, + { + "epoch": 0.6887955840037583, + "grad_norm": 0.0, + "learning_rate": 4.663814628222408e-06, + "loss": 1.2822, + "step": 23459 + }, + { + "epoch": 0.6888249456808972, + "grad_norm": 0.0, + "learning_rate": 4.66301040112877e-06, + "loss": 1.1953, + "step": 23460 + }, + { + "epoch": 0.6888543073580363, + "grad_norm": 0.0, + "learning_rate": 4.662206222298895e-06, + "loss": 1.2246, + "step": 23461 + }, + { + "epoch": 0.6888836690351753, + "grad_norm": 0.0, + "learning_rate": 4.661402091740057e-06, + "loss": 1.1685, + "step": 23462 + }, + { + "epoch": 0.6889130307123142, + "grad_norm": 0.0, + "learning_rate": 4.660598009459523e-06, + "loss": 1.2119, + "step": 23463 + }, + { + "epoch": 0.6889423923894533, + "grad_norm": 0.0, + "learning_rate": 4.659793975464572e-06, + "loss": 1.3184, + "step": 23464 + }, + { + "epoch": 0.6889717540665923, + "grad_norm": 0.0, + "learning_rate": 4.658989989762468e-06, + "loss": 1.2441, + "step": 23465 + }, + { + "epoch": 0.6890011157437312, + "grad_norm": 0.0, + "learning_rate": 4.658186052360489e-06, + "loss": 1.2148, + "step": 23466 + }, + { + "epoch": 0.6890304774208703, + "grad_norm": 0.0, + "learning_rate": 4.6573821632659026e-06, + "loss": 1.3887, + "step": 23467 + }, + { + "epoch": 0.6890598390980093, + "grad_norm": 0.0, + "learning_rate": 4.656578322485976e-06, + "loss": 1.1812, + "step": 23468 + }, + { + "epoch": 0.6890892007751482, + "grad_norm": 0.0, + "learning_rate": 4.655774530027981e-06, + "loss": 1.2148, + "step": 23469 + }, + { + "epoch": 0.6891185624522873, + "grad_norm": 0.0, + "learning_rate": 4.654970785899182e-06, + "loss": 1.2002, + "step": 23470 + }, + { + "epoch": 0.6891479241294263, + "grad_norm": 0.0, + "learning_rate": 4.654167090106855e-06, + "loss": 1.2607, + "step": 23471 + }, + { + "epoch": 0.6891772858065652, + "grad_norm": 0.0, + "learning_rate": 4.653363442658259e-06, + "loss": 1.2959, + "step": 23472 + }, + { + "epoch": 0.6892066474837043, + "grad_norm": 0.0, + "learning_rate": 4.652559843560671e-06, + "loss": 1.3506, + "step": 23473 + }, + { + "epoch": 0.6892360091608433, + "grad_norm": 0.0, + "learning_rate": 4.651756292821351e-06, + "loss": 1.3223, + "step": 23474 + }, + { + "epoch": 0.6892653708379822, + "grad_norm": 0.0, + "learning_rate": 4.6509527904475714e-06, + "loss": 1.2607, + "step": 23475 + }, + { + "epoch": 0.6892947325151213, + "grad_norm": 0.0, + "learning_rate": 4.650149336446596e-06, + "loss": 1.2656, + "step": 23476 + }, + { + "epoch": 0.6893240941922603, + "grad_norm": 0.0, + "learning_rate": 4.64934593082569e-06, + "loss": 1.2764, + "step": 23477 + }, + { + "epoch": 0.6893534558693992, + "grad_norm": 0.0, + "learning_rate": 4.64854257359212e-06, + "loss": 1.3389, + "step": 23478 + }, + { + "epoch": 0.6893828175465383, + "grad_norm": 0.0, + "learning_rate": 4.647739264753145e-06, + "loss": 1.1157, + "step": 23479 + }, + { + "epoch": 0.6894121792236773, + "grad_norm": 0.0, + "learning_rate": 4.646936004316039e-06, + "loss": 1.0566, + "step": 23480 + }, + { + "epoch": 0.6894415409008162, + "grad_norm": 0.0, + "learning_rate": 4.646132792288059e-06, + "loss": 1.2373, + "step": 23481 + }, + { + "epoch": 0.6894709025779553, + "grad_norm": 0.0, + "learning_rate": 4.645329628676472e-06, + "loss": 1.272, + "step": 23482 + }, + { + "epoch": 0.6895002642550943, + "grad_norm": 0.0, + "learning_rate": 4.64452651348854e-06, + "loss": 1.3154, + "step": 23483 + }, + { + "epoch": 0.6895296259322332, + "grad_norm": 0.0, + "learning_rate": 4.643723446731529e-06, + "loss": 1.1279, + "step": 23484 + }, + { + "epoch": 0.6895589876093723, + "grad_norm": 0.0, + "learning_rate": 4.642920428412697e-06, + "loss": 1.2319, + "step": 23485 + }, + { + "epoch": 0.6895883492865112, + "grad_norm": 0.0, + "learning_rate": 4.642117458539309e-06, + "loss": 1.2168, + "step": 23486 + }, + { + "epoch": 0.6896177109636502, + "grad_norm": 0.0, + "learning_rate": 4.641314537118625e-06, + "loss": 1.3291, + "step": 23487 + }, + { + "epoch": 0.6896470726407893, + "grad_norm": 0.0, + "learning_rate": 4.640511664157903e-06, + "loss": 1.2378, + "step": 23488 + }, + { + "epoch": 0.6896764343179282, + "grad_norm": 0.0, + "learning_rate": 4.639708839664409e-06, + "loss": 1.1846, + "step": 23489 + }, + { + "epoch": 0.6897057959950672, + "grad_norm": 0.0, + "learning_rate": 4.638906063645398e-06, + "loss": 1.1748, + "step": 23490 + }, + { + "epoch": 0.6897351576722063, + "grad_norm": 0.0, + "learning_rate": 4.6381033361081355e-06, + "loss": 1.3379, + "step": 23491 + }, + { + "epoch": 0.6897645193493452, + "grad_norm": 0.0, + "learning_rate": 4.637300657059877e-06, + "loss": 1.29, + "step": 23492 + }, + { + "epoch": 0.6897938810264842, + "grad_norm": 0.0, + "learning_rate": 4.636498026507883e-06, + "loss": 1.0601, + "step": 23493 + }, + { + "epoch": 0.6898232427036233, + "grad_norm": 0.0, + "learning_rate": 4.635695444459408e-06, + "loss": 1.2344, + "step": 23494 + }, + { + "epoch": 0.6898526043807622, + "grad_norm": 0.0, + "learning_rate": 4.634892910921715e-06, + "loss": 1.3574, + "step": 23495 + }, + { + "epoch": 0.6898819660579012, + "grad_norm": 0.0, + "learning_rate": 4.634090425902061e-06, + "loss": 1.229, + "step": 23496 + }, + { + "epoch": 0.6899113277350403, + "grad_norm": 0.0, + "learning_rate": 4.633287989407696e-06, + "loss": 1.2998, + "step": 23497 + }, + { + "epoch": 0.6899406894121792, + "grad_norm": 0.0, + "learning_rate": 4.632485601445887e-06, + "loss": 1.2646, + "step": 23498 + }, + { + "epoch": 0.6899700510893182, + "grad_norm": 0.0, + "learning_rate": 4.631683262023883e-06, + "loss": 1.1899, + "step": 23499 + }, + { + "epoch": 0.6899994127664573, + "grad_norm": 0.0, + "learning_rate": 4.630880971148944e-06, + "loss": 1.2529, + "step": 23500 + }, + { + "epoch": 0.6900287744435962, + "grad_norm": 0.0, + "learning_rate": 4.6300787288283235e-06, + "loss": 1.252, + "step": 23501 + }, + { + "epoch": 0.6900581361207352, + "grad_norm": 0.0, + "learning_rate": 4.629276535069277e-06, + "loss": 1.4141, + "step": 23502 + }, + { + "epoch": 0.6900874977978743, + "grad_norm": 0.0, + "learning_rate": 4.628474389879054e-06, + "loss": 1.3311, + "step": 23503 + }, + { + "epoch": 0.6901168594750132, + "grad_norm": 0.0, + "learning_rate": 4.627672293264916e-06, + "loss": 1.2158, + "step": 23504 + }, + { + "epoch": 0.6901462211521522, + "grad_norm": 0.0, + "learning_rate": 4.626870245234111e-06, + "loss": 1.3555, + "step": 23505 + }, + { + "epoch": 0.6901755828292913, + "grad_norm": 0.0, + "learning_rate": 4.626068245793898e-06, + "loss": 1.1431, + "step": 23506 + }, + { + "epoch": 0.6902049445064302, + "grad_norm": 0.0, + "learning_rate": 4.625266294951525e-06, + "loss": 1.3193, + "step": 23507 + }, + { + "epoch": 0.6902343061835692, + "grad_norm": 0.0, + "learning_rate": 4.624464392714242e-06, + "loss": 1.1514, + "step": 23508 + }, + { + "epoch": 0.6902636678607083, + "grad_norm": 0.0, + "learning_rate": 4.623662539089309e-06, + "loss": 1.3418, + "step": 23509 + }, + { + "epoch": 0.6902930295378472, + "grad_norm": 0.0, + "learning_rate": 4.622860734083972e-06, + "loss": 1.2861, + "step": 23510 + }, + { + "epoch": 0.6903223912149862, + "grad_norm": 0.0, + "learning_rate": 4.622058977705482e-06, + "loss": 1.2251, + "step": 23511 + }, + { + "epoch": 0.6903517528921252, + "grad_norm": 0.0, + "learning_rate": 4.621257269961086e-06, + "loss": 1.2002, + "step": 23512 + }, + { + "epoch": 0.6903811145692642, + "grad_norm": 0.0, + "learning_rate": 4.6204556108580424e-06, + "loss": 1.2334, + "step": 23513 + }, + { + "epoch": 0.6904104762464032, + "grad_norm": 0.0, + "learning_rate": 4.619654000403592e-06, + "loss": 1.1982, + "step": 23514 + }, + { + "epoch": 0.6904398379235422, + "grad_norm": 0.0, + "learning_rate": 4.618852438604993e-06, + "loss": 1.2393, + "step": 23515 + }, + { + "epoch": 0.6904691996006812, + "grad_norm": 0.0, + "learning_rate": 4.618050925469488e-06, + "loss": 1.335, + "step": 23516 + }, + { + "epoch": 0.6904985612778202, + "grad_norm": 0.0, + "learning_rate": 4.6172494610043285e-06, + "loss": 1.0283, + "step": 23517 + }, + { + "epoch": 0.6905279229549592, + "grad_norm": 0.0, + "learning_rate": 4.61644804521676e-06, + "loss": 1.1621, + "step": 23518 + }, + { + "epoch": 0.6905572846320982, + "grad_norm": 0.0, + "learning_rate": 4.615646678114027e-06, + "loss": 1.25, + "step": 23519 + }, + { + "epoch": 0.6905866463092372, + "grad_norm": 0.0, + "learning_rate": 4.614845359703384e-06, + "loss": 1.335, + "step": 23520 + }, + { + "epoch": 0.6906160079863762, + "grad_norm": 0.0, + "learning_rate": 4.61404408999207e-06, + "loss": 1.2695, + "step": 23521 + }, + { + "epoch": 0.6906453696635152, + "grad_norm": 0.0, + "learning_rate": 4.613242868987337e-06, + "loss": 1.2017, + "step": 23522 + }, + { + "epoch": 0.6906747313406542, + "grad_norm": 0.0, + "learning_rate": 4.612441696696426e-06, + "loss": 1.1479, + "step": 23523 + }, + { + "epoch": 0.6907040930177932, + "grad_norm": 0.0, + "learning_rate": 4.611640573126588e-06, + "loss": 1.165, + "step": 23524 + }, + { + "epoch": 0.6907334546949322, + "grad_norm": 0.0, + "learning_rate": 4.610839498285065e-06, + "loss": 1.356, + "step": 23525 + }, + { + "epoch": 0.6907628163720712, + "grad_norm": 0.0, + "learning_rate": 4.610038472179099e-06, + "loss": 1.2349, + "step": 23526 + }, + { + "epoch": 0.6907921780492102, + "grad_norm": 0.0, + "learning_rate": 4.6092374948159355e-06, + "loss": 1.2764, + "step": 23527 + }, + { + "epoch": 0.6908215397263492, + "grad_norm": 0.0, + "learning_rate": 4.608436566202815e-06, + "loss": 1.2856, + "step": 23528 + }, + { + "epoch": 0.6908509014034881, + "grad_norm": 0.0, + "learning_rate": 4.607635686346988e-06, + "loss": 1.3066, + "step": 23529 + }, + { + "epoch": 0.6908802630806271, + "grad_norm": 0.0, + "learning_rate": 4.6068348552556875e-06, + "loss": 1.2305, + "step": 23530 + }, + { + "epoch": 0.6909096247577662, + "grad_norm": 0.0, + "learning_rate": 4.606034072936164e-06, + "loss": 1.1719, + "step": 23531 + }, + { + "epoch": 0.6909389864349051, + "grad_norm": 0.0, + "learning_rate": 4.605233339395653e-06, + "loss": 1.2607, + "step": 23532 + }, + { + "epoch": 0.6909683481120441, + "grad_norm": 0.0, + "learning_rate": 4.604432654641402e-06, + "loss": 1.3252, + "step": 23533 + }, + { + "epoch": 0.6909977097891832, + "grad_norm": 0.0, + "learning_rate": 4.603632018680648e-06, + "loss": 1.2188, + "step": 23534 + }, + { + "epoch": 0.6910270714663221, + "grad_norm": 0.0, + "learning_rate": 4.602831431520633e-06, + "loss": 1.3447, + "step": 23535 + }, + { + "epoch": 0.6910564331434611, + "grad_norm": 0.0, + "learning_rate": 4.602030893168595e-06, + "loss": 1.4004, + "step": 23536 + }, + { + "epoch": 0.6910857948206002, + "grad_norm": 0.0, + "learning_rate": 4.601230403631769e-06, + "loss": 1.2168, + "step": 23537 + }, + { + "epoch": 0.6911151564977391, + "grad_norm": 0.0, + "learning_rate": 4.6004299629174045e-06, + "loss": 1.3867, + "step": 23538 + }, + { + "epoch": 0.6911445181748781, + "grad_norm": 0.0, + "learning_rate": 4.59962957103273e-06, + "loss": 1.3164, + "step": 23539 + }, + { + "epoch": 0.6911738798520172, + "grad_norm": 0.0, + "learning_rate": 4.598829227984992e-06, + "loss": 1.3594, + "step": 23540 + }, + { + "epoch": 0.6912032415291561, + "grad_norm": 0.0, + "learning_rate": 4.598028933781421e-06, + "loss": 1.2939, + "step": 23541 + }, + { + "epoch": 0.6912326032062951, + "grad_norm": 0.0, + "learning_rate": 4.597228688429265e-06, + "loss": 1.3379, + "step": 23542 + }, + { + "epoch": 0.6912619648834342, + "grad_norm": 0.0, + "learning_rate": 4.596428491935747e-06, + "loss": 1.2725, + "step": 23543 + }, + { + "epoch": 0.6912913265605731, + "grad_norm": 0.0, + "learning_rate": 4.5956283443081125e-06, + "loss": 1.1987, + "step": 23544 + }, + { + "epoch": 0.6913206882377121, + "grad_norm": 0.0, + "learning_rate": 4.5948282455535965e-06, + "loss": 1.2217, + "step": 23545 + }, + { + "epoch": 0.6913500499148512, + "grad_norm": 0.0, + "learning_rate": 4.594028195679428e-06, + "loss": 1.2842, + "step": 23546 + }, + { + "epoch": 0.6913794115919901, + "grad_norm": 0.0, + "learning_rate": 4.5932281946928525e-06, + "loss": 1.3096, + "step": 23547 + }, + { + "epoch": 0.6914087732691291, + "grad_norm": 0.0, + "learning_rate": 4.5924282426010936e-06, + "loss": 1.2578, + "step": 23548 + }, + { + "epoch": 0.6914381349462682, + "grad_norm": 0.0, + "learning_rate": 4.591628339411396e-06, + "loss": 1.0693, + "step": 23549 + }, + { + "epoch": 0.6914674966234071, + "grad_norm": 0.0, + "learning_rate": 4.590828485130987e-06, + "loss": 1.2217, + "step": 23550 + }, + { + "epoch": 0.6914968583005461, + "grad_norm": 0.0, + "learning_rate": 4.590028679767102e-06, + "loss": 1.2637, + "step": 23551 + }, + { + "epoch": 0.6915262199776852, + "grad_norm": 0.0, + "learning_rate": 4.589228923326969e-06, + "loss": 1.146, + "step": 23552 + }, + { + "epoch": 0.6915555816548241, + "grad_norm": 0.0, + "learning_rate": 4.58842921581783e-06, + "loss": 1.2334, + "step": 23553 + }, + { + "epoch": 0.6915849433319631, + "grad_norm": 0.0, + "learning_rate": 4.587629557246906e-06, + "loss": 1.188, + "step": 23554 + }, + { + "epoch": 0.6916143050091021, + "grad_norm": 0.0, + "learning_rate": 4.586829947621439e-06, + "loss": 1.2241, + "step": 23555 + }, + { + "epoch": 0.6916436666862411, + "grad_norm": 0.0, + "learning_rate": 4.5860303869486546e-06, + "loss": 1.3389, + "step": 23556 + }, + { + "epoch": 0.6916730283633801, + "grad_norm": 0.0, + "learning_rate": 4.58523087523578e-06, + "loss": 1.3179, + "step": 23557 + }, + { + "epoch": 0.6917023900405191, + "grad_norm": 0.0, + "learning_rate": 4.584431412490052e-06, + "loss": 1.2373, + "step": 23558 + }, + { + "epoch": 0.6917317517176581, + "grad_norm": 0.0, + "learning_rate": 4.5836319987187e-06, + "loss": 1.2891, + "step": 23559 + }, + { + "epoch": 0.6917611133947971, + "grad_norm": 0.0, + "learning_rate": 4.58283263392895e-06, + "loss": 1.3115, + "step": 23560 + }, + { + "epoch": 0.6917904750719361, + "grad_norm": 0.0, + "learning_rate": 4.582033318128028e-06, + "loss": 1.3408, + "step": 23561 + }, + { + "epoch": 0.6918198367490751, + "grad_norm": 0.0, + "learning_rate": 4.581234051323169e-06, + "loss": 1.2354, + "step": 23562 + }, + { + "epoch": 0.6918491984262141, + "grad_norm": 0.0, + "learning_rate": 4.580434833521595e-06, + "loss": 1.3271, + "step": 23563 + }, + { + "epoch": 0.6918785601033531, + "grad_norm": 0.0, + "learning_rate": 4.579635664730542e-06, + "loss": 1.292, + "step": 23564 + }, + { + "epoch": 0.6919079217804921, + "grad_norm": 0.0, + "learning_rate": 4.57883654495723e-06, + "loss": 1.3535, + "step": 23565 + }, + { + "epoch": 0.6919372834576311, + "grad_norm": 0.0, + "learning_rate": 4.578037474208885e-06, + "loss": 1.2812, + "step": 23566 + }, + { + "epoch": 0.6919666451347701, + "grad_norm": 0.0, + "learning_rate": 4.577238452492743e-06, + "loss": 1.2617, + "step": 23567 + }, + { + "epoch": 0.6919960068119091, + "grad_norm": 0.0, + "learning_rate": 4.576439479816015e-06, + "loss": 1.252, + "step": 23568 + }, + { + "epoch": 0.692025368489048, + "grad_norm": 0.0, + "learning_rate": 4.575640556185937e-06, + "loss": 1.145, + "step": 23569 + }, + { + "epoch": 0.6920547301661871, + "grad_norm": 0.0, + "learning_rate": 4.574841681609727e-06, + "loss": 1.2627, + "step": 23570 + }, + { + "epoch": 0.6920840918433261, + "grad_norm": 0.0, + "learning_rate": 4.574042856094617e-06, + "loss": 1.208, + "step": 23571 + }, + { + "epoch": 0.692113453520465, + "grad_norm": 0.0, + "learning_rate": 4.5732440796478235e-06, + "loss": 1.2373, + "step": 23572 + }, + { + "epoch": 0.6921428151976041, + "grad_norm": 0.0, + "learning_rate": 4.572445352276578e-06, + "loss": 1.2988, + "step": 23573 + }, + { + "epoch": 0.6921721768747431, + "grad_norm": 0.0, + "learning_rate": 4.571646673988097e-06, + "loss": 1.3799, + "step": 23574 + }, + { + "epoch": 0.692201538551882, + "grad_norm": 0.0, + "learning_rate": 4.570848044789607e-06, + "loss": 1.0586, + "step": 23575 + }, + { + "epoch": 0.6922309002290211, + "grad_norm": 0.0, + "learning_rate": 4.570049464688329e-06, + "loss": 1.251, + "step": 23576 + }, + { + "epoch": 0.6922602619061601, + "grad_norm": 0.0, + "learning_rate": 4.569250933691479e-06, + "loss": 1.2378, + "step": 23577 + }, + { + "epoch": 0.692289623583299, + "grad_norm": 0.0, + "learning_rate": 4.568452451806289e-06, + "loss": 1.2539, + "step": 23578 + }, + { + "epoch": 0.6923189852604381, + "grad_norm": 0.0, + "learning_rate": 4.5676540190399694e-06, + "loss": 1.2119, + "step": 23579 + }, + { + "epoch": 0.6923483469375771, + "grad_norm": 0.0, + "learning_rate": 4.566855635399751e-06, + "loss": 1.1836, + "step": 23580 + }, + { + "epoch": 0.692377708614716, + "grad_norm": 0.0, + "learning_rate": 4.566057300892843e-06, + "loss": 1.188, + "step": 23581 + }, + { + "epoch": 0.6924070702918551, + "grad_norm": 0.0, + "learning_rate": 4.565259015526475e-06, + "loss": 1.1782, + "step": 23582 + }, + { + "epoch": 0.6924364319689941, + "grad_norm": 0.0, + "learning_rate": 4.564460779307862e-06, + "loss": 1.2783, + "step": 23583 + }, + { + "epoch": 0.692465793646133, + "grad_norm": 0.0, + "learning_rate": 4.563662592244222e-06, + "loss": 1.2808, + "step": 23584 + }, + { + "epoch": 0.6924951553232721, + "grad_norm": 0.0, + "learning_rate": 4.562864454342774e-06, + "loss": 1.2588, + "step": 23585 + }, + { + "epoch": 0.6925245170004111, + "grad_norm": 0.0, + "learning_rate": 4.56206636561073e-06, + "loss": 1.2393, + "step": 23586 + }, + { + "epoch": 0.69255387867755, + "grad_norm": 0.0, + "learning_rate": 4.561268326055318e-06, + "loss": 1.1753, + "step": 23587 + }, + { + "epoch": 0.6925832403546891, + "grad_norm": 0.0, + "learning_rate": 4.560470335683744e-06, + "loss": 1.2803, + "step": 23588 + }, + { + "epoch": 0.6926126020318281, + "grad_norm": 0.0, + "learning_rate": 4.559672394503235e-06, + "loss": 1.2422, + "step": 23589 + }, + { + "epoch": 0.692641963708967, + "grad_norm": 0.0, + "learning_rate": 4.558874502520997e-06, + "loss": 1.3799, + "step": 23590 + }, + { + "epoch": 0.6926713253861061, + "grad_norm": 0.0, + "learning_rate": 4.5580766597442546e-06, + "loss": 1.1855, + "step": 23591 + }, + { + "epoch": 0.6927006870632451, + "grad_norm": 0.0, + "learning_rate": 4.5572788661802196e-06, + "loss": 1.2432, + "step": 23592 + }, + { + "epoch": 0.692730048740384, + "grad_norm": 0.0, + "learning_rate": 4.556481121836104e-06, + "loss": 1.1807, + "step": 23593 + }, + { + "epoch": 0.6927594104175231, + "grad_norm": 0.0, + "learning_rate": 4.555683426719124e-06, + "loss": 1.2666, + "step": 23594 + }, + { + "epoch": 0.692788772094662, + "grad_norm": 0.0, + "learning_rate": 4.55488578083649e-06, + "loss": 1.2793, + "step": 23595 + }, + { + "epoch": 0.692818133771801, + "grad_norm": 0.0, + "learning_rate": 4.554088184195422e-06, + "loss": 1.0884, + "step": 23596 + }, + { + "epoch": 0.6928474954489401, + "grad_norm": 0.0, + "learning_rate": 4.553290636803126e-06, + "loss": 1.2637, + "step": 23597 + }, + { + "epoch": 0.692876857126079, + "grad_norm": 0.0, + "learning_rate": 4.55249313866682e-06, + "loss": 1.1323, + "step": 23598 + }, + { + "epoch": 0.692906218803218, + "grad_norm": 0.0, + "learning_rate": 4.551695689793715e-06, + "loss": 1.2266, + "step": 23599 + }, + { + "epoch": 0.6929355804803571, + "grad_norm": 0.0, + "learning_rate": 4.5508982901910215e-06, + "loss": 1.4023, + "step": 23600 + }, + { + "epoch": 0.692964942157496, + "grad_norm": 0.0, + "learning_rate": 4.5501009398659455e-06, + "loss": 1.1758, + "step": 23601 + }, + { + "epoch": 0.692994303834635, + "grad_norm": 0.0, + "learning_rate": 4.549303638825706e-06, + "loss": 1.2607, + "step": 23602 + }, + { + "epoch": 0.6930236655117741, + "grad_norm": 0.0, + "learning_rate": 4.548506387077507e-06, + "loss": 1.1626, + "step": 23603 + }, + { + "epoch": 0.693053027188913, + "grad_norm": 0.0, + "learning_rate": 4.547709184628564e-06, + "loss": 1.2051, + "step": 23604 + }, + { + "epoch": 0.693082388866052, + "grad_norm": 0.0, + "learning_rate": 4.546912031486084e-06, + "loss": 1.208, + "step": 23605 + }, + { + "epoch": 0.6931117505431911, + "grad_norm": 0.0, + "learning_rate": 4.5461149276572705e-06, + "loss": 1.2666, + "step": 23606 + }, + { + "epoch": 0.69314111222033, + "grad_norm": 0.0, + "learning_rate": 4.54531787314934e-06, + "loss": 1.2783, + "step": 23607 + }, + { + "epoch": 0.693170473897469, + "grad_norm": 0.0, + "learning_rate": 4.544520867969498e-06, + "loss": 1.1855, + "step": 23608 + }, + { + "epoch": 0.6931998355746081, + "grad_norm": 0.0, + "learning_rate": 4.543723912124949e-06, + "loss": 1.2559, + "step": 23609 + }, + { + "epoch": 0.693229197251747, + "grad_norm": 0.0, + "learning_rate": 4.542927005622899e-06, + "loss": 1.2178, + "step": 23610 + }, + { + "epoch": 0.693258558928886, + "grad_norm": 0.0, + "learning_rate": 4.542130148470563e-06, + "loss": 1.1523, + "step": 23611 + }, + { + "epoch": 0.6932879206060251, + "grad_norm": 0.0, + "learning_rate": 4.541333340675137e-06, + "loss": 1.2588, + "step": 23612 + }, + { + "epoch": 0.693317282283164, + "grad_norm": 0.0, + "learning_rate": 4.540536582243835e-06, + "loss": 1.2246, + "step": 23613 + }, + { + "epoch": 0.693346643960303, + "grad_norm": 0.0, + "learning_rate": 4.5397398731838595e-06, + "loss": 1.2363, + "step": 23614 + }, + { + "epoch": 0.6933760056374421, + "grad_norm": 0.0, + "learning_rate": 4.538943213502411e-06, + "loss": 1.333, + "step": 23615 + }, + { + "epoch": 0.693405367314581, + "grad_norm": 0.0, + "learning_rate": 4.5381466032067e-06, + "loss": 1.3096, + "step": 23616 + }, + { + "epoch": 0.69343472899172, + "grad_norm": 0.0, + "learning_rate": 4.53735004230393e-06, + "loss": 1.2173, + "step": 23617 + }, + { + "epoch": 0.6934640906688591, + "grad_norm": 0.0, + "learning_rate": 4.536553530801301e-06, + "loss": 1.291, + "step": 23618 + }, + { + "epoch": 0.693493452345998, + "grad_norm": 0.0, + "learning_rate": 4.535757068706015e-06, + "loss": 1.1899, + "step": 23619 + }, + { + "epoch": 0.693522814023137, + "grad_norm": 0.0, + "learning_rate": 4.53496065602528e-06, + "loss": 1.3545, + "step": 23620 + }, + { + "epoch": 0.693552175700276, + "grad_norm": 0.0, + "learning_rate": 4.534164292766292e-06, + "loss": 1.144, + "step": 23621 + }, + { + "epoch": 0.693581537377415, + "grad_norm": 0.0, + "learning_rate": 4.53336797893626e-06, + "loss": 1.3203, + "step": 23622 + }, + { + "epoch": 0.693610899054554, + "grad_norm": 0.0, + "learning_rate": 4.5325717145423816e-06, + "loss": 1.2754, + "step": 23623 + }, + { + "epoch": 0.693640260731693, + "grad_norm": 0.0, + "learning_rate": 4.531775499591857e-06, + "loss": 1.251, + "step": 23624 + }, + { + "epoch": 0.693669622408832, + "grad_norm": 0.0, + "learning_rate": 4.530979334091889e-06, + "loss": 1.3564, + "step": 23625 + }, + { + "epoch": 0.693698984085971, + "grad_norm": 0.0, + "learning_rate": 4.5301832180496685e-06, + "loss": 1.4248, + "step": 23626 + }, + { + "epoch": 0.69372834576311, + "grad_norm": 0.0, + "learning_rate": 4.529387151472407e-06, + "loss": 1.1548, + "step": 23627 + }, + { + "epoch": 0.693757707440249, + "grad_norm": 0.0, + "learning_rate": 4.528591134367296e-06, + "loss": 1.0801, + "step": 23628 + }, + { + "epoch": 0.693787069117388, + "grad_norm": 0.0, + "learning_rate": 4.52779516674154e-06, + "loss": 1.2041, + "step": 23629 + }, + { + "epoch": 0.6938164307945269, + "grad_norm": 0.0, + "learning_rate": 4.526999248602328e-06, + "loss": 1.2666, + "step": 23630 + }, + { + "epoch": 0.693845792471666, + "grad_norm": 0.0, + "learning_rate": 4.526203379956869e-06, + "loss": 1.2183, + "step": 23631 + }, + { + "epoch": 0.693875154148805, + "grad_norm": 0.0, + "learning_rate": 4.525407560812355e-06, + "loss": 1.1963, + "step": 23632 + }, + { + "epoch": 0.6939045158259439, + "grad_norm": 0.0, + "learning_rate": 4.524611791175981e-06, + "loss": 1.1523, + "step": 23633 + }, + { + "epoch": 0.693933877503083, + "grad_norm": 0.0, + "learning_rate": 4.523816071054946e-06, + "loss": 1.2852, + "step": 23634 + }, + { + "epoch": 0.693963239180222, + "grad_norm": 0.0, + "learning_rate": 4.5230204004564415e-06, + "loss": 1.2314, + "step": 23635 + }, + { + "epoch": 0.6939926008573609, + "grad_norm": 0.0, + "learning_rate": 4.522224779387668e-06, + "loss": 1.2441, + "step": 23636 + }, + { + "epoch": 0.6940219625345, + "grad_norm": 0.0, + "learning_rate": 4.521429207855816e-06, + "loss": 1.332, + "step": 23637 + }, + { + "epoch": 0.694051324211639, + "grad_norm": 0.0, + "learning_rate": 4.520633685868087e-06, + "loss": 1.3574, + "step": 23638 + }, + { + "epoch": 0.6940806858887779, + "grad_norm": 0.0, + "learning_rate": 4.519838213431667e-06, + "loss": 1.2588, + "step": 23639 + }, + { + "epoch": 0.694110047565917, + "grad_norm": 0.0, + "learning_rate": 4.519042790553757e-06, + "loss": 1.2197, + "step": 23640 + }, + { + "epoch": 0.694139409243056, + "grad_norm": 0.0, + "learning_rate": 4.518247417241547e-06, + "loss": 1.0996, + "step": 23641 + }, + { + "epoch": 0.6941687709201949, + "grad_norm": 0.0, + "learning_rate": 4.517452093502229e-06, + "loss": 1.1987, + "step": 23642 + }, + { + "epoch": 0.694198132597334, + "grad_norm": 0.0, + "learning_rate": 4.516656819342993e-06, + "loss": 1.3369, + "step": 23643 + }, + { + "epoch": 0.694227494274473, + "grad_norm": 0.0, + "learning_rate": 4.515861594771036e-06, + "loss": 1.2305, + "step": 23644 + }, + { + "epoch": 0.6942568559516119, + "grad_norm": 0.0, + "learning_rate": 4.5150664197935475e-06, + "loss": 1.3301, + "step": 23645 + }, + { + "epoch": 0.694286217628751, + "grad_norm": 0.0, + "learning_rate": 4.514271294417715e-06, + "loss": 1.3115, + "step": 23646 + }, + { + "epoch": 0.6943155793058899, + "grad_norm": 0.0, + "learning_rate": 4.513476218650736e-06, + "loss": 1.2671, + "step": 23647 + }, + { + "epoch": 0.6943449409830289, + "grad_norm": 0.0, + "learning_rate": 4.5126811924997956e-06, + "loss": 1.1045, + "step": 23648 + }, + { + "epoch": 0.694374302660168, + "grad_norm": 0.0, + "learning_rate": 4.511886215972086e-06, + "loss": 1.2627, + "step": 23649 + }, + { + "epoch": 0.6944036643373069, + "grad_norm": 0.0, + "learning_rate": 4.5110912890747896e-06, + "loss": 1.2373, + "step": 23650 + }, + { + "epoch": 0.6944330260144459, + "grad_norm": 0.0, + "learning_rate": 4.5102964118151045e-06, + "loss": 1.2734, + "step": 23651 + }, + { + "epoch": 0.694462387691585, + "grad_norm": 0.0, + "learning_rate": 4.509501584200212e-06, + "loss": 1.3643, + "step": 23652 + }, + { + "epoch": 0.6944917493687239, + "grad_norm": 0.0, + "learning_rate": 4.508706806237305e-06, + "loss": 1.3071, + "step": 23653 + }, + { + "epoch": 0.6945211110458629, + "grad_norm": 0.0, + "learning_rate": 4.50791207793357e-06, + "loss": 1.1641, + "step": 23654 + }, + { + "epoch": 0.694550472723002, + "grad_norm": 0.0, + "learning_rate": 4.507117399296189e-06, + "loss": 1.3252, + "step": 23655 + }, + { + "epoch": 0.6945798344001409, + "grad_norm": 0.0, + "learning_rate": 4.506322770332355e-06, + "loss": 1.2075, + "step": 23656 + }, + { + "epoch": 0.6946091960772799, + "grad_norm": 0.0, + "learning_rate": 4.505528191049252e-06, + "loss": 1.1709, + "step": 23657 + }, + { + "epoch": 0.694638557754419, + "grad_norm": 0.0, + "learning_rate": 4.504733661454064e-06, + "loss": 1.2754, + "step": 23658 + }, + { + "epoch": 0.6946679194315579, + "grad_norm": 0.0, + "learning_rate": 4.503939181553974e-06, + "loss": 1.207, + "step": 23659 + }, + { + "epoch": 0.6946972811086969, + "grad_norm": 0.0, + "learning_rate": 4.503144751356172e-06, + "loss": 1.3809, + "step": 23660 + }, + { + "epoch": 0.694726642785836, + "grad_norm": 0.0, + "learning_rate": 4.5023503708678365e-06, + "loss": 1.207, + "step": 23661 + }, + { + "epoch": 0.6947560044629749, + "grad_norm": 0.0, + "learning_rate": 4.501556040096159e-06, + "loss": 1.3662, + "step": 23662 + }, + { + "epoch": 0.6947853661401139, + "grad_norm": 0.0, + "learning_rate": 4.500761759048317e-06, + "loss": 1.2061, + "step": 23663 + }, + { + "epoch": 0.694814727817253, + "grad_norm": 0.0, + "learning_rate": 4.499967527731492e-06, + "loss": 1.2534, + "step": 23664 + }, + { + "epoch": 0.6948440894943919, + "grad_norm": 0.0, + "learning_rate": 4.499173346152873e-06, + "loss": 1.1602, + "step": 23665 + }, + { + "epoch": 0.6948734511715309, + "grad_norm": 0.0, + "learning_rate": 4.498379214319638e-06, + "loss": 1.2637, + "step": 23666 + }, + { + "epoch": 0.69490281284867, + "grad_norm": 0.0, + "learning_rate": 4.49758513223897e-06, + "loss": 1.2197, + "step": 23667 + }, + { + "epoch": 0.6949321745258089, + "grad_norm": 0.0, + "learning_rate": 4.496791099918043e-06, + "loss": 1.3428, + "step": 23668 + }, + { + "epoch": 0.6949615362029479, + "grad_norm": 0.0, + "learning_rate": 4.495997117364048e-06, + "loss": 1.2515, + "step": 23669 + }, + { + "epoch": 0.6949908978800869, + "grad_norm": 0.0, + "learning_rate": 4.495203184584158e-06, + "loss": 1.2642, + "step": 23670 + }, + { + "epoch": 0.6950202595572259, + "grad_norm": 0.0, + "learning_rate": 4.494409301585558e-06, + "loss": 1.3135, + "step": 23671 + }, + { + "epoch": 0.6950496212343649, + "grad_norm": 0.0, + "learning_rate": 4.493615468375426e-06, + "loss": 1.3013, + "step": 23672 + }, + { + "epoch": 0.6950789829115039, + "grad_norm": 0.0, + "learning_rate": 4.492821684960938e-06, + "loss": 1.2622, + "step": 23673 + }, + { + "epoch": 0.6951083445886429, + "grad_norm": 0.0, + "learning_rate": 4.492027951349274e-06, + "loss": 1.291, + "step": 23674 + }, + { + "epoch": 0.6951377062657819, + "grad_norm": 0.0, + "learning_rate": 4.491234267547609e-06, + "loss": 1.124, + "step": 23675 + }, + { + "epoch": 0.6951670679429209, + "grad_norm": 0.0, + "learning_rate": 4.490440633563126e-06, + "loss": 1.207, + "step": 23676 + }, + { + "epoch": 0.6951964296200599, + "grad_norm": 0.0, + "learning_rate": 4.489647049402996e-06, + "loss": 1.3574, + "step": 23677 + }, + { + "epoch": 0.6952257912971989, + "grad_norm": 0.0, + "learning_rate": 4.488853515074403e-06, + "loss": 1.272, + "step": 23678 + }, + { + "epoch": 0.6952551529743379, + "grad_norm": 0.0, + "learning_rate": 4.488060030584516e-06, + "loss": 1.2744, + "step": 23679 + }, + { + "epoch": 0.6952845146514769, + "grad_norm": 0.0, + "learning_rate": 4.487266595940516e-06, + "loss": 1.1196, + "step": 23680 + }, + { + "epoch": 0.6953138763286159, + "grad_norm": 0.0, + "learning_rate": 4.486473211149577e-06, + "loss": 1.3057, + "step": 23681 + }, + { + "epoch": 0.6953432380057549, + "grad_norm": 0.0, + "learning_rate": 4.485679876218873e-06, + "loss": 1.1763, + "step": 23682 + }, + { + "epoch": 0.6953725996828939, + "grad_norm": 0.0, + "learning_rate": 4.484886591155577e-06, + "loss": 1.1191, + "step": 23683 + }, + { + "epoch": 0.6954019613600328, + "grad_norm": 0.0, + "learning_rate": 4.484093355966861e-06, + "loss": 1.2217, + "step": 23684 + }, + { + "epoch": 0.6954313230371719, + "grad_norm": 0.0, + "learning_rate": 4.483300170659904e-06, + "loss": 1.2969, + "step": 23685 + }, + { + "epoch": 0.6954606847143109, + "grad_norm": 0.0, + "learning_rate": 4.4825070352418746e-06, + "loss": 1.0859, + "step": 23686 + }, + { + "epoch": 0.6954900463914498, + "grad_norm": 0.0, + "learning_rate": 4.48171394971995e-06, + "loss": 1.2266, + "step": 23687 + }, + { + "epoch": 0.6955194080685889, + "grad_norm": 0.0, + "learning_rate": 4.480920914101296e-06, + "loss": 1.1865, + "step": 23688 + }, + { + "epoch": 0.6955487697457279, + "grad_norm": 0.0, + "learning_rate": 4.480127928393091e-06, + "loss": 1.1885, + "step": 23689 + }, + { + "epoch": 0.6955781314228668, + "grad_norm": 0.0, + "learning_rate": 4.479334992602503e-06, + "loss": 1.2568, + "step": 23690 + }, + { + "epoch": 0.6956074931000059, + "grad_norm": 0.0, + "learning_rate": 4.478542106736702e-06, + "loss": 1.1763, + "step": 23691 + }, + { + "epoch": 0.6956368547771449, + "grad_norm": 0.0, + "learning_rate": 4.477749270802856e-06, + "loss": 1.1465, + "step": 23692 + }, + { + "epoch": 0.6956662164542838, + "grad_norm": 0.0, + "learning_rate": 4.47695648480814e-06, + "loss": 1.3301, + "step": 23693 + }, + { + "epoch": 0.6956955781314229, + "grad_norm": 0.0, + "learning_rate": 4.476163748759722e-06, + "loss": 1.2852, + "step": 23694 + }, + { + "epoch": 0.6957249398085619, + "grad_norm": 0.0, + "learning_rate": 4.4753710626647665e-06, + "loss": 1.3096, + "step": 23695 + }, + { + "epoch": 0.6957543014857008, + "grad_norm": 0.0, + "learning_rate": 4.474578426530448e-06, + "loss": 1.2227, + "step": 23696 + }, + { + "epoch": 0.6957836631628399, + "grad_norm": 0.0, + "learning_rate": 4.4737858403639325e-06, + "loss": 1.3086, + "step": 23697 + }, + { + "epoch": 0.6958130248399789, + "grad_norm": 0.0, + "learning_rate": 4.472993304172387e-06, + "loss": 1.1479, + "step": 23698 + }, + { + "epoch": 0.6958423865171178, + "grad_norm": 0.0, + "learning_rate": 4.472200817962975e-06, + "loss": 1.2271, + "step": 23699 + }, + { + "epoch": 0.6958717481942569, + "grad_norm": 0.0, + "learning_rate": 4.47140838174287e-06, + "loss": 1.2617, + "step": 23700 + }, + { + "epoch": 0.6959011098713959, + "grad_norm": 0.0, + "learning_rate": 4.470615995519231e-06, + "loss": 1.3613, + "step": 23701 + }, + { + "epoch": 0.6959304715485348, + "grad_norm": 0.0, + "learning_rate": 4.469823659299232e-06, + "loss": 1.105, + "step": 23702 + }, + { + "epoch": 0.6959598332256739, + "grad_norm": 0.0, + "learning_rate": 4.469031373090033e-06, + "loss": 1.2593, + "step": 23703 + }, + { + "epoch": 0.6959891949028129, + "grad_norm": 0.0, + "learning_rate": 4.468239136898796e-06, + "loss": 1.2686, + "step": 23704 + }, + { + "epoch": 0.6960185565799518, + "grad_norm": 0.0, + "learning_rate": 4.4674469507326946e-06, + "loss": 1.2192, + "step": 23705 + }, + { + "epoch": 0.6960479182570909, + "grad_norm": 0.0, + "learning_rate": 4.466654814598887e-06, + "loss": 1.1699, + "step": 23706 + }, + { + "epoch": 0.6960772799342299, + "grad_norm": 0.0, + "learning_rate": 4.465862728504535e-06, + "loss": 1.2812, + "step": 23707 + }, + { + "epoch": 0.6961066416113688, + "grad_norm": 0.0, + "learning_rate": 4.465070692456801e-06, + "loss": 1.1274, + "step": 23708 + }, + { + "epoch": 0.6961360032885079, + "grad_norm": 0.0, + "learning_rate": 4.464278706462855e-06, + "loss": 1.2822, + "step": 23709 + }, + { + "epoch": 0.6961653649656468, + "grad_norm": 0.0, + "learning_rate": 4.463486770529849e-06, + "loss": 1.207, + "step": 23710 + }, + { + "epoch": 0.6961947266427858, + "grad_norm": 0.0, + "learning_rate": 4.462694884664956e-06, + "loss": 1.189, + "step": 23711 + }, + { + "epoch": 0.6962240883199249, + "grad_norm": 0.0, + "learning_rate": 4.461903048875326e-06, + "loss": 1.3789, + "step": 23712 + }, + { + "epoch": 0.6962534499970638, + "grad_norm": 0.0, + "learning_rate": 4.461111263168129e-06, + "loss": 1.3174, + "step": 23713 + }, + { + "epoch": 0.6962828116742028, + "grad_norm": 0.0, + "learning_rate": 4.46031952755052e-06, + "loss": 1.2158, + "step": 23714 + }, + { + "epoch": 0.6963121733513419, + "grad_norm": 0.0, + "learning_rate": 4.4595278420296625e-06, + "loss": 1.2349, + "step": 23715 + }, + { + "epoch": 0.6963415350284808, + "grad_norm": 0.0, + "learning_rate": 4.458736206612713e-06, + "loss": 1.2969, + "step": 23716 + }, + { + "epoch": 0.6963708967056198, + "grad_norm": 0.0, + "learning_rate": 4.457944621306827e-06, + "loss": 1.3037, + "step": 23717 + }, + { + "epoch": 0.6964002583827589, + "grad_norm": 0.0, + "learning_rate": 4.4571530861191705e-06, + "loss": 1.2715, + "step": 23718 + }, + { + "epoch": 0.6964296200598978, + "grad_norm": 0.0, + "learning_rate": 4.456361601056895e-06, + "loss": 1.3486, + "step": 23719 + }, + { + "epoch": 0.6964589817370368, + "grad_norm": 0.0, + "learning_rate": 4.455570166127166e-06, + "loss": 1.3672, + "step": 23720 + }, + { + "epoch": 0.6964883434141759, + "grad_norm": 0.0, + "learning_rate": 4.45477878133713e-06, + "loss": 1.1514, + "step": 23721 + }, + { + "epoch": 0.6965177050913148, + "grad_norm": 0.0, + "learning_rate": 4.453987446693959e-06, + "loss": 1.1641, + "step": 23722 + }, + { + "epoch": 0.6965470667684538, + "grad_norm": 0.0, + "learning_rate": 4.453196162204795e-06, + "loss": 1.1987, + "step": 23723 + }, + { + "epoch": 0.6965764284455929, + "grad_norm": 0.0, + "learning_rate": 4.452404927876796e-06, + "loss": 1.2607, + "step": 23724 + }, + { + "epoch": 0.6966057901227318, + "grad_norm": 0.0, + "learning_rate": 4.451613743717123e-06, + "loss": 1.189, + "step": 23725 + }, + { + "epoch": 0.6966351517998708, + "grad_norm": 0.0, + "learning_rate": 4.450822609732924e-06, + "loss": 1.2656, + "step": 23726 + }, + { + "epoch": 0.6966645134770099, + "grad_norm": 0.0, + "learning_rate": 4.450031525931361e-06, + "loss": 1.2949, + "step": 23727 + }, + { + "epoch": 0.6966938751541488, + "grad_norm": 0.0, + "learning_rate": 4.449240492319581e-06, + "loss": 1.3262, + "step": 23728 + }, + { + "epoch": 0.6967232368312878, + "grad_norm": 0.0, + "learning_rate": 4.448449508904744e-06, + "loss": 1.2598, + "step": 23729 + }, + { + "epoch": 0.6967525985084267, + "grad_norm": 0.0, + "learning_rate": 4.447658575694e-06, + "loss": 1.2812, + "step": 23730 + }, + { + "epoch": 0.6967819601855658, + "grad_norm": 0.0, + "learning_rate": 4.446867692694502e-06, + "loss": 1.3477, + "step": 23731 + }, + { + "epoch": 0.6968113218627048, + "grad_norm": 0.0, + "learning_rate": 4.446076859913402e-06, + "loss": 1.4941, + "step": 23732 + }, + { + "epoch": 0.6968406835398437, + "grad_norm": 0.0, + "learning_rate": 4.445286077357847e-06, + "loss": 1.1475, + "step": 23733 + }, + { + "epoch": 0.6968700452169828, + "grad_norm": 0.0, + "learning_rate": 4.444495345034996e-06, + "loss": 1.1953, + "step": 23734 + }, + { + "epoch": 0.6968994068941218, + "grad_norm": 0.0, + "learning_rate": 4.443704662951993e-06, + "loss": 1.2065, + "step": 23735 + }, + { + "epoch": 0.6969287685712607, + "grad_norm": 0.0, + "learning_rate": 4.442914031115996e-06, + "loss": 1.1523, + "step": 23736 + }, + { + "epoch": 0.6969581302483998, + "grad_norm": 0.0, + "learning_rate": 4.442123449534146e-06, + "loss": 1.2129, + "step": 23737 + }, + { + "epoch": 0.6969874919255388, + "grad_norm": 0.0, + "learning_rate": 4.4413329182136e-06, + "loss": 1.2676, + "step": 23738 + }, + { + "epoch": 0.6970168536026777, + "grad_norm": 0.0, + "learning_rate": 4.440542437161506e-06, + "loss": 1.2344, + "step": 23739 + }, + { + "epoch": 0.6970462152798168, + "grad_norm": 0.0, + "learning_rate": 4.439752006385009e-06, + "loss": 1.2461, + "step": 23740 + }, + { + "epoch": 0.6970755769569558, + "grad_norm": 0.0, + "learning_rate": 4.438961625891256e-06, + "loss": 1.1724, + "step": 23741 + }, + { + "epoch": 0.6971049386340947, + "grad_norm": 0.0, + "learning_rate": 4.4381712956874e-06, + "loss": 1.1909, + "step": 23742 + }, + { + "epoch": 0.6971343003112338, + "grad_norm": 0.0, + "learning_rate": 4.437381015780585e-06, + "loss": 1.1616, + "step": 23743 + }, + { + "epoch": 0.6971636619883728, + "grad_norm": 0.0, + "learning_rate": 4.436590786177956e-06, + "loss": 1.2646, + "step": 23744 + }, + { + "epoch": 0.6971930236655117, + "grad_norm": 0.0, + "learning_rate": 4.435800606886665e-06, + "loss": 1.0825, + "step": 23745 + }, + { + "epoch": 0.6972223853426508, + "grad_norm": 0.0, + "learning_rate": 4.435010477913848e-06, + "loss": 1.3262, + "step": 23746 + }, + { + "epoch": 0.6972517470197898, + "grad_norm": 0.0, + "learning_rate": 4.434220399266665e-06, + "loss": 1.2549, + "step": 23747 + }, + { + "epoch": 0.6972811086969287, + "grad_norm": 0.0, + "learning_rate": 4.433430370952247e-06, + "loss": 1.0835, + "step": 23748 + }, + { + "epoch": 0.6973104703740678, + "grad_norm": 0.0, + "learning_rate": 4.432640392977745e-06, + "loss": 1.3057, + "step": 23749 + }, + { + "epoch": 0.6973398320512068, + "grad_norm": 0.0, + "learning_rate": 4.431850465350299e-06, + "loss": 1.2754, + "step": 23750 + }, + { + "epoch": 0.6973691937283457, + "grad_norm": 0.0, + "learning_rate": 4.431060588077059e-06, + "loss": 1.1226, + "step": 23751 + }, + { + "epoch": 0.6973985554054848, + "grad_norm": 0.0, + "learning_rate": 4.430270761165164e-06, + "loss": 1.3037, + "step": 23752 + }, + { + "epoch": 0.6974279170826237, + "grad_norm": 0.0, + "learning_rate": 4.429480984621754e-06, + "loss": 1.2529, + "step": 23753 + }, + { + "epoch": 0.6974572787597627, + "grad_norm": 0.0, + "learning_rate": 4.428691258453977e-06, + "loss": 1.2949, + "step": 23754 + }, + { + "epoch": 0.6974866404369018, + "grad_norm": 0.0, + "learning_rate": 4.427901582668972e-06, + "loss": 1.3184, + "step": 23755 + }, + { + "epoch": 0.6975160021140407, + "grad_norm": 0.0, + "learning_rate": 4.427111957273881e-06, + "loss": 1.1343, + "step": 23756 + }, + { + "epoch": 0.6975453637911797, + "grad_norm": 0.0, + "learning_rate": 4.426322382275838e-06, + "loss": 1.0898, + "step": 23757 + }, + { + "epoch": 0.6975747254683188, + "grad_norm": 0.0, + "learning_rate": 4.425532857681995e-06, + "loss": 1.2305, + "step": 23758 + }, + { + "epoch": 0.6976040871454577, + "grad_norm": 0.0, + "learning_rate": 4.4247433834994815e-06, + "loss": 1.1348, + "step": 23759 + }, + { + "epoch": 0.6976334488225967, + "grad_norm": 0.0, + "learning_rate": 4.423953959735444e-06, + "loss": 1.166, + "step": 23760 + }, + { + "epoch": 0.6976628104997358, + "grad_norm": 0.0, + "learning_rate": 4.4231645863970165e-06, + "loss": 1.2051, + "step": 23761 + }, + { + "epoch": 0.6976921721768747, + "grad_norm": 0.0, + "learning_rate": 4.4223752634913434e-06, + "loss": 1.2646, + "step": 23762 + }, + { + "epoch": 0.6977215338540137, + "grad_norm": 0.0, + "learning_rate": 4.421585991025559e-06, + "loss": 1.3564, + "step": 23763 + }, + { + "epoch": 0.6977508955311528, + "grad_norm": 0.0, + "learning_rate": 4.420796769006801e-06, + "loss": 1.2842, + "step": 23764 + }, + { + "epoch": 0.6977802572082917, + "grad_norm": 0.0, + "learning_rate": 4.420007597442207e-06, + "loss": 1.3184, + "step": 23765 + }, + { + "epoch": 0.6978096188854307, + "grad_norm": 0.0, + "learning_rate": 4.419218476338909e-06, + "loss": 1.3223, + "step": 23766 + }, + { + "epoch": 0.6978389805625698, + "grad_norm": 0.0, + "learning_rate": 4.418429405704052e-06, + "loss": 1.2095, + "step": 23767 + }, + { + "epoch": 0.6978683422397087, + "grad_norm": 0.0, + "learning_rate": 4.4176403855447634e-06, + "loss": 1.2432, + "step": 23768 + }, + { + "epoch": 0.6978977039168477, + "grad_norm": 0.0, + "learning_rate": 4.416851415868186e-06, + "loss": 1.2666, + "step": 23769 + }, + { + "epoch": 0.6979270655939868, + "grad_norm": 0.0, + "learning_rate": 4.4160624966814484e-06, + "loss": 1.1245, + "step": 23770 + }, + { + "epoch": 0.6979564272711257, + "grad_norm": 0.0, + "learning_rate": 4.415273627991691e-06, + "loss": 1.3027, + "step": 23771 + }, + { + "epoch": 0.6979857889482647, + "grad_norm": 0.0, + "learning_rate": 4.414484809806047e-06, + "loss": 1.2256, + "step": 23772 + }, + { + "epoch": 0.6980151506254038, + "grad_norm": 0.0, + "learning_rate": 4.413696042131641e-06, + "loss": 1.0947, + "step": 23773 + }, + { + "epoch": 0.6980445123025427, + "grad_norm": 0.0, + "learning_rate": 4.412907324975616e-06, + "loss": 1.2715, + "step": 23774 + }, + { + "epoch": 0.6980738739796817, + "grad_norm": 0.0, + "learning_rate": 4.412118658345097e-06, + "loss": 1.1299, + "step": 23775 + }, + { + "epoch": 0.6981032356568208, + "grad_norm": 0.0, + "learning_rate": 4.4113300422472234e-06, + "loss": 1.2461, + "step": 23776 + }, + { + "epoch": 0.6981325973339597, + "grad_norm": 0.0, + "learning_rate": 4.410541476689119e-06, + "loss": 1.2383, + "step": 23777 + }, + { + "epoch": 0.6981619590110987, + "grad_norm": 0.0, + "learning_rate": 4.409752961677926e-06, + "loss": 1.2441, + "step": 23778 + }, + { + "epoch": 0.6981913206882377, + "grad_norm": 0.0, + "learning_rate": 4.408964497220767e-06, + "loss": 1.2754, + "step": 23779 + }, + { + "epoch": 0.6982206823653767, + "grad_norm": 0.0, + "learning_rate": 4.408176083324773e-06, + "loss": 1.1216, + "step": 23780 + }, + { + "epoch": 0.6982500440425157, + "grad_norm": 0.0, + "learning_rate": 4.4073877199970725e-06, + "loss": 1.2559, + "step": 23781 + }, + { + "epoch": 0.6982794057196547, + "grad_norm": 0.0, + "learning_rate": 4.4065994072448005e-06, + "loss": 1.2871, + "step": 23782 + }, + { + "epoch": 0.6983087673967937, + "grad_norm": 0.0, + "learning_rate": 4.405811145075083e-06, + "loss": 1.2705, + "step": 23783 + }, + { + "epoch": 0.6983381290739327, + "grad_norm": 0.0, + "learning_rate": 4.4050229334950436e-06, + "loss": 1.291, + "step": 23784 + }, + { + "epoch": 0.6983674907510717, + "grad_norm": 0.0, + "learning_rate": 4.4042347725118184e-06, + "loss": 1.103, + "step": 23785 + }, + { + "epoch": 0.6983968524282107, + "grad_norm": 0.0, + "learning_rate": 4.403446662132528e-06, + "loss": 1.1489, + "step": 23786 + }, + { + "epoch": 0.6984262141053497, + "grad_norm": 0.0, + "learning_rate": 4.402658602364306e-06, + "loss": 1.0605, + "step": 23787 + }, + { + "epoch": 0.6984555757824887, + "grad_norm": 0.0, + "learning_rate": 4.401870593214276e-06, + "loss": 1.2607, + "step": 23788 + }, + { + "epoch": 0.6984849374596277, + "grad_norm": 0.0, + "learning_rate": 4.401082634689563e-06, + "loss": 1.3062, + "step": 23789 + }, + { + "epoch": 0.6985142991367667, + "grad_norm": 0.0, + "learning_rate": 4.400294726797291e-06, + "loss": 1.335, + "step": 23790 + }, + { + "epoch": 0.6985436608139057, + "grad_norm": 0.0, + "learning_rate": 4.399506869544592e-06, + "loss": 1.2705, + "step": 23791 + }, + { + "epoch": 0.6985730224910447, + "grad_norm": 0.0, + "learning_rate": 4.398719062938586e-06, + "loss": 1.3408, + "step": 23792 + }, + { + "epoch": 0.6986023841681837, + "grad_norm": 0.0, + "learning_rate": 4.397931306986394e-06, + "loss": 1.1113, + "step": 23793 + }, + { + "epoch": 0.6986317458453227, + "grad_norm": 0.0, + "learning_rate": 4.397143601695147e-06, + "loss": 1.3135, + "step": 23794 + }, + { + "epoch": 0.6986611075224617, + "grad_norm": 0.0, + "learning_rate": 4.396355947071963e-06, + "loss": 1.3262, + "step": 23795 + }, + { + "epoch": 0.6986904691996007, + "grad_norm": 0.0, + "learning_rate": 4.395568343123972e-06, + "loss": 1.2568, + "step": 23796 + }, + { + "epoch": 0.6987198308767397, + "grad_norm": 0.0, + "learning_rate": 4.3947807898582895e-06, + "loss": 1.1968, + "step": 23797 + }, + { + "epoch": 0.6987491925538787, + "grad_norm": 0.0, + "learning_rate": 4.3939932872820405e-06, + "loss": 1.1992, + "step": 23798 + }, + { + "epoch": 0.6987785542310176, + "grad_norm": 0.0, + "learning_rate": 4.393205835402342e-06, + "loss": 1.2754, + "step": 23799 + }, + { + "epoch": 0.6988079159081567, + "grad_norm": 0.0, + "learning_rate": 4.392418434226324e-06, + "loss": 1.1655, + "step": 23800 + }, + { + "epoch": 0.6988372775852957, + "grad_norm": 0.0, + "learning_rate": 4.391631083761101e-06, + "loss": 1.2378, + "step": 23801 + }, + { + "epoch": 0.6988666392624346, + "grad_norm": 0.0, + "learning_rate": 4.390843784013792e-06, + "loss": 1.1836, + "step": 23802 + }, + { + "epoch": 0.6988960009395737, + "grad_norm": 0.0, + "learning_rate": 4.390056534991521e-06, + "loss": 1.2568, + "step": 23803 + }, + { + "epoch": 0.6989253626167127, + "grad_norm": 0.0, + "learning_rate": 4.389269336701407e-06, + "loss": 1.1587, + "step": 23804 + }, + { + "epoch": 0.6989547242938516, + "grad_norm": 0.0, + "learning_rate": 4.388482189150567e-06, + "loss": 1.3115, + "step": 23805 + }, + { + "epoch": 0.6989840859709907, + "grad_norm": 0.0, + "learning_rate": 4.387695092346116e-06, + "loss": 1.2036, + "step": 23806 + }, + { + "epoch": 0.6990134476481297, + "grad_norm": 0.0, + "learning_rate": 4.386908046295178e-06, + "loss": 1.1631, + "step": 23807 + }, + { + "epoch": 0.6990428093252686, + "grad_norm": 0.0, + "learning_rate": 4.386121051004866e-06, + "loss": 1.2275, + "step": 23808 + }, + { + "epoch": 0.6990721710024077, + "grad_norm": 0.0, + "learning_rate": 4.3853341064823014e-06, + "loss": 1.2075, + "step": 23809 + }, + { + "epoch": 0.6991015326795467, + "grad_norm": 0.0, + "learning_rate": 4.384547212734597e-06, + "loss": 1.1777, + "step": 23810 + }, + { + "epoch": 0.6991308943566856, + "grad_norm": 0.0, + "learning_rate": 4.383760369768872e-06, + "loss": 1.4248, + "step": 23811 + }, + { + "epoch": 0.6991602560338247, + "grad_norm": 0.0, + "learning_rate": 4.382973577592241e-06, + "loss": 1.3555, + "step": 23812 + }, + { + "epoch": 0.6991896177109637, + "grad_norm": 0.0, + "learning_rate": 4.382186836211818e-06, + "loss": 1.2354, + "step": 23813 + }, + { + "epoch": 0.6992189793881026, + "grad_norm": 0.0, + "learning_rate": 4.381400145634719e-06, + "loss": 1.3115, + "step": 23814 + }, + { + "epoch": 0.6992483410652417, + "grad_norm": 0.0, + "learning_rate": 4.380613505868053e-06, + "loss": 1.2012, + "step": 23815 + }, + { + "epoch": 0.6992777027423807, + "grad_norm": 0.0, + "learning_rate": 4.3798269169189424e-06, + "loss": 1.1807, + "step": 23816 + }, + { + "epoch": 0.6993070644195196, + "grad_norm": 0.0, + "learning_rate": 4.379040378794492e-06, + "loss": 1.3535, + "step": 23817 + }, + { + "epoch": 0.6993364260966587, + "grad_norm": 0.0, + "learning_rate": 4.378253891501824e-06, + "loss": 1.3203, + "step": 23818 + }, + { + "epoch": 0.6993657877737977, + "grad_norm": 0.0, + "learning_rate": 4.377467455048041e-06, + "loss": 1.2354, + "step": 23819 + }, + { + "epoch": 0.6993951494509366, + "grad_norm": 0.0, + "learning_rate": 4.376681069440264e-06, + "loss": 1.3135, + "step": 23820 + }, + { + "epoch": 0.6994245111280757, + "grad_norm": 0.0, + "learning_rate": 4.3758947346856e-06, + "loss": 1.1436, + "step": 23821 + }, + { + "epoch": 0.6994538728052146, + "grad_norm": 0.0, + "learning_rate": 4.37510845079116e-06, + "loss": 1.1523, + "step": 23822 + }, + { + "epoch": 0.6994832344823536, + "grad_norm": 0.0, + "learning_rate": 4.3743222177640544e-06, + "loss": 1.2715, + "step": 23823 + }, + { + "epoch": 0.6995125961594927, + "grad_norm": 0.0, + "learning_rate": 4.373536035611391e-06, + "loss": 1.2178, + "step": 23824 + }, + { + "epoch": 0.6995419578366316, + "grad_norm": 0.0, + "learning_rate": 4.372749904340286e-06, + "loss": 1.2949, + "step": 23825 + }, + { + "epoch": 0.6995713195137706, + "grad_norm": 0.0, + "learning_rate": 4.37196382395784e-06, + "loss": 1.2505, + "step": 23826 + }, + { + "epoch": 0.6996006811909097, + "grad_norm": 0.0, + "learning_rate": 4.371177794471171e-06, + "loss": 1.2397, + "step": 23827 + }, + { + "epoch": 0.6996300428680486, + "grad_norm": 0.0, + "learning_rate": 4.370391815887382e-06, + "loss": 1.084, + "step": 23828 + }, + { + "epoch": 0.6996594045451876, + "grad_norm": 0.0, + "learning_rate": 4.3696058882135826e-06, + "loss": 1.1479, + "step": 23829 + }, + { + "epoch": 0.6996887662223267, + "grad_norm": 0.0, + "learning_rate": 4.3688200114568754e-06, + "loss": 1.2822, + "step": 23830 + }, + { + "epoch": 0.6997181278994656, + "grad_norm": 0.0, + "learning_rate": 4.368034185624374e-06, + "loss": 1.2656, + "step": 23831 + }, + { + "epoch": 0.6997474895766046, + "grad_norm": 0.0, + "learning_rate": 4.367248410723183e-06, + "loss": 1.2373, + "step": 23832 + }, + { + "epoch": 0.6997768512537436, + "grad_norm": 0.0, + "learning_rate": 4.366462686760402e-06, + "loss": 1.1279, + "step": 23833 + }, + { + "epoch": 0.6998062129308826, + "grad_norm": 0.0, + "learning_rate": 4.365677013743146e-06, + "loss": 1.2563, + "step": 23834 + }, + { + "epoch": 0.6998355746080216, + "grad_norm": 0.0, + "learning_rate": 4.364891391678513e-06, + "loss": 1.3467, + "step": 23835 + }, + { + "epoch": 0.6998649362851606, + "grad_norm": 0.0, + "learning_rate": 4.364105820573612e-06, + "loss": 1.2012, + "step": 23836 + }, + { + "epoch": 0.6998942979622996, + "grad_norm": 0.0, + "learning_rate": 4.3633203004355475e-06, + "loss": 1.2686, + "step": 23837 + }, + { + "epoch": 0.6999236596394386, + "grad_norm": 0.0, + "learning_rate": 4.36253483127142e-06, + "loss": 1.2036, + "step": 23838 + }, + { + "epoch": 0.6999530213165776, + "grad_norm": 0.0, + "learning_rate": 4.36174941308833e-06, + "loss": 1.2603, + "step": 23839 + }, + { + "epoch": 0.6999823829937166, + "grad_norm": 0.0, + "learning_rate": 4.360964045893388e-06, + "loss": 1.251, + "step": 23840 + }, + { + "epoch": 0.7000117446708556, + "grad_norm": 0.0, + "learning_rate": 4.3601787296936925e-06, + "loss": 1.1201, + "step": 23841 + }, + { + "epoch": 0.7000411063479945, + "grad_norm": 0.0, + "learning_rate": 4.359393464496341e-06, + "loss": 1.2241, + "step": 23842 + }, + { + "epoch": 0.7000704680251336, + "grad_norm": 0.0, + "learning_rate": 4.358608250308443e-06, + "loss": 1.207, + "step": 23843 + }, + { + "epoch": 0.7000998297022726, + "grad_norm": 0.0, + "learning_rate": 4.357823087137091e-06, + "loss": 1.1982, + "step": 23844 + }, + { + "epoch": 0.7001291913794115, + "grad_norm": 0.0, + "learning_rate": 4.3570379749893945e-06, + "loss": 1.248, + "step": 23845 + }, + { + "epoch": 0.7001585530565506, + "grad_norm": 0.0, + "learning_rate": 4.356252913872449e-06, + "loss": 1.1992, + "step": 23846 + }, + { + "epoch": 0.7001879147336896, + "grad_norm": 0.0, + "learning_rate": 4.3554679037933524e-06, + "loss": 1.2646, + "step": 23847 + }, + { + "epoch": 0.7002172764108285, + "grad_norm": 0.0, + "learning_rate": 4.354682944759202e-06, + "loss": 1.2871, + "step": 23848 + }, + { + "epoch": 0.7002466380879676, + "grad_norm": 0.0, + "learning_rate": 4.353898036777104e-06, + "loss": 1.0786, + "step": 23849 + }, + { + "epoch": 0.7002759997651066, + "grad_norm": 0.0, + "learning_rate": 4.35311317985415e-06, + "loss": 1.292, + "step": 23850 + }, + { + "epoch": 0.7003053614422455, + "grad_norm": 0.0, + "learning_rate": 4.352328373997439e-06, + "loss": 1.3496, + "step": 23851 + }, + { + "epoch": 0.7003347231193846, + "grad_norm": 0.0, + "learning_rate": 4.35154361921407e-06, + "loss": 1.0879, + "step": 23852 + }, + { + "epoch": 0.7003640847965236, + "grad_norm": 0.0, + "learning_rate": 4.3507589155111396e-06, + "loss": 1.3467, + "step": 23853 + }, + { + "epoch": 0.7003934464736625, + "grad_norm": 0.0, + "learning_rate": 4.349974262895743e-06, + "loss": 1.1885, + "step": 23854 + }, + { + "epoch": 0.7004228081508016, + "grad_norm": 0.0, + "learning_rate": 4.3491896613749725e-06, + "loss": 1.3057, + "step": 23855 + }, + { + "epoch": 0.7004521698279406, + "grad_norm": 0.0, + "learning_rate": 4.348405110955931e-06, + "loss": 1.1821, + "step": 23856 + }, + { + "epoch": 0.7004815315050795, + "grad_norm": 0.0, + "learning_rate": 4.347620611645707e-06, + "loss": 1.1675, + "step": 23857 + }, + { + "epoch": 0.7005108931822186, + "grad_norm": 0.0, + "learning_rate": 4.346836163451399e-06, + "loss": 1.1172, + "step": 23858 + }, + { + "epoch": 0.7005402548593576, + "grad_norm": 0.0, + "learning_rate": 4.346051766380097e-06, + "loss": 1.231, + "step": 23859 + }, + { + "epoch": 0.7005696165364965, + "grad_norm": 0.0, + "learning_rate": 4.3452674204389e-06, + "loss": 1.2686, + "step": 23860 + }, + { + "epoch": 0.7005989782136356, + "grad_norm": 0.0, + "learning_rate": 4.344483125634897e-06, + "loss": 1.2378, + "step": 23861 + }, + { + "epoch": 0.7006283398907746, + "grad_norm": 0.0, + "learning_rate": 4.343698881975183e-06, + "loss": 1.2217, + "step": 23862 + }, + { + "epoch": 0.7006577015679135, + "grad_norm": 0.0, + "learning_rate": 4.342914689466849e-06, + "loss": 1.2598, + "step": 23863 + }, + { + "epoch": 0.7006870632450526, + "grad_norm": 0.0, + "learning_rate": 4.342130548116981e-06, + "loss": 1.1943, + "step": 23864 + }, + { + "epoch": 0.7007164249221915, + "grad_norm": 0.0, + "learning_rate": 4.3413464579326795e-06, + "loss": 1.0132, + "step": 23865 + }, + { + "epoch": 0.7007457865993305, + "grad_norm": 0.0, + "learning_rate": 4.340562418921028e-06, + "loss": 1.229, + "step": 23866 + }, + { + "epoch": 0.7007751482764696, + "grad_norm": 0.0, + "learning_rate": 4.339778431089124e-06, + "loss": 1.2173, + "step": 23867 + }, + { + "epoch": 0.7008045099536085, + "grad_norm": 0.0, + "learning_rate": 4.3389944944440486e-06, + "loss": 1.2334, + "step": 23868 + }, + { + "epoch": 0.7008338716307475, + "grad_norm": 0.0, + "learning_rate": 4.3382106089929e-06, + "loss": 1.2095, + "step": 23869 + }, + { + "epoch": 0.7008632333078866, + "grad_norm": 0.0, + "learning_rate": 4.3374267747427624e-06, + "loss": 1.2495, + "step": 23870 + }, + { + "epoch": 0.7008925949850255, + "grad_norm": 0.0, + "learning_rate": 4.336642991700725e-06, + "loss": 1.2617, + "step": 23871 + }, + { + "epoch": 0.7009219566621645, + "grad_norm": 0.0, + "learning_rate": 4.3358592598738746e-06, + "loss": 1.2412, + "step": 23872 + }, + { + "epoch": 0.7009513183393036, + "grad_norm": 0.0, + "learning_rate": 4.335075579269298e-06, + "loss": 1.1294, + "step": 23873 + }, + { + "epoch": 0.7009806800164425, + "grad_norm": 0.0, + "learning_rate": 4.334291949894085e-06, + "loss": 1.3545, + "step": 23874 + }, + { + "epoch": 0.7010100416935815, + "grad_norm": 0.0, + "learning_rate": 4.333508371755318e-06, + "loss": 1.228, + "step": 23875 + }, + { + "epoch": 0.7010394033707206, + "grad_norm": 0.0, + "learning_rate": 4.332724844860091e-06, + "loss": 1.2002, + "step": 23876 + }, + { + "epoch": 0.7010687650478595, + "grad_norm": 0.0, + "learning_rate": 4.331941369215483e-06, + "loss": 1.3633, + "step": 23877 + }, + { + "epoch": 0.7010981267249985, + "grad_norm": 0.0, + "learning_rate": 4.331157944828582e-06, + "loss": 1.1489, + "step": 23878 + }, + { + "epoch": 0.7011274884021376, + "grad_norm": 0.0, + "learning_rate": 4.330374571706467e-06, + "loss": 1.1582, + "step": 23879 + }, + { + "epoch": 0.7011568500792765, + "grad_norm": 0.0, + "learning_rate": 4.329591249856231e-06, + "loss": 1.2744, + "step": 23880 + }, + { + "epoch": 0.7011862117564155, + "grad_norm": 0.0, + "learning_rate": 4.328807979284954e-06, + "loss": 1.1494, + "step": 23881 + }, + { + "epoch": 0.7012155734335546, + "grad_norm": 0.0, + "learning_rate": 4.328024759999714e-06, + "loss": 1.2285, + "step": 23882 + }, + { + "epoch": 0.7012449351106935, + "grad_norm": 0.0, + "learning_rate": 4.327241592007603e-06, + "loss": 1.2012, + "step": 23883 + }, + { + "epoch": 0.7012742967878325, + "grad_norm": 0.0, + "learning_rate": 4.3264584753156945e-06, + "loss": 1.106, + "step": 23884 + }, + { + "epoch": 0.7013036584649716, + "grad_norm": 0.0, + "learning_rate": 4.32567540993108e-06, + "loss": 1.3369, + "step": 23885 + }, + { + "epoch": 0.7013330201421105, + "grad_norm": 0.0, + "learning_rate": 4.324892395860835e-06, + "loss": 1.2188, + "step": 23886 + }, + { + "epoch": 0.7013623818192495, + "grad_norm": 0.0, + "learning_rate": 4.324109433112043e-06, + "loss": 1.2002, + "step": 23887 + }, + { + "epoch": 0.7013917434963886, + "grad_norm": 0.0, + "learning_rate": 4.3233265216917775e-06, + "loss": 1.3389, + "step": 23888 + }, + { + "epoch": 0.7014211051735275, + "grad_norm": 0.0, + "learning_rate": 4.322543661607128e-06, + "loss": 1.1943, + "step": 23889 + }, + { + "epoch": 0.7014504668506665, + "grad_norm": 0.0, + "learning_rate": 4.321760852865171e-06, + "loss": 1.3071, + "step": 23890 + }, + { + "epoch": 0.7014798285278055, + "grad_norm": 0.0, + "learning_rate": 4.320978095472981e-06, + "loss": 1.2891, + "step": 23891 + }, + { + "epoch": 0.7015091902049445, + "grad_norm": 0.0, + "learning_rate": 4.320195389437644e-06, + "loss": 1.2676, + "step": 23892 + }, + { + "epoch": 0.7015385518820835, + "grad_norm": 0.0, + "learning_rate": 4.319412734766231e-06, + "loss": 1.2539, + "step": 23893 + }, + { + "epoch": 0.7015679135592225, + "grad_norm": 0.0, + "learning_rate": 4.318630131465827e-06, + "loss": 1.3086, + "step": 23894 + }, + { + "epoch": 0.7015972752363615, + "grad_norm": 0.0, + "learning_rate": 4.317847579543506e-06, + "loss": 1.2314, + "step": 23895 + }, + { + "epoch": 0.7016266369135005, + "grad_norm": 0.0, + "learning_rate": 4.317065079006345e-06, + "loss": 1.2686, + "step": 23896 + }, + { + "epoch": 0.7016559985906395, + "grad_norm": 0.0, + "learning_rate": 4.316282629861416e-06, + "loss": 1.2036, + "step": 23897 + }, + { + "epoch": 0.7016853602677785, + "grad_norm": 0.0, + "learning_rate": 4.3155002321158025e-06, + "loss": 1.3486, + "step": 23898 + }, + { + "epoch": 0.7017147219449175, + "grad_norm": 0.0, + "learning_rate": 4.314717885776572e-06, + "loss": 1.2153, + "step": 23899 + }, + { + "epoch": 0.7017440836220565, + "grad_norm": 0.0, + "learning_rate": 4.313935590850807e-06, + "loss": 1.27, + "step": 23900 + }, + { + "epoch": 0.7017734452991955, + "grad_norm": 0.0, + "learning_rate": 4.31315334734558e-06, + "loss": 1.1611, + "step": 23901 + }, + { + "epoch": 0.7018028069763345, + "grad_norm": 0.0, + "learning_rate": 4.312371155267962e-06, + "loss": 1.2275, + "step": 23902 + }, + { + "epoch": 0.7018321686534735, + "grad_norm": 0.0, + "learning_rate": 4.3115890146250295e-06, + "loss": 1.167, + "step": 23903 + }, + { + "epoch": 0.7018615303306125, + "grad_norm": 0.0, + "learning_rate": 4.31080692542385e-06, + "loss": 1.2471, + "step": 23904 + }, + { + "epoch": 0.7018908920077515, + "grad_norm": 0.0, + "learning_rate": 4.310024887671505e-06, + "loss": 1.1318, + "step": 23905 + }, + { + "epoch": 0.7019202536848905, + "grad_norm": 0.0, + "learning_rate": 4.309242901375059e-06, + "loss": 1.3164, + "step": 23906 + }, + { + "epoch": 0.7019496153620295, + "grad_norm": 0.0, + "learning_rate": 4.308460966541589e-06, + "loss": 1.2441, + "step": 23907 + }, + { + "epoch": 0.7019789770391685, + "grad_norm": 0.0, + "learning_rate": 4.307679083178161e-06, + "loss": 1.2539, + "step": 23908 + }, + { + "epoch": 0.7020083387163075, + "grad_norm": 0.0, + "learning_rate": 4.3068972512918525e-06, + "loss": 1.2549, + "step": 23909 + }, + { + "epoch": 0.7020377003934465, + "grad_norm": 0.0, + "learning_rate": 4.306115470889732e-06, + "loss": 1.2534, + "step": 23910 + }, + { + "epoch": 0.7020670620705854, + "grad_norm": 0.0, + "learning_rate": 4.305333741978865e-06, + "loss": 1.1875, + "step": 23911 + }, + { + "epoch": 0.7020964237477245, + "grad_norm": 0.0, + "learning_rate": 4.3045520645663245e-06, + "loss": 1.1289, + "step": 23912 + }, + { + "epoch": 0.7021257854248635, + "grad_norm": 0.0, + "learning_rate": 4.303770438659174e-06, + "loss": 1.1714, + "step": 23913 + }, + { + "epoch": 0.7021551471020024, + "grad_norm": 0.0, + "learning_rate": 4.30298886426449e-06, + "loss": 1.2261, + "step": 23914 + }, + { + "epoch": 0.7021845087791415, + "grad_norm": 0.0, + "learning_rate": 4.302207341389334e-06, + "loss": 1.2144, + "step": 23915 + }, + { + "epoch": 0.7022138704562805, + "grad_norm": 0.0, + "learning_rate": 4.301425870040779e-06, + "loss": 1.2119, + "step": 23916 + }, + { + "epoch": 0.7022432321334194, + "grad_norm": 0.0, + "learning_rate": 4.300644450225886e-06, + "loss": 1.208, + "step": 23917 + }, + { + "epoch": 0.7022725938105585, + "grad_norm": 0.0, + "learning_rate": 4.299863081951728e-06, + "loss": 1.2734, + "step": 23918 + }, + { + "epoch": 0.7023019554876975, + "grad_norm": 0.0, + "learning_rate": 4.299081765225369e-06, + "loss": 1.166, + "step": 23919 + }, + { + "epoch": 0.7023313171648364, + "grad_norm": 0.0, + "learning_rate": 4.298300500053873e-06, + "loss": 1.3447, + "step": 23920 + }, + { + "epoch": 0.7023606788419755, + "grad_norm": 0.0, + "learning_rate": 4.297519286444305e-06, + "loss": 1.2344, + "step": 23921 + }, + { + "epoch": 0.7023900405191145, + "grad_norm": 0.0, + "learning_rate": 4.296738124403728e-06, + "loss": 1.167, + "step": 23922 + }, + { + "epoch": 0.7024194021962534, + "grad_norm": 0.0, + "learning_rate": 4.295957013939213e-06, + "loss": 1.252, + "step": 23923 + }, + { + "epoch": 0.7024487638733925, + "grad_norm": 0.0, + "learning_rate": 4.295175955057814e-06, + "loss": 1.1216, + "step": 23924 + }, + { + "epoch": 0.7024781255505315, + "grad_norm": 0.0, + "learning_rate": 4.294394947766605e-06, + "loss": 1.3145, + "step": 23925 + }, + { + "epoch": 0.7025074872276704, + "grad_norm": 0.0, + "learning_rate": 4.29361399207264e-06, + "loss": 1.1978, + "step": 23926 + }, + { + "epoch": 0.7025368489048095, + "grad_norm": 0.0, + "learning_rate": 4.292833087982992e-06, + "loss": 1.1929, + "step": 23927 + }, + { + "epoch": 0.7025662105819485, + "grad_norm": 0.0, + "learning_rate": 4.2920522355047076e-06, + "loss": 1.2881, + "step": 23928 + }, + { + "epoch": 0.7025955722590874, + "grad_norm": 0.0, + "learning_rate": 4.291271434644862e-06, + "loss": 1.3223, + "step": 23929 + }, + { + "epoch": 0.7026249339362265, + "grad_norm": 0.0, + "learning_rate": 4.29049068541051e-06, + "loss": 1.2881, + "step": 23930 + }, + { + "epoch": 0.7026542956133655, + "grad_norm": 0.0, + "learning_rate": 4.28970998780871e-06, + "loss": 1.3145, + "step": 23931 + }, + { + "epoch": 0.7026836572905044, + "grad_norm": 0.0, + "learning_rate": 4.288929341846528e-06, + "loss": 1.2432, + "step": 23932 + }, + { + "epoch": 0.7027130189676434, + "grad_norm": 0.0, + "learning_rate": 4.288148747531017e-06, + "loss": 1.25, + "step": 23933 + }, + { + "epoch": 0.7027423806447824, + "grad_norm": 0.0, + "learning_rate": 4.287368204869243e-06, + "loss": 1.2422, + "step": 23934 + }, + { + "epoch": 0.7027717423219214, + "grad_norm": 0.0, + "learning_rate": 4.2865877138682625e-06, + "loss": 1.2754, + "step": 23935 + }, + { + "epoch": 0.7028011039990604, + "grad_norm": 0.0, + "learning_rate": 4.285807274535132e-06, + "loss": 1.2827, + "step": 23936 + }, + { + "epoch": 0.7028304656761994, + "grad_norm": 0.0, + "learning_rate": 4.285026886876906e-06, + "loss": 1.2109, + "step": 23937 + }, + { + "epoch": 0.7028598273533384, + "grad_norm": 0.0, + "learning_rate": 4.284246550900649e-06, + "loss": 1.1792, + "step": 23938 + }, + { + "epoch": 0.7028891890304774, + "grad_norm": 0.0, + "learning_rate": 4.2834662666134145e-06, + "loss": 1.1685, + "step": 23939 + }, + { + "epoch": 0.7029185507076164, + "grad_norm": 0.0, + "learning_rate": 4.282686034022255e-06, + "loss": 1.0889, + "step": 23940 + }, + { + "epoch": 0.7029479123847554, + "grad_norm": 0.0, + "learning_rate": 4.281905853134233e-06, + "loss": 1.2095, + "step": 23941 + }, + { + "epoch": 0.7029772740618944, + "grad_norm": 0.0, + "learning_rate": 4.281125723956397e-06, + "loss": 1.2549, + "step": 23942 + }, + { + "epoch": 0.7030066357390334, + "grad_norm": 0.0, + "learning_rate": 4.28034564649581e-06, + "loss": 1.2148, + "step": 23943 + }, + { + "epoch": 0.7030359974161724, + "grad_norm": 0.0, + "learning_rate": 4.2795656207595225e-06, + "loss": 1.2646, + "step": 23944 + }, + { + "epoch": 0.7030653590933114, + "grad_norm": 0.0, + "learning_rate": 4.278785646754587e-06, + "loss": 1.3271, + "step": 23945 + }, + { + "epoch": 0.7030947207704504, + "grad_norm": 0.0, + "learning_rate": 4.278005724488056e-06, + "loss": 1.0449, + "step": 23946 + }, + { + "epoch": 0.7031240824475894, + "grad_norm": 0.0, + "learning_rate": 4.277225853966987e-06, + "loss": 1.3086, + "step": 23947 + }, + { + "epoch": 0.7031534441247284, + "grad_norm": 0.0, + "learning_rate": 4.276446035198427e-06, + "loss": 1.2871, + "step": 23948 + }, + { + "epoch": 0.7031828058018674, + "grad_norm": 0.0, + "learning_rate": 4.275666268189436e-06, + "loss": 1.1992, + "step": 23949 + }, + { + "epoch": 0.7032121674790064, + "grad_norm": 0.0, + "learning_rate": 4.27488655294706e-06, + "loss": 1.0918, + "step": 23950 + }, + { + "epoch": 0.7032415291561454, + "grad_norm": 0.0, + "learning_rate": 4.274106889478347e-06, + "loss": 1.2197, + "step": 23951 + }, + { + "epoch": 0.7032708908332844, + "grad_norm": 0.0, + "learning_rate": 4.273327277790359e-06, + "loss": 1.2988, + "step": 23952 + }, + { + "epoch": 0.7033002525104234, + "grad_norm": 0.0, + "learning_rate": 4.272547717890133e-06, + "loss": 1.1436, + "step": 23953 + }, + { + "epoch": 0.7033296141875623, + "grad_norm": 0.0, + "learning_rate": 4.271768209784728e-06, + "loss": 1.2383, + "step": 23954 + }, + { + "epoch": 0.7033589758647014, + "grad_norm": 0.0, + "learning_rate": 4.2709887534811855e-06, + "loss": 1.1826, + "step": 23955 + }, + { + "epoch": 0.7033883375418404, + "grad_norm": 0.0, + "learning_rate": 4.270209348986564e-06, + "loss": 1.2363, + "step": 23956 + }, + { + "epoch": 0.7034176992189793, + "grad_norm": 0.0, + "learning_rate": 4.2694299963079025e-06, + "loss": 1.2686, + "step": 23957 + }, + { + "epoch": 0.7034470608961184, + "grad_norm": 0.0, + "learning_rate": 4.268650695452255e-06, + "loss": 1.2803, + "step": 23958 + }, + { + "epoch": 0.7034764225732574, + "grad_norm": 0.0, + "learning_rate": 4.26787144642667e-06, + "loss": 1.2271, + "step": 23959 + }, + { + "epoch": 0.7035057842503963, + "grad_norm": 0.0, + "learning_rate": 4.26709224923819e-06, + "loss": 1.2871, + "step": 23960 + }, + { + "epoch": 0.7035351459275354, + "grad_norm": 0.0, + "learning_rate": 4.266313103893864e-06, + "loss": 1.313, + "step": 23961 + }, + { + "epoch": 0.7035645076046744, + "grad_norm": 0.0, + "learning_rate": 4.2655340104007325e-06, + "loss": 1.2998, + "step": 23962 + }, + { + "epoch": 0.7035938692818133, + "grad_norm": 0.0, + "learning_rate": 4.2647549687658494e-06, + "loss": 1.2158, + "step": 23963 + }, + { + "epoch": 0.7036232309589524, + "grad_norm": 0.0, + "learning_rate": 4.263975978996252e-06, + "loss": 1.3623, + "step": 23964 + }, + { + "epoch": 0.7036525926360914, + "grad_norm": 0.0, + "learning_rate": 4.2631970410989934e-06, + "loss": 1.3188, + "step": 23965 + }, + { + "epoch": 0.7036819543132303, + "grad_norm": 0.0, + "learning_rate": 4.2624181550811084e-06, + "loss": 1.2241, + "step": 23966 + }, + { + "epoch": 0.7037113159903694, + "grad_norm": 0.0, + "learning_rate": 4.26163932094965e-06, + "loss": 1.2842, + "step": 23967 + }, + { + "epoch": 0.7037406776675084, + "grad_norm": 0.0, + "learning_rate": 4.260860538711657e-06, + "loss": 1.1055, + "step": 23968 + }, + { + "epoch": 0.7037700393446473, + "grad_norm": 0.0, + "learning_rate": 4.26008180837417e-06, + "loss": 1.3242, + "step": 23969 + }, + { + "epoch": 0.7037994010217864, + "grad_norm": 0.0, + "learning_rate": 4.259303129944236e-06, + "loss": 1.1748, + "step": 23970 + }, + { + "epoch": 0.7038287626989254, + "grad_norm": 0.0, + "learning_rate": 4.258524503428888e-06, + "loss": 1.1372, + "step": 23971 + }, + { + "epoch": 0.7038581243760643, + "grad_norm": 0.0, + "learning_rate": 4.257745928835178e-06, + "loss": 1.4175, + "step": 23972 + }, + { + "epoch": 0.7038874860532034, + "grad_norm": 0.0, + "learning_rate": 4.256967406170138e-06, + "loss": 1.1631, + "step": 23973 + }, + { + "epoch": 0.7039168477303424, + "grad_norm": 0.0, + "learning_rate": 4.256188935440817e-06, + "loss": 1.2822, + "step": 23974 + }, + { + "epoch": 0.7039462094074813, + "grad_norm": 0.0, + "learning_rate": 4.255410516654247e-06, + "loss": 1.1943, + "step": 23975 + }, + { + "epoch": 0.7039755710846204, + "grad_norm": 0.0, + "learning_rate": 4.2546321498174736e-06, + "loss": 1.1426, + "step": 23976 + }, + { + "epoch": 0.7040049327617593, + "grad_norm": 0.0, + "learning_rate": 4.253853834937533e-06, + "loss": 1.0942, + "step": 23977 + }, + { + "epoch": 0.7040342944388983, + "grad_norm": 0.0, + "learning_rate": 4.253075572021465e-06, + "loss": 1.1689, + "step": 23978 + }, + { + "epoch": 0.7040636561160374, + "grad_norm": 0.0, + "learning_rate": 4.252297361076307e-06, + "loss": 1.165, + "step": 23979 + }, + { + "epoch": 0.7040930177931763, + "grad_norm": 0.0, + "learning_rate": 4.25151920210909e-06, + "loss": 1.2021, + "step": 23980 + }, + { + "epoch": 0.7041223794703153, + "grad_norm": 0.0, + "learning_rate": 4.250741095126862e-06, + "loss": 1.2188, + "step": 23981 + }, + { + "epoch": 0.7041517411474544, + "grad_norm": 0.0, + "learning_rate": 4.249963040136651e-06, + "loss": 1.25, + "step": 23982 + }, + { + "epoch": 0.7041811028245933, + "grad_norm": 0.0, + "learning_rate": 4.2491850371455005e-06, + "loss": 1.2002, + "step": 23983 + }, + { + "epoch": 0.7042104645017323, + "grad_norm": 0.0, + "learning_rate": 4.248407086160442e-06, + "loss": 1.3413, + "step": 23984 + }, + { + "epoch": 0.7042398261788714, + "grad_norm": 0.0, + "learning_rate": 4.247629187188512e-06, + "loss": 1.1689, + "step": 23985 + }, + { + "epoch": 0.7042691878560103, + "grad_norm": 0.0, + "learning_rate": 4.246851340236741e-06, + "loss": 1.3091, + "step": 23986 + }, + { + "epoch": 0.7042985495331493, + "grad_norm": 0.0, + "learning_rate": 4.24607354531217e-06, + "loss": 1.332, + "step": 23987 + }, + { + "epoch": 0.7043279112102884, + "grad_norm": 0.0, + "learning_rate": 4.245295802421828e-06, + "loss": 1.2119, + "step": 23988 + }, + { + "epoch": 0.7043572728874273, + "grad_norm": 0.0, + "learning_rate": 4.2445181115727475e-06, + "loss": 1.2314, + "step": 23989 + }, + { + "epoch": 0.7043866345645663, + "grad_norm": 0.0, + "learning_rate": 4.243740472771968e-06, + "loss": 1.2031, + "step": 23990 + }, + { + "epoch": 0.7044159962417054, + "grad_norm": 0.0, + "learning_rate": 4.242962886026512e-06, + "loss": 1.2217, + "step": 23991 + }, + { + "epoch": 0.7044453579188443, + "grad_norm": 0.0, + "learning_rate": 4.242185351343423e-06, + "loss": 1.1875, + "step": 23992 + }, + { + "epoch": 0.7044747195959833, + "grad_norm": 0.0, + "learning_rate": 4.2414078687297256e-06, + "loss": 1.3555, + "step": 23993 + }, + { + "epoch": 0.7045040812731224, + "grad_norm": 0.0, + "learning_rate": 4.240630438192451e-06, + "loss": 1.2158, + "step": 23994 + }, + { + "epoch": 0.7045334429502613, + "grad_norm": 0.0, + "learning_rate": 4.239853059738628e-06, + "loss": 1.2324, + "step": 23995 + }, + { + "epoch": 0.7045628046274003, + "grad_norm": 0.0, + "learning_rate": 4.239075733375292e-06, + "loss": 1.335, + "step": 23996 + }, + { + "epoch": 0.7045921663045394, + "grad_norm": 0.0, + "learning_rate": 4.238298459109466e-06, + "loss": 1.2617, + "step": 23997 + }, + { + "epoch": 0.7046215279816783, + "grad_norm": 0.0, + "learning_rate": 4.237521236948186e-06, + "loss": 1.293, + "step": 23998 + }, + { + "epoch": 0.7046508896588173, + "grad_norm": 0.0, + "learning_rate": 4.236744066898476e-06, + "loss": 1.3135, + "step": 23999 + }, + { + "epoch": 0.7046802513359564, + "grad_norm": 0.0, + "learning_rate": 4.235966948967364e-06, + "loss": 1.2246, + "step": 24000 + }, + { + "epoch": 0.7047096130130953, + "grad_norm": 0.0, + "learning_rate": 4.235189883161881e-06, + "loss": 1.2197, + "step": 24001 + }, + { + "epoch": 0.7047389746902343, + "grad_norm": 0.0, + "learning_rate": 4.234412869489053e-06, + "loss": 1.2568, + "step": 24002 + }, + { + "epoch": 0.7047683363673733, + "grad_norm": 0.0, + "learning_rate": 4.233635907955907e-06, + "loss": 1.2295, + "step": 24003 + }, + { + "epoch": 0.7047976980445123, + "grad_norm": 0.0, + "learning_rate": 4.232858998569462e-06, + "loss": 1.2412, + "step": 24004 + }, + { + "epoch": 0.7048270597216513, + "grad_norm": 0.0, + "learning_rate": 4.232082141336757e-06, + "loss": 1.2197, + "step": 24005 + }, + { + "epoch": 0.7048564213987903, + "grad_norm": 0.0, + "learning_rate": 4.231305336264804e-06, + "loss": 1.415, + "step": 24006 + }, + { + "epoch": 0.7048857830759293, + "grad_norm": 0.0, + "learning_rate": 4.230528583360639e-06, + "loss": 1.3945, + "step": 24007 + }, + { + "epoch": 0.7049151447530683, + "grad_norm": 0.0, + "learning_rate": 4.229751882631281e-06, + "loss": 1.2227, + "step": 24008 + }, + { + "epoch": 0.7049445064302073, + "grad_norm": 0.0, + "learning_rate": 4.228975234083756e-06, + "loss": 1.2192, + "step": 24009 + }, + { + "epoch": 0.7049738681073463, + "grad_norm": 0.0, + "learning_rate": 4.228198637725085e-06, + "loss": 1.3105, + "step": 24010 + }, + { + "epoch": 0.7050032297844853, + "grad_norm": 0.0, + "learning_rate": 4.2274220935622875e-06, + "loss": 1.2803, + "step": 24011 + }, + { + "epoch": 0.7050325914616243, + "grad_norm": 0.0, + "learning_rate": 4.226645601602395e-06, + "loss": 1.1675, + "step": 24012 + }, + { + "epoch": 0.7050619531387633, + "grad_norm": 0.0, + "learning_rate": 4.225869161852422e-06, + "loss": 1.2329, + "step": 24013 + }, + { + "epoch": 0.7050913148159023, + "grad_norm": 0.0, + "learning_rate": 4.225092774319395e-06, + "loss": 1.335, + "step": 24014 + }, + { + "epoch": 0.7051206764930413, + "grad_norm": 0.0, + "learning_rate": 4.22431643901033e-06, + "loss": 1.1885, + "step": 24015 + }, + { + "epoch": 0.7051500381701803, + "grad_norm": 0.0, + "learning_rate": 4.223540155932254e-06, + "loss": 1.2725, + "step": 24016 + }, + { + "epoch": 0.7051793998473193, + "grad_norm": 0.0, + "learning_rate": 4.222763925092183e-06, + "loss": 1.1504, + "step": 24017 + }, + { + "epoch": 0.7052087615244583, + "grad_norm": 0.0, + "learning_rate": 4.2219877464971395e-06, + "loss": 1.3672, + "step": 24018 + }, + { + "epoch": 0.7052381232015973, + "grad_norm": 0.0, + "learning_rate": 4.221211620154139e-06, + "loss": 1.3115, + "step": 24019 + }, + { + "epoch": 0.7052674848787363, + "grad_norm": 0.0, + "learning_rate": 4.2204355460702e-06, + "loss": 1.1968, + "step": 24020 + }, + { + "epoch": 0.7052968465558753, + "grad_norm": 0.0, + "learning_rate": 4.219659524252344e-06, + "loss": 1.3184, + "step": 24021 + }, + { + "epoch": 0.7053262082330143, + "grad_norm": 0.0, + "learning_rate": 4.218883554707585e-06, + "loss": 1.1543, + "step": 24022 + }, + { + "epoch": 0.7053555699101532, + "grad_norm": 0.0, + "learning_rate": 4.218107637442946e-06, + "loss": 1.0186, + "step": 24023 + }, + { + "epoch": 0.7053849315872923, + "grad_norm": 0.0, + "learning_rate": 4.217331772465435e-06, + "loss": 1.1899, + "step": 24024 + }, + { + "epoch": 0.7054142932644313, + "grad_norm": 0.0, + "learning_rate": 4.2165559597820795e-06, + "loss": 1.2339, + "step": 24025 + }, + { + "epoch": 0.7054436549415702, + "grad_norm": 0.0, + "learning_rate": 4.2157801993998896e-06, + "loss": 1.208, + "step": 24026 + }, + { + "epoch": 0.7054730166187093, + "grad_norm": 0.0, + "learning_rate": 4.21500449132588e-06, + "loss": 1.2046, + "step": 24027 + }, + { + "epoch": 0.7055023782958483, + "grad_norm": 0.0, + "learning_rate": 4.214228835567066e-06, + "loss": 1.1914, + "step": 24028 + }, + { + "epoch": 0.7055317399729872, + "grad_norm": 0.0, + "learning_rate": 4.2134532321304585e-06, + "loss": 1.2812, + "step": 24029 + }, + { + "epoch": 0.7055611016501263, + "grad_norm": 0.0, + "learning_rate": 4.212677681023079e-06, + "loss": 1.2822, + "step": 24030 + }, + { + "epoch": 0.7055904633272653, + "grad_norm": 0.0, + "learning_rate": 4.2119021822519355e-06, + "loss": 1.2524, + "step": 24031 + }, + { + "epoch": 0.7056198250044042, + "grad_norm": 0.0, + "learning_rate": 4.211126735824045e-06, + "loss": 1.2715, + "step": 24032 + }, + { + "epoch": 0.7056491866815432, + "grad_norm": 0.0, + "learning_rate": 4.210351341746419e-06, + "loss": 1.2197, + "step": 24033 + }, + { + "epoch": 0.7056785483586823, + "grad_norm": 0.0, + "learning_rate": 4.209576000026068e-06, + "loss": 1.1484, + "step": 24034 + }, + { + "epoch": 0.7057079100358212, + "grad_norm": 0.0, + "learning_rate": 4.20880071067e-06, + "loss": 1.2627, + "step": 24035 + }, + { + "epoch": 0.7057372717129602, + "grad_norm": 0.0, + "learning_rate": 4.208025473685234e-06, + "loss": 1.3955, + "step": 24036 + }, + { + "epoch": 0.7057666333900993, + "grad_norm": 0.0, + "learning_rate": 4.207250289078778e-06, + "loss": 1.2041, + "step": 24037 + }, + { + "epoch": 0.7057959950672382, + "grad_norm": 0.0, + "learning_rate": 4.206475156857638e-06, + "loss": 1.2891, + "step": 24038 + }, + { + "epoch": 0.7058253567443772, + "grad_norm": 0.0, + "learning_rate": 4.205700077028829e-06, + "loss": 1.124, + "step": 24039 + }, + { + "epoch": 0.7058547184215163, + "grad_norm": 0.0, + "learning_rate": 4.204925049599355e-06, + "loss": 1.2314, + "step": 24040 + }, + { + "epoch": 0.7058840800986552, + "grad_norm": 0.0, + "learning_rate": 4.204150074576232e-06, + "loss": 1.231, + "step": 24041 + }, + { + "epoch": 0.7059134417757942, + "grad_norm": 0.0, + "learning_rate": 4.203375151966463e-06, + "loss": 1.2246, + "step": 24042 + }, + { + "epoch": 0.7059428034529333, + "grad_norm": 0.0, + "learning_rate": 4.2026002817770586e-06, + "loss": 1.1646, + "step": 24043 + }, + { + "epoch": 0.7059721651300722, + "grad_norm": 0.0, + "learning_rate": 4.201825464015021e-06, + "loss": 1.2227, + "step": 24044 + }, + { + "epoch": 0.7060015268072112, + "grad_norm": 0.0, + "learning_rate": 4.201050698687363e-06, + "loss": 1.208, + "step": 24045 + }, + { + "epoch": 0.7060308884843502, + "grad_norm": 0.0, + "learning_rate": 4.2002759858010855e-06, + "loss": 1.2646, + "step": 24046 + }, + { + "epoch": 0.7060602501614892, + "grad_norm": 0.0, + "learning_rate": 4.199501325363201e-06, + "loss": 1.1416, + "step": 24047 + }, + { + "epoch": 0.7060896118386282, + "grad_norm": 0.0, + "learning_rate": 4.198726717380712e-06, + "loss": 1.1304, + "step": 24048 + }, + { + "epoch": 0.7061189735157672, + "grad_norm": 0.0, + "learning_rate": 4.197952161860619e-06, + "loss": 1.2549, + "step": 24049 + }, + { + "epoch": 0.7061483351929062, + "grad_norm": 0.0, + "learning_rate": 4.197177658809934e-06, + "loss": 1.2676, + "step": 24050 + }, + { + "epoch": 0.7061776968700452, + "grad_norm": 0.0, + "learning_rate": 4.196403208235659e-06, + "loss": 1.2881, + "step": 24051 + }, + { + "epoch": 0.7062070585471842, + "grad_norm": 0.0, + "learning_rate": 4.1956288101447954e-06, + "loss": 1.165, + "step": 24052 + }, + { + "epoch": 0.7062364202243232, + "grad_norm": 0.0, + "learning_rate": 4.194854464544343e-06, + "loss": 1.2461, + "step": 24053 + }, + { + "epoch": 0.7062657819014622, + "grad_norm": 0.0, + "learning_rate": 4.194080171441311e-06, + "loss": 1.2676, + "step": 24054 + }, + { + "epoch": 0.7062951435786012, + "grad_norm": 0.0, + "learning_rate": 4.193305930842697e-06, + "loss": 1.1313, + "step": 24055 + }, + { + "epoch": 0.7063245052557402, + "grad_norm": 0.0, + "learning_rate": 4.192531742755508e-06, + "loss": 1.1479, + "step": 24056 + }, + { + "epoch": 0.7063538669328792, + "grad_norm": 0.0, + "learning_rate": 4.191757607186741e-06, + "loss": 1.3721, + "step": 24057 + }, + { + "epoch": 0.7063832286100182, + "grad_norm": 0.0, + "learning_rate": 4.190983524143399e-06, + "loss": 1.0283, + "step": 24058 + }, + { + "epoch": 0.7064125902871572, + "grad_norm": 0.0, + "learning_rate": 4.1902094936324775e-06, + "loss": 1.3486, + "step": 24059 + }, + { + "epoch": 0.7064419519642962, + "grad_norm": 0.0, + "learning_rate": 4.189435515660979e-06, + "loss": 1.1558, + "step": 24060 + }, + { + "epoch": 0.7064713136414352, + "grad_norm": 0.0, + "learning_rate": 4.1886615902359055e-06, + "loss": 1.3179, + "step": 24061 + }, + { + "epoch": 0.7065006753185742, + "grad_norm": 0.0, + "learning_rate": 4.18788771736425e-06, + "loss": 1.2314, + "step": 24062 + }, + { + "epoch": 0.7065300369957132, + "grad_norm": 0.0, + "learning_rate": 4.1871138970530175e-06, + "loss": 1.3457, + "step": 24063 + }, + { + "epoch": 0.7065593986728522, + "grad_norm": 0.0, + "learning_rate": 4.186340129309201e-06, + "loss": 1.1436, + "step": 24064 + }, + { + "epoch": 0.7065887603499912, + "grad_norm": 0.0, + "learning_rate": 4.185566414139801e-06, + "loss": 1.2656, + "step": 24065 + }, + { + "epoch": 0.7066181220271301, + "grad_norm": 0.0, + "learning_rate": 4.184792751551814e-06, + "loss": 1.2593, + "step": 24066 + }, + { + "epoch": 0.7066474837042692, + "grad_norm": 0.0, + "learning_rate": 4.184019141552236e-06, + "loss": 1.1509, + "step": 24067 + }, + { + "epoch": 0.7066768453814082, + "grad_norm": 0.0, + "learning_rate": 4.1832455841480616e-06, + "loss": 1.3135, + "step": 24068 + }, + { + "epoch": 0.7067062070585471, + "grad_norm": 0.0, + "learning_rate": 4.182472079346283e-06, + "loss": 1.3901, + "step": 24069 + }, + { + "epoch": 0.7067355687356862, + "grad_norm": 0.0, + "learning_rate": 4.181698627153904e-06, + "loss": 1.1279, + "step": 24070 + }, + { + "epoch": 0.7067649304128252, + "grad_norm": 0.0, + "learning_rate": 4.180925227577911e-06, + "loss": 1.1914, + "step": 24071 + }, + { + "epoch": 0.7067942920899641, + "grad_norm": 0.0, + "learning_rate": 4.180151880625304e-06, + "loss": 1.3076, + "step": 24072 + }, + { + "epoch": 0.7068236537671032, + "grad_norm": 0.0, + "learning_rate": 4.179378586303071e-06, + "loss": 1.3584, + "step": 24073 + }, + { + "epoch": 0.7068530154442422, + "grad_norm": 0.0, + "learning_rate": 4.178605344618212e-06, + "loss": 1.1934, + "step": 24074 + }, + { + "epoch": 0.7068823771213811, + "grad_norm": 0.0, + "learning_rate": 4.1778321555777145e-06, + "loss": 1.1748, + "step": 24075 + }, + { + "epoch": 0.7069117387985202, + "grad_norm": 0.0, + "learning_rate": 4.177059019188573e-06, + "loss": 1.1631, + "step": 24076 + }, + { + "epoch": 0.7069411004756592, + "grad_norm": 0.0, + "learning_rate": 4.176285935457778e-06, + "loss": 1.2842, + "step": 24077 + }, + { + "epoch": 0.7069704621527981, + "grad_norm": 0.0, + "learning_rate": 4.175512904392317e-06, + "loss": 1.3115, + "step": 24078 + }, + { + "epoch": 0.7069998238299372, + "grad_norm": 0.0, + "learning_rate": 4.174739925999188e-06, + "loss": 1.4053, + "step": 24079 + }, + { + "epoch": 0.7070291855070762, + "grad_norm": 0.0, + "learning_rate": 4.173967000285375e-06, + "loss": 1.3018, + "step": 24080 + }, + { + "epoch": 0.7070585471842151, + "grad_norm": 0.0, + "learning_rate": 4.173194127257872e-06, + "loss": 1.1709, + "step": 24081 + }, + { + "epoch": 0.7070879088613542, + "grad_norm": 0.0, + "learning_rate": 4.172421306923668e-06, + "loss": 1.2354, + "step": 24082 + }, + { + "epoch": 0.7071172705384932, + "grad_norm": 0.0, + "learning_rate": 4.171648539289751e-06, + "loss": 1.1802, + "step": 24083 + }, + { + "epoch": 0.7071466322156321, + "grad_norm": 0.0, + "learning_rate": 4.170875824363105e-06, + "loss": 1.1694, + "step": 24084 + }, + { + "epoch": 0.7071759938927712, + "grad_norm": 0.0, + "learning_rate": 4.170103162150726e-06, + "loss": 1.0684, + "step": 24085 + }, + { + "epoch": 0.7072053555699102, + "grad_norm": 0.0, + "learning_rate": 4.169330552659593e-06, + "loss": 1.1196, + "step": 24086 + }, + { + "epoch": 0.7072347172470491, + "grad_norm": 0.0, + "learning_rate": 4.168557995896701e-06, + "loss": 1.23, + "step": 24087 + }, + { + "epoch": 0.7072640789241882, + "grad_norm": 0.0, + "learning_rate": 4.1677854918690305e-06, + "loss": 1.1523, + "step": 24088 + }, + { + "epoch": 0.7072934406013272, + "grad_norm": 0.0, + "learning_rate": 4.167013040583568e-06, + "loss": 1.2305, + "step": 24089 + }, + { + "epoch": 0.7073228022784661, + "grad_norm": 0.0, + "learning_rate": 4.166240642047303e-06, + "loss": 1.376, + "step": 24090 + }, + { + "epoch": 0.7073521639556052, + "grad_norm": 0.0, + "learning_rate": 4.165468296267219e-06, + "loss": 1.1479, + "step": 24091 + }, + { + "epoch": 0.7073815256327441, + "grad_norm": 0.0, + "learning_rate": 4.164696003250299e-06, + "loss": 1.2656, + "step": 24092 + }, + { + "epoch": 0.7074108873098831, + "grad_norm": 0.0, + "learning_rate": 4.163923763003524e-06, + "loss": 1.2461, + "step": 24093 + }, + { + "epoch": 0.7074402489870222, + "grad_norm": 0.0, + "learning_rate": 4.163151575533884e-06, + "loss": 1.1377, + "step": 24094 + }, + { + "epoch": 0.7074696106641611, + "grad_norm": 0.0, + "learning_rate": 4.162379440848355e-06, + "loss": 1.2119, + "step": 24095 + }, + { + "epoch": 0.7074989723413001, + "grad_norm": 0.0, + "learning_rate": 4.161607358953928e-06, + "loss": 1.1958, + "step": 24096 + }, + { + "epoch": 0.7075283340184392, + "grad_norm": 0.0, + "learning_rate": 4.1608353298575825e-06, + "loss": 1.2041, + "step": 24097 + }, + { + "epoch": 0.7075576956955781, + "grad_norm": 0.0, + "learning_rate": 4.160063353566293e-06, + "loss": 1.2471, + "step": 24098 + }, + { + "epoch": 0.7075870573727171, + "grad_norm": 0.0, + "learning_rate": 4.15929143008705e-06, + "loss": 1.3135, + "step": 24099 + }, + { + "epoch": 0.7076164190498562, + "grad_norm": 0.0, + "learning_rate": 4.158519559426831e-06, + "loss": 1.207, + "step": 24100 + }, + { + "epoch": 0.7076457807269951, + "grad_norm": 0.0, + "learning_rate": 4.157747741592616e-06, + "loss": 1.2959, + "step": 24101 + }, + { + "epoch": 0.7076751424041341, + "grad_norm": 0.0, + "learning_rate": 4.15697597659138e-06, + "loss": 1.2266, + "step": 24102 + }, + { + "epoch": 0.7077045040812732, + "grad_norm": 0.0, + "learning_rate": 4.15620426443011e-06, + "loss": 1.3037, + "step": 24103 + }, + { + "epoch": 0.7077338657584121, + "grad_norm": 0.0, + "learning_rate": 4.155432605115779e-06, + "loss": 1.1621, + "step": 24104 + }, + { + "epoch": 0.7077632274355511, + "grad_norm": 0.0, + "learning_rate": 4.154660998655371e-06, + "loss": 1.4746, + "step": 24105 + }, + { + "epoch": 0.7077925891126902, + "grad_norm": 0.0, + "learning_rate": 4.15388944505586e-06, + "loss": 1.1479, + "step": 24106 + }, + { + "epoch": 0.7078219507898291, + "grad_norm": 0.0, + "learning_rate": 4.153117944324224e-06, + "loss": 1.2422, + "step": 24107 + }, + { + "epoch": 0.7078513124669681, + "grad_norm": 0.0, + "learning_rate": 4.152346496467441e-06, + "loss": 1.1382, + "step": 24108 + }, + { + "epoch": 0.7078806741441072, + "grad_norm": 0.0, + "learning_rate": 4.151575101492481e-06, + "loss": 1.125, + "step": 24109 + }, + { + "epoch": 0.7079100358212461, + "grad_norm": 0.0, + "learning_rate": 4.15080375940633e-06, + "loss": 1.2446, + "step": 24110 + }, + { + "epoch": 0.7079393974983851, + "grad_norm": 0.0, + "learning_rate": 4.150032470215955e-06, + "loss": 1.2578, + "step": 24111 + }, + { + "epoch": 0.7079687591755242, + "grad_norm": 0.0, + "learning_rate": 4.149261233928337e-06, + "loss": 1.2246, + "step": 24112 + }, + { + "epoch": 0.7079981208526631, + "grad_norm": 0.0, + "learning_rate": 4.1484900505504455e-06, + "loss": 1.3574, + "step": 24113 + }, + { + "epoch": 0.7080274825298021, + "grad_norm": 0.0, + "learning_rate": 4.14771892008926e-06, + "loss": 1.2188, + "step": 24114 + }, + { + "epoch": 0.7080568442069411, + "grad_norm": 0.0, + "learning_rate": 4.146947842551752e-06, + "loss": 1.248, + "step": 24115 + }, + { + "epoch": 0.7080862058840801, + "grad_norm": 0.0, + "learning_rate": 4.146176817944893e-06, + "loss": 1.2383, + "step": 24116 + }, + { + "epoch": 0.7081155675612191, + "grad_norm": 0.0, + "learning_rate": 4.145405846275656e-06, + "loss": 1.2871, + "step": 24117 + }, + { + "epoch": 0.7081449292383581, + "grad_norm": 0.0, + "learning_rate": 4.1446349275510104e-06, + "loss": 1.2344, + "step": 24118 + }, + { + "epoch": 0.7081742909154971, + "grad_norm": 0.0, + "learning_rate": 4.143864061777934e-06, + "loss": 1.333, + "step": 24119 + }, + { + "epoch": 0.7082036525926361, + "grad_norm": 0.0, + "learning_rate": 4.14309324896339e-06, + "loss": 1.2056, + "step": 24120 + }, + { + "epoch": 0.7082330142697751, + "grad_norm": 0.0, + "learning_rate": 4.142322489114359e-06, + "loss": 1.1567, + "step": 24121 + }, + { + "epoch": 0.7082623759469141, + "grad_norm": 0.0, + "learning_rate": 4.141551782237801e-06, + "loss": 1.2017, + "step": 24122 + }, + { + "epoch": 0.7082917376240531, + "grad_norm": 0.0, + "learning_rate": 4.140781128340695e-06, + "loss": 1.1484, + "step": 24123 + }, + { + "epoch": 0.7083210993011921, + "grad_norm": 0.0, + "learning_rate": 4.140010527430006e-06, + "loss": 1.2441, + "step": 24124 + }, + { + "epoch": 0.7083504609783311, + "grad_norm": 0.0, + "learning_rate": 4.139239979512703e-06, + "loss": 1.1567, + "step": 24125 + }, + { + "epoch": 0.7083798226554701, + "grad_norm": 0.0, + "learning_rate": 4.138469484595753e-06, + "loss": 1.1411, + "step": 24126 + }, + { + "epoch": 0.7084091843326091, + "grad_norm": 0.0, + "learning_rate": 4.137699042686121e-06, + "loss": 1.3252, + "step": 24127 + }, + { + "epoch": 0.7084385460097481, + "grad_norm": 0.0, + "learning_rate": 4.136928653790781e-06, + "loss": 1.2778, + "step": 24128 + }, + { + "epoch": 0.708467907686887, + "grad_norm": 0.0, + "learning_rate": 4.136158317916694e-06, + "loss": 1.2285, + "step": 24129 + }, + { + "epoch": 0.7084972693640261, + "grad_norm": 0.0, + "learning_rate": 4.135388035070833e-06, + "loss": 1.3096, + "step": 24130 + }, + { + "epoch": 0.7085266310411651, + "grad_norm": 0.0, + "learning_rate": 4.134617805260155e-06, + "loss": 1.375, + "step": 24131 + }, + { + "epoch": 0.708555992718304, + "grad_norm": 0.0, + "learning_rate": 4.133847628491639e-06, + "loss": 1.1787, + "step": 24132 + }, + { + "epoch": 0.708585354395443, + "grad_norm": 0.0, + "learning_rate": 4.133077504772232e-06, + "loss": 1.2207, + "step": 24133 + }, + { + "epoch": 0.7086147160725821, + "grad_norm": 0.0, + "learning_rate": 4.132307434108914e-06, + "loss": 1.3184, + "step": 24134 + }, + { + "epoch": 0.708644077749721, + "grad_norm": 0.0, + "learning_rate": 4.131537416508638e-06, + "loss": 1.145, + "step": 24135 + }, + { + "epoch": 0.70867343942686, + "grad_norm": 0.0, + "learning_rate": 4.130767451978375e-06, + "loss": 1.1543, + "step": 24136 + }, + { + "epoch": 0.7087028011039991, + "grad_norm": 0.0, + "learning_rate": 4.1299975405250844e-06, + "loss": 1.2031, + "step": 24137 + }, + { + "epoch": 0.708732162781138, + "grad_norm": 0.0, + "learning_rate": 4.1292276821557275e-06, + "loss": 1.2627, + "step": 24138 + }, + { + "epoch": 0.708761524458277, + "grad_norm": 0.0, + "learning_rate": 4.128457876877271e-06, + "loss": 1.1758, + "step": 24139 + }, + { + "epoch": 0.7087908861354161, + "grad_norm": 0.0, + "learning_rate": 4.127688124696674e-06, + "loss": 1.2217, + "step": 24140 + }, + { + "epoch": 0.708820247812555, + "grad_norm": 0.0, + "learning_rate": 4.126918425620897e-06, + "loss": 1.2705, + "step": 24141 + }, + { + "epoch": 0.708849609489694, + "grad_norm": 0.0, + "learning_rate": 4.126148779656896e-06, + "loss": 1.3125, + "step": 24142 + }, + { + "epoch": 0.7088789711668331, + "grad_norm": 0.0, + "learning_rate": 4.125379186811641e-06, + "loss": 1.3887, + "step": 24143 + }, + { + "epoch": 0.708908332843972, + "grad_norm": 0.0, + "learning_rate": 4.124609647092083e-06, + "loss": 1.334, + "step": 24144 + }, + { + "epoch": 0.708937694521111, + "grad_norm": 0.0, + "learning_rate": 4.1238401605051885e-06, + "loss": 1.0464, + "step": 24145 + }, + { + "epoch": 0.7089670561982501, + "grad_norm": 0.0, + "learning_rate": 4.1230707270579106e-06, + "loss": 1.2939, + "step": 24146 + }, + { + "epoch": 0.708996417875389, + "grad_norm": 0.0, + "learning_rate": 4.122301346757207e-06, + "loss": 1.3325, + "step": 24147 + }, + { + "epoch": 0.709025779552528, + "grad_norm": 0.0, + "learning_rate": 4.12153201961004e-06, + "loss": 1.2275, + "step": 24148 + }, + { + "epoch": 0.7090551412296671, + "grad_norm": 0.0, + "learning_rate": 4.120762745623366e-06, + "loss": 1.1611, + "step": 24149 + }, + { + "epoch": 0.709084502906806, + "grad_norm": 0.0, + "learning_rate": 4.11999352480414e-06, + "loss": 1.2266, + "step": 24150 + }, + { + "epoch": 0.709113864583945, + "grad_norm": 0.0, + "learning_rate": 4.119224357159314e-06, + "loss": 1.249, + "step": 24151 + }, + { + "epoch": 0.7091432262610841, + "grad_norm": 0.0, + "learning_rate": 4.1184552426958525e-06, + "loss": 1.188, + "step": 24152 + }, + { + "epoch": 0.709172587938223, + "grad_norm": 0.0, + "learning_rate": 4.117686181420703e-06, + "loss": 1.209, + "step": 24153 + }, + { + "epoch": 0.709201949615362, + "grad_norm": 0.0, + "learning_rate": 4.1169171733408264e-06, + "loss": 1.1689, + "step": 24154 + }, + { + "epoch": 0.709231311292501, + "grad_norm": 0.0, + "learning_rate": 4.116148218463172e-06, + "loss": 1.0479, + "step": 24155 + }, + { + "epoch": 0.70926067296964, + "grad_norm": 0.0, + "learning_rate": 4.1153793167946985e-06, + "loss": 1.3477, + "step": 24156 + }, + { + "epoch": 0.709290034646779, + "grad_norm": 0.0, + "learning_rate": 4.114610468342361e-06, + "loss": 1.3311, + "step": 24157 + }, + { + "epoch": 0.709319396323918, + "grad_norm": 0.0, + "learning_rate": 4.113841673113101e-06, + "loss": 1.3223, + "step": 24158 + }, + { + "epoch": 0.709348758001057, + "grad_norm": 0.0, + "learning_rate": 4.1130729311138825e-06, + "loss": 1.2158, + "step": 24159 + }, + { + "epoch": 0.709378119678196, + "grad_norm": 0.0, + "learning_rate": 4.11230424235165e-06, + "loss": 1.2695, + "step": 24160 + }, + { + "epoch": 0.709407481355335, + "grad_norm": 0.0, + "learning_rate": 4.111535606833361e-06, + "loss": 1.0376, + "step": 24161 + }, + { + "epoch": 0.709436843032474, + "grad_norm": 0.0, + "learning_rate": 4.110767024565961e-06, + "loss": 1.2871, + "step": 24162 + }, + { + "epoch": 0.709466204709613, + "grad_norm": 0.0, + "learning_rate": 4.109998495556405e-06, + "loss": 1.2549, + "step": 24163 + }, + { + "epoch": 0.709495566386752, + "grad_norm": 0.0, + "learning_rate": 4.109230019811641e-06, + "loss": 1.1318, + "step": 24164 + }, + { + "epoch": 0.709524928063891, + "grad_norm": 0.0, + "learning_rate": 4.108461597338618e-06, + "loss": 1.0898, + "step": 24165 + }, + { + "epoch": 0.70955428974103, + "grad_norm": 0.0, + "learning_rate": 4.107693228144288e-06, + "loss": 1.1377, + "step": 24166 + }, + { + "epoch": 0.709583651418169, + "grad_norm": 0.0, + "learning_rate": 4.106924912235592e-06, + "loss": 1.3545, + "step": 24167 + }, + { + "epoch": 0.709613013095308, + "grad_norm": 0.0, + "learning_rate": 4.106156649619488e-06, + "loss": 1.29, + "step": 24168 + }, + { + "epoch": 0.709642374772447, + "grad_norm": 0.0, + "learning_rate": 4.105388440302914e-06, + "loss": 1.3184, + "step": 24169 + }, + { + "epoch": 0.709671736449586, + "grad_norm": 0.0, + "learning_rate": 4.104620284292826e-06, + "loss": 1.2881, + "step": 24170 + }, + { + "epoch": 0.709701098126725, + "grad_norm": 0.0, + "learning_rate": 4.103852181596164e-06, + "loss": 1.2119, + "step": 24171 + }, + { + "epoch": 0.709730459803864, + "grad_norm": 0.0, + "learning_rate": 4.103084132219879e-06, + "loss": 1.3154, + "step": 24172 + }, + { + "epoch": 0.709759821481003, + "grad_norm": 0.0, + "learning_rate": 4.102316136170915e-06, + "loss": 1.1904, + "step": 24173 + }, + { + "epoch": 0.709789183158142, + "grad_norm": 0.0, + "learning_rate": 4.1015481934562164e-06, + "loss": 1.2212, + "step": 24174 + }, + { + "epoch": 0.709818544835281, + "grad_norm": 0.0, + "learning_rate": 4.1007803040827275e-06, + "loss": 1.2012, + "step": 24175 + }, + { + "epoch": 0.70984790651242, + "grad_norm": 0.0, + "learning_rate": 4.10001246805739e-06, + "loss": 1.0835, + "step": 24176 + }, + { + "epoch": 0.709877268189559, + "grad_norm": 0.0, + "learning_rate": 4.099244685387155e-06, + "loss": 1.1675, + "step": 24177 + }, + { + "epoch": 0.709906629866698, + "grad_norm": 0.0, + "learning_rate": 4.0984769560789575e-06, + "loss": 1.1641, + "step": 24178 + }, + { + "epoch": 0.709935991543837, + "grad_norm": 0.0, + "learning_rate": 4.097709280139748e-06, + "loss": 1.2412, + "step": 24179 + }, + { + "epoch": 0.709965353220976, + "grad_norm": 0.0, + "learning_rate": 4.0969416575764605e-06, + "loss": 1.2598, + "step": 24180 + }, + { + "epoch": 0.7099947148981149, + "grad_norm": 0.0, + "learning_rate": 4.096174088396046e-06, + "loss": 1.1934, + "step": 24181 + }, + { + "epoch": 0.710024076575254, + "grad_norm": 0.0, + "learning_rate": 4.095406572605442e-06, + "loss": 1.1235, + "step": 24182 + }, + { + "epoch": 0.710053438252393, + "grad_norm": 0.0, + "learning_rate": 4.0946391102115875e-06, + "loss": 1.1445, + "step": 24183 + }, + { + "epoch": 0.7100827999295319, + "grad_norm": 0.0, + "learning_rate": 4.09387170122142e-06, + "loss": 1.2881, + "step": 24184 + }, + { + "epoch": 0.710112161606671, + "grad_norm": 0.0, + "learning_rate": 4.093104345641887e-06, + "loss": 1.248, + "step": 24185 + }, + { + "epoch": 0.71014152328381, + "grad_norm": 0.0, + "learning_rate": 4.092337043479925e-06, + "loss": 1.186, + "step": 24186 + }, + { + "epoch": 0.7101708849609489, + "grad_norm": 0.0, + "learning_rate": 4.091569794742468e-06, + "loss": 1.1602, + "step": 24187 + }, + { + "epoch": 0.710200246638088, + "grad_norm": 0.0, + "learning_rate": 4.090802599436462e-06, + "loss": 1.2261, + "step": 24188 + }, + { + "epoch": 0.710229608315227, + "grad_norm": 0.0, + "learning_rate": 4.090035457568842e-06, + "loss": 1.1582, + "step": 24189 + }, + { + "epoch": 0.7102589699923659, + "grad_norm": 0.0, + "learning_rate": 4.089268369146544e-06, + "loss": 1.3564, + "step": 24190 + }, + { + "epoch": 0.710288331669505, + "grad_norm": 0.0, + "learning_rate": 4.088501334176502e-06, + "loss": 1.2251, + "step": 24191 + }, + { + "epoch": 0.710317693346644, + "grad_norm": 0.0, + "learning_rate": 4.087734352665661e-06, + "loss": 1.293, + "step": 24192 + }, + { + "epoch": 0.7103470550237829, + "grad_norm": 0.0, + "learning_rate": 4.086967424620949e-06, + "loss": 1.3086, + "step": 24193 + }, + { + "epoch": 0.710376416700922, + "grad_norm": 0.0, + "learning_rate": 4.086200550049309e-06, + "loss": 1.3887, + "step": 24194 + }, + { + "epoch": 0.710405778378061, + "grad_norm": 0.0, + "learning_rate": 4.085433728957671e-06, + "loss": 1.1899, + "step": 24195 + }, + { + "epoch": 0.7104351400551999, + "grad_norm": 0.0, + "learning_rate": 4.084666961352966e-06, + "loss": 1.209, + "step": 24196 + }, + { + "epoch": 0.710464501732339, + "grad_norm": 0.0, + "learning_rate": 4.083900247242138e-06, + "loss": 1.209, + "step": 24197 + }, + { + "epoch": 0.710493863409478, + "grad_norm": 0.0, + "learning_rate": 4.083133586632114e-06, + "loss": 1.1816, + "step": 24198 + }, + { + "epoch": 0.7105232250866169, + "grad_norm": 0.0, + "learning_rate": 4.082366979529828e-06, + "loss": 1.3594, + "step": 24199 + }, + { + "epoch": 0.710552586763756, + "grad_norm": 0.0, + "learning_rate": 4.08160042594221e-06, + "loss": 1.3145, + "step": 24200 + }, + { + "epoch": 0.710581948440895, + "grad_norm": 0.0, + "learning_rate": 4.080833925876199e-06, + "loss": 1.1816, + "step": 24201 + }, + { + "epoch": 0.7106113101180339, + "grad_norm": 0.0, + "learning_rate": 4.080067479338719e-06, + "loss": 1.3281, + "step": 24202 + }, + { + "epoch": 0.710640671795173, + "grad_norm": 0.0, + "learning_rate": 4.079301086336709e-06, + "loss": 1.1113, + "step": 24203 + }, + { + "epoch": 0.710670033472312, + "grad_norm": 0.0, + "learning_rate": 4.078534746877091e-06, + "loss": 1.1602, + "step": 24204 + }, + { + "epoch": 0.7106993951494509, + "grad_norm": 0.0, + "learning_rate": 4.077768460966803e-06, + "loss": 1.1875, + "step": 24205 + }, + { + "epoch": 0.71072875682659, + "grad_norm": 0.0, + "learning_rate": 4.077002228612773e-06, + "loss": 1.0967, + "step": 24206 + }, + { + "epoch": 0.7107581185037289, + "grad_norm": 0.0, + "learning_rate": 4.0762360498219276e-06, + "loss": 1.1274, + "step": 24207 + }, + { + "epoch": 0.7107874801808679, + "grad_norm": 0.0, + "learning_rate": 4.075469924601198e-06, + "loss": 1.3516, + "step": 24208 + }, + { + "epoch": 0.710816841858007, + "grad_norm": 0.0, + "learning_rate": 4.074703852957506e-06, + "loss": 1.3271, + "step": 24209 + }, + { + "epoch": 0.7108462035351459, + "grad_norm": 0.0, + "learning_rate": 4.073937834897789e-06, + "loss": 1.2314, + "step": 24210 + }, + { + "epoch": 0.7108755652122849, + "grad_norm": 0.0, + "learning_rate": 4.0731718704289655e-06, + "loss": 1.4004, + "step": 24211 + }, + { + "epoch": 0.710904926889424, + "grad_norm": 0.0, + "learning_rate": 4.072405959557971e-06, + "loss": 1.3105, + "step": 24212 + }, + { + "epoch": 0.7109342885665629, + "grad_norm": 0.0, + "learning_rate": 4.0716401022917276e-06, + "loss": 1.1689, + "step": 24213 + }, + { + "epoch": 0.7109636502437019, + "grad_norm": 0.0, + "learning_rate": 4.070874298637161e-06, + "loss": 1.1577, + "step": 24214 + }, + { + "epoch": 0.710993011920841, + "grad_norm": 0.0, + "learning_rate": 4.070108548601196e-06, + "loss": 1.3096, + "step": 24215 + }, + { + "epoch": 0.7110223735979799, + "grad_norm": 0.0, + "learning_rate": 4.069342852190754e-06, + "loss": 1.1162, + "step": 24216 + }, + { + "epoch": 0.7110517352751189, + "grad_norm": 0.0, + "learning_rate": 4.068577209412769e-06, + "loss": 1.187, + "step": 24217 + }, + { + "epoch": 0.711081096952258, + "grad_norm": 0.0, + "learning_rate": 4.067811620274154e-06, + "loss": 1.1953, + "step": 24218 + }, + { + "epoch": 0.7111104586293969, + "grad_norm": 0.0, + "learning_rate": 4.067046084781842e-06, + "loss": 1.1655, + "step": 24219 + }, + { + "epoch": 0.7111398203065359, + "grad_norm": 0.0, + "learning_rate": 4.066280602942749e-06, + "loss": 1.2627, + "step": 24220 + }, + { + "epoch": 0.711169181983675, + "grad_norm": 0.0, + "learning_rate": 4.0655151747638014e-06, + "loss": 1.1831, + "step": 24221 + }, + { + "epoch": 0.7111985436608139, + "grad_norm": 0.0, + "learning_rate": 4.064749800251922e-06, + "loss": 1.1528, + "step": 24222 + }, + { + "epoch": 0.7112279053379529, + "grad_norm": 0.0, + "learning_rate": 4.063984479414029e-06, + "loss": 1.1709, + "step": 24223 + }, + { + "epoch": 0.711257267015092, + "grad_norm": 0.0, + "learning_rate": 4.063219212257046e-06, + "loss": 1.2397, + "step": 24224 + }, + { + "epoch": 0.7112866286922309, + "grad_norm": 0.0, + "learning_rate": 4.062453998787886e-06, + "loss": 1.1689, + "step": 24225 + }, + { + "epoch": 0.7113159903693699, + "grad_norm": 0.0, + "learning_rate": 4.061688839013481e-06, + "loss": 1.2549, + "step": 24226 + }, + { + "epoch": 0.711345352046509, + "grad_norm": 0.0, + "learning_rate": 4.060923732940741e-06, + "loss": 1.2563, + "step": 24227 + }, + { + "epoch": 0.7113747137236479, + "grad_norm": 0.0, + "learning_rate": 4.060158680576591e-06, + "loss": 1.1309, + "step": 24228 + }, + { + "epoch": 0.7114040754007869, + "grad_norm": 0.0, + "learning_rate": 4.059393681927945e-06, + "loss": 1.2251, + "step": 24229 + }, + { + "epoch": 0.711433437077926, + "grad_norm": 0.0, + "learning_rate": 4.058628737001726e-06, + "loss": 1.1494, + "step": 24230 + }, + { + "epoch": 0.7114627987550649, + "grad_norm": 0.0, + "learning_rate": 4.057863845804849e-06, + "loss": 1.1992, + "step": 24231 + }, + { + "epoch": 0.7114921604322039, + "grad_norm": 0.0, + "learning_rate": 4.057099008344232e-06, + "loss": 1.2607, + "step": 24232 + }, + { + "epoch": 0.7115215221093429, + "grad_norm": 0.0, + "learning_rate": 4.056334224626786e-06, + "loss": 1.2329, + "step": 24233 + }, + { + "epoch": 0.7115508837864819, + "grad_norm": 0.0, + "learning_rate": 4.055569494659436e-06, + "loss": 1.2568, + "step": 24234 + }, + { + "epoch": 0.7115802454636209, + "grad_norm": 0.0, + "learning_rate": 4.054804818449093e-06, + "loss": 1.2666, + "step": 24235 + }, + { + "epoch": 0.7116096071407598, + "grad_norm": 0.0, + "learning_rate": 4.054040196002668e-06, + "loss": 1.374, + "step": 24236 + }, + { + "epoch": 0.7116389688178989, + "grad_norm": 0.0, + "learning_rate": 4.053275627327086e-06, + "loss": 1.0215, + "step": 24237 + }, + { + "epoch": 0.7116683304950379, + "grad_norm": 0.0, + "learning_rate": 4.0525111124292545e-06, + "loss": 1.2891, + "step": 24238 + }, + { + "epoch": 0.7116976921721768, + "grad_norm": 0.0, + "learning_rate": 4.051746651316089e-06, + "loss": 1.3291, + "step": 24239 + }, + { + "epoch": 0.7117270538493159, + "grad_norm": 0.0, + "learning_rate": 4.050982243994497e-06, + "loss": 1.1919, + "step": 24240 + }, + { + "epoch": 0.7117564155264549, + "grad_norm": 0.0, + "learning_rate": 4.050217890471401e-06, + "loss": 1.2676, + "step": 24241 + }, + { + "epoch": 0.7117857772035938, + "grad_norm": 0.0, + "learning_rate": 4.049453590753704e-06, + "loss": 1.1904, + "step": 24242 + }, + { + "epoch": 0.7118151388807329, + "grad_norm": 0.0, + "learning_rate": 4.048689344848326e-06, + "loss": 1.1289, + "step": 24243 + }, + { + "epoch": 0.7118445005578719, + "grad_norm": 0.0, + "learning_rate": 4.047925152762173e-06, + "loss": 1.332, + "step": 24244 + }, + { + "epoch": 0.7118738622350108, + "grad_norm": 0.0, + "learning_rate": 4.047161014502155e-06, + "loss": 1.2515, + "step": 24245 + }, + { + "epoch": 0.7119032239121499, + "grad_norm": 0.0, + "learning_rate": 4.046396930075188e-06, + "loss": 1.2305, + "step": 24246 + }, + { + "epoch": 0.7119325855892888, + "grad_norm": 0.0, + "learning_rate": 4.045632899488179e-06, + "loss": 1.2607, + "step": 24247 + }, + { + "epoch": 0.7119619472664278, + "grad_norm": 0.0, + "learning_rate": 4.044868922748036e-06, + "loss": 1.2832, + "step": 24248 + }, + { + "epoch": 0.7119913089435669, + "grad_norm": 0.0, + "learning_rate": 4.044104999861665e-06, + "loss": 1.293, + "step": 24249 + }, + { + "epoch": 0.7120206706207058, + "grad_norm": 0.0, + "learning_rate": 4.043341130835981e-06, + "loss": 1.2305, + "step": 24250 + }, + { + "epoch": 0.7120500322978448, + "grad_norm": 0.0, + "learning_rate": 4.042577315677885e-06, + "loss": 1.2334, + "step": 24251 + }, + { + "epoch": 0.7120793939749839, + "grad_norm": 0.0, + "learning_rate": 4.041813554394292e-06, + "loss": 1.1602, + "step": 24252 + }, + { + "epoch": 0.7121087556521228, + "grad_norm": 0.0, + "learning_rate": 4.0410498469921014e-06, + "loss": 1.3154, + "step": 24253 + }, + { + "epoch": 0.7121381173292618, + "grad_norm": 0.0, + "learning_rate": 4.040286193478226e-06, + "loss": 1.1123, + "step": 24254 + }, + { + "epoch": 0.7121674790064009, + "grad_norm": 0.0, + "learning_rate": 4.039522593859569e-06, + "loss": 1.2461, + "step": 24255 + }, + { + "epoch": 0.7121968406835398, + "grad_norm": 0.0, + "learning_rate": 4.038759048143036e-06, + "loss": 1.2163, + "step": 24256 + }, + { + "epoch": 0.7122262023606788, + "grad_norm": 0.0, + "learning_rate": 4.037995556335532e-06, + "loss": 1.082, + "step": 24257 + }, + { + "epoch": 0.7122555640378179, + "grad_norm": 0.0, + "learning_rate": 4.037232118443957e-06, + "loss": 1.2324, + "step": 24258 + }, + { + "epoch": 0.7122849257149568, + "grad_norm": 0.0, + "learning_rate": 4.036468734475223e-06, + "loss": 1.1655, + "step": 24259 + }, + { + "epoch": 0.7123142873920958, + "grad_norm": 0.0, + "learning_rate": 4.035705404436225e-06, + "loss": 1.2812, + "step": 24260 + }, + { + "epoch": 0.7123436490692349, + "grad_norm": 0.0, + "learning_rate": 4.0349421283338754e-06, + "loss": 1.2212, + "step": 24261 + }, + { + "epoch": 0.7123730107463738, + "grad_norm": 0.0, + "learning_rate": 4.034178906175071e-06, + "loss": 1.3213, + "step": 24262 + }, + { + "epoch": 0.7124023724235128, + "grad_norm": 0.0, + "learning_rate": 4.033415737966715e-06, + "loss": 1.228, + "step": 24263 + }, + { + "epoch": 0.7124317341006519, + "grad_norm": 0.0, + "learning_rate": 4.032652623715709e-06, + "loss": 1.2988, + "step": 24264 + }, + { + "epoch": 0.7124610957777908, + "grad_norm": 0.0, + "learning_rate": 4.031889563428949e-06, + "loss": 1.1528, + "step": 24265 + }, + { + "epoch": 0.7124904574549298, + "grad_norm": 0.0, + "learning_rate": 4.031126557113344e-06, + "loss": 1.2725, + "step": 24266 + }, + { + "epoch": 0.7125198191320689, + "grad_norm": 0.0, + "learning_rate": 4.030363604775787e-06, + "loss": 1.1821, + "step": 24267 + }, + { + "epoch": 0.7125491808092078, + "grad_norm": 0.0, + "learning_rate": 4.029600706423185e-06, + "loss": 1.2383, + "step": 24268 + }, + { + "epoch": 0.7125785424863468, + "grad_norm": 0.0, + "learning_rate": 4.028837862062428e-06, + "loss": 1.2158, + "step": 24269 + }, + { + "epoch": 0.7126079041634859, + "grad_norm": 0.0, + "learning_rate": 4.028075071700424e-06, + "loss": 1.1904, + "step": 24270 + }, + { + "epoch": 0.7126372658406248, + "grad_norm": 0.0, + "learning_rate": 4.027312335344065e-06, + "loss": 1.2715, + "step": 24271 + }, + { + "epoch": 0.7126666275177638, + "grad_norm": 0.0, + "learning_rate": 4.0265496530002515e-06, + "loss": 1.2559, + "step": 24272 + }, + { + "epoch": 0.7126959891949028, + "grad_norm": 0.0, + "learning_rate": 4.025787024675875e-06, + "loss": 1.2793, + "step": 24273 + }, + { + "epoch": 0.7127253508720418, + "grad_norm": 0.0, + "learning_rate": 4.02502445037784e-06, + "loss": 1.1001, + "step": 24274 + }, + { + "epoch": 0.7127547125491808, + "grad_norm": 0.0, + "learning_rate": 4.02426193011304e-06, + "loss": 1.1719, + "step": 24275 + }, + { + "epoch": 0.7127840742263198, + "grad_norm": 0.0, + "learning_rate": 4.023499463888366e-06, + "loss": 1.2808, + "step": 24276 + }, + { + "epoch": 0.7128134359034588, + "grad_norm": 0.0, + "learning_rate": 4.02273705171072e-06, + "loss": 1.2168, + "step": 24277 + }, + { + "epoch": 0.7128427975805978, + "grad_norm": 0.0, + "learning_rate": 4.021974693586991e-06, + "loss": 1.2407, + "step": 24278 + }, + { + "epoch": 0.7128721592577368, + "grad_norm": 0.0, + "learning_rate": 4.0212123895240784e-06, + "loss": 1.1831, + "step": 24279 + }, + { + "epoch": 0.7129015209348758, + "grad_norm": 0.0, + "learning_rate": 4.020450139528873e-06, + "loss": 1.0869, + "step": 24280 + }, + { + "epoch": 0.7129308826120148, + "grad_norm": 0.0, + "learning_rate": 4.019687943608269e-06, + "loss": 1.2524, + "step": 24281 + }, + { + "epoch": 0.7129602442891538, + "grad_norm": 0.0, + "learning_rate": 4.0189258017691555e-06, + "loss": 1.0562, + "step": 24282 + }, + { + "epoch": 0.7129896059662928, + "grad_norm": 0.0, + "learning_rate": 4.018163714018431e-06, + "loss": 1.2783, + "step": 24283 + }, + { + "epoch": 0.7130189676434318, + "grad_norm": 0.0, + "learning_rate": 4.017401680362983e-06, + "loss": 1.2754, + "step": 24284 + }, + { + "epoch": 0.7130483293205708, + "grad_norm": 0.0, + "learning_rate": 4.016639700809701e-06, + "loss": 1.2373, + "step": 24285 + }, + { + "epoch": 0.7130776909977098, + "grad_norm": 0.0, + "learning_rate": 4.015877775365483e-06, + "loss": 1.1909, + "step": 24286 + }, + { + "epoch": 0.7131070526748488, + "grad_norm": 0.0, + "learning_rate": 4.0151159040372135e-06, + "loss": 1.2207, + "step": 24287 + }, + { + "epoch": 0.7131364143519878, + "grad_norm": 0.0, + "learning_rate": 4.014354086831784e-06, + "loss": 1.2173, + "step": 24288 + }, + { + "epoch": 0.7131657760291268, + "grad_norm": 0.0, + "learning_rate": 4.0135923237560804e-06, + "loss": 1.2461, + "step": 24289 + }, + { + "epoch": 0.7131951377062657, + "grad_norm": 0.0, + "learning_rate": 4.012830614816998e-06, + "loss": 1.209, + "step": 24290 + }, + { + "epoch": 0.7132244993834048, + "grad_norm": 0.0, + "learning_rate": 4.012068960021418e-06, + "loss": 1.063, + "step": 24291 + }, + { + "epoch": 0.7132538610605438, + "grad_norm": 0.0, + "learning_rate": 4.011307359376235e-06, + "loss": 1.2148, + "step": 24292 + }, + { + "epoch": 0.7132832227376827, + "grad_norm": 0.0, + "learning_rate": 4.010545812888333e-06, + "loss": 1.2939, + "step": 24293 + }, + { + "epoch": 0.7133125844148218, + "grad_norm": 0.0, + "learning_rate": 4.009784320564597e-06, + "loss": 1.1343, + "step": 24294 + }, + { + "epoch": 0.7133419460919608, + "grad_norm": 0.0, + "learning_rate": 4.009022882411918e-06, + "loss": 1.0161, + "step": 24295 + }, + { + "epoch": 0.7133713077690997, + "grad_norm": 0.0, + "learning_rate": 4.008261498437179e-06, + "loss": 1.3125, + "step": 24296 + }, + { + "epoch": 0.7134006694462388, + "grad_norm": 0.0, + "learning_rate": 4.007500168647267e-06, + "loss": 1.2822, + "step": 24297 + }, + { + "epoch": 0.7134300311233778, + "grad_norm": 0.0, + "learning_rate": 4.0067388930490615e-06, + "loss": 1.1641, + "step": 24298 + }, + { + "epoch": 0.7134593928005167, + "grad_norm": 0.0, + "learning_rate": 4.005977671649454e-06, + "loss": 1.2705, + "step": 24299 + }, + { + "epoch": 0.7134887544776558, + "grad_norm": 0.0, + "learning_rate": 4.0052165044553225e-06, + "loss": 1.3281, + "step": 24300 + }, + { + "epoch": 0.7135181161547948, + "grad_norm": 0.0, + "learning_rate": 4.0044553914735575e-06, + "loss": 1.1201, + "step": 24301 + }, + { + "epoch": 0.7135474778319337, + "grad_norm": 0.0, + "learning_rate": 4.003694332711035e-06, + "loss": 1.229, + "step": 24302 + }, + { + "epoch": 0.7135768395090728, + "grad_norm": 0.0, + "learning_rate": 4.002933328174642e-06, + "loss": 1.1602, + "step": 24303 + }, + { + "epoch": 0.7136062011862118, + "grad_norm": 0.0, + "learning_rate": 4.002172377871259e-06, + "loss": 1.2656, + "step": 24304 + }, + { + "epoch": 0.7136355628633507, + "grad_norm": 0.0, + "learning_rate": 4.001411481807769e-06, + "loss": 1.2002, + "step": 24305 + }, + { + "epoch": 0.7136649245404898, + "grad_norm": 0.0, + "learning_rate": 4.00065063999105e-06, + "loss": 1.27, + "step": 24306 + }, + { + "epoch": 0.7136942862176288, + "grad_norm": 0.0, + "learning_rate": 3.999889852427979e-06, + "loss": 1.1309, + "step": 24307 + }, + { + "epoch": 0.7137236478947677, + "grad_norm": 0.0, + "learning_rate": 3.999129119125446e-06, + "loss": 1.2148, + "step": 24308 + }, + { + "epoch": 0.7137530095719068, + "grad_norm": 0.0, + "learning_rate": 3.998368440090321e-06, + "loss": 1.2051, + "step": 24309 + }, + { + "epoch": 0.7137823712490458, + "grad_norm": 0.0, + "learning_rate": 3.997607815329491e-06, + "loss": 1.2627, + "step": 24310 + }, + { + "epoch": 0.7138117329261847, + "grad_norm": 0.0, + "learning_rate": 3.996847244849828e-06, + "loss": 1.2017, + "step": 24311 + }, + { + "epoch": 0.7138410946033238, + "grad_norm": 0.0, + "learning_rate": 3.996086728658218e-06, + "loss": 1.3447, + "step": 24312 + }, + { + "epoch": 0.7138704562804628, + "grad_norm": 0.0, + "learning_rate": 3.995326266761531e-06, + "loss": 1.2759, + "step": 24313 + }, + { + "epoch": 0.7138998179576017, + "grad_norm": 0.0, + "learning_rate": 3.994565859166643e-06, + "loss": 1.2407, + "step": 24314 + }, + { + "epoch": 0.7139291796347408, + "grad_norm": 0.0, + "learning_rate": 3.993805505880437e-06, + "loss": 1.1992, + "step": 24315 + }, + { + "epoch": 0.7139585413118797, + "grad_norm": 0.0, + "learning_rate": 3.993045206909783e-06, + "loss": 1.25, + "step": 24316 + }, + { + "epoch": 0.7139879029890187, + "grad_norm": 0.0, + "learning_rate": 3.992284962261563e-06, + "loss": 1.1348, + "step": 24317 + }, + { + "epoch": 0.7140172646661578, + "grad_norm": 0.0, + "learning_rate": 3.991524771942645e-06, + "loss": 1.2266, + "step": 24318 + }, + { + "epoch": 0.7140466263432967, + "grad_norm": 0.0, + "learning_rate": 3.990764635959911e-06, + "loss": 1.3301, + "step": 24319 + }, + { + "epoch": 0.7140759880204357, + "grad_norm": 0.0, + "learning_rate": 3.990004554320231e-06, + "loss": 1.1445, + "step": 24320 + }, + { + "epoch": 0.7141053496975748, + "grad_norm": 0.0, + "learning_rate": 3.989244527030479e-06, + "loss": 1.2065, + "step": 24321 + }, + { + "epoch": 0.7141347113747137, + "grad_norm": 0.0, + "learning_rate": 3.9884845540975245e-06, + "loss": 1.2207, + "step": 24322 + }, + { + "epoch": 0.7141640730518527, + "grad_norm": 0.0, + "learning_rate": 3.987724635528248e-06, + "loss": 1.2256, + "step": 24323 + }, + { + "epoch": 0.7141934347289918, + "grad_norm": 0.0, + "learning_rate": 3.9869647713295165e-06, + "loss": 1.1377, + "step": 24324 + }, + { + "epoch": 0.7142227964061307, + "grad_norm": 0.0, + "learning_rate": 3.9862049615082e-06, + "loss": 1.2754, + "step": 24325 + }, + { + "epoch": 0.7142521580832697, + "grad_norm": 0.0, + "learning_rate": 3.985445206071174e-06, + "loss": 1.0835, + "step": 24326 + }, + { + "epoch": 0.7142815197604088, + "grad_norm": 0.0, + "learning_rate": 3.984685505025306e-06, + "loss": 1.1338, + "step": 24327 + }, + { + "epoch": 0.7143108814375477, + "grad_norm": 0.0, + "learning_rate": 3.98392585837747e-06, + "loss": 1.1665, + "step": 24328 + }, + { + "epoch": 0.7143402431146867, + "grad_norm": 0.0, + "learning_rate": 3.983166266134533e-06, + "loss": 1.1797, + "step": 24329 + }, + { + "epoch": 0.7143696047918258, + "grad_norm": 0.0, + "learning_rate": 3.982406728303365e-06, + "loss": 1.2812, + "step": 24330 + }, + { + "epoch": 0.7143989664689647, + "grad_norm": 0.0, + "learning_rate": 3.9816472448908305e-06, + "loss": 1.2334, + "step": 24331 + }, + { + "epoch": 0.7144283281461037, + "grad_norm": 0.0, + "learning_rate": 3.980887815903804e-06, + "loss": 1.3389, + "step": 24332 + }, + { + "epoch": 0.7144576898232428, + "grad_norm": 0.0, + "learning_rate": 3.980128441349152e-06, + "loss": 1.2485, + "step": 24333 + }, + { + "epoch": 0.7144870515003817, + "grad_norm": 0.0, + "learning_rate": 3.979369121233735e-06, + "loss": 1.1797, + "step": 24334 + }, + { + "epoch": 0.7145164131775207, + "grad_norm": 0.0, + "learning_rate": 3.978609855564429e-06, + "loss": 1.2666, + "step": 24335 + }, + { + "epoch": 0.7145457748546596, + "grad_norm": 0.0, + "learning_rate": 3.977850644348092e-06, + "loss": 1.2183, + "step": 24336 + }, + { + "epoch": 0.7145751365317987, + "grad_norm": 0.0, + "learning_rate": 3.977091487591602e-06, + "loss": 1.2363, + "step": 24337 + }, + { + "epoch": 0.7146044982089377, + "grad_norm": 0.0, + "learning_rate": 3.97633238530181e-06, + "loss": 1.2607, + "step": 24338 + }, + { + "epoch": 0.7146338598860766, + "grad_norm": 0.0, + "learning_rate": 3.975573337485589e-06, + "loss": 1.3105, + "step": 24339 + }, + { + "epoch": 0.7146632215632157, + "grad_norm": 0.0, + "learning_rate": 3.974814344149798e-06, + "loss": 1.061, + "step": 24340 + }, + { + "epoch": 0.7146925832403547, + "grad_norm": 0.0, + "learning_rate": 3.974055405301307e-06, + "loss": 1.0581, + "step": 24341 + }, + { + "epoch": 0.7147219449174936, + "grad_norm": 0.0, + "learning_rate": 3.973296520946973e-06, + "loss": 1.3486, + "step": 24342 + }, + { + "epoch": 0.7147513065946327, + "grad_norm": 0.0, + "learning_rate": 3.972537691093667e-06, + "loss": 1.1553, + "step": 24343 + }, + { + "epoch": 0.7147806682717717, + "grad_norm": 0.0, + "learning_rate": 3.971778915748245e-06, + "loss": 1.2041, + "step": 24344 + }, + { + "epoch": 0.7148100299489106, + "grad_norm": 0.0, + "learning_rate": 3.9710201949175696e-06, + "loss": 1.1987, + "step": 24345 + }, + { + "epoch": 0.7148393916260497, + "grad_norm": 0.0, + "learning_rate": 3.9702615286085035e-06, + "loss": 1.1987, + "step": 24346 + }, + { + "epoch": 0.7148687533031887, + "grad_norm": 0.0, + "learning_rate": 3.969502916827902e-06, + "loss": 1.2178, + "step": 24347 + }, + { + "epoch": 0.7148981149803276, + "grad_norm": 0.0, + "learning_rate": 3.9687443595826345e-06, + "loss": 1.1885, + "step": 24348 + }, + { + "epoch": 0.7149274766574667, + "grad_norm": 0.0, + "learning_rate": 3.9679858568795526e-06, + "loss": 1.2539, + "step": 24349 + }, + { + "epoch": 0.7149568383346057, + "grad_norm": 0.0, + "learning_rate": 3.967227408725524e-06, + "loss": 1.2129, + "step": 24350 + }, + { + "epoch": 0.7149862000117446, + "grad_norm": 0.0, + "learning_rate": 3.966469015127398e-06, + "loss": 1.2837, + "step": 24351 + }, + { + "epoch": 0.7150155616888837, + "grad_norm": 0.0, + "learning_rate": 3.965710676092041e-06, + "loss": 1.2173, + "step": 24352 + }, + { + "epoch": 0.7150449233660227, + "grad_norm": 0.0, + "learning_rate": 3.96495239162631e-06, + "loss": 1.2197, + "step": 24353 + }, + { + "epoch": 0.7150742850431616, + "grad_norm": 0.0, + "learning_rate": 3.964194161737058e-06, + "loss": 1.3052, + "step": 24354 + }, + { + "epoch": 0.7151036467203007, + "grad_norm": 0.0, + "learning_rate": 3.963435986431146e-06, + "loss": 1.2998, + "step": 24355 + }, + { + "epoch": 0.7151330083974397, + "grad_norm": 0.0, + "learning_rate": 3.962677865715423e-06, + "loss": 1.1377, + "step": 24356 + }, + { + "epoch": 0.7151623700745786, + "grad_norm": 0.0, + "learning_rate": 3.961919799596754e-06, + "loss": 1.1865, + "step": 24357 + }, + { + "epoch": 0.7151917317517177, + "grad_norm": 0.0, + "learning_rate": 3.961161788081988e-06, + "loss": 1.2891, + "step": 24358 + }, + { + "epoch": 0.7152210934288566, + "grad_norm": 0.0, + "learning_rate": 3.960403831177986e-06, + "loss": 1.168, + "step": 24359 + }, + { + "epoch": 0.7152504551059956, + "grad_norm": 0.0, + "learning_rate": 3.959645928891594e-06, + "loss": 1.2861, + "step": 24360 + }, + { + "epoch": 0.7152798167831347, + "grad_norm": 0.0, + "learning_rate": 3.958888081229676e-06, + "loss": 1.2393, + "step": 24361 + }, + { + "epoch": 0.7153091784602736, + "grad_norm": 0.0, + "learning_rate": 3.958130288199082e-06, + "loss": 1.2017, + "step": 24362 + }, + { + "epoch": 0.7153385401374126, + "grad_norm": 0.0, + "learning_rate": 3.957372549806658e-06, + "loss": 1.1777, + "step": 24363 + }, + { + "epoch": 0.7153679018145517, + "grad_norm": 0.0, + "learning_rate": 3.956614866059264e-06, + "loss": 1.2168, + "step": 24364 + }, + { + "epoch": 0.7153972634916906, + "grad_norm": 0.0, + "learning_rate": 3.955857236963747e-06, + "loss": 1.2754, + "step": 24365 + }, + { + "epoch": 0.7154266251688296, + "grad_norm": 0.0, + "learning_rate": 3.955099662526963e-06, + "loss": 1.2925, + "step": 24366 + }, + { + "epoch": 0.7154559868459687, + "grad_norm": 0.0, + "learning_rate": 3.9543421427557585e-06, + "loss": 1.3525, + "step": 24367 + }, + { + "epoch": 0.7154853485231076, + "grad_norm": 0.0, + "learning_rate": 3.95358467765699e-06, + "loss": 1.0947, + "step": 24368 + }, + { + "epoch": 0.7155147102002466, + "grad_norm": 0.0, + "learning_rate": 3.9528272672375025e-06, + "loss": 1.3145, + "step": 24369 + }, + { + "epoch": 0.7155440718773857, + "grad_norm": 0.0, + "learning_rate": 3.952069911504147e-06, + "loss": 1.3281, + "step": 24370 + }, + { + "epoch": 0.7155734335545246, + "grad_norm": 0.0, + "learning_rate": 3.951312610463769e-06, + "loss": 1.248, + "step": 24371 + }, + { + "epoch": 0.7156027952316636, + "grad_norm": 0.0, + "learning_rate": 3.950555364123224e-06, + "loss": 1.2559, + "step": 24372 + }, + { + "epoch": 0.7156321569088027, + "grad_norm": 0.0, + "learning_rate": 3.949798172489357e-06, + "loss": 1.3647, + "step": 24373 + }, + { + "epoch": 0.7156615185859416, + "grad_norm": 0.0, + "learning_rate": 3.949041035569009e-06, + "loss": 1.3096, + "step": 24374 + }, + { + "epoch": 0.7156908802630806, + "grad_norm": 0.0, + "learning_rate": 3.9482839533690375e-06, + "loss": 1.1519, + "step": 24375 + }, + { + "epoch": 0.7157202419402197, + "grad_norm": 0.0, + "learning_rate": 3.94752692589628e-06, + "loss": 1.1509, + "step": 24376 + }, + { + "epoch": 0.7157496036173586, + "grad_norm": 0.0, + "learning_rate": 3.9467699531575894e-06, + "loss": 1.1689, + "step": 24377 + }, + { + "epoch": 0.7157789652944976, + "grad_norm": 0.0, + "learning_rate": 3.946013035159809e-06, + "loss": 1.2695, + "step": 24378 + }, + { + "epoch": 0.7158083269716367, + "grad_norm": 0.0, + "learning_rate": 3.945256171909783e-06, + "loss": 1.334, + "step": 24379 + }, + { + "epoch": 0.7158376886487756, + "grad_norm": 0.0, + "learning_rate": 3.944499363414354e-06, + "loss": 1.2148, + "step": 24380 + }, + { + "epoch": 0.7158670503259146, + "grad_norm": 0.0, + "learning_rate": 3.943742609680371e-06, + "loss": 1.3164, + "step": 24381 + }, + { + "epoch": 0.7158964120030537, + "grad_norm": 0.0, + "learning_rate": 3.942985910714674e-06, + "loss": 1.3721, + "step": 24382 + }, + { + "epoch": 0.7159257736801926, + "grad_norm": 0.0, + "learning_rate": 3.942229266524103e-06, + "loss": 1.2197, + "step": 24383 + }, + { + "epoch": 0.7159551353573316, + "grad_norm": 0.0, + "learning_rate": 3.9414726771155075e-06, + "loss": 1.2607, + "step": 24384 + }, + { + "epoch": 0.7159844970344706, + "grad_norm": 0.0, + "learning_rate": 3.940716142495723e-06, + "loss": 1.1006, + "step": 24385 + }, + { + "epoch": 0.7160138587116096, + "grad_norm": 0.0, + "learning_rate": 3.939959662671599e-06, + "loss": 1.144, + "step": 24386 + }, + { + "epoch": 0.7160432203887486, + "grad_norm": 0.0, + "learning_rate": 3.93920323764997e-06, + "loss": 1.1426, + "step": 24387 + }, + { + "epoch": 0.7160725820658876, + "grad_norm": 0.0, + "learning_rate": 3.938446867437678e-06, + "loss": 1.2129, + "step": 24388 + }, + { + "epoch": 0.7161019437430266, + "grad_norm": 0.0, + "learning_rate": 3.937690552041562e-06, + "loss": 1.1943, + "step": 24389 + }, + { + "epoch": 0.7161313054201656, + "grad_norm": 0.0, + "learning_rate": 3.936934291468465e-06, + "loss": 1.2646, + "step": 24390 + }, + { + "epoch": 0.7161606670973046, + "grad_norm": 0.0, + "learning_rate": 3.936178085725221e-06, + "loss": 1.293, + "step": 24391 + }, + { + "epoch": 0.7161900287744436, + "grad_norm": 0.0, + "learning_rate": 3.935421934818674e-06, + "loss": 1.3623, + "step": 24392 + }, + { + "epoch": 0.7162193904515826, + "grad_norm": 0.0, + "learning_rate": 3.93466583875566e-06, + "loss": 1.0562, + "step": 24393 + }, + { + "epoch": 0.7162487521287216, + "grad_norm": 0.0, + "learning_rate": 3.933909797543017e-06, + "loss": 1.3359, + "step": 24394 + }, + { + "epoch": 0.7162781138058606, + "grad_norm": 0.0, + "learning_rate": 3.9331538111875815e-06, + "loss": 1.251, + "step": 24395 + }, + { + "epoch": 0.7163074754829996, + "grad_norm": 0.0, + "learning_rate": 3.932397879696185e-06, + "loss": 1.1982, + "step": 24396 + }, + { + "epoch": 0.7163368371601386, + "grad_norm": 0.0, + "learning_rate": 3.931642003075673e-06, + "loss": 1.2588, + "step": 24397 + }, + { + "epoch": 0.7163661988372776, + "grad_norm": 0.0, + "learning_rate": 3.930886181332873e-06, + "loss": 1.2393, + "step": 24398 + }, + { + "epoch": 0.7163955605144166, + "grad_norm": 0.0, + "learning_rate": 3.930130414474626e-06, + "loss": 1.1992, + "step": 24399 + }, + { + "epoch": 0.7164249221915556, + "grad_norm": 0.0, + "learning_rate": 3.9293747025077615e-06, + "loss": 1.1631, + "step": 24400 + }, + { + "epoch": 0.7164542838686946, + "grad_norm": 0.0, + "learning_rate": 3.928619045439119e-06, + "loss": 1.187, + "step": 24401 + }, + { + "epoch": 0.7164836455458335, + "grad_norm": 0.0, + "learning_rate": 3.92786344327553e-06, + "loss": 1.2456, + "step": 24402 + }, + { + "epoch": 0.7165130072229726, + "grad_norm": 0.0, + "learning_rate": 3.927107896023827e-06, + "loss": 1.1699, + "step": 24403 + }, + { + "epoch": 0.7165423689001116, + "grad_norm": 0.0, + "learning_rate": 3.926352403690843e-06, + "loss": 1.3115, + "step": 24404 + }, + { + "epoch": 0.7165717305772505, + "grad_norm": 0.0, + "learning_rate": 3.925596966283405e-06, + "loss": 1.2021, + "step": 24405 + }, + { + "epoch": 0.7166010922543896, + "grad_norm": 0.0, + "learning_rate": 3.924841583808353e-06, + "loss": 1.3286, + "step": 24406 + }, + { + "epoch": 0.7166304539315286, + "grad_norm": 0.0, + "learning_rate": 3.924086256272512e-06, + "loss": 1.1387, + "step": 24407 + }, + { + "epoch": 0.7166598156086675, + "grad_norm": 0.0, + "learning_rate": 3.923330983682718e-06, + "loss": 1.2666, + "step": 24408 + }, + { + "epoch": 0.7166891772858066, + "grad_norm": 0.0, + "learning_rate": 3.922575766045794e-06, + "loss": 1.2817, + "step": 24409 + }, + { + "epoch": 0.7167185389629456, + "grad_norm": 0.0, + "learning_rate": 3.921820603368579e-06, + "loss": 1.2197, + "step": 24410 + }, + { + "epoch": 0.7167479006400845, + "grad_norm": 0.0, + "learning_rate": 3.921065495657895e-06, + "loss": 1.3418, + "step": 24411 + }, + { + "epoch": 0.7167772623172236, + "grad_norm": 0.0, + "learning_rate": 3.920310442920574e-06, + "loss": 1.0952, + "step": 24412 + }, + { + "epoch": 0.7168066239943626, + "grad_norm": 0.0, + "learning_rate": 3.919555445163442e-06, + "loss": 1.2402, + "step": 24413 + }, + { + "epoch": 0.7168359856715015, + "grad_norm": 0.0, + "learning_rate": 3.918800502393324e-06, + "loss": 1.1816, + "step": 24414 + }, + { + "epoch": 0.7168653473486406, + "grad_norm": 0.0, + "learning_rate": 3.918045614617054e-06, + "loss": 1.2334, + "step": 24415 + }, + { + "epoch": 0.7168947090257796, + "grad_norm": 0.0, + "learning_rate": 3.917290781841452e-06, + "loss": 1.2227, + "step": 24416 + }, + { + "epoch": 0.7169240707029185, + "grad_norm": 0.0, + "learning_rate": 3.91653600407335e-06, + "loss": 1.1602, + "step": 24417 + }, + { + "epoch": 0.7169534323800576, + "grad_norm": 0.0, + "learning_rate": 3.91578128131957e-06, + "loss": 1.3008, + "step": 24418 + }, + { + "epoch": 0.7169827940571966, + "grad_norm": 0.0, + "learning_rate": 3.915026613586941e-06, + "loss": 1.2256, + "step": 24419 + }, + { + "epoch": 0.7170121557343355, + "grad_norm": 0.0, + "learning_rate": 3.914272000882279e-06, + "loss": 1.1963, + "step": 24420 + }, + { + "epoch": 0.7170415174114746, + "grad_norm": 0.0, + "learning_rate": 3.913517443212418e-06, + "loss": 1.2725, + "step": 24421 + }, + { + "epoch": 0.7170708790886136, + "grad_norm": 0.0, + "learning_rate": 3.912762940584177e-06, + "loss": 1.2363, + "step": 24422 + }, + { + "epoch": 0.7171002407657525, + "grad_norm": 0.0, + "learning_rate": 3.912008493004377e-06, + "loss": 1.3438, + "step": 24423 + }, + { + "epoch": 0.7171296024428916, + "grad_norm": 0.0, + "learning_rate": 3.911254100479847e-06, + "loss": 1.2725, + "step": 24424 + }, + { + "epoch": 0.7171589641200306, + "grad_norm": 0.0, + "learning_rate": 3.910499763017402e-06, + "loss": 1.2422, + "step": 24425 + }, + { + "epoch": 0.7171883257971695, + "grad_norm": 0.0, + "learning_rate": 3.909745480623869e-06, + "loss": 1.1421, + "step": 24426 + }, + { + "epoch": 0.7172176874743086, + "grad_norm": 0.0, + "learning_rate": 3.908991253306068e-06, + "loss": 1.2891, + "step": 24427 + }, + { + "epoch": 0.7172470491514475, + "grad_norm": 0.0, + "learning_rate": 3.908237081070821e-06, + "loss": 1.334, + "step": 24428 + }, + { + "epoch": 0.7172764108285865, + "grad_norm": 0.0, + "learning_rate": 3.90748296392494e-06, + "loss": 1.2588, + "step": 24429 + }, + { + "epoch": 0.7173057725057256, + "grad_norm": 0.0, + "learning_rate": 3.906728901875256e-06, + "loss": 1.1938, + "step": 24430 + }, + { + "epoch": 0.7173351341828645, + "grad_norm": 0.0, + "learning_rate": 3.905974894928583e-06, + "loss": 1.168, + "step": 24431 + }, + { + "epoch": 0.7173644958600035, + "grad_norm": 0.0, + "learning_rate": 3.905220943091735e-06, + "loss": 1.2646, + "step": 24432 + }, + { + "epoch": 0.7173938575371426, + "grad_norm": 0.0, + "learning_rate": 3.9044670463715406e-06, + "loss": 1.0298, + "step": 24433 + }, + { + "epoch": 0.7174232192142815, + "grad_norm": 0.0, + "learning_rate": 3.903713204774806e-06, + "loss": 1.1777, + "step": 24434 + }, + { + "epoch": 0.7174525808914205, + "grad_norm": 0.0, + "learning_rate": 3.9029594183083595e-06, + "loss": 1.1465, + "step": 24435 + }, + { + "epoch": 0.7174819425685595, + "grad_norm": 0.0, + "learning_rate": 3.902205686979012e-06, + "loss": 1.3198, + "step": 24436 + }, + { + "epoch": 0.7175113042456985, + "grad_norm": 0.0, + "learning_rate": 3.901452010793579e-06, + "loss": 1.2256, + "step": 24437 + }, + { + "epoch": 0.7175406659228375, + "grad_norm": 0.0, + "learning_rate": 3.900698389758874e-06, + "loss": 1.1392, + "step": 24438 + }, + { + "epoch": 0.7175700275999765, + "grad_norm": 0.0, + "learning_rate": 3.899944823881721e-06, + "loss": 1.2588, + "step": 24439 + }, + { + "epoch": 0.7175993892771155, + "grad_norm": 0.0, + "learning_rate": 3.899191313168924e-06, + "loss": 1.1333, + "step": 24440 + }, + { + "epoch": 0.7176287509542545, + "grad_norm": 0.0, + "learning_rate": 3.898437857627306e-06, + "loss": 1.1035, + "step": 24441 + }, + { + "epoch": 0.7176581126313935, + "grad_norm": 0.0, + "learning_rate": 3.897684457263678e-06, + "loss": 1.2344, + "step": 24442 + }, + { + "epoch": 0.7176874743085325, + "grad_norm": 0.0, + "learning_rate": 3.896931112084851e-06, + "loss": 1.2495, + "step": 24443 + }, + { + "epoch": 0.7177168359856715, + "grad_norm": 0.0, + "learning_rate": 3.8961778220976395e-06, + "loss": 1.2529, + "step": 24444 + }, + { + "epoch": 0.7177461976628104, + "grad_norm": 0.0, + "learning_rate": 3.8954245873088514e-06, + "loss": 1.2139, + "step": 24445 + }, + { + "epoch": 0.7177755593399495, + "grad_norm": 0.0, + "learning_rate": 3.894671407725307e-06, + "loss": 1.2305, + "step": 24446 + }, + { + "epoch": 0.7178049210170885, + "grad_norm": 0.0, + "learning_rate": 3.893918283353808e-06, + "loss": 1.1904, + "step": 24447 + }, + { + "epoch": 0.7178342826942274, + "grad_norm": 0.0, + "learning_rate": 3.893165214201173e-06, + "loss": 1.2998, + "step": 24448 + }, + { + "epoch": 0.7178636443713665, + "grad_norm": 0.0, + "learning_rate": 3.892412200274206e-06, + "loss": 1.249, + "step": 24449 + }, + { + "epoch": 0.7178930060485055, + "grad_norm": 0.0, + "learning_rate": 3.891659241579723e-06, + "loss": 1.2871, + "step": 24450 + }, + { + "epoch": 0.7179223677256444, + "grad_norm": 0.0, + "learning_rate": 3.890906338124529e-06, + "loss": 1.3066, + "step": 24451 + }, + { + "epoch": 0.7179517294027835, + "grad_norm": 0.0, + "learning_rate": 3.890153489915435e-06, + "loss": 1.2373, + "step": 24452 + }, + { + "epoch": 0.7179810910799225, + "grad_norm": 0.0, + "learning_rate": 3.8894006969592466e-06, + "loss": 1.1567, + "step": 24453 + }, + { + "epoch": 0.7180104527570614, + "grad_norm": 0.0, + "learning_rate": 3.88864795926277e-06, + "loss": 1.2368, + "step": 24454 + }, + { + "epoch": 0.7180398144342005, + "grad_norm": 0.0, + "learning_rate": 3.887895276832818e-06, + "loss": 1.1133, + "step": 24455 + }, + { + "epoch": 0.7180691761113395, + "grad_norm": 0.0, + "learning_rate": 3.887142649676192e-06, + "loss": 1.3359, + "step": 24456 + }, + { + "epoch": 0.7180985377884784, + "grad_norm": 0.0, + "learning_rate": 3.886390077799703e-06, + "loss": 1.3398, + "step": 24457 + }, + { + "epoch": 0.7181278994656175, + "grad_norm": 0.0, + "learning_rate": 3.8856375612101505e-06, + "loss": 1.2344, + "step": 24458 + }, + { + "epoch": 0.7181572611427565, + "grad_norm": 0.0, + "learning_rate": 3.884885099914348e-06, + "loss": 1.1611, + "step": 24459 + }, + { + "epoch": 0.7181866228198954, + "grad_norm": 0.0, + "learning_rate": 3.884132693919095e-06, + "loss": 1.3818, + "step": 24460 + }, + { + "epoch": 0.7182159844970345, + "grad_norm": 0.0, + "learning_rate": 3.8833803432311954e-06, + "loss": 1.3188, + "step": 24461 + }, + { + "epoch": 0.7182453461741735, + "grad_norm": 0.0, + "learning_rate": 3.882628047857455e-06, + "loss": 1.1455, + "step": 24462 + }, + { + "epoch": 0.7182747078513124, + "grad_norm": 0.0, + "learning_rate": 3.881875807804673e-06, + "loss": 1.166, + "step": 24463 + }, + { + "epoch": 0.7183040695284515, + "grad_norm": 0.0, + "learning_rate": 3.8811236230796565e-06, + "loss": 1.2861, + "step": 24464 + }, + { + "epoch": 0.7183334312055905, + "grad_norm": 0.0, + "learning_rate": 3.8803714936892044e-06, + "loss": 1.2695, + "step": 24465 + }, + { + "epoch": 0.7183627928827294, + "grad_norm": 0.0, + "learning_rate": 3.879619419640123e-06, + "loss": 1.2842, + "step": 24466 + }, + { + "epoch": 0.7183921545598685, + "grad_norm": 0.0, + "learning_rate": 3.87886740093921e-06, + "loss": 1.2329, + "step": 24467 + }, + { + "epoch": 0.7184215162370075, + "grad_norm": 0.0, + "learning_rate": 3.878115437593267e-06, + "loss": 1.2861, + "step": 24468 + }, + { + "epoch": 0.7184508779141464, + "grad_norm": 0.0, + "learning_rate": 3.877363529609092e-06, + "loss": 1.2168, + "step": 24469 + }, + { + "epoch": 0.7184802395912855, + "grad_norm": 0.0, + "learning_rate": 3.876611676993488e-06, + "loss": 1.291, + "step": 24470 + }, + { + "epoch": 0.7185096012684244, + "grad_norm": 0.0, + "learning_rate": 3.875859879753254e-06, + "loss": 1.2427, + "step": 24471 + }, + { + "epoch": 0.7185389629455634, + "grad_norm": 0.0, + "learning_rate": 3.875108137895184e-06, + "loss": 1.2134, + "step": 24472 + }, + { + "epoch": 0.7185683246227025, + "grad_norm": 0.0, + "learning_rate": 3.874356451426082e-06, + "loss": 1.3008, + "step": 24473 + }, + { + "epoch": 0.7185976862998414, + "grad_norm": 0.0, + "learning_rate": 3.873604820352742e-06, + "loss": 1.1968, + "step": 24474 + }, + { + "epoch": 0.7186270479769804, + "grad_norm": 0.0, + "learning_rate": 3.872853244681964e-06, + "loss": 1.248, + "step": 24475 + }, + { + "epoch": 0.7186564096541195, + "grad_norm": 0.0, + "learning_rate": 3.872101724420545e-06, + "loss": 1.2744, + "step": 24476 + }, + { + "epoch": 0.7186857713312584, + "grad_norm": 0.0, + "learning_rate": 3.8713502595752785e-06, + "loss": 1.1606, + "step": 24477 + }, + { + "epoch": 0.7187151330083974, + "grad_norm": 0.0, + "learning_rate": 3.870598850152958e-06, + "loss": 1.3633, + "step": 24478 + }, + { + "epoch": 0.7187444946855365, + "grad_norm": 0.0, + "learning_rate": 3.869847496160385e-06, + "loss": 1.1914, + "step": 24479 + }, + { + "epoch": 0.7187738563626754, + "grad_norm": 0.0, + "learning_rate": 3.869096197604352e-06, + "loss": 1.2275, + "step": 24480 + }, + { + "epoch": 0.7188032180398144, + "grad_norm": 0.0, + "learning_rate": 3.868344954491648e-06, + "loss": 1.1997, + "step": 24481 + }, + { + "epoch": 0.7188325797169535, + "grad_norm": 0.0, + "learning_rate": 3.867593766829074e-06, + "loss": 1.457, + "step": 24482 + }, + { + "epoch": 0.7188619413940924, + "grad_norm": 0.0, + "learning_rate": 3.866842634623418e-06, + "loss": 1.292, + "step": 24483 + }, + { + "epoch": 0.7188913030712314, + "grad_norm": 0.0, + "learning_rate": 3.8660915578814774e-06, + "loss": 1.3076, + "step": 24484 + }, + { + "epoch": 0.7189206647483705, + "grad_norm": 0.0, + "learning_rate": 3.865340536610041e-06, + "loss": 1.1621, + "step": 24485 + }, + { + "epoch": 0.7189500264255094, + "grad_norm": 0.0, + "learning_rate": 3.864589570815902e-06, + "loss": 1.1592, + "step": 24486 + }, + { + "epoch": 0.7189793881026484, + "grad_norm": 0.0, + "learning_rate": 3.8638386605058475e-06, + "loss": 1.2236, + "step": 24487 + }, + { + "epoch": 0.7190087497797875, + "grad_norm": 0.0, + "learning_rate": 3.863087805686675e-06, + "loss": 1.2163, + "step": 24488 + }, + { + "epoch": 0.7190381114569264, + "grad_norm": 0.0, + "learning_rate": 3.862337006365167e-06, + "loss": 1.2441, + "step": 24489 + }, + { + "epoch": 0.7190674731340654, + "grad_norm": 0.0, + "learning_rate": 3.861586262548122e-06, + "loss": 1.2451, + "step": 24490 + }, + { + "epoch": 0.7190968348112045, + "grad_norm": 0.0, + "learning_rate": 3.8608355742423235e-06, + "loss": 1.2285, + "step": 24491 + }, + { + "epoch": 0.7191261964883434, + "grad_norm": 0.0, + "learning_rate": 3.86008494145456e-06, + "loss": 1.3027, + "step": 24492 + }, + { + "epoch": 0.7191555581654824, + "grad_norm": 0.0, + "learning_rate": 3.859334364191624e-06, + "loss": 1.2344, + "step": 24493 + }, + { + "epoch": 0.7191849198426215, + "grad_norm": 0.0, + "learning_rate": 3.858583842460294e-06, + "loss": 1.1782, + "step": 24494 + }, + { + "epoch": 0.7192142815197604, + "grad_norm": 0.0, + "learning_rate": 3.857833376267367e-06, + "loss": 1.3022, + "step": 24495 + }, + { + "epoch": 0.7192436431968994, + "grad_norm": 0.0, + "learning_rate": 3.857082965619623e-06, + "loss": 1.124, + "step": 24496 + }, + { + "epoch": 0.7192730048740384, + "grad_norm": 0.0, + "learning_rate": 3.8563326105238545e-06, + "loss": 1.2031, + "step": 24497 + }, + { + "epoch": 0.7193023665511774, + "grad_norm": 0.0, + "learning_rate": 3.8555823109868395e-06, + "loss": 1.3018, + "step": 24498 + }, + { + "epoch": 0.7193317282283164, + "grad_norm": 0.0, + "learning_rate": 3.854832067015371e-06, + "loss": 1.0957, + "step": 24499 + }, + { + "epoch": 0.7193610899054554, + "grad_norm": 0.0, + "learning_rate": 3.854081878616231e-06, + "loss": 1.3105, + "step": 24500 + }, + { + "epoch": 0.7193904515825944, + "grad_norm": 0.0, + "learning_rate": 3.8533317457962e-06, + "loss": 1.2017, + "step": 24501 + }, + { + "epoch": 0.7194198132597334, + "grad_norm": 0.0, + "learning_rate": 3.852581668562067e-06, + "loss": 1.1147, + "step": 24502 + }, + { + "epoch": 0.7194491749368724, + "grad_norm": 0.0, + "learning_rate": 3.851831646920607e-06, + "loss": 1.2764, + "step": 24503 + }, + { + "epoch": 0.7194785366140114, + "grad_norm": 0.0, + "learning_rate": 3.851081680878612e-06, + "loss": 1.3281, + "step": 24504 + }, + { + "epoch": 0.7195078982911504, + "grad_norm": 0.0, + "learning_rate": 3.850331770442857e-06, + "loss": 1.1763, + "step": 24505 + }, + { + "epoch": 0.7195372599682894, + "grad_norm": 0.0, + "learning_rate": 3.84958191562013e-06, + "loss": 1.3291, + "step": 24506 + }, + { + "epoch": 0.7195666216454284, + "grad_norm": 0.0, + "learning_rate": 3.848832116417205e-06, + "loss": 1.2485, + "step": 24507 + }, + { + "epoch": 0.7195959833225674, + "grad_norm": 0.0, + "learning_rate": 3.848082372840871e-06, + "loss": 1.3027, + "step": 24508 + }, + { + "epoch": 0.7196253449997064, + "grad_norm": 0.0, + "learning_rate": 3.847332684897903e-06, + "loss": 1.0981, + "step": 24509 + }, + { + "epoch": 0.7196547066768454, + "grad_norm": 0.0, + "learning_rate": 3.846583052595081e-06, + "loss": 1.293, + "step": 24510 + }, + { + "epoch": 0.7196840683539844, + "grad_norm": 0.0, + "learning_rate": 3.845833475939184e-06, + "loss": 1.2754, + "step": 24511 + }, + { + "epoch": 0.7197134300311234, + "grad_norm": 0.0, + "learning_rate": 3.8450839549369885e-06, + "loss": 1.1421, + "step": 24512 + }, + { + "epoch": 0.7197427917082624, + "grad_norm": 0.0, + "learning_rate": 3.8443344895952785e-06, + "loss": 1.2666, + "step": 24513 + }, + { + "epoch": 0.7197721533854013, + "grad_norm": 0.0, + "learning_rate": 3.8435850799208245e-06, + "loss": 1.1997, + "step": 24514 + }, + { + "epoch": 0.7198015150625404, + "grad_norm": 0.0, + "learning_rate": 3.842835725920411e-06, + "loss": 1.209, + "step": 24515 + }, + { + "epoch": 0.7198308767396794, + "grad_norm": 0.0, + "learning_rate": 3.842086427600806e-06, + "loss": 1.2852, + "step": 24516 + }, + { + "epoch": 0.7198602384168183, + "grad_norm": 0.0, + "learning_rate": 3.841337184968798e-06, + "loss": 1.1475, + "step": 24517 + }, + { + "epoch": 0.7198896000939574, + "grad_norm": 0.0, + "learning_rate": 3.84058799803115e-06, + "loss": 1.2734, + "step": 24518 + }, + { + "epoch": 0.7199189617710964, + "grad_norm": 0.0, + "learning_rate": 3.839838866794644e-06, + "loss": 1.2539, + "step": 24519 + }, + { + "epoch": 0.7199483234482353, + "grad_norm": 0.0, + "learning_rate": 3.839089791266053e-06, + "loss": 1.248, + "step": 24520 + }, + { + "epoch": 0.7199776851253744, + "grad_norm": 0.0, + "learning_rate": 3.838340771452148e-06, + "loss": 1.1616, + "step": 24521 + }, + { + "epoch": 0.7200070468025134, + "grad_norm": 0.0, + "learning_rate": 3.8375918073597086e-06, + "loss": 1.2529, + "step": 24522 + }, + { + "epoch": 0.7200364084796523, + "grad_norm": 0.0, + "learning_rate": 3.836842898995501e-06, + "loss": 1.3008, + "step": 24523 + }, + { + "epoch": 0.7200657701567914, + "grad_norm": 0.0, + "learning_rate": 3.836094046366305e-06, + "loss": 1.1504, + "step": 24524 + }, + { + "epoch": 0.7200951318339304, + "grad_norm": 0.0, + "learning_rate": 3.835345249478889e-06, + "loss": 1.2988, + "step": 24525 + }, + { + "epoch": 0.7201244935110693, + "grad_norm": 0.0, + "learning_rate": 3.8345965083400246e-06, + "loss": 1.1484, + "step": 24526 + }, + { + "epoch": 0.7201538551882084, + "grad_norm": 0.0, + "learning_rate": 3.83384782295648e-06, + "loss": 1.3262, + "step": 24527 + }, + { + "epoch": 0.7201832168653474, + "grad_norm": 0.0, + "learning_rate": 3.833099193335032e-06, + "loss": 1.3652, + "step": 24528 + }, + { + "epoch": 0.7202125785424863, + "grad_norm": 0.0, + "learning_rate": 3.832350619482443e-06, + "loss": 1.3369, + "step": 24529 + }, + { + "epoch": 0.7202419402196254, + "grad_norm": 0.0, + "learning_rate": 3.831602101405491e-06, + "loss": 1.1826, + "step": 24530 + }, + { + "epoch": 0.7202713018967644, + "grad_norm": 0.0, + "learning_rate": 3.830853639110942e-06, + "loss": 1.2266, + "step": 24531 + }, + { + "epoch": 0.7203006635739033, + "grad_norm": 0.0, + "learning_rate": 3.8301052326055586e-06, + "loss": 1.3008, + "step": 24532 + }, + { + "epoch": 0.7203300252510424, + "grad_norm": 0.0, + "learning_rate": 3.829356881896118e-06, + "loss": 1.2617, + "step": 24533 + }, + { + "epoch": 0.7203593869281814, + "grad_norm": 0.0, + "learning_rate": 3.828608586989383e-06, + "loss": 1.2354, + "step": 24534 + }, + { + "epoch": 0.7203887486053203, + "grad_norm": 0.0, + "learning_rate": 3.827860347892122e-06, + "loss": 1.2578, + "step": 24535 + }, + { + "epoch": 0.7204181102824593, + "grad_norm": 0.0, + "learning_rate": 3.827112164611095e-06, + "loss": 1.2725, + "step": 24536 + }, + { + "epoch": 0.7204474719595984, + "grad_norm": 0.0, + "learning_rate": 3.8263640371530785e-06, + "loss": 1.3027, + "step": 24537 + }, + { + "epoch": 0.7204768336367373, + "grad_norm": 0.0, + "learning_rate": 3.8256159655248306e-06, + "loss": 1.2041, + "step": 24538 + }, + { + "epoch": 0.7205061953138763, + "grad_norm": 0.0, + "learning_rate": 3.824867949733121e-06, + "loss": 1.2949, + "step": 24539 + }, + { + "epoch": 0.7205355569910153, + "grad_norm": 0.0, + "learning_rate": 3.824119989784712e-06, + "loss": 1.1982, + "step": 24540 + }, + { + "epoch": 0.7205649186681543, + "grad_norm": 0.0, + "learning_rate": 3.823372085686364e-06, + "loss": 1.3535, + "step": 24541 + }, + { + "epoch": 0.7205942803452933, + "grad_norm": 0.0, + "learning_rate": 3.822624237444852e-06, + "loss": 1.373, + "step": 24542 + }, + { + "epoch": 0.7206236420224323, + "grad_norm": 0.0, + "learning_rate": 3.821876445066923e-06, + "loss": 1.2119, + "step": 24543 + }, + { + "epoch": 0.7206530036995713, + "grad_norm": 0.0, + "learning_rate": 3.821128708559352e-06, + "loss": 1.1992, + "step": 24544 + }, + { + "epoch": 0.7206823653767103, + "grad_norm": 0.0, + "learning_rate": 3.8203810279288925e-06, + "loss": 1.2578, + "step": 24545 + }, + { + "epoch": 0.7207117270538493, + "grad_norm": 0.0, + "learning_rate": 3.819633403182314e-06, + "loss": 1.0923, + "step": 24546 + }, + { + "epoch": 0.7207410887309883, + "grad_norm": 0.0, + "learning_rate": 3.8188858343263695e-06, + "loss": 1.1284, + "step": 24547 + }, + { + "epoch": 0.7207704504081273, + "grad_norm": 0.0, + "learning_rate": 3.818138321367826e-06, + "loss": 1.3486, + "step": 24548 + }, + { + "epoch": 0.7207998120852663, + "grad_norm": 0.0, + "learning_rate": 3.817390864313442e-06, + "loss": 1.2764, + "step": 24549 + }, + { + "epoch": 0.7208291737624053, + "grad_norm": 0.0, + "learning_rate": 3.816643463169976e-06, + "loss": 1.2451, + "step": 24550 + }, + { + "epoch": 0.7208585354395443, + "grad_norm": 0.0, + "learning_rate": 3.8158961179441855e-06, + "loss": 1.2441, + "step": 24551 + }, + { + "epoch": 0.7208878971166833, + "grad_norm": 0.0, + "learning_rate": 3.8151488286428275e-06, + "loss": 1.2012, + "step": 24552 + }, + { + "epoch": 0.7209172587938223, + "grad_norm": 0.0, + "learning_rate": 3.8144015952726665e-06, + "loss": 1.3203, + "step": 24553 + }, + { + "epoch": 0.7209466204709613, + "grad_norm": 0.0, + "learning_rate": 3.813654417840452e-06, + "loss": 1.082, + "step": 24554 + }, + { + "epoch": 0.7209759821481003, + "grad_norm": 0.0, + "learning_rate": 3.812907296352949e-06, + "loss": 1.1709, + "step": 24555 + }, + { + "epoch": 0.7210053438252393, + "grad_norm": 0.0, + "learning_rate": 3.8121602308169057e-06, + "loss": 1.1353, + "step": 24556 + }, + { + "epoch": 0.7210347055023782, + "grad_norm": 0.0, + "learning_rate": 3.8114132212390853e-06, + "loss": 1.2139, + "step": 24557 + }, + { + "epoch": 0.7210640671795173, + "grad_norm": 0.0, + "learning_rate": 3.8106662676262406e-06, + "loss": 1.1582, + "step": 24558 + }, + { + "epoch": 0.7210934288566563, + "grad_norm": 0.0, + "learning_rate": 3.809919369985127e-06, + "loss": 1.2822, + "step": 24559 + }, + { + "epoch": 0.7211227905337952, + "grad_norm": 0.0, + "learning_rate": 3.8091725283224965e-06, + "loss": 1.1299, + "step": 24560 + }, + { + "epoch": 0.7211521522109343, + "grad_norm": 0.0, + "learning_rate": 3.808425742645101e-06, + "loss": 1.2686, + "step": 24561 + }, + { + "epoch": 0.7211815138880733, + "grad_norm": 0.0, + "learning_rate": 3.8076790129597007e-06, + "loss": 1.2285, + "step": 24562 + }, + { + "epoch": 0.7212108755652122, + "grad_norm": 0.0, + "learning_rate": 3.806932339273042e-06, + "loss": 1.2607, + "step": 24563 + }, + { + "epoch": 0.7212402372423513, + "grad_norm": 0.0, + "learning_rate": 3.8061857215918828e-06, + "loss": 1.2061, + "step": 24564 + }, + { + "epoch": 0.7212695989194903, + "grad_norm": 0.0, + "learning_rate": 3.80543915992297e-06, + "loss": 1.2588, + "step": 24565 + }, + { + "epoch": 0.7212989605966292, + "grad_norm": 0.0, + "learning_rate": 3.80469265427306e-06, + "loss": 1.1797, + "step": 24566 + }, + { + "epoch": 0.7213283222737683, + "grad_norm": 0.0, + "learning_rate": 3.803946204648902e-06, + "loss": 1.1538, + "step": 24567 + }, + { + "epoch": 0.7213576839509073, + "grad_norm": 0.0, + "learning_rate": 3.803199811057243e-06, + "loss": 1.1499, + "step": 24568 + }, + { + "epoch": 0.7213870456280462, + "grad_norm": 0.0, + "learning_rate": 3.802453473504838e-06, + "loss": 1.2378, + "step": 24569 + }, + { + "epoch": 0.7214164073051853, + "grad_norm": 0.0, + "learning_rate": 3.801707191998428e-06, + "loss": 1.1299, + "step": 24570 + }, + { + "epoch": 0.7214457689823243, + "grad_norm": 0.0, + "learning_rate": 3.800960966544771e-06, + "loss": 1.2139, + "step": 24571 + }, + { + "epoch": 0.7214751306594632, + "grad_norm": 0.0, + "learning_rate": 3.8002147971506086e-06, + "loss": 1.1821, + "step": 24572 + }, + { + "epoch": 0.7215044923366023, + "grad_norm": 0.0, + "learning_rate": 3.799468683822696e-06, + "loss": 1.3965, + "step": 24573 + }, + { + "epoch": 0.7215338540137413, + "grad_norm": 0.0, + "learning_rate": 3.7987226265677745e-06, + "loss": 1.2686, + "step": 24574 + }, + { + "epoch": 0.7215632156908802, + "grad_norm": 0.0, + "learning_rate": 3.797976625392593e-06, + "loss": 1.1631, + "step": 24575 + }, + { + "epoch": 0.7215925773680193, + "grad_norm": 0.0, + "learning_rate": 3.797230680303894e-06, + "loss": 1.1108, + "step": 24576 + }, + { + "epoch": 0.7216219390451583, + "grad_norm": 0.0, + "learning_rate": 3.7964847913084292e-06, + "loss": 1.1641, + "step": 24577 + }, + { + "epoch": 0.7216513007222972, + "grad_norm": 0.0, + "learning_rate": 3.795738958412938e-06, + "loss": 1.1514, + "step": 24578 + }, + { + "epoch": 0.7216806623994363, + "grad_norm": 0.0, + "learning_rate": 3.7949931816241725e-06, + "loss": 1.1475, + "step": 24579 + }, + { + "epoch": 0.7217100240765753, + "grad_norm": 0.0, + "learning_rate": 3.7942474609488724e-06, + "loss": 1.3057, + "step": 24580 + }, + { + "epoch": 0.7217393857537142, + "grad_norm": 0.0, + "learning_rate": 3.7935017963937783e-06, + "loss": 1.2373, + "step": 24581 + }, + { + "epoch": 0.7217687474308533, + "grad_norm": 0.0, + "learning_rate": 3.7927561879656403e-06, + "loss": 1.0977, + "step": 24582 + }, + { + "epoch": 0.7217981091079922, + "grad_norm": 0.0, + "learning_rate": 3.7920106356711974e-06, + "loss": 1.1328, + "step": 24583 + }, + { + "epoch": 0.7218274707851312, + "grad_norm": 0.0, + "learning_rate": 3.7912651395171927e-06, + "loss": 1.2056, + "step": 24584 + }, + { + "epoch": 0.7218568324622703, + "grad_norm": 0.0, + "learning_rate": 3.7905196995103644e-06, + "loss": 1.3184, + "step": 24585 + }, + { + "epoch": 0.7218861941394092, + "grad_norm": 0.0, + "learning_rate": 3.7897743156574596e-06, + "loss": 1.3613, + "step": 24586 + }, + { + "epoch": 0.7219155558165482, + "grad_norm": 0.0, + "learning_rate": 3.789028987965213e-06, + "loss": 1.2476, + "step": 24587 + }, + { + "epoch": 0.7219449174936873, + "grad_norm": 0.0, + "learning_rate": 3.7882837164403708e-06, + "loss": 1.2197, + "step": 24588 + }, + { + "epoch": 0.7219742791708262, + "grad_norm": 0.0, + "learning_rate": 3.7875385010896703e-06, + "loss": 1.1621, + "step": 24589 + }, + { + "epoch": 0.7220036408479652, + "grad_norm": 0.0, + "learning_rate": 3.7867933419198466e-06, + "loss": 1.3057, + "step": 24590 + }, + { + "epoch": 0.7220330025251043, + "grad_norm": 0.0, + "learning_rate": 3.7860482389376453e-06, + "loss": 1.2061, + "step": 24591 + }, + { + "epoch": 0.7220623642022432, + "grad_norm": 0.0, + "learning_rate": 3.7853031921498017e-06, + "loss": 1.1738, + "step": 24592 + }, + { + "epoch": 0.7220917258793822, + "grad_norm": 0.0, + "learning_rate": 3.784558201563052e-06, + "loss": 1.1953, + "step": 24593 + }, + { + "epoch": 0.7221210875565213, + "grad_norm": 0.0, + "learning_rate": 3.783813267184131e-06, + "loss": 1.1963, + "step": 24594 + }, + { + "epoch": 0.7221504492336602, + "grad_norm": 0.0, + "learning_rate": 3.7830683890197827e-06, + "loss": 1.1968, + "step": 24595 + }, + { + "epoch": 0.7221798109107992, + "grad_norm": 0.0, + "learning_rate": 3.7823235670767355e-06, + "loss": 1.2681, + "step": 24596 + }, + { + "epoch": 0.7222091725879383, + "grad_norm": 0.0, + "learning_rate": 3.7815788013617326e-06, + "loss": 1.4023, + "step": 24597 + }, + { + "epoch": 0.7222385342650772, + "grad_norm": 0.0, + "learning_rate": 3.7808340918815046e-06, + "loss": 1.1641, + "step": 24598 + }, + { + "epoch": 0.7222678959422162, + "grad_norm": 0.0, + "learning_rate": 3.7800894386427867e-06, + "loss": 1.209, + "step": 24599 + }, + { + "epoch": 0.7222972576193553, + "grad_norm": 0.0, + "learning_rate": 3.7793448416523127e-06, + "loss": 1.1787, + "step": 24600 + }, + { + "epoch": 0.7223266192964942, + "grad_norm": 0.0, + "learning_rate": 3.7786003009168127e-06, + "loss": 1.2979, + "step": 24601 + }, + { + "epoch": 0.7223559809736332, + "grad_norm": 0.0, + "learning_rate": 3.777855816443028e-06, + "loss": 1.2207, + "step": 24602 + }, + { + "epoch": 0.7223853426507723, + "grad_norm": 0.0, + "learning_rate": 3.777111388237682e-06, + "loss": 1.2729, + "step": 24603 + }, + { + "epoch": 0.7224147043279112, + "grad_norm": 0.0, + "learning_rate": 3.7763670163075163e-06, + "loss": 1.2485, + "step": 24604 + }, + { + "epoch": 0.7224440660050502, + "grad_norm": 0.0, + "learning_rate": 3.7756227006592526e-06, + "loss": 1.1592, + "step": 24605 + }, + { + "epoch": 0.7224734276821893, + "grad_norm": 0.0, + "learning_rate": 3.774878441299631e-06, + "loss": 1.2627, + "step": 24606 + }, + { + "epoch": 0.7225027893593282, + "grad_norm": 0.0, + "learning_rate": 3.7741342382353785e-06, + "loss": 1.3291, + "step": 24607 + }, + { + "epoch": 0.7225321510364672, + "grad_norm": 0.0, + "learning_rate": 3.7733900914732237e-06, + "loss": 1.208, + "step": 24608 + }, + { + "epoch": 0.7225615127136062, + "grad_norm": 0.0, + "learning_rate": 3.7726460010198972e-06, + "loss": 1.293, + "step": 24609 + }, + { + "epoch": 0.7225908743907452, + "grad_norm": 0.0, + "learning_rate": 3.7719019668821242e-06, + "loss": 1.2109, + "step": 24610 + }, + { + "epoch": 0.7226202360678842, + "grad_norm": 0.0, + "learning_rate": 3.7711579890666407e-06, + "loss": 1.2192, + "step": 24611 + }, + { + "epoch": 0.7226495977450232, + "grad_norm": 0.0, + "learning_rate": 3.770414067580167e-06, + "loss": 1.1323, + "step": 24612 + }, + { + "epoch": 0.7226789594221622, + "grad_norm": 0.0, + "learning_rate": 3.7696702024294383e-06, + "loss": 1.2842, + "step": 24613 + }, + { + "epoch": 0.7227083210993012, + "grad_norm": 0.0, + "learning_rate": 3.768926393621173e-06, + "loss": 1.1636, + "step": 24614 + }, + { + "epoch": 0.7227376827764402, + "grad_norm": 0.0, + "learning_rate": 3.768182641162107e-06, + "loss": 1.2007, + "step": 24615 + }, + { + "epoch": 0.7227670444535792, + "grad_norm": 0.0, + "learning_rate": 3.76743894505896e-06, + "loss": 1.1133, + "step": 24616 + }, + { + "epoch": 0.7227964061307182, + "grad_norm": 0.0, + "learning_rate": 3.7666953053184595e-06, + "loss": 1.2998, + "step": 24617 + }, + { + "epoch": 0.7228257678078572, + "grad_norm": 0.0, + "learning_rate": 3.76595172194733e-06, + "loss": 1.3457, + "step": 24618 + }, + { + "epoch": 0.7228551294849962, + "grad_norm": 0.0, + "learning_rate": 3.765208194952291e-06, + "loss": 1.3525, + "step": 24619 + }, + { + "epoch": 0.7228844911621352, + "grad_norm": 0.0, + "learning_rate": 3.7644647243400757e-06, + "loss": 1.208, + "step": 24620 + }, + { + "epoch": 0.7229138528392742, + "grad_norm": 0.0, + "learning_rate": 3.7637213101173984e-06, + "loss": 1.2007, + "step": 24621 + }, + { + "epoch": 0.7229432145164132, + "grad_norm": 0.0, + "learning_rate": 3.7629779522909904e-06, + "loss": 1.2173, + "step": 24622 + }, + { + "epoch": 0.7229725761935522, + "grad_norm": 0.0, + "learning_rate": 3.76223465086757e-06, + "loss": 1.3066, + "step": 24623 + }, + { + "epoch": 0.7230019378706912, + "grad_norm": 0.0, + "learning_rate": 3.76149140585386e-06, + "loss": 1.2012, + "step": 24624 + }, + { + "epoch": 0.7230312995478302, + "grad_norm": 0.0, + "learning_rate": 3.7607482172565766e-06, + "loss": 1.2773, + "step": 24625 + }, + { + "epoch": 0.7230606612249691, + "grad_norm": 0.0, + "learning_rate": 3.7600050850824486e-06, + "loss": 1.1963, + "step": 24626 + }, + { + "epoch": 0.7230900229021082, + "grad_norm": 0.0, + "learning_rate": 3.7592620093381892e-06, + "loss": 1.3145, + "step": 24627 + }, + { + "epoch": 0.7231193845792472, + "grad_norm": 0.0, + "learning_rate": 3.758518990030525e-06, + "loss": 1.2935, + "step": 24628 + }, + { + "epoch": 0.7231487462563861, + "grad_norm": 0.0, + "learning_rate": 3.7577760271661724e-06, + "loss": 1.2827, + "step": 24629 + }, + { + "epoch": 0.7231781079335252, + "grad_norm": 0.0, + "learning_rate": 3.7570331207518464e-06, + "loss": 1.2529, + "step": 24630 + }, + { + "epoch": 0.7232074696106642, + "grad_norm": 0.0, + "learning_rate": 3.7562902707942717e-06, + "loss": 1.1484, + "step": 24631 + }, + { + "epoch": 0.7232368312878031, + "grad_norm": 0.0, + "learning_rate": 3.7555474773001644e-06, + "loss": 1.2764, + "step": 24632 + }, + { + "epoch": 0.7232661929649422, + "grad_norm": 0.0, + "learning_rate": 3.754804740276239e-06, + "loss": 1.2344, + "step": 24633 + }, + { + "epoch": 0.7232955546420812, + "grad_norm": 0.0, + "learning_rate": 3.754062059729211e-06, + "loss": 1.2734, + "step": 24634 + }, + { + "epoch": 0.7233249163192201, + "grad_norm": 0.0, + "learning_rate": 3.753319435665802e-06, + "loss": 1.1001, + "step": 24635 + }, + { + "epoch": 0.7233542779963592, + "grad_norm": 0.0, + "learning_rate": 3.7525768680927232e-06, + "loss": 1.249, + "step": 24636 + }, + { + "epoch": 0.7233836396734982, + "grad_norm": 0.0, + "learning_rate": 3.7518343570166936e-06, + "loss": 1.3262, + "step": 24637 + }, + { + "epoch": 0.7234130013506371, + "grad_norm": 0.0, + "learning_rate": 3.751091902444427e-06, + "loss": 1.3418, + "step": 24638 + }, + { + "epoch": 0.7234423630277761, + "grad_norm": 0.0, + "learning_rate": 3.750349504382633e-06, + "loss": 1.3145, + "step": 24639 + }, + { + "epoch": 0.7234717247049152, + "grad_norm": 0.0, + "learning_rate": 3.7496071628380336e-06, + "loss": 1.1841, + "step": 24640 + }, + { + "epoch": 0.7235010863820541, + "grad_norm": 0.0, + "learning_rate": 3.7488648778173366e-06, + "loss": 1.1729, + "step": 24641 + }, + { + "epoch": 0.7235304480591931, + "grad_norm": 0.0, + "learning_rate": 3.748122649327255e-06, + "loss": 1.2852, + "step": 24642 + }, + { + "epoch": 0.7235598097363322, + "grad_norm": 0.0, + "learning_rate": 3.7473804773744984e-06, + "loss": 1.0283, + "step": 24643 + }, + { + "epoch": 0.7235891714134711, + "grad_norm": 0.0, + "learning_rate": 3.7466383619657844e-06, + "loss": 1.2241, + "step": 24644 + }, + { + "epoch": 0.7236185330906101, + "grad_norm": 0.0, + "learning_rate": 3.745896303107818e-06, + "loss": 1.231, + "step": 24645 + }, + { + "epoch": 0.7236478947677492, + "grad_norm": 0.0, + "learning_rate": 3.7451543008073164e-06, + "loss": 1.2236, + "step": 24646 + }, + { + "epoch": 0.7236772564448881, + "grad_norm": 0.0, + "learning_rate": 3.7444123550709866e-06, + "loss": 1.3496, + "step": 24647 + }, + { + "epoch": 0.7237066181220271, + "grad_norm": 0.0, + "learning_rate": 3.7436704659055377e-06, + "loss": 1.2529, + "step": 24648 + }, + { + "epoch": 0.7237359797991662, + "grad_norm": 0.0, + "learning_rate": 3.7429286333176795e-06, + "loss": 1.1865, + "step": 24649 + }, + { + "epoch": 0.7237653414763051, + "grad_norm": 0.0, + "learning_rate": 3.7421868573141162e-06, + "loss": 1.3555, + "step": 24650 + }, + { + "epoch": 0.7237947031534441, + "grad_norm": 0.0, + "learning_rate": 3.7414451379015637e-06, + "loss": 1.1841, + "step": 24651 + }, + { + "epoch": 0.7238240648305831, + "grad_norm": 0.0, + "learning_rate": 3.740703475086721e-06, + "loss": 1.2715, + "step": 24652 + }, + { + "epoch": 0.7238534265077221, + "grad_norm": 0.0, + "learning_rate": 3.739961868876305e-06, + "loss": 1.1143, + "step": 24653 + }, + { + "epoch": 0.7238827881848611, + "grad_norm": 0.0, + "learning_rate": 3.7392203192770126e-06, + "loss": 1.2031, + "step": 24654 + }, + { + "epoch": 0.7239121498620001, + "grad_norm": 0.0, + "learning_rate": 3.7384788262955572e-06, + "loss": 1.228, + "step": 24655 + }, + { + "epoch": 0.7239415115391391, + "grad_norm": 0.0, + "learning_rate": 3.7377373899386428e-06, + "loss": 1.3145, + "step": 24656 + }, + { + "epoch": 0.7239708732162781, + "grad_norm": 0.0, + "learning_rate": 3.736996010212972e-06, + "loss": 1.2217, + "step": 24657 + }, + { + "epoch": 0.7240002348934171, + "grad_norm": 0.0, + "learning_rate": 3.73625468712525e-06, + "loss": 1.1099, + "step": 24658 + }, + { + "epoch": 0.7240295965705561, + "grad_norm": 0.0, + "learning_rate": 3.7355134206821788e-06, + "loss": 1.1445, + "step": 24659 + }, + { + "epoch": 0.7240589582476951, + "grad_norm": 0.0, + "learning_rate": 3.734772210890466e-06, + "loss": 1.4053, + "step": 24660 + }, + { + "epoch": 0.7240883199248341, + "grad_norm": 0.0, + "learning_rate": 3.7340310577568104e-06, + "loss": 1.1152, + "step": 24661 + }, + { + "epoch": 0.7241176816019731, + "grad_norm": 0.0, + "learning_rate": 3.7332899612879203e-06, + "loss": 1.0996, + "step": 24662 + }, + { + "epoch": 0.7241470432791121, + "grad_norm": 0.0, + "learning_rate": 3.73254892149049e-06, + "loss": 1.3438, + "step": 24663 + }, + { + "epoch": 0.7241764049562511, + "grad_norm": 0.0, + "learning_rate": 3.731807938371228e-06, + "loss": 1.2744, + "step": 24664 + }, + { + "epoch": 0.7242057666333901, + "grad_norm": 0.0, + "learning_rate": 3.731067011936832e-06, + "loss": 1.1738, + "step": 24665 + }, + { + "epoch": 0.724235128310529, + "grad_norm": 0.0, + "learning_rate": 3.730326142194003e-06, + "loss": 1.2197, + "step": 24666 + }, + { + "epoch": 0.7242644899876681, + "grad_norm": 0.0, + "learning_rate": 3.729585329149441e-06, + "loss": 1.29, + "step": 24667 + }, + { + "epoch": 0.7242938516648071, + "grad_norm": 0.0, + "learning_rate": 3.728844572809841e-06, + "loss": 1.1606, + "step": 24668 + }, + { + "epoch": 0.724323213341946, + "grad_norm": 0.0, + "learning_rate": 3.7281038731819087e-06, + "loss": 1.3193, + "step": 24669 + }, + { + "epoch": 0.7243525750190851, + "grad_norm": 0.0, + "learning_rate": 3.727363230272335e-06, + "loss": 1.2734, + "step": 24670 + }, + { + "epoch": 0.7243819366962241, + "grad_norm": 0.0, + "learning_rate": 3.726622644087827e-06, + "loss": 1.1772, + "step": 24671 + }, + { + "epoch": 0.724411298373363, + "grad_norm": 0.0, + "learning_rate": 3.7258821146350764e-06, + "loss": 1.2646, + "step": 24672 + }, + { + "epoch": 0.7244406600505021, + "grad_norm": 0.0, + "learning_rate": 3.725141641920781e-06, + "loss": 1.2109, + "step": 24673 + }, + { + "epoch": 0.7244700217276411, + "grad_norm": 0.0, + "learning_rate": 3.7244012259516326e-06, + "loss": 1.2285, + "step": 24674 + }, + { + "epoch": 0.72449938340478, + "grad_norm": 0.0, + "learning_rate": 3.7236608667343354e-06, + "loss": 1.2446, + "step": 24675 + }, + { + "epoch": 0.7245287450819191, + "grad_norm": 0.0, + "learning_rate": 3.7229205642755763e-06, + "loss": 1.1709, + "step": 24676 + }, + { + "epoch": 0.7245581067590581, + "grad_norm": 0.0, + "learning_rate": 3.7221803185820583e-06, + "loss": 1.0894, + "step": 24677 + }, + { + "epoch": 0.724587468436197, + "grad_norm": 0.0, + "learning_rate": 3.721440129660471e-06, + "loss": 1.2842, + "step": 24678 + }, + { + "epoch": 0.7246168301133361, + "grad_norm": 0.0, + "learning_rate": 3.720699997517504e-06, + "loss": 1.2646, + "step": 24679 + }, + { + "epoch": 0.7246461917904751, + "grad_norm": 0.0, + "learning_rate": 3.71995992215986e-06, + "loss": 1.0986, + "step": 24680 + }, + { + "epoch": 0.724675553467614, + "grad_norm": 0.0, + "learning_rate": 3.719219903594227e-06, + "loss": 1.2949, + "step": 24681 + }, + { + "epoch": 0.7247049151447531, + "grad_norm": 0.0, + "learning_rate": 3.7184799418272954e-06, + "loss": 1.1479, + "step": 24682 + }, + { + "epoch": 0.7247342768218921, + "grad_norm": 0.0, + "learning_rate": 3.717740036865757e-06, + "loss": 1.2896, + "step": 24683 + }, + { + "epoch": 0.724763638499031, + "grad_norm": 0.0, + "learning_rate": 3.7170001887163053e-06, + "loss": 0.939, + "step": 24684 + }, + { + "epoch": 0.7247930001761701, + "grad_norm": 0.0, + "learning_rate": 3.716260397385628e-06, + "loss": 1.1479, + "step": 24685 + }, + { + "epoch": 0.7248223618533091, + "grad_norm": 0.0, + "learning_rate": 3.715520662880421e-06, + "loss": 1.3262, + "step": 24686 + }, + { + "epoch": 0.724851723530448, + "grad_norm": 0.0, + "learning_rate": 3.71478098520737e-06, + "loss": 1.3281, + "step": 24687 + }, + { + "epoch": 0.7248810852075871, + "grad_norm": 0.0, + "learning_rate": 3.71404136437316e-06, + "loss": 1.2012, + "step": 24688 + }, + { + "epoch": 0.7249104468847261, + "grad_norm": 0.0, + "learning_rate": 3.713301800384488e-06, + "loss": 1.2441, + "step": 24689 + }, + { + "epoch": 0.724939808561865, + "grad_norm": 0.0, + "learning_rate": 3.7125622932480377e-06, + "loss": 1.2148, + "step": 24690 + }, + { + "epoch": 0.7249691702390041, + "grad_norm": 0.0, + "learning_rate": 3.711822842970497e-06, + "loss": 1.1953, + "step": 24691 + }, + { + "epoch": 0.724998531916143, + "grad_norm": 0.0, + "learning_rate": 3.711083449558549e-06, + "loss": 1.3652, + "step": 24692 + }, + { + "epoch": 0.725027893593282, + "grad_norm": 0.0, + "learning_rate": 3.710344113018887e-06, + "loss": 1.207, + "step": 24693 + }, + { + "epoch": 0.7250572552704211, + "grad_norm": 0.0, + "learning_rate": 3.7096048333581913e-06, + "loss": 1.1152, + "step": 24694 + }, + { + "epoch": 0.72508661694756, + "grad_norm": 0.0, + "learning_rate": 3.708865610583153e-06, + "loss": 1.2529, + "step": 24695 + }, + { + "epoch": 0.725115978624699, + "grad_norm": 0.0, + "learning_rate": 3.7081264447004504e-06, + "loss": 1.1943, + "step": 24696 + }, + { + "epoch": 0.7251453403018381, + "grad_norm": 0.0, + "learning_rate": 3.70738733571678e-06, + "loss": 1.2646, + "step": 24697 + }, + { + "epoch": 0.725174701978977, + "grad_norm": 0.0, + "learning_rate": 3.706648283638813e-06, + "loss": 1.1665, + "step": 24698 + }, + { + "epoch": 0.725204063656116, + "grad_norm": 0.0, + "learning_rate": 3.705909288473234e-06, + "loss": 1.228, + "step": 24699 + }, + { + "epoch": 0.7252334253332551, + "grad_norm": 0.0, + "learning_rate": 3.7051703502267333e-06, + "loss": 1.333, + "step": 24700 + }, + { + "epoch": 0.725262787010394, + "grad_norm": 0.0, + "learning_rate": 3.7044314689059846e-06, + "loss": 1.043, + "step": 24701 + }, + { + "epoch": 0.725292148687533, + "grad_norm": 0.0, + "learning_rate": 3.7036926445176792e-06, + "loss": 1.3877, + "step": 24702 + }, + { + "epoch": 0.7253215103646721, + "grad_norm": 0.0, + "learning_rate": 3.7029538770684892e-06, + "loss": 1.2192, + "step": 24703 + }, + { + "epoch": 0.725350872041811, + "grad_norm": 0.0, + "learning_rate": 3.702215166565104e-06, + "loss": 1.2422, + "step": 24704 + }, + { + "epoch": 0.72538023371895, + "grad_norm": 0.0, + "learning_rate": 3.7014765130141995e-06, + "loss": 1.2368, + "step": 24705 + }, + { + "epoch": 0.7254095953960891, + "grad_norm": 0.0, + "learning_rate": 3.7007379164224567e-06, + "loss": 1.2637, + "step": 24706 + }, + { + "epoch": 0.725438957073228, + "grad_norm": 0.0, + "learning_rate": 3.6999993767965536e-06, + "loss": 1.3169, + "step": 24707 + }, + { + "epoch": 0.725468318750367, + "grad_norm": 0.0, + "learning_rate": 3.699260894143166e-06, + "loss": 1.2061, + "step": 24708 + }, + { + "epoch": 0.7254976804275061, + "grad_norm": 0.0, + "learning_rate": 3.69852246846898e-06, + "loss": 1.2178, + "step": 24709 + }, + { + "epoch": 0.725527042104645, + "grad_norm": 0.0, + "learning_rate": 3.697784099780666e-06, + "loss": 1.3237, + "step": 24710 + }, + { + "epoch": 0.725556403781784, + "grad_norm": 0.0, + "learning_rate": 3.6970457880849066e-06, + "loss": 1.2998, + "step": 24711 + }, + { + "epoch": 0.7255857654589231, + "grad_norm": 0.0, + "learning_rate": 3.6963075333883737e-06, + "loss": 1.1909, + "step": 24712 + }, + { + "epoch": 0.725615127136062, + "grad_norm": 0.0, + "learning_rate": 3.6955693356977485e-06, + "loss": 1.2803, + "step": 24713 + }, + { + "epoch": 0.725644488813201, + "grad_norm": 0.0, + "learning_rate": 3.6948311950197057e-06, + "loss": 1.2305, + "step": 24714 + }, + { + "epoch": 0.72567385049034, + "grad_norm": 0.0, + "learning_rate": 3.694093111360919e-06, + "loss": 1.1172, + "step": 24715 + }, + { + "epoch": 0.725703212167479, + "grad_norm": 0.0, + "learning_rate": 3.6933550847280597e-06, + "loss": 1.2441, + "step": 24716 + }, + { + "epoch": 0.725732573844618, + "grad_norm": 0.0, + "learning_rate": 3.692617115127808e-06, + "loss": 1.1001, + "step": 24717 + }, + { + "epoch": 0.725761935521757, + "grad_norm": 0.0, + "learning_rate": 3.691879202566836e-06, + "loss": 1.2695, + "step": 24718 + }, + { + "epoch": 0.725791297198896, + "grad_norm": 0.0, + "learning_rate": 3.691141347051812e-06, + "loss": 1.1885, + "step": 24719 + }, + { + "epoch": 0.725820658876035, + "grad_norm": 0.0, + "learning_rate": 3.6904035485894164e-06, + "loss": 1.1523, + "step": 24720 + }, + { + "epoch": 0.725850020553174, + "grad_norm": 0.0, + "learning_rate": 3.6896658071863144e-06, + "loss": 1.3271, + "step": 24721 + }, + { + "epoch": 0.725879382230313, + "grad_norm": 0.0, + "learning_rate": 3.688928122849187e-06, + "loss": 1.2632, + "step": 24722 + }, + { + "epoch": 0.725908743907452, + "grad_norm": 0.0, + "learning_rate": 3.6881904955846916e-06, + "loss": 1.3115, + "step": 24723 + }, + { + "epoch": 0.725938105584591, + "grad_norm": 0.0, + "learning_rate": 3.6874529253995095e-06, + "loss": 1.2603, + "step": 24724 + }, + { + "epoch": 0.72596746726173, + "grad_norm": 0.0, + "learning_rate": 3.6867154123003047e-06, + "loss": 1.1792, + "step": 24725 + }, + { + "epoch": 0.725996828938869, + "grad_norm": 0.0, + "learning_rate": 3.6859779562937527e-06, + "loss": 1.2783, + "step": 24726 + }, + { + "epoch": 0.726026190616008, + "grad_norm": 0.0, + "learning_rate": 3.685240557386518e-06, + "loss": 1.3975, + "step": 24727 + }, + { + "epoch": 0.726055552293147, + "grad_norm": 0.0, + "learning_rate": 3.684503215585268e-06, + "loss": 1.1494, + "step": 24728 + }, + { + "epoch": 0.726084913970286, + "grad_norm": 0.0, + "learning_rate": 3.6837659308966747e-06, + "loss": 1.2256, + "step": 24729 + }, + { + "epoch": 0.726114275647425, + "grad_norm": 0.0, + "learning_rate": 3.6830287033274047e-06, + "loss": 1.334, + "step": 24730 + }, + { + "epoch": 0.726143637324564, + "grad_norm": 0.0, + "learning_rate": 3.6822915328841238e-06, + "loss": 1.1802, + "step": 24731 + }, + { + "epoch": 0.726172999001703, + "grad_norm": 0.0, + "learning_rate": 3.6815544195734955e-06, + "loss": 1.1255, + "step": 24732 + }, + { + "epoch": 0.726202360678842, + "grad_norm": 0.0, + "learning_rate": 3.6808173634021915e-06, + "loss": 1.249, + "step": 24733 + }, + { + "epoch": 0.726231722355981, + "grad_norm": 0.0, + "learning_rate": 3.6800803643768713e-06, + "loss": 1.2637, + "step": 24734 + }, + { + "epoch": 0.72626108403312, + "grad_norm": 0.0, + "learning_rate": 3.6793434225042068e-06, + "loss": 1.1904, + "step": 24735 + }, + { + "epoch": 0.726290445710259, + "grad_norm": 0.0, + "learning_rate": 3.6786065377908574e-06, + "loss": 1.1157, + "step": 24736 + }, + { + "epoch": 0.726319807387398, + "grad_norm": 0.0, + "learning_rate": 3.6778697102434855e-06, + "loss": 1.2383, + "step": 24737 + }, + { + "epoch": 0.726349169064537, + "grad_norm": 0.0, + "learning_rate": 3.6771329398687607e-06, + "loss": 1.1548, + "step": 24738 + }, + { + "epoch": 0.7263785307416759, + "grad_norm": 0.0, + "learning_rate": 3.676396226673341e-06, + "loss": 1.1748, + "step": 24739 + }, + { + "epoch": 0.726407892418815, + "grad_norm": 0.0, + "learning_rate": 3.67565957066389e-06, + "loss": 1.2881, + "step": 24740 + }, + { + "epoch": 0.7264372540959539, + "grad_norm": 0.0, + "learning_rate": 3.6749229718470657e-06, + "loss": 1.2666, + "step": 24741 + }, + { + "epoch": 0.7264666157730929, + "grad_norm": 0.0, + "learning_rate": 3.674186430229537e-06, + "loss": 1.2949, + "step": 24742 + }, + { + "epoch": 0.726495977450232, + "grad_norm": 0.0, + "learning_rate": 3.673449945817956e-06, + "loss": 1.333, + "step": 24743 + }, + { + "epoch": 0.7265253391273709, + "grad_norm": 0.0, + "learning_rate": 3.6727135186189923e-06, + "loss": 1.0942, + "step": 24744 + }, + { + "epoch": 0.7265547008045099, + "grad_norm": 0.0, + "learning_rate": 3.6719771486392963e-06, + "loss": 1.1694, + "step": 24745 + }, + { + "epoch": 0.726584062481649, + "grad_norm": 0.0, + "learning_rate": 3.6712408358855345e-06, + "loss": 1.2529, + "step": 24746 + }, + { + "epoch": 0.7266134241587879, + "grad_norm": 0.0, + "learning_rate": 3.670504580364367e-06, + "loss": 1.2461, + "step": 24747 + }, + { + "epoch": 0.7266427858359269, + "grad_norm": 0.0, + "learning_rate": 3.6697683820824403e-06, + "loss": 1.2871, + "step": 24748 + }, + { + "epoch": 0.726672147513066, + "grad_norm": 0.0, + "learning_rate": 3.669032241046423e-06, + "loss": 1.2773, + "step": 24749 + }, + { + "epoch": 0.7267015091902049, + "grad_norm": 0.0, + "learning_rate": 3.668296157262964e-06, + "loss": 1.1543, + "step": 24750 + }, + { + "epoch": 0.7267308708673439, + "grad_norm": 0.0, + "learning_rate": 3.6675601307387286e-06, + "loss": 1.2505, + "step": 24751 + }, + { + "epoch": 0.726760232544483, + "grad_norm": 0.0, + "learning_rate": 3.6668241614803656e-06, + "loss": 1.2896, + "step": 24752 + }, + { + "epoch": 0.7267895942216219, + "grad_norm": 0.0, + "learning_rate": 3.666088249494537e-06, + "loss": 1.1738, + "step": 24753 + }, + { + "epoch": 0.7268189558987609, + "grad_norm": 0.0, + "learning_rate": 3.6653523947878944e-06, + "loss": 1.2319, + "step": 24754 + }, + { + "epoch": 0.7268483175759, + "grad_norm": 0.0, + "learning_rate": 3.6646165973670922e-06, + "loss": 1.2256, + "step": 24755 + }, + { + "epoch": 0.7268776792530389, + "grad_norm": 0.0, + "learning_rate": 3.663880857238785e-06, + "loss": 1.1709, + "step": 24756 + }, + { + "epoch": 0.7269070409301779, + "grad_norm": 0.0, + "learning_rate": 3.6631451744096225e-06, + "loss": 1.2275, + "step": 24757 + }, + { + "epoch": 0.726936402607317, + "grad_norm": 0.0, + "learning_rate": 3.6624095488862643e-06, + "loss": 1.1738, + "step": 24758 + }, + { + "epoch": 0.7269657642844559, + "grad_norm": 0.0, + "learning_rate": 3.6616739806753564e-06, + "loss": 1.1411, + "step": 24759 + }, + { + "epoch": 0.7269951259615949, + "grad_norm": 0.0, + "learning_rate": 3.6609384697835572e-06, + "loss": 1.2656, + "step": 24760 + }, + { + "epoch": 0.727024487638734, + "grad_norm": 0.0, + "learning_rate": 3.6602030162175118e-06, + "loss": 1.2959, + "step": 24761 + }, + { + "epoch": 0.7270538493158729, + "grad_norm": 0.0, + "learning_rate": 3.6594676199838773e-06, + "loss": 1.1357, + "step": 24762 + }, + { + "epoch": 0.7270832109930119, + "grad_norm": 0.0, + "learning_rate": 3.6587322810893013e-06, + "loss": 1.2354, + "step": 24763 + }, + { + "epoch": 0.727112572670151, + "grad_norm": 0.0, + "learning_rate": 3.6579969995404318e-06, + "loss": 1.1782, + "step": 24764 + }, + { + "epoch": 0.7271419343472899, + "grad_norm": 0.0, + "learning_rate": 3.657261775343918e-06, + "loss": 1.1714, + "step": 24765 + }, + { + "epoch": 0.7271712960244289, + "grad_norm": 0.0, + "learning_rate": 3.6565266085064135e-06, + "loss": 1.2715, + "step": 24766 + }, + { + "epoch": 0.7272006577015679, + "grad_norm": 0.0, + "learning_rate": 3.6557914990345623e-06, + "loss": 1.0288, + "step": 24767 + }, + { + "epoch": 0.7272300193787069, + "grad_norm": 0.0, + "learning_rate": 3.6550564469350114e-06, + "loss": 1.2603, + "step": 24768 + }, + { + "epoch": 0.7272593810558459, + "grad_norm": 0.0, + "learning_rate": 3.654321452214412e-06, + "loss": 1.2324, + "step": 24769 + }, + { + "epoch": 0.7272887427329849, + "grad_norm": 0.0, + "learning_rate": 3.653586514879407e-06, + "loss": 1.3516, + "step": 24770 + }, + { + "epoch": 0.7273181044101239, + "grad_norm": 0.0, + "learning_rate": 3.6528516349366472e-06, + "loss": 1.2246, + "step": 24771 + }, + { + "epoch": 0.7273474660872629, + "grad_norm": 0.0, + "learning_rate": 3.6521168123927753e-06, + "loss": 1.3262, + "step": 24772 + }, + { + "epoch": 0.7273768277644019, + "grad_norm": 0.0, + "learning_rate": 3.651382047254437e-06, + "loss": 1.2031, + "step": 24773 + }, + { + "epoch": 0.7274061894415409, + "grad_norm": 0.0, + "learning_rate": 3.6506473395282726e-06, + "loss": 1.2158, + "step": 24774 + }, + { + "epoch": 0.7274355511186799, + "grad_norm": 0.0, + "learning_rate": 3.6499126892209347e-06, + "loss": 1.1611, + "step": 24775 + }, + { + "epoch": 0.7274649127958189, + "grad_norm": 0.0, + "learning_rate": 3.6491780963390634e-06, + "loss": 1.29, + "step": 24776 + }, + { + "epoch": 0.7274942744729579, + "grad_norm": 0.0, + "learning_rate": 3.6484435608892966e-06, + "loss": 1.2109, + "step": 24777 + }, + { + "epoch": 0.7275236361500969, + "grad_norm": 0.0, + "learning_rate": 3.6477090828782846e-06, + "loss": 1.0498, + "step": 24778 + }, + { + "epoch": 0.7275529978272359, + "grad_norm": 0.0, + "learning_rate": 3.6469746623126667e-06, + "loss": 1.2139, + "step": 24779 + }, + { + "epoch": 0.7275823595043749, + "grad_norm": 0.0, + "learning_rate": 3.646240299199083e-06, + "loss": 1.2666, + "step": 24780 + }, + { + "epoch": 0.7276117211815138, + "grad_norm": 0.0, + "learning_rate": 3.6455059935441726e-06, + "loss": 1.2686, + "step": 24781 + }, + { + "epoch": 0.7276410828586529, + "grad_norm": 0.0, + "learning_rate": 3.644771745354583e-06, + "loss": 1.2095, + "step": 24782 + }, + { + "epoch": 0.7276704445357919, + "grad_norm": 0.0, + "learning_rate": 3.6440375546369466e-06, + "loss": 1.3945, + "step": 24783 + }, + { + "epoch": 0.7276998062129308, + "grad_norm": 0.0, + "learning_rate": 3.643303421397909e-06, + "loss": 1.1787, + "step": 24784 + }, + { + "epoch": 0.7277291678900699, + "grad_norm": 0.0, + "learning_rate": 3.6425693456441035e-06, + "loss": 1.2139, + "step": 24785 + }, + { + "epoch": 0.7277585295672089, + "grad_norm": 0.0, + "learning_rate": 3.641835327382175e-06, + "loss": 1.1714, + "step": 24786 + }, + { + "epoch": 0.7277878912443478, + "grad_norm": 0.0, + "learning_rate": 3.641101366618758e-06, + "loss": 1.1582, + "step": 24787 + }, + { + "epoch": 0.7278172529214869, + "grad_norm": 0.0, + "learning_rate": 3.64036746336049e-06, + "loss": 1.209, + "step": 24788 + }, + { + "epoch": 0.7278466145986259, + "grad_norm": 0.0, + "learning_rate": 3.639633617614008e-06, + "loss": 1.2754, + "step": 24789 + }, + { + "epoch": 0.7278759762757648, + "grad_norm": 0.0, + "learning_rate": 3.6388998293859443e-06, + "loss": 1.3291, + "step": 24790 + }, + { + "epoch": 0.7279053379529039, + "grad_norm": 0.0, + "learning_rate": 3.6381660986829413e-06, + "loss": 1.2471, + "step": 24791 + }, + { + "epoch": 0.7279346996300429, + "grad_norm": 0.0, + "learning_rate": 3.637432425511629e-06, + "loss": 1.0161, + "step": 24792 + }, + { + "epoch": 0.7279640613071818, + "grad_norm": 0.0, + "learning_rate": 3.636698809878648e-06, + "loss": 1.2134, + "step": 24793 + }, + { + "epoch": 0.7279934229843209, + "grad_norm": 0.0, + "learning_rate": 3.6359652517906263e-06, + "loss": 1.1216, + "step": 24794 + }, + { + "epoch": 0.7280227846614599, + "grad_norm": 0.0, + "learning_rate": 3.6352317512542035e-06, + "loss": 1.1279, + "step": 24795 + }, + { + "epoch": 0.7280521463385988, + "grad_norm": 0.0, + "learning_rate": 3.634498308276011e-06, + "loss": 1.1934, + "step": 24796 + }, + { + "epoch": 0.7280815080157379, + "grad_norm": 0.0, + "learning_rate": 3.633764922862679e-06, + "loss": 1.2163, + "step": 24797 + }, + { + "epoch": 0.7281108696928769, + "grad_norm": 0.0, + "learning_rate": 3.6330315950208427e-06, + "loss": 1.1626, + "step": 24798 + }, + { + "epoch": 0.7281402313700158, + "grad_norm": 0.0, + "learning_rate": 3.6322983247571274e-06, + "loss": 1.0908, + "step": 24799 + }, + { + "epoch": 0.7281695930471549, + "grad_norm": 0.0, + "learning_rate": 3.6315651120781737e-06, + "loss": 1.207, + "step": 24800 + }, + { + "epoch": 0.7281989547242939, + "grad_norm": 0.0, + "learning_rate": 3.6308319569906037e-06, + "loss": 1.1504, + "step": 24801 + }, + { + "epoch": 0.7282283164014328, + "grad_norm": 0.0, + "learning_rate": 3.6300988595010555e-06, + "loss": 1.2256, + "step": 24802 + }, + { + "epoch": 0.7282576780785719, + "grad_norm": 0.0, + "learning_rate": 3.629365819616154e-06, + "loss": 1.1465, + "step": 24803 + }, + { + "epoch": 0.7282870397557109, + "grad_norm": 0.0, + "learning_rate": 3.6286328373425294e-06, + "loss": 1.1992, + "step": 24804 + }, + { + "epoch": 0.7283164014328498, + "grad_norm": 0.0, + "learning_rate": 3.6278999126868096e-06, + "loss": 1.3281, + "step": 24805 + }, + { + "epoch": 0.7283457631099889, + "grad_norm": 0.0, + "learning_rate": 3.6271670456556196e-06, + "loss": 1.3291, + "step": 24806 + }, + { + "epoch": 0.7283751247871278, + "grad_norm": 0.0, + "learning_rate": 3.626434236255593e-06, + "loss": 1.2129, + "step": 24807 + }, + { + "epoch": 0.7284044864642668, + "grad_norm": 0.0, + "learning_rate": 3.6257014844933503e-06, + "loss": 1.3242, + "step": 24808 + }, + { + "epoch": 0.7284338481414059, + "grad_norm": 0.0, + "learning_rate": 3.624968790375526e-06, + "loss": 1.2812, + "step": 24809 + }, + { + "epoch": 0.7284632098185448, + "grad_norm": 0.0, + "learning_rate": 3.6242361539087377e-06, + "loss": 1.1616, + "step": 24810 + }, + { + "epoch": 0.7284925714956838, + "grad_norm": 0.0, + "learning_rate": 3.623503575099617e-06, + "loss": 1.3242, + "step": 24811 + }, + { + "epoch": 0.7285219331728229, + "grad_norm": 0.0, + "learning_rate": 3.6227710539547876e-06, + "loss": 1.2236, + "step": 24812 + }, + { + "epoch": 0.7285512948499618, + "grad_norm": 0.0, + "learning_rate": 3.622038590480873e-06, + "loss": 1.1777, + "step": 24813 + }, + { + "epoch": 0.7285806565271008, + "grad_norm": 0.0, + "learning_rate": 3.621306184684493e-06, + "loss": 1.3203, + "step": 24814 + }, + { + "epoch": 0.7286100182042399, + "grad_norm": 0.0, + "learning_rate": 3.620573836572279e-06, + "loss": 1.2578, + "step": 24815 + }, + { + "epoch": 0.7286393798813788, + "grad_norm": 0.0, + "learning_rate": 3.6198415461508496e-06, + "loss": 1.2119, + "step": 24816 + }, + { + "epoch": 0.7286687415585178, + "grad_norm": 0.0, + "learning_rate": 3.619109313426823e-06, + "loss": 1.2266, + "step": 24817 + }, + { + "epoch": 0.7286981032356569, + "grad_norm": 0.0, + "learning_rate": 3.6183771384068276e-06, + "loss": 1.3115, + "step": 24818 + }, + { + "epoch": 0.7287274649127958, + "grad_norm": 0.0, + "learning_rate": 3.61764502109748e-06, + "loss": 1.2085, + "step": 24819 + }, + { + "epoch": 0.7287568265899348, + "grad_norm": 0.0, + "learning_rate": 3.6169129615054067e-06, + "loss": 1.3145, + "step": 24820 + }, + { + "epoch": 0.7287861882670739, + "grad_norm": 0.0, + "learning_rate": 3.616180959637223e-06, + "loss": 1.1855, + "step": 24821 + }, + { + "epoch": 0.7288155499442128, + "grad_norm": 0.0, + "learning_rate": 3.61544901549955e-06, + "loss": 1.2476, + "step": 24822 + }, + { + "epoch": 0.7288449116213518, + "grad_norm": 0.0, + "learning_rate": 3.6147171290990034e-06, + "loss": 1.3574, + "step": 24823 + }, + { + "epoch": 0.7288742732984909, + "grad_norm": 0.0, + "learning_rate": 3.613985300442209e-06, + "loss": 1.1519, + "step": 24824 + }, + { + "epoch": 0.7289036349756298, + "grad_norm": 0.0, + "learning_rate": 3.6132535295357806e-06, + "loss": 1.1182, + "step": 24825 + }, + { + "epoch": 0.7289329966527688, + "grad_norm": 0.0, + "learning_rate": 3.612521816386333e-06, + "loss": 1.2363, + "step": 24826 + }, + { + "epoch": 0.7289623583299079, + "grad_norm": 0.0, + "learning_rate": 3.61179016100049e-06, + "loss": 1.1865, + "step": 24827 + }, + { + "epoch": 0.7289917200070468, + "grad_norm": 0.0, + "learning_rate": 3.611058563384865e-06, + "loss": 1.2969, + "step": 24828 + }, + { + "epoch": 0.7290210816841858, + "grad_norm": 0.0, + "learning_rate": 3.610327023546073e-06, + "loss": 1.1787, + "step": 24829 + }, + { + "epoch": 0.7290504433613249, + "grad_norm": 0.0, + "learning_rate": 3.609595541490727e-06, + "loss": 1.1553, + "step": 24830 + }, + { + "epoch": 0.7290798050384638, + "grad_norm": 0.0, + "learning_rate": 3.608864117225448e-06, + "loss": 1.1699, + "step": 24831 + }, + { + "epoch": 0.7291091667156028, + "grad_norm": 0.0, + "learning_rate": 3.608132750756844e-06, + "loss": 1.2075, + "step": 24832 + }, + { + "epoch": 0.7291385283927418, + "grad_norm": 0.0, + "learning_rate": 3.607401442091537e-06, + "loss": 1.2036, + "step": 24833 + }, + { + "epoch": 0.7291678900698808, + "grad_norm": 0.0, + "learning_rate": 3.6066701912361313e-06, + "loss": 1.1357, + "step": 24834 + }, + { + "epoch": 0.7291972517470198, + "grad_norm": 0.0, + "learning_rate": 3.6059389981972493e-06, + "loss": 1.292, + "step": 24835 + }, + { + "epoch": 0.7292266134241588, + "grad_norm": 0.0, + "learning_rate": 3.6052078629814966e-06, + "loss": 1.2979, + "step": 24836 + }, + { + "epoch": 0.7292559751012978, + "grad_norm": 0.0, + "learning_rate": 3.6044767855954887e-06, + "loss": 1.3462, + "step": 24837 + }, + { + "epoch": 0.7292853367784368, + "grad_norm": 0.0, + "learning_rate": 3.6037457660458343e-06, + "loss": 1.2822, + "step": 24838 + }, + { + "epoch": 0.7293146984555757, + "grad_norm": 0.0, + "learning_rate": 3.6030148043391422e-06, + "loss": 1.2725, + "step": 24839 + }, + { + "epoch": 0.7293440601327148, + "grad_norm": 0.0, + "learning_rate": 3.6022839004820277e-06, + "loss": 1.2041, + "step": 24840 + }, + { + "epoch": 0.7293734218098538, + "grad_norm": 0.0, + "learning_rate": 3.601553054481096e-06, + "loss": 1.1953, + "step": 24841 + }, + { + "epoch": 0.7294027834869927, + "grad_norm": 0.0, + "learning_rate": 3.6008222663429615e-06, + "loss": 1.1953, + "step": 24842 + }, + { + "epoch": 0.7294321451641318, + "grad_norm": 0.0, + "learning_rate": 3.600091536074227e-06, + "loss": 1.0679, + "step": 24843 + }, + { + "epoch": 0.7294615068412708, + "grad_norm": 0.0, + "learning_rate": 3.599360863681508e-06, + "loss": 1.2695, + "step": 24844 + }, + { + "epoch": 0.7294908685184097, + "grad_norm": 0.0, + "learning_rate": 3.5986302491714065e-06, + "loss": 1.2168, + "step": 24845 + }, + { + "epoch": 0.7295202301955488, + "grad_norm": 0.0, + "learning_rate": 3.597899692550533e-06, + "loss": 1.2314, + "step": 24846 + }, + { + "epoch": 0.7295495918726878, + "grad_norm": 0.0, + "learning_rate": 3.5971691938254904e-06, + "loss": 1.1074, + "step": 24847 + }, + { + "epoch": 0.7295789535498267, + "grad_norm": 0.0, + "learning_rate": 3.596438753002883e-06, + "loss": 1.4062, + "step": 24848 + }, + { + "epoch": 0.7296083152269658, + "grad_norm": 0.0, + "learning_rate": 3.5957083700893245e-06, + "loss": 1.2285, + "step": 24849 + }, + { + "epoch": 0.7296376769041047, + "grad_norm": 0.0, + "learning_rate": 3.594978045091412e-06, + "loss": 1.248, + "step": 24850 + }, + { + "epoch": 0.7296670385812437, + "grad_norm": 0.0, + "learning_rate": 3.594247778015756e-06, + "loss": 1.2793, + "step": 24851 + }, + { + "epoch": 0.7296964002583828, + "grad_norm": 0.0, + "learning_rate": 3.593517568868958e-06, + "loss": 1.3433, + "step": 24852 + }, + { + "epoch": 0.7297257619355217, + "grad_norm": 0.0, + "learning_rate": 3.5927874176576217e-06, + "loss": 1.2354, + "step": 24853 + }, + { + "epoch": 0.7297551236126607, + "grad_norm": 0.0, + "learning_rate": 3.5920573243883494e-06, + "loss": 1.1631, + "step": 24854 + }, + { + "epoch": 0.7297844852897998, + "grad_norm": 0.0, + "learning_rate": 3.591327289067741e-06, + "loss": 1.2861, + "step": 24855 + }, + { + "epoch": 0.7298138469669387, + "grad_norm": 0.0, + "learning_rate": 3.590597311702404e-06, + "loss": 1.2676, + "step": 24856 + }, + { + "epoch": 0.7298432086440777, + "grad_norm": 0.0, + "learning_rate": 3.5898673922989336e-06, + "loss": 1.1973, + "step": 24857 + }, + { + "epoch": 0.7298725703212168, + "grad_norm": 0.0, + "learning_rate": 3.5891375308639377e-06, + "loss": 1.251, + "step": 24858 + }, + { + "epoch": 0.7299019319983557, + "grad_norm": 0.0, + "learning_rate": 3.5884077274040085e-06, + "loss": 1.2734, + "step": 24859 + }, + { + "epoch": 0.7299312936754947, + "grad_norm": 0.0, + "learning_rate": 3.5876779819257545e-06, + "loss": 1.2139, + "step": 24860 + }, + { + "epoch": 0.7299606553526338, + "grad_norm": 0.0, + "learning_rate": 3.5869482944357713e-06, + "loss": 1.1855, + "step": 24861 + }, + { + "epoch": 0.7299900170297727, + "grad_norm": 0.0, + "learning_rate": 3.586218664940657e-06, + "loss": 1.3057, + "step": 24862 + }, + { + "epoch": 0.7300193787069117, + "grad_norm": 0.0, + "learning_rate": 3.5854890934470055e-06, + "loss": 1.2607, + "step": 24863 + }, + { + "epoch": 0.7300487403840508, + "grad_norm": 0.0, + "learning_rate": 3.584759579961423e-06, + "loss": 1.2891, + "step": 24864 + }, + { + "epoch": 0.7300781020611897, + "grad_norm": 0.0, + "learning_rate": 3.5840301244905027e-06, + "loss": 1.2876, + "step": 24865 + }, + { + "epoch": 0.7301074637383287, + "grad_norm": 0.0, + "learning_rate": 3.583300727040837e-06, + "loss": 1.2246, + "step": 24866 + }, + { + "epoch": 0.7301368254154678, + "grad_norm": 0.0, + "learning_rate": 3.5825713876190306e-06, + "loss": 1.2422, + "step": 24867 + }, + { + "epoch": 0.7301661870926067, + "grad_norm": 0.0, + "learning_rate": 3.581842106231671e-06, + "loss": 1.1943, + "step": 24868 + }, + { + "epoch": 0.7301955487697457, + "grad_norm": 0.0, + "learning_rate": 3.58111288288536e-06, + "loss": 1.29, + "step": 24869 + }, + { + "epoch": 0.7302249104468848, + "grad_norm": 0.0, + "learning_rate": 3.5803837175866887e-06, + "loss": 1.1895, + "step": 24870 + }, + { + "epoch": 0.7302542721240237, + "grad_norm": 0.0, + "learning_rate": 3.5796546103422527e-06, + "loss": 1.377, + "step": 24871 + }, + { + "epoch": 0.7302836338011627, + "grad_norm": 0.0, + "learning_rate": 3.5789255611586404e-06, + "loss": 1.3818, + "step": 24872 + }, + { + "epoch": 0.7303129954783018, + "grad_norm": 0.0, + "learning_rate": 3.5781965700424515e-06, + "loss": 1.2212, + "step": 24873 + }, + { + "epoch": 0.7303423571554407, + "grad_norm": 0.0, + "learning_rate": 3.577467637000276e-06, + "loss": 1.3428, + "step": 24874 + }, + { + "epoch": 0.7303717188325797, + "grad_norm": 0.0, + "learning_rate": 3.576738762038703e-06, + "loss": 1.1616, + "step": 24875 + }, + { + "epoch": 0.7304010805097187, + "grad_norm": 0.0, + "learning_rate": 3.5760099451643283e-06, + "loss": 1.1343, + "step": 24876 + }, + { + "epoch": 0.7304304421868577, + "grad_norm": 0.0, + "learning_rate": 3.5752811863837424e-06, + "loss": 1.2764, + "step": 24877 + }, + { + "epoch": 0.7304598038639967, + "grad_norm": 0.0, + "learning_rate": 3.574552485703533e-06, + "loss": 1.252, + "step": 24878 + }, + { + "epoch": 0.7304891655411357, + "grad_norm": 0.0, + "learning_rate": 3.5738238431302874e-06, + "loss": 1.2354, + "step": 24879 + }, + { + "epoch": 0.7305185272182747, + "grad_norm": 0.0, + "learning_rate": 3.5730952586706024e-06, + "loss": 1.2529, + "step": 24880 + }, + { + "epoch": 0.7305478888954137, + "grad_norm": 0.0, + "learning_rate": 3.5723667323310598e-06, + "loss": 1.3164, + "step": 24881 + }, + { + "epoch": 0.7305772505725527, + "grad_norm": 0.0, + "learning_rate": 3.5716382641182544e-06, + "loss": 1.2192, + "step": 24882 + }, + { + "epoch": 0.7306066122496917, + "grad_norm": 0.0, + "learning_rate": 3.570909854038768e-06, + "loss": 1.2329, + "step": 24883 + }, + { + "epoch": 0.7306359739268307, + "grad_norm": 0.0, + "learning_rate": 3.5701815020991915e-06, + "loss": 1.1895, + "step": 24884 + }, + { + "epoch": 0.7306653356039697, + "grad_norm": 0.0, + "learning_rate": 3.5694532083061117e-06, + "loss": 1.2642, + "step": 24885 + }, + { + "epoch": 0.7306946972811087, + "grad_norm": 0.0, + "learning_rate": 3.5687249726661133e-06, + "loss": 1.2646, + "step": 24886 + }, + { + "epoch": 0.7307240589582477, + "grad_norm": 0.0, + "learning_rate": 3.5679967951857817e-06, + "loss": 1.2212, + "step": 24887 + }, + { + "epoch": 0.7307534206353867, + "grad_norm": 0.0, + "learning_rate": 3.5672686758717e-06, + "loss": 1.0796, + "step": 24888 + }, + { + "epoch": 0.7307827823125257, + "grad_norm": 0.0, + "learning_rate": 3.5665406147304573e-06, + "loss": 1.2554, + "step": 24889 + }, + { + "epoch": 0.7308121439896647, + "grad_norm": 0.0, + "learning_rate": 3.565812611768633e-06, + "loss": 1.1641, + "step": 24890 + }, + { + "epoch": 0.7308415056668037, + "grad_norm": 0.0, + "learning_rate": 3.5650846669928154e-06, + "loss": 1.2896, + "step": 24891 + }, + { + "epoch": 0.7308708673439427, + "grad_norm": 0.0, + "learning_rate": 3.5643567804095824e-06, + "loss": 1.2969, + "step": 24892 + }, + { + "epoch": 0.7309002290210816, + "grad_norm": 0.0, + "learning_rate": 3.563628952025523e-06, + "loss": 1.3247, + "step": 24893 + }, + { + "epoch": 0.7309295906982207, + "grad_norm": 0.0, + "learning_rate": 3.562901181847215e-06, + "loss": 1.2676, + "step": 24894 + }, + { + "epoch": 0.7309589523753597, + "grad_norm": 0.0, + "learning_rate": 3.5621734698812405e-06, + "loss": 1.0815, + "step": 24895 + }, + { + "epoch": 0.7309883140524986, + "grad_norm": 0.0, + "learning_rate": 3.561445816134179e-06, + "loss": 1.313, + "step": 24896 + }, + { + "epoch": 0.7310176757296377, + "grad_norm": 0.0, + "learning_rate": 3.560718220612609e-06, + "loss": 1.3008, + "step": 24897 + }, + { + "epoch": 0.7310470374067767, + "grad_norm": 0.0, + "learning_rate": 3.559990683323117e-06, + "loss": 1.2153, + "step": 24898 + }, + { + "epoch": 0.7310763990839156, + "grad_norm": 0.0, + "learning_rate": 3.5592632042722752e-06, + "loss": 1.0181, + "step": 24899 + }, + { + "epoch": 0.7311057607610547, + "grad_norm": 0.0, + "learning_rate": 3.5585357834666688e-06, + "loss": 1.2598, + "step": 24900 + }, + { + "epoch": 0.7311351224381937, + "grad_norm": 0.0, + "learning_rate": 3.5578084209128704e-06, + "loss": 1.2344, + "step": 24901 + }, + { + "epoch": 0.7311644841153326, + "grad_norm": 0.0, + "learning_rate": 3.5570811166174657e-06, + "loss": 1.2324, + "step": 24902 + }, + { + "epoch": 0.7311938457924717, + "grad_norm": 0.0, + "learning_rate": 3.5563538705870215e-06, + "loss": 1.1973, + "step": 24903 + }, + { + "epoch": 0.7312232074696107, + "grad_norm": 0.0, + "learning_rate": 3.5556266828281216e-06, + "loss": 1.2334, + "step": 24904 + }, + { + "epoch": 0.7312525691467496, + "grad_norm": 0.0, + "learning_rate": 3.5548995533473394e-06, + "loss": 1.3164, + "step": 24905 + }, + { + "epoch": 0.7312819308238887, + "grad_norm": 0.0, + "learning_rate": 3.5541724821512493e-06, + "loss": 1.2344, + "step": 24906 + }, + { + "epoch": 0.7313112925010277, + "grad_norm": 0.0, + "learning_rate": 3.5534454692464313e-06, + "loss": 1.3008, + "step": 24907 + }, + { + "epoch": 0.7313406541781666, + "grad_norm": 0.0, + "learning_rate": 3.5527185146394526e-06, + "loss": 1.2412, + "step": 24908 + }, + { + "epoch": 0.7313700158553057, + "grad_norm": 0.0, + "learning_rate": 3.5519916183368963e-06, + "loss": 1.1436, + "step": 24909 + }, + { + "epoch": 0.7313993775324447, + "grad_norm": 0.0, + "learning_rate": 3.5512647803453306e-06, + "loss": 1.1738, + "step": 24910 + }, + { + "epoch": 0.7314287392095836, + "grad_norm": 0.0, + "learning_rate": 3.5505380006713286e-06, + "loss": 1.2104, + "step": 24911 + }, + { + "epoch": 0.7314581008867227, + "grad_norm": 0.0, + "learning_rate": 3.5498112793214603e-06, + "loss": 1.2344, + "step": 24912 + }, + { + "epoch": 0.7314874625638617, + "grad_norm": 0.0, + "learning_rate": 3.549084616302303e-06, + "loss": 1.1772, + "step": 24913 + }, + { + "epoch": 0.7315168242410006, + "grad_norm": 0.0, + "learning_rate": 3.5483580116204263e-06, + "loss": 1.3271, + "step": 24914 + }, + { + "epoch": 0.7315461859181397, + "grad_norm": 0.0, + "learning_rate": 3.5476314652823972e-06, + "loss": 1.2549, + "step": 24915 + }, + { + "epoch": 0.7315755475952787, + "grad_norm": 0.0, + "learning_rate": 3.546904977294793e-06, + "loss": 1.2114, + "step": 24916 + }, + { + "epoch": 0.7316049092724176, + "grad_norm": 0.0, + "learning_rate": 3.5461785476641753e-06, + "loss": 1.2227, + "step": 24917 + }, + { + "epoch": 0.7316342709495567, + "grad_norm": 0.0, + "learning_rate": 3.5454521763971216e-06, + "loss": 1.3438, + "step": 24918 + }, + { + "epoch": 0.7316636326266956, + "grad_norm": 0.0, + "learning_rate": 3.5447258635001967e-06, + "loss": 1.2246, + "step": 24919 + }, + { + "epoch": 0.7316929943038346, + "grad_norm": 0.0, + "learning_rate": 3.54399960897997e-06, + "loss": 1.2549, + "step": 24920 + }, + { + "epoch": 0.7317223559809737, + "grad_norm": 0.0, + "learning_rate": 3.5432734128430046e-06, + "loss": 1.3525, + "step": 24921 + }, + { + "epoch": 0.7317517176581126, + "grad_norm": 0.0, + "learning_rate": 3.5425472750958744e-06, + "loss": 1.2969, + "step": 24922 + }, + { + "epoch": 0.7317810793352516, + "grad_norm": 0.0, + "learning_rate": 3.541821195745142e-06, + "loss": 1.1895, + "step": 24923 + }, + { + "epoch": 0.7318104410123907, + "grad_norm": 0.0, + "learning_rate": 3.541095174797372e-06, + "loss": 1.1572, + "step": 24924 + }, + { + "epoch": 0.7318398026895296, + "grad_norm": 0.0, + "learning_rate": 3.540369212259136e-06, + "loss": 1.3232, + "step": 24925 + }, + { + "epoch": 0.7318691643666686, + "grad_norm": 0.0, + "learning_rate": 3.539643308136993e-06, + "loss": 1.1567, + "step": 24926 + }, + { + "epoch": 0.7318985260438077, + "grad_norm": 0.0, + "learning_rate": 3.5389174624375155e-06, + "loss": 1.2354, + "step": 24927 + }, + { + "epoch": 0.7319278877209466, + "grad_norm": 0.0, + "learning_rate": 3.538191675167255e-06, + "loss": 1.187, + "step": 24928 + }, + { + "epoch": 0.7319572493980856, + "grad_norm": 0.0, + "learning_rate": 3.5374659463327865e-06, + "loss": 1.1011, + "step": 24929 + }, + { + "epoch": 0.7319866110752247, + "grad_norm": 0.0, + "learning_rate": 3.5367402759406653e-06, + "loss": 1.2344, + "step": 24930 + }, + { + "epoch": 0.7320159727523636, + "grad_norm": 0.0, + "learning_rate": 3.53601466399746e-06, + "loss": 1.1943, + "step": 24931 + }, + { + "epoch": 0.7320453344295026, + "grad_norm": 0.0, + "learning_rate": 3.5352891105097266e-06, + "loss": 1.2246, + "step": 24932 + }, + { + "epoch": 0.7320746961066417, + "grad_norm": 0.0, + "learning_rate": 3.5345636154840313e-06, + "loss": 1.1904, + "step": 24933 + }, + { + "epoch": 0.7321040577837806, + "grad_norm": 0.0, + "learning_rate": 3.533838178926935e-06, + "loss": 1.2119, + "step": 24934 + }, + { + "epoch": 0.7321334194609196, + "grad_norm": 0.0, + "learning_rate": 3.5331128008449945e-06, + "loss": 1.2227, + "step": 24935 + }, + { + "epoch": 0.7321627811380587, + "grad_norm": 0.0, + "learning_rate": 3.5323874812447723e-06, + "loss": 1.106, + "step": 24936 + }, + { + "epoch": 0.7321921428151976, + "grad_norm": 0.0, + "learning_rate": 3.531662220132822e-06, + "loss": 1.1348, + "step": 24937 + }, + { + "epoch": 0.7322215044923366, + "grad_norm": 0.0, + "learning_rate": 3.5309370175157108e-06, + "loss": 1.2163, + "step": 24938 + }, + { + "epoch": 0.7322508661694755, + "grad_norm": 0.0, + "learning_rate": 3.530211873399989e-06, + "loss": 1.166, + "step": 24939 + }, + { + "epoch": 0.7322802278466146, + "grad_norm": 0.0, + "learning_rate": 3.529486787792222e-06, + "loss": 1.2783, + "step": 24940 + }, + { + "epoch": 0.7323095895237536, + "grad_norm": 0.0, + "learning_rate": 3.52876176069896e-06, + "loss": 1.1206, + "step": 24941 + }, + { + "epoch": 0.7323389512008925, + "grad_norm": 0.0, + "learning_rate": 3.5280367921267665e-06, + "loss": 1.1499, + "step": 24942 + }, + { + "epoch": 0.7323683128780316, + "grad_norm": 0.0, + "learning_rate": 3.527311882082193e-06, + "loss": 1.3223, + "step": 24943 + }, + { + "epoch": 0.7323976745551706, + "grad_norm": 0.0, + "learning_rate": 3.526587030571795e-06, + "loss": 1.1245, + "step": 24944 + }, + { + "epoch": 0.7324270362323095, + "grad_norm": 0.0, + "learning_rate": 3.52586223760213e-06, + "loss": 1.0894, + "step": 24945 + }, + { + "epoch": 0.7324563979094486, + "grad_norm": 0.0, + "learning_rate": 3.5251375031797464e-06, + "loss": 1.2051, + "step": 24946 + }, + { + "epoch": 0.7324857595865876, + "grad_norm": 0.0, + "learning_rate": 3.5244128273112064e-06, + "loss": 1.2637, + "step": 24947 + }, + { + "epoch": 0.7325151212637265, + "grad_norm": 0.0, + "learning_rate": 3.5236882100030566e-06, + "loss": 1.2607, + "step": 24948 + }, + { + "epoch": 0.7325444829408656, + "grad_norm": 0.0, + "learning_rate": 3.522963651261856e-06, + "loss": 1.2959, + "step": 24949 + }, + { + "epoch": 0.7325738446180046, + "grad_norm": 0.0, + "learning_rate": 3.52223915109415e-06, + "loss": 1.2793, + "step": 24950 + }, + { + "epoch": 0.7326032062951435, + "grad_norm": 0.0, + "learning_rate": 3.521514709506498e-06, + "loss": 1.2598, + "step": 24951 + }, + { + "epoch": 0.7326325679722826, + "grad_norm": 0.0, + "learning_rate": 3.5207903265054475e-06, + "loss": 1.2393, + "step": 24952 + }, + { + "epoch": 0.7326619296494216, + "grad_norm": 0.0, + "learning_rate": 3.5200660020975508e-06, + "loss": 1.3086, + "step": 24953 + }, + { + "epoch": 0.7326912913265605, + "grad_norm": 0.0, + "learning_rate": 3.5193417362893555e-06, + "loss": 1.2539, + "step": 24954 + }, + { + "epoch": 0.7327206530036996, + "grad_norm": 0.0, + "learning_rate": 3.518617529087409e-06, + "loss": 1.2065, + "step": 24955 + }, + { + "epoch": 0.7327500146808386, + "grad_norm": 0.0, + "learning_rate": 3.5178933804982675e-06, + "loss": 1.2744, + "step": 24956 + }, + { + "epoch": 0.7327793763579775, + "grad_norm": 0.0, + "learning_rate": 3.5171692905284738e-06, + "loss": 1.2539, + "step": 24957 + }, + { + "epoch": 0.7328087380351166, + "grad_norm": 0.0, + "learning_rate": 3.516445259184581e-06, + "loss": 1.2344, + "step": 24958 + }, + { + "epoch": 0.7328380997122556, + "grad_norm": 0.0, + "learning_rate": 3.515721286473135e-06, + "loss": 1.3135, + "step": 24959 + }, + { + "epoch": 0.7328674613893945, + "grad_norm": 0.0, + "learning_rate": 3.5149973724006813e-06, + "loss": 1.29, + "step": 24960 + }, + { + "epoch": 0.7328968230665336, + "grad_norm": 0.0, + "learning_rate": 3.514273516973764e-06, + "loss": 1.3037, + "step": 24961 + }, + { + "epoch": 0.7329261847436725, + "grad_norm": 0.0, + "learning_rate": 3.5135497201989353e-06, + "loss": 1.25, + "step": 24962 + }, + { + "epoch": 0.7329555464208115, + "grad_norm": 0.0, + "learning_rate": 3.5128259820827383e-06, + "loss": 1.3223, + "step": 24963 + }, + { + "epoch": 0.7329849080979506, + "grad_norm": 0.0, + "learning_rate": 3.5121023026317137e-06, + "loss": 1.2695, + "step": 24964 + }, + { + "epoch": 0.7330142697750895, + "grad_norm": 0.0, + "learning_rate": 3.5113786818524133e-06, + "loss": 1.3916, + "step": 24965 + }, + { + "epoch": 0.7330436314522285, + "grad_norm": 0.0, + "learning_rate": 3.5106551197513738e-06, + "loss": 1.3086, + "step": 24966 + }, + { + "epoch": 0.7330729931293676, + "grad_norm": 0.0, + "learning_rate": 3.509931616335145e-06, + "loss": 1.1987, + "step": 24967 + }, + { + "epoch": 0.7331023548065065, + "grad_norm": 0.0, + "learning_rate": 3.509208171610267e-06, + "loss": 1.2441, + "step": 24968 + }, + { + "epoch": 0.7331317164836455, + "grad_norm": 0.0, + "learning_rate": 3.5084847855832814e-06, + "loss": 1.2812, + "step": 24969 + }, + { + "epoch": 0.7331610781607846, + "grad_norm": 0.0, + "learning_rate": 3.5077614582607276e-06, + "loss": 1.2163, + "step": 24970 + }, + { + "epoch": 0.7331904398379235, + "grad_norm": 0.0, + "learning_rate": 3.5070381896491525e-06, + "loss": 1.1455, + "step": 24971 + }, + { + "epoch": 0.7332198015150625, + "grad_norm": 0.0, + "learning_rate": 3.506314979755091e-06, + "loss": 1.2686, + "step": 24972 + }, + { + "epoch": 0.7332491631922016, + "grad_norm": 0.0, + "learning_rate": 3.505591828585089e-06, + "loss": 1.0957, + "step": 24973 + }, + { + "epoch": 0.7332785248693405, + "grad_norm": 0.0, + "learning_rate": 3.5048687361456835e-06, + "loss": 1.1626, + "step": 24974 + }, + { + "epoch": 0.7333078865464795, + "grad_norm": 0.0, + "learning_rate": 3.50414570244341e-06, + "loss": 1.1943, + "step": 24975 + }, + { + "epoch": 0.7333372482236186, + "grad_norm": 0.0, + "learning_rate": 3.5034227274848143e-06, + "loss": 1.2764, + "step": 24976 + }, + { + "epoch": 0.7333666099007575, + "grad_norm": 0.0, + "learning_rate": 3.502699811276431e-06, + "loss": 1.1011, + "step": 24977 + }, + { + "epoch": 0.7333959715778965, + "grad_norm": 0.0, + "learning_rate": 3.501976953824796e-06, + "loss": 1.2139, + "step": 24978 + }, + { + "epoch": 0.7334253332550356, + "grad_norm": 0.0, + "learning_rate": 3.5012541551364452e-06, + "loss": 1.1802, + "step": 24979 + }, + { + "epoch": 0.7334546949321745, + "grad_norm": 0.0, + "learning_rate": 3.5005314152179203e-06, + "loss": 1.29, + "step": 24980 + }, + { + "epoch": 0.7334840566093135, + "grad_norm": 0.0, + "learning_rate": 3.4998087340757515e-06, + "loss": 1.2607, + "step": 24981 + }, + { + "epoch": 0.7335134182864526, + "grad_norm": 0.0, + "learning_rate": 3.499086111716481e-06, + "loss": 1.2168, + "step": 24982 + }, + { + "epoch": 0.7335427799635915, + "grad_norm": 0.0, + "learning_rate": 3.498363548146638e-06, + "loss": 1.2188, + "step": 24983 + }, + { + "epoch": 0.7335721416407305, + "grad_norm": 0.0, + "learning_rate": 3.4976410433727602e-06, + "loss": 1.1758, + "step": 24984 + }, + { + "epoch": 0.7336015033178696, + "grad_norm": 0.0, + "learning_rate": 3.4969185974013788e-06, + "loss": 1.0283, + "step": 24985 + }, + { + "epoch": 0.7336308649950085, + "grad_norm": 0.0, + "learning_rate": 3.4961962102390248e-06, + "loss": 1.2334, + "step": 24986 + }, + { + "epoch": 0.7336602266721475, + "grad_norm": 0.0, + "learning_rate": 3.4954738818922372e-06, + "loss": 1.1499, + "step": 24987 + }, + { + "epoch": 0.7336895883492865, + "grad_norm": 0.0, + "learning_rate": 3.4947516123675418e-06, + "loss": 1.1851, + "step": 24988 + }, + { + "epoch": 0.7337189500264255, + "grad_norm": 0.0, + "learning_rate": 3.494029401671477e-06, + "loss": 1.2852, + "step": 24989 + }, + { + "epoch": 0.7337483117035645, + "grad_norm": 0.0, + "learning_rate": 3.493307249810568e-06, + "loss": 1.2881, + "step": 24990 + }, + { + "epoch": 0.7337776733807035, + "grad_norm": 0.0, + "learning_rate": 3.4925851567913505e-06, + "loss": 1.2344, + "step": 24991 + }, + { + "epoch": 0.7338070350578425, + "grad_norm": 0.0, + "learning_rate": 3.4918631226203515e-06, + "loss": 1.2847, + "step": 24992 + }, + { + "epoch": 0.7338363967349815, + "grad_norm": 0.0, + "learning_rate": 3.4911411473041024e-06, + "loss": 1.2627, + "step": 24993 + }, + { + "epoch": 0.7338657584121205, + "grad_norm": 0.0, + "learning_rate": 3.4904192308491304e-06, + "loss": 1.1533, + "step": 24994 + }, + { + "epoch": 0.7338951200892595, + "grad_norm": 0.0, + "learning_rate": 3.48969737326196e-06, + "loss": 1.1216, + "step": 24995 + }, + { + "epoch": 0.7339244817663985, + "grad_norm": 0.0, + "learning_rate": 3.488975574549128e-06, + "loss": 1.291, + "step": 24996 + }, + { + "epoch": 0.7339538434435375, + "grad_norm": 0.0, + "learning_rate": 3.4882538347171533e-06, + "loss": 1.2124, + "step": 24997 + }, + { + "epoch": 0.7339832051206765, + "grad_norm": 0.0, + "learning_rate": 3.4875321537725704e-06, + "loss": 1.3281, + "step": 24998 + }, + { + "epoch": 0.7340125667978155, + "grad_norm": 0.0, + "learning_rate": 3.486810531721899e-06, + "loss": 1.1982, + "step": 24999 + }, + { + "epoch": 0.7340419284749545, + "grad_norm": 0.0, + "learning_rate": 3.4860889685716716e-06, + "loss": 1.1694, + "step": 25000 + }, + { + "epoch": 0.7340712901520935, + "grad_norm": 0.0, + "learning_rate": 3.4853674643284098e-06, + "loss": 1.1846, + "step": 25001 + }, + { + "epoch": 0.7341006518292325, + "grad_norm": 0.0, + "learning_rate": 3.4846460189986386e-06, + "loss": 1.2148, + "step": 25002 + }, + { + "epoch": 0.7341300135063715, + "grad_norm": 0.0, + "learning_rate": 3.483924632588881e-06, + "loss": 1.3428, + "step": 25003 + }, + { + "epoch": 0.7341593751835105, + "grad_norm": 0.0, + "learning_rate": 3.483203305105659e-06, + "loss": 1.3311, + "step": 25004 + }, + { + "epoch": 0.7341887368606494, + "grad_norm": 0.0, + "learning_rate": 3.482482036555502e-06, + "loss": 1.2441, + "step": 25005 + }, + { + "epoch": 0.7342180985377885, + "grad_norm": 0.0, + "learning_rate": 3.4817608269449265e-06, + "loss": 1.2896, + "step": 25006 + }, + { + "epoch": 0.7342474602149275, + "grad_norm": 0.0, + "learning_rate": 3.48103967628046e-06, + "loss": 1.2568, + "step": 25007 + }, + { + "epoch": 0.7342768218920664, + "grad_norm": 0.0, + "learning_rate": 3.4803185845686204e-06, + "loss": 1.2031, + "step": 25008 + }, + { + "epoch": 0.7343061835692055, + "grad_norm": 0.0, + "learning_rate": 3.4795975518159297e-06, + "loss": 1.1592, + "step": 25009 + }, + { + "epoch": 0.7343355452463445, + "grad_norm": 0.0, + "learning_rate": 3.4788765780289056e-06, + "loss": 1.3125, + "step": 25010 + }, + { + "epoch": 0.7343649069234834, + "grad_norm": 0.0, + "learning_rate": 3.4781556632140747e-06, + "loss": 1.2627, + "step": 25011 + }, + { + "epoch": 0.7343942686006225, + "grad_norm": 0.0, + "learning_rate": 3.477434807377951e-06, + "loss": 1.29, + "step": 25012 + }, + { + "epoch": 0.7344236302777615, + "grad_norm": 0.0, + "learning_rate": 3.4767140105270513e-06, + "loss": 1.2441, + "step": 25013 + }, + { + "epoch": 0.7344529919549004, + "grad_norm": 0.0, + "learning_rate": 3.475993272667901e-06, + "loss": 1.2559, + "step": 25014 + }, + { + "epoch": 0.7344823536320395, + "grad_norm": 0.0, + "learning_rate": 3.475272593807012e-06, + "loss": 1.3037, + "step": 25015 + }, + { + "epoch": 0.7345117153091785, + "grad_norm": 0.0, + "learning_rate": 3.474551973950907e-06, + "loss": 1.3027, + "step": 25016 + }, + { + "epoch": 0.7345410769863174, + "grad_norm": 0.0, + "learning_rate": 3.473831413106099e-06, + "loss": 1.2192, + "step": 25017 + }, + { + "epoch": 0.7345704386634565, + "grad_norm": 0.0, + "learning_rate": 3.473110911279106e-06, + "loss": 1.2109, + "step": 25018 + }, + { + "epoch": 0.7345998003405955, + "grad_norm": 0.0, + "learning_rate": 3.472390468476439e-06, + "loss": 1.2363, + "step": 25019 + }, + { + "epoch": 0.7346291620177344, + "grad_norm": 0.0, + "learning_rate": 3.4716700847046192e-06, + "loss": 1.1758, + "step": 25020 + }, + { + "epoch": 0.7346585236948735, + "grad_norm": 0.0, + "learning_rate": 3.470949759970156e-06, + "loss": 1.2373, + "step": 25021 + }, + { + "epoch": 0.7346878853720125, + "grad_norm": 0.0, + "learning_rate": 3.470229494279569e-06, + "loss": 1.29, + "step": 25022 + }, + { + "epoch": 0.7347172470491514, + "grad_norm": 0.0, + "learning_rate": 3.46950928763937e-06, + "loss": 1.1851, + "step": 25023 + }, + { + "epoch": 0.7347466087262905, + "grad_norm": 0.0, + "learning_rate": 3.468789140056067e-06, + "loss": 1.1558, + "step": 25024 + }, + { + "epoch": 0.7347759704034295, + "grad_norm": 0.0, + "learning_rate": 3.4680690515361794e-06, + "loss": 1.1392, + "step": 25025 + }, + { + "epoch": 0.7348053320805684, + "grad_norm": 0.0, + "learning_rate": 3.4673490220862173e-06, + "loss": 1.2529, + "step": 25026 + }, + { + "epoch": 0.7348346937577075, + "grad_norm": 0.0, + "learning_rate": 3.4666290517126897e-06, + "loss": 1.2192, + "step": 25027 + }, + { + "epoch": 0.7348640554348465, + "grad_norm": 0.0, + "learning_rate": 3.465909140422107e-06, + "loss": 1.2197, + "step": 25028 + }, + { + "epoch": 0.7348934171119854, + "grad_norm": 0.0, + "learning_rate": 3.4651892882209837e-06, + "loss": 1.1621, + "step": 25029 + }, + { + "epoch": 0.7349227787891245, + "grad_norm": 0.0, + "learning_rate": 3.464469495115823e-06, + "loss": 1.3252, + "step": 25030 + }, + { + "epoch": 0.7349521404662634, + "grad_norm": 0.0, + "learning_rate": 3.4637497611131432e-06, + "loss": 1.1699, + "step": 25031 + }, + { + "epoch": 0.7349815021434024, + "grad_norm": 0.0, + "learning_rate": 3.4630300862194487e-06, + "loss": 1.2085, + "step": 25032 + }, + { + "epoch": 0.7350108638205415, + "grad_norm": 0.0, + "learning_rate": 3.4623104704412457e-06, + "loss": 1.252, + "step": 25033 + }, + { + "epoch": 0.7350402254976804, + "grad_norm": 0.0, + "learning_rate": 3.4615909137850445e-06, + "loss": 1.3301, + "step": 25034 + }, + { + "epoch": 0.7350695871748194, + "grad_norm": 0.0, + "learning_rate": 3.4608714162573475e-06, + "loss": 1.2568, + "step": 25035 + }, + { + "epoch": 0.7350989488519585, + "grad_norm": 0.0, + "learning_rate": 3.4601519778646687e-06, + "loss": 1.1724, + "step": 25036 + }, + { + "epoch": 0.7351283105290974, + "grad_norm": 0.0, + "learning_rate": 3.459432598613507e-06, + "loss": 1.1753, + "step": 25037 + }, + { + "epoch": 0.7351576722062364, + "grad_norm": 0.0, + "learning_rate": 3.458713278510374e-06, + "loss": 1.2334, + "step": 25038 + }, + { + "epoch": 0.7351870338833755, + "grad_norm": 0.0, + "learning_rate": 3.45799401756177e-06, + "loss": 1.2246, + "step": 25039 + }, + { + "epoch": 0.7352163955605144, + "grad_norm": 0.0, + "learning_rate": 3.4572748157742044e-06, + "loss": 1.2412, + "step": 25040 + }, + { + "epoch": 0.7352457572376534, + "grad_norm": 0.0, + "learning_rate": 3.456555673154177e-06, + "loss": 1.1226, + "step": 25041 + }, + { + "epoch": 0.7352751189147924, + "grad_norm": 0.0, + "learning_rate": 3.4558365897081937e-06, + "loss": 1.2544, + "step": 25042 + }, + { + "epoch": 0.7353044805919314, + "grad_norm": 0.0, + "learning_rate": 3.455117565442756e-06, + "loss": 1.2812, + "step": 25043 + }, + { + "epoch": 0.7353338422690704, + "grad_norm": 0.0, + "learning_rate": 3.4543986003643626e-06, + "loss": 1.1509, + "step": 25044 + }, + { + "epoch": 0.7353632039462094, + "grad_norm": 0.0, + "learning_rate": 3.4536796944795226e-06, + "loss": 1.1558, + "step": 25045 + }, + { + "epoch": 0.7353925656233484, + "grad_norm": 0.0, + "learning_rate": 3.4529608477947305e-06, + "loss": 1.2588, + "step": 25046 + }, + { + "epoch": 0.7354219273004874, + "grad_norm": 0.0, + "learning_rate": 3.452242060316493e-06, + "loss": 1.209, + "step": 25047 + }, + { + "epoch": 0.7354512889776263, + "grad_norm": 0.0, + "learning_rate": 3.4515233320513055e-06, + "loss": 1.3594, + "step": 25048 + }, + { + "epoch": 0.7354806506547654, + "grad_norm": 0.0, + "learning_rate": 3.4508046630056715e-06, + "loss": 1.2334, + "step": 25049 + }, + { + "epoch": 0.7355100123319044, + "grad_norm": 0.0, + "learning_rate": 3.450086053186088e-06, + "loss": 1.2021, + "step": 25050 + }, + { + "epoch": 0.7355393740090433, + "grad_norm": 0.0, + "learning_rate": 3.4493675025990548e-06, + "loss": 1.1934, + "step": 25051 + }, + { + "epoch": 0.7355687356861824, + "grad_norm": 0.0, + "learning_rate": 3.4486490112510685e-06, + "loss": 1.1982, + "step": 25052 + }, + { + "epoch": 0.7355980973633214, + "grad_norm": 0.0, + "learning_rate": 3.447930579148624e-06, + "loss": 1.1357, + "step": 25053 + }, + { + "epoch": 0.7356274590404603, + "grad_norm": 0.0, + "learning_rate": 3.447212206298224e-06, + "loss": 1.1592, + "step": 25054 + }, + { + "epoch": 0.7356568207175994, + "grad_norm": 0.0, + "learning_rate": 3.4464938927063584e-06, + "loss": 1.1357, + "step": 25055 + }, + { + "epoch": 0.7356861823947384, + "grad_norm": 0.0, + "learning_rate": 3.4457756383795314e-06, + "loss": 1.2627, + "step": 25056 + }, + { + "epoch": 0.7357155440718773, + "grad_norm": 0.0, + "learning_rate": 3.4450574433242334e-06, + "loss": 1.2568, + "step": 25057 + }, + { + "epoch": 0.7357449057490164, + "grad_norm": 0.0, + "learning_rate": 3.44433930754696e-06, + "loss": 1.2773, + "step": 25058 + }, + { + "epoch": 0.7357742674261554, + "grad_norm": 0.0, + "learning_rate": 3.4436212310542016e-06, + "loss": 1.1885, + "step": 25059 + }, + { + "epoch": 0.7358036291032943, + "grad_norm": 0.0, + "learning_rate": 3.442903213852459e-06, + "loss": 1.2168, + "step": 25060 + }, + { + "epoch": 0.7358329907804334, + "grad_norm": 0.0, + "learning_rate": 3.4421852559482214e-06, + "loss": 1.3398, + "step": 25061 + }, + { + "epoch": 0.7358623524575724, + "grad_norm": 0.0, + "learning_rate": 3.4414673573479794e-06, + "loss": 1.1553, + "step": 25062 + }, + { + "epoch": 0.7358917141347113, + "grad_norm": 0.0, + "learning_rate": 3.4407495180582308e-06, + "loss": 1.1523, + "step": 25063 + }, + { + "epoch": 0.7359210758118504, + "grad_norm": 0.0, + "learning_rate": 3.4400317380854608e-06, + "loss": 1.2856, + "step": 25064 + }, + { + "epoch": 0.7359504374889894, + "grad_norm": 0.0, + "learning_rate": 3.439314017436167e-06, + "loss": 1.2588, + "step": 25065 + }, + { + "epoch": 0.7359797991661283, + "grad_norm": 0.0, + "learning_rate": 3.4385963561168367e-06, + "loss": 1.2744, + "step": 25066 + }, + { + "epoch": 0.7360091608432674, + "grad_norm": 0.0, + "learning_rate": 3.43787875413396e-06, + "loss": 1.2749, + "step": 25067 + }, + { + "epoch": 0.7360385225204064, + "grad_norm": 0.0, + "learning_rate": 3.4371612114940234e-06, + "loss": 1.2451, + "step": 25068 + }, + { + "epoch": 0.7360678841975453, + "grad_norm": 0.0, + "learning_rate": 3.4364437282035213e-06, + "loss": 1.2656, + "step": 25069 + }, + { + "epoch": 0.7360972458746844, + "grad_norm": 0.0, + "learning_rate": 3.435726304268937e-06, + "loss": 1.2471, + "step": 25070 + }, + { + "epoch": 0.7361266075518234, + "grad_norm": 0.0, + "learning_rate": 3.4350089396967634e-06, + "loss": 1.2524, + "step": 25071 + }, + { + "epoch": 0.7361559692289623, + "grad_norm": 0.0, + "learning_rate": 3.4342916344934863e-06, + "loss": 1.2104, + "step": 25072 + }, + { + "epoch": 0.7361853309061014, + "grad_norm": 0.0, + "learning_rate": 3.433574388665587e-06, + "loss": 1.1846, + "step": 25073 + }, + { + "epoch": 0.7362146925832403, + "grad_norm": 0.0, + "learning_rate": 3.4328572022195604e-06, + "loss": 1.1904, + "step": 25074 + }, + { + "epoch": 0.7362440542603793, + "grad_norm": 0.0, + "learning_rate": 3.4321400751618883e-06, + "loss": 1.2676, + "step": 25075 + }, + { + "epoch": 0.7362734159375184, + "grad_norm": 0.0, + "learning_rate": 3.431423007499054e-06, + "loss": 1.1533, + "step": 25076 + }, + { + "epoch": 0.7363027776146573, + "grad_norm": 0.0, + "learning_rate": 3.4307059992375414e-06, + "loss": 1.3213, + "step": 25077 + }, + { + "epoch": 0.7363321392917963, + "grad_norm": 0.0, + "learning_rate": 3.4299890503838407e-06, + "loss": 1.2246, + "step": 25078 + }, + { + "epoch": 0.7363615009689354, + "grad_norm": 0.0, + "learning_rate": 3.4292721609444267e-06, + "loss": 1.334, + "step": 25079 + }, + { + "epoch": 0.7363908626460743, + "grad_norm": 0.0, + "learning_rate": 3.4285553309257922e-06, + "loss": 1.1587, + "step": 25080 + }, + { + "epoch": 0.7364202243232133, + "grad_norm": 0.0, + "learning_rate": 3.4278385603344143e-06, + "loss": 1.252, + "step": 25081 + }, + { + "epoch": 0.7364495860003524, + "grad_norm": 0.0, + "learning_rate": 3.4271218491767755e-06, + "loss": 1.2119, + "step": 25082 + }, + { + "epoch": 0.7364789476774913, + "grad_norm": 0.0, + "learning_rate": 3.426405197459357e-06, + "loss": 1.2891, + "step": 25083 + }, + { + "epoch": 0.7365083093546303, + "grad_norm": 0.0, + "learning_rate": 3.425688605188636e-06, + "loss": 1.2461, + "step": 25084 + }, + { + "epoch": 0.7365376710317694, + "grad_norm": 0.0, + "learning_rate": 3.424972072371101e-06, + "loss": 1.2319, + "step": 25085 + }, + { + "epoch": 0.7365670327089083, + "grad_norm": 0.0, + "learning_rate": 3.4242555990132243e-06, + "loss": 1.1104, + "step": 25086 + }, + { + "epoch": 0.7365963943860473, + "grad_norm": 0.0, + "learning_rate": 3.4235391851214904e-06, + "loss": 1.3848, + "step": 25087 + }, + { + "epoch": 0.7366257560631864, + "grad_norm": 0.0, + "learning_rate": 3.422822830702374e-06, + "loss": 1.2959, + "step": 25088 + }, + { + "epoch": 0.7366551177403253, + "grad_norm": 0.0, + "learning_rate": 3.422106535762357e-06, + "loss": 1.1709, + "step": 25089 + }, + { + "epoch": 0.7366844794174643, + "grad_norm": 0.0, + "learning_rate": 3.421390300307915e-06, + "loss": 1.2236, + "step": 25090 + }, + { + "epoch": 0.7367138410946034, + "grad_norm": 0.0, + "learning_rate": 3.4206741243455266e-06, + "loss": 1.2773, + "step": 25091 + }, + { + "epoch": 0.7367432027717423, + "grad_norm": 0.0, + "learning_rate": 3.4199580078816674e-06, + "loss": 1.1836, + "step": 25092 + }, + { + "epoch": 0.7367725644488813, + "grad_norm": 0.0, + "learning_rate": 3.419241950922809e-06, + "loss": 1.1484, + "step": 25093 + }, + { + "epoch": 0.7368019261260204, + "grad_norm": 0.0, + "learning_rate": 3.4185259534754345e-06, + "loss": 1.2666, + "step": 25094 + }, + { + "epoch": 0.7368312878031593, + "grad_norm": 0.0, + "learning_rate": 3.4178100155460116e-06, + "loss": 1.2109, + "step": 25095 + }, + { + "epoch": 0.7368606494802983, + "grad_norm": 0.0, + "learning_rate": 3.4170941371410218e-06, + "loss": 1.2461, + "step": 25096 + }, + { + "epoch": 0.7368900111574374, + "grad_norm": 0.0, + "learning_rate": 3.416378318266933e-06, + "loss": 1.2383, + "step": 25097 + }, + { + "epoch": 0.7369193728345763, + "grad_norm": 0.0, + "learning_rate": 3.415662558930224e-06, + "loss": 1.1514, + "step": 25098 + }, + { + "epoch": 0.7369487345117153, + "grad_norm": 0.0, + "learning_rate": 3.4149468591373646e-06, + "loss": 1.2568, + "step": 25099 + }, + { + "epoch": 0.7369780961888543, + "grad_norm": 0.0, + "learning_rate": 3.414231218894828e-06, + "loss": 1.3335, + "step": 25100 + }, + { + "epoch": 0.7370074578659933, + "grad_norm": 0.0, + "learning_rate": 3.413515638209084e-06, + "loss": 1.2227, + "step": 25101 + }, + { + "epoch": 0.7370368195431323, + "grad_norm": 0.0, + "learning_rate": 3.412800117086602e-06, + "loss": 1.0898, + "step": 25102 + }, + { + "epoch": 0.7370661812202713, + "grad_norm": 0.0, + "learning_rate": 3.4120846555338593e-06, + "loss": 1.2393, + "step": 25103 + }, + { + "epoch": 0.7370955428974103, + "grad_norm": 0.0, + "learning_rate": 3.4113692535573183e-06, + "loss": 1.2676, + "step": 25104 + }, + { + "epoch": 0.7371249045745493, + "grad_norm": 0.0, + "learning_rate": 3.410653911163456e-06, + "loss": 1.291, + "step": 25105 + }, + { + "epoch": 0.7371542662516883, + "grad_norm": 0.0, + "learning_rate": 3.4099386283587355e-06, + "loss": 1.3423, + "step": 25106 + }, + { + "epoch": 0.7371836279288273, + "grad_norm": 0.0, + "learning_rate": 3.409223405149633e-06, + "loss": 1.2959, + "step": 25107 + }, + { + "epoch": 0.7372129896059663, + "grad_norm": 0.0, + "learning_rate": 3.4085082415426053e-06, + "loss": 1.0181, + "step": 25108 + }, + { + "epoch": 0.7372423512831053, + "grad_norm": 0.0, + "learning_rate": 3.4077931375441287e-06, + "loss": 1.2383, + "step": 25109 + }, + { + "epoch": 0.7372717129602443, + "grad_norm": 0.0, + "learning_rate": 3.4070780931606673e-06, + "loss": 1.3281, + "step": 25110 + }, + { + "epoch": 0.7373010746373833, + "grad_norm": 0.0, + "learning_rate": 3.4063631083986837e-06, + "loss": 1.1885, + "step": 25111 + }, + { + "epoch": 0.7373304363145223, + "grad_norm": 0.0, + "learning_rate": 3.4056481832646517e-06, + "loss": 1.2861, + "step": 25112 + }, + { + "epoch": 0.7373597979916613, + "grad_norm": 0.0, + "learning_rate": 3.404933317765027e-06, + "loss": 1.0796, + "step": 25113 + }, + { + "epoch": 0.7373891596688003, + "grad_norm": 0.0, + "learning_rate": 3.404218511906283e-06, + "loss": 1.2686, + "step": 25114 + }, + { + "epoch": 0.7374185213459393, + "grad_norm": 0.0, + "learning_rate": 3.403503765694881e-06, + "loss": 1.252, + "step": 25115 + }, + { + "epoch": 0.7374478830230783, + "grad_norm": 0.0, + "learning_rate": 3.402789079137283e-06, + "loss": 1.187, + "step": 25116 + }, + { + "epoch": 0.7374772447002172, + "grad_norm": 0.0, + "learning_rate": 3.4020744522399497e-06, + "loss": 1.1826, + "step": 25117 + }, + { + "epoch": 0.7375066063773563, + "grad_norm": 0.0, + "learning_rate": 3.4013598850093498e-06, + "loss": 1.1426, + "step": 25118 + }, + { + "epoch": 0.7375359680544953, + "grad_norm": 0.0, + "learning_rate": 3.40064537745194e-06, + "loss": 1.1172, + "step": 25119 + }, + { + "epoch": 0.7375653297316342, + "grad_norm": 0.0, + "learning_rate": 3.3999309295741877e-06, + "loss": 1.1958, + "step": 25120 + }, + { + "epoch": 0.7375946914087733, + "grad_norm": 0.0, + "learning_rate": 3.3992165413825495e-06, + "loss": 1.4014, + "step": 25121 + }, + { + "epoch": 0.7376240530859123, + "grad_norm": 0.0, + "learning_rate": 3.3985022128834835e-06, + "loss": 1.2075, + "step": 25122 + }, + { + "epoch": 0.7376534147630512, + "grad_norm": 0.0, + "learning_rate": 3.397787944083456e-06, + "loss": 1.2026, + "step": 25123 + }, + { + "epoch": 0.7376827764401903, + "grad_norm": 0.0, + "learning_rate": 3.3970737349889227e-06, + "loss": 1.2368, + "step": 25124 + }, + { + "epoch": 0.7377121381173293, + "grad_norm": 0.0, + "learning_rate": 3.3963595856063435e-06, + "loss": 1.2061, + "step": 25125 + }, + { + "epoch": 0.7377414997944682, + "grad_norm": 0.0, + "learning_rate": 3.3956454959421714e-06, + "loss": 1.2471, + "step": 25126 + }, + { + "epoch": 0.7377708614716073, + "grad_norm": 0.0, + "learning_rate": 3.394931466002872e-06, + "loss": 1.0884, + "step": 25127 + }, + { + "epoch": 0.7378002231487463, + "grad_norm": 0.0, + "learning_rate": 3.394217495794896e-06, + "loss": 1.0703, + "step": 25128 + }, + { + "epoch": 0.7378295848258852, + "grad_norm": 0.0, + "learning_rate": 3.3935035853247067e-06, + "loss": 1.0503, + "step": 25129 + }, + { + "epoch": 0.7378589465030243, + "grad_norm": 0.0, + "learning_rate": 3.392789734598756e-06, + "loss": 1.1895, + "step": 25130 + }, + { + "epoch": 0.7378883081801633, + "grad_norm": 0.0, + "learning_rate": 3.392075943623496e-06, + "loss": 1.2329, + "step": 25131 + }, + { + "epoch": 0.7379176698573022, + "grad_norm": 0.0, + "learning_rate": 3.3913622124053925e-06, + "loss": 1.1548, + "step": 25132 + }, + { + "epoch": 0.7379470315344413, + "grad_norm": 0.0, + "learning_rate": 3.3906485409508873e-06, + "loss": 1.2168, + "step": 25133 + }, + { + "epoch": 0.7379763932115803, + "grad_norm": 0.0, + "learning_rate": 3.3899349292664417e-06, + "loss": 1.2354, + "step": 25134 + }, + { + "epoch": 0.7380057548887192, + "grad_norm": 0.0, + "learning_rate": 3.389221377358506e-06, + "loss": 1.1978, + "step": 25135 + }, + { + "epoch": 0.7380351165658583, + "grad_norm": 0.0, + "learning_rate": 3.3885078852335363e-06, + "loss": 1.2617, + "step": 25136 + }, + { + "epoch": 0.7380644782429973, + "grad_norm": 0.0, + "learning_rate": 3.38779445289798e-06, + "loss": 1.1626, + "step": 25137 + }, + { + "epoch": 0.7380938399201362, + "grad_norm": 0.0, + "learning_rate": 3.3870810803582955e-06, + "loss": 1.209, + "step": 25138 + }, + { + "epoch": 0.7381232015972753, + "grad_norm": 0.0, + "learning_rate": 3.3863677676209293e-06, + "loss": 1.1973, + "step": 25139 + }, + { + "epoch": 0.7381525632744143, + "grad_norm": 0.0, + "learning_rate": 3.3856545146923347e-06, + "loss": 1.0337, + "step": 25140 + }, + { + "epoch": 0.7381819249515532, + "grad_norm": 0.0, + "learning_rate": 3.3849413215789596e-06, + "loss": 1.3081, + "step": 25141 + }, + { + "epoch": 0.7382112866286922, + "grad_norm": 0.0, + "learning_rate": 3.384228188287251e-06, + "loss": 1.4014, + "step": 25142 + }, + { + "epoch": 0.7382406483058312, + "grad_norm": 0.0, + "learning_rate": 3.3835151148236644e-06, + "loss": 1.3037, + "step": 25143 + }, + { + "epoch": 0.7382700099829702, + "grad_norm": 0.0, + "learning_rate": 3.382802101194642e-06, + "loss": 1.2021, + "step": 25144 + }, + { + "epoch": 0.7382993716601092, + "grad_norm": 0.0, + "learning_rate": 3.3820891474066375e-06, + "loss": 1.1689, + "step": 25145 + }, + { + "epoch": 0.7383287333372482, + "grad_norm": 0.0, + "learning_rate": 3.381376253466093e-06, + "loss": 1.3228, + "step": 25146 + }, + { + "epoch": 0.7383580950143872, + "grad_norm": 0.0, + "learning_rate": 3.380663419379462e-06, + "loss": 1.2627, + "step": 25147 + }, + { + "epoch": 0.7383874566915262, + "grad_norm": 0.0, + "learning_rate": 3.379950645153186e-06, + "loss": 1.2817, + "step": 25148 + }, + { + "epoch": 0.7384168183686652, + "grad_norm": 0.0, + "learning_rate": 3.379237930793713e-06, + "loss": 1.1699, + "step": 25149 + }, + { + "epoch": 0.7384461800458042, + "grad_norm": 0.0, + "learning_rate": 3.378525276307485e-06, + "loss": 1.2812, + "step": 25150 + }, + { + "epoch": 0.7384755417229432, + "grad_norm": 0.0, + "learning_rate": 3.377812681700947e-06, + "loss": 1.3027, + "step": 25151 + }, + { + "epoch": 0.7385049034000822, + "grad_norm": 0.0, + "learning_rate": 3.3771001469805475e-06, + "loss": 1.2881, + "step": 25152 + }, + { + "epoch": 0.7385342650772212, + "grad_norm": 0.0, + "learning_rate": 3.3763876721527244e-06, + "loss": 1.3213, + "step": 25153 + }, + { + "epoch": 0.7385636267543602, + "grad_norm": 0.0, + "learning_rate": 3.375675257223927e-06, + "loss": 1.1367, + "step": 25154 + }, + { + "epoch": 0.7385929884314992, + "grad_norm": 0.0, + "learning_rate": 3.3749629022005916e-06, + "loss": 1.3516, + "step": 25155 + }, + { + "epoch": 0.7386223501086382, + "grad_norm": 0.0, + "learning_rate": 3.374250607089167e-06, + "loss": 1.1089, + "step": 25156 + }, + { + "epoch": 0.7386517117857772, + "grad_norm": 0.0, + "learning_rate": 3.3735383718960913e-06, + "loss": 1.2373, + "step": 25157 + }, + { + "epoch": 0.7386810734629162, + "grad_norm": 0.0, + "learning_rate": 3.372826196627804e-06, + "loss": 1.3242, + "step": 25158 + }, + { + "epoch": 0.7387104351400552, + "grad_norm": 0.0, + "learning_rate": 3.3721140812907437e-06, + "loss": 1.2646, + "step": 25159 + }, + { + "epoch": 0.7387397968171941, + "grad_norm": 0.0, + "learning_rate": 3.371402025891357e-06, + "loss": 1.2485, + "step": 25160 + }, + { + "epoch": 0.7387691584943332, + "grad_norm": 0.0, + "learning_rate": 3.3706900304360803e-06, + "loss": 1.2842, + "step": 25161 + }, + { + "epoch": 0.7387985201714722, + "grad_norm": 0.0, + "learning_rate": 3.3699780949313465e-06, + "loss": 1.3281, + "step": 25162 + }, + { + "epoch": 0.7388278818486111, + "grad_norm": 0.0, + "learning_rate": 3.369266219383602e-06, + "loss": 1.2124, + "step": 25163 + }, + { + "epoch": 0.7388572435257502, + "grad_norm": 0.0, + "learning_rate": 3.3685544037992824e-06, + "loss": 1.29, + "step": 25164 + }, + { + "epoch": 0.7388866052028892, + "grad_norm": 0.0, + "learning_rate": 3.3678426481848225e-06, + "loss": 1.3086, + "step": 25165 + }, + { + "epoch": 0.7389159668800281, + "grad_norm": 0.0, + "learning_rate": 3.3671309525466577e-06, + "loss": 1.1509, + "step": 25166 + }, + { + "epoch": 0.7389453285571672, + "grad_norm": 0.0, + "learning_rate": 3.3664193168912297e-06, + "loss": 1.1792, + "step": 25167 + }, + { + "epoch": 0.7389746902343062, + "grad_norm": 0.0, + "learning_rate": 3.365707741224967e-06, + "loss": 1.2471, + "step": 25168 + }, + { + "epoch": 0.7390040519114451, + "grad_norm": 0.0, + "learning_rate": 3.3649962255543112e-06, + "loss": 1.3389, + "step": 25169 + }, + { + "epoch": 0.7390334135885842, + "grad_norm": 0.0, + "learning_rate": 3.364284769885694e-06, + "loss": 1.2822, + "step": 25170 + }, + { + "epoch": 0.7390627752657232, + "grad_norm": 0.0, + "learning_rate": 3.3635733742255463e-06, + "loss": 1.165, + "step": 25171 + }, + { + "epoch": 0.7390921369428621, + "grad_norm": 0.0, + "learning_rate": 3.3628620385803067e-06, + "loss": 1.2793, + "step": 25172 + }, + { + "epoch": 0.7391214986200012, + "grad_norm": 0.0, + "learning_rate": 3.3621507629564066e-06, + "loss": 1.1431, + "step": 25173 + }, + { + "epoch": 0.7391508602971402, + "grad_norm": 0.0, + "learning_rate": 3.3614395473602767e-06, + "loss": 1.3711, + "step": 25174 + }, + { + "epoch": 0.7391802219742791, + "grad_norm": 0.0, + "learning_rate": 3.3607283917983447e-06, + "loss": 1.1475, + "step": 25175 + }, + { + "epoch": 0.7392095836514182, + "grad_norm": 0.0, + "learning_rate": 3.3600172962770506e-06, + "loss": 1.0757, + "step": 25176 + }, + { + "epoch": 0.7392389453285572, + "grad_norm": 0.0, + "learning_rate": 3.359306260802817e-06, + "loss": 1.25, + "step": 25177 + }, + { + "epoch": 0.7392683070056961, + "grad_norm": 0.0, + "learning_rate": 3.3585952853820814e-06, + "loss": 1.2363, + "step": 25178 + }, + { + "epoch": 0.7392976686828352, + "grad_norm": 0.0, + "learning_rate": 3.3578843700212693e-06, + "loss": 1.2256, + "step": 25179 + }, + { + "epoch": 0.7393270303599742, + "grad_norm": 0.0, + "learning_rate": 3.357173514726806e-06, + "loss": 1.1421, + "step": 25180 + }, + { + "epoch": 0.7393563920371131, + "grad_norm": 0.0, + "learning_rate": 3.356462719505128e-06, + "loss": 1.167, + "step": 25181 + }, + { + "epoch": 0.7393857537142522, + "grad_norm": 0.0, + "learning_rate": 3.3557519843626584e-06, + "loss": 1.2285, + "step": 25182 + }, + { + "epoch": 0.7394151153913912, + "grad_norm": 0.0, + "learning_rate": 3.3550413093058254e-06, + "loss": 1.1572, + "step": 25183 + }, + { + "epoch": 0.7394444770685301, + "grad_norm": 0.0, + "learning_rate": 3.3543306943410527e-06, + "loss": 1.0771, + "step": 25184 + }, + { + "epoch": 0.7394738387456692, + "grad_norm": 0.0, + "learning_rate": 3.3536201394747724e-06, + "loss": 1.0669, + "step": 25185 + }, + { + "epoch": 0.7395032004228081, + "grad_norm": 0.0, + "learning_rate": 3.3529096447134037e-06, + "loss": 1.2744, + "step": 25186 + }, + { + "epoch": 0.7395325620999471, + "grad_norm": 0.0, + "learning_rate": 3.3521992100633794e-06, + "loss": 1.1714, + "step": 25187 + }, + { + "epoch": 0.7395619237770862, + "grad_norm": 0.0, + "learning_rate": 3.3514888355311204e-06, + "loss": 1.3242, + "step": 25188 + }, + { + "epoch": 0.7395912854542251, + "grad_norm": 0.0, + "learning_rate": 3.3507785211230502e-06, + "loss": 1.1416, + "step": 25189 + }, + { + "epoch": 0.7396206471313641, + "grad_norm": 0.0, + "learning_rate": 3.350068266845592e-06, + "loss": 1.0723, + "step": 25190 + }, + { + "epoch": 0.7396500088085032, + "grad_norm": 0.0, + "learning_rate": 3.349358072705167e-06, + "loss": 1.144, + "step": 25191 + }, + { + "epoch": 0.7396793704856421, + "grad_norm": 0.0, + "learning_rate": 3.348647938708204e-06, + "loss": 1.1973, + "step": 25192 + }, + { + "epoch": 0.7397087321627811, + "grad_norm": 0.0, + "learning_rate": 3.3479378648611173e-06, + "loss": 1.2603, + "step": 25193 + }, + { + "epoch": 0.7397380938399202, + "grad_norm": 0.0, + "learning_rate": 3.3472278511703348e-06, + "loss": 1.2432, + "step": 25194 + }, + { + "epoch": 0.7397674555170591, + "grad_norm": 0.0, + "learning_rate": 3.3465178976422717e-06, + "loss": 1.1084, + "step": 25195 + }, + { + "epoch": 0.7397968171941981, + "grad_norm": 0.0, + "learning_rate": 3.345808004283355e-06, + "loss": 1.2568, + "step": 25196 + }, + { + "epoch": 0.7398261788713372, + "grad_norm": 0.0, + "learning_rate": 3.345098171100001e-06, + "loss": 1.2539, + "step": 25197 + }, + { + "epoch": 0.7398555405484761, + "grad_norm": 0.0, + "learning_rate": 3.344388398098627e-06, + "loss": 1.1826, + "step": 25198 + }, + { + "epoch": 0.7398849022256151, + "grad_norm": 0.0, + "learning_rate": 3.343678685285654e-06, + "loss": 1.1318, + "step": 25199 + }, + { + "epoch": 0.7399142639027542, + "grad_norm": 0.0, + "learning_rate": 3.342969032667497e-06, + "loss": 1.3081, + "step": 25200 + }, + { + "epoch": 0.7399436255798931, + "grad_norm": 0.0, + "learning_rate": 3.342259440250578e-06, + "loss": 1.1274, + "step": 25201 + }, + { + "epoch": 0.7399729872570321, + "grad_norm": 0.0, + "learning_rate": 3.3415499080413093e-06, + "loss": 1.3486, + "step": 25202 + }, + { + "epoch": 0.7400023489341712, + "grad_norm": 0.0, + "learning_rate": 3.3408404360461133e-06, + "loss": 1.0903, + "step": 25203 + }, + { + "epoch": 0.7400317106113101, + "grad_norm": 0.0, + "learning_rate": 3.340131024271399e-06, + "loss": 1.2451, + "step": 25204 + }, + { + "epoch": 0.7400610722884491, + "grad_norm": 0.0, + "learning_rate": 3.3394216727235893e-06, + "loss": 1.1421, + "step": 25205 + }, + { + "epoch": 0.7400904339655882, + "grad_norm": 0.0, + "learning_rate": 3.338712381409095e-06, + "loss": 1.2939, + "step": 25206 + }, + { + "epoch": 0.7401197956427271, + "grad_norm": 0.0, + "learning_rate": 3.33800315033433e-06, + "loss": 1.2842, + "step": 25207 + }, + { + "epoch": 0.7401491573198661, + "grad_norm": 0.0, + "learning_rate": 3.3372939795057067e-06, + "loss": 1.1699, + "step": 25208 + }, + { + "epoch": 0.7401785189970052, + "grad_norm": 0.0, + "learning_rate": 3.3365848689296433e-06, + "loss": 1.2803, + "step": 25209 + }, + { + "epoch": 0.7402078806741441, + "grad_norm": 0.0, + "learning_rate": 3.3358758186125482e-06, + "loss": 1.2793, + "step": 25210 + }, + { + "epoch": 0.7402372423512831, + "grad_norm": 0.0, + "learning_rate": 3.3351668285608317e-06, + "loss": 1.0811, + "step": 25211 + }, + { + "epoch": 0.7402666040284221, + "grad_norm": 0.0, + "learning_rate": 3.3344578987809127e-06, + "loss": 1.2051, + "step": 25212 + }, + { + "epoch": 0.7402959657055611, + "grad_norm": 0.0, + "learning_rate": 3.3337490292791975e-06, + "loss": 1.3008, + "step": 25213 + }, + { + "epoch": 0.7403253273827001, + "grad_norm": 0.0, + "learning_rate": 3.333040220062098e-06, + "loss": 1.189, + "step": 25214 + }, + { + "epoch": 0.7403546890598391, + "grad_norm": 0.0, + "learning_rate": 3.332331471136019e-06, + "loss": 1.4297, + "step": 25215 + }, + { + "epoch": 0.7403840507369781, + "grad_norm": 0.0, + "learning_rate": 3.3316227825073777e-06, + "loss": 1.3159, + "step": 25216 + }, + { + "epoch": 0.7404134124141171, + "grad_norm": 0.0, + "learning_rate": 3.330914154182575e-06, + "loss": 1.1387, + "step": 25217 + }, + { + "epoch": 0.7404427740912561, + "grad_norm": 0.0, + "learning_rate": 3.330205586168028e-06, + "loss": 1.3662, + "step": 25218 + }, + { + "epoch": 0.7404721357683951, + "grad_norm": 0.0, + "learning_rate": 3.3294970784701395e-06, + "loss": 1.1841, + "step": 25219 + }, + { + "epoch": 0.7405014974455341, + "grad_norm": 0.0, + "learning_rate": 3.3287886310953155e-06, + "loss": 1.2988, + "step": 25220 + }, + { + "epoch": 0.7405308591226731, + "grad_norm": 0.0, + "learning_rate": 3.328080244049966e-06, + "loss": 1.2334, + "step": 25221 + }, + { + "epoch": 0.7405602207998121, + "grad_norm": 0.0, + "learning_rate": 3.327371917340496e-06, + "loss": 1.2598, + "step": 25222 + }, + { + "epoch": 0.7405895824769511, + "grad_norm": 0.0, + "learning_rate": 3.3266636509733106e-06, + "loss": 1.1904, + "step": 25223 + }, + { + "epoch": 0.7406189441540901, + "grad_norm": 0.0, + "learning_rate": 3.3259554449548113e-06, + "loss": 1.1904, + "step": 25224 + }, + { + "epoch": 0.7406483058312291, + "grad_norm": 0.0, + "learning_rate": 3.3252472992914108e-06, + "loss": 1.27, + "step": 25225 + }, + { + "epoch": 0.740677667508368, + "grad_norm": 0.0, + "learning_rate": 3.324539213989504e-06, + "loss": 1.207, + "step": 25226 + }, + { + "epoch": 0.7407070291855071, + "grad_norm": 0.0, + "learning_rate": 3.323831189055502e-06, + "loss": 1.2754, + "step": 25227 + }, + { + "epoch": 0.7407363908626461, + "grad_norm": 0.0, + "learning_rate": 3.323123224495801e-06, + "loss": 1.2842, + "step": 25228 + }, + { + "epoch": 0.740765752539785, + "grad_norm": 0.0, + "learning_rate": 3.322415320316811e-06, + "loss": 1.231, + "step": 25229 + }, + { + "epoch": 0.7407951142169241, + "grad_norm": 0.0, + "learning_rate": 3.321707476524928e-06, + "loss": 1.207, + "step": 25230 + }, + { + "epoch": 0.7408244758940631, + "grad_norm": 0.0, + "learning_rate": 3.3209996931265553e-06, + "loss": 1.3525, + "step": 25231 + }, + { + "epoch": 0.740853837571202, + "grad_norm": 0.0, + "learning_rate": 3.3202919701280922e-06, + "loss": 1.1455, + "step": 25232 + }, + { + "epoch": 0.7408831992483411, + "grad_norm": 0.0, + "learning_rate": 3.3195843075359367e-06, + "loss": 1.2197, + "step": 25233 + }, + { + "epoch": 0.7409125609254801, + "grad_norm": 0.0, + "learning_rate": 3.318876705356495e-06, + "loss": 1.2295, + "step": 25234 + }, + { + "epoch": 0.740941922602619, + "grad_norm": 0.0, + "learning_rate": 3.3181691635961586e-06, + "loss": 1.21, + "step": 25235 + }, + { + "epoch": 0.7409712842797581, + "grad_norm": 0.0, + "learning_rate": 3.3174616822613335e-06, + "loss": 1.1973, + "step": 25236 + }, + { + "epoch": 0.7410006459568971, + "grad_norm": 0.0, + "learning_rate": 3.3167542613584137e-06, + "loss": 1.2617, + "step": 25237 + }, + { + "epoch": 0.741030007634036, + "grad_norm": 0.0, + "learning_rate": 3.3160469008937955e-06, + "loss": 1.1714, + "step": 25238 + }, + { + "epoch": 0.7410593693111751, + "grad_norm": 0.0, + "learning_rate": 3.315339600873878e-06, + "loss": 1.2695, + "step": 25239 + }, + { + "epoch": 0.7410887309883141, + "grad_norm": 0.0, + "learning_rate": 3.3146323613050534e-06, + "loss": 1.1719, + "step": 25240 + }, + { + "epoch": 0.741118092665453, + "grad_norm": 0.0, + "learning_rate": 3.3139251821937234e-06, + "loss": 1.1289, + "step": 25241 + }, + { + "epoch": 0.741147454342592, + "grad_norm": 0.0, + "learning_rate": 3.3132180635462764e-06, + "loss": 1.2485, + "step": 25242 + }, + { + "epoch": 0.7411768160197311, + "grad_norm": 0.0, + "learning_rate": 3.312511005369116e-06, + "loss": 1.2324, + "step": 25243 + }, + { + "epoch": 0.74120617769687, + "grad_norm": 0.0, + "learning_rate": 3.3118040076686262e-06, + "loss": 1.1802, + "step": 25244 + }, + { + "epoch": 0.741235539374009, + "grad_norm": 0.0, + "learning_rate": 3.311097070451209e-06, + "loss": 1.2148, + "step": 25245 + }, + { + "epoch": 0.7412649010511481, + "grad_norm": 0.0, + "learning_rate": 3.310390193723255e-06, + "loss": 1.3086, + "step": 25246 + }, + { + "epoch": 0.741294262728287, + "grad_norm": 0.0, + "learning_rate": 3.309683377491155e-06, + "loss": 1.1934, + "step": 25247 + }, + { + "epoch": 0.741323624405426, + "grad_norm": 0.0, + "learning_rate": 3.3089766217613016e-06, + "loss": 1.1611, + "step": 25248 + }, + { + "epoch": 0.7413529860825651, + "grad_norm": 0.0, + "learning_rate": 3.308269926540084e-06, + "loss": 1.2466, + "step": 25249 + }, + { + "epoch": 0.741382347759704, + "grad_norm": 0.0, + "learning_rate": 3.3075632918338974e-06, + "loss": 1.165, + "step": 25250 + }, + { + "epoch": 0.741411709436843, + "grad_norm": 0.0, + "learning_rate": 3.306856717649127e-06, + "loss": 1.3086, + "step": 25251 + }, + { + "epoch": 0.741441071113982, + "grad_norm": 0.0, + "learning_rate": 3.3061502039921688e-06, + "loss": 1.2627, + "step": 25252 + }, + { + "epoch": 0.741470432791121, + "grad_norm": 0.0, + "learning_rate": 3.305443750869405e-06, + "loss": 1.2676, + "step": 25253 + }, + { + "epoch": 0.74149979446826, + "grad_norm": 0.0, + "learning_rate": 3.3047373582872323e-06, + "loss": 1.1074, + "step": 25254 + }, + { + "epoch": 0.741529156145399, + "grad_norm": 0.0, + "learning_rate": 3.3040310262520327e-06, + "loss": 1.291, + "step": 25255 + }, + { + "epoch": 0.741558517822538, + "grad_norm": 0.0, + "learning_rate": 3.3033247547701965e-06, + "loss": 1.252, + "step": 25256 + }, + { + "epoch": 0.741587879499677, + "grad_norm": 0.0, + "learning_rate": 3.302618543848106e-06, + "loss": 1.2441, + "step": 25257 + }, + { + "epoch": 0.741617241176816, + "grad_norm": 0.0, + "learning_rate": 3.301912393492155e-06, + "loss": 1.1758, + "step": 25258 + }, + { + "epoch": 0.741646602853955, + "grad_norm": 0.0, + "learning_rate": 3.301206303708725e-06, + "loss": 1.0806, + "step": 25259 + }, + { + "epoch": 0.741675964531094, + "grad_norm": 0.0, + "learning_rate": 3.300500274504198e-06, + "loss": 1.1895, + "step": 25260 + }, + { + "epoch": 0.741705326208233, + "grad_norm": 0.0, + "learning_rate": 3.299794305884967e-06, + "loss": 1.248, + "step": 25261 + }, + { + "epoch": 0.741734687885372, + "grad_norm": 0.0, + "learning_rate": 3.2990883978574117e-06, + "loss": 1.2656, + "step": 25262 + }, + { + "epoch": 0.741764049562511, + "grad_norm": 0.0, + "learning_rate": 3.2983825504279165e-06, + "loss": 1.2314, + "step": 25263 + }, + { + "epoch": 0.74179341123965, + "grad_norm": 0.0, + "learning_rate": 3.2976767636028608e-06, + "loss": 1.3267, + "step": 25264 + }, + { + "epoch": 0.741822772916789, + "grad_norm": 0.0, + "learning_rate": 3.296971037388634e-06, + "loss": 1.2188, + "step": 25265 + }, + { + "epoch": 0.741852134593928, + "grad_norm": 0.0, + "learning_rate": 3.2962653717916115e-06, + "loss": 1.1113, + "step": 25266 + }, + { + "epoch": 0.741881496271067, + "grad_norm": 0.0, + "learning_rate": 3.2955597668181815e-06, + "loss": 1.2949, + "step": 25267 + }, + { + "epoch": 0.741910857948206, + "grad_norm": 0.0, + "learning_rate": 3.2948542224747215e-06, + "loss": 1.2773, + "step": 25268 + }, + { + "epoch": 0.741940219625345, + "grad_norm": 0.0, + "learning_rate": 3.2941487387676087e-06, + "loss": 1.1821, + "step": 25269 + }, + { + "epoch": 0.741969581302484, + "grad_norm": 0.0, + "learning_rate": 3.2934433157032286e-06, + "loss": 1.1855, + "step": 25270 + }, + { + "epoch": 0.741998942979623, + "grad_norm": 0.0, + "learning_rate": 3.2927379532879588e-06, + "loss": 1.1914, + "step": 25271 + }, + { + "epoch": 0.742028304656762, + "grad_norm": 0.0, + "learning_rate": 3.2920326515281775e-06, + "loss": 1.2817, + "step": 25272 + }, + { + "epoch": 0.742057666333901, + "grad_norm": 0.0, + "learning_rate": 3.291327410430261e-06, + "loss": 1.2886, + "step": 25273 + }, + { + "epoch": 0.74208702801104, + "grad_norm": 0.0, + "learning_rate": 3.29062223000059e-06, + "loss": 1.3369, + "step": 25274 + }, + { + "epoch": 0.742116389688179, + "grad_norm": 0.0, + "learning_rate": 3.2899171102455384e-06, + "loss": 1.1484, + "step": 25275 + }, + { + "epoch": 0.742145751365318, + "grad_norm": 0.0, + "learning_rate": 3.2892120511714877e-06, + "loss": 1.1992, + "step": 25276 + }, + { + "epoch": 0.742175113042457, + "grad_norm": 0.0, + "learning_rate": 3.2885070527848075e-06, + "loss": 1.1738, + "step": 25277 + }, + { + "epoch": 0.7422044747195959, + "grad_norm": 0.0, + "learning_rate": 3.2878021150918805e-06, + "loss": 1.1611, + "step": 25278 + }, + { + "epoch": 0.742233836396735, + "grad_norm": 0.0, + "learning_rate": 3.287097238099077e-06, + "loss": 1.2192, + "step": 25279 + }, + { + "epoch": 0.742263198073874, + "grad_norm": 0.0, + "learning_rate": 3.286392421812774e-06, + "loss": 1.2275, + "step": 25280 + }, + { + "epoch": 0.7422925597510129, + "grad_norm": 0.0, + "learning_rate": 3.2856876662393432e-06, + "loss": 1.2891, + "step": 25281 + }, + { + "epoch": 0.742321921428152, + "grad_norm": 0.0, + "learning_rate": 3.2849829713851544e-06, + "loss": 1.1943, + "step": 25282 + }, + { + "epoch": 0.742351283105291, + "grad_norm": 0.0, + "learning_rate": 3.284278337256589e-06, + "loss": 1.2178, + "step": 25283 + }, + { + "epoch": 0.7423806447824299, + "grad_norm": 0.0, + "learning_rate": 3.2835737638600095e-06, + "loss": 1.2969, + "step": 25284 + }, + { + "epoch": 0.742410006459569, + "grad_norm": 0.0, + "learning_rate": 3.2828692512017966e-06, + "loss": 1.2568, + "step": 25285 + }, + { + "epoch": 0.742439368136708, + "grad_norm": 0.0, + "learning_rate": 3.2821647992883156e-06, + "loss": 1.2705, + "step": 25286 + }, + { + "epoch": 0.7424687298138469, + "grad_norm": 0.0, + "learning_rate": 3.281460408125944e-06, + "loss": 1.29, + "step": 25287 + }, + { + "epoch": 0.742498091490986, + "grad_norm": 0.0, + "learning_rate": 3.2807560777210425e-06, + "loss": 1.2646, + "step": 25288 + }, + { + "epoch": 0.742527453168125, + "grad_norm": 0.0, + "learning_rate": 3.2800518080799826e-06, + "loss": 1.1475, + "step": 25289 + }, + { + "epoch": 0.7425568148452639, + "grad_norm": 0.0, + "learning_rate": 3.279347599209137e-06, + "loss": 1.3311, + "step": 25290 + }, + { + "epoch": 0.742586176522403, + "grad_norm": 0.0, + "learning_rate": 3.27864345111487e-06, + "loss": 1.1953, + "step": 25291 + }, + { + "epoch": 0.742615538199542, + "grad_norm": 0.0, + "learning_rate": 3.277939363803555e-06, + "loss": 1.3037, + "step": 25292 + }, + { + "epoch": 0.7426448998766809, + "grad_norm": 0.0, + "learning_rate": 3.2772353372815524e-06, + "loss": 1.1133, + "step": 25293 + }, + { + "epoch": 0.74267426155382, + "grad_norm": 0.0, + "learning_rate": 3.276531371555236e-06, + "loss": 1.2529, + "step": 25294 + }, + { + "epoch": 0.742703623230959, + "grad_norm": 0.0, + "learning_rate": 3.2758274666309675e-06, + "loss": 1.207, + "step": 25295 + }, + { + "epoch": 0.7427329849080979, + "grad_norm": 0.0, + "learning_rate": 3.275123622515114e-06, + "loss": 1.1885, + "step": 25296 + }, + { + "epoch": 0.742762346585237, + "grad_norm": 0.0, + "learning_rate": 3.27441983921404e-06, + "loss": 1.2607, + "step": 25297 + }, + { + "epoch": 0.742791708262376, + "grad_norm": 0.0, + "learning_rate": 3.2737161167341058e-06, + "loss": 1.1807, + "step": 25298 + }, + { + "epoch": 0.7428210699395149, + "grad_norm": 0.0, + "learning_rate": 3.2730124550816834e-06, + "loss": 1.2764, + "step": 25299 + }, + { + "epoch": 0.742850431616654, + "grad_norm": 0.0, + "learning_rate": 3.272308854263129e-06, + "loss": 1.0938, + "step": 25300 + }, + { + "epoch": 0.742879793293793, + "grad_norm": 0.0, + "learning_rate": 3.2716053142848116e-06, + "loss": 1.2969, + "step": 25301 + }, + { + "epoch": 0.7429091549709319, + "grad_norm": 0.0, + "learning_rate": 3.2709018351530863e-06, + "loss": 1.2764, + "step": 25302 + }, + { + "epoch": 0.742938516648071, + "grad_norm": 0.0, + "learning_rate": 3.270198416874323e-06, + "loss": 1.2705, + "step": 25303 + }, + { + "epoch": 0.7429678783252099, + "grad_norm": 0.0, + "learning_rate": 3.2694950594548793e-06, + "loss": 1.2793, + "step": 25304 + }, + { + "epoch": 0.7429972400023489, + "grad_norm": 0.0, + "learning_rate": 3.2687917629011155e-06, + "loss": 1.2354, + "step": 25305 + }, + { + "epoch": 0.743026601679488, + "grad_norm": 0.0, + "learning_rate": 3.2680885272193884e-06, + "loss": 1.2334, + "step": 25306 + }, + { + "epoch": 0.7430559633566269, + "grad_norm": 0.0, + "learning_rate": 3.2673853524160635e-06, + "loss": 1.2983, + "step": 25307 + }, + { + "epoch": 0.7430853250337659, + "grad_norm": 0.0, + "learning_rate": 3.2666822384974973e-06, + "loss": 1.21, + "step": 25308 + }, + { + "epoch": 0.743114686710905, + "grad_norm": 0.0, + "learning_rate": 3.265979185470045e-06, + "loss": 1.2388, + "step": 25309 + }, + { + "epoch": 0.7431440483880439, + "grad_norm": 0.0, + "learning_rate": 3.2652761933400713e-06, + "loss": 1.2646, + "step": 25310 + }, + { + "epoch": 0.7431734100651829, + "grad_norm": 0.0, + "learning_rate": 3.2645732621139258e-06, + "loss": 1.2832, + "step": 25311 + }, + { + "epoch": 0.743202771742322, + "grad_norm": 0.0, + "learning_rate": 3.2638703917979754e-06, + "loss": 1.0981, + "step": 25312 + }, + { + "epoch": 0.7432321334194609, + "grad_norm": 0.0, + "learning_rate": 3.263167582398564e-06, + "loss": 1.1338, + "step": 25313 + }, + { + "epoch": 0.7432614950965999, + "grad_norm": 0.0, + "learning_rate": 3.2624648339220575e-06, + "loss": 1.186, + "step": 25314 + }, + { + "epoch": 0.743290856773739, + "grad_norm": 0.0, + "learning_rate": 3.2617621463748027e-06, + "loss": 1.0962, + "step": 25315 + }, + { + "epoch": 0.7433202184508779, + "grad_norm": 0.0, + "learning_rate": 3.261059519763162e-06, + "loss": 1.2949, + "step": 25316 + }, + { + "epoch": 0.7433495801280169, + "grad_norm": 0.0, + "learning_rate": 3.2603569540934867e-06, + "loss": 1.1514, + "step": 25317 + }, + { + "epoch": 0.743378941805156, + "grad_norm": 0.0, + "learning_rate": 3.259654449372126e-06, + "loss": 1.271, + "step": 25318 + }, + { + "epoch": 0.7434083034822949, + "grad_norm": 0.0, + "learning_rate": 3.258952005605439e-06, + "loss": 1.1543, + "step": 25319 + }, + { + "epoch": 0.7434376651594339, + "grad_norm": 0.0, + "learning_rate": 3.2582496227997763e-06, + "loss": 1.1943, + "step": 25320 + }, + { + "epoch": 0.743467026836573, + "grad_norm": 0.0, + "learning_rate": 3.2575473009614876e-06, + "loss": 1.1553, + "step": 25321 + }, + { + "epoch": 0.7434963885137119, + "grad_norm": 0.0, + "learning_rate": 3.2568450400969232e-06, + "loss": 1.3174, + "step": 25322 + }, + { + "epoch": 0.7435257501908509, + "grad_norm": 0.0, + "learning_rate": 3.2561428402124397e-06, + "loss": 1.1216, + "step": 25323 + }, + { + "epoch": 0.74355511186799, + "grad_norm": 0.0, + "learning_rate": 3.2554407013143783e-06, + "loss": 1.1367, + "step": 25324 + }, + { + "epoch": 0.7435844735451289, + "grad_norm": 0.0, + "learning_rate": 3.2547386234090995e-06, + "loss": 1.1216, + "step": 25325 + }, + { + "epoch": 0.7436138352222679, + "grad_norm": 0.0, + "learning_rate": 3.254036606502943e-06, + "loss": 1.1055, + "step": 25326 + }, + { + "epoch": 0.7436431968994069, + "grad_norm": 0.0, + "learning_rate": 3.2533346506022633e-06, + "loss": 1.2256, + "step": 25327 + }, + { + "epoch": 0.7436725585765459, + "grad_norm": 0.0, + "learning_rate": 3.2526327557134075e-06, + "loss": 1.251, + "step": 25328 + }, + { + "epoch": 0.7437019202536849, + "grad_norm": 0.0, + "learning_rate": 3.2519309218427207e-06, + "loss": 1.1167, + "step": 25329 + }, + { + "epoch": 0.7437312819308239, + "grad_norm": 0.0, + "learning_rate": 3.251229148996551e-06, + "loss": 1.335, + "step": 25330 + }, + { + "epoch": 0.7437606436079629, + "grad_norm": 0.0, + "learning_rate": 3.2505274371812413e-06, + "loss": 1.2153, + "step": 25331 + }, + { + "epoch": 0.7437900052851019, + "grad_norm": 0.0, + "learning_rate": 3.2498257864031444e-06, + "loss": 1.0981, + "step": 25332 + }, + { + "epoch": 0.7438193669622409, + "grad_norm": 0.0, + "learning_rate": 3.2491241966685983e-06, + "loss": 1.2441, + "step": 25333 + }, + { + "epoch": 0.7438487286393799, + "grad_norm": 0.0, + "learning_rate": 3.2484226679839536e-06, + "loss": 1.2285, + "step": 25334 + }, + { + "epoch": 0.7438780903165189, + "grad_norm": 0.0, + "learning_rate": 3.2477212003555494e-06, + "loss": 1.2715, + "step": 25335 + }, + { + "epoch": 0.7439074519936579, + "grad_norm": 0.0, + "learning_rate": 3.2470197937897342e-06, + "loss": 1.2725, + "step": 25336 + }, + { + "epoch": 0.7439368136707969, + "grad_norm": 0.0, + "learning_rate": 3.2463184482928512e-06, + "loss": 1.2236, + "step": 25337 + }, + { + "epoch": 0.7439661753479359, + "grad_norm": 0.0, + "learning_rate": 3.245617163871234e-06, + "loss": 1.23, + "step": 25338 + }, + { + "epoch": 0.7439955370250749, + "grad_norm": 0.0, + "learning_rate": 3.2449159405312348e-06, + "loss": 1.2002, + "step": 25339 + }, + { + "epoch": 0.7440248987022139, + "grad_norm": 0.0, + "learning_rate": 3.2442147782791865e-06, + "loss": 1.2285, + "step": 25340 + }, + { + "epoch": 0.7440542603793528, + "grad_norm": 0.0, + "learning_rate": 3.243513677121438e-06, + "loss": 1.1543, + "step": 25341 + }, + { + "epoch": 0.7440836220564918, + "grad_norm": 0.0, + "learning_rate": 3.242812637064322e-06, + "loss": 1.2935, + "step": 25342 + }, + { + "epoch": 0.7441129837336309, + "grad_norm": 0.0, + "learning_rate": 3.2421116581141855e-06, + "loss": 1.2812, + "step": 25343 + }, + { + "epoch": 0.7441423454107698, + "grad_norm": 0.0, + "learning_rate": 3.2414107402773643e-06, + "loss": 1.207, + "step": 25344 + }, + { + "epoch": 0.7441717070879088, + "grad_norm": 0.0, + "learning_rate": 3.240709883560196e-06, + "loss": 1.3179, + "step": 25345 + }, + { + "epoch": 0.7442010687650479, + "grad_norm": 0.0, + "learning_rate": 3.240009087969016e-06, + "loss": 1.1753, + "step": 25346 + }, + { + "epoch": 0.7442304304421868, + "grad_norm": 0.0, + "learning_rate": 3.239308353510169e-06, + "loss": 1.1602, + "step": 25347 + }, + { + "epoch": 0.7442597921193258, + "grad_norm": 0.0, + "learning_rate": 3.238607680189988e-06, + "loss": 1.3691, + "step": 25348 + }, + { + "epoch": 0.7442891537964649, + "grad_norm": 0.0, + "learning_rate": 3.237907068014805e-06, + "loss": 1.2686, + "step": 25349 + }, + { + "epoch": 0.7443185154736038, + "grad_norm": 0.0, + "learning_rate": 3.2372065169909638e-06, + "loss": 1.271, + "step": 25350 + }, + { + "epoch": 0.7443478771507428, + "grad_norm": 0.0, + "learning_rate": 3.236506027124794e-06, + "loss": 1.1982, + "step": 25351 + }, + { + "epoch": 0.7443772388278819, + "grad_norm": 0.0, + "learning_rate": 3.2358055984226345e-06, + "loss": 1.2578, + "step": 25352 + }, + { + "epoch": 0.7444066005050208, + "grad_norm": 0.0, + "learning_rate": 3.2351052308908172e-06, + "loss": 1.2705, + "step": 25353 + }, + { + "epoch": 0.7444359621821598, + "grad_norm": 0.0, + "learning_rate": 3.2344049245356767e-06, + "loss": 1.2744, + "step": 25354 + }, + { + "epoch": 0.7444653238592989, + "grad_norm": 0.0, + "learning_rate": 3.233704679363541e-06, + "loss": 1.1943, + "step": 25355 + }, + { + "epoch": 0.7444946855364378, + "grad_norm": 0.0, + "learning_rate": 3.2330044953807503e-06, + "loss": 1.3032, + "step": 25356 + }, + { + "epoch": 0.7445240472135768, + "grad_norm": 0.0, + "learning_rate": 3.232304372593633e-06, + "loss": 1.2891, + "step": 25357 + }, + { + "epoch": 0.7445534088907159, + "grad_norm": 0.0, + "learning_rate": 3.2316043110085184e-06, + "loss": 1.1914, + "step": 25358 + }, + { + "epoch": 0.7445827705678548, + "grad_norm": 0.0, + "learning_rate": 3.2309043106317405e-06, + "loss": 1.2319, + "step": 25359 + }, + { + "epoch": 0.7446121322449938, + "grad_norm": 0.0, + "learning_rate": 3.2302043714696263e-06, + "loss": 1.2852, + "step": 25360 + }, + { + "epoch": 0.7446414939221329, + "grad_norm": 0.0, + "learning_rate": 3.2295044935285116e-06, + "loss": 1.2881, + "step": 25361 + }, + { + "epoch": 0.7446708555992718, + "grad_norm": 0.0, + "learning_rate": 3.2288046768147207e-06, + "loss": 1.2261, + "step": 25362 + }, + { + "epoch": 0.7447002172764108, + "grad_norm": 0.0, + "learning_rate": 3.228104921334584e-06, + "loss": 1.2114, + "step": 25363 + }, + { + "epoch": 0.7447295789535499, + "grad_norm": 0.0, + "learning_rate": 3.2274052270944258e-06, + "loss": 1.1938, + "step": 25364 + }, + { + "epoch": 0.7447589406306888, + "grad_norm": 0.0, + "learning_rate": 3.2267055941005785e-06, + "loss": 1.2246, + "step": 25365 + }, + { + "epoch": 0.7447883023078278, + "grad_norm": 0.0, + "learning_rate": 3.2260060223593683e-06, + "loss": 1.188, + "step": 25366 + }, + { + "epoch": 0.7448176639849668, + "grad_norm": 0.0, + "learning_rate": 3.2253065118771177e-06, + "loss": 1.2051, + "step": 25367 + }, + { + "epoch": 0.7448470256621058, + "grad_norm": 0.0, + "learning_rate": 3.2246070626601576e-06, + "loss": 1.2568, + "step": 25368 + }, + { + "epoch": 0.7448763873392448, + "grad_norm": 0.0, + "learning_rate": 3.2239076747148114e-06, + "loss": 1.2134, + "step": 25369 + }, + { + "epoch": 0.7449057490163838, + "grad_norm": 0.0, + "learning_rate": 3.223208348047403e-06, + "loss": 1.2988, + "step": 25370 + }, + { + "epoch": 0.7449351106935228, + "grad_norm": 0.0, + "learning_rate": 3.2225090826642536e-06, + "loss": 1.1904, + "step": 25371 + }, + { + "epoch": 0.7449644723706618, + "grad_norm": 0.0, + "learning_rate": 3.2218098785716946e-06, + "loss": 1.2109, + "step": 25372 + }, + { + "epoch": 0.7449938340478008, + "grad_norm": 0.0, + "learning_rate": 3.2211107357760406e-06, + "loss": 1.1992, + "step": 25373 + }, + { + "epoch": 0.7450231957249398, + "grad_norm": 0.0, + "learning_rate": 3.220411654283623e-06, + "loss": 1.1934, + "step": 25374 + }, + { + "epoch": 0.7450525574020788, + "grad_norm": 0.0, + "learning_rate": 3.2197126341007547e-06, + "loss": 1.251, + "step": 25375 + }, + { + "epoch": 0.7450819190792178, + "grad_norm": 0.0, + "learning_rate": 3.2190136752337643e-06, + "loss": 1.2461, + "step": 25376 + }, + { + "epoch": 0.7451112807563568, + "grad_norm": 0.0, + "learning_rate": 3.218314777688971e-06, + "loss": 1.1094, + "step": 25377 + }, + { + "epoch": 0.7451406424334958, + "grad_norm": 0.0, + "learning_rate": 3.2176159414726935e-06, + "loss": 1.1528, + "step": 25378 + }, + { + "epoch": 0.7451700041106348, + "grad_norm": 0.0, + "learning_rate": 3.2169171665912526e-06, + "loss": 1.2192, + "step": 25379 + }, + { + "epoch": 0.7451993657877738, + "grad_norm": 0.0, + "learning_rate": 3.2162184530509623e-06, + "loss": 1.2646, + "step": 25380 + }, + { + "epoch": 0.7452287274649128, + "grad_norm": 0.0, + "learning_rate": 3.21551980085815e-06, + "loss": 1.1382, + "step": 25381 + }, + { + "epoch": 0.7452580891420518, + "grad_norm": 0.0, + "learning_rate": 3.2148212100191266e-06, + "loss": 1.2393, + "step": 25382 + }, + { + "epoch": 0.7452874508191908, + "grad_norm": 0.0, + "learning_rate": 3.214122680540216e-06, + "loss": 1.417, + "step": 25383 + }, + { + "epoch": 0.7453168124963298, + "grad_norm": 0.0, + "learning_rate": 3.213424212427728e-06, + "loss": 1.1934, + "step": 25384 + }, + { + "epoch": 0.7453461741734688, + "grad_norm": 0.0, + "learning_rate": 3.212725805687986e-06, + "loss": 1.1392, + "step": 25385 + }, + { + "epoch": 0.7453755358506078, + "grad_norm": 0.0, + "learning_rate": 3.2120274603273037e-06, + "loss": 1.2319, + "step": 25386 + }, + { + "epoch": 0.7454048975277467, + "grad_norm": 0.0, + "learning_rate": 3.2113291763519947e-06, + "loss": 1.2744, + "step": 25387 + }, + { + "epoch": 0.7454342592048858, + "grad_norm": 0.0, + "learning_rate": 3.2106309537683755e-06, + "loss": 1.2051, + "step": 25388 + }, + { + "epoch": 0.7454636208820248, + "grad_norm": 0.0, + "learning_rate": 3.209932792582755e-06, + "loss": 1.1953, + "step": 25389 + }, + { + "epoch": 0.7454929825591637, + "grad_norm": 0.0, + "learning_rate": 3.209234692801455e-06, + "loss": 1.3193, + "step": 25390 + }, + { + "epoch": 0.7455223442363028, + "grad_norm": 0.0, + "learning_rate": 3.208536654430783e-06, + "loss": 1.2603, + "step": 25391 + }, + { + "epoch": 0.7455517059134418, + "grad_norm": 0.0, + "learning_rate": 3.207838677477054e-06, + "loss": 1.2822, + "step": 25392 + }, + { + "epoch": 0.7455810675905807, + "grad_norm": 0.0, + "learning_rate": 3.207140761946581e-06, + "loss": 1.313, + "step": 25393 + }, + { + "epoch": 0.7456104292677198, + "grad_norm": 0.0, + "learning_rate": 3.2064429078456737e-06, + "loss": 1.3506, + "step": 25394 + }, + { + "epoch": 0.7456397909448588, + "grad_norm": 0.0, + "learning_rate": 3.2057451151806394e-06, + "loss": 1.2661, + "step": 25395 + }, + { + "epoch": 0.7456691526219977, + "grad_norm": 0.0, + "learning_rate": 3.2050473839577955e-06, + "loss": 1.3145, + "step": 25396 + }, + { + "epoch": 0.7456985142991368, + "grad_norm": 0.0, + "learning_rate": 3.204349714183448e-06, + "loss": 1.3633, + "step": 25397 + }, + { + "epoch": 0.7457278759762758, + "grad_norm": 0.0, + "learning_rate": 3.2036521058639026e-06, + "loss": 1.3223, + "step": 25398 + }, + { + "epoch": 0.7457572376534147, + "grad_norm": 0.0, + "learning_rate": 3.2029545590054766e-06, + "loss": 1.2739, + "step": 25399 + }, + { + "epoch": 0.7457865993305538, + "grad_norm": 0.0, + "learning_rate": 3.202257073614469e-06, + "loss": 1.1309, + "step": 25400 + }, + { + "epoch": 0.7458159610076928, + "grad_norm": 0.0, + "learning_rate": 3.2015596496971945e-06, + "loss": 1.2246, + "step": 25401 + }, + { + "epoch": 0.7458453226848317, + "grad_norm": 0.0, + "learning_rate": 3.200862287259958e-06, + "loss": 1.3018, + "step": 25402 + }, + { + "epoch": 0.7458746843619708, + "grad_norm": 0.0, + "learning_rate": 3.2001649863090647e-06, + "loss": 1.2964, + "step": 25403 + }, + { + "epoch": 0.7459040460391098, + "grad_norm": 0.0, + "learning_rate": 3.1994677468508185e-06, + "loss": 1.1973, + "step": 25404 + }, + { + "epoch": 0.7459334077162487, + "grad_norm": 0.0, + "learning_rate": 3.1987705688915294e-06, + "loss": 1.2344, + "step": 25405 + }, + { + "epoch": 0.7459627693933878, + "grad_norm": 0.0, + "learning_rate": 3.1980734524375003e-06, + "loss": 1.1367, + "step": 25406 + }, + { + "epoch": 0.7459921310705268, + "grad_norm": 0.0, + "learning_rate": 3.197376397495031e-06, + "loss": 1.208, + "step": 25407 + }, + { + "epoch": 0.7460214927476657, + "grad_norm": 0.0, + "learning_rate": 3.196679404070433e-06, + "loss": 1.2397, + "step": 25408 + }, + { + "epoch": 0.7460508544248048, + "grad_norm": 0.0, + "learning_rate": 3.195982472170003e-06, + "loss": 1.3418, + "step": 25409 + }, + { + "epoch": 0.7460802161019437, + "grad_norm": 0.0, + "learning_rate": 3.195285601800048e-06, + "loss": 1.25, + "step": 25410 + }, + { + "epoch": 0.7461095777790827, + "grad_norm": 0.0, + "learning_rate": 3.1945887929668685e-06, + "loss": 1.2832, + "step": 25411 + }, + { + "epoch": 0.7461389394562218, + "grad_norm": 0.0, + "learning_rate": 3.1938920456767652e-06, + "loss": 1.1401, + "step": 25412 + }, + { + "epoch": 0.7461683011333607, + "grad_norm": 0.0, + "learning_rate": 3.1931953599360366e-06, + "loss": 1.3203, + "step": 25413 + }, + { + "epoch": 0.7461976628104997, + "grad_norm": 0.0, + "learning_rate": 3.1924987357509884e-06, + "loss": 1.1782, + "step": 25414 + }, + { + "epoch": 0.7462270244876388, + "grad_norm": 0.0, + "learning_rate": 3.1918021731279136e-06, + "loss": 1.2832, + "step": 25415 + }, + { + "epoch": 0.7462563861647777, + "grad_norm": 0.0, + "learning_rate": 3.1911056720731193e-06, + "loss": 1.0581, + "step": 25416 + }, + { + "epoch": 0.7462857478419167, + "grad_norm": 0.0, + "learning_rate": 3.1904092325929e-06, + "loss": 1.1641, + "step": 25417 + }, + { + "epoch": 0.7463151095190558, + "grad_norm": 0.0, + "learning_rate": 3.189712854693555e-06, + "loss": 1.2686, + "step": 25418 + }, + { + "epoch": 0.7463444711961947, + "grad_norm": 0.0, + "learning_rate": 3.18901653838138e-06, + "loss": 1.2412, + "step": 25419 + }, + { + "epoch": 0.7463738328733337, + "grad_norm": 0.0, + "learning_rate": 3.188320283662669e-06, + "loss": 1.3115, + "step": 25420 + }, + { + "epoch": 0.7464031945504728, + "grad_norm": 0.0, + "learning_rate": 3.1876240905437262e-06, + "loss": 1.1641, + "step": 25421 + }, + { + "epoch": 0.7464325562276117, + "grad_norm": 0.0, + "learning_rate": 3.18692795903084e-06, + "loss": 1.21, + "step": 25422 + }, + { + "epoch": 0.7464619179047507, + "grad_norm": 0.0, + "learning_rate": 3.1862318891303122e-06, + "loss": 1.2139, + "step": 25423 + }, + { + "epoch": 0.7464912795818898, + "grad_norm": 0.0, + "learning_rate": 3.1855358808484318e-06, + "loss": 1.2969, + "step": 25424 + }, + { + "epoch": 0.7465206412590287, + "grad_norm": 0.0, + "learning_rate": 3.1848399341914983e-06, + "loss": 1.2764, + "step": 25425 + }, + { + "epoch": 0.7465500029361677, + "grad_norm": 0.0, + "learning_rate": 3.1841440491658026e-06, + "loss": 1.2095, + "step": 25426 + }, + { + "epoch": 0.7465793646133068, + "grad_norm": 0.0, + "learning_rate": 3.183448225777638e-06, + "loss": 1.2993, + "step": 25427 + }, + { + "epoch": 0.7466087262904457, + "grad_norm": 0.0, + "learning_rate": 3.182752464033296e-06, + "loss": 1.3174, + "step": 25428 + }, + { + "epoch": 0.7466380879675847, + "grad_norm": 0.0, + "learning_rate": 3.1820567639390677e-06, + "loss": 1.1211, + "step": 25429 + }, + { + "epoch": 0.7466674496447238, + "grad_norm": 0.0, + "learning_rate": 3.1813611255012487e-06, + "loss": 1.2002, + "step": 25430 + }, + { + "epoch": 0.7466968113218627, + "grad_norm": 0.0, + "learning_rate": 3.180665548726124e-06, + "loss": 1.3389, + "step": 25431 + }, + { + "epoch": 0.7467261729990017, + "grad_norm": 0.0, + "learning_rate": 3.17997003361999e-06, + "loss": 1.2773, + "step": 25432 + }, + { + "epoch": 0.7467555346761408, + "grad_norm": 0.0, + "learning_rate": 3.1792745801891313e-06, + "loss": 1.2676, + "step": 25433 + }, + { + "epoch": 0.7467848963532797, + "grad_norm": 0.0, + "learning_rate": 3.178579188439842e-06, + "loss": 1.2812, + "step": 25434 + }, + { + "epoch": 0.7468142580304187, + "grad_norm": 0.0, + "learning_rate": 3.1778838583784088e-06, + "loss": 1.1416, + "step": 25435 + }, + { + "epoch": 0.7468436197075577, + "grad_norm": 0.0, + "learning_rate": 3.1771885900111175e-06, + "loss": 1.2051, + "step": 25436 + }, + { + "epoch": 0.7468729813846967, + "grad_norm": 0.0, + "learning_rate": 3.176493383344259e-06, + "loss": 1.2598, + "step": 25437 + }, + { + "epoch": 0.7469023430618357, + "grad_norm": 0.0, + "learning_rate": 3.1757982383841137e-06, + "loss": 1.1343, + "step": 25438 + }, + { + "epoch": 0.7469317047389747, + "grad_norm": 0.0, + "learning_rate": 3.175103155136977e-06, + "loss": 1.4004, + "step": 25439 + }, + { + "epoch": 0.7469610664161137, + "grad_norm": 0.0, + "learning_rate": 3.1744081336091258e-06, + "loss": 1.3877, + "step": 25440 + }, + { + "epoch": 0.7469904280932527, + "grad_norm": 0.0, + "learning_rate": 3.173713173806855e-06, + "loss": 1.2539, + "step": 25441 + }, + { + "epoch": 0.7470197897703917, + "grad_norm": 0.0, + "learning_rate": 3.1730182757364425e-06, + "loss": 1.2656, + "step": 25442 + }, + { + "epoch": 0.7470491514475307, + "grad_norm": 0.0, + "learning_rate": 3.1723234394041746e-06, + "loss": 1.3496, + "step": 25443 + }, + { + "epoch": 0.7470785131246697, + "grad_norm": 0.0, + "learning_rate": 3.171628664816332e-06, + "loss": 1.2041, + "step": 25444 + }, + { + "epoch": 0.7471078748018086, + "grad_norm": 0.0, + "learning_rate": 3.1709339519792027e-06, + "loss": 1.2471, + "step": 25445 + }, + { + "epoch": 0.7471372364789477, + "grad_norm": 0.0, + "learning_rate": 3.170239300899066e-06, + "loss": 1.3193, + "step": 25446 + }, + { + "epoch": 0.7471665981560867, + "grad_norm": 0.0, + "learning_rate": 3.169544711582202e-06, + "loss": 1.1582, + "step": 25447 + }, + { + "epoch": 0.7471959598332256, + "grad_norm": 0.0, + "learning_rate": 3.1688501840348986e-06, + "loss": 1.2266, + "step": 25448 + }, + { + "epoch": 0.7472253215103647, + "grad_norm": 0.0, + "learning_rate": 3.168155718263427e-06, + "loss": 1.2705, + "step": 25449 + }, + { + "epoch": 0.7472546831875037, + "grad_norm": 0.0, + "learning_rate": 3.1674613142740785e-06, + "loss": 1.3457, + "step": 25450 + }, + { + "epoch": 0.7472840448646426, + "grad_norm": 0.0, + "learning_rate": 3.166766972073125e-06, + "loss": 1.3828, + "step": 25451 + }, + { + "epoch": 0.7473134065417817, + "grad_norm": 0.0, + "learning_rate": 3.1660726916668493e-06, + "loss": 1.2603, + "step": 25452 + }, + { + "epoch": 0.7473427682189207, + "grad_norm": 0.0, + "learning_rate": 3.1653784730615246e-06, + "loss": 1.1675, + "step": 25453 + }, + { + "epoch": 0.7473721298960596, + "grad_norm": 0.0, + "learning_rate": 3.164684316263437e-06, + "loss": 1.1133, + "step": 25454 + }, + { + "epoch": 0.7474014915731987, + "grad_norm": 0.0, + "learning_rate": 3.1639902212788578e-06, + "loss": 1.207, + "step": 25455 + }, + { + "epoch": 0.7474308532503376, + "grad_norm": 0.0, + "learning_rate": 3.163296188114063e-06, + "loss": 1.0889, + "step": 25456 + }, + { + "epoch": 0.7474602149274766, + "grad_norm": 0.0, + "learning_rate": 3.1626022167753367e-06, + "loss": 1.1611, + "step": 25457 + }, + { + "epoch": 0.7474895766046157, + "grad_norm": 0.0, + "learning_rate": 3.1619083072689447e-06, + "loss": 1.1958, + "step": 25458 + }, + { + "epoch": 0.7475189382817546, + "grad_norm": 0.0, + "learning_rate": 3.1612144596011706e-06, + "loss": 1.2158, + "step": 25459 + }, + { + "epoch": 0.7475482999588936, + "grad_norm": 0.0, + "learning_rate": 3.1605206737782856e-06, + "loss": 1.2661, + "step": 25460 + }, + { + "epoch": 0.7475776616360327, + "grad_norm": 0.0, + "learning_rate": 3.1598269498065647e-06, + "loss": 1.2803, + "step": 25461 + }, + { + "epoch": 0.7476070233131716, + "grad_norm": 0.0, + "learning_rate": 3.1591332876922763e-06, + "loss": 1.2339, + "step": 25462 + }, + { + "epoch": 0.7476363849903106, + "grad_norm": 0.0, + "learning_rate": 3.158439687441701e-06, + "loss": 1.2588, + "step": 25463 + }, + { + "epoch": 0.7476657466674497, + "grad_norm": 0.0, + "learning_rate": 3.157746149061104e-06, + "loss": 1.1572, + "step": 25464 + }, + { + "epoch": 0.7476951083445886, + "grad_norm": 0.0, + "learning_rate": 3.157052672556765e-06, + "loss": 1.2192, + "step": 25465 + }, + { + "epoch": 0.7477244700217276, + "grad_norm": 0.0, + "learning_rate": 3.156359257934951e-06, + "loss": 1.1299, + "step": 25466 + }, + { + "epoch": 0.7477538316988667, + "grad_norm": 0.0, + "learning_rate": 3.1556659052019322e-06, + "loss": 1.167, + "step": 25467 + }, + { + "epoch": 0.7477831933760056, + "grad_norm": 0.0, + "learning_rate": 3.1549726143639805e-06, + "loss": 1.293, + "step": 25468 + }, + { + "epoch": 0.7478125550531446, + "grad_norm": 0.0, + "learning_rate": 3.1542793854273613e-06, + "loss": 1.2383, + "step": 25469 + }, + { + "epoch": 0.7478419167302837, + "grad_norm": 0.0, + "learning_rate": 3.153586218398349e-06, + "loss": 1.3584, + "step": 25470 + }, + { + "epoch": 0.7478712784074226, + "grad_norm": 0.0, + "learning_rate": 3.1528931132832074e-06, + "loss": 1.1738, + "step": 25471 + }, + { + "epoch": 0.7479006400845616, + "grad_norm": 0.0, + "learning_rate": 3.1522000700882093e-06, + "loss": 1.2305, + "step": 25472 + }, + { + "epoch": 0.7479300017617007, + "grad_norm": 0.0, + "learning_rate": 3.1515070888196176e-06, + "loss": 1.1118, + "step": 25473 + }, + { + "epoch": 0.7479593634388396, + "grad_norm": 0.0, + "learning_rate": 3.1508141694837045e-06, + "loss": 1.3252, + "step": 25474 + }, + { + "epoch": 0.7479887251159786, + "grad_norm": 0.0, + "learning_rate": 3.1501213120867323e-06, + "loss": 1.2393, + "step": 25475 + }, + { + "epoch": 0.7480180867931177, + "grad_norm": 0.0, + "learning_rate": 3.1494285166349668e-06, + "loss": 1.166, + "step": 25476 + }, + { + "epoch": 0.7480474484702566, + "grad_norm": 0.0, + "learning_rate": 3.1487357831346756e-06, + "loss": 1.2256, + "step": 25477 + }, + { + "epoch": 0.7480768101473956, + "grad_norm": 0.0, + "learning_rate": 3.1480431115921162e-06, + "loss": 1.2788, + "step": 25478 + }, + { + "epoch": 0.7481061718245346, + "grad_norm": 0.0, + "learning_rate": 3.147350502013562e-06, + "loss": 1.2725, + "step": 25479 + }, + { + "epoch": 0.7481355335016736, + "grad_norm": 0.0, + "learning_rate": 3.146657954405269e-06, + "loss": 1.1382, + "step": 25480 + }, + { + "epoch": 0.7481648951788126, + "grad_norm": 0.0, + "learning_rate": 3.1459654687735064e-06, + "loss": 1.2441, + "step": 25481 + }, + { + "epoch": 0.7481942568559516, + "grad_norm": 0.0, + "learning_rate": 3.1452730451245297e-06, + "loss": 1.249, + "step": 25482 + }, + { + "epoch": 0.7482236185330906, + "grad_norm": 0.0, + "learning_rate": 3.1445806834646076e-06, + "loss": 1.2559, + "step": 25483 + }, + { + "epoch": 0.7482529802102296, + "grad_norm": 0.0, + "learning_rate": 3.143888383799999e-06, + "loss": 1.1841, + "step": 25484 + }, + { + "epoch": 0.7482823418873686, + "grad_norm": 0.0, + "learning_rate": 3.143196146136963e-06, + "loss": 1.1699, + "step": 25485 + }, + { + "epoch": 0.7483117035645076, + "grad_norm": 0.0, + "learning_rate": 3.1425039704817595e-06, + "loss": 1.3301, + "step": 25486 + }, + { + "epoch": 0.7483410652416466, + "grad_norm": 0.0, + "learning_rate": 3.1418118568406463e-06, + "loss": 1.1665, + "step": 25487 + }, + { + "epoch": 0.7483704269187856, + "grad_norm": 0.0, + "learning_rate": 3.141119805219889e-06, + "loss": 1.2861, + "step": 25488 + }, + { + "epoch": 0.7483997885959246, + "grad_norm": 0.0, + "learning_rate": 3.1404278156257374e-06, + "loss": 1.2437, + "step": 25489 + }, + { + "epoch": 0.7484291502730636, + "grad_norm": 0.0, + "learning_rate": 3.139735888064458e-06, + "loss": 1.3135, + "step": 25490 + }, + { + "epoch": 0.7484585119502026, + "grad_norm": 0.0, + "learning_rate": 3.1390440225423003e-06, + "loss": 1.0762, + "step": 25491 + }, + { + "epoch": 0.7484878736273416, + "grad_norm": 0.0, + "learning_rate": 3.1383522190655314e-06, + "loss": 1.1548, + "step": 25492 + }, + { + "epoch": 0.7485172353044806, + "grad_norm": 0.0, + "learning_rate": 3.137660477640393e-06, + "loss": 1.2227, + "step": 25493 + }, + { + "epoch": 0.7485465969816196, + "grad_norm": 0.0, + "learning_rate": 3.1369687982731523e-06, + "loss": 1.2666, + "step": 25494 + }, + { + "epoch": 0.7485759586587586, + "grad_norm": 0.0, + "learning_rate": 3.136277180970061e-06, + "loss": 1.1665, + "step": 25495 + }, + { + "epoch": 0.7486053203358976, + "grad_norm": 0.0, + "learning_rate": 3.1355856257373696e-06, + "loss": 1.1353, + "step": 25496 + }, + { + "epoch": 0.7486346820130366, + "grad_norm": 0.0, + "learning_rate": 3.1348941325813375e-06, + "loss": 1.2275, + "step": 25497 + }, + { + "epoch": 0.7486640436901756, + "grad_norm": 0.0, + "learning_rate": 3.134202701508214e-06, + "loss": 1.1943, + "step": 25498 + }, + { + "epoch": 0.7486934053673145, + "grad_norm": 0.0, + "learning_rate": 3.133511332524256e-06, + "loss": 1.2305, + "step": 25499 + }, + { + "epoch": 0.7487227670444536, + "grad_norm": 0.0, + "learning_rate": 3.1328200256357143e-06, + "loss": 1.3203, + "step": 25500 + }, + { + "epoch": 0.7487521287215926, + "grad_norm": 0.0, + "learning_rate": 3.13212878084884e-06, + "loss": 1.1895, + "step": 25501 + }, + { + "epoch": 0.7487814903987315, + "grad_norm": 0.0, + "learning_rate": 3.131437598169881e-06, + "loss": 1.1509, + "step": 25502 + }, + { + "epoch": 0.7488108520758706, + "grad_norm": 0.0, + "learning_rate": 3.130746477605093e-06, + "loss": 1.0264, + "step": 25503 + }, + { + "epoch": 0.7488402137530096, + "grad_norm": 0.0, + "learning_rate": 3.1300554191607246e-06, + "loss": 1.2598, + "step": 25504 + }, + { + "epoch": 0.7488695754301485, + "grad_norm": 0.0, + "learning_rate": 3.129364422843021e-06, + "loss": 1.3076, + "step": 25505 + }, + { + "epoch": 0.7488989371072876, + "grad_norm": 0.0, + "learning_rate": 3.128673488658237e-06, + "loss": 1.2832, + "step": 25506 + }, + { + "epoch": 0.7489282987844266, + "grad_norm": 0.0, + "learning_rate": 3.127982616612617e-06, + "loss": 1.2861, + "step": 25507 + }, + { + "epoch": 0.7489576604615655, + "grad_norm": 0.0, + "learning_rate": 3.1272918067124113e-06, + "loss": 1.2334, + "step": 25508 + }, + { + "epoch": 0.7489870221387046, + "grad_norm": 0.0, + "learning_rate": 3.1266010589638673e-06, + "loss": 1.2617, + "step": 25509 + }, + { + "epoch": 0.7490163838158436, + "grad_norm": 0.0, + "learning_rate": 3.12591037337323e-06, + "loss": 1.2266, + "step": 25510 + }, + { + "epoch": 0.7490457454929825, + "grad_norm": 0.0, + "learning_rate": 3.125219749946743e-06, + "loss": 1.3071, + "step": 25511 + }, + { + "epoch": 0.7490751071701216, + "grad_norm": 0.0, + "learning_rate": 3.1245291886906557e-06, + "loss": 1.3188, + "step": 25512 + }, + { + "epoch": 0.7491044688472606, + "grad_norm": 0.0, + "learning_rate": 3.1238386896112106e-06, + "loss": 1.3008, + "step": 25513 + }, + { + "epoch": 0.7491338305243995, + "grad_norm": 0.0, + "learning_rate": 3.123148252714656e-06, + "loss": 1.3076, + "step": 25514 + }, + { + "epoch": 0.7491631922015386, + "grad_norm": 0.0, + "learning_rate": 3.1224578780072324e-06, + "loss": 1.25, + "step": 25515 + }, + { + "epoch": 0.7491925538786776, + "grad_norm": 0.0, + "learning_rate": 3.121767565495181e-06, + "loss": 1.1265, + "step": 25516 + }, + { + "epoch": 0.7492219155558165, + "grad_norm": 0.0, + "learning_rate": 3.1210773151847528e-06, + "loss": 1.2822, + "step": 25517 + }, + { + "epoch": 0.7492512772329556, + "grad_norm": 0.0, + "learning_rate": 3.1203871270821785e-06, + "loss": 1.1704, + "step": 25518 + }, + { + "epoch": 0.7492806389100946, + "grad_norm": 0.0, + "learning_rate": 3.119697001193708e-06, + "loss": 1.1357, + "step": 25519 + }, + { + "epoch": 0.7493100005872335, + "grad_norm": 0.0, + "learning_rate": 3.119006937525576e-06, + "loss": 1.2148, + "step": 25520 + }, + { + "epoch": 0.7493393622643726, + "grad_norm": 0.0, + "learning_rate": 3.11831693608403e-06, + "loss": 1.1167, + "step": 25521 + }, + { + "epoch": 0.7493687239415115, + "grad_norm": 0.0, + "learning_rate": 3.1176269968753036e-06, + "loss": 1.1519, + "step": 25522 + }, + { + "epoch": 0.7493980856186505, + "grad_norm": 0.0, + "learning_rate": 3.116937119905642e-06, + "loss": 1.189, + "step": 25523 + }, + { + "epoch": 0.7494274472957896, + "grad_norm": 0.0, + "learning_rate": 3.11624730518128e-06, + "loss": 1.2344, + "step": 25524 + }, + { + "epoch": 0.7494568089729285, + "grad_norm": 0.0, + "learning_rate": 3.115557552708457e-06, + "loss": 1.3975, + "step": 25525 + }, + { + "epoch": 0.7494861706500675, + "grad_norm": 0.0, + "learning_rate": 3.1148678624934104e-06, + "loss": 1.1855, + "step": 25526 + }, + { + "epoch": 0.7495155323272066, + "grad_norm": 0.0, + "learning_rate": 3.1141782345423745e-06, + "loss": 1.2588, + "step": 25527 + }, + { + "epoch": 0.7495448940043455, + "grad_norm": 0.0, + "learning_rate": 3.113488668861591e-06, + "loss": 1.1943, + "step": 25528 + }, + { + "epoch": 0.7495742556814845, + "grad_norm": 0.0, + "learning_rate": 3.112799165457289e-06, + "loss": 1.21, + "step": 25529 + }, + { + "epoch": 0.7496036173586236, + "grad_norm": 0.0, + "learning_rate": 3.1121097243357126e-06, + "loss": 1.2402, + "step": 25530 + }, + { + "epoch": 0.7496329790357625, + "grad_norm": 0.0, + "learning_rate": 3.111420345503088e-06, + "loss": 1.1514, + "step": 25531 + }, + { + "epoch": 0.7496623407129015, + "grad_norm": 0.0, + "learning_rate": 3.1107310289656557e-06, + "loss": 1.2188, + "step": 25532 + }, + { + "epoch": 0.7496917023900406, + "grad_norm": 0.0, + "learning_rate": 3.1100417747296486e-06, + "loss": 1.209, + "step": 25533 + }, + { + "epoch": 0.7497210640671795, + "grad_norm": 0.0, + "learning_rate": 3.109352582801297e-06, + "loss": 1.2109, + "step": 25534 + }, + { + "epoch": 0.7497504257443185, + "grad_norm": 0.0, + "learning_rate": 3.1086634531868367e-06, + "loss": 1.2612, + "step": 25535 + }, + { + "epoch": 0.7497797874214576, + "grad_norm": 0.0, + "learning_rate": 3.1079743858924927e-06, + "loss": 1.2373, + "step": 25536 + }, + { + "epoch": 0.7498091490985965, + "grad_norm": 0.0, + "learning_rate": 3.1072853809245053e-06, + "loss": 1.249, + "step": 25537 + }, + { + "epoch": 0.7498385107757355, + "grad_norm": 0.0, + "learning_rate": 3.106596438289098e-06, + "loss": 1.3428, + "step": 25538 + }, + { + "epoch": 0.7498678724528746, + "grad_norm": 0.0, + "learning_rate": 3.105907557992508e-06, + "loss": 1.2861, + "step": 25539 + }, + { + "epoch": 0.7498972341300135, + "grad_norm": 0.0, + "learning_rate": 3.1052187400409584e-06, + "loss": 1.1538, + "step": 25540 + }, + { + "epoch": 0.7499265958071525, + "grad_norm": 0.0, + "learning_rate": 3.104529984440684e-06, + "loss": 1.2441, + "step": 25541 + }, + { + "epoch": 0.7499559574842916, + "grad_norm": 0.0, + "learning_rate": 3.103841291197911e-06, + "loss": 1.2617, + "step": 25542 + }, + { + "epoch": 0.7499853191614305, + "grad_norm": 0.0, + "learning_rate": 3.1031526603188677e-06, + "loss": 1.377, + "step": 25543 + }, + { + "epoch": 0.7500146808385695, + "grad_norm": 0.0, + "learning_rate": 3.1024640918097815e-06, + "loss": 1.1787, + "step": 25544 + }, + { + "epoch": 0.7500440425157084, + "grad_norm": 0.0, + "learning_rate": 3.1017755856768763e-06, + "loss": 1.2051, + "step": 25545 + }, + { + "epoch": 0.7500734041928475, + "grad_norm": 0.0, + "learning_rate": 3.101087141926383e-06, + "loss": 1.0415, + "step": 25546 + }, + { + "epoch": 0.7501027658699865, + "grad_norm": 0.0, + "learning_rate": 3.1003987605645226e-06, + "loss": 1.2104, + "step": 25547 + }, + { + "epoch": 0.7501321275471254, + "grad_norm": 0.0, + "learning_rate": 3.0997104415975278e-06, + "loss": 1.2617, + "step": 25548 + }, + { + "epoch": 0.7501614892242645, + "grad_norm": 0.0, + "learning_rate": 3.099022185031617e-06, + "loss": 1.061, + "step": 25549 + }, + { + "epoch": 0.7501908509014035, + "grad_norm": 0.0, + "learning_rate": 3.098333990873017e-06, + "loss": 1.2656, + "step": 25550 + }, + { + "epoch": 0.7502202125785424, + "grad_norm": 0.0, + "learning_rate": 3.097645859127946e-06, + "loss": 1.064, + "step": 25551 + }, + { + "epoch": 0.7502495742556815, + "grad_norm": 0.0, + "learning_rate": 3.0969577898026347e-06, + "loss": 1.1724, + "step": 25552 + }, + { + "epoch": 0.7502789359328205, + "grad_norm": 0.0, + "learning_rate": 3.096269782903303e-06, + "loss": 1.2354, + "step": 25553 + }, + { + "epoch": 0.7503082976099594, + "grad_norm": 0.0, + "learning_rate": 3.095581838436168e-06, + "loss": 1.2266, + "step": 25554 + }, + { + "epoch": 0.7503376592870985, + "grad_norm": 0.0, + "learning_rate": 3.0948939564074575e-06, + "loss": 1.231, + "step": 25555 + }, + { + "epoch": 0.7503670209642375, + "grad_norm": 0.0, + "learning_rate": 3.0942061368233865e-06, + "loss": 1.1763, + "step": 25556 + }, + { + "epoch": 0.7503963826413764, + "grad_norm": 0.0, + "learning_rate": 3.0935183796901815e-06, + "loss": 1.3066, + "step": 25557 + }, + { + "epoch": 0.7504257443185155, + "grad_norm": 0.0, + "learning_rate": 3.092830685014058e-06, + "loss": 1.2764, + "step": 25558 + }, + { + "epoch": 0.7504551059956545, + "grad_norm": 0.0, + "learning_rate": 3.0921430528012355e-06, + "loss": 1.2188, + "step": 25559 + }, + { + "epoch": 0.7504844676727934, + "grad_norm": 0.0, + "learning_rate": 3.091455483057929e-06, + "loss": 1.0928, + "step": 25560 + }, + { + "epoch": 0.7505138293499325, + "grad_norm": 0.0, + "learning_rate": 3.090767975790364e-06, + "loss": 1.1157, + "step": 25561 + }, + { + "epoch": 0.7505431910270715, + "grad_norm": 0.0, + "learning_rate": 3.09008053100475e-06, + "loss": 1.3193, + "step": 25562 + }, + { + "epoch": 0.7505725527042104, + "grad_norm": 0.0, + "learning_rate": 3.0893931487073104e-06, + "loss": 1.1685, + "step": 25563 + }, + { + "epoch": 0.7506019143813495, + "grad_norm": 0.0, + "learning_rate": 3.0887058289042595e-06, + "loss": 1.2754, + "step": 25564 + }, + { + "epoch": 0.7506312760584885, + "grad_norm": 0.0, + "learning_rate": 3.0880185716018072e-06, + "loss": 1.2373, + "step": 25565 + }, + { + "epoch": 0.7506606377356274, + "grad_norm": 0.0, + "learning_rate": 3.0873313768061775e-06, + "loss": 1.2754, + "step": 25566 + }, + { + "epoch": 0.7506899994127665, + "grad_norm": 0.0, + "learning_rate": 3.08664424452358e-06, + "loss": 1.3135, + "step": 25567 + }, + { + "epoch": 0.7507193610899054, + "grad_norm": 0.0, + "learning_rate": 3.085957174760229e-06, + "loss": 1.1362, + "step": 25568 + }, + { + "epoch": 0.7507487227670444, + "grad_norm": 0.0, + "learning_rate": 3.085270167522335e-06, + "loss": 1.2373, + "step": 25569 + }, + { + "epoch": 0.7507780844441835, + "grad_norm": 0.0, + "learning_rate": 3.0845832228161167e-06, + "loss": 1.1611, + "step": 25570 + }, + { + "epoch": 0.7508074461213224, + "grad_norm": 0.0, + "learning_rate": 3.0838963406477795e-06, + "loss": 1.2573, + "step": 25571 + }, + { + "epoch": 0.7508368077984614, + "grad_norm": 0.0, + "learning_rate": 3.0832095210235434e-06, + "loss": 1.293, + "step": 25572 + }, + { + "epoch": 0.7508661694756005, + "grad_norm": 0.0, + "learning_rate": 3.082522763949615e-06, + "loss": 1.3789, + "step": 25573 + }, + { + "epoch": 0.7508955311527394, + "grad_norm": 0.0, + "learning_rate": 3.081836069432204e-06, + "loss": 1.207, + "step": 25574 + }, + { + "epoch": 0.7509248928298784, + "grad_norm": 0.0, + "learning_rate": 3.081149437477522e-06, + "loss": 1.2402, + "step": 25575 + }, + { + "epoch": 0.7509542545070175, + "grad_norm": 0.0, + "learning_rate": 3.0804628680917734e-06, + "loss": 1.1138, + "step": 25576 + }, + { + "epoch": 0.7509836161841564, + "grad_norm": 0.0, + "learning_rate": 3.079776361281175e-06, + "loss": 1.2612, + "step": 25577 + }, + { + "epoch": 0.7510129778612954, + "grad_norm": 0.0, + "learning_rate": 3.0790899170519282e-06, + "loss": 1.165, + "step": 25578 + }, + { + "epoch": 0.7510423395384345, + "grad_norm": 0.0, + "learning_rate": 3.078403535410247e-06, + "loss": 1.2627, + "step": 25579 + }, + { + "epoch": 0.7510717012155734, + "grad_norm": 0.0, + "learning_rate": 3.0777172163623314e-06, + "loss": 1.1045, + "step": 25580 + }, + { + "epoch": 0.7511010628927124, + "grad_norm": 0.0, + "learning_rate": 3.0770309599143955e-06, + "loss": 1.2456, + "step": 25581 + }, + { + "epoch": 0.7511304245698515, + "grad_norm": 0.0, + "learning_rate": 3.076344766072642e-06, + "loss": 1.2393, + "step": 25582 + }, + { + "epoch": 0.7511597862469904, + "grad_norm": 0.0, + "learning_rate": 3.075658634843276e-06, + "loss": 1.2539, + "step": 25583 + }, + { + "epoch": 0.7511891479241294, + "grad_norm": 0.0, + "learning_rate": 3.0749725662325012e-06, + "loss": 1.2686, + "step": 25584 + }, + { + "epoch": 0.7512185096012685, + "grad_norm": 0.0, + "learning_rate": 3.074286560246521e-06, + "loss": 1.2217, + "step": 25585 + }, + { + "epoch": 0.7512478712784074, + "grad_norm": 0.0, + "learning_rate": 3.0736006168915435e-06, + "loss": 1.1699, + "step": 25586 + }, + { + "epoch": 0.7512772329555464, + "grad_norm": 0.0, + "learning_rate": 3.0729147361737666e-06, + "loss": 1.1045, + "step": 25587 + }, + { + "epoch": 0.7513065946326855, + "grad_norm": 0.0, + "learning_rate": 3.072228918099399e-06, + "loss": 1.2725, + "step": 25588 + }, + { + "epoch": 0.7513359563098244, + "grad_norm": 0.0, + "learning_rate": 3.0715431626746374e-06, + "loss": 1.3398, + "step": 25589 + }, + { + "epoch": 0.7513653179869634, + "grad_norm": 0.0, + "learning_rate": 3.070857469905687e-06, + "loss": 1.2803, + "step": 25590 + }, + { + "epoch": 0.7513946796641024, + "grad_norm": 0.0, + "learning_rate": 3.0701718397987477e-06, + "loss": 1.2959, + "step": 25591 + }, + { + "epoch": 0.7514240413412414, + "grad_norm": 0.0, + "learning_rate": 3.0694862723600193e-06, + "loss": 1.2832, + "step": 25592 + }, + { + "epoch": 0.7514534030183804, + "grad_norm": 0.0, + "learning_rate": 3.068800767595701e-06, + "loss": 1.2939, + "step": 25593 + }, + { + "epoch": 0.7514827646955194, + "grad_norm": 0.0, + "learning_rate": 3.06811532551199e-06, + "loss": 1.3467, + "step": 25594 + }, + { + "epoch": 0.7515121263726584, + "grad_norm": 0.0, + "learning_rate": 3.0674299461150902e-06, + "loss": 1.335, + "step": 25595 + }, + { + "epoch": 0.7515414880497974, + "grad_norm": 0.0, + "learning_rate": 3.0667446294111936e-06, + "loss": 1.3037, + "step": 25596 + }, + { + "epoch": 0.7515708497269364, + "grad_norm": 0.0, + "learning_rate": 3.0660593754065037e-06, + "loss": 1.3105, + "step": 25597 + }, + { + "epoch": 0.7516002114040754, + "grad_norm": 0.0, + "learning_rate": 3.0653741841072147e-06, + "loss": 1.2095, + "step": 25598 + }, + { + "epoch": 0.7516295730812144, + "grad_norm": 0.0, + "learning_rate": 3.064689055519524e-06, + "loss": 1.1558, + "step": 25599 + }, + { + "epoch": 0.7516589347583534, + "grad_norm": 0.0, + "learning_rate": 3.064003989649622e-06, + "loss": 1.0713, + "step": 25600 + }, + { + "epoch": 0.7516882964354924, + "grad_norm": 0.0, + "learning_rate": 3.0633189865037127e-06, + "loss": 1.2705, + "step": 25601 + }, + { + "epoch": 0.7517176581126314, + "grad_norm": 0.0, + "learning_rate": 3.0626340460879823e-06, + "loss": 1.3398, + "step": 25602 + }, + { + "epoch": 0.7517470197897704, + "grad_norm": 0.0, + "learning_rate": 3.0619491684086323e-06, + "loss": 1.1904, + "step": 25603 + }, + { + "epoch": 0.7517763814669094, + "grad_norm": 0.0, + "learning_rate": 3.0612643534718524e-06, + "loss": 1.0703, + "step": 25604 + }, + { + "epoch": 0.7518057431440484, + "grad_norm": 0.0, + "learning_rate": 3.060579601283833e-06, + "loss": 1.4004, + "step": 25605 + }, + { + "epoch": 0.7518351048211874, + "grad_norm": 0.0, + "learning_rate": 3.059894911850774e-06, + "loss": 1.2368, + "step": 25606 + }, + { + "epoch": 0.7518644664983264, + "grad_norm": 0.0, + "learning_rate": 3.059210285178861e-06, + "loss": 1.2227, + "step": 25607 + }, + { + "epoch": 0.7518938281754654, + "grad_norm": 0.0, + "learning_rate": 3.058525721274287e-06, + "loss": 1.2031, + "step": 25608 + }, + { + "epoch": 0.7519231898526044, + "grad_norm": 0.0, + "learning_rate": 3.0578412201432407e-06, + "loss": 1.2002, + "step": 25609 + }, + { + "epoch": 0.7519525515297434, + "grad_norm": 0.0, + "learning_rate": 3.0571567817919156e-06, + "loss": 1.166, + "step": 25610 + }, + { + "epoch": 0.7519819132068823, + "grad_norm": 0.0, + "learning_rate": 3.0564724062264982e-06, + "loss": 1.123, + "step": 25611 + }, + { + "epoch": 0.7520112748840214, + "grad_norm": 0.0, + "learning_rate": 3.055788093453181e-06, + "loss": 1.2251, + "step": 25612 + }, + { + "epoch": 0.7520406365611604, + "grad_norm": 0.0, + "learning_rate": 3.055103843478151e-06, + "loss": 1.2305, + "step": 25613 + }, + { + "epoch": 0.7520699982382993, + "grad_norm": 0.0, + "learning_rate": 3.0544196563075933e-06, + "loss": 1.1982, + "step": 25614 + }, + { + "epoch": 0.7520993599154384, + "grad_norm": 0.0, + "learning_rate": 3.0537355319477e-06, + "loss": 1.1826, + "step": 25615 + }, + { + "epoch": 0.7521287215925774, + "grad_norm": 0.0, + "learning_rate": 3.0530514704046543e-06, + "loss": 1.2012, + "step": 25616 + }, + { + "epoch": 0.7521580832697163, + "grad_norm": 0.0, + "learning_rate": 3.052367471684644e-06, + "loss": 1.2529, + "step": 25617 + }, + { + "epoch": 0.7521874449468554, + "grad_norm": 0.0, + "learning_rate": 3.051683535793851e-06, + "loss": 1.2871, + "step": 25618 + }, + { + "epoch": 0.7522168066239944, + "grad_norm": 0.0, + "learning_rate": 3.0509996627384665e-06, + "loss": 1.187, + "step": 25619 + }, + { + "epoch": 0.7522461683011333, + "grad_norm": 0.0, + "learning_rate": 3.050315852524669e-06, + "loss": 1.167, + "step": 25620 + }, + { + "epoch": 0.7522755299782724, + "grad_norm": 0.0, + "learning_rate": 3.0496321051586475e-06, + "loss": 1.1514, + "step": 25621 + }, + { + "epoch": 0.7523048916554114, + "grad_norm": 0.0, + "learning_rate": 3.0489484206465826e-06, + "loss": 1.2534, + "step": 25622 + }, + { + "epoch": 0.7523342533325503, + "grad_norm": 0.0, + "learning_rate": 3.0482647989946578e-06, + "loss": 1.041, + "step": 25623 + }, + { + "epoch": 0.7523636150096894, + "grad_norm": 0.0, + "learning_rate": 3.0475812402090553e-06, + "loss": 1.2617, + "step": 25624 + }, + { + "epoch": 0.7523929766868284, + "grad_norm": 0.0, + "learning_rate": 3.0468977442959523e-06, + "loss": 1.2832, + "step": 25625 + }, + { + "epoch": 0.7524223383639673, + "grad_norm": 0.0, + "learning_rate": 3.046214311261537e-06, + "loss": 1.2676, + "step": 25626 + }, + { + "epoch": 0.7524517000411064, + "grad_norm": 0.0, + "learning_rate": 3.045530941111984e-06, + "loss": 1.1729, + "step": 25627 + }, + { + "epoch": 0.7524810617182454, + "grad_norm": 0.0, + "learning_rate": 3.0448476338534784e-06, + "loss": 1.1616, + "step": 25628 + }, + { + "epoch": 0.7525104233953843, + "grad_norm": 0.0, + "learning_rate": 3.0441643894921934e-06, + "loss": 1.3467, + "step": 25629 + }, + { + "epoch": 0.7525397850725234, + "grad_norm": 0.0, + "learning_rate": 3.0434812080343148e-06, + "loss": 1.1997, + "step": 25630 + }, + { + "epoch": 0.7525691467496624, + "grad_norm": 0.0, + "learning_rate": 3.0427980894860166e-06, + "loss": 1.2285, + "step": 25631 + }, + { + "epoch": 0.7525985084268013, + "grad_norm": 0.0, + "learning_rate": 3.042115033853478e-06, + "loss": 1.2559, + "step": 25632 + }, + { + "epoch": 0.7526278701039404, + "grad_norm": 0.0, + "learning_rate": 3.0414320411428744e-06, + "loss": 1.0229, + "step": 25633 + }, + { + "epoch": 0.7526572317810793, + "grad_norm": 0.0, + "learning_rate": 3.0407491113603802e-06, + "loss": 1.1216, + "step": 25634 + }, + { + "epoch": 0.7526865934582183, + "grad_norm": 0.0, + "learning_rate": 3.0400662445121774e-06, + "loss": 1.2861, + "step": 25635 + }, + { + "epoch": 0.7527159551353574, + "grad_norm": 0.0, + "learning_rate": 3.0393834406044344e-06, + "loss": 1.3408, + "step": 25636 + }, + { + "epoch": 0.7527453168124963, + "grad_norm": 0.0, + "learning_rate": 3.0387006996433333e-06, + "loss": 1.1577, + "step": 25637 + }, + { + "epoch": 0.7527746784896353, + "grad_norm": 0.0, + "learning_rate": 3.038018021635041e-06, + "loss": 1.248, + "step": 25638 + }, + { + "epoch": 0.7528040401667744, + "grad_norm": 0.0, + "learning_rate": 3.0373354065857373e-06, + "loss": 1.2026, + "step": 25639 + }, + { + "epoch": 0.7528334018439133, + "grad_norm": 0.0, + "learning_rate": 3.036652854501594e-06, + "loss": 1.2773, + "step": 25640 + }, + { + "epoch": 0.7528627635210523, + "grad_norm": 0.0, + "learning_rate": 3.0359703653887816e-06, + "loss": 1.1523, + "step": 25641 + }, + { + "epoch": 0.7528921251981914, + "grad_norm": 0.0, + "learning_rate": 3.035287939253473e-06, + "loss": 1.2334, + "step": 25642 + }, + { + "epoch": 0.7529214868753303, + "grad_norm": 0.0, + "learning_rate": 3.0346055761018354e-06, + "loss": 1.2295, + "step": 25643 + }, + { + "epoch": 0.7529508485524693, + "grad_norm": 0.0, + "learning_rate": 3.033923275940047e-06, + "loss": 1.2061, + "step": 25644 + }, + { + "epoch": 0.7529802102296083, + "grad_norm": 0.0, + "learning_rate": 3.0332410387742707e-06, + "loss": 1.2637, + "step": 25645 + }, + { + "epoch": 0.7530095719067473, + "grad_norm": 0.0, + "learning_rate": 3.032558864610684e-06, + "loss": 1.3906, + "step": 25646 + }, + { + "epoch": 0.7530389335838863, + "grad_norm": 0.0, + "learning_rate": 3.0318767534554517e-06, + "loss": 1.1875, + "step": 25647 + }, + { + "epoch": 0.7530682952610253, + "grad_norm": 0.0, + "learning_rate": 3.031194705314742e-06, + "loss": 1.1553, + "step": 25648 + }, + { + "epoch": 0.7530976569381643, + "grad_norm": 0.0, + "learning_rate": 3.0305127201947206e-06, + "loss": 1.229, + "step": 25649 + }, + { + "epoch": 0.7531270186153033, + "grad_norm": 0.0, + "learning_rate": 3.029830798101561e-06, + "loss": 1.2764, + "step": 25650 + }, + { + "epoch": 0.7531563802924423, + "grad_norm": 0.0, + "learning_rate": 3.029148939041423e-06, + "loss": 1.1299, + "step": 25651 + }, + { + "epoch": 0.7531857419695813, + "grad_norm": 0.0, + "learning_rate": 3.0284671430204794e-06, + "loss": 1.2437, + "step": 25652 + }, + { + "epoch": 0.7532151036467203, + "grad_norm": 0.0, + "learning_rate": 3.0277854100448934e-06, + "loss": 1.2231, + "step": 25653 + }, + { + "epoch": 0.7532444653238592, + "grad_norm": 0.0, + "learning_rate": 3.0271037401208257e-06, + "loss": 1.1558, + "step": 25654 + }, + { + "epoch": 0.7532738270009983, + "grad_norm": 0.0, + "learning_rate": 3.0264221332544484e-06, + "loss": 1.1973, + "step": 25655 + }, + { + "epoch": 0.7533031886781373, + "grad_norm": 0.0, + "learning_rate": 3.025740589451921e-06, + "loss": 1.3105, + "step": 25656 + }, + { + "epoch": 0.7533325503552762, + "grad_norm": 0.0, + "learning_rate": 3.025059108719408e-06, + "loss": 1.3828, + "step": 25657 + }, + { + "epoch": 0.7533619120324153, + "grad_norm": 0.0, + "learning_rate": 3.0243776910630682e-06, + "loss": 1.2256, + "step": 25658 + }, + { + "epoch": 0.7533912737095543, + "grad_norm": 0.0, + "learning_rate": 3.0236963364890715e-06, + "loss": 1.1572, + "step": 25659 + }, + { + "epoch": 0.7534206353866932, + "grad_norm": 0.0, + "learning_rate": 3.0230150450035723e-06, + "loss": 1.1035, + "step": 25660 + }, + { + "epoch": 0.7534499970638323, + "grad_norm": 0.0, + "learning_rate": 3.0223338166127382e-06, + "loss": 1.2207, + "step": 25661 + }, + { + "epoch": 0.7534793587409713, + "grad_norm": 0.0, + "learning_rate": 3.021652651322725e-06, + "loss": 1.2861, + "step": 25662 + }, + { + "epoch": 0.7535087204181102, + "grad_norm": 0.0, + "learning_rate": 3.020971549139692e-06, + "loss": 1.2178, + "step": 25663 + }, + { + "epoch": 0.7535380820952493, + "grad_norm": 0.0, + "learning_rate": 3.0202905100698033e-06, + "loss": 1.1948, + "step": 25664 + }, + { + "epoch": 0.7535674437723883, + "grad_norm": 0.0, + "learning_rate": 3.0196095341192156e-06, + "loss": 1.1235, + "step": 25665 + }, + { + "epoch": 0.7535968054495272, + "grad_norm": 0.0, + "learning_rate": 3.018928621294086e-06, + "loss": 1.2959, + "step": 25666 + }, + { + "epoch": 0.7536261671266663, + "grad_norm": 0.0, + "learning_rate": 3.01824777160057e-06, + "loss": 1.207, + "step": 25667 + }, + { + "epoch": 0.7536555288038053, + "grad_norm": 0.0, + "learning_rate": 3.0175669850448306e-06, + "loss": 1.1357, + "step": 25668 + }, + { + "epoch": 0.7536848904809442, + "grad_norm": 0.0, + "learning_rate": 3.0168862616330173e-06, + "loss": 1.2441, + "step": 25669 + }, + { + "epoch": 0.7537142521580833, + "grad_norm": 0.0, + "learning_rate": 3.0162056013712947e-06, + "loss": 1.2334, + "step": 25670 + }, + { + "epoch": 0.7537436138352223, + "grad_norm": 0.0, + "learning_rate": 3.015525004265809e-06, + "loss": 1.2544, + "step": 25671 + }, + { + "epoch": 0.7537729755123612, + "grad_norm": 0.0, + "learning_rate": 3.0148444703227264e-06, + "loss": 1.2803, + "step": 25672 + }, + { + "epoch": 0.7538023371895003, + "grad_norm": 0.0, + "learning_rate": 3.01416399954819e-06, + "loss": 1.0493, + "step": 25673 + }, + { + "epoch": 0.7538316988666393, + "grad_norm": 0.0, + "learning_rate": 3.0134835919483554e-06, + "loss": 1.1011, + "step": 25674 + }, + { + "epoch": 0.7538610605437782, + "grad_norm": 0.0, + "learning_rate": 3.012803247529381e-06, + "loss": 1.2246, + "step": 25675 + }, + { + "epoch": 0.7538904222209173, + "grad_norm": 0.0, + "learning_rate": 3.0121229662974128e-06, + "loss": 1.1021, + "step": 25676 + }, + { + "epoch": 0.7539197838980563, + "grad_norm": 0.0, + "learning_rate": 3.0114427482586095e-06, + "loss": 1.1963, + "step": 25677 + }, + { + "epoch": 0.7539491455751952, + "grad_norm": 0.0, + "learning_rate": 3.0107625934191163e-06, + "loss": 1.145, + "step": 25678 + }, + { + "epoch": 0.7539785072523343, + "grad_norm": 0.0, + "learning_rate": 3.01008250178509e-06, + "loss": 1.2666, + "step": 25679 + }, + { + "epoch": 0.7540078689294732, + "grad_norm": 0.0, + "learning_rate": 3.009402473362677e-06, + "loss": 1.2832, + "step": 25680 + }, + { + "epoch": 0.7540372306066122, + "grad_norm": 0.0, + "learning_rate": 3.0087225081580284e-06, + "loss": 1.1821, + "step": 25681 + }, + { + "epoch": 0.7540665922837513, + "grad_norm": 0.0, + "learning_rate": 3.0080426061772937e-06, + "loss": 1.2085, + "step": 25682 + }, + { + "epoch": 0.7540959539608902, + "grad_norm": 0.0, + "learning_rate": 3.007362767426616e-06, + "loss": 1.4248, + "step": 25683 + }, + { + "epoch": 0.7541253156380292, + "grad_norm": 0.0, + "learning_rate": 3.006682991912151e-06, + "loss": 1.1509, + "step": 25684 + }, + { + "epoch": 0.7541546773151683, + "grad_norm": 0.0, + "learning_rate": 3.0060032796400395e-06, + "loss": 1.3066, + "step": 25685 + }, + { + "epoch": 0.7541840389923072, + "grad_norm": 0.0, + "learning_rate": 3.005323630616435e-06, + "loss": 1.2773, + "step": 25686 + }, + { + "epoch": 0.7542134006694462, + "grad_norm": 0.0, + "learning_rate": 3.004644044847477e-06, + "loss": 1.1895, + "step": 25687 + }, + { + "epoch": 0.7542427623465853, + "grad_norm": 0.0, + "learning_rate": 3.003964522339318e-06, + "loss": 1.3467, + "step": 25688 + }, + { + "epoch": 0.7542721240237242, + "grad_norm": 0.0, + "learning_rate": 3.003285063098098e-06, + "loss": 1.2236, + "step": 25689 + }, + { + "epoch": 0.7543014857008632, + "grad_norm": 0.0, + "learning_rate": 3.0026056671299643e-06, + "loss": 1.2212, + "step": 25690 + }, + { + "epoch": 0.7543308473780023, + "grad_norm": 0.0, + "learning_rate": 3.00192633444106e-06, + "loss": 1.2007, + "step": 25691 + }, + { + "epoch": 0.7543602090551412, + "grad_norm": 0.0, + "learning_rate": 3.0012470650375237e-06, + "loss": 1.2056, + "step": 25692 + }, + { + "epoch": 0.7543895707322802, + "grad_norm": 0.0, + "learning_rate": 3.000567858925506e-06, + "loss": 1.1357, + "step": 25693 + }, + { + "epoch": 0.7544189324094193, + "grad_norm": 0.0, + "learning_rate": 2.9998887161111434e-06, + "loss": 1.1934, + "step": 25694 + }, + { + "epoch": 0.7544482940865582, + "grad_norm": 0.0, + "learning_rate": 2.999209636600583e-06, + "loss": 1.2104, + "step": 25695 + }, + { + "epoch": 0.7544776557636972, + "grad_norm": 0.0, + "learning_rate": 2.9985306203999587e-06, + "loss": 1.1343, + "step": 25696 + }, + { + "epoch": 0.7545070174408363, + "grad_norm": 0.0, + "learning_rate": 2.9978516675154223e-06, + "loss": 1.1294, + "step": 25697 + }, + { + "epoch": 0.7545363791179752, + "grad_norm": 0.0, + "learning_rate": 2.9971727779531002e-06, + "loss": 1.1387, + "step": 25698 + }, + { + "epoch": 0.7545657407951142, + "grad_norm": 0.0, + "learning_rate": 2.9964939517191405e-06, + "loss": 1.1416, + "step": 25699 + }, + { + "epoch": 0.7545951024722533, + "grad_norm": 0.0, + "learning_rate": 2.9958151888196774e-06, + "loss": 1.2856, + "step": 25700 + }, + { + "epoch": 0.7546244641493922, + "grad_norm": 0.0, + "learning_rate": 2.9951364892608536e-06, + "loss": 1.1885, + "step": 25701 + }, + { + "epoch": 0.7546538258265312, + "grad_norm": 0.0, + "learning_rate": 2.9944578530488045e-06, + "loss": 1.2529, + "step": 25702 + }, + { + "epoch": 0.7546831875036702, + "grad_norm": 0.0, + "learning_rate": 2.993779280189665e-06, + "loss": 1.2168, + "step": 25703 + }, + { + "epoch": 0.7547125491808092, + "grad_norm": 0.0, + "learning_rate": 2.9931007706895765e-06, + "loss": 1.1328, + "step": 25704 + }, + { + "epoch": 0.7547419108579482, + "grad_norm": 0.0, + "learning_rate": 2.9924223245546723e-06, + "loss": 1.3398, + "step": 25705 + }, + { + "epoch": 0.7547712725350872, + "grad_norm": 0.0, + "learning_rate": 2.991743941791088e-06, + "loss": 1.0918, + "step": 25706 + }, + { + "epoch": 0.7548006342122262, + "grad_norm": 0.0, + "learning_rate": 2.991065622404954e-06, + "loss": 1.3486, + "step": 25707 + }, + { + "epoch": 0.7548299958893652, + "grad_norm": 0.0, + "learning_rate": 2.9903873664024117e-06, + "loss": 1.2451, + "step": 25708 + }, + { + "epoch": 0.7548593575665042, + "grad_norm": 0.0, + "learning_rate": 2.989709173789589e-06, + "loss": 1.1909, + "step": 25709 + }, + { + "epoch": 0.7548887192436432, + "grad_norm": 0.0, + "learning_rate": 2.9890310445726244e-06, + "loss": 1.3584, + "step": 25710 + }, + { + "epoch": 0.7549180809207822, + "grad_norm": 0.0, + "learning_rate": 2.988352978757648e-06, + "loss": 1.2563, + "step": 25711 + }, + { + "epoch": 0.7549474425979212, + "grad_norm": 0.0, + "learning_rate": 2.9876749763507886e-06, + "loss": 1.2485, + "step": 25712 + }, + { + "epoch": 0.7549768042750602, + "grad_norm": 0.0, + "learning_rate": 2.9869970373581824e-06, + "loss": 1.3037, + "step": 25713 + }, + { + "epoch": 0.7550061659521992, + "grad_norm": 0.0, + "learning_rate": 2.986319161785959e-06, + "loss": 1.125, + "step": 25714 + }, + { + "epoch": 0.7550355276293382, + "grad_norm": 0.0, + "learning_rate": 2.985641349640247e-06, + "loss": 1.3154, + "step": 25715 + }, + { + "epoch": 0.7550648893064772, + "grad_norm": 0.0, + "learning_rate": 2.9849636009271744e-06, + "loss": 1.4326, + "step": 25716 + }, + { + "epoch": 0.7550942509836162, + "grad_norm": 0.0, + "learning_rate": 2.9842859156528746e-06, + "loss": 1.1528, + "step": 25717 + }, + { + "epoch": 0.7551236126607552, + "grad_norm": 0.0, + "learning_rate": 2.983608293823471e-06, + "loss": 1.3477, + "step": 25718 + }, + { + "epoch": 0.7551529743378942, + "grad_norm": 0.0, + "learning_rate": 2.9829307354450985e-06, + "loss": 1.2158, + "step": 25719 + }, + { + "epoch": 0.7551823360150332, + "grad_norm": 0.0, + "learning_rate": 2.9822532405238768e-06, + "loss": 1.209, + "step": 25720 + }, + { + "epoch": 0.7552116976921722, + "grad_norm": 0.0, + "learning_rate": 2.9815758090659396e-06, + "loss": 1.2109, + "step": 25721 + }, + { + "epoch": 0.7552410593693112, + "grad_norm": 0.0, + "learning_rate": 2.9808984410774123e-06, + "loss": 1.2793, + "step": 25722 + }, + { + "epoch": 0.7552704210464501, + "grad_norm": 0.0, + "learning_rate": 2.980221136564413e-06, + "loss": 1.2314, + "step": 25723 + }, + { + "epoch": 0.7552997827235892, + "grad_norm": 0.0, + "learning_rate": 2.9795438955330735e-06, + "loss": 1.2197, + "step": 25724 + }, + { + "epoch": 0.7553291444007282, + "grad_norm": 0.0, + "learning_rate": 2.9788667179895135e-06, + "loss": 1.2793, + "step": 25725 + }, + { + "epoch": 0.7553585060778671, + "grad_norm": 0.0, + "learning_rate": 2.9781896039398637e-06, + "loss": 1.2979, + "step": 25726 + }, + { + "epoch": 0.7553878677550062, + "grad_norm": 0.0, + "learning_rate": 2.9775125533902403e-06, + "loss": 1.2402, + "step": 25727 + }, + { + "epoch": 0.7554172294321452, + "grad_norm": 0.0, + "learning_rate": 2.976835566346773e-06, + "loss": 1.231, + "step": 25728 + }, + { + "epoch": 0.7554465911092841, + "grad_norm": 0.0, + "learning_rate": 2.9761586428155797e-06, + "loss": 1.3262, + "step": 25729 + }, + { + "epoch": 0.7554759527864232, + "grad_norm": 0.0, + "learning_rate": 2.9754817828027826e-06, + "loss": 1.3354, + "step": 25730 + }, + { + "epoch": 0.7555053144635622, + "grad_norm": 0.0, + "learning_rate": 2.9748049863145033e-06, + "loss": 1.0938, + "step": 25731 + }, + { + "epoch": 0.7555346761407011, + "grad_norm": 0.0, + "learning_rate": 2.9741282533568583e-06, + "loss": 1.2188, + "step": 25732 + }, + { + "epoch": 0.7555640378178402, + "grad_norm": 0.0, + "learning_rate": 2.9734515839359733e-06, + "loss": 1.2207, + "step": 25733 + }, + { + "epoch": 0.7555933994949792, + "grad_norm": 0.0, + "learning_rate": 2.9727749780579627e-06, + "loss": 1.2363, + "step": 25734 + }, + { + "epoch": 0.7556227611721181, + "grad_norm": 0.0, + "learning_rate": 2.9720984357289504e-06, + "loss": 1.1025, + "step": 25735 + }, + { + "epoch": 0.7556521228492572, + "grad_norm": 0.0, + "learning_rate": 2.9714219569550496e-06, + "loss": 1.1929, + "step": 25736 + }, + { + "epoch": 0.7556814845263962, + "grad_norm": 0.0, + "learning_rate": 2.9707455417423826e-06, + "loss": 1.3281, + "step": 25737 + }, + { + "epoch": 0.7557108462035351, + "grad_norm": 0.0, + "learning_rate": 2.9700691900970636e-06, + "loss": 1.3975, + "step": 25738 + }, + { + "epoch": 0.7557402078806742, + "grad_norm": 0.0, + "learning_rate": 2.96939290202521e-06, + "loss": 1.3345, + "step": 25739 + }, + { + "epoch": 0.7557695695578132, + "grad_norm": 0.0, + "learning_rate": 2.968716677532937e-06, + "loss": 1.2773, + "step": 25740 + }, + { + "epoch": 0.7557989312349521, + "grad_norm": 0.0, + "learning_rate": 2.968040516626356e-06, + "loss": 1.3037, + "step": 25741 + }, + { + "epoch": 0.7558282929120912, + "grad_norm": 0.0, + "learning_rate": 2.967364419311589e-06, + "loss": 1.3115, + "step": 25742 + }, + { + "epoch": 0.7558576545892302, + "grad_norm": 0.0, + "learning_rate": 2.9666883855947438e-06, + "loss": 1.2041, + "step": 25743 + }, + { + "epoch": 0.7558870162663691, + "grad_norm": 0.0, + "learning_rate": 2.966012415481939e-06, + "loss": 1.1299, + "step": 25744 + }, + { + "epoch": 0.7559163779435082, + "grad_norm": 0.0, + "learning_rate": 2.965336508979283e-06, + "loss": 1.3262, + "step": 25745 + }, + { + "epoch": 0.7559457396206472, + "grad_norm": 0.0, + "learning_rate": 2.9646606660928933e-06, + "loss": 1.2627, + "step": 25746 + }, + { + "epoch": 0.7559751012977861, + "grad_norm": 0.0, + "learning_rate": 2.963984886828879e-06, + "loss": 1.271, + "step": 25747 + }, + { + "epoch": 0.7560044629749251, + "grad_norm": 0.0, + "learning_rate": 2.9633091711933514e-06, + "loss": 1.2178, + "step": 25748 + }, + { + "epoch": 0.7560338246520641, + "grad_norm": 0.0, + "learning_rate": 2.9626335191924183e-06, + "loss": 1.1807, + "step": 25749 + }, + { + "epoch": 0.7560631863292031, + "grad_norm": 0.0, + "learning_rate": 2.9619579308321957e-06, + "loss": 1.4258, + "step": 25750 + }, + { + "epoch": 0.7560925480063421, + "grad_norm": 0.0, + "learning_rate": 2.9612824061187894e-06, + "loss": 1.0229, + "step": 25751 + }, + { + "epoch": 0.7561219096834811, + "grad_norm": 0.0, + "learning_rate": 2.9606069450583065e-06, + "loss": 1.2842, + "step": 25752 + }, + { + "epoch": 0.7561512713606201, + "grad_norm": 0.0, + "learning_rate": 2.95993154765686e-06, + "loss": 1.0972, + "step": 25753 + }, + { + "epoch": 0.7561806330377591, + "grad_norm": 0.0, + "learning_rate": 2.959256213920556e-06, + "loss": 1.2197, + "step": 25754 + }, + { + "epoch": 0.7562099947148981, + "grad_norm": 0.0, + "learning_rate": 2.9585809438555015e-06, + "loss": 1.1118, + "step": 25755 + }, + { + "epoch": 0.7562393563920371, + "grad_norm": 0.0, + "learning_rate": 2.957905737467799e-06, + "loss": 1.3096, + "step": 25756 + }, + { + "epoch": 0.7562687180691761, + "grad_norm": 0.0, + "learning_rate": 2.9572305947635626e-06, + "loss": 1.2119, + "step": 25757 + }, + { + "epoch": 0.7562980797463151, + "grad_norm": 0.0, + "learning_rate": 2.95655551574889e-06, + "loss": 1.2646, + "step": 25758 + }, + { + "epoch": 0.7563274414234541, + "grad_norm": 0.0, + "learning_rate": 2.955880500429893e-06, + "loss": 1.2549, + "step": 25759 + }, + { + "epoch": 0.7563568031005931, + "grad_norm": 0.0, + "learning_rate": 2.9552055488126717e-06, + "loss": 1.249, + "step": 25760 + }, + { + "epoch": 0.7563861647777321, + "grad_norm": 0.0, + "learning_rate": 2.9545306609033285e-06, + "loss": 1.1724, + "step": 25761 + }, + { + "epoch": 0.7564155264548711, + "grad_norm": 0.0, + "learning_rate": 2.9538558367079727e-06, + "loss": 1.333, + "step": 25762 + }, + { + "epoch": 0.75644488813201, + "grad_norm": 0.0, + "learning_rate": 2.953181076232702e-06, + "loss": 1.2549, + "step": 25763 + }, + { + "epoch": 0.7564742498091491, + "grad_norm": 0.0, + "learning_rate": 2.9525063794836194e-06, + "loss": 1.3418, + "step": 25764 + }, + { + "epoch": 0.7565036114862881, + "grad_norm": 0.0, + "learning_rate": 2.9518317464668244e-06, + "loss": 1.1909, + "step": 25765 + }, + { + "epoch": 0.756532973163427, + "grad_norm": 0.0, + "learning_rate": 2.9511571771884227e-06, + "loss": 1.1514, + "step": 25766 + }, + { + "epoch": 0.7565623348405661, + "grad_norm": 0.0, + "learning_rate": 2.9504826716545086e-06, + "loss": 1.2744, + "step": 25767 + }, + { + "epoch": 0.7565916965177051, + "grad_norm": 0.0, + "learning_rate": 2.9498082298711884e-06, + "loss": 1.1997, + "step": 25768 + }, + { + "epoch": 0.756621058194844, + "grad_norm": 0.0, + "learning_rate": 2.9491338518445545e-06, + "loss": 1.2471, + "step": 25769 + }, + { + "epoch": 0.7566504198719831, + "grad_norm": 0.0, + "learning_rate": 2.9484595375807125e-06, + "loss": 1.2559, + "step": 25770 + }, + { + "epoch": 0.7566797815491221, + "grad_norm": 0.0, + "learning_rate": 2.947785287085757e-06, + "loss": 1.3223, + "step": 25771 + }, + { + "epoch": 0.756709143226261, + "grad_norm": 0.0, + "learning_rate": 2.947111100365785e-06, + "loss": 1.1118, + "step": 25772 + }, + { + "epoch": 0.7567385049034001, + "grad_norm": 0.0, + "learning_rate": 2.9464369774268932e-06, + "loss": 1.3457, + "step": 25773 + }, + { + "epoch": 0.7567678665805391, + "grad_norm": 0.0, + "learning_rate": 2.9457629182751758e-06, + "loss": 1.2266, + "step": 25774 + }, + { + "epoch": 0.756797228257678, + "grad_norm": 0.0, + "learning_rate": 2.9450889229167334e-06, + "loss": 1.1343, + "step": 25775 + }, + { + "epoch": 0.7568265899348171, + "grad_norm": 0.0, + "learning_rate": 2.944414991357656e-06, + "loss": 1.1226, + "step": 25776 + }, + { + "epoch": 0.7568559516119561, + "grad_norm": 0.0, + "learning_rate": 2.943741123604044e-06, + "loss": 1.2822, + "step": 25777 + }, + { + "epoch": 0.756885313289095, + "grad_norm": 0.0, + "learning_rate": 2.9430673196619876e-06, + "loss": 1.3018, + "step": 25778 + }, + { + "epoch": 0.7569146749662341, + "grad_norm": 0.0, + "learning_rate": 2.9423935795375814e-06, + "loss": 1.3062, + "step": 25779 + }, + { + "epoch": 0.7569440366433731, + "grad_norm": 0.0, + "learning_rate": 2.941719903236917e-06, + "loss": 1.1382, + "step": 25780 + }, + { + "epoch": 0.756973398320512, + "grad_norm": 0.0, + "learning_rate": 2.941046290766084e-06, + "loss": 1.1812, + "step": 25781 + }, + { + "epoch": 0.7570027599976511, + "grad_norm": 0.0, + "learning_rate": 2.9403727421311803e-06, + "loss": 1.2979, + "step": 25782 + }, + { + "epoch": 0.7570321216747901, + "grad_norm": 0.0, + "learning_rate": 2.939699257338291e-06, + "loss": 1.2021, + "step": 25783 + }, + { + "epoch": 0.757061483351929, + "grad_norm": 0.0, + "learning_rate": 2.939025836393513e-06, + "loss": 1.3545, + "step": 25784 + }, + { + "epoch": 0.7570908450290681, + "grad_norm": 0.0, + "learning_rate": 2.9383524793029293e-06, + "loss": 1.3984, + "step": 25785 + }, + { + "epoch": 0.757120206706207, + "grad_norm": 0.0, + "learning_rate": 2.937679186072635e-06, + "loss": 1.2173, + "step": 25786 + }, + { + "epoch": 0.757149568383346, + "grad_norm": 0.0, + "learning_rate": 2.937005956708717e-06, + "loss": 1.2539, + "step": 25787 + }, + { + "epoch": 0.7571789300604851, + "grad_norm": 0.0, + "learning_rate": 2.9363327912172633e-06, + "loss": 1.146, + "step": 25788 + }, + { + "epoch": 0.757208291737624, + "grad_norm": 0.0, + "learning_rate": 2.9356596896043587e-06, + "loss": 1.2666, + "step": 25789 + }, + { + "epoch": 0.757237653414763, + "grad_norm": 0.0, + "learning_rate": 2.934986651876095e-06, + "loss": 1.248, + "step": 25790 + }, + { + "epoch": 0.7572670150919021, + "grad_norm": 0.0, + "learning_rate": 2.934313678038556e-06, + "loss": 1.1895, + "step": 25791 + }, + { + "epoch": 0.757296376769041, + "grad_norm": 0.0, + "learning_rate": 2.9336407680978254e-06, + "loss": 1.2842, + "step": 25792 + }, + { + "epoch": 0.75732573844618, + "grad_norm": 0.0, + "learning_rate": 2.932967922059995e-06, + "loss": 1.3525, + "step": 25793 + }, + { + "epoch": 0.7573551001233191, + "grad_norm": 0.0, + "learning_rate": 2.9322951399311405e-06, + "loss": 1.1978, + "step": 25794 + }, + { + "epoch": 0.757384461800458, + "grad_norm": 0.0, + "learning_rate": 2.931622421717356e-06, + "loss": 1.0234, + "step": 25795 + }, + { + "epoch": 0.757413823477597, + "grad_norm": 0.0, + "learning_rate": 2.930949767424719e-06, + "loss": 1.1401, + "step": 25796 + }, + { + "epoch": 0.7574431851547361, + "grad_norm": 0.0, + "learning_rate": 2.930277177059314e-06, + "loss": 1.252, + "step": 25797 + }, + { + "epoch": 0.757472546831875, + "grad_norm": 0.0, + "learning_rate": 2.9296046506272203e-06, + "loss": 1.1943, + "step": 25798 + }, + { + "epoch": 0.757501908509014, + "grad_norm": 0.0, + "learning_rate": 2.9289321881345257e-06, + "loss": 1.2158, + "step": 25799 + }, + { + "epoch": 0.7575312701861531, + "grad_norm": 0.0, + "learning_rate": 2.9282597895873078e-06, + "loss": 1.2754, + "step": 25800 + }, + { + "epoch": 0.757560631863292, + "grad_norm": 0.0, + "learning_rate": 2.927587454991645e-06, + "loss": 1.2803, + "step": 25801 + }, + { + "epoch": 0.757589993540431, + "grad_norm": 0.0, + "learning_rate": 2.9269151843536226e-06, + "loss": 1.2139, + "step": 25802 + }, + { + "epoch": 0.7576193552175701, + "grad_norm": 0.0, + "learning_rate": 2.9262429776793187e-06, + "loss": 1.1792, + "step": 25803 + }, + { + "epoch": 0.757648716894709, + "grad_norm": 0.0, + "learning_rate": 2.9255708349748103e-06, + "loss": 1.2705, + "step": 25804 + }, + { + "epoch": 0.757678078571848, + "grad_norm": 0.0, + "learning_rate": 2.924898756246174e-06, + "loss": 1.207, + "step": 25805 + }, + { + "epoch": 0.7577074402489871, + "grad_norm": 0.0, + "learning_rate": 2.924226741499493e-06, + "loss": 1.2007, + "step": 25806 + }, + { + "epoch": 0.757736801926126, + "grad_norm": 0.0, + "learning_rate": 2.923554790740838e-06, + "loss": 1.2656, + "step": 25807 + }, + { + "epoch": 0.757766163603265, + "grad_norm": 0.0, + "learning_rate": 2.922882903976293e-06, + "loss": 1.2568, + "step": 25808 + }, + { + "epoch": 0.7577955252804041, + "grad_norm": 0.0, + "learning_rate": 2.922211081211931e-06, + "loss": 1.3027, + "step": 25809 + }, + { + "epoch": 0.757824886957543, + "grad_norm": 0.0, + "learning_rate": 2.9215393224538233e-06, + "loss": 1.3379, + "step": 25810 + }, + { + "epoch": 0.757854248634682, + "grad_norm": 0.0, + "learning_rate": 2.9208676277080503e-06, + "loss": 1.3262, + "step": 25811 + }, + { + "epoch": 0.757883610311821, + "grad_norm": 0.0, + "learning_rate": 2.9201959969806857e-06, + "loss": 1.1655, + "step": 25812 + }, + { + "epoch": 0.75791297198896, + "grad_norm": 0.0, + "learning_rate": 2.9195244302778027e-06, + "loss": 1.3096, + "step": 25813 + }, + { + "epoch": 0.757942333666099, + "grad_norm": 0.0, + "learning_rate": 2.918852927605469e-06, + "loss": 1.0522, + "step": 25814 + }, + { + "epoch": 0.757971695343238, + "grad_norm": 0.0, + "learning_rate": 2.9181814889697647e-06, + "loss": 1.1064, + "step": 25815 + }, + { + "epoch": 0.758001057020377, + "grad_norm": 0.0, + "learning_rate": 2.917510114376756e-06, + "loss": 1.2607, + "step": 25816 + }, + { + "epoch": 0.758030418697516, + "grad_norm": 0.0, + "learning_rate": 2.916838803832521e-06, + "loss": 1.0654, + "step": 25817 + }, + { + "epoch": 0.758059780374655, + "grad_norm": 0.0, + "learning_rate": 2.916167557343124e-06, + "loss": 1.1216, + "step": 25818 + }, + { + "epoch": 0.758089142051794, + "grad_norm": 0.0, + "learning_rate": 2.9154963749146394e-06, + "loss": 1.2627, + "step": 25819 + }, + { + "epoch": 0.758118503728933, + "grad_norm": 0.0, + "learning_rate": 2.9148252565531364e-06, + "loss": 1.1953, + "step": 25820 + }, + { + "epoch": 0.758147865406072, + "grad_norm": 0.0, + "learning_rate": 2.914154202264683e-06, + "loss": 1.2773, + "step": 25821 + }, + { + "epoch": 0.758177227083211, + "grad_norm": 0.0, + "learning_rate": 2.913483212055348e-06, + "loss": 1.103, + "step": 25822 + }, + { + "epoch": 0.75820658876035, + "grad_norm": 0.0, + "learning_rate": 2.9128122859311957e-06, + "loss": 1.2412, + "step": 25823 + }, + { + "epoch": 0.758235950437489, + "grad_norm": 0.0, + "learning_rate": 2.9121414238983014e-06, + "loss": 1.189, + "step": 25824 + }, + { + "epoch": 0.758265312114628, + "grad_norm": 0.0, + "learning_rate": 2.9114706259627225e-06, + "loss": 1.2075, + "step": 25825 + }, + { + "epoch": 0.758294673791767, + "grad_norm": 0.0, + "learning_rate": 2.9107998921305347e-06, + "loss": 1.2051, + "step": 25826 + }, + { + "epoch": 0.758324035468906, + "grad_norm": 0.0, + "learning_rate": 2.9101292224077983e-06, + "loss": 1.2344, + "step": 25827 + }, + { + "epoch": 0.758353397146045, + "grad_norm": 0.0, + "learning_rate": 2.9094586168005788e-06, + "loss": 1.2598, + "step": 25828 + }, + { + "epoch": 0.758382758823184, + "grad_norm": 0.0, + "learning_rate": 2.9087880753149413e-06, + "loss": 1.1895, + "step": 25829 + }, + { + "epoch": 0.758412120500323, + "grad_norm": 0.0, + "learning_rate": 2.9081175979569464e-06, + "loss": 1.1875, + "step": 25830 + }, + { + "epoch": 0.758441482177462, + "grad_norm": 0.0, + "learning_rate": 2.9074471847326635e-06, + "loss": 1.2012, + "step": 25831 + }, + { + "epoch": 0.758470843854601, + "grad_norm": 0.0, + "learning_rate": 2.9067768356481485e-06, + "loss": 1.3711, + "step": 25832 + }, + { + "epoch": 0.75850020553174, + "grad_norm": 0.0, + "learning_rate": 2.906106550709471e-06, + "loss": 1.2251, + "step": 25833 + }, + { + "epoch": 0.758529567208879, + "grad_norm": 0.0, + "learning_rate": 2.905436329922685e-06, + "loss": 1.2939, + "step": 25834 + }, + { + "epoch": 0.758558928886018, + "grad_norm": 0.0, + "learning_rate": 2.9047661732938583e-06, + "loss": 1.1943, + "step": 25835 + }, + { + "epoch": 0.758588290563157, + "grad_norm": 0.0, + "learning_rate": 2.9040960808290488e-06, + "loss": 1.1572, + "step": 25836 + }, + { + "epoch": 0.758617652240296, + "grad_norm": 0.0, + "learning_rate": 2.903426052534315e-06, + "loss": 1.1948, + "step": 25837 + }, + { + "epoch": 0.7586470139174349, + "grad_norm": 0.0, + "learning_rate": 2.902756088415715e-06, + "loss": 1.105, + "step": 25838 + }, + { + "epoch": 0.758676375594574, + "grad_norm": 0.0, + "learning_rate": 2.9020861884793105e-06, + "loss": 1.2441, + "step": 25839 + }, + { + "epoch": 0.758705737271713, + "grad_norm": 0.0, + "learning_rate": 2.90141635273116e-06, + "loss": 1.3438, + "step": 25840 + }, + { + "epoch": 0.7587350989488519, + "grad_norm": 0.0, + "learning_rate": 2.900746581177316e-06, + "loss": 1.2891, + "step": 25841 + }, + { + "epoch": 0.758764460625991, + "grad_norm": 0.0, + "learning_rate": 2.9000768738238414e-06, + "loss": 1.2402, + "step": 25842 + }, + { + "epoch": 0.75879382230313, + "grad_norm": 0.0, + "learning_rate": 2.8994072306767873e-06, + "loss": 1.2607, + "step": 25843 + }, + { + "epoch": 0.7588231839802689, + "grad_norm": 0.0, + "learning_rate": 2.898737651742215e-06, + "loss": 1.1587, + "step": 25844 + }, + { + "epoch": 0.758852545657408, + "grad_norm": 0.0, + "learning_rate": 2.8980681370261765e-06, + "loss": 1.291, + "step": 25845 + }, + { + "epoch": 0.758881907334547, + "grad_norm": 0.0, + "learning_rate": 2.897398686534727e-06, + "loss": 1.1387, + "step": 25846 + }, + { + "epoch": 0.7589112690116859, + "grad_norm": 0.0, + "learning_rate": 2.896729300273917e-06, + "loss": 1.1162, + "step": 25847 + }, + { + "epoch": 0.7589406306888249, + "grad_norm": 0.0, + "learning_rate": 2.8960599782498065e-06, + "loss": 1.3101, + "step": 25848 + }, + { + "epoch": 0.758969992365964, + "grad_norm": 0.0, + "learning_rate": 2.895390720468445e-06, + "loss": 1.2598, + "step": 25849 + }, + { + "epoch": 0.7589993540431029, + "grad_norm": 0.0, + "learning_rate": 2.8947215269358807e-06, + "loss": 1.208, + "step": 25850 + }, + { + "epoch": 0.7590287157202419, + "grad_norm": 0.0, + "learning_rate": 2.8940523976581738e-06, + "loss": 1.1499, + "step": 25851 + }, + { + "epoch": 0.759058077397381, + "grad_norm": 0.0, + "learning_rate": 2.8933833326413697e-06, + "loss": 1.2949, + "step": 25852 + }, + { + "epoch": 0.7590874390745199, + "grad_norm": 0.0, + "learning_rate": 2.8927143318915207e-06, + "loss": 1.3105, + "step": 25853 + }, + { + "epoch": 0.7591168007516589, + "grad_norm": 0.0, + "learning_rate": 2.892045395414673e-06, + "loss": 1.1802, + "step": 25854 + }, + { + "epoch": 0.759146162428798, + "grad_norm": 0.0, + "learning_rate": 2.8913765232168824e-06, + "loss": 1.2578, + "step": 25855 + }, + { + "epoch": 0.7591755241059369, + "grad_norm": 0.0, + "learning_rate": 2.890707715304192e-06, + "loss": 1.3643, + "step": 25856 + }, + { + "epoch": 0.7592048857830759, + "grad_norm": 0.0, + "learning_rate": 2.890038971682655e-06, + "loss": 1.249, + "step": 25857 + }, + { + "epoch": 0.759234247460215, + "grad_norm": 0.0, + "learning_rate": 2.8893702923583123e-06, + "loss": 1.4033, + "step": 25858 + }, + { + "epoch": 0.7592636091373539, + "grad_norm": 0.0, + "learning_rate": 2.888701677337219e-06, + "loss": 1.2803, + "step": 25859 + }, + { + "epoch": 0.7592929708144929, + "grad_norm": 0.0, + "learning_rate": 2.888033126625418e-06, + "loss": 1.2686, + "step": 25860 + }, + { + "epoch": 0.759322332491632, + "grad_norm": 0.0, + "learning_rate": 2.8873646402289544e-06, + "loss": 1.1758, + "step": 25861 + }, + { + "epoch": 0.7593516941687709, + "grad_norm": 0.0, + "learning_rate": 2.886696218153875e-06, + "loss": 1.2285, + "step": 25862 + }, + { + "epoch": 0.7593810558459099, + "grad_norm": 0.0, + "learning_rate": 2.8860278604062186e-06, + "loss": 1.2427, + "step": 25863 + }, + { + "epoch": 0.7594104175230489, + "grad_norm": 0.0, + "learning_rate": 2.8853595669920386e-06, + "loss": 1.3184, + "step": 25864 + }, + { + "epoch": 0.7594397792001879, + "grad_norm": 0.0, + "learning_rate": 2.88469133791737e-06, + "loss": 1.2861, + "step": 25865 + }, + { + "epoch": 0.7594691408773269, + "grad_norm": 0.0, + "learning_rate": 2.884023173188264e-06, + "loss": 1.2334, + "step": 25866 + }, + { + "epoch": 0.7594985025544659, + "grad_norm": 0.0, + "learning_rate": 2.8833550728107552e-06, + "loss": 1.2476, + "step": 25867 + }, + { + "epoch": 0.7595278642316049, + "grad_norm": 0.0, + "learning_rate": 2.882687036790892e-06, + "loss": 1.1152, + "step": 25868 + }, + { + "epoch": 0.7595572259087439, + "grad_norm": 0.0, + "learning_rate": 2.8820190651347135e-06, + "loss": 1.2729, + "step": 25869 + }, + { + "epoch": 0.7595865875858829, + "grad_norm": 0.0, + "learning_rate": 2.881351157848259e-06, + "loss": 1.2451, + "step": 25870 + }, + { + "epoch": 0.7596159492630219, + "grad_norm": 0.0, + "learning_rate": 2.880683314937569e-06, + "loss": 1.2554, + "step": 25871 + }, + { + "epoch": 0.7596453109401609, + "grad_norm": 0.0, + "learning_rate": 2.8800155364086803e-06, + "loss": 1.146, + "step": 25872 + }, + { + "epoch": 0.7596746726172999, + "grad_norm": 0.0, + "learning_rate": 2.8793478222676376e-06, + "loss": 1.2471, + "step": 25873 + }, + { + "epoch": 0.7597040342944389, + "grad_norm": 0.0, + "learning_rate": 2.8786801725204727e-06, + "loss": 1.3008, + "step": 25874 + }, + { + "epoch": 0.7597333959715779, + "grad_norm": 0.0, + "learning_rate": 2.87801258717323e-06, + "loss": 1.2031, + "step": 25875 + }, + { + "epoch": 0.7597627576487169, + "grad_norm": 0.0, + "learning_rate": 2.8773450662319402e-06, + "loss": 1.4434, + "step": 25876 + }, + { + "epoch": 0.7597921193258559, + "grad_norm": 0.0, + "learning_rate": 2.8766776097026506e-06, + "loss": 1.1973, + "step": 25877 + }, + { + "epoch": 0.7598214810029948, + "grad_norm": 0.0, + "learning_rate": 2.876010217591385e-06, + "loss": 1.1885, + "step": 25878 + }, + { + "epoch": 0.7598508426801339, + "grad_norm": 0.0, + "learning_rate": 2.8753428899041813e-06, + "loss": 1.1348, + "step": 25879 + }, + { + "epoch": 0.7598802043572729, + "grad_norm": 0.0, + "learning_rate": 2.8746756266470786e-06, + "loss": 1.1963, + "step": 25880 + }, + { + "epoch": 0.7599095660344118, + "grad_norm": 0.0, + "learning_rate": 2.874008427826107e-06, + "loss": 1.1973, + "step": 25881 + }, + { + "epoch": 0.7599389277115509, + "grad_norm": 0.0, + "learning_rate": 2.873341293447305e-06, + "loss": 1.2256, + "step": 25882 + }, + { + "epoch": 0.7599682893886899, + "grad_norm": 0.0, + "learning_rate": 2.8726742235166992e-06, + "loss": 1.2695, + "step": 25883 + }, + { + "epoch": 0.7599976510658288, + "grad_norm": 0.0, + "learning_rate": 2.8720072180403293e-06, + "loss": 1.1279, + "step": 25884 + }, + { + "epoch": 0.7600270127429679, + "grad_norm": 0.0, + "learning_rate": 2.8713402770242237e-06, + "loss": 1.3145, + "step": 25885 + }, + { + "epoch": 0.7600563744201069, + "grad_norm": 0.0, + "learning_rate": 2.8706734004744142e-06, + "loss": 1.2529, + "step": 25886 + }, + { + "epoch": 0.7600857360972458, + "grad_norm": 0.0, + "learning_rate": 2.8700065883969262e-06, + "loss": 1.3398, + "step": 25887 + }, + { + "epoch": 0.7601150977743849, + "grad_norm": 0.0, + "learning_rate": 2.869339840797799e-06, + "loss": 1.1406, + "step": 25888 + }, + { + "epoch": 0.7601444594515239, + "grad_norm": 0.0, + "learning_rate": 2.868673157683057e-06, + "loss": 1.2339, + "step": 25889 + }, + { + "epoch": 0.7601738211286628, + "grad_norm": 0.0, + "learning_rate": 2.8680065390587286e-06, + "loss": 1.2061, + "step": 25890 + }, + { + "epoch": 0.7602031828058019, + "grad_norm": 0.0, + "learning_rate": 2.8673399849308446e-06, + "loss": 1.1772, + "step": 25891 + }, + { + "epoch": 0.7602325444829409, + "grad_norm": 0.0, + "learning_rate": 2.86667349530543e-06, + "loss": 1.1577, + "step": 25892 + }, + { + "epoch": 0.7602619061600798, + "grad_norm": 0.0, + "learning_rate": 2.8660070701885167e-06, + "loss": 1.2578, + "step": 25893 + }, + { + "epoch": 0.7602912678372189, + "grad_norm": 0.0, + "learning_rate": 2.8653407095861287e-06, + "loss": 1.124, + "step": 25894 + }, + { + "epoch": 0.7603206295143579, + "grad_norm": 0.0, + "learning_rate": 2.8646744135042913e-06, + "loss": 1.0381, + "step": 25895 + }, + { + "epoch": 0.7603499911914968, + "grad_norm": 0.0, + "learning_rate": 2.8640081819490284e-06, + "loss": 1.3252, + "step": 25896 + }, + { + "epoch": 0.7603793528686359, + "grad_norm": 0.0, + "learning_rate": 2.863342014926369e-06, + "loss": 1.2568, + "step": 25897 + }, + { + "epoch": 0.7604087145457749, + "grad_norm": 0.0, + "learning_rate": 2.8626759124423363e-06, + "loss": 1.3086, + "step": 25898 + }, + { + "epoch": 0.7604380762229138, + "grad_norm": 0.0, + "learning_rate": 2.8620098745029503e-06, + "loss": 1.1445, + "step": 25899 + }, + { + "epoch": 0.7604674379000529, + "grad_norm": 0.0, + "learning_rate": 2.8613439011142397e-06, + "loss": 1.1943, + "step": 25900 + }, + { + "epoch": 0.7604967995771919, + "grad_norm": 0.0, + "learning_rate": 2.8606779922822214e-06, + "loss": 1.2607, + "step": 25901 + }, + { + "epoch": 0.7605261612543308, + "grad_norm": 0.0, + "learning_rate": 2.8600121480129263e-06, + "loss": 1.3057, + "step": 25902 + }, + { + "epoch": 0.7605555229314699, + "grad_norm": 0.0, + "learning_rate": 2.8593463683123632e-06, + "loss": 1.2158, + "step": 25903 + }, + { + "epoch": 0.7605848846086088, + "grad_norm": 0.0, + "learning_rate": 2.858680653186564e-06, + "loss": 1.2041, + "step": 25904 + }, + { + "epoch": 0.7606142462857478, + "grad_norm": 0.0, + "learning_rate": 2.858015002641541e-06, + "loss": 1.1309, + "step": 25905 + }, + { + "epoch": 0.7606436079628869, + "grad_norm": 0.0, + "learning_rate": 2.8573494166833204e-06, + "loss": 1.186, + "step": 25906 + }, + { + "epoch": 0.7606729696400258, + "grad_norm": 0.0, + "learning_rate": 2.8566838953179154e-06, + "loss": 1.2617, + "step": 25907 + }, + { + "epoch": 0.7607023313171648, + "grad_norm": 0.0, + "learning_rate": 2.8560184385513503e-06, + "loss": 1.1245, + "step": 25908 + }, + { + "epoch": 0.7607316929943039, + "grad_norm": 0.0, + "learning_rate": 2.8553530463896406e-06, + "loss": 1.2212, + "step": 25909 + }, + { + "epoch": 0.7607610546714428, + "grad_norm": 0.0, + "learning_rate": 2.8546877188388024e-06, + "loss": 1.2656, + "step": 25910 + }, + { + "epoch": 0.7607904163485818, + "grad_norm": 0.0, + "learning_rate": 2.8540224559048535e-06, + "loss": 1.2148, + "step": 25911 + }, + { + "epoch": 0.7608197780257209, + "grad_norm": 0.0, + "learning_rate": 2.853357257593806e-06, + "loss": 1.1855, + "step": 25912 + }, + { + "epoch": 0.7608491397028598, + "grad_norm": 0.0, + "learning_rate": 2.8526921239116833e-06, + "loss": 1.1348, + "step": 25913 + }, + { + "epoch": 0.7608785013799988, + "grad_norm": 0.0, + "learning_rate": 2.8520270548644925e-06, + "loss": 1.2764, + "step": 25914 + }, + { + "epoch": 0.7609078630571379, + "grad_norm": 0.0, + "learning_rate": 2.851362050458254e-06, + "loss": 1.3145, + "step": 25915 + }, + { + "epoch": 0.7609372247342768, + "grad_norm": 0.0, + "learning_rate": 2.8506971106989766e-06, + "loss": 1.1313, + "step": 25916 + }, + { + "epoch": 0.7609665864114158, + "grad_norm": 0.0, + "learning_rate": 2.8500322355926804e-06, + "loss": 1.3418, + "step": 25917 + }, + { + "epoch": 0.7609959480885549, + "grad_norm": 0.0, + "learning_rate": 2.8493674251453717e-06, + "loss": 1.1724, + "step": 25918 + }, + { + "epoch": 0.7610253097656938, + "grad_norm": 0.0, + "learning_rate": 2.8487026793630667e-06, + "loss": 1.1865, + "step": 25919 + }, + { + "epoch": 0.7610546714428328, + "grad_norm": 0.0, + "learning_rate": 2.8480379982517734e-06, + "loss": 1.2061, + "step": 25920 + }, + { + "epoch": 0.7610840331199719, + "grad_norm": 0.0, + "learning_rate": 2.847373381817502e-06, + "loss": 1.0869, + "step": 25921 + }, + { + "epoch": 0.7611133947971108, + "grad_norm": 0.0, + "learning_rate": 2.8467088300662683e-06, + "loss": 1.2305, + "step": 25922 + }, + { + "epoch": 0.7611427564742498, + "grad_norm": 0.0, + "learning_rate": 2.846044343004074e-06, + "loss": 1.1548, + "step": 25923 + }, + { + "epoch": 0.7611721181513889, + "grad_norm": 0.0, + "learning_rate": 2.845379920636937e-06, + "loss": 1.166, + "step": 25924 + }, + { + "epoch": 0.7612014798285278, + "grad_norm": 0.0, + "learning_rate": 2.844715562970859e-06, + "loss": 1.1304, + "step": 25925 + }, + { + "epoch": 0.7612308415056668, + "grad_norm": 0.0, + "learning_rate": 2.8440512700118526e-06, + "loss": 1.1519, + "step": 25926 + }, + { + "epoch": 0.7612602031828059, + "grad_norm": 0.0, + "learning_rate": 2.843387041765927e-06, + "loss": 1.2959, + "step": 25927 + }, + { + "epoch": 0.7612895648599448, + "grad_norm": 0.0, + "learning_rate": 2.8427228782390794e-06, + "loss": 1.2637, + "step": 25928 + }, + { + "epoch": 0.7613189265370838, + "grad_norm": 0.0, + "learning_rate": 2.8420587794373243e-06, + "loss": 1.3242, + "step": 25929 + }, + { + "epoch": 0.7613482882142228, + "grad_norm": 0.0, + "learning_rate": 2.8413947453666623e-06, + "loss": 1.2261, + "step": 25930 + }, + { + "epoch": 0.7613776498913618, + "grad_norm": 0.0, + "learning_rate": 2.8407307760331038e-06, + "loss": 1.2651, + "step": 25931 + }, + { + "epoch": 0.7614070115685008, + "grad_norm": 0.0, + "learning_rate": 2.840066871442647e-06, + "loss": 1.2852, + "step": 25932 + }, + { + "epoch": 0.7614363732456398, + "grad_norm": 0.0, + "learning_rate": 2.8394030316013023e-06, + "loss": 1.2725, + "step": 25933 + }, + { + "epoch": 0.7614657349227788, + "grad_norm": 0.0, + "learning_rate": 2.8387392565150694e-06, + "loss": 1.1299, + "step": 25934 + }, + { + "epoch": 0.7614950965999178, + "grad_norm": 0.0, + "learning_rate": 2.8380755461899522e-06, + "loss": 1.1567, + "step": 25935 + }, + { + "epoch": 0.7615244582770568, + "grad_norm": 0.0, + "learning_rate": 2.837411900631949e-06, + "loss": 1.2109, + "step": 25936 + }, + { + "epoch": 0.7615538199541958, + "grad_norm": 0.0, + "learning_rate": 2.836748319847068e-06, + "loss": 1.3076, + "step": 25937 + }, + { + "epoch": 0.7615831816313348, + "grad_norm": 0.0, + "learning_rate": 2.8360848038413056e-06, + "loss": 1.2617, + "step": 25938 + }, + { + "epoch": 0.7616125433084738, + "grad_norm": 0.0, + "learning_rate": 2.83542135262066e-06, + "loss": 1.3018, + "step": 25939 + }, + { + "epoch": 0.7616419049856128, + "grad_norm": 0.0, + "learning_rate": 2.8347579661911374e-06, + "loss": 1.231, + "step": 25940 + }, + { + "epoch": 0.7616712666627518, + "grad_norm": 0.0, + "learning_rate": 2.8340946445587293e-06, + "loss": 1.2168, + "step": 25941 + }, + { + "epoch": 0.7617006283398908, + "grad_norm": 0.0, + "learning_rate": 2.8334313877294426e-06, + "loss": 1.269, + "step": 25942 + }, + { + "epoch": 0.7617299900170298, + "grad_norm": 0.0, + "learning_rate": 2.8327681957092712e-06, + "loss": 1.2324, + "step": 25943 + }, + { + "epoch": 0.7617593516941688, + "grad_norm": 0.0, + "learning_rate": 2.832105068504213e-06, + "loss": 1.2871, + "step": 25944 + }, + { + "epoch": 0.7617887133713078, + "grad_norm": 0.0, + "learning_rate": 2.8314420061202607e-06, + "loss": 1.2607, + "step": 25945 + }, + { + "epoch": 0.7618180750484468, + "grad_norm": 0.0, + "learning_rate": 2.830779008563417e-06, + "loss": 1.291, + "step": 25946 + }, + { + "epoch": 0.7618474367255857, + "grad_norm": 0.0, + "learning_rate": 2.830116075839675e-06, + "loss": 1.2266, + "step": 25947 + }, + { + "epoch": 0.7618767984027247, + "grad_norm": 0.0, + "learning_rate": 2.8294532079550275e-06, + "loss": 1.1899, + "step": 25948 + }, + { + "epoch": 0.7619061600798638, + "grad_norm": 0.0, + "learning_rate": 2.8287904049154735e-06, + "loss": 1.1055, + "step": 25949 + }, + { + "epoch": 0.7619355217570027, + "grad_norm": 0.0, + "learning_rate": 2.8281276667270006e-06, + "loss": 1.2324, + "step": 25950 + }, + { + "epoch": 0.7619648834341417, + "grad_norm": 0.0, + "learning_rate": 2.82746499339561e-06, + "loss": 1.1069, + "step": 25951 + }, + { + "epoch": 0.7619942451112808, + "grad_norm": 0.0, + "learning_rate": 2.8268023849272897e-06, + "loss": 1.147, + "step": 25952 + }, + { + "epoch": 0.7620236067884197, + "grad_norm": 0.0, + "learning_rate": 2.826139841328033e-06, + "loss": 1.1885, + "step": 25953 + }, + { + "epoch": 0.7620529684655587, + "grad_norm": 0.0, + "learning_rate": 2.825477362603828e-06, + "loss": 1.145, + "step": 25954 + }, + { + "epoch": 0.7620823301426978, + "grad_norm": 0.0, + "learning_rate": 2.8248149487606712e-06, + "loss": 1.2617, + "step": 25955 + }, + { + "epoch": 0.7621116918198367, + "grad_norm": 0.0, + "learning_rate": 2.824152599804547e-06, + "loss": 1.1157, + "step": 25956 + }, + { + "epoch": 0.7621410534969757, + "grad_norm": 0.0, + "learning_rate": 2.8234903157414527e-06, + "loss": 1.2812, + "step": 25957 + }, + { + "epoch": 0.7621704151741148, + "grad_norm": 0.0, + "learning_rate": 2.8228280965773734e-06, + "loss": 1.1685, + "step": 25958 + }, + { + "epoch": 0.7621997768512537, + "grad_norm": 0.0, + "learning_rate": 2.822165942318297e-06, + "loss": 1.0996, + "step": 25959 + }, + { + "epoch": 0.7622291385283927, + "grad_norm": 0.0, + "learning_rate": 2.8215038529702123e-06, + "loss": 1.3027, + "step": 25960 + }, + { + "epoch": 0.7622585002055318, + "grad_norm": 0.0, + "learning_rate": 2.8208418285391037e-06, + "loss": 1.2217, + "step": 25961 + }, + { + "epoch": 0.7622878618826707, + "grad_norm": 0.0, + "learning_rate": 2.8201798690309644e-06, + "loss": 1.2266, + "step": 25962 + }, + { + "epoch": 0.7623172235598097, + "grad_norm": 0.0, + "learning_rate": 2.8195179744517744e-06, + "loss": 1.2622, + "step": 25963 + }, + { + "epoch": 0.7623465852369488, + "grad_norm": 0.0, + "learning_rate": 2.8188561448075256e-06, + "loss": 1.2217, + "step": 25964 + }, + { + "epoch": 0.7623759469140877, + "grad_norm": 0.0, + "learning_rate": 2.8181943801041956e-06, + "loss": 1.2119, + "step": 25965 + }, + { + "epoch": 0.7624053085912267, + "grad_norm": 0.0, + "learning_rate": 2.8175326803477774e-06, + "loss": 1.1396, + "step": 25966 + }, + { + "epoch": 0.7624346702683658, + "grad_norm": 0.0, + "learning_rate": 2.8168710455442496e-06, + "loss": 1.2197, + "step": 25967 + }, + { + "epoch": 0.7624640319455047, + "grad_norm": 0.0, + "learning_rate": 2.816209475699597e-06, + "loss": 1.2656, + "step": 25968 + }, + { + "epoch": 0.7624933936226437, + "grad_norm": 0.0, + "learning_rate": 2.815547970819802e-06, + "loss": 1.2866, + "step": 25969 + }, + { + "epoch": 0.7625227552997828, + "grad_norm": 0.0, + "learning_rate": 2.814886530910844e-06, + "loss": 1.21, + "step": 25970 + }, + { + "epoch": 0.7625521169769217, + "grad_norm": 0.0, + "learning_rate": 2.8142251559787093e-06, + "loss": 1.2539, + "step": 25971 + }, + { + "epoch": 0.7625814786540607, + "grad_norm": 0.0, + "learning_rate": 2.8135638460293747e-06, + "loss": 1.1235, + "step": 25972 + }, + { + "epoch": 0.7626108403311997, + "grad_norm": 0.0, + "learning_rate": 2.8129026010688255e-06, + "loss": 1.2139, + "step": 25973 + }, + { + "epoch": 0.7626402020083387, + "grad_norm": 0.0, + "learning_rate": 2.8122414211030345e-06, + "loss": 1.166, + "step": 25974 + }, + { + "epoch": 0.7626695636854777, + "grad_norm": 0.0, + "learning_rate": 2.81158030613799e-06, + "loss": 1.1787, + "step": 25975 + }, + { + "epoch": 0.7626989253626167, + "grad_norm": 0.0, + "learning_rate": 2.810919256179665e-06, + "loss": 1.25, + "step": 25976 + }, + { + "epoch": 0.7627282870397557, + "grad_norm": 0.0, + "learning_rate": 2.8102582712340385e-06, + "loss": 1.3101, + "step": 25977 + }, + { + "epoch": 0.7627576487168947, + "grad_norm": 0.0, + "learning_rate": 2.809597351307087e-06, + "loss": 1.1299, + "step": 25978 + }, + { + "epoch": 0.7627870103940337, + "grad_norm": 0.0, + "learning_rate": 2.808936496404786e-06, + "loss": 1.1621, + "step": 25979 + }, + { + "epoch": 0.7628163720711727, + "grad_norm": 0.0, + "learning_rate": 2.8082757065331166e-06, + "loss": 1.168, + "step": 25980 + }, + { + "epoch": 0.7628457337483117, + "grad_norm": 0.0, + "learning_rate": 2.8076149816980492e-06, + "loss": 1.2173, + "step": 25981 + }, + { + "epoch": 0.7628750954254507, + "grad_norm": 0.0, + "learning_rate": 2.806954321905564e-06, + "loss": 1.3467, + "step": 25982 + }, + { + "epoch": 0.7629044571025897, + "grad_norm": 0.0, + "learning_rate": 2.806293727161633e-06, + "loss": 1.249, + "step": 25983 + }, + { + "epoch": 0.7629338187797287, + "grad_norm": 0.0, + "learning_rate": 2.8056331974722305e-06, + "loss": 1.2134, + "step": 25984 + }, + { + "epoch": 0.7629631804568677, + "grad_norm": 0.0, + "learning_rate": 2.8049727328433263e-06, + "loss": 1.21, + "step": 25985 + }, + { + "epoch": 0.7629925421340067, + "grad_norm": 0.0, + "learning_rate": 2.8043123332808996e-06, + "loss": 1.252, + "step": 25986 + }, + { + "epoch": 0.7630219038111457, + "grad_norm": 0.0, + "learning_rate": 2.8036519987909192e-06, + "loss": 1.335, + "step": 25987 + }, + { + "epoch": 0.7630512654882847, + "grad_norm": 0.0, + "learning_rate": 2.802991729379353e-06, + "loss": 1.1797, + "step": 25988 + }, + { + "epoch": 0.7630806271654237, + "grad_norm": 0.0, + "learning_rate": 2.80233152505218e-06, + "loss": 1.1831, + "step": 25989 + }, + { + "epoch": 0.7631099888425626, + "grad_norm": 0.0, + "learning_rate": 2.8016713858153633e-06, + "loss": 1.2803, + "step": 25990 + }, + { + "epoch": 0.7631393505197017, + "grad_norm": 0.0, + "learning_rate": 2.801011311674878e-06, + "loss": 1.2344, + "step": 25991 + }, + { + "epoch": 0.7631687121968407, + "grad_norm": 0.0, + "learning_rate": 2.8003513026366914e-06, + "loss": 1.1533, + "step": 25992 + }, + { + "epoch": 0.7631980738739796, + "grad_norm": 0.0, + "learning_rate": 2.799691358706772e-06, + "loss": 1.2725, + "step": 25993 + }, + { + "epoch": 0.7632274355511187, + "grad_norm": 0.0, + "learning_rate": 2.7990314798910843e-06, + "loss": 1.2725, + "step": 25994 + }, + { + "epoch": 0.7632567972282577, + "grad_norm": 0.0, + "learning_rate": 2.798371666195603e-06, + "loss": 1.2363, + "step": 25995 + }, + { + "epoch": 0.7632861589053966, + "grad_norm": 0.0, + "learning_rate": 2.79771191762629e-06, + "loss": 1.2183, + "step": 25996 + }, + { + "epoch": 0.7633155205825357, + "grad_norm": 0.0, + "learning_rate": 2.7970522341891094e-06, + "loss": 1.252, + "step": 25997 + }, + { + "epoch": 0.7633448822596747, + "grad_norm": 0.0, + "learning_rate": 2.7963926158900347e-06, + "loss": 1.3057, + "step": 25998 + }, + { + "epoch": 0.7633742439368136, + "grad_norm": 0.0, + "learning_rate": 2.795733062735022e-06, + "loss": 1.0825, + "step": 25999 + }, + { + "epoch": 0.7634036056139527, + "grad_norm": 0.0, + "learning_rate": 2.795073574730044e-06, + "loss": 1.2217, + "step": 26000 + }, + { + "epoch": 0.7634329672910917, + "grad_norm": 0.0, + "learning_rate": 2.794414151881061e-06, + "loss": 1.2148, + "step": 26001 + }, + { + "epoch": 0.7634623289682306, + "grad_norm": 0.0, + "learning_rate": 2.7937547941940357e-06, + "loss": 1.3262, + "step": 26002 + }, + { + "epoch": 0.7634916906453697, + "grad_norm": 0.0, + "learning_rate": 2.7930955016749282e-06, + "loss": 1.2729, + "step": 26003 + }, + { + "epoch": 0.7635210523225087, + "grad_norm": 0.0, + "learning_rate": 2.7924362743297074e-06, + "loss": 1.1475, + "step": 26004 + }, + { + "epoch": 0.7635504139996476, + "grad_norm": 0.0, + "learning_rate": 2.791777112164328e-06, + "loss": 1.2705, + "step": 26005 + }, + { + "epoch": 0.7635797756767867, + "grad_norm": 0.0, + "learning_rate": 2.791118015184757e-06, + "loss": 1.1548, + "step": 26006 + }, + { + "epoch": 0.7636091373539257, + "grad_norm": 0.0, + "learning_rate": 2.7904589833969533e-06, + "loss": 1.2295, + "step": 26007 + }, + { + "epoch": 0.7636384990310646, + "grad_norm": 0.0, + "learning_rate": 2.789800016806874e-06, + "loss": 1.1631, + "step": 26008 + }, + { + "epoch": 0.7636678607082037, + "grad_norm": 0.0, + "learning_rate": 2.7891411154204793e-06, + "loss": 1.2041, + "step": 26009 + }, + { + "epoch": 0.7636972223853427, + "grad_norm": 0.0, + "learning_rate": 2.788482279243726e-06, + "loss": 1.1528, + "step": 26010 + }, + { + "epoch": 0.7637265840624816, + "grad_norm": 0.0, + "learning_rate": 2.7878235082825778e-06, + "loss": 1.3291, + "step": 26011 + }, + { + "epoch": 0.7637559457396207, + "grad_norm": 0.0, + "learning_rate": 2.7871648025429856e-06, + "loss": 1.144, + "step": 26012 + }, + { + "epoch": 0.7637853074167597, + "grad_norm": 0.0, + "learning_rate": 2.786506162030912e-06, + "loss": 1.2676, + "step": 26013 + }, + { + "epoch": 0.7638146690938986, + "grad_norm": 0.0, + "learning_rate": 2.7858475867523072e-06, + "loss": 1.293, + "step": 26014 + }, + { + "epoch": 0.7638440307710377, + "grad_norm": 0.0, + "learning_rate": 2.785189076713134e-06, + "loss": 1.2393, + "step": 26015 + }, + { + "epoch": 0.7638733924481766, + "grad_norm": 0.0, + "learning_rate": 2.7845306319193433e-06, + "loss": 1.1377, + "step": 26016 + }, + { + "epoch": 0.7639027541253156, + "grad_norm": 0.0, + "learning_rate": 2.783872252376889e-06, + "loss": 1.207, + "step": 26017 + }, + { + "epoch": 0.7639321158024547, + "grad_norm": 0.0, + "learning_rate": 2.783213938091728e-06, + "loss": 1.2539, + "step": 26018 + }, + { + "epoch": 0.7639614774795936, + "grad_norm": 0.0, + "learning_rate": 2.782555689069807e-06, + "loss": 1.3711, + "step": 26019 + }, + { + "epoch": 0.7639908391567326, + "grad_norm": 0.0, + "learning_rate": 2.781897505317087e-06, + "loss": 1.3042, + "step": 26020 + }, + { + "epoch": 0.7640202008338717, + "grad_norm": 0.0, + "learning_rate": 2.781239386839514e-06, + "loss": 1.2637, + "step": 26021 + }, + { + "epoch": 0.7640495625110106, + "grad_norm": 0.0, + "learning_rate": 2.780581333643044e-06, + "loss": 1.1382, + "step": 26022 + }, + { + "epoch": 0.7640789241881496, + "grad_norm": 0.0, + "learning_rate": 2.7799233457336226e-06, + "loss": 1.1528, + "step": 26023 + }, + { + "epoch": 0.7641082858652887, + "grad_norm": 0.0, + "learning_rate": 2.779265423117208e-06, + "loss": 0.9292, + "step": 26024 + }, + { + "epoch": 0.7641376475424276, + "grad_norm": 0.0, + "learning_rate": 2.7786075657997446e-06, + "loss": 1.2197, + "step": 26025 + }, + { + "epoch": 0.7641670092195666, + "grad_norm": 0.0, + "learning_rate": 2.7779497737871817e-06, + "loss": 1.2251, + "step": 26026 + }, + { + "epoch": 0.7641963708967057, + "grad_norm": 0.0, + "learning_rate": 2.777292047085469e-06, + "loss": 1.1978, + "step": 26027 + }, + { + "epoch": 0.7642257325738446, + "grad_norm": 0.0, + "learning_rate": 2.7766343857005516e-06, + "loss": 1.2676, + "step": 26028 + }, + { + "epoch": 0.7642550942509836, + "grad_norm": 0.0, + "learning_rate": 2.7759767896383826e-06, + "loss": 1.209, + "step": 26029 + }, + { + "epoch": 0.7642844559281227, + "grad_norm": 0.0, + "learning_rate": 2.7753192589049015e-06, + "loss": 1.2334, + "step": 26030 + }, + { + "epoch": 0.7643138176052616, + "grad_norm": 0.0, + "learning_rate": 2.774661793506063e-06, + "loss": 1.3076, + "step": 26031 + }, + { + "epoch": 0.7643431792824006, + "grad_norm": 0.0, + "learning_rate": 2.7740043934478066e-06, + "loss": 1.1016, + "step": 26032 + }, + { + "epoch": 0.7643725409595397, + "grad_norm": 0.0, + "learning_rate": 2.773347058736081e-06, + "loss": 1.1318, + "step": 26033 + }, + { + "epoch": 0.7644019026366786, + "grad_norm": 0.0, + "learning_rate": 2.7726897893768245e-06, + "loss": 1.1699, + "step": 26034 + }, + { + "epoch": 0.7644312643138176, + "grad_norm": 0.0, + "learning_rate": 2.772032585375989e-06, + "loss": 1.0581, + "step": 26035 + }, + { + "epoch": 0.7644606259909567, + "grad_norm": 0.0, + "learning_rate": 2.7713754467395126e-06, + "loss": 1.2393, + "step": 26036 + }, + { + "epoch": 0.7644899876680956, + "grad_norm": 0.0, + "learning_rate": 2.7707183734733377e-06, + "loss": 1.3926, + "step": 26037 + }, + { + "epoch": 0.7645193493452346, + "grad_norm": 0.0, + "learning_rate": 2.7700613655834095e-06, + "loss": 1.2275, + "step": 26038 + }, + { + "epoch": 0.7645487110223737, + "grad_norm": 0.0, + "learning_rate": 2.769404423075667e-06, + "loss": 1.3857, + "step": 26039 + }, + { + "epoch": 0.7645780726995126, + "grad_norm": 0.0, + "learning_rate": 2.768747545956053e-06, + "loss": 1.2607, + "step": 26040 + }, + { + "epoch": 0.7646074343766516, + "grad_norm": 0.0, + "learning_rate": 2.7680907342305084e-06, + "loss": 1.1382, + "step": 26041 + }, + { + "epoch": 0.7646367960537906, + "grad_norm": 0.0, + "learning_rate": 2.7674339879049705e-06, + "loss": 1.2979, + "step": 26042 + }, + { + "epoch": 0.7646661577309296, + "grad_norm": 0.0, + "learning_rate": 2.7667773069853767e-06, + "loss": 1.2979, + "step": 26043 + }, + { + "epoch": 0.7646955194080686, + "grad_norm": 0.0, + "learning_rate": 2.766120691477672e-06, + "loss": 1.2334, + "step": 26044 + }, + { + "epoch": 0.7647248810852076, + "grad_norm": 0.0, + "learning_rate": 2.765464141387786e-06, + "loss": 1.1758, + "step": 26045 + }, + { + "epoch": 0.7647542427623466, + "grad_norm": 0.0, + "learning_rate": 2.764807656721664e-06, + "loss": 1.1001, + "step": 26046 + }, + { + "epoch": 0.7647836044394856, + "grad_norm": 0.0, + "learning_rate": 2.7641512374852395e-06, + "loss": 1.2598, + "step": 26047 + }, + { + "epoch": 0.7648129661166245, + "grad_norm": 0.0, + "learning_rate": 2.7634948836844455e-06, + "loss": 1.2749, + "step": 26048 + }, + { + "epoch": 0.7648423277937636, + "grad_norm": 0.0, + "learning_rate": 2.7628385953252246e-06, + "loss": 1.1582, + "step": 26049 + }, + { + "epoch": 0.7648716894709026, + "grad_norm": 0.0, + "learning_rate": 2.7621823724135067e-06, + "loss": 1.3262, + "step": 26050 + }, + { + "epoch": 0.7649010511480415, + "grad_norm": 0.0, + "learning_rate": 2.7615262149552268e-06, + "loss": 1.249, + "step": 26051 + }, + { + "epoch": 0.7649304128251806, + "grad_norm": 0.0, + "learning_rate": 2.7608701229563162e-06, + "loss": 1.166, + "step": 26052 + }, + { + "epoch": 0.7649597745023196, + "grad_norm": 0.0, + "learning_rate": 2.7602140964227153e-06, + "loss": 1.125, + "step": 26053 + }, + { + "epoch": 0.7649891361794585, + "grad_norm": 0.0, + "learning_rate": 2.7595581353603487e-06, + "loss": 1.2061, + "step": 26054 + }, + { + "epoch": 0.7650184978565976, + "grad_norm": 0.0, + "learning_rate": 2.758902239775155e-06, + "loss": 1.2666, + "step": 26055 + }, + { + "epoch": 0.7650478595337366, + "grad_norm": 0.0, + "learning_rate": 2.758246409673063e-06, + "loss": 1.1567, + "step": 26056 + }, + { + "epoch": 0.7650772212108755, + "grad_norm": 0.0, + "learning_rate": 2.7575906450600033e-06, + "loss": 1.1704, + "step": 26057 + }, + { + "epoch": 0.7651065828880146, + "grad_norm": 0.0, + "learning_rate": 2.756934945941906e-06, + "loss": 1.2886, + "step": 26058 + }, + { + "epoch": 0.7651359445651535, + "grad_norm": 0.0, + "learning_rate": 2.756279312324698e-06, + "loss": 1.1538, + "step": 26059 + }, + { + "epoch": 0.7651653062422925, + "grad_norm": 0.0, + "learning_rate": 2.755623744214315e-06, + "loss": 1.1787, + "step": 26060 + }, + { + "epoch": 0.7651946679194316, + "grad_norm": 0.0, + "learning_rate": 2.754968241616678e-06, + "loss": 1.2061, + "step": 26061 + }, + { + "epoch": 0.7652240295965705, + "grad_norm": 0.0, + "learning_rate": 2.7543128045377222e-06, + "loss": 1.1689, + "step": 26062 + }, + { + "epoch": 0.7652533912737095, + "grad_norm": 0.0, + "learning_rate": 2.7536574329833677e-06, + "loss": 1.2188, + "step": 26063 + }, + { + "epoch": 0.7652827529508486, + "grad_norm": 0.0, + "learning_rate": 2.7530021269595486e-06, + "loss": 1.1772, + "step": 26064 + }, + { + "epoch": 0.7653121146279875, + "grad_norm": 0.0, + "learning_rate": 2.752346886472187e-06, + "loss": 1.2549, + "step": 26065 + }, + { + "epoch": 0.7653414763051265, + "grad_norm": 0.0, + "learning_rate": 2.751691711527209e-06, + "loss": 1.2627, + "step": 26066 + }, + { + "epoch": 0.7653708379822656, + "grad_norm": 0.0, + "learning_rate": 2.751036602130538e-06, + "loss": 1.1929, + "step": 26067 + }, + { + "epoch": 0.7654001996594045, + "grad_norm": 0.0, + "learning_rate": 2.7503815582880977e-06, + "loss": 1.2871, + "step": 26068 + }, + { + "epoch": 0.7654295613365435, + "grad_norm": 0.0, + "learning_rate": 2.749726580005816e-06, + "loss": 1.3525, + "step": 26069 + }, + { + "epoch": 0.7654589230136826, + "grad_norm": 0.0, + "learning_rate": 2.749071667289611e-06, + "loss": 1.064, + "step": 26070 + }, + { + "epoch": 0.7654882846908215, + "grad_norm": 0.0, + "learning_rate": 2.748416820145411e-06, + "loss": 1.3096, + "step": 26071 + }, + { + "epoch": 0.7655176463679605, + "grad_norm": 0.0, + "learning_rate": 2.7477620385791316e-06, + "loss": 1.2168, + "step": 26072 + }, + { + "epoch": 0.7655470080450996, + "grad_norm": 0.0, + "learning_rate": 2.7471073225967004e-06, + "loss": 1.333, + "step": 26073 + }, + { + "epoch": 0.7655763697222385, + "grad_norm": 0.0, + "learning_rate": 2.746452672204035e-06, + "loss": 1.1772, + "step": 26074 + }, + { + "epoch": 0.7656057313993775, + "grad_norm": 0.0, + "learning_rate": 2.7457980874070556e-06, + "loss": 1.207, + "step": 26075 + }, + { + "epoch": 0.7656350930765166, + "grad_norm": 0.0, + "learning_rate": 2.7451435682116823e-06, + "loss": 1.3564, + "step": 26076 + }, + { + "epoch": 0.7656644547536555, + "grad_norm": 0.0, + "learning_rate": 2.7444891146238307e-06, + "loss": 1.0786, + "step": 26077 + }, + { + "epoch": 0.7656938164307945, + "grad_norm": 0.0, + "learning_rate": 2.7438347266494247e-06, + "loss": 1.2173, + "step": 26078 + }, + { + "epoch": 0.7657231781079336, + "grad_norm": 0.0, + "learning_rate": 2.7431804042943777e-06, + "loss": 1.2422, + "step": 26079 + }, + { + "epoch": 0.7657525397850725, + "grad_norm": 0.0, + "learning_rate": 2.7425261475646115e-06, + "loss": 1.168, + "step": 26080 + }, + { + "epoch": 0.7657819014622115, + "grad_norm": 0.0, + "learning_rate": 2.741871956466037e-06, + "loss": 1.1216, + "step": 26081 + }, + { + "epoch": 0.7658112631393506, + "grad_norm": 0.0, + "learning_rate": 2.741217831004578e-06, + "loss": 1.1899, + "step": 26082 + }, + { + "epoch": 0.7658406248164895, + "grad_norm": 0.0, + "learning_rate": 2.7405637711861398e-06, + "loss": 1.2319, + "step": 26083 + }, + { + "epoch": 0.7658699864936285, + "grad_norm": 0.0, + "learning_rate": 2.739909777016646e-06, + "loss": 1.249, + "step": 26084 + }, + { + "epoch": 0.7658993481707675, + "grad_norm": 0.0, + "learning_rate": 2.739255848502007e-06, + "loss": 1.3008, + "step": 26085 + }, + { + "epoch": 0.7659287098479065, + "grad_norm": 0.0, + "learning_rate": 2.738601985648135e-06, + "loss": 1.1714, + "step": 26086 + }, + { + "epoch": 0.7659580715250455, + "grad_norm": 0.0, + "learning_rate": 2.7379481884609473e-06, + "loss": 1.2046, + "step": 26087 + }, + { + "epoch": 0.7659874332021845, + "grad_norm": 0.0, + "learning_rate": 2.73729445694635e-06, + "loss": 1.2461, + "step": 26088 + }, + { + "epoch": 0.7660167948793235, + "grad_norm": 0.0, + "learning_rate": 2.7366407911102634e-06, + "loss": 1.3008, + "step": 26089 + }, + { + "epoch": 0.7660461565564625, + "grad_norm": 0.0, + "learning_rate": 2.735987190958593e-06, + "loss": 1.1562, + "step": 26090 + }, + { + "epoch": 0.7660755182336015, + "grad_norm": 0.0, + "learning_rate": 2.735333656497251e-06, + "loss": 1.2646, + "step": 26091 + }, + { + "epoch": 0.7661048799107405, + "grad_norm": 0.0, + "learning_rate": 2.734680187732145e-06, + "loss": 1.2144, + "step": 26092 + }, + { + "epoch": 0.7661342415878795, + "grad_norm": 0.0, + "learning_rate": 2.7340267846691892e-06, + "loss": 1.3169, + "step": 26093 + }, + { + "epoch": 0.7661636032650185, + "grad_norm": 0.0, + "learning_rate": 2.733373447314287e-06, + "loss": 1.2529, + "step": 26094 + }, + { + "epoch": 0.7661929649421575, + "grad_norm": 0.0, + "learning_rate": 2.7327201756733536e-06, + "loss": 1.1255, + "step": 26095 + }, + { + "epoch": 0.7662223266192965, + "grad_norm": 0.0, + "learning_rate": 2.732066969752292e-06, + "loss": 1.2227, + "step": 26096 + }, + { + "epoch": 0.7662516882964355, + "grad_norm": 0.0, + "learning_rate": 2.731413829557007e-06, + "loss": 1.1191, + "step": 26097 + }, + { + "epoch": 0.7662810499735745, + "grad_norm": 0.0, + "learning_rate": 2.730760755093411e-06, + "loss": 1.0278, + "step": 26098 + }, + { + "epoch": 0.7663104116507135, + "grad_norm": 0.0, + "learning_rate": 2.7301077463674084e-06, + "loss": 1.25, + "step": 26099 + }, + { + "epoch": 0.7663397733278525, + "grad_norm": 0.0, + "learning_rate": 2.7294548033849023e-06, + "loss": 1.2168, + "step": 26100 + }, + { + "epoch": 0.7663691350049915, + "grad_norm": 0.0, + "learning_rate": 2.7288019261517964e-06, + "loss": 1.2441, + "step": 26101 + }, + { + "epoch": 0.7663984966821304, + "grad_norm": 0.0, + "learning_rate": 2.728149114673999e-06, + "loss": 1.0723, + "step": 26102 + }, + { + "epoch": 0.7664278583592695, + "grad_norm": 0.0, + "learning_rate": 2.7274963689574086e-06, + "loss": 1.1533, + "step": 26103 + }, + { + "epoch": 0.7664572200364085, + "grad_norm": 0.0, + "learning_rate": 2.726843689007934e-06, + "loss": 1.1675, + "step": 26104 + }, + { + "epoch": 0.7664865817135474, + "grad_norm": 0.0, + "learning_rate": 2.7261910748314745e-06, + "loss": 1.0483, + "step": 26105 + }, + { + "epoch": 0.7665159433906865, + "grad_norm": 0.0, + "learning_rate": 2.7255385264339297e-06, + "loss": 1.2314, + "step": 26106 + }, + { + "epoch": 0.7665453050678255, + "grad_norm": 0.0, + "learning_rate": 2.7248860438212076e-06, + "loss": 1.2354, + "step": 26107 + }, + { + "epoch": 0.7665746667449644, + "grad_norm": 0.0, + "learning_rate": 2.7242336269991998e-06, + "loss": 1.2129, + "step": 26108 + }, + { + "epoch": 0.7666040284221035, + "grad_norm": 0.0, + "learning_rate": 2.723581275973812e-06, + "loss": 1.251, + "step": 26109 + }, + { + "epoch": 0.7666333900992425, + "grad_norm": 0.0, + "learning_rate": 2.7229289907509406e-06, + "loss": 1.2871, + "step": 26110 + }, + { + "epoch": 0.7666627517763814, + "grad_norm": 0.0, + "learning_rate": 2.7222767713364896e-06, + "loss": 1.2383, + "step": 26111 + }, + { + "epoch": 0.7666921134535205, + "grad_norm": 0.0, + "learning_rate": 2.721624617736349e-06, + "loss": 1.2354, + "step": 26112 + }, + { + "epoch": 0.7667214751306595, + "grad_norm": 0.0, + "learning_rate": 2.7209725299564248e-06, + "loss": 1.2627, + "step": 26113 + }, + { + "epoch": 0.7667508368077984, + "grad_norm": 0.0, + "learning_rate": 2.7203205080026096e-06, + "loss": 1.2036, + "step": 26114 + }, + { + "epoch": 0.7667801984849375, + "grad_norm": 0.0, + "learning_rate": 2.7196685518808007e-06, + "loss": 1.085, + "step": 26115 + }, + { + "epoch": 0.7668095601620765, + "grad_norm": 0.0, + "learning_rate": 2.719016661596894e-06, + "loss": 1.1738, + "step": 26116 + }, + { + "epoch": 0.7668389218392154, + "grad_norm": 0.0, + "learning_rate": 2.7183648371567815e-06, + "loss": 1.2275, + "step": 26117 + }, + { + "epoch": 0.7668682835163545, + "grad_norm": 0.0, + "learning_rate": 2.717713078566363e-06, + "loss": 1.1611, + "step": 26118 + }, + { + "epoch": 0.7668976451934935, + "grad_norm": 0.0, + "learning_rate": 2.7170613858315274e-06, + "loss": 1.2085, + "step": 26119 + }, + { + "epoch": 0.7669270068706324, + "grad_norm": 0.0, + "learning_rate": 2.716409758958174e-06, + "loss": 1.1465, + "step": 26120 + }, + { + "epoch": 0.7669563685477715, + "grad_norm": 0.0, + "learning_rate": 2.7157581979521885e-06, + "loss": 1.2236, + "step": 26121 + }, + { + "epoch": 0.7669857302249105, + "grad_norm": 0.0, + "learning_rate": 2.715106702819471e-06, + "loss": 1.2002, + "step": 26122 + }, + { + "epoch": 0.7670150919020494, + "grad_norm": 0.0, + "learning_rate": 2.71445527356591e-06, + "loss": 1.2598, + "step": 26123 + }, + { + "epoch": 0.7670444535791885, + "grad_norm": 0.0, + "learning_rate": 2.7138039101973944e-06, + "loss": 1.1123, + "step": 26124 + }, + { + "epoch": 0.7670738152563275, + "grad_norm": 0.0, + "learning_rate": 2.7131526127198172e-06, + "loss": 1.2744, + "step": 26125 + }, + { + "epoch": 0.7671031769334664, + "grad_norm": 0.0, + "learning_rate": 2.7125013811390634e-06, + "loss": 1.1123, + "step": 26126 + }, + { + "epoch": 0.7671325386106055, + "grad_norm": 0.0, + "learning_rate": 2.711850215461029e-06, + "loss": 1.2285, + "step": 26127 + }, + { + "epoch": 0.7671619002877444, + "grad_norm": 0.0, + "learning_rate": 2.711199115691597e-06, + "loss": 1.2725, + "step": 26128 + }, + { + "epoch": 0.7671912619648834, + "grad_norm": 0.0, + "learning_rate": 2.7105480818366614e-06, + "loss": 1.2197, + "step": 26129 + }, + { + "epoch": 0.7672206236420225, + "grad_norm": 0.0, + "learning_rate": 2.7098971139021025e-06, + "loss": 1.3408, + "step": 26130 + }, + { + "epoch": 0.7672499853191614, + "grad_norm": 0.0, + "learning_rate": 2.7092462118938144e-06, + "loss": 1.146, + "step": 26131 + }, + { + "epoch": 0.7672793469963004, + "grad_norm": 0.0, + "learning_rate": 2.70859537581768e-06, + "loss": 1.2236, + "step": 26132 + }, + { + "epoch": 0.7673087086734395, + "grad_norm": 0.0, + "learning_rate": 2.7079446056795854e-06, + "loss": 1.2637, + "step": 26133 + }, + { + "epoch": 0.7673380703505784, + "grad_norm": 0.0, + "learning_rate": 2.707293901485415e-06, + "loss": 1.2329, + "step": 26134 + }, + { + "epoch": 0.7673674320277174, + "grad_norm": 0.0, + "learning_rate": 2.70664326324105e-06, + "loss": 1.251, + "step": 26135 + }, + { + "epoch": 0.7673967937048565, + "grad_norm": 0.0, + "learning_rate": 2.7059926909523814e-06, + "loss": 1.1992, + "step": 26136 + }, + { + "epoch": 0.7674261553819954, + "grad_norm": 0.0, + "learning_rate": 2.7053421846252847e-06, + "loss": 1.1846, + "step": 26137 + }, + { + "epoch": 0.7674555170591344, + "grad_norm": 0.0, + "learning_rate": 2.704691744265652e-06, + "loss": 1.1582, + "step": 26138 + }, + { + "epoch": 0.7674848787362735, + "grad_norm": 0.0, + "learning_rate": 2.704041369879359e-06, + "loss": 1.3711, + "step": 26139 + }, + { + "epoch": 0.7675142404134124, + "grad_norm": 0.0, + "learning_rate": 2.703391061472288e-06, + "loss": 1.1792, + "step": 26140 + }, + { + "epoch": 0.7675436020905514, + "grad_norm": 0.0, + "learning_rate": 2.702740819050318e-06, + "loss": 1.1353, + "step": 26141 + }, + { + "epoch": 0.7675729637676905, + "grad_norm": 0.0, + "learning_rate": 2.702090642619334e-06, + "loss": 1.2422, + "step": 26142 + }, + { + "epoch": 0.7676023254448294, + "grad_norm": 0.0, + "learning_rate": 2.701440532185211e-06, + "loss": 1.2485, + "step": 26143 + }, + { + "epoch": 0.7676316871219684, + "grad_norm": 0.0, + "learning_rate": 2.7007904877538326e-06, + "loss": 1.1577, + "step": 26144 + }, + { + "epoch": 0.7676610487991075, + "grad_norm": 0.0, + "learning_rate": 2.700140509331076e-06, + "loss": 1.2456, + "step": 26145 + }, + { + "epoch": 0.7676904104762464, + "grad_norm": 0.0, + "learning_rate": 2.699490596922815e-06, + "loss": 1.2056, + "step": 26146 + }, + { + "epoch": 0.7677197721533854, + "grad_norm": 0.0, + "learning_rate": 2.6988407505349336e-06, + "loss": 1.2007, + "step": 26147 + }, + { + "epoch": 0.7677491338305245, + "grad_norm": 0.0, + "learning_rate": 2.6981909701733045e-06, + "loss": 1.3086, + "step": 26148 + }, + { + "epoch": 0.7677784955076634, + "grad_norm": 0.0, + "learning_rate": 2.697541255843805e-06, + "loss": 1.3271, + "step": 26149 + }, + { + "epoch": 0.7678078571848024, + "grad_norm": 0.0, + "learning_rate": 2.6968916075523067e-06, + "loss": 1.209, + "step": 26150 + }, + { + "epoch": 0.7678372188619413, + "grad_norm": 0.0, + "learning_rate": 2.6962420253046917e-06, + "loss": 1.1074, + "step": 26151 + }, + { + "epoch": 0.7678665805390804, + "grad_norm": 0.0, + "learning_rate": 2.695592509106827e-06, + "loss": 1.1377, + "step": 26152 + }, + { + "epoch": 0.7678959422162194, + "grad_norm": 0.0, + "learning_rate": 2.694943058964593e-06, + "loss": 1.1924, + "step": 26153 + }, + { + "epoch": 0.7679253038933583, + "grad_norm": 0.0, + "learning_rate": 2.69429367488386e-06, + "loss": 1.2432, + "step": 26154 + }, + { + "epoch": 0.7679546655704974, + "grad_norm": 0.0, + "learning_rate": 2.693644356870496e-06, + "loss": 1.2085, + "step": 26155 + }, + { + "epoch": 0.7679840272476364, + "grad_norm": 0.0, + "learning_rate": 2.6929951049303813e-06, + "loss": 1.168, + "step": 26156 + }, + { + "epoch": 0.7680133889247753, + "grad_norm": 0.0, + "learning_rate": 2.6923459190693833e-06, + "loss": 1.1719, + "step": 26157 + }, + { + "epoch": 0.7680427506019144, + "grad_norm": 0.0, + "learning_rate": 2.6916967992933728e-06, + "loss": 1.1919, + "step": 26158 + }, + { + "epoch": 0.7680721122790534, + "grad_norm": 0.0, + "learning_rate": 2.6910477456082175e-06, + "loss": 1.1948, + "step": 26159 + }, + { + "epoch": 0.7681014739561923, + "grad_norm": 0.0, + "learning_rate": 2.690398758019791e-06, + "loss": 1.1338, + "step": 26160 + }, + { + "epoch": 0.7681308356333314, + "grad_norm": 0.0, + "learning_rate": 2.689749836533958e-06, + "loss": 1.3242, + "step": 26161 + }, + { + "epoch": 0.7681601973104704, + "grad_norm": 0.0, + "learning_rate": 2.6891009811565925e-06, + "loss": 1.1768, + "step": 26162 + }, + { + "epoch": 0.7681895589876093, + "grad_norm": 0.0, + "learning_rate": 2.6884521918935593e-06, + "loss": 1.2393, + "step": 26163 + }, + { + "epoch": 0.7682189206647484, + "grad_norm": 0.0, + "learning_rate": 2.6878034687507247e-06, + "loss": 1.2773, + "step": 26164 + }, + { + "epoch": 0.7682482823418874, + "grad_norm": 0.0, + "learning_rate": 2.687154811733956e-06, + "loss": 1.2617, + "step": 26165 + }, + { + "epoch": 0.7682776440190263, + "grad_norm": 0.0, + "learning_rate": 2.6865062208491176e-06, + "loss": 1.3457, + "step": 26166 + }, + { + "epoch": 0.7683070056961654, + "grad_norm": 0.0, + "learning_rate": 2.6858576961020787e-06, + "loss": 1.3311, + "step": 26167 + }, + { + "epoch": 0.7683363673733044, + "grad_norm": 0.0, + "learning_rate": 2.6852092374986984e-06, + "loss": 1.2803, + "step": 26168 + }, + { + "epoch": 0.7683657290504433, + "grad_norm": 0.0, + "learning_rate": 2.684560845044848e-06, + "loss": 1.1997, + "step": 26169 + }, + { + "epoch": 0.7683950907275824, + "grad_norm": 0.0, + "learning_rate": 2.683912518746383e-06, + "loss": 1.2495, + "step": 26170 + }, + { + "epoch": 0.7684244524047213, + "grad_norm": 0.0, + "learning_rate": 2.683264258609175e-06, + "loss": 1.1631, + "step": 26171 + }, + { + "epoch": 0.7684538140818603, + "grad_norm": 0.0, + "learning_rate": 2.682616064639082e-06, + "loss": 1.2432, + "step": 26172 + }, + { + "epoch": 0.7684831757589994, + "grad_norm": 0.0, + "learning_rate": 2.681967936841966e-06, + "loss": 1.165, + "step": 26173 + }, + { + "epoch": 0.7685125374361383, + "grad_norm": 0.0, + "learning_rate": 2.681319875223688e-06, + "loss": 1.0962, + "step": 26174 + }, + { + "epoch": 0.7685418991132773, + "grad_norm": 0.0, + "learning_rate": 2.6806718797901066e-06, + "loss": 1.2783, + "step": 26175 + }, + { + "epoch": 0.7685712607904164, + "grad_norm": 0.0, + "learning_rate": 2.680023950547086e-06, + "loss": 1.2852, + "step": 26176 + }, + { + "epoch": 0.7686006224675553, + "grad_norm": 0.0, + "learning_rate": 2.6793760875004803e-06, + "loss": 1.2222, + "step": 26177 + }, + { + "epoch": 0.7686299841446943, + "grad_norm": 0.0, + "learning_rate": 2.678728290656154e-06, + "loss": 1.2051, + "step": 26178 + }, + { + "epoch": 0.7686593458218334, + "grad_norm": 0.0, + "learning_rate": 2.678080560019961e-06, + "loss": 1.1294, + "step": 26179 + }, + { + "epoch": 0.7686887074989723, + "grad_norm": 0.0, + "learning_rate": 2.6774328955977626e-06, + "loss": 1.4326, + "step": 26180 + }, + { + "epoch": 0.7687180691761113, + "grad_norm": 0.0, + "learning_rate": 2.6767852973954143e-06, + "loss": 1.1616, + "step": 26181 + }, + { + "epoch": 0.7687474308532504, + "grad_norm": 0.0, + "learning_rate": 2.6761377654187724e-06, + "loss": 1.1377, + "step": 26182 + }, + { + "epoch": 0.7687767925303893, + "grad_norm": 0.0, + "learning_rate": 2.675490299673692e-06, + "loss": 1.1299, + "step": 26183 + }, + { + "epoch": 0.7688061542075283, + "grad_norm": 0.0, + "learning_rate": 2.674842900166026e-06, + "loss": 1.3213, + "step": 26184 + }, + { + "epoch": 0.7688355158846674, + "grad_norm": 0.0, + "learning_rate": 2.674195566901634e-06, + "loss": 1.1982, + "step": 26185 + }, + { + "epoch": 0.7688648775618063, + "grad_norm": 0.0, + "learning_rate": 2.673548299886366e-06, + "loss": 1.2051, + "step": 26186 + }, + { + "epoch": 0.7688942392389453, + "grad_norm": 0.0, + "learning_rate": 2.6729010991260784e-06, + "loss": 1.2793, + "step": 26187 + }, + { + "epoch": 0.7689236009160844, + "grad_norm": 0.0, + "learning_rate": 2.6722539646266232e-06, + "loss": 1.2681, + "step": 26188 + }, + { + "epoch": 0.7689529625932233, + "grad_norm": 0.0, + "learning_rate": 2.6716068963938525e-06, + "loss": 1.2334, + "step": 26189 + }, + { + "epoch": 0.7689823242703623, + "grad_norm": 0.0, + "learning_rate": 2.6709598944336147e-06, + "loss": 1.1157, + "step": 26190 + }, + { + "epoch": 0.7690116859475014, + "grad_norm": 0.0, + "learning_rate": 2.670312958751766e-06, + "loss": 1.1504, + "step": 26191 + }, + { + "epoch": 0.7690410476246403, + "grad_norm": 0.0, + "learning_rate": 2.6696660893541514e-06, + "loss": 1.0815, + "step": 26192 + }, + { + "epoch": 0.7690704093017793, + "grad_norm": 0.0, + "learning_rate": 2.669019286246627e-06, + "loss": 1.2949, + "step": 26193 + }, + { + "epoch": 0.7690997709789184, + "grad_norm": 0.0, + "learning_rate": 2.668372549435038e-06, + "loss": 1.1738, + "step": 26194 + }, + { + "epoch": 0.7691291326560573, + "grad_norm": 0.0, + "learning_rate": 2.6677258789252314e-06, + "loss": 1.1978, + "step": 26195 + }, + { + "epoch": 0.7691584943331963, + "grad_norm": 0.0, + "learning_rate": 2.66707927472306e-06, + "loss": 1.2383, + "step": 26196 + }, + { + "epoch": 0.7691878560103353, + "grad_norm": 0.0, + "learning_rate": 2.666432736834369e-06, + "loss": 1.2305, + "step": 26197 + }, + { + "epoch": 0.7692172176874743, + "grad_norm": 0.0, + "learning_rate": 2.6657862652650044e-06, + "loss": 1.2793, + "step": 26198 + }, + { + "epoch": 0.7692465793646133, + "grad_norm": 0.0, + "learning_rate": 2.66513986002081e-06, + "loss": 1.1357, + "step": 26199 + }, + { + "epoch": 0.7692759410417523, + "grad_norm": 0.0, + "learning_rate": 2.664493521107638e-06, + "loss": 1.2402, + "step": 26200 + }, + { + "epoch": 0.7693053027188913, + "grad_norm": 0.0, + "learning_rate": 2.6638472485313273e-06, + "loss": 1.2939, + "step": 26201 + }, + { + "epoch": 0.7693346643960303, + "grad_norm": 0.0, + "learning_rate": 2.663201042297726e-06, + "loss": 1.2261, + "step": 26202 + }, + { + "epoch": 0.7693640260731693, + "grad_norm": 0.0, + "learning_rate": 2.6625549024126772e-06, + "loss": 1.2891, + "step": 26203 + }, + { + "epoch": 0.7693933877503083, + "grad_norm": 0.0, + "learning_rate": 2.6619088288820215e-06, + "loss": 1.2529, + "step": 26204 + }, + { + "epoch": 0.7694227494274473, + "grad_norm": 0.0, + "learning_rate": 2.6612628217116055e-06, + "loss": 1.2383, + "step": 26205 + }, + { + "epoch": 0.7694521111045863, + "grad_norm": 0.0, + "learning_rate": 2.6606168809072708e-06, + "loss": 1.2549, + "step": 26206 + }, + { + "epoch": 0.7694814727817253, + "grad_norm": 0.0, + "learning_rate": 2.659971006474855e-06, + "loss": 1.3164, + "step": 26207 + }, + { + "epoch": 0.7695108344588643, + "grad_norm": 0.0, + "learning_rate": 2.6593251984202006e-06, + "loss": 1.2959, + "step": 26208 + }, + { + "epoch": 0.7695401961360033, + "grad_norm": 0.0, + "learning_rate": 2.6586794567491503e-06, + "loss": 1.0684, + "step": 26209 + }, + { + "epoch": 0.7695695578131423, + "grad_norm": 0.0, + "learning_rate": 2.6580337814675383e-06, + "loss": 1.1997, + "step": 26210 + }, + { + "epoch": 0.7695989194902813, + "grad_norm": 0.0, + "learning_rate": 2.65738817258121e-06, + "loss": 1.1689, + "step": 26211 + }, + { + "epoch": 0.7696282811674203, + "grad_norm": 0.0, + "learning_rate": 2.656742630096002e-06, + "loss": 1.1543, + "step": 26212 + }, + { + "epoch": 0.7696576428445593, + "grad_norm": 0.0, + "learning_rate": 2.6560971540177503e-06, + "loss": 1.0635, + "step": 26213 + }, + { + "epoch": 0.7696870045216982, + "grad_norm": 0.0, + "learning_rate": 2.655451744352293e-06, + "loss": 1.291, + "step": 26214 + }, + { + "epoch": 0.7697163661988373, + "grad_norm": 0.0, + "learning_rate": 2.6548064011054643e-06, + "loss": 1.1777, + "step": 26215 + }, + { + "epoch": 0.7697457278759763, + "grad_norm": 0.0, + "learning_rate": 2.6541611242831046e-06, + "loss": 1.168, + "step": 26216 + }, + { + "epoch": 0.7697750895531152, + "grad_norm": 0.0, + "learning_rate": 2.6535159138910436e-06, + "loss": 1.1748, + "step": 26217 + }, + { + "epoch": 0.7698044512302543, + "grad_norm": 0.0, + "learning_rate": 2.652870769935124e-06, + "loss": 1.3652, + "step": 26218 + }, + { + "epoch": 0.7698338129073933, + "grad_norm": 0.0, + "learning_rate": 2.652225692421172e-06, + "loss": 1.1797, + "step": 26219 + }, + { + "epoch": 0.7698631745845322, + "grad_norm": 0.0, + "learning_rate": 2.651580681355028e-06, + "loss": 1.2363, + "step": 26220 + }, + { + "epoch": 0.7698925362616713, + "grad_norm": 0.0, + "learning_rate": 2.6509357367425213e-06, + "loss": 1.2324, + "step": 26221 + }, + { + "epoch": 0.7699218979388103, + "grad_norm": 0.0, + "learning_rate": 2.650290858589486e-06, + "loss": 1.1216, + "step": 26222 + }, + { + "epoch": 0.7699512596159492, + "grad_norm": 0.0, + "learning_rate": 2.6496460469017517e-06, + "loss": 1.1064, + "step": 26223 + }, + { + "epoch": 0.7699806212930883, + "grad_norm": 0.0, + "learning_rate": 2.649001301685148e-06, + "loss": 1.1914, + "step": 26224 + }, + { + "epoch": 0.7700099829702273, + "grad_norm": 0.0, + "learning_rate": 2.6483566229455114e-06, + "loss": 1.25, + "step": 26225 + }, + { + "epoch": 0.7700393446473662, + "grad_norm": 0.0, + "learning_rate": 2.6477120106886655e-06, + "loss": 1.2271, + "step": 26226 + }, + { + "epoch": 0.7700687063245053, + "grad_norm": 0.0, + "learning_rate": 2.6470674649204455e-06, + "loss": 1.2012, + "step": 26227 + }, + { + "epoch": 0.7700980680016443, + "grad_norm": 0.0, + "learning_rate": 2.646422985646675e-06, + "loss": 1.2788, + "step": 26228 + }, + { + "epoch": 0.7701274296787832, + "grad_norm": 0.0, + "learning_rate": 2.6457785728731876e-06, + "loss": 1.1299, + "step": 26229 + }, + { + "epoch": 0.7701567913559223, + "grad_norm": 0.0, + "learning_rate": 2.645134226605808e-06, + "loss": 1.187, + "step": 26230 + }, + { + "epoch": 0.7701861530330613, + "grad_norm": 0.0, + "learning_rate": 2.6444899468503626e-06, + "loss": 1.167, + "step": 26231 + }, + { + "epoch": 0.7702155147102002, + "grad_norm": 0.0, + "learning_rate": 2.6438457336126756e-06, + "loss": 1.1489, + "step": 26232 + }, + { + "epoch": 0.7702448763873393, + "grad_norm": 0.0, + "learning_rate": 2.6432015868985795e-06, + "loss": 1.3018, + "step": 26233 + }, + { + "epoch": 0.7702742380644783, + "grad_norm": 0.0, + "learning_rate": 2.6425575067138943e-06, + "loss": 1.2148, + "step": 26234 + }, + { + "epoch": 0.7703035997416172, + "grad_norm": 0.0, + "learning_rate": 2.641913493064444e-06, + "loss": 1.1572, + "step": 26235 + }, + { + "epoch": 0.7703329614187563, + "grad_norm": 0.0, + "learning_rate": 2.6412695459560567e-06, + "loss": 1.2036, + "step": 26236 + }, + { + "epoch": 0.7703623230958953, + "grad_norm": 0.0, + "learning_rate": 2.640625665394554e-06, + "loss": 1.1934, + "step": 26237 + }, + { + "epoch": 0.7703916847730342, + "grad_norm": 0.0, + "learning_rate": 2.639981851385758e-06, + "loss": 1.2148, + "step": 26238 + }, + { + "epoch": 0.7704210464501733, + "grad_norm": 0.0, + "learning_rate": 2.639338103935487e-06, + "loss": 1.2881, + "step": 26239 + }, + { + "epoch": 0.7704504081273122, + "grad_norm": 0.0, + "learning_rate": 2.6386944230495705e-06, + "loss": 1.3311, + "step": 26240 + }, + { + "epoch": 0.7704797698044512, + "grad_norm": 0.0, + "learning_rate": 2.638050808733823e-06, + "loss": 1.2051, + "step": 26241 + }, + { + "epoch": 0.7705091314815903, + "grad_norm": 0.0, + "learning_rate": 2.6374072609940694e-06, + "loss": 1.0796, + "step": 26242 + }, + { + "epoch": 0.7705384931587292, + "grad_norm": 0.0, + "learning_rate": 2.636763779836129e-06, + "loss": 1.1851, + "step": 26243 + }, + { + "epoch": 0.7705678548358682, + "grad_norm": 0.0, + "learning_rate": 2.636120365265815e-06, + "loss": 1.3262, + "step": 26244 + }, + { + "epoch": 0.7705972165130073, + "grad_norm": 0.0, + "learning_rate": 2.6354770172889542e-06, + "loss": 1.2949, + "step": 26245 + }, + { + "epoch": 0.7706265781901462, + "grad_norm": 0.0, + "learning_rate": 2.6348337359113607e-06, + "loss": 1.2539, + "step": 26246 + }, + { + "epoch": 0.7706559398672852, + "grad_norm": 0.0, + "learning_rate": 2.634190521138852e-06, + "loss": 1.3066, + "step": 26247 + }, + { + "epoch": 0.7706853015444243, + "grad_norm": 0.0, + "learning_rate": 2.633547372977242e-06, + "loss": 1.2734, + "step": 26248 + }, + { + "epoch": 0.7707146632215632, + "grad_norm": 0.0, + "learning_rate": 2.632904291432352e-06, + "loss": 1.2002, + "step": 26249 + }, + { + "epoch": 0.7707440248987022, + "grad_norm": 0.0, + "learning_rate": 2.632261276509993e-06, + "loss": 1.248, + "step": 26250 + }, + { + "epoch": 0.7707733865758412, + "grad_norm": 0.0, + "learning_rate": 2.6316183282159857e-06, + "loss": 1.27, + "step": 26251 + }, + { + "epoch": 0.7708027482529802, + "grad_norm": 0.0, + "learning_rate": 2.6309754465561398e-06, + "loss": 1.2861, + "step": 26252 + }, + { + "epoch": 0.7708321099301192, + "grad_norm": 0.0, + "learning_rate": 2.6303326315362675e-06, + "loss": 1.2324, + "step": 26253 + }, + { + "epoch": 0.7708614716072582, + "grad_norm": 0.0, + "learning_rate": 2.6296898831621876e-06, + "loss": 1.2681, + "step": 26254 + }, + { + "epoch": 0.7708908332843972, + "grad_norm": 0.0, + "learning_rate": 2.6290472014397094e-06, + "loss": 1.3486, + "step": 26255 + }, + { + "epoch": 0.7709201949615362, + "grad_norm": 0.0, + "learning_rate": 2.6284045863746457e-06, + "loss": 1.2793, + "step": 26256 + }, + { + "epoch": 0.7709495566386751, + "grad_norm": 0.0, + "learning_rate": 2.627762037972804e-06, + "loss": 1.3252, + "step": 26257 + }, + { + "epoch": 0.7709789183158142, + "grad_norm": 0.0, + "learning_rate": 2.627119556240001e-06, + "loss": 1.1108, + "step": 26258 + }, + { + "epoch": 0.7710082799929532, + "grad_norm": 0.0, + "learning_rate": 2.626477141182041e-06, + "loss": 1.1592, + "step": 26259 + }, + { + "epoch": 0.7710376416700921, + "grad_norm": 0.0, + "learning_rate": 2.62583479280474e-06, + "loss": 1.2803, + "step": 26260 + }, + { + "epoch": 0.7710670033472312, + "grad_norm": 0.0, + "learning_rate": 2.625192511113901e-06, + "loss": 1.1743, + "step": 26261 + }, + { + "epoch": 0.7710963650243702, + "grad_norm": 0.0, + "learning_rate": 2.62455029611534e-06, + "loss": 1.2158, + "step": 26262 + }, + { + "epoch": 0.7711257267015091, + "grad_norm": 0.0, + "learning_rate": 2.6239081478148563e-06, + "loss": 1.2773, + "step": 26263 + }, + { + "epoch": 0.7711550883786482, + "grad_norm": 0.0, + "learning_rate": 2.6232660662182574e-06, + "loss": 1.1733, + "step": 26264 + }, + { + "epoch": 0.7711844500557872, + "grad_norm": 0.0, + "learning_rate": 2.622624051331356e-06, + "loss": 1.0591, + "step": 26265 + }, + { + "epoch": 0.7712138117329261, + "grad_norm": 0.0, + "learning_rate": 2.6219821031599525e-06, + "loss": 1.1938, + "step": 26266 + }, + { + "epoch": 0.7712431734100652, + "grad_norm": 0.0, + "learning_rate": 2.621340221709856e-06, + "loss": 1.1875, + "step": 26267 + }, + { + "epoch": 0.7712725350872042, + "grad_norm": 0.0, + "learning_rate": 2.6206984069868668e-06, + "loss": 1.2178, + "step": 26268 + }, + { + "epoch": 0.7713018967643431, + "grad_norm": 0.0, + "learning_rate": 2.620056658996796e-06, + "loss": 1.1992, + "step": 26269 + }, + { + "epoch": 0.7713312584414822, + "grad_norm": 0.0, + "learning_rate": 2.6194149777454414e-06, + "loss": 1.2124, + "step": 26270 + }, + { + "epoch": 0.7713606201186212, + "grad_norm": 0.0, + "learning_rate": 2.618773363238608e-06, + "loss": 1.2588, + "step": 26271 + }, + { + "epoch": 0.7713899817957601, + "grad_norm": 0.0, + "learning_rate": 2.618131815482098e-06, + "loss": 1.2183, + "step": 26272 + }, + { + "epoch": 0.7714193434728992, + "grad_norm": 0.0, + "learning_rate": 2.6174903344817083e-06, + "loss": 1.3008, + "step": 26273 + }, + { + "epoch": 0.7714487051500382, + "grad_norm": 0.0, + "learning_rate": 2.616848920243248e-06, + "loss": 1.1392, + "step": 26274 + }, + { + "epoch": 0.7714780668271771, + "grad_norm": 0.0, + "learning_rate": 2.61620757277251e-06, + "loss": 1.1777, + "step": 26275 + }, + { + "epoch": 0.7715074285043162, + "grad_norm": 0.0, + "learning_rate": 2.615566292075301e-06, + "loss": 1.3535, + "step": 26276 + }, + { + "epoch": 0.7715367901814552, + "grad_norm": 0.0, + "learning_rate": 2.6149250781574154e-06, + "loss": 1.2549, + "step": 26277 + }, + { + "epoch": 0.7715661518585941, + "grad_norm": 0.0, + "learning_rate": 2.614283931024656e-06, + "loss": 1.1934, + "step": 26278 + }, + { + "epoch": 0.7715955135357332, + "grad_norm": 0.0, + "learning_rate": 2.6136428506828183e-06, + "loss": 1.3105, + "step": 26279 + }, + { + "epoch": 0.7716248752128722, + "grad_norm": 0.0, + "learning_rate": 2.6130018371376996e-06, + "loss": 1.0923, + "step": 26280 + }, + { + "epoch": 0.7716542368900111, + "grad_norm": 0.0, + "learning_rate": 2.6123608903950947e-06, + "loss": 1.2358, + "step": 26281 + }, + { + "epoch": 0.7716835985671502, + "grad_norm": 0.0, + "learning_rate": 2.6117200104608053e-06, + "loss": 1.209, + "step": 26282 + }, + { + "epoch": 0.7717129602442891, + "grad_norm": 0.0, + "learning_rate": 2.6110791973406247e-06, + "loss": 1.2222, + "step": 26283 + }, + { + "epoch": 0.7717423219214281, + "grad_norm": 0.0, + "learning_rate": 2.6104384510403436e-06, + "loss": 1.2295, + "step": 26284 + }, + { + "epoch": 0.7717716835985672, + "grad_norm": 0.0, + "learning_rate": 2.609797771565763e-06, + "loss": 1.2095, + "step": 26285 + }, + { + "epoch": 0.7718010452757061, + "grad_norm": 0.0, + "learning_rate": 2.6091571589226706e-06, + "loss": 1.1655, + "step": 26286 + }, + { + "epoch": 0.7718304069528451, + "grad_norm": 0.0, + "learning_rate": 2.6085166131168682e-06, + "loss": 1.2246, + "step": 26287 + }, + { + "epoch": 0.7718597686299842, + "grad_norm": 0.0, + "learning_rate": 2.607876134154138e-06, + "loss": 1.1729, + "step": 26288 + }, + { + "epoch": 0.7718891303071231, + "grad_norm": 0.0, + "learning_rate": 2.60723572204028e-06, + "loss": 1.2725, + "step": 26289 + }, + { + "epoch": 0.7719184919842621, + "grad_norm": 0.0, + "learning_rate": 2.6065953767810803e-06, + "loss": 1.3232, + "step": 26290 + }, + { + "epoch": 0.7719478536614012, + "grad_norm": 0.0, + "learning_rate": 2.6059550983823334e-06, + "loss": 1.2324, + "step": 26291 + }, + { + "epoch": 0.7719772153385401, + "grad_norm": 0.0, + "learning_rate": 2.60531488684983e-06, + "loss": 1.2012, + "step": 26292 + }, + { + "epoch": 0.7720065770156791, + "grad_norm": 0.0, + "learning_rate": 2.6046747421893537e-06, + "loss": 1.0928, + "step": 26293 + }, + { + "epoch": 0.7720359386928182, + "grad_norm": 0.0, + "learning_rate": 2.6040346644067015e-06, + "loss": 1.2407, + "step": 26294 + }, + { + "epoch": 0.7720653003699571, + "grad_norm": 0.0, + "learning_rate": 2.6033946535076572e-06, + "loss": 1.3379, + "step": 26295 + }, + { + "epoch": 0.7720946620470961, + "grad_norm": 0.0, + "learning_rate": 2.60275470949801e-06, + "loss": 1.1855, + "step": 26296 + }, + { + "epoch": 0.7721240237242352, + "grad_norm": 0.0, + "learning_rate": 2.6021148323835434e-06, + "loss": 1.2842, + "step": 26297 + }, + { + "epoch": 0.7721533854013741, + "grad_norm": 0.0, + "learning_rate": 2.6014750221700493e-06, + "loss": 1.2129, + "step": 26298 + }, + { + "epoch": 0.7721827470785131, + "grad_norm": 0.0, + "learning_rate": 2.6008352788633083e-06, + "loss": 1.1895, + "step": 26299 + }, + { + "epoch": 0.7722121087556522, + "grad_norm": 0.0, + "learning_rate": 2.6001956024691123e-06, + "loss": 1.1704, + "step": 26300 + }, + { + "epoch": 0.7722414704327911, + "grad_norm": 0.0, + "learning_rate": 2.5995559929932402e-06, + "loss": 1.0586, + "step": 26301 + }, + { + "epoch": 0.7722708321099301, + "grad_norm": 0.0, + "learning_rate": 2.598916450441481e-06, + "loss": 1.2163, + "step": 26302 + }, + { + "epoch": 0.7723001937870692, + "grad_norm": 0.0, + "learning_rate": 2.5982769748196156e-06, + "loss": 1.147, + "step": 26303 + }, + { + "epoch": 0.7723295554642081, + "grad_norm": 0.0, + "learning_rate": 2.5976375661334263e-06, + "loss": 1.1621, + "step": 26304 + }, + { + "epoch": 0.7723589171413471, + "grad_norm": 0.0, + "learning_rate": 2.596998224388698e-06, + "loss": 1.3135, + "step": 26305 + }, + { + "epoch": 0.7723882788184862, + "grad_norm": 0.0, + "learning_rate": 2.596358949591207e-06, + "loss": 1.166, + "step": 26306 + }, + { + "epoch": 0.7724176404956251, + "grad_norm": 0.0, + "learning_rate": 2.5957197417467408e-06, + "loss": 1.1587, + "step": 26307 + }, + { + "epoch": 0.7724470021727641, + "grad_norm": 0.0, + "learning_rate": 2.595080600861074e-06, + "loss": 1.2295, + "step": 26308 + }, + { + "epoch": 0.7724763638499031, + "grad_norm": 0.0, + "learning_rate": 2.5944415269399937e-06, + "loss": 1.252, + "step": 26309 + }, + { + "epoch": 0.7725057255270421, + "grad_norm": 0.0, + "learning_rate": 2.5938025199892714e-06, + "loss": 1.1206, + "step": 26310 + }, + { + "epoch": 0.7725350872041811, + "grad_norm": 0.0, + "learning_rate": 2.5931635800146927e-06, + "loss": 1.144, + "step": 26311 + }, + { + "epoch": 0.7725644488813201, + "grad_norm": 0.0, + "learning_rate": 2.592524707022036e-06, + "loss": 1.1724, + "step": 26312 + }, + { + "epoch": 0.7725938105584591, + "grad_norm": 0.0, + "learning_rate": 2.5918859010170695e-06, + "loss": 1.3115, + "step": 26313 + }, + { + "epoch": 0.7726231722355981, + "grad_norm": 0.0, + "learning_rate": 2.591247162005579e-06, + "loss": 1.3496, + "step": 26314 + }, + { + "epoch": 0.7726525339127371, + "grad_norm": 0.0, + "learning_rate": 2.5906084899933346e-06, + "loss": 1.1489, + "step": 26315 + }, + { + "epoch": 0.7726818955898761, + "grad_norm": 0.0, + "learning_rate": 2.589969884986119e-06, + "loss": 1.0767, + "step": 26316 + }, + { + "epoch": 0.7727112572670151, + "grad_norm": 0.0, + "learning_rate": 2.5893313469896995e-06, + "loss": 1.1973, + "step": 26317 + }, + { + "epoch": 0.7727406189441541, + "grad_norm": 0.0, + "learning_rate": 2.5886928760098585e-06, + "loss": 1.312, + "step": 26318 + }, + { + "epoch": 0.7727699806212931, + "grad_norm": 0.0, + "learning_rate": 2.5880544720523647e-06, + "loss": 1.2466, + "step": 26319 + }, + { + "epoch": 0.7727993422984321, + "grad_norm": 0.0, + "learning_rate": 2.587416135122993e-06, + "loss": 1.166, + "step": 26320 + }, + { + "epoch": 0.7728287039755711, + "grad_norm": 0.0, + "learning_rate": 2.5867778652275165e-06, + "loss": 1.2031, + "step": 26321 + }, + { + "epoch": 0.7728580656527101, + "grad_norm": 0.0, + "learning_rate": 2.5861396623717015e-06, + "loss": 1.1377, + "step": 26322 + }, + { + "epoch": 0.772887427329849, + "grad_norm": 0.0, + "learning_rate": 2.585501526561329e-06, + "loss": 1.0962, + "step": 26323 + }, + { + "epoch": 0.7729167890069881, + "grad_norm": 0.0, + "learning_rate": 2.5848634578021613e-06, + "loss": 1.3369, + "step": 26324 + }, + { + "epoch": 0.7729461506841271, + "grad_norm": 0.0, + "learning_rate": 2.584225456099976e-06, + "loss": 1.1714, + "step": 26325 + }, + { + "epoch": 0.772975512361266, + "grad_norm": 0.0, + "learning_rate": 2.5835875214605353e-06, + "loss": 1.0938, + "step": 26326 + }, + { + "epoch": 0.7730048740384051, + "grad_norm": 0.0, + "learning_rate": 2.582949653889615e-06, + "loss": 1.1846, + "step": 26327 + }, + { + "epoch": 0.7730342357155441, + "grad_norm": 0.0, + "learning_rate": 2.5823118533929805e-06, + "loss": 1.2998, + "step": 26328 + }, + { + "epoch": 0.773063597392683, + "grad_norm": 0.0, + "learning_rate": 2.5816741199764007e-06, + "loss": 1.2017, + "step": 26329 + }, + { + "epoch": 0.7730929590698221, + "grad_norm": 0.0, + "learning_rate": 2.5810364536456377e-06, + "loss": 1.1279, + "step": 26330 + }, + { + "epoch": 0.7731223207469611, + "grad_norm": 0.0, + "learning_rate": 2.580398854406465e-06, + "loss": 1.2334, + "step": 26331 + }, + { + "epoch": 0.7731516824241, + "grad_norm": 0.0, + "learning_rate": 2.5797613222646456e-06, + "loss": 1.229, + "step": 26332 + }, + { + "epoch": 0.7731810441012391, + "grad_norm": 0.0, + "learning_rate": 2.5791238572259427e-06, + "loss": 1.1973, + "step": 26333 + }, + { + "epoch": 0.7732104057783781, + "grad_norm": 0.0, + "learning_rate": 2.5784864592961258e-06, + "loss": 1.1455, + "step": 26334 + }, + { + "epoch": 0.773239767455517, + "grad_norm": 0.0, + "learning_rate": 2.577849128480954e-06, + "loss": 1.3457, + "step": 26335 + }, + { + "epoch": 0.7732691291326561, + "grad_norm": 0.0, + "learning_rate": 2.5772118647861956e-06, + "loss": 1.0439, + "step": 26336 + }, + { + "epoch": 0.7732984908097951, + "grad_norm": 0.0, + "learning_rate": 2.5765746682176117e-06, + "loss": 1.1943, + "step": 26337 + }, + { + "epoch": 0.773327852486934, + "grad_norm": 0.0, + "learning_rate": 2.5759375387809637e-06, + "loss": 1.2178, + "step": 26338 + }, + { + "epoch": 0.7733572141640731, + "grad_norm": 0.0, + "learning_rate": 2.575300476482011e-06, + "loss": 1.2539, + "step": 26339 + }, + { + "epoch": 0.7733865758412121, + "grad_norm": 0.0, + "learning_rate": 2.5746634813265215e-06, + "loss": 1.146, + "step": 26340 + }, + { + "epoch": 0.773415937518351, + "grad_norm": 0.0, + "learning_rate": 2.574026553320251e-06, + "loss": 1.1479, + "step": 26341 + }, + { + "epoch": 0.7734452991954901, + "grad_norm": 0.0, + "learning_rate": 2.5733896924689573e-06, + "loss": 1.3789, + "step": 26342 + }, + { + "epoch": 0.7734746608726291, + "grad_norm": 0.0, + "learning_rate": 2.5727528987784057e-06, + "loss": 1.3184, + "step": 26343 + }, + { + "epoch": 0.773504022549768, + "grad_norm": 0.0, + "learning_rate": 2.572116172254352e-06, + "loss": 1.2129, + "step": 26344 + }, + { + "epoch": 0.7735333842269071, + "grad_norm": 0.0, + "learning_rate": 2.571479512902554e-06, + "loss": 1.125, + "step": 26345 + }, + { + "epoch": 0.7735627459040461, + "grad_norm": 0.0, + "learning_rate": 2.5708429207287654e-06, + "loss": 1.2168, + "step": 26346 + }, + { + "epoch": 0.773592107581185, + "grad_norm": 0.0, + "learning_rate": 2.570206395738751e-06, + "loss": 1.2344, + "step": 26347 + }, + { + "epoch": 0.7736214692583241, + "grad_norm": 0.0, + "learning_rate": 2.56956993793826e-06, + "loss": 1.3379, + "step": 26348 + }, + { + "epoch": 0.773650830935463, + "grad_norm": 0.0, + "learning_rate": 2.5689335473330533e-06, + "loss": 1.2949, + "step": 26349 + }, + { + "epoch": 0.773680192612602, + "grad_norm": 0.0, + "learning_rate": 2.5682972239288807e-06, + "loss": 1.2373, + "step": 26350 + }, + { + "epoch": 0.773709554289741, + "grad_norm": 0.0, + "learning_rate": 2.5676609677315035e-06, + "loss": 1.2373, + "step": 26351 + }, + { + "epoch": 0.77373891596688, + "grad_norm": 0.0, + "learning_rate": 2.5670247787466706e-06, + "loss": 1.1865, + "step": 26352 + }, + { + "epoch": 0.773768277644019, + "grad_norm": 0.0, + "learning_rate": 2.5663886569801366e-06, + "loss": 1.292, + "step": 26353 + }, + { + "epoch": 0.773797639321158, + "grad_norm": 0.0, + "learning_rate": 2.5657526024376533e-06, + "loss": 1.2471, + "step": 26354 + }, + { + "epoch": 0.773827000998297, + "grad_norm": 0.0, + "learning_rate": 2.565116615124971e-06, + "loss": 1.2598, + "step": 26355 + }, + { + "epoch": 0.773856362675436, + "grad_norm": 0.0, + "learning_rate": 2.5644806950478452e-06, + "loss": 1.3135, + "step": 26356 + }, + { + "epoch": 0.773885724352575, + "grad_norm": 0.0, + "learning_rate": 2.563844842212023e-06, + "loss": 1.2158, + "step": 26357 + }, + { + "epoch": 0.773915086029714, + "grad_norm": 0.0, + "learning_rate": 2.563209056623258e-06, + "loss": 1.207, + "step": 26358 + }, + { + "epoch": 0.773944447706853, + "grad_norm": 0.0, + "learning_rate": 2.5625733382872965e-06, + "loss": 1.2881, + "step": 26359 + }, + { + "epoch": 0.773973809383992, + "grad_norm": 0.0, + "learning_rate": 2.561937687209891e-06, + "loss": 1.2559, + "step": 26360 + }, + { + "epoch": 0.774003171061131, + "grad_norm": 0.0, + "learning_rate": 2.561302103396789e-06, + "loss": 1.3262, + "step": 26361 + }, + { + "epoch": 0.77403253273827, + "grad_norm": 0.0, + "learning_rate": 2.560666586853736e-06, + "loss": 1.1357, + "step": 26362 + }, + { + "epoch": 0.774061894415409, + "grad_norm": 0.0, + "learning_rate": 2.5600311375864808e-06, + "loss": 1.2153, + "step": 26363 + }, + { + "epoch": 0.774091256092548, + "grad_norm": 0.0, + "learning_rate": 2.5593957556007666e-06, + "loss": 1.2812, + "step": 26364 + }, + { + "epoch": 0.774120617769687, + "grad_norm": 0.0, + "learning_rate": 2.5587604409023447e-06, + "loss": 1.1748, + "step": 26365 + }, + { + "epoch": 0.774149979446826, + "grad_norm": 0.0, + "learning_rate": 2.558125193496954e-06, + "loss": 1.3066, + "step": 26366 + }, + { + "epoch": 0.774179341123965, + "grad_norm": 0.0, + "learning_rate": 2.557490013390348e-06, + "loss": 1.1582, + "step": 26367 + }, + { + "epoch": 0.774208702801104, + "grad_norm": 0.0, + "learning_rate": 2.5568549005882647e-06, + "loss": 1.2861, + "step": 26368 + }, + { + "epoch": 0.774238064478243, + "grad_norm": 0.0, + "learning_rate": 2.556219855096449e-06, + "loss": 1.1279, + "step": 26369 + }, + { + "epoch": 0.774267426155382, + "grad_norm": 0.0, + "learning_rate": 2.555584876920644e-06, + "loss": 1.2559, + "step": 26370 + }, + { + "epoch": 0.774296787832521, + "grad_norm": 0.0, + "learning_rate": 2.554949966066588e-06, + "loss": 1.207, + "step": 26371 + }, + { + "epoch": 0.77432614950966, + "grad_norm": 0.0, + "learning_rate": 2.5543151225400287e-06, + "loss": 1.1318, + "step": 26372 + }, + { + "epoch": 0.774355511186799, + "grad_norm": 0.0, + "learning_rate": 2.553680346346702e-06, + "loss": 1.144, + "step": 26373 + }, + { + "epoch": 0.774384872863938, + "grad_norm": 0.0, + "learning_rate": 2.5530456374923542e-06, + "loss": 1.229, + "step": 26374 + }, + { + "epoch": 0.7744142345410769, + "grad_norm": 0.0, + "learning_rate": 2.5524109959827192e-06, + "loss": 1.1729, + "step": 26375 + }, + { + "epoch": 0.774443596218216, + "grad_norm": 0.0, + "learning_rate": 2.551776421823542e-06, + "loss": 1.2939, + "step": 26376 + }, + { + "epoch": 0.774472957895355, + "grad_norm": 0.0, + "learning_rate": 2.5511419150205576e-06, + "loss": 1.2549, + "step": 26377 + }, + { + "epoch": 0.7745023195724939, + "grad_norm": 0.0, + "learning_rate": 2.5505074755795045e-06, + "loss": 1.168, + "step": 26378 + }, + { + "epoch": 0.774531681249633, + "grad_norm": 0.0, + "learning_rate": 2.5498731035061165e-06, + "loss": 1.1714, + "step": 26379 + }, + { + "epoch": 0.774561042926772, + "grad_norm": 0.0, + "learning_rate": 2.5492387988061386e-06, + "loss": 1.2852, + "step": 26380 + }, + { + "epoch": 0.7745904046039109, + "grad_norm": 0.0, + "learning_rate": 2.5486045614853007e-06, + "loss": 1.332, + "step": 26381 + }, + { + "epoch": 0.77461976628105, + "grad_norm": 0.0, + "learning_rate": 2.547970391549338e-06, + "loss": 1.3057, + "step": 26382 + }, + { + "epoch": 0.774649127958189, + "grad_norm": 0.0, + "learning_rate": 2.547336289003991e-06, + "loss": 1.1914, + "step": 26383 + }, + { + "epoch": 0.7746784896353279, + "grad_norm": 0.0, + "learning_rate": 2.5467022538549867e-06, + "loss": 1.3438, + "step": 26384 + }, + { + "epoch": 0.774707851312467, + "grad_norm": 0.0, + "learning_rate": 2.5460682861080666e-06, + "loss": 1.3008, + "step": 26385 + }, + { + "epoch": 0.774737212989606, + "grad_norm": 0.0, + "learning_rate": 2.545434385768959e-06, + "loss": 1.0947, + "step": 26386 + }, + { + "epoch": 0.7747665746667449, + "grad_norm": 0.0, + "learning_rate": 2.5448005528433984e-06, + "loss": 1.1895, + "step": 26387 + }, + { + "epoch": 0.774795936343884, + "grad_norm": 0.0, + "learning_rate": 2.5441667873371124e-06, + "loss": 1.2168, + "step": 26388 + }, + { + "epoch": 0.774825298021023, + "grad_norm": 0.0, + "learning_rate": 2.5435330892558373e-06, + "loss": 1.1548, + "step": 26389 + }, + { + "epoch": 0.7748546596981619, + "grad_norm": 0.0, + "learning_rate": 2.5428994586053035e-06, + "loss": 1.2451, + "step": 26390 + }, + { + "epoch": 0.774884021375301, + "grad_norm": 0.0, + "learning_rate": 2.542265895391237e-06, + "loss": 1.168, + "step": 26391 + }, + { + "epoch": 0.77491338305244, + "grad_norm": 0.0, + "learning_rate": 2.5416323996193728e-06, + "loss": 1.228, + "step": 26392 + }, + { + "epoch": 0.7749427447295789, + "grad_norm": 0.0, + "learning_rate": 2.5409989712954354e-06, + "loss": 1.29, + "step": 26393 + }, + { + "epoch": 0.774972106406718, + "grad_norm": 0.0, + "learning_rate": 2.540365610425156e-06, + "loss": 1.1934, + "step": 26394 + }, + { + "epoch": 0.775001468083857, + "grad_norm": 0.0, + "learning_rate": 2.5397323170142575e-06, + "loss": 1.1895, + "step": 26395 + }, + { + "epoch": 0.7750308297609959, + "grad_norm": 0.0, + "learning_rate": 2.539099091068473e-06, + "loss": 1.2554, + "step": 26396 + }, + { + "epoch": 0.775060191438135, + "grad_norm": 0.0, + "learning_rate": 2.5384659325935236e-06, + "loss": 1.2891, + "step": 26397 + }, + { + "epoch": 0.7750895531152739, + "grad_norm": 0.0, + "learning_rate": 2.5378328415951403e-06, + "loss": 1.1626, + "step": 26398 + }, + { + "epoch": 0.7751189147924129, + "grad_norm": 0.0, + "learning_rate": 2.5371998180790434e-06, + "loss": 1.3486, + "step": 26399 + }, + { + "epoch": 0.775148276469552, + "grad_norm": 0.0, + "learning_rate": 2.536566862050962e-06, + "loss": 1.1948, + "step": 26400 + }, + { + "epoch": 0.7751776381466909, + "grad_norm": 0.0, + "learning_rate": 2.535933973516619e-06, + "loss": 1.2207, + "step": 26401 + }, + { + "epoch": 0.7752069998238299, + "grad_norm": 0.0, + "learning_rate": 2.5353011524817352e-06, + "loss": 1.2754, + "step": 26402 + }, + { + "epoch": 0.775236361500969, + "grad_norm": 0.0, + "learning_rate": 2.534668398952036e-06, + "loss": 1.1885, + "step": 26403 + }, + { + "epoch": 0.7752657231781079, + "grad_norm": 0.0, + "learning_rate": 2.534035712933238e-06, + "loss": 1.1514, + "step": 26404 + }, + { + "epoch": 0.7752950848552469, + "grad_norm": 0.0, + "learning_rate": 2.5334030944310718e-06, + "loss": 1.1934, + "step": 26405 + }, + { + "epoch": 0.775324446532386, + "grad_norm": 0.0, + "learning_rate": 2.5327705434512497e-06, + "loss": 1.2324, + "step": 26406 + }, + { + "epoch": 0.7753538082095249, + "grad_norm": 0.0, + "learning_rate": 2.5321380599994993e-06, + "loss": 1.1299, + "step": 26407 + }, + { + "epoch": 0.7753831698866639, + "grad_norm": 0.0, + "learning_rate": 2.5315056440815332e-06, + "loss": 1.2705, + "step": 26408 + }, + { + "epoch": 0.775412531563803, + "grad_norm": 0.0, + "learning_rate": 2.530873295703078e-06, + "loss": 1.2402, + "step": 26409 + }, + { + "epoch": 0.7754418932409419, + "grad_norm": 0.0, + "learning_rate": 2.5302410148698485e-06, + "loss": 1.2295, + "step": 26410 + }, + { + "epoch": 0.7754712549180809, + "grad_norm": 0.0, + "learning_rate": 2.529608801587562e-06, + "loss": 1.1602, + "step": 26411 + }, + { + "epoch": 0.77550061659522, + "grad_norm": 0.0, + "learning_rate": 2.5289766558619365e-06, + "loss": 1.3076, + "step": 26412 + }, + { + "epoch": 0.7755299782723589, + "grad_norm": 0.0, + "learning_rate": 2.528344577698686e-06, + "loss": 1.2012, + "step": 26413 + }, + { + "epoch": 0.7755593399494979, + "grad_norm": 0.0, + "learning_rate": 2.527712567103532e-06, + "loss": 1.2744, + "step": 26414 + }, + { + "epoch": 0.775588701626637, + "grad_norm": 0.0, + "learning_rate": 2.5270806240821833e-06, + "loss": 1.3643, + "step": 26415 + }, + { + "epoch": 0.7756180633037759, + "grad_norm": 0.0, + "learning_rate": 2.526448748640361e-06, + "loss": 1.0742, + "step": 26416 + }, + { + "epoch": 0.7756474249809149, + "grad_norm": 0.0, + "learning_rate": 2.5258169407837774e-06, + "loss": 1.1724, + "step": 26417 + }, + { + "epoch": 0.775676786658054, + "grad_norm": 0.0, + "learning_rate": 2.5251852005181454e-06, + "loss": 1.1133, + "step": 26418 + }, + { + "epoch": 0.7757061483351929, + "grad_norm": 0.0, + "learning_rate": 2.524553527849175e-06, + "loss": 1.3535, + "step": 26419 + }, + { + "epoch": 0.7757355100123319, + "grad_norm": 0.0, + "learning_rate": 2.523921922782584e-06, + "loss": 1.2852, + "step": 26420 + }, + { + "epoch": 0.775764871689471, + "grad_norm": 0.0, + "learning_rate": 2.523290385324082e-06, + "loss": 1.2559, + "step": 26421 + }, + { + "epoch": 0.7757942333666099, + "grad_norm": 0.0, + "learning_rate": 2.5226589154793768e-06, + "loss": 1.2456, + "step": 26422 + }, + { + "epoch": 0.7758235950437489, + "grad_norm": 0.0, + "learning_rate": 2.5220275132541848e-06, + "loss": 1.248, + "step": 26423 + }, + { + "epoch": 0.7758529567208879, + "grad_norm": 0.0, + "learning_rate": 2.5213961786542106e-06, + "loss": 1.1025, + "step": 26424 + }, + { + "epoch": 0.7758823183980269, + "grad_norm": 0.0, + "learning_rate": 2.5207649116851696e-06, + "loss": 0.9702, + "step": 26425 + }, + { + "epoch": 0.7759116800751659, + "grad_norm": 0.0, + "learning_rate": 2.5201337123527656e-06, + "loss": 1.2822, + "step": 26426 + }, + { + "epoch": 0.7759410417523049, + "grad_norm": 0.0, + "learning_rate": 2.5195025806627082e-06, + "loss": 1.2822, + "step": 26427 + }, + { + "epoch": 0.7759704034294439, + "grad_norm": 0.0, + "learning_rate": 2.5188715166207034e-06, + "loss": 1.2354, + "step": 26428 + }, + { + "epoch": 0.7759997651065829, + "grad_norm": 0.0, + "learning_rate": 2.5182405202324613e-06, + "loss": 1.1914, + "step": 26429 + }, + { + "epoch": 0.7760291267837219, + "grad_norm": 0.0, + "learning_rate": 2.5176095915036856e-06, + "loss": 1.2559, + "step": 26430 + }, + { + "epoch": 0.7760584884608609, + "grad_norm": 0.0, + "learning_rate": 2.5169787304400805e-06, + "loss": 1.1582, + "step": 26431 + }, + { + "epoch": 0.7760878501379999, + "grad_norm": 0.0, + "learning_rate": 2.516347937047355e-06, + "loss": 1.2656, + "step": 26432 + }, + { + "epoch": 0.7761172118151389, + "grad_norm": 0.0, + "learning_rate": 2.51571721133121e-06, + "loss": 1.2568, + "step": 26433 + }, + { + "epoch": 0.7761465734922779, + "grad_norm": 0.0, + "learning_rate": 2.5150865532973544e-06, + "loss": 1.1807, + "step": 26434 + }, + { + "epoch": 0.7761759351694169, + "grad_norm": 0.0, + "learning_rate": 2.5144559629514877e-06, + "loss": 1.1733, + "step": 26435 + }, + { + "epoch": 0.7762052968465559, + "grad_norm": 0.0, + "learning_rate": 2.5138254402993113e-06, + "loss": 1.0757, + "step": 26436 + }, + { + "epoch": 0.7762346585236949, + "grad_norm": 0.0, + "learning_rate": 2.513194985346528e-06, + "loss": 1.1846, + "step": 26437 + }, + { + "epoch": 0.7762640202008338, + "grad_norm": 0.0, + "learning_rate": 2.512564598098841e-06, + "loss": 1.0063, + "step": 26438 + }, + { + "epoch": 0.7762933818779729, + "grad_norm": 0.0, + "learning_rate": 2.5119342785619505e-06, + "loss": 1.1895, + "step": 26439 + }, + { + "epoch": 0.7763227435551119, + "grad_norm": 0.0, + "learning_rate": 2.511304026741553e-06, + "loss": 1.209, + "step": 26440 + }, + { + "epoch": 0.7763521052322508, + "grad_norm": 0.0, + "learning_rate": 2.5106738426433543e-06, + "loss": 1.1714, + "step": 26441 + }, + { + "epoch": 0.7763814669093899, + "grad_norm": 0.0, + "learning_rate": 2.5100437262730492e-06, + "loss": 1.1582, + "step": 26442 + }, + { + "epoch": 0.7764108285865289, + "grad_norm": 0.0, + "learning_rate": 2.5094136776363378e-06, + "loss": 1.272, + "step": 26443 + }, + { + "epoch": 0.7764401902636678, + "grad_norm": 0.0, + "learning_rate": 2.5087836967389136e-06, + "loss": 1.2607, + "step": 26444 + }, + { + "epoch": 0.7764695519408069, + "grad_norm": 0.0, + "learning_rate": 2.50815378358648e-06, + "loss": 1.1567, + "step": 26445 + }, + { + "epoch": 0.7764989136179459, + "grad_norm": 0.0, + "learning_rate": 2.507523938184728e-06, + "loss": 1.1758, + "step": 26446 + }, + { + "epoch": 0.7765282752950848, + "grad_norm": 0.0, + "learning_rate": 2.506894160539358e-06, + "loss": 1.2666, + "step": 26447 + }, + { + "epoch": 0.7765576369722239, + "grad_norm": 0.0, + "learning_rate": 2.5062644506560605e-06, + "loss": 1.1758, + "step": 26448 + }, + { + "epoch": 0.7765869986493629, + "grad_norm": 0.0, + "learning_rate": 2.5056348085405356e-06, + "loss": 1.1426, + "step": 26449 + }, + { + "epoch": 0.7766163603265018, + "grad_norm": 0.0, + "learning_rate": 2.505005234198474e-06, + "loss": 1.1782, + "step": 26450 + }, + { + "epoch": 0.7766457220036408, + "grad_norm": 0.0, + "learning_rate": 2.5043757276355706e-06, + "loss": 1.27, + "step": 26451 + }, + { + "epoch": 0.7766750836807799, + "grad_norm": 0.0, + "learning_rate": 2.503746288857517e-06, + "loss": 1.3037, + "step": 26452 + }, + { + "epoch": 0.7767044453579188, + "grad_norm": 0.0, + "learning_rate": 2.503116917870002e-06, + "loss": 1.2793, + "step": 26453 + }, + { + "epoch": 0.7767338070350578, + "grad_norm": 0.0, + "learning_rate": 2.5024876146787236e-06, + "loss": 1.29, + "step": 26454 + }, + { + "epoch": 0.7767631687121969, + "grad_norm": 0.0, + "learning_rate": 2.501858379289368e-06, + "loss": 1.2559, + "step": 26455 + }, + { + "epoch": 0.7767925303893358, + "grad_norm": 0.0, + "learning_rate": 2.5012292117076287e-06, + "loss": 1.1675, + "step": 26456 + }, + { + "epoch": 0.7768218920664748, + "grad_norm": 0.0, + "learning_rate": 2.500600111939192e-06, + "loss": 1.2549, + "step": 26457 + }, + { + "epoch": 0.7768512537436139, + "grad_norm": 0.0, + "learning_rate": 2.4999710799897525e-06, + "loss": 1.2124, + "step": 26458 + }, + { + "epoch": 0.7768806154207528, + "grad_norm": 0.0, + "learning_rate": 2.4993421158649946e-06, + "loss": 1.2891, + "step": 26459 + }, + { + "epoch": 0.7769099770978918, + "grad_norm": 0.0, + "learning_rate": 2.4987132195706077e-06, + "loss": 1.1855, + "step": 26460 + }, + { + "epoch": 0.7769393387750309, + "grad_norm": 0.0, + "learning_rate": 2.4980843911122777e-06, + "loss": 1.2041, + "step": 26461 + }, + { + "epoch": 0.7769687004521698, + "grad_norm": 0.0, + "learning_rate": 2.4974556304956888e-06, + "loss": 1.2666, + "step": 26462 + }, + { + "epoch": 0.7769980621293088, + "grad_norm": 0.0, + "learning_rate": 2.496826937726533e-06, + "loss": 1.293, + "step": 26463 + }, + { + "epoch": 0.7770274238064478, + "grad_norm": 0.0, + "learning_rate": 2.4961983128104896e-06, + "loss": 1.4277, + "step": 26464 + }, + { + "epoch": 0.7770567854835868, + "grad_norm": 0.0, + "learning_rate": 2.49556975575325e-06, + "loss": 1.2446, + "step": 26465 + }, + { + "epoch": 0.7770861471607258, + "grad_norm": 0.0, + "learning_rate": 2.4949412665604923e-06, + "loss": 1.2749, + "step": 26466 + }, + { + "epoch": 0.7771155088378648, + "grad_norm": 0.0, + "learning_rate": 2.4943128452379086e-06, + "loss": 1.2373, + "step": 26467 + }, + { + "epoch": 0.7771448705150038, + "grad_norm": 0.0, + "learning_rate": 2.4936844917911695e-06, + "loss": 1.2471, + "step": 26468 + }, + { + "epoch": 0.7771742321921428, + "grad_norm": 0.0, + "learning_rate": 2.493056206225967e-06, + "loss": 1.0962, + "step": 26469 + }, + { + "epoch": 0.7772035938692818, + "grad_norm": 0.0, + "learning_rate": 2.4924279885479807e-06, + "loss": 1.2085, + "step": 26470 + }, + { + "epoch": 0.7772329555464208, + "grad_norm": 0.0, + "learning_rate": 2.491799838762887e-06, + "loss": 1.127, + "step": 26471 + }, + { + "epoch": 0.7772623172235598, + "grad_norm": 0.0, + "learning_rate": 2.4911717568763737e-06, + "loss": 1.229, + "step": 26472 + }, + { + "epoch": 0.7772916789006988, + "grad_norm": 0.0, + "learning_rate": 2.4905437428941138e-06, + "loss": 1.2026, + "step": 26473 + }, + { + "epoch": 0.7773210405778378, + "grad_norm": 0.0, + "learning_rate": 2.4899157968217937e-06, + "loss": 1.3379, + "step": 26474 + }, + { + "epoch": 0.7773504022549768, + "grad_norm": 0.0, + "learning_rate": 2.4892879186650878e-06, + "loss": 1.1963, + "step": 26475 + }, + { + "epoch": 0.7773797639321158, + "grad_norm": 0.0, + "learning_rate": 2.4886601084296745e-06, + "loss": 1.23, + "step": 26476 + }, + { + "epoch": 0.7774091256092548, + "grad_norm": 0.0, + "learning_rate": 2.488032366121229e-06, + "loss": 1.2134, + "step": 26477 + }, + { + "epoch": 0.7774384872863938, + "grad_norm": 0.0, + "learning_rate": 2.487404691745433e-06, + "loss": 1.1133, + "step": 26478 + }, + { + "epoch": 0.7774678489635328, + "grad_norm": 0.0, + "learning_rate": 2.486777085307962e-06, + "loss": 1.2363, + "step": 26479 + }, + { + "epoch": 0.7774972106406718, + "grad_norm": 0.0, + "learning_rate": 2.486149546814486e-06, + "loss": 1.2383, + "step": 26480 + }, + { + "epoch": 0.7775265723178107, + "grad_norm": 0.0, + "learning_rate": 2.4855220762706865e-06, + "loss": 1.0923, + "step": 26481 + }, + { + "epoch": 0.7775559339949498, + "grad_norm": 0.0, + "learning_rate": 2.484894673682233e-06, + "loss": 1.2715, + "step": 26482 + }, + { + "epoch": 0.7775852956720888, + "grad_norm": 0.0, + "learning_rate": 2.4842673390548046e-06, + "loss": 1.2583, + "step": 26483 + }, + { + "epoch": 0.7776146573492277, + "grad_norm": 0.0, + "learning_rate": 2.4836400723940714e-06, + "loss": 1.2412, + "step": 26484 + }, + { + "epoch": 0.7776440190263668, + "grad_norm": 0.0, + "learning_rate": 2.4830128737057057e-06, + "loss": 1.2607, + "step": 26485 + }, + { + "epoch": 0.7776733807035058, + "grad_norm": 0.0, + "learning_rate": 2.4823857429953767e-06, + "loss": 1.1294, + "step": 26486 + }, + { + "epoch": 0.7777027423806447, + "grad_norm": 0.0, + "learning_rate": 2.481758680268762e-06, + "loss": 1.1846, + "step": 26487 + }, + { + "epoch": 0.7777321040577838, + "grad_norm": 0.0, + "learning_rate": 2.4811316855315258e-06, + "loss": 1.2383, + "step": 26488 + }, + { + "epoch": 0.7777614657349228, + "grad_norm": 0.0, + "learning_rate": 2.480504758789344e-06, + "loss": 1.1943, + "step": 26489 + }, + { + "epoch": 0.7777908274120617, + "grad_norm": 0.0, + "learning_rate": 2.4798779000478845e-06, + "loss": 1.1934, + "step": 26490 + }, + { + "epoch": 0.7778201890892008, + "grad_norm": 0.0, + "learning_rate": 2.4792511093128115e-06, + "loss": 1.2075, + "step": 26491 + }, + { + "epoch": 0.7778495507663398, + "grad_norm": 0.0, + "learning_rate": 2.4786243865898018e-06, + "loss": 1.1855, + "step": 26492 + }, + { + "epoch": 0.7778789124434787, + "grad_norm": 0.0, + "learning_rate": 2.477997731884514e-06, + "loss": 1.2383, + "step": 26493 + }, + { + "epoch": 0.7779082741206178, + "grad_norm": 0.0, + "learning_rate": 2.4773711452026205e-06, + "loss": 1.21, + "step": 26494 + }, + { + "epoch": 0.7779376357977568, + "grad_norm": 0.0, + "learning_rate": 2.476744626549784e-06, + "loss": 1.3516, + "step": 26495 + }, + { + "epoch": 0.7779669974748957, + "grad_norm": 0.0, + "learning_rate": 2.4761181759316755e-06, + "loss": 1.1738, + "step": 26496 + }, + { + "epoch": 0.7779963591520348, + "grad_norm": 0.0, + "learning_rate": 2.4754917933539548e-06, + "loss": 1.2954, + "step": 26497 + }, + { + "epoch": 0.7780257208291738, + "grad_norm": 0.0, + "learning_rate": 2.4748654788222913e-06, + "loss": 1.1558, + "step": 26498 + }, + { + "epoch": 0.7780550825063127, + "grad_norm": 0.0, + "learning_rate": 2.4742392323423457e-06, + "loss": 1.1851, + "step": 26499 + }, + { + "epoch": 0.7780844441834518, + "grad_norm": 0.0, + "learning_rate": 2.4736130539197835e-06, + "loss": 1.1958, + "step": 26500 + }, + { + "epoch": 0.7781138058605908, + "grad_norm": 0.0, + "learning_rate": 2.472986943560266e-06, + "loss": 1.2388, + "step": 26501 + }, + { + "epoch": 0.7781431675377297, + "grad_norm": 0.0, + "learning_rate": 2.4723609012694515e-06, + "loss": 1.1797, + "step": 26502 + }, + { + "epoch": 0.7781725292148688, + "grad_norm": 0.0, + "learning_rate": 2.471734927053008e-06, + "loss": 1.0713, + "step": 26503 + }, + { + "epoch": 0.7782018908920078, + "grad_norm": 0.0, + "learning_rate": 2.471109020916591e-06, + "loss": 1.2881, + "step": 26504 + }, + { + "epoch": 0.7782312525691467, + "grad_norm": 0.0, + "learning_rate": 2.470483182865866e-06, + "loss": 1.1494, + "step": 26505 + }, + { + "epoch": 0.7782606142462858, + "grad_norm": 0.0, + "learning_rate": 2.469857412906488e-06, + "loss": 1.1572, + "step": 26506 + }, + { + "epoch": 0.7782899759234247, + "grad_norm": 0.0, + "learning_rate": 2.4692317110441198e-06, + "loss": 1.1973, + "step": 26507 + }, + { + "epoch": 0.7783193376005637, + "grad_norm": 0.0, + "learning_rate": 2.468606077284418e-06, + "loss": 1.1846, + "step": 26508 + }, + { + "epoch": 0.7783486992777028, + "grad_norm": 0.0, + "learning_rate": 2.4679805116330403e-06, + "loss": 1.1899, + "step": 26509 + }, + { + "epoch": 0.7783780609548417, + "grad_norm": 0.0, + "learning_rate": 2.4673550140956435e-06, + "loss": 1.1479, + "step": 26510 + }, + { + "epoch": 0.7784074226319807, + "grad_norm": 0.0, + "learning_rate": 2.4667295846778828e-06, + "loss": 1.2402, + "step": 26511 + }, + { + "epoch": 0.7784367843091198, + "grad_norm": 0.0, + "learning_rate": 2.4661042233854184e-06, + "loss": 1.2607, + "step": 26512 + }, + { + "epoch": 0.7784661459862587, + "grad_norm": 0.0, + "learning_rate": 2.465478930223899e-06, + "loss": 1.1597, + "step": 26513 + }, + { + "epoch": 0.7784955076633977, + "grad_norm": 0.0, + "learning_rate": 2.4648537051989862e-06, + "loss": 1.2192, + "step": 26514 + }, + { + "epoch": 0.7785248693405368, + "grad_norm": 0.0, + "learning_rate": 2.4642285483163285e-06, + "loss": 1.2261, + "step": 26515 + }, + { + "epoch": 0.7785542310176757, + "grad_norm": 0.0, + "learning_rate": 2.463603459581586e-06, + "loss": 1.1035, + "step": 26516 + }, + { + "epoch": 0.7785835926948147, + "grad_norm": 0.0, + "learning_rate": 2.4629784390004064e-06, + "loss": 1.1562, + "step": 26517 + }, + { + "epoch": 0.7786129543719538, + "grad_norm": 0.0, + "learning_rate": 2.4623534865784426e-06, + "loss": 1.2246, + "step": 26518 + }, + { + "epoch": 0.7786423160490927, + "grad_norm": 0.0, + "learning_rate": 2.461728602321348e-06, + "loss": 1.2471, + "step": 26519 + }, + { + "epoch": 0.7786716777262317, + "grad_norm": 0.0, + "learning_rate": 2.4611037862347687e-06, + "loss": 1.0977, + "step": 26520 + }, + { + "epoch": 0.7787010394033708, + "grad_norm": 0.0, + "learning_rate": 2.4604790383243615e-06, + "loss": 1.1362, + "step": 26521 + }, + { + "epoch": 0.7787304010805097, + "grad_norm": 0.0, + "learning_rate": 2.45985435859577e-06, + "loss": 1.2031, + "step": 26522 + }, + { + "epoch": 0.7787597627576487, + "grad_norm": 0.0, + "learning_rate": 2.45922974705465e-06, + "loss": 1.1445, + "step": 26523 + }, + { + "epoch": 0.7787891244347878, + "grad_norm": 0.0, + "learning_rate": 2.4586052037066465e-06, + "loss": 1.2334, + "step": 26524 + }, + { + "epoch": 0.7788184861119267, + "grad_norm": 0.0, + "learning_rate": 2.4579807285574074e-06, + "loss": 1.2334, + "step": 26525 + }, + { + "epoch": 0.7788478477890657, + "grad_norm": 0.0, + "learning_rate": 2.4573563216125774e-06, + "loss": 1.1089, + "step": 26526 + }, + { + "epoch": 0.7788772094662048, + "grad_norm": 0.0, + "learning_rate": 2.456731982877808e-06, + "loss": 1.1699, + "step": 26527 + }, + { + "epoch": 0.7789065711433437, + "grad_norm": 0.0, + "learning_rate": 2.456107712358744e-06, + "loss": 1.1548, + "step": 26528 + }, + { + "epoch": 0.7789359328204827, + "grad_norm": 0.0, + "learning_rate": 2.4554835100610263e-06, + "loss": 1.1362, + "step": 26529 + }, + { + "epoch": 0.7789652944976218, + "grad_norm": 0.0, + "learning_rate": 2.4548593759903074e-06, + "loss": 1.1235, + "step": 26530 + }, + { + "epoch": 0.7789946561747607, + "grad_norm": 0.0, + "learning_rate": 2.454235310152223e-06, + "loss": 1.2793, + "step": 26531 + }, + { + "epoch": 0.7790240178518997, + "grad_norm": 0.0, + "learning_rate": 2.4536113125524265e-06, + "loss": 1.2427, + "step": 26532 + }, + { + "epoch": 0.7790533795290387, + "grad_norm": 0.0, + "learning_rate": 2.4529873831965533e-06, + "loss": 1.0107, + "step": 26533 + }, + { + "epoch": 0.7790827412061777, + "grad_norm": 0.0, + "learning_rate": 2.4523635220902484e-06, + "loss": 1.1606, + "step": 26534 + }, + { + "epoch": 0.7791121028833167, + "grad_norm": 0.0, + "learning_rate": 2.451739729239151e-06, + "loss": 1.168, + "step": 26535 + }, + { + "epoch": 0.7791414645604557, + "grad_norm": 0.0, + "learning_rate": 2.451116004648907e-06, + "loss": 1.2236, + "step": 26536 + }, + { + "epoch": 0.7791708262375947, + "grad_norm": 0.0, + "learning_rate": 2.450492348325152e-06, + "loss": 1.2939, + "step": 26537 + }, + { + "epoch": 0.7792001879147337, + "grad_norm": 0.0, + "learning_rate": 2.4498687602735305e-06, + "loss": 1.3193, + "step": 26538 + }, + { + "epoch": 0.7792295495918727, + "grad_norm": 0.0, + "learning_rate": 2.449245240499679e-06, + "loss": 1.207, + "step": 26539 + }, + { + "epoch": 0.7792589112690117, + "grad_norm": 0.0, + "learning_rate": 2.4486217890092334e-06, + "loss": 1.1899, + "step": 26540 + }, + { + "epoch": 0.7792882729461507, + "grad_norm": 0.0, + "learning_rate": 2.4479984058078388e-06, + "loss": 1.3008, + "step": 26541 + }, + { + "epoch": 0.7793176346232897, + "grad_norm": 0.0, + "learning_rate": 2.4473750909011284e-06, + "loss": 1.2422, + "step": 26542 + }, + { + "epoch": 0.7793469963004287, + "grad_norm": 0.0, + "learning_rate": 2.446751844294739e-06, + "loss": 1.3398, + "step": 26543 + }, + { + "epoch": 0.7793763579775677, + "grad_norm": 0.0, + "learning_rate": 2.446128665994305e-06, + "loss": 1.1025, + "step": 26544 + }, + { + "epoch": 0.7794057196547067, + "grad_norm": 0.0, + "learning_rate": 2.4455055560054663e-06, + "loss": 1.2715, + "step": 26545 + }, + { + "epoch": 0.7794350813318457, + "grad_norm": 0.0, + "learning_rate": 2.444882514333853e-06, + "loss": 1.2871, + "step": 26546 + }, + { + "epoch": 0.7794644430089847, + "grad_norm": 0.0, + "learning_rate": 2.4442595409851046e-06, + "loss": 1.2568, + "step": 26547 + }, + { + "epoch": 0.7794938046861237, + "grad_norm": 0.0, + "learning_rate": 2.443636635964852e-06, + "loss": 1.2959, + "step": 26548 + }, + { + "epoch": 0.7795231663632627, + "grad_norm": 0.0, + "learning_rate": 2.4430137992787284e-06, + "loss": 1.2383, + "step": 26549 + }, + { + "epoch": 0.7795525280404016, + "grad_norm": 0.0, + "learning_rate": 2.4423910309323673e-06, + "loss": 1.1348, + "step": 26550 + }, + { + "epoch": 0.7795818897175407, + "grad_norm": 0.0, + "learning_rate": 2.4417683309313955e-06, + "loss": 1.2979, + "step": 26551 + }, + { + "epoch": 0.7796112513946797, + "grad_norm": 0.0, + "learning_rate": 2.441145699281453e-06, + "loss": 1.21, + "step": 26552 + }, + { + "epoch": 0.7796406130718186, + "grad_norm": 0.0, + "learning_rate": 2.440523135988161e-06, + "loss": 1.2271, + "step": 26553 + }, + { + "epoch": 0.7796699747489576, + "grad_norm": 0.0, + "learning_rate": 2.439900641057159e-06, + "loss": 1.1982, + "step": 26554 + }, + { + "epoch": 0.7796993364260967, + "grad_norm": 0.0, + "learning_rate": 2.4392782144940676e-06, + "loss": 1.2007, + "step": 26555 + }, + { + "epoch": 0.7797286981032356, + "grad_norm": 0.0, + "learning_rate": 2.4386558563045227e-06, + "loss": 1.2109, + "step": 26556 + }, + { + "epoch": 0.7797580597803746, + "grad_norm": 0.0, + "learning_rate": 2.438033566494149e-06, + "loss": 1.2334, + "step": 26557 + }, + { + "epoch": 0.7797874214575137, + "grad_norm": 0.0, + "learning_rate": 2.4374113450685755e-06, + "loss": 1.2085, + "step": 26558 + }, + { + "epoch": 0.7798167831346526, + "grad_norm": 0.0, + "learning_rate": 2.4367891920334276e-06, + "loss": 1.1079, + "step": 26559 + }, + { + "epoch": 0.7798461448117916, + "grad_norm": 0.0, + "learning_rate": 2.4361671073943284e-06, + "loss": 1.2114, + "step": 26560 + }, + { + "epoch": 0.7798755064889307, + "grad_norm": 0.0, + "learning_rate": 2.435545091156911e-06, + "loss": 1.1196, + "step": 26561 + }, + { + "epoch": 0.7799048681660696, + "grad_norm": 0.0, + "learning_rate": 2.434923143326793e-06, + "loss": 1.1865, + "step": 26562 + }, + { + "epoch": 0.7799342298432086, + "grad_norm": 0.0, + "learning_rate": 2.434301263909605e-06, + "loss": 1.3164, + "step": 26563 + }, + { + "epoch": 0.7799635915203477, + "grad_norm": 0.0, + "learning_rate": 2.4336794529109662e-06, + "loss": 1.2109, + "step": 26564 + }, + { + "epoch": 0.7799929531974866, + "grad_norm": 0.0, + "learning_rate": 2.4330577103365048e-06, + "loss": 1.4092, + "step": 26565 + }, + { + "epoch": 0.7800223148746256, + "grad_norm": 0.0, + "learning_rate": 2.4324360361918407e-06, + "loss": 1.1294, + "step": 26566 + }, + { + "epoch": 0.7800516765517647, + "grad_norm": 0.0, + "learning_rate": 2.431814430482595e-06, + "loss": 1.1631, + "step": 26567 + }, + { + "epoch": 0.7800810382289036, + "grad_norm": 0.0, + "learning_rate": 2.43119289321439e-06, + "loss": 1.1895, + "step": 26568 + }, + { + "epoch": 0.7801103999060426, + "grad_norm": 0.0, + "learning_rate": 2.4305714243928426e-06, + "loss": 1.23, + "step": 26569 + }, + { + "epoch": 0.7801397615831817, + "grad_norm": 0.0, + "learning_rate": 2.4299500240235797e-06, + "loss": 1.1738, + "step": 26570 + }, + { + "epoch": 0.7801691232603206, + "grad_norm": 0.0, + "learning_rate": 2.429328692112215e-06, + "loss": 1.3242, + "step": 26571 + }, + { + "epoch": 0.7801984849374596, + "grad_norm": 0.0, + "learning_rate": 2.428707428664372e-06, + "loss": 1.167, + "step": 26572 + }, + { + "epoch": 0.7802278466145987, + "grad_norm": 0.0, + "learning_rate": 2.428086233685667e-06, + "loss": 1.1553, + "step": 26573 + }, + { + "epoch": 0.7802572082917376, + "grad_norm": 0.0, + "learning_rate": 2.4274651071817167e-06, + "loss": 1.1914, + "step": 26574 + }, + { + "epoch": 0.7802865699688766, + "grad_norm": 0.0, + "learning_rate": 2.426844049158137e-06, + "loss": 1.3008, + "step": 26575 + }, + { + "epoch": 0.7803159316460156, + "grad_norm": 0.0, + "learning_rate": 2.4262230596205483e-06, + "loss": 1.2808, + "step": 26576 + }, + { + "epoch": 0.7803452933231546, + "grad_norm": 0.0, + "learning_rate": 2.425602138574564e-06, + "loss": 1.3154, + "step": 26577 + }, + { + "epoch": 0.7803746550002936, + "grad_norm": 0.0, + "learning_rate": 2.4249812860257982e-06, + "loss": 1.2998, + "step": 26578 + }, + { + "epoch": 0.7804040166774326, + "grad_norm": 0.0, + "learning_rate": 2.424360501979868e-06, + "loss": 1.2803, + "step": 26579 + }, + { + "epoch": 0.7804333783545716, + "grad_norm": 0.0, + "learning_rate": 2.4237397864423827e-06, + "loss": 1.248, + "step": 26580 + }, + { + "epoch": 0.7804627400317106, + "grad_norm": 0.0, + "learning_rate": 2.423119139418962e-06, + "loss": 1.3164, + "step": 26581 + }, + { + "epoch": 0.7804921017088496, + "grad_norm": 0.0, + "learning_rate": 2.422498560915216e-06, + "loss": 1.2705, + "step": 26582 + }, + { + "epoch": 0.7805214633859886, + "grad_norm": 0.0, + "learning_rate": 2.4218780509367556e-06, + "loss": 1.1953, + "step": 26583 + }, + { + "epoch": 0.7805508250631276, + "grad_norm": 0.0, + "learning_rate": 2.4212576094891906e-06, + "loss": 1.186, + "step": 26584 + }, + { + "epoch": 0.7805801867402666, + "grad_norm": 0.0, + "learning_rate": 2.420637236578136e-06, + "loss": 1.2402, + "step": 26585 + }, + { + "epoch": 0.7806095484174056, + "grad_norm": 0.0, + "learning_rate": 2.420016932209197e-06, + "loss": 1.2422, + "step": 26586 + }, + { + "epoch": 0.7806389100945446, + "grad_norm": 0.0, + "learning_rate": 2.41939669638799e-06, + "loss": 1.248, + "step": 26587 + }, + { + "epoch": 0.7806682717716836, + "grad_norm": 0.0, + "learning_rate": 2.4187765291201193e-06, + "loss": 1.1802, + "step": 26588 + }, + { + "epoch": 0.7806976334488226, + "grad_norm": 0.0, + "learning_rate": 2.4181564304111914e-06, + "loss": 1.1128, + "step": 26589 + }, + { + "epoch": 0.7807269951259616, + "grad_norm": 0.0, + "learning_rate": 2.41753640026682e-06, + "loss": 1.1187, + "step": 26590 + }, + { + "epoch": 0.7807563568031006, + "grad_norm": 0.0, + "learning_rate": 2.416916438692608e-06, + "loss": 1.2461, + "step": 26591 + }, + { + "epoch": 0.7807857184802396, + "grad_norm": 0.0, + "learning_rate": 2.4162965456941635e-06, + "loss": 1.165, + "step": 26592 + }, + { + "epoch": 0.7808150801573785, + "grad_norm": 0.0, + "learning_rate": 2.4156767212770884e-06, + "loss": 1.2383, + "step": 26593 + }, + { + "epoch": 0.7808444418345176, + "grad_norm": 0.0, + "learning_rate": 2.4150569654469937e-06, + "loss": 1.2295, + "step": 26594 + }, + { + "epoch": 0.7808738035116566, + "grad_norm": 0.0, + "learning_rate": 2.414437278209478e-06, + "loss": 1.2949, + "step": 26595 + }, + { + "epoch": 0.7809031651887955, + "grad_norm": 0.0, + "learning_rate": 2.413817659570152e-06, + "loss": 1.2334, + "step": 26596 + }, + { + "epoch": 0.7809325268659346, + "grad_norm": 0.0, + "learning_rate": 2.4131981095346157e-06, + "loss": 1.2461, + "step": 26597 + }, + { + "epoch": 0.7809618885430736, + "grad_norm": 0.0, + "learning_rate": 2.4125786281084727e-06, + "loss": 1.2998, + "step": 26598 + }, + { + "epoch": 0.7809912502202125, + "grad_norm": 0.0, + "learning_rate": 2.4119592152973237e-06, + "loss": 1.2588, + "step": 26599 + }, + { + "epoch": 0.7810206118973516, + "grad_norm": 0.0, + "learning_rate": 2.411339871106768e-06, + "loss": 1.1621, + "step": 26600 + }, + { + "epoch": 0.7810499735744906, + "grad_norm": 0.0, + "learning_rate": 2.410720595542412e-06, + "loss": 1.1738, + "step": 26601 + }, + { + "epoch": 0.7810793352516295, + "grad_norm": 0.0, + "learning_rate": 2.4101013886098514e-06, + "loss": 1.248, + "step": 26602 + }, + { + "epoch": 0.7811086969287686, + "grad_norm": 0.0, + "learning_rate": 2.40948225031469e-06, + "loss": 1.3193, + "step": 26603 + }, + { + "epoch": 0.7811380586059076, + "grad_norm": 0.0, + "learning_rate": 2.4088631806625206e-06, + "loss": 1.2266, + "step": 26604 + }, + { + "epoch": 0.7811674202830465, + "grad_norm": 0.0, + "learning_rate": 2.4082441796589496e-06, + "loss": 1.3711, + "step": 26605 + }, + { + "epoch": 0.7811967819601856, + "grad_norm": 0.0, + "learning_rate": 2.407625247309571e-06, + "loss": 1.2476, + "step": 26606 + }, + { + "epoch": 0.7812261436373246, + "grad_norm": 0.0, + "learning_rate": 2.4070063836199806e-06, + "loss": 1.1699, + "step": 26607 + }, + { + "epoch": 0.7812555053144635, + "grad_norm": 0.0, + "learning_rate": 2.4063875885957765e-06, + "loss": 1.3145, + "step": 26608 + }, + { + "epoch": 0.7812848669916026, + "grad_norm": 0.0, + "learning_rate": 2.4057688622425524e-06, + "loss": 1.2051, + "step": 26609 + }, + { + "epoch": 0.7813142286687416, + "grad_norm": 0.0, + "learning_rate": 2.405150204565907e-06, + "loss": 1.2725, + "step": 26610 + }, + { + "epoch": 0.7813435903458805, + "grad_norm": 0.0, + "learning_rate": 2.4045316155714318e-06, + "loss": 1.2412, + "step": 26611 + }, + { + "epoch": 0.7813729520230196, + "grad_norm": 0.0, + "learning_rate": 2.4039130952647238e-06, + "loss": 1.144, + "step": 26612 + }, + { + "epoch": 0.7814023137001586, + "grad_norm": 0.0, + "learning_rate": 2.4032946436513727e-06, + "loss": 1.2402, + "step": 26613 + }, + { + "epoch": 0.7814316753772975, + "grad_norm": 0.0, + "learning_rate": 2.402676260736977e-06, + "loss": 1.1719, + "step": 26614 + }, + { + "epoch": 0.7814610370544366, + "grad_norm": 0.0, + "learning_rate": 2.4020579465271255e-06, + "loss": 1.0864, + "step": 26615 + }, + { + "epoch": 0.7814903987315756, + "grad_norm": 0.0, + "learning_rate": 2.4014397010274095e-06, + "loss": 1.085, + "step": 26616 + }, + { + "epoch": 0.7815197604087145, + "grad_norm": 0.0, + "learning_rate": 2.4008215242434196e-06, + "loss": 1.2505, + "step": 26617 + }, + { + "epoch": 0.7815491220858536, + "grad_norm": 0.0, + "learning_rate": 2.4002034161807454e-06, + "loss": 1.1865, + "step": 26618 + }, + { + "epoch": 0.7815784837629925, + "grad_norm": 0.0, + "learning_rate": 2.3995853768449808e-06, + "loss": 1.1953, + "step": 26619 + }, + { + "epoch": 0.7816078454401315, + "grad_norm": 0.0, + "learning_rate": 2.398967406241708e-06, + "loss": 1.2388, + "step": 26620 + }, + { + "epoch": 0.7816372071172706, + "grad_norm": 0.0, + "learning_rate": 2.3983495043765235e-06, + "loss": 1.2285, + "step": 26621 + }, + { + "epoch": 0.7816665687944095, + "grad_norm": 0.0, + "learning_rate": 2.3977316712550113e-06, + "loss": 1.1655, + "step": 26622 + }, + { + "epoch": 0.7816959304715485, + "grad_norm": 0.0, + "learning_rate": 2.3971139068827576e-06, + "loss": 1.3062, + "step": 26623 + }, + { + "epoch": 0.7817252921486876, + "grad_norm": 0.0, + "learning_rate": 2.3964962112653476e-06, + "loss": 1.125, + "step": 26624 + }, + { + "epoch": 0.7817546538258265, + "grad_norm": 0.0, + "learning_rate": 2.3958785844083733e-06, + "loss": 1.1792, + "step": 26625 + }, + { + "epoch": 0.7817840155029655, + "grad_norm": 0.0, + "learning_rate": 2.395261026317416e-06, + "loss": 1.1133, + "step": 26626 + }, + { + "epoch": 0.7818133771801046, + "grad_norm": 0.0, + "learning_rate": 2.394643536998057e-06, + "loss": 1.2383, + "step": 26627 + }, + { + "epoch": 0.7818427388572435, + "grad_norm": 0.0, + "learning_rate": 2.3940261164558885e-06, + "loss": 1.2725, + "step": 26628 + }, + { + "epoch": 0.7818721005343825, + "grad_norm": 0.0, + "learning_rate": 2.3934087646964854e-06, + "loss": 1.127, + "step": 26629 + }, + { + "epoch": 0.7819014622115216, + "grad_norm": 0.0, + "learning_rate": 2.3927914817254394e-06, + "loss": 1.2471, + "step": 26630 + }, + { + "epoch": 0.7819308238886605, + "grad_norm": 0.0, + "learning_rate": 2.3921742675483273e-06, + "loss": 1.3584, + "step": 26631 + }, + { + "epoch": 0.7819601855657995, + "grad_norm": 0.0, + "learning_rate": 2.3915571221707324e-06, + "loss": 1.0762, + "step": 26632 + }, + { + "epoch": 0.7819895472429386, + "grad_norm": 0.0, + "learning_rate": 2.390940045598231e-06, + "loss": 1.2842, + "step": 26633 + }, + { + "epoch": 0.7820189089200775, + "grad_norm": 0.0, + "learning_rate": 2.3903230378364106e-06, + "loss": 1.1826, + "step": 26634 + }, + { + "epoch": 0.7820482705972165, + "grad_norm": 0.0, + "learning_rate": 2.3897060988908462e-06, + "loss": 1.1089, + "step": 26635 + }, + { + "epoch": 0.7820776322743556, + "grad_norm": 0.0, + "learning_rate": 2.3890892287671196e-06, + "loss": 1.2588, + "step": 26636 + }, + { + "epoch": 0.7821069939514945, + "grad_norm": 0.0, + "learning_rate": 2.3884724274708103e-06, + "loss": 1.2007, + "step": 26637 + }, + { + "epoch": 0.7821363556286335, + "grad_norm": 0.0, + "learning_rate": 2.3878556950074894e-06, + "loss": 1.1729, + "step": 26638 + }, + { + "epoch": 0.7821657173057726, + "grad_norm": 0.0, + "learning_rate": 2.3872390313827433e-06, + "loss": 1.2676, + "step": 26639 + }, + { + "epoch": 0.7821950789829115, + "grad_norm": 0.0, + "learning_rate": 2.386622436602144e-06, + "loss": 1.1411, + "step": 26640 + }, + { + "epoch": 0.7822244406600505, + "grad_norm": 0.0, + "learning_rate": 2.3860059106712674e-06, + "loss": 1.2119, + "step": 26641 + }, + { + "epoch": 0.7822538023371896, + "grad_norm": 0.0, + "learning_rate": 2.3853894535956867e-06, + "loss": 1.2588, + "step": 26642 + }, + { + "epoch": 0.7822831640143285, + "grad_norm": 0.0, + "learning_rate": 2.384773065380982e-06, + "loss": 1.2676, + "step": 26643 + }, + { + "epoch": 0.7823125256914675, + "grad_norm": 0.0, + "learning_rate": 2.384156746032722e-06, + "loss": 1.2441, + "step": 26644 + }, + { + "epoch": 0.7823418873686065, + "grad_norm": 0.0, + "learning_rate": 2.3835404955564856e-06, + "loss": 1.2471, + "step": 26645 + }, + { + "epoch": 0.7823712490457455, + "grad_norm": 0.0, + "learning_rate": 2.3829243139578427e-06, + "loss": 1.2441, + "step": 26646 + }, + { + "epoch": 0.7824006107228845, + "grad_norm": 0.0, + "learning_rate": 2.382308201242367e-06, + "loss": 1.2129, + "step": 26647 + }, + { + "epoch": 0.7824299724000235, + "grad_norm": 0.0, + "learning_rate": 2.3816921574156283e-06, + "loss": 1.1738, + "step": 26648 + }, + { + "epoch": 0.7824593340771625, + "grad_norm": 0.0, + "learning_rate": 2.3810761824831965e-06, + "loss": 1.2051, + "step": 26649 + }, + { + "epoch": 0.7824886957543015, + "grad_norm": 0.0, + "learning_rate": 2.3804602764506467e-06, + "loss": 1.2139, + "step": 26650 + }, + { + "epoch": 0.7825180574314405, + "grad_norm": 0.0, + "learning_rate": 2.379844439323542e-06, + "loss": 1.1587, + "step": 26651 + }, + { + "epoch": 0.7825474191085795, + "grad_norm": 0.0, + "learning_rate": 2.3792286711074596e-06, + "loss": 1.1689, + "step": 26652 + }, + { + "epoch": 0.7825767807857185, + "grad_norm": 0.0, + "learning_rate": 2.3786129718079607e-06, + "loss": 1.1123, + "step": 26653 + }, + { + "epoch": 0.7826061424628574, + "grad_norm": 0.0, + "learning_rate": 2.37799734143062e-06, + "loss": 1.3496, + "step": 26654 + }, + { + "epoch": 0.7826355041399965, + "grad_norm": 0.0, + "learning_rate": 2.3773817799810018e-06, + "loss": 1.1523, + "step": 26655 + }, + { + "epoch": 0.7826648658171355, + "grad_norm": 0.0, + "learning_rate": 2.3767662874646713e-06, + "loss": 1.1826, + "step": 26656 + }, + { + "epoch": 0.7826942274942744, + "grad_norm": 0.0, + "learning_rate": 2.3761508638871954e-06, + "loss": 1.2363, + "step": 26657 + }, + { + "epoch": 0.7827235891714135, + "grad_norm": 0.0, + "learning_rate": 2.3755355092541377e-06, + "loss": 1.249, + "step": 26658 + }, + { + "epoch": 0.7827529508485525, + "grad_norm": 0.0, + "learning_rate": 2.3749202235710676e-06, + "loss": 1.2959, + "step": 26659 + }, + { + "epoch": 0.7827823125256914, + "grad_norm": 0.0, + "learning_rate": 2.3743050068435436e-06, + "loss": 1.2163, + "step": 26660 + }, + { + "epoch": 0.7828116742028305, + "grad_norm": 0.0, + "learning_rate": 2.373689859077135e-06, + "loss": 1.1987, + "step": 26661 + }, + { + "epoch": 0.7828410358799694, + "grad_norm": 0.0, + "learning_rate": 2.3730747802774e-06, + "loss": 1.2168, + "step": 26662 + }, + { + "epoch": 0.7828703975571084, + "grad_norm": 0.0, + "learning_rate": 2.3724597704499055e-06, + "loss": 1.0674, + "step": 26663 + }, + { + "epoch": 0.7828997592342475, + "grad_norm": 0.0, + "learning_rate": 2.3718448296002116e-06, + "loss": 1.1904, + "step": 26664 + }, + { + "epoch": 0.7829291209113864, + "grad_norm": 0.0, + "learning_rate": 2.371229957733877e-06, + "loss": 1.1929, + "step": 26665 + }, + { + "epoch": 0.7829584825885254, + "grad_norm": 0.0, + "learning_rate": 2.370615154856465e-06, + "loss": 1.1396, + "step": 26666 + }, + { + "epoch": 0.7829878442656645, + "grad_norm": 0.0, + "learning_rate": 2.3700004209735305e-06, + "loss": 1.3027, + "step": 26667 + }, + { + "epoch": 0.7830172059428034, + "grad_norm": 0.0, + "learning_rate": 2.36938575609064e-06, + "loss": 1.1675, + "step": 26668 + }, + { + "epoch": 0.7830465676199424, + "grad_norm": 0.0, + "learning_rate": 2.3687711602133444e-06, + "loss": 1.1055, + "step": 26669 + }, + { + "epoch": 0.7830759292970815, + "grad_norm": 0.0, + "learning_rate": 2.3681566333472083e-06, + "loss": 1.062, + "step": 26670 + }, + { + "epoch": 0.7831052909742204, + "grad_norm": 0.0, + "learning_rate": 2.3675421754977847e-06, + "loss": 1.1465, + "step": 26671 + }, + { + "epoch": 0.7831346526513594, + "grad_norm": 0.0, + "learning_rate": 2.3669277866706366e-06, + "loss": 1.2402, + "step": 26672 + }, + { + "epoch": 0.7831640143284985, + "grad_norm": 0.0, + "learning_rate": 2.3663134668713094e-06, + "loss": 1.2017, + "step": 26673 + }, + { + "epoch": 0.7831933760056374, + "grad_norm": 0.0, + "learning_rate": 2.365699216105368e-06, + "loss": 1.2275, + "step": 26674 + }, + { + "epoch": 0.7832227376827764, + "grad_norm": 0.0, + "learning_rate": 2.3650850343783605e-06, + "loss": 1.1982, + "step": 26675 + }, + { + "epoch": 0.7832520993599155, + "grad_norm": 0.0, + "learning_rate": 2.3644709216958484e-06, + "loss": 1.0298, + "step": 26676 + }, + { + "epoch": 0.7832814610370544, + "grad_norm": 0.0, + "learning_rate": 2.3638568780633797e-06, + "loss": 1.1885, + "step": 26677 + }, + { + "epoch": 0.7833108227141934, + "grad_norm": 0.0, + "learning_rate": 2.363242903486507e-06, + "loss": 1.1689, + "step": 26678 + }, + { + "epoch": 0.7833401843913325, + "grad_norm": 0.0, + "learning_rate": 2.362628997970787e-06, + "loss": 1.3311, + "step": 26679 + }, + { + "epoch": 0.7833695460684714, + "grad_norm": 0.0, + "learning_rate": 2.362015161521769e-06, + "loss": 1.2476, + "step": 26680 + }, + { + "epoch": 0.7833989077456104, + "grad_norm": 0.0, + "learning_rate": 2.361401394145004e-06, + "loss": 1.2588, + "step": 26681 + }, + { + "epoch": 0.7834282694227495, + "grad_norm": 0.0, + "learning_rate": 2.3607876958460397e-06, + "loss": 1.2998, + "step": 26682 + }, + { + "epoch": 0.7834576310998884, + "grad_norm": 0.0, + "learning_rate": 2.360174066630432e-06, + "loss": 1.1719, + "step": 26683 + }, + { + "epoch": 0.7834869927770274, + "grad_norm": 0.0, + "learning_rate": 2.359560506503723e-06, + "loss": 1.2129, + "step": 26684 + }, + { + "epoch": 0.7835163544541665, + "grad_norm": 0.0, + "learning_rate": 2.358947015471469e-06, + "loss": 1.2104, + "step": 26685 + }, + { + "epoch": 0.7835457161313054, + "grad_norm": 0.0, + "learning_rate": 2.358333593539213e-06, + "loss": 1.2178, + "step": 26686 + }, + { + "epoch": 0.7835750778084444, + "grad_norm": 0.0, + "learning_rate": 2.357720240712501e-06, + "loss": 1.2529, + "step": 26687 + }, + { + "epoch": 0.7836044394855834, + "grad_norm": 0.0, + "learning_rate": 2.357106956996885e-06, + "loss": 1.2202, + "step": 26688 + }, + { + "epoch": 0.7836338011627224, + "grad_norm": 0.0, + "learning_rate": 2.3564937423979083e-06, + "loss": 1.1123, + "step": 26689 + }, + { + "epoch": 0.7836631628398614, + "grad_norm": 0.0, + "learning_rate": 2.3558805969211162e-06, + "loss": 1.2144, + "step": 26690 + }, + { + "epoch": 0.7836925245170004, + "grad_norm": 0.0, + "learning_rate": 2.3552675205720497e-06, + "loss": 1.272, + "step": 26691 + }, + { + "epoch": 0.7837218861941394, + "grad_norm": 0.0, + "learning_rate": 2.3546545133562614e-06, + "loss": 1.2085, + "step": 26692 + }, + { + "epoch": 0.7837512478712784, + "grad_norm": 0.0, + "learning_rate": 2.3540415752792866e-06, + "loss": 1.4375, + "step": 26693 + }, + { + "epoch": 0.7837806095484174, + "grad_norm": 0.0, + "learning_rate": 2.3534287063466744e-06, + "loss": 1.165, + "step": 26694 + }, + { + "epoch": 0.7838099712255564, + "grad_norm": 0.0, + "learning_rate": 2.3528159065639656e-06, + "loss": 1.1001, + "step": 26695 + }, + { + "epoch": 0.7838393329026954, + "grad_norm": 0.0, + "learning_rate": 2.352203175936697e-06, + "loss": 1.1704, + "step": 26696 + }, + { + "epoch": 0.7838686945798344, + "grad_norm": 0.0, + "learning_rate": 2.351590514470421e-06, + "loss": 1.2295, + "step": 26697 + }, + { + "epoch": 0.7838980562569734, + "grad_norm": 0.0, + "learning_rate": 2.350977922170664e-06, + "loss": 1.3965, + "step": 26698 + }, + { + "epoch": 0.7839274179341124, + "grad_norm": 0.0, + "learning_rate": 2.3503653990429754e-06, + "loss": 1.2715, + "step": 26699 + }, + { + "epoch": 0.7839567796112514, + "grad_norm": 0.0, + "learning_rate": 2.3497529450928893e-06, + "loss": 1.3018, + "step": 26700 + }, + { + "epoch": 0.7839861412883904, + "grad_norm": 0.0, + "learning_rate": 2.3491405603259487e-06, + "loss": 1.0908, + "step": 26701 + }, + { + "epoch": 0.7840155029655294, + "grad_norm": 0.0, + "learning_rate": 2.3485282447476874e-06, + "loss": 1.2627, + "step": 26702 + }, + { + "epoch": 0.7840448646426684, + "grad_norm": 0.0, + "learning_rate": 2.3479159983636467e-06, + "loss": 1.2783, + "step": 26703 + }, + { + "epoch": 0.7840742263198074, + "grad_norm": 0.0, + "learning_rate": 2.347303821179362e-06, + "loss": 1.2515, + "step": 26704 + }, + { + "epoch": 0.7841035879969463, + "grad_norm": 0.0, + "learning_rate": 2.3466917132003684e-06, + "loss": 1.1719, + "step": 26705 + }, + { + "epoch": 0.7841329496740854, + "grad_norm": 0.0, + "learning_rate": 2.3460796744322024e-06, + "loss": 1.1479, + "step": 26706 + }, + { + "epoch": 0.7841623113512244, + "grad_norm": 0.0, + "learning_rate": 2.345467704880394e-06, + "loss": 1.2261, + "step": 26707 + }, + { + "epoch": 0.7841916730283633, + "grad_norm": 0.0, + "learning_rate": 2.3448558045504854e-06, + "loss": 1.2217, + "step": 26708 + }, + { + "epoch": 0.7842210347055024, + "grad_norm": 0.0, + "learning_rate": 2.344243973448004e-06, + "loss": 1.2607, + "step": 26709 + }, + { + "epoch": 0.7842503963826414, + "grad_norm": 0.0, + "learning_rate": 2.343632211578486e-06, + "loss": 1.0303, + "step": 26710 + }, + { + "epoch": 0.7842797580597803, + "grad_norm": 0.0, + "learning_rate": 2.343020518947461e-06, + "loss": 1.2324, + "step": 26711 + }, + { + "epoch": 0.7843091197369194, + "grad_norm": 0.0, + "learning_rate": 2.342408895560465e-06, + "loss": 1.2393, + "step": 26712 + }, + { + "epoch": 0.7843384814140584, + "grad_norm": 0.0, + "learning_rate": 2.3417973414230266e-06, + "loss": 1.2549, + "step": 26713 + }, + { + "epoch": 0.7843678430911973, + "grad_norm": 0.0, + "learning_rate": 2.3411858565406764e-06, + "loss": 1.376, + "step": 26714 + }, + { + "epoch": 0.7843972047683364, + "grad_norm": 0.0, + "learning_rate": 2.3405744409189436e-06, + "loss": 1.2178, + "step": 26715 + }, + { + "epoch": 0.7844265664454754, + "grad_norm": 0.0, + "learning_rate": 2.3399630945633555e-06, + "loss": 1.1006, + "step": 26716 + }, + { + "epoch": 0.7844559281226143, + "grad_norm": 0.0, + "learning_rate": 2.339351817479445e-06, + "loss": 1.2007, + "step": 26717 + }, + { + "epoch": 0.7844852897997534, + "grad_norm": 0.0, + "learning_rate": 2.338740609672735e-06, + "loss": 1.2549, + "step": 26718 + }, + { + "epoch": 0.7845146514768924, + "grad_norm": 0.0, + "learning_rate": 2.338129471148759e-06, + "loss": 1.2031, + "step": 26719 + }, + { + "epoch": 0.7845440131540313, + "grad_norm": 0.0, + "learning_rate": 2.337518401913038e-06, + "loss": 1.1646, + "step": 26720 + }, + { + "epoch": 0.7845733748311704, + "grad_norm": 0.0, + "learning_rate": 2.3369074019711026e-06, + "loss": 1.1841, + "step": 26721 + }, + { + "epoch": 0.7846027365083094, + "grad_norm": 0.0, + "learning_rate": 2.3362964713284764e-06, + "loss": 1.2192, + "step": 26722 + }, + { + "epoch": 0.7846320981854483, + "grad_norm": 0.0, + "learning_rate": 2.3356856099906844e-06, + "loss": 1.2363, + "step": 26723 + }, + { + "epoch": 0.7846614598625874, + "grad_norm": 0.0, + "learning_rate": 2.3350748179632466e-06, + "loss": 1.2598, + "step": 26724 + }, + { + "epoch": 0.7846908215397264, + "grad_norm": 0.0, + "learning_rate": 2.334464095251693e-06, + "loss": 1.1226, + "step": 26725 + }, + { + "epoch": 0.7847201832168653, + "grad_norm": 0.0, + "learning_rate": 2.3338534418615445e-06, + "loss": 1.2822, + "step": 26726 + }, + { + "epoch": 0.7847495448940044, + "grad_norm": 0.0, + "learning_rate": 2.333242857798319e-06, + "loss": 1.1846, + "step": 26727 + }, + { + "epoch": 0.7847789065711434, + "grad_norm": 0.0, + "learning_rate": 2.332632343067546e-06, + "loss": 1.2627, + "step": 26728 + }, + { + "epoch": 0.7848082682482823, + "grad_norm": 0.0, + "learning_rate": 2.332021897674741e-06, + "loss": 1.2305, + "step": 26729 + }, + { + "epoch": 0.7848376299254214, + "grad_norm": 0.0, + "learning_rate": 2.3314115216254263e-06, + "loss": 1.1401, + "step": 26730 + }, + { + "epoch": 0.7848669916025603, + "grad_norm": 0.0, + "learning_rate": 2.330801214925118e-06, + "loss": 1.251, + "step": 26731 + }, + { + "epoch": 0.7848963532796993, + "grad_norm": 0.0, + "learning_rate": 2.3301909775793417e-06, + "loss": 1.2002, + "step": 26732 + }, + { + "epoch": 0.7849257149568384, + "grad_norm": 0.0, + "learning_rate": 2.3295808095936103e-06, + "loss": 1.2266, + "step": 26733 + }, + { + "epoch": 0.7849550766339773, + "grad_norm": 0.0, + "learning_rate": 2.328970710973446e-06, + "loss": 1.2881, + "step": 26734 + }, + { + "epoch": 0.7849844383111163, + "grad_norm": 0.0, + "learning_rate": 2.3283606817243644e-06, + "loss": 1.25, + "step": 26735 + }, + { + "epoch": 0.7850137999882554, + "grad_norm": 0.0, + "learning_rate": 2.327750721851879e-06, + "loss": 1.2295, + "step": 26736 + }, + { + "epoch": 0.7850431616653943, + "grad_norm": 0.0, + "learning_rate": 2.3271408313615117e-06, + "loss": 1.2666, + "step": 26737 + }, + { + "epoch": 0.7850725233425333, + "grad_norm": 0.0, + "learning_rate": 2.3265310102587746e-06, + "loss": 1.1367, + "step": 26738 + }, + { + "epoch": 0.7851018850196724, + "grad_norm": 0.0, + "learning_rate": 2.325921258549183e-06, + "loss": 1.3252, + "step": 26739 + }, + { + "epoch": 0.7851312466968113, + "grad_norm": 0.0, + "learning_rate": 2.3253115762382484e-06, + "loss": 1.1675, + "step": 26740 + }, + { + "epoch": 0.7851606083739503, + "grad_norm": 0.0, + "learning_rate": 2.3247019633314893e-06, + "loss": 1.2559, + "step": 26741 + }, + { + "epoch": 0.7851899700510894, + "grad_norm": 0.0, + "learning_rate": 2.3240924198344126e-06, + "loss": 1.1123, + "step": 26742 + }, + { + "epoch": 0.7852193317282283, + "grad_norm": 0.0, + "learning_rate": 2.3234829457525377e-06, + "loss": 1.2505, + "step": 26743 + }, + { + "epoch": 0.7852486934053673, + "grad_norm": 0.0, + "learning_rate": 2.3228735410913693e-06, + "loss": 1.0312, + "step": 26744 + }, + { + "epoch": 0.7852780550825064, + "grad_norm": 0.0, + "learning_rate": 2.322264205856424e-06, + "loss": 1.2583, + "step": 26745 + }, + { + "epoch": 0.7853074167596453, + "grad_norm": 0.0, + "learning_rate": 2.3216549400532108e-06, + "loss": 1.1943, + "step": 26746 + }, + { + "epoch": 0.7853367784367843, + "grad_norm": 0.0, + "learning_rate": 2.3210457436872367e-06, + "loss": 1.1372, + "step": 26747 + }, + { + "epoch": 0.7853661401139234, + "grad_norm": 0.0, + "learning_rate": 2.3204366167640145e-06, + "loss": 1.1587, + "step": 26748 + }, + { + "epoch": 0.7853955017910623, + "grad_norm": 0.0, + "learning_rate": 2.3198275592890474e-06, + "loss": 1.2305, + "step": 26749 + }, + { + "epoch": 0.7854248634682013, + "grad_norm": 0.0, + "learning_rate": 2.3192185712678482e-06, + "loss": 1.2598, + "step": 26750 + }, + { + "epoch": 0.7854542251453404, + "grad_norm": 0.0, + "learning_rate": 2.318609652705921e-06, + "loss": 1.166, + "step": 26751 + }, + { + "epoch": 0.7854835868224793, + "grad_norm": 0.0, + "learning_rate": 2.3180008036087763e-06, + "loss": 1.251, + "step": 26752 + }, + { + "epoch": 0.7855129484996183, + "grad_norm": 0.0, + "learning_rate": 2.3173920239819183e-06, + "loss": 1.1504, + "step": 26753 + }, + { + "epoch": 0.7855423101767572, + "grad_norm": 0.0, + "learning_rate": 2.3167833138308503e-06, + "loss": 1.2583, + "step": 26754 + }, + { + "epoch": 0.7855716718538963, + "grad_norm": 0.0, + "learning_rate": 2.31617467316108e-06, + "loss": 1.1733, + "step": 26755 + }, + { + "epoch": 0.7856010335310353, + "grad_norm": 0.0, + "learning_rate": 2.3155661019781063e-06, + "loss": 1.4053, + "step": 26756 + }, + { + "epoch": 0.7856303952081742, + "grad_norm": 0.0, + "learning_rate": 2.3149576002874396e-06, + "loss": 1.0825, + "step": 26757 + }, + { + "epoch": 0.7856597568853133, + "grad_norm": 0.0, + "learning_rate": 2.3143491680945772e-06, + "loss": 1.2725, + "step": 26758 + }, + { + "epoch": 0.7856891185624523, + "grad_norm": 0.0, + "learning_rate": 2.313740805405026e-06, + "loss": 1.2012, + "step": 26759 + }, + { + "epoch": 0.7857184802395912, + "grad_norm": 0.0, + "learning_rate": 2.313132512224282e-06, + "loss": 1.1128, + "step": 26760 + }, + { + "epoch": 0.7857478419167303, + "grad_norm": 0.0, + "learning_rate": 2.3125242885578516e-06, + "loss": 1.3008, + "step": 26761 + }, + { + "epoch": 0.7857772035938693, + "grad_norm": 0.0, + "learning_rate": 2.3119161344112338e-06, + "loss": 1.2222, + "step": 26762 + }, + { + "epoch": 0.7858065652710082, + "grad_norm": 0.0, + "learning_rate": 2.3113080497899278e-06, + "loss": 1.3237, + "step": 26763 + }, + { + "epoch": 0.7858359269481473, + "grad_norm": 0.0, + "learning_rate": 2.310700034699431e-06, + "loss": 1.2202, + "step": 26764 + }, + { + "epoch": 0.7858652886252863, + "grad_norm": 0.0, + "learning_rate": 2.3100920891452404e-06, + "loss": 1.1938, + "step": 26765 + }, + { + "epoch": 0.7858946503024252, + "grad_norm": 0.0, + "learning_rate": 2.30948421313286e-06, + "loss": 1.3135, + "step": 26766 + }, + { + "epoch": 0.7859240119795643, + "grad_norm": 0.0, + "learning_rate": 2.3088764066677807e-06, + "loss": 1.251, + "step": 26767 + }, + { + "epoch": 0.7859533736567033, + "grad_norm": 0.0, + "learning_rate": 2.3082686697555036e-06, + "loss": 1.2515, + "step": 26768 + }, + { + "epoch": 0.7859827353338422, + "grad_norm": 0.0, + "learning_rate": 2.30766100240152e-06, + "loss": 1.2588, + "step": 26769 + }, + { + "epoch": 0.7860120970109813, + "grad_norm": 0.0, + "learning_rate": 2.3070534046113315e-06, + "loss": 1.1289, + "step": 26770 + }, + { + "epoch": 0.7860414586881203, + "grad_norm": 0.0, + "learning_rate": 2.3064458763904297e-06, + "loss": 1.2998, + "step": 26771 + }, + { + "epoch": 0.7860708203652592, + "grad_norm": 0.0, + "learning_rate": 2.3058384177443073e-06, + "loss": 1.1182, + "step": 26772 + }, + { + "epoch": 0.7861001820423983, + "grad_norm": 0.0, + "learning_rate": 2.3052310286784563e-06, + "loss": 1.2344, + "step": 26773 + }, + { + "epoch": 0.7861295437195372, + "grad_norm": 0.0, + "learning_rate": 2.3046237091983737e-06, + "loss": 1.0894, + "step": 26774 + }, + { + "epoch": 0.7861589053966762, + "grad_norm": 0.0, + "learning_rate": 2.3040164593095503e-06, + "loss": 1.2588, + "step": 26775 + }, + { + "epoch": 0.7861882670738153, + "grad_norm": 0.0, + "learning_rate": 2.3034092790174744e-06, + "loss": 1.2227, + "step": 26776 + }, + { + "epoch": 0.7862176287509542, + "grad_norm": 0.0, + "learning_rate": 2.3028021683276425e-06, + "loss": 1.1304, + "step": 26777 + }, + { + "epoch": 0.7862469904280932, + "grad_norm": 0.0, + "learning_rate": 2.3021951272455413e-06, + "loss": 1.2393, + "step": 26778 + }, + { + "epoch": 0.7862763521052323, + "grad_norm": 0.0, + "learning_rate": 2.3015881557766606e-06, + "loss": 1.1846, + "step": 26779 + }, + { + "epoch": 0.7863057137823712, + "grad_norm": 0.0, + "learning_rate": 2.3009812539264886e-06, + "loss": 1.2183, + "step": 26780 + }, + { + "epoch": 0.7863350754595102, + "grad_norm": 0.0, + "learning_rate": 2.3003744217005165e-06, + "loss": 1.2568, + "step": 26781 + }, + { + "epoch": 0.7863644371366493, + "grad_norm": 0.0, + "learning_rate": 2.299767659104227e-06, + "loss": 1.2656, + "step": 26782 + }, + { + "epoch": 0.7863937988137882, + "grad_norm": 0.0, + "learning_rate": 2.2991609661431147e-06, + "loss": 1.1196, + "step": 26783 + }, + { + "epoch": 0.7864231604909272, + "grad_norm": 0.0, + "learning_rate": 2.298554342822662e-06, + "loss": 1.3398, + "step": 26784 + }, + { + "epoch": 0.7864525221680663, + "grad_norm": 0.0, + "learning_rate": 2.2979477891483516e-06, + "loss": 1.0967, + "step": 26785 + }, + { + "epoch": 0.7864818838452052, + "grad_norm": 0.0, + "learning_rate": 2.2973413051256745e-06, + "loss": 1.2012, + "step": 26786 + }, + { + "epoch": 0.7865112455223442, + "grad_norm": 0.0, + "learning_rate": 2.2967348907601127e-06, + "loss": 1.2441, + "step": 26787 + }, + { + "epoch": 0.7865406071994833, + "grad_norm": 0.0, + "learning_rate": 2.2961285460571492e-06, + "loss": 1.3447, + "step": 26788 + }, + { + "epoch": 0.7865699688766222, + "grad_norm": 0.0, + "learning_rate": 2.2955222710222667e-06, + "loss": 1.1826, + "step": 26789 + }, + { + "epoch": 0.7865993305537612, + "grad_norm": 0.0, + "learning_rate": 2.2949160656609525e-06, + "loss": 1.2251, + "step": 26790 + }, + { + "epoch": 0.7866286922309003, + "grad_norm": 0.0, + "learning_rate": 2.294309929978682e-06, + "loss": 1.2471, + "step": 26791 + }, + { + "epoch": 0.7866580539080392, + "grad_norm": 0.0, + "learning_rate": 2.293703863980943e-06, + "loss": 1.2539, + "step": 26792 + }, + { + "epoch": 0.7866874155851782, + "grad_norm": 0.0, + "learning_rate": 2.293097867673211e-06, + "loss": 1.3438, + "step": 26793 + }, + { + "epoch": 0.7867167772623173, + "grad_norm": 0.0, + "learning_rate": 2.2924919410609715e-06, + "loss": 1.1123, + "step": 26794 + }, + { + "epoch": 0.7867461389394562, + "grad_norm": 0.0, + "learning_rate": 2.2918860841497016e-06, + "loss": 1.2109, + "step": 26795 + }, + { + "epoch": 0.7867755006165952, + "grad_norm": 0.0, + "learning_rate": 2.29128029694488e-06, + "loss": 1.2002, + "step": 26796 + }, + { + "epoch": 0.7868048622937343, + "grad_norm": 0.0, + "learning_rate": 2.2906745794519846e-06, + "loss": 1.2529, + "step": 26797 + }, + { + "epoch": 0.7868342239708732, + "grad_norm": 0.0, + "learning_rate": 2.29006893167649e-06, + "loss": 1.2617, + "step": 26798 + }, + { + "epoch": 0.7868635856480122, + "grad_norm": 0.0, + "learning_rate": 2.2894633536238806e-06, + "loss": 1.1206, + "step": 26799 + }, + { + "epoch": 0.7868929473251512, + "grad_norm": 0.0, + "learning_rate": 2.2888578452996257e-06, + "loss": 1.2007, + "step": 26800 + }, + { + "epoch": 0.7869223090022902, + "grad_norm": 0.0, + "learning_rate": 2.288252406709206e-06, + "loss": 1.1348, + "step": 26801 + }, + { + "epoch": 0.7869516706794292, + "grad_norm": 0.0, + "learning_rate": 2.2876470378580962e-06, + "loss": 1.3633, + "step": 26802 + }, + { + "epoch": 0.7869810323565682, + "grad_norm": 0.0, + "learning_rate": 2.2870417387517683e-06, + "loss": 1.144, + "step": 26803 + }, + { + "epoch": 0.7870103940337072, + "grad_norm": 0.0, + "learning_rate": 2.2864365093956974e-06, + "loss": 1.2969, + "step": 26804 + }, + { + "epoch": 0.7870397557108462, + "grad_norm": 0.0, + "learning_rate": 2.2858313497953545e-06, + "loss": 1.1621, + "step": 26805 + }, + { + "epoch": 0.7870691173879852, + "grad_norm": 0.0, + "learning_rate": 2.285226259956217e-06, + "loss": 1.2031, + "step": 26806 + }, + { + "epoch": 0.7870984790651242, + "grad_norm": 0.0, + "learning_rate": 2.2846212398837507e-06, + "loss": 1.1016, + "step": 26807 + }, + { + "epoch": 0.7871278407422632, + "grad_norm": 0.0, + "learning_rate": 2.284016289583434e-06, + "loss": 1.251, + "step": 26808 + }, + { + "epoch": 0.7871572024194022, + "grad_norm": 0.0, + "learning_rate": 2.283411409060731e-06, + "loss": 1.2891, + "step": 26809 + }, + { + "epoch": 0.7871865640965412, + "grad_norm": 0.0, + "learning_rate": 2.2828065983211165e-06, + "loss": 1.1528, + "step": 26810 + }, + { + "epoch": 0.7872159257736802, + "grad_norm": 0.0, + "learning_rate": 2.28220185737006e-06, + "loss": 1.2202, + "step": 26811 + }, + { + "epoch": 0.7872452874508192, + "grad_norm": 0.0, + "learning_rate": 2.281597186213028e-06, + "loss": 1.2119, + "step": 26812 + }, + { + "epoch": 0.7872746491279582, + "grad_norm": 0.0, + "learning_rate": 2.2809925848554893e-06, + "loss": 1.2646, + "step": 26813 + }, + { + "epoch": 0.7873040108050972, + "grad_norm": 0.0, + "learning_rate": 2.2803880533029087e-06, + "loss": 1.2441, + "step": 26814 + }, + { + "epoch": 0.7873333724822362, + "grad_norm": 0.0, + "learning_rate": 2.279783591560759e-06, + "loss": 1.1836, + "step": 26815 + }, + { + "epoch": 0.7873627341593752, + "grad_norm": 0.0, + "learning_rate": 2.2791791996345015e-06, + "loss": 1.1167, + "step": 26816 + }, + { + "epoch": 0.7873920958365141, + "grad_norm": 0.0, + "learning_rate": 2.2785748775296056e-06, + "loss": 1.1592, + "step": 26817 + }, + { + "epoch": 0.7874214575136532, + "grad_norm": 0.0, + "learning_rate": 2.277970625251532e-06, + "loss": 1.2417, + "step": 26818 + }, + { + "epoch": 0.7874508191907922, + "grad_norm": 0.0, + "learning_rate": 2.2773664428057494e-06, + "loss": 1.2715, + "step": 26819 + }, + { + "epoch": 0.7874801808679311, + "grad_norm": 0.0, + "learning_rate": 2.2767623301977215e-06, + "loss": 1.2334, + "step": 26820 + }, + { + "epoch": 0.7875095425450702, + "grad_norm": 0.0, + "learning_rate": 2.276158287432909e-06, + "loss": 1.1616, + "step": 26821 + }, + { + "epoch": 0.7875389042222092, + "grad_norm": 0.0, + "learning_rate": 2.2755543145167726e-06, + "loss": 1.2378, + "step": 26822 + }, + { + "epoch": 0.7875682658993481, + "grad_norm": 0.0, + "learning_rate": 2.2749504114547792e-06, + "loss": 1.3818, + "step": 26823 + }, + { + "epoch": 0.7875976275764872, + "grad_norm": 0.0, + "learning_rate": 2.274346578252389e-06, + "loss": 1.0806, + "step": 26824 + }, + { + "epoch": 0.7876269892536262, + "grad_norm": 0.0, + "learning_rate": 2.2737428149150565e-06, + "loss": 1.1621, + "step": 26825 + }, + { + "epoch": 0.7876563509307651, + "grad_norm": 0.0, + "learning_rate": 2.27313912144825e-06, + "loss": 1.2705, + "step": 26826 + }, + { + "epoch": 0.7876857126079042, + "grad_norm": 0.0, + "learning_rate": 2.2725354978574266e-06, + "loss": 1.1724, + "step": 26827 + }, + { + "epoch": 0.7877150742850432, + "grad_norm": 0.0, + "learning_rate": 2.2719319441480424e-06, + "loss": 1.2949, + "step": 26828 + }, + { + "epoch": 0.7877444359621821, + "grad_norm": 0.0, + "learning_rate": 2.271328460325554e-06, + "loss": 1.2217, + "step": 26829 + }, + { + "epoch": 0.7877737976393212, + "grad_norm": 0.0, + "learning_rate": 2.2707250463954243e-06, + "loss": 1.2949, + "step": 26830 + }, + { + "epoch": 0.7878031593164602, + "grad_norm": 0.0, + "learning_rate": 2.2701217023631063e-06, + "loss": 1.1973, + "step": 26831 + }, + { + "epoch": 0.7878325209935991, + "grad_norm": 0.0, + "learning_rate": 2.269518428234059e-06, + "loss": 1.1558, + "step": 26832 + }, + { + "epoch": 0.7878618826707382, + "grad_norm": 0.0, + "learning_rate": 2.2689152240137356e-06, + "loss": 1.2178, + "step": 26833 + }, + { + "epoch": 0.7878912443478772, + "grad_norm": 0.0, + "learning_rate": 2.2683120897075894e-06, + "loss": 1.249, + "step": 26834 + }, + { + "epoch": 0.7879206060250161, + "grad_norm": 0.0, + "learning_rate": 2.267709025321081e-06, + "loss": 1.3916, + "step": 26835 + }, + { + "epoch": 0.7879499677021552, + "grad_norm": 0.0, + "learning_rate": 2.267106030859659e-06, + "loss": 1.1704, + "step": 26836 + }, + { + "epoch": 0.7879793293792942, + "grad_norm": 0.0, + "learning_rate": 2.266503106328778e-06, + "loss": 1.1875, + "step": 26837 + }, + { + "epoch": 0.7880086910564331, + "grad_norm": 0.0, + "learning_rate": 2.2659002517338866e-06, + "loss": 1.1646, + "step": 26838 + }, + { + "epoch": 0.7880380527335722, + "grad_norm": 0.0, + "learning_rate": 2.265297467080444e-06, + "loss": 1.2075, + "step": 26839 + }, + { + "epoch": 0.7880674144107112, + "grad_norm": 0.0, + "learning_rate": 2.2646947523738936e-06, + "loss": 1.2998, + "step": 26840 + }, + { + "epoch": 0.7880967760878501, + "grad_norm": 0.0, + "learning_rate": 2.264092107619693e-06, + "loss": 1.1724, + "step": 26841 + }, + { + "epoch": 0.7881261377649892, + "grad_norm": 0.0, + "learning_rate": 2.2634895328232853e-06, + "loss": 1.3428, + "step": 26842 + }, + { + "epoch": 0.7881554994421281, + "grad_norm": 0.0, + "learning_rate": 2.262887027990126e-06, + "loss": 1.1323, + "step": 26843 + }, + { + "epoch": 0.7881848611192671, + "grad_norm": 0.0, + "learning_rate": 2.262284593125661e-06, + "loss": 1.292, + "step": 26844 + }, + { + "epoch": 0.7882142227964062, + "grad_norm": 0.0, + "learning_rate": 2.2616822282353377e-06, + "loss": 1.2642, + "step": 26845 + }, + { + "epoch": 0.7882435844735451, + "grad_norm": 0.0, + "learning_rate": 2.261079933324605e-06, + "loss": 1.2578, + "step": 26846 + }, + { + "epoch": 0.7882729461506841, + "grad_norm": 0.0, + "learning_rate": 2.260477708398905e-06, + "loss": 1.3164, + "step": 26847 + }, + { + "epoch": 0.7883023078278232, + "grad_norm": 0.0, + "learning_rate": 2.2598755534636894e-06, + "loss": 1.2949, + "step": 26848 + }, + { + "epoch": 0.7883316695049621, + "grad_norm": 0.0, + "learning_rate": 2.259273468524399e-06, + "loss": 1.1636, + "step": 26849 + }, + { + "epoch": 0.7883610311821011, + "grad_norm": 0.0, + "learning_rate": 2.258671453586484e-06, + "loss": 1.2817, + "step": 26850 + }, + { + "epoch": 0.7883903928592402, + "grad_norm": 0.0, + "learning_rate": 2.258069508655383e-06, + "loss": 1.2573, + "step": 26851 + }, + { + "epoch": 0.7884197545363791, + "grad_norm": 0.0, + "learning_rate": 2.2574676337365466e-06, + "loss": 1.1768, + "step": 26852 + }, + { + "epoch": 0.7884491162135181, + "grad_norm": 0.0, + "learning_rate": 2.2568658288354117e-06, + "loss": 1.2173, + "step": 26853 + }, + { + "epoch": 0.7884784778906571, + "grad_norm": 0.0, + "learning_rate": 2.2562640939574187e-06, + "loss": 1.1685, + "step": 26854 + }, + { + "epoch": 0.7885078395677961, + "grad_norm": 0.0, + "learning_rate": 2.2556624291080164e-06, + "loss": 1.124, + "step": 26855 + }, + { + "epoch": 0.7885372012449351, + "grad_norm": 0.0, + "learning_rate": 2.2550608342926383e-06, + "loss": 1.1929, + "step": 26856 + }, + { + "epoch": 0.788566562922074, + "grad_norm": 0.0, + "learning_rate": 2.2544593095167323e-06, + "loss": 1.1812, + "step": 26857 + }, + { + "epoch": 0.7885959245992131, + "grad_norm": 0.0, + "learning_rate": 2.2538578547857306e-06, + "loss": 1.0903, + "step": 26858 + }, + { + "epoch": 0.7886252862763521, + "grad_norm": 0.0, + "learning_rate": 2.2532564701050795e-06, + "loss": 1.2744, + "step": 26859 + }, + { + "epoch": 0.788654647953491, + "grad_norm": 0.0, + "learning_rate": 2.2526551554802136e-06, + "loss": 1.1748, + "step": 26860 + }, + { + "epoch": 0.7886840096306301, + "grad_norm": 0.0, + "learning_rate": 2.2520539109165717e-06, + "loss": 1.3018, + "step": 26861 + }, + { + "epoch": 0.7887133713077691, + "grad_norm": 0.0, + "learning_rate": 2.251452736419588e-06, + "loss": 1.2153, + "step": 26862 + }, + { + "epoch": 0.788742732984908, + "grad_norm": 0.0, + "learning_rate": 2.250851631994705e-06, + "loss": 1.29, + "step": 26863 + }, + { + "epoch": 0.7887720946620471, + "grad_norm": 0.0, + "learning_rate": 2.2502505976473544e-06, + "loss": 1.1973, + "step": 26864 + }, + { + "epoch": 0.7888014563391861, + "grad_norm": 0.0, + "learning_rate": 2.2496496333829686e-06, + "loss": 1.1528, + "step": 26865 + }, + { + "epoch": 0.788830818016325, + "grad_norm": 0.0, + "learning_rate": 2.2490487392069906e-06, + "loss": 1.2705, + "step": 26866 + }, + { + "epoch": 0.7888601796934641, + "grad_norm": 0.0, + "learning_rate": 2.248447915124846e-06, + "loss": 1.2305, + "step": 26867 + }, + { + "epoch": 0.7888895413706031, + "grad_norm": 0.0, + "learning_rate": 2.247847161141976e-06, + "loss": 1.1699, + "step": 26868 + }, + { + "epoch": 0.788918903047742, + "grad_norm": 0.0, + "learning_rate": 2.247246477263808e-06, + "loss": 1.3057, + "step": 26869 + }, + { + "epoch": 0.7889482647248811, + "grad_norm": 0.0, + "learning_rate": 2.2466458634957767e-06, + "loss": 1.1519, + "step": 26870 + }, + { + "epoch": 0.7889776264020201, + "grad_norm": 0.0, + "learning_rate": 2.2460453198433097e-06, + "loss": 1.2041, + "step": 26871 + }, + { + "epoch": 0.789006988079159, + "grad_norm": 0.0, + "learning_rate": 2.2454448463118428e-06, + "loss": 1.1138, + "step": 26872 + }, + { + "epoch": 0.7890363497562981, + "grad_norm": 0.0, + "learning_rate": 2.244844442906805e-06, + "loss": 1.1055, + "step": 26873 + }, + { + "epoch": 0.7890657114334371, + "grad_norm": 0.0, + "learning_rate": 2.244244109633622e-06, + "loss": 1.1074, + "step": 26874 + }, + { + "epoch": 0.789095073110576, + "grad_norm": 0.0, + "learning_rate": 2.2436438464977284e-06, + "loss": 1.1738, + "step": 26875 + }, + { + "epoch": 0.7891244347877151, + "grad_norm": 0.0, + "learning_rate": 2.2430436535045473e-06, + "loss": 1.3369, + "step": 26876 + }, + { + "epoch": 0.7891537964648541, + "grad_norm": 0.0, + "learning_rate": 2.242443530659514e-06, + "loss": 1.2305, + "step": 26877 + }, + { + "epoch": 0.789183158141993, + "grad_norm": 0.0, + "learning_rate": 2.2418434779680455e-06, + "loss": 1.2969, + "step": 26878 + }, + { + "epoch": 0.7892125198191321, + "grad_norm": 0.0, + "learning_rate": 2.2412434954355767e-06, + "loss": 1.2119, + "step": 26879 + }, + { + "epoch": 0.7892418814962711, + "grad_norm": 0.0, + "learning_rate": 2.2406435830675255e-06, + "loss": 1.207, + "step": 26880 + }, + { + "epoch": 0.78927124317341, + "grad_norm": 0.0, + "learning_rate": 2.2400437408693264e-06, + "loss": 1.2563, + "step": 26881 + }, + { + "epoch": 0.7893006048505491, + "grad_norm": 0.0, + "learning_rate": 2.2394439688463976e-06, + "loss": 1.2466, + "step": 26882 + }, + { + "epoch": 0.789329966527688, + "grad_norm": 0.0, + "learning_rate": 2.238844267004162e-06, + "loss": 1.2188, + "step": 26883 + }, + { + "epoch": 0.789359328204827, + "grad_norm": 0.0, + "learning_rate": 2.238244635348049e-06, + "loss": 1.2764, + "step": 26884 + }, + { + "epoch": 0.7893886898819661, + "grad_norm": 0.0, + "learning_rate": 2.2376450738834764e-06, + "loss": 1.1982, + "step": 26885 + }, + { + "epoch": 0.789418051559105, + "grad_norm": 0.0, + "learning_rate": 2.2370455826158687e-06, + "loss": 1.2852, + "step": 26886 + }, + { + "epoch": 0.789447413236244, + "grad_norm": 0.0, + "learning_rate": 2.2364461615506417e-06, + "loss": 1.2656, + "step": 26887 + }, + { + "epoch": 0.7894767749133831, + "grad_norm": 0.0, + "learning_rate": 2.235846810693224e-06, + "loss": 1.2051, + "step": 26888 + }, + { + "epoch": 0.789506136590522, + "grad_norm": 0.0, + "learning_rate": 2.2352475300490297e-06, + "loss": 1.332, + "step": 26889 + }, + { + "epoch": 0.789535498267661, + "grad_norm": 0.0, + "learning_rate": 2.2346483196234825e-06, + "loss": 1.1895, + "step": 26890 + }, + { + "epoch": 0.7895648599448001, + "grad_norm": 0.0, + "learning_rate": 2.2340491794219964e-06, + "loss": 1.1147, + "step": 26891 + }, + { + "epoch": 0.789594221621939, + "grad_norm": 0.0, + "learning_rate": 2.233450109449996e-06, + "loss": 1.1436, + "step": 26892 + }, + { + "epoch": 0.789623583299078, + "grad_norm": 0.0, + "learning_rate": 2.2328511097128945e-06, + "loss": 1.2021, + "step": 26893 + }, + { + "epoch": 0.7896529449762171, + "grad_norm": 0.0, + "learning_rate": 2.2322521802161103e-06, + "loss": 1.2686, + "step": 26894 + }, + { + "epoch": 0.789682306653356, + "grad_norm": 0.0, + "learning_rate": 2.2316533209650583e-06, + "loss": 1.1733, + "step": 26895 + }, + { + "epoch": 0.789711668330495, + "grad_norm": 0.0, + "learning_rate": 2.231054531965152e-06, + "loss": 1.2148, + "step": 26896 + }, + { + "epoch": 0.7897410300076341, + "grad_norm": 0.0, + "learning_rate": 2.2304558132218123e-06, + "loss": 1.2041, + "step": 26897 + }, + { + "epoch": 0.789770391684773, + "grad_norm": 0.0, + "learning_rate": 2.2298571647404487e-06, + "loss": 1.1851, + "step": 26898 + }, + { + "epoch": 0.789799753361912, + "grad_norm": 0.0, + "learning_rate": 2.2292585865264783e-06, + "loss": 1.2578, + "step": 26899 + }, + { + "epoch": 0.7898291150390511, + "grad_norm": 0.0, + "learning_rate": 2.2286600785853097e-06, + "loss": 1.1792, + "step": 26900 + }, + { + "epoch": 0.78985847671619, + "grad_norm": 0.0, + "learning_rate": 2.228061640922362e-06, + "loss": 1.2949, + "step": 26901 + }, + { + "epoch": 0.789887838393329, + "grad_norm": 0.0, + "learning_rate": 2.227463273543046e-06, + "loss": 1.2002, + "step": 26902 + }, + { + "epoch": 0.7899172000704681, + "grad_norm": 0.0, + "learning_rate": 2.226864976452764e-06, + "loss": 1.2148, + "step": 26903 + }, + { + "epoch": 0.789946561747607, + "grad_norm": 0.0, + "learning_rate": 2.2262667496569356e-06, + "loss": 1.2764, + "step": 26904 + }, + { + "epoch": 0.789975923424746, + "grad_norm": 0.0, + "learning_rate": 2.2256685931609655e-06, + "loss": 1.1294, + "step": 26905 + }, + { + "epoch": 0.7900052851018851, + "grad_norm": 0.0, + "learning_rate": 2.225070506970267e-06, + "loss": 1.3291, + "step": 26906 + }, + { + "epoch": 0.790034646779024, + "grad_norm": 0.0, + "learning_rate": 2.224472491090246e-06, + "loss": 1.1035, + "step": 26907 + }, + { + "epoch": 0.790064008456163, + "grad_norm": 0.0, + "learning_rate": 2.223874545526313e-06, + "loss": 1.0127, + "step": 26908 + }, + { + "epoch": 0.790093370133302, + "grad_norm": 0.0, + "learning_rate": 2.223276670283875e-06, + "loss": 1.1235, + "step": 26909 + }, + { + "epoch": 0.790122731810441, + "grad_norm": 0.0, + "learning_rate": 2.222678865368337e-06, + "loss": 1.3643, + "step": 26910 + }, + { + "epoch": 0.79015209348758, + "grad_norm": 0.0, + "learning_rate": 2.2220811307851032e-06, + "loss": 1.1367, + "step": 26911 + }, + { + "epoch": 0.790181455164719, + "grad_norm": 0.0, + "learning_rate": 2.2214834665395847e-06, + "loss": 1.3555, + "step": 26912 + }, + { + "epoch": 0.790210816841858, + "grad_norm": 0.0, + "learning_rate": 2.220885872637183e-06, + "loss": 1.2627, + "step": 26913 + }, + { + "epoch": 0.790240178518997, + "grad_norm": 0.0, + "learning_rate": 2.2202883490833003e-06, + "loss": 1.1675, + "step": 26914 + }, + { + "epoch": 0.790269540196136, + "grad_norm": 0.0, + "learning_rate": 2.219690895883345e-06, + "loss": 1.2002, + "step": 26915 + }, + { + "epoch": 0.790298901873275, + "grad_norm": 0.0, + "learning_rate": 2.2190935130427147e-06, + "loss": 1.2349, + "step": 26916 + }, + { + "epoch": 0.790328263550414, + "grad_norm": 0.0, + "learning_rate": 2.2184962005668177e-06, + "loss": 1.1382, + "step": 26917 + }, + { + "epoch": 0.790357625227553, + "grad_norm": 0.0, + "learning_rate": 2.217898958461052e-06, + "loss": 1.2451, + "step": 26918 + }, + { + "epoch": 0.790386986904692, + "grad_norm": 0.0, + "learning_rate": 2.217301786730819e-06, + "loss": 1.021, + "step": 26919 + }, + { + "epoch": 0.790416348581831, + "grad_norm": 0.0, + "learning_rate": 2.2167046853815166e-06, + "loss": 1.3184, + "step": 26920 + }, + { + "epoch": 0.79044571025897, + "grad_norm": 0.0, + "learning_rate": 2.216107654418549e-06, + "loss": 1.2231, + "step": 26921 + }, + { + "epoch": 0.790475071936109, + "grad_norm": 0.0, + "learning_rate": 2.2155106938473137e-06, + "loss": 1.1353, + "step": 26922 + }, + { + "epoch": 0.790504433613248, + "grad_norm": 0.0, + "learning_rate": 2.2149138036732065e-06, + "loss": 1.2295, + "step": 26923 + }, + { + "epoch": 0.790533795290387, + "grad_norm": 0.0, + "learning_rate": 2.214316983901631e-06, + "loss": 1.2197, + "step": 26924 + }, + { + "epoch": 0.790563156967526, + "grad_norm": 0.0, + "learning_rate": 2.2137202345379773e-06, + "loss": 1.1318, + "step": 26925 + }, + { + "epoch": 0.790592518644665, + "grad_norm": 0.0, + "learning_rate": 2.2131235555876486e-06, + "loss": 1.2744, + "step": 26926 + }, + { + "epoch": 0.790621880321804, + "grad_norm": 0.0, + "learning_rate": 2.212526947056037e-06, + "loss": 1.252, + "step": 26927 + }, + { + "epoch": 0.790651241998943, + "grad_norm": 0.0, + "learning_rate": 2.2119304089485394e-06, + "loss": 1.2314, + "step": 26928 + }, + { + "epoch": 0.790680603676082, + "grad_norm": 0.0, + "learning_rate": 2.211333941270547e-06, + "loss": 1.1768, + "step": 26929 + }, + { + "epoch": 0.790709965353221, + "grad_norm": 0.0, + "learning_rate": 2.2107375440274592e-06, + "loss": 1.2939, + "step": 26930 + }, + { + "epoch": 0.79073932703036, + "grad_norm": 0.0, + "learning_rate": 2.2101412172246638e-06, + "loss": 1.2695, + "step": 26931 + }, + { + "epoch": 0.790768688707499, + "grad_norm": 0.0, + "learning_rate": 2.209544960867559e-06, + "loss": 1.2334, + "step": 26932 + }, + { + "epoch": 0.790798050384638, + "grad_norm": 0.0, + "learning_rate": 2.2089487749615347e-06, + "loss": 1.1255, + "step": 26933 + }, + { + "epoch": 0.790827412061777, + "grad_norm": 0.0, + "learning_rate": 2.208352659511981e-06, + "loss": 1.3096, + "step": 26934 + }, + { + "epoch": 0.7908567737389159, + "grad_norm": 0.0, + "learning_rate": 2.2077566145242903e-06, + "loss": 1.1689, + "step": 26935 + }, + { + "epoch": 0.790886135416055, + "grad_norm": 0.0, + "learning_rate": 2.207160640003849e-06, + "loss": 1.1064, + "step": 26936 + }, + { + "epoch": 0.790915497093194, + "grad_norm": 0.0, + "learning_rate": 2.206564735956054e-06, + "loss": 1.2437, + "step": 26937 + }, + { + "epoch": 0.7909448587703329, + "grad_norm": 0.0, + "learning_rate": 2.2059689023862853e-06, + "loss": 1.1929, + "step": 26938 + }, + { + "epoch": 0.790974220447472, + "grad_norm": 0.0, + "learning_rate": 2.20537313929994e-06, + "loss": 1.1475, + "step": 26939 + }, + { + "epoch": 0.791003582124611, + "grad_norm": 0.0, + "learning_rate": 2.2047774467023984e-06, + "loss": 1.3228, + "step": 26940 + }, + { + "epoch": 0.7910329438017499, + "grad_norm": 0.0, + "learning_rate": 2.204181824599053e-06, + "loss": 1.1372, + "step": 26941 + }, + { + "epoch": 0.791062305478889, + "grad_norm": 0.0, + "learning_rate": 2.2035862729952884e-06, + "loss": 1.1963, + "step": 26942 + }, + { + "epoch": 0.791091667156028, + "grad_norm": 0.0, + "learning_rate": 2.20299079189649e-06, + "loss": 1.333, + "step": 26943 + }, + { + "epoch": 0.7911210288331669, + "grad_norm": 0.0, + "learning_rate": 2.2023953813080434e-06, + "loss": 1.2217, + "step": 26944 + }, + { + "epoch": 0.791150390510306, + "grad_norm": 0.0, + "learning_rate": 2.201800041235328e-06, + "loss": 1.2422, + "step": 26945 + }, + { + "epoch": 0.791179752187445, + "grad_norm": 0.0, + "learning_rate": 2.201204771683736e-06, + "loss": 1.2412, + "step": 26946 + }, + { + "epoch": 0.7912091138645839, + "grad_norm": 0.0, + "learning_rate": 2.200609572658643e-06, + "loss": 1.085, + "step": 26947 + }, + { + "epoch": 0.791238475541723, + "grad_norm": 0.0, + "learning_rate": 2.2000144441654382e-06, + "loss": 1.3193, + "step": 26948 + }, + { + "epoch": 0.791267837218862, + "grad_norm": 0.0, + "learning_rate": 2.1994193862094982e-06, + "loss": 1.2207, + "step": 26949 + }, + { + "epoch": 0.7912971988960009, + "grad_norm": 0.0, + "learning_rate": 2.198824398796208e-06, + "loss": 1.2769, + "step": 26950 + }, + { + "epoch": 0.79132656057314, + "grad_norm": 0.0, + "learning_rate": 2.198229481930947e-06, + "loss": 1.2554, + "step": 26951 + }, + { + "epoch": 0.791355922250279, + "grad_norm": 0.0, + "learning_rate": 2.197634635619096e-06, + "loss": 1.1855, + "step": 26952 + }, + { + "epoch": 0.7913852839274179, + "grad_norm": 0.0, + "learning_rate": 2.197039859866031e-06, + "loss": 1.1611, + "step": 26953 + }, + { + "epoch": 0.791414645604557, + "grad_norm": 0.0, + "learning_rate": 2.1964451546771325e-06, + "loss": 1.0986, + "step": 26954 + }, + { + "epoch": 0.791444007281696, + "grad_norm": 0.0, + "learning_rate": 2.1958505200577805e-06, + "loss": 1.2837, + "step": 26955 + }, + { + "epoch": 0.7914733689588349, + "grad_norm": 0.0, + "learning_rate": 2.195255956013348e-06, + "loss": 1.1328, + "step": 26956 + }, + { + "epoch": 0.7915027306359739, + "grad_norm": 0.0, + "learning_rate": 2.194661462549218e-06, + "loss": 1.2241, + "step": 26957 + }, + { + "epoch": 0.791532092313113, + "grad_norm": 0.0, + "learning_rate": 2.194067039670764e-06, + "loss": 1.2529, + "step": 26958 + }, + { + "epoch": 0.7915614539902519, + "grad_norm": 0.0, + "learning_rate": 2.19347268738336e-06, + "loss": 1.207, + "step": 26959 + }, + { + "epoch": 0.7915908156673909, + "grad_norm": 0.0, + "learning_rate": 2.1928784056923793e-06, + "loss": 1.1938, + "step": 26960 + }, + { + "epoch": 0.7916201773445299, + "grad_norm": 0.0, + "learning_rate": 2.1922841946032003e-06, + "loss": 1.4287, + "step": 26961 + }, + { + "epoch": 0.7916495390216689, + "grad_norm": 0.0, + "learning_rate": 2.1916900541211963e-06, + "loss": 1.29, + "step": 26962 + }, + { + "epoch": 0.7916789006988079, + "grad_norm": 0.0, + "learning_rate": 2.1910959842517356e-06, + "loss": 1.188, + "step": 26963 + }, + { + "epoch": 0.7917082623759469, + "grad_norm": 0.0, + "learning_rate": 2.1905019850001964e-06, + "loss": 1.2402, + "step": 26964 + }, + { + "epoch": 0.7917376240530859, + "grad_norm": 0.0, + "learning_rate": 2.1899080563719453e-06, + "loss": 1.332, + "step": 26965 + }, + { + "epoch": 0.7917669857302249, + "grad_norm": 0.0, + "learning_rate": 2.1893141983723586e-06, + "loss": 1.2637, + "step": 26966 + }, + { + "epoch": 0.7917963474073639, + "grad_norm": 0.0, + "learning_rate": 2.1887204110068037e-06, + "loss": 1.2881, + "step": 26967 + }, + { + "epoch": 0.7918257090845029, + "grad_norm": 0.0, + "learning_rate": 2.188126694280651e-06, + "loss": 1.293, + "step": 26968 + }, + { + "epoch": 0.7918550707616419, + "grad_norm": 0.0, + "learning_rate": 2.1875330481992665e-06, + "loss": 1.1846, + "step": 26969 + }, + { + "epoch": 0.7918844324387809, + "grad_norm": 0.0, + "learning_rate": 2.1869394727680236e-06, + "loss": 1.2061, + "step": 26970 + }, + { + "epoch": 0.7919137941159199, + "grad_norm": 0.0, + "learning_rate": 2.1863459679922895e-06, + "loss": 1.272, + "step": 26971 + }, + { + "epoch": 0.7919431557930589, + "grad_norm": 0.0, + "learning_rate": 2.1857525338774253e-06, + "loss": 1.2842, + "step": 26972 + }, + { + "epoch": 0.7919725174701979, + "grad_norm": 0.0, + "learning_rate": 2.1851591704288055e-06, + "loss": 1.2861, + "step": 26973 + }, + { + "epoch": 0.7920018791473369, + "grad_norm": 0.0, + "learning_rate": 2.184565877651791e-06, + "loss": 1.1226, + "step": 26974 + }, + { + "epoch": 0.7920312408244758, + "grad_norm": 0.0, + "learning_rate": 2.1839726555517503e-06, + "loss": 1.1934, + "step": 26975 + }, + { + "epoch": 0.7920606025016149, + "grad_norm": 0.0, + "learning_rate": 2.183379504134048e-06, + "loss": 1.293, + "step": 26976 + }, + { + "epoch": 0.7920899641787539, + "grad_norm": 0.0, + "learning_rate": 2.1827864234040454e-06, + "loss": 1.124, + "step": 26977 + }, + { + "epoch": 0.7921193258558928, + "grad_norm": 0.0, + "learning_rate": 2.1821934133671043e-06, + "loss": 1.3232, + "step": 26978 + }, + { + "epoch": 0.7921486875330319, + "grad_norm": 0.0, + "learning_rate": 2.1816004740285935e-06, + "loss": 1.2422, + "step": 26979 + }, + { + "epoch": 0.7921780492101709, + "grad_norm": 0.0, + "learning_rate": 2.1810076053938702e-06, + "loss": 1.25, + "step": 26980 + }, + { + "epoch": 0.7922074108873098, + "grad_norm": 0.0, + "learning_rate": 2.1804148074682986e-06, + "loss": 1.0615, + "step": 26981 + }, + { + "epoch": 0.7922367725644489, + "grad_norm": 0.0, + "learning_rate": 2.17982208025724e-06, + "loss": 1.2334, + "step": 26982 + }, + { + "epoch": 0.7922661342415879, + "grad_norm": 0.0, + "learning_rate": 2.1792294237660514e-06, + "loss": 1.1958, + "step": 26983 + }, + { + "epoch": 0.7922954959187268, + "grad_norm": 0.0, + "learning_rate": 2.1786368380000955e-06, + "loss": 1.1157, + "step": 26984 + }, + { + "epoch": 0.7923248575958659, + "grad_norm": 0.0, + "learning_rate": 2.1780443229647275e-06, + "loss": 1.165, + "step": 26985 + }, + { + "epoch": 0.7923542192730049, + "grad_norm": 0.0, + "learning_rate": 2.1774518786653097e-06, + "loss": 1.2417, + "step": 26986 + }, + { + "epoch": 0.7923835809501438, + "grad_norm": 0.0, + "learning_rate": 2.1768595051071952e-06, + "loss": 0.9409, + "step": 26987 + }, + { + "epoch": 0.7924129426272829, + "grad_norm": 0.0, + "learning_rate": 2.1762672022957467e-06, + "loss": 1.1968, + "step": 26988 + }, + { + "epoch": 0.7924423043044219, + "grad_norm": 0.0, + "learning_rate": 2.1756749702363144e-06, + "loss": 1.1748, + "step": 26989 + }, + { + "epoch": 0.7924716659815608, + "grad_norm": 0.0, + "learning_rate": 2.17508280893426e-06, + "loss": 1.2715, + "step": 26990 + }, + { + "epoch": 0.7925010276586999, + "grad_norm": 0.0, + "learning_rate": 2.174490718394936e-06, + "loss": 1.2837, + "step": 26991 + }, + { + "epoch": 0.7925303893358389, + "grad_norm": 0.0, + "learning_rate": 2.1738986986236964e-06, + "loss": 1.0977, + "step": 26992 + }, + { + "epoch": 0.7925597510129778, + "grad_norm": 0.0, + "learning_rate": 2.1733067496258953e-06, + "loss": 1.0952, + "step": 26993 + }, + { + "epoch": 0.7925891126901169, + "grad_norm": 0.0, + "learning_rate": 2.1727148714068835e-06, + "loss": 1.1631, + "step": 26994 + }, + { + "epoch": 0.7926184743672559, + "grad_norm": 0.0, + "learning_rate": 2.172123063972018e-06, + "loss": 1.21, + "step": 26995 + }, + { + "epoch": 0.7926478360443948, + "grad_norm": 0.0, + "learning_rate": 2.1715313273266457e-06, + "loss": 1.1895, + "step": 26996 + }, + { + "epoch": 0.7926771977215339, + "grad_norm": 0.0, + "learning_rate": 2.1709396614761223e-06, + "loss": 1.145, + "step": 26997 + }, + { + "epoch": 0.7927065593986728, + "grad_norm": 0.0, + "learning_rate": 2.1703480664257957e-06, + "loss": 1.2637, + "step": 26998 + }, + { + "epoch": 0.7927359210758118, + "grad_norm": 0.0, + "learning_rate": 2.1697565421810186e-06, + "loss": 1.2441, + "step": 26999 + }, + { + "epoch": 0.7927652827529509, + "grad_norm": 0.0, + "learning_rate": 2.1691650887471384e-06, + "loss": 1.293, + "step": 27000 + }, + { + "epoch": 0.7927946444300898, + "grad_norm": 0.0, + "learning_rate": 2.1685737061295033e-06, + "loss": 1.1548, + "step": 27001 + }, + { + "epoch": 0.7928240061072288, + "grad_norm": 0.0, + "learning_rate": 2.167982394333462e-06, + "loss": 1.2163, + "step": 27002 + }, + { + "epoch": 0.7928533677843679, + "grad_norm": 0.0, + "learning_rate": 2.16739115336436e-06, + "loss": 1.1602, + "step": 27003 + }, + { + "epoch": 0.7928827294615068, + "grad_norm": 0.0, + "learning_rate": 2.1667999832275476e-06, + "loss": 1.208, + "step": 27004 + }, + { + "epoch": 0.7929120911386458, + "grad_norm": 0.0, + "learning_rate": 2.1662088839283657e-06, + "loss": 1.4189, + "step": 27005 + }, + { + "epoch": 0.7929414528157849, + "grad_norm": 0.0, + "learning_rate": 2.1656178554721672e-06, + "loss": 1.2422, + "step": 27006 + }, + { + "epoch": 0.7929708144929238, + "grad_norm": 0.0, + "learning_rate": 2.1650268978642917e-06, + "loss": 1.2202, + "step": 27007 + }, + { + "epoch": 0.7930001761700628, + "grad_norm": 0.0, + "learning_rate": 2.1644360111100858e-06, + "loss": 1.1934, + "step": 27008 + }, + { + "epoch": 0.7930295378472019, + "grad_norm": 0.0, + "learning_rate": 2.1638451952148885e-06, + "loss": 1.3252, + "step": 27009 + }, + { + "epoch": 0.7930588995243408, + "grad_norm": 0.0, + "learning_rate": 2.163254450184048e-06, + "loss": 1.2559, + "step": 27010 + }, + { + "epoch": 0.7930882612014798, + "grad_norm": 0.0, + "learning_rate": 2.1626637760229054e-06, + "loss": 1.1924, + "step": 27011 + }, + { + "epoch": 0.7931176228786189, + "grad_norm": 0.0, + "learning_rate": 2.1620731727367985e-06, + "loss": 1.3018, + "step": 27012 + }, + { + "epoch": 0.7931469845557578, + "grad_norm": 0.0, + "learning_rate": 2.161482640331074e-06, + "loss": 1.1675, + "step": 27013 + }, + { + "epoch": 0.7931763462328968, + "grad_norm": 0.0, + "learning_rate": 2.160892178811066e-06, + "loss": 1.0908, + "step": 27014 + }, + { + "epoch": 0.7932057079100359, + "grad_norm": 0.0, + "learning_rate": 2.160301788182121e-06, + "loss": 1.2329, + "step": 27015 + }, + { + "epoch": 0.7932350695871748, + "grad_norm": 0.0, + "learning_rate": 2.1597114684495745e-06, + "loss": 1.1582, + "step": 27016 + }, + { + "epoch": 0.7932644312643138, + "grad_norm": 0.0, + "learning_rate": 2.1591212196187663e-06, + "loss": 1.3086, + "step": 27017 + }, + { + "epoch": 0.7932937929414529, + "grad_norm": 0.0, + "learning_rate": 2.158531041695029e-06, + "loss": 1.2969, + "step": 27018 + }, + { + "epoch": 0.7933231546185918, + "grad_norm": 0.0, + "learning_rate": 2.1579409346837065e-06, + "loss": 1.1499, + "step": 27019 + }, + { + "epoch": 0.7933525162957308, + "grad_norm": 0.0, + "learning_rate": 2.1573508985901328e-06, + "loss": 1.2734, + "step": 27020 + }, + { + "epoch": 0.7933818779728699, + "grad_norm": 0.0, + "learning_rate": 2.1567609334196403e-06, + "loss": 1.2939, + "step": 27021 + }, + { + "epoch": 0.7934112396500088, + "grad_norm": 0.0, + "learning_rate": 2.15617103917757e-06, + "loss": 1.2441, + "step": 27022 + }, + { + "epoch": 0.7934406013271478, + "grad_norm": 0.0, + "learning_rate": 2.155581215869252e-06, + "loss": 1.1504, + "step": 27023 + }, + { + "epoch": 0.7934699630042868, + "grad_norm": 0.0, + "learning_rate": 2.1549914635000245e-06, + "loss": 1.2559, + "step": 27024 + }, + { + "epoch": 0.7934993246814258, + "grad_norm": 0.0, + "learning_rate": 2.154401782075218e-06, + "loss": 1.1021, + "step": 27025 + }, + { + "epoch": 0.7935286863585648, + "grad_norm": 0.0, + "learning_rate": 2.153812171600166e-06, + "loss": 1.1343, + "step": 27026 + }, + { + "epoch": 0.7935580480357038, + "grad_norm": 0.0, + "learning_rate": 2.153222632080196e-06, + "loss": 1.2129, + "step": 27027 + }, + { + "epoch": 0.7935874097128428, + "grad_norm": 0.0, + "learning_rate": 2.1526331635206475e-06, + "loss": 1.21, + "step": 27028 + }, + { + "epoch": 0.7936167713899818, + "grad_norm": 0.0, + "learning_rate": 2.1520437659268435e-06, + "loss": 1.2129, + "step": 27029 + }, + { + "epoch": 0.7936461330671208, + "grad_norm": 0.0, + "learning_rate": 2.1514544393041205e-06, + "loss": 1.1963, + "step": 27030 + }, + { + "epoch": 0.7936754947442598, + "grad_norm": 0.0, + "learning_rate": 2.1508651836578054e-06, + "loss": 1.2637, + "step": 27031 + }, + { + "epoch": 0.7937048564213988, + "grad_norm": 0.0, + "learning_rate": 2.1502759989932265e-06, + "loss": 1.0552, + "step": 27032 + }, + { + "epoch": 0.7937342180985378, + "grad_norm": 0.0, + "learning_rate": 2.1496868853157117e-06, + "loss": 1.3643, + "step": 27033 + }, + { + "epoch": 0.7937635797756768, + "grad_norm": 0.0, + "learning_rate": 2.1490978426305863e-06, + "loss": 1.2285, + "step": 27034 + }, + { + "epoch": 0.7937929414528158, + "grad_norm": 0.0, + "learning_rate": 2.1485088709431822e-06, + "loss": 1.2568, + "step": 27035 + }, + { + "epoch": 0.7938223031299548, + "grad_norm": 0.0, + "learning_rate": 2.1479199702588215e-06, + "loss": 1.2354, + "step": 27036 + }, + { + "epoch": 0.7938516648070938, + "grad_norm": 0.0, + "learning_rate": 2.1473311405828333e-06, + "loss": 1.2725, + "step": 27037 + }, + { + "epoch": 0.7938810264842328, + "grad_norm": 0.0, + "learning_rate": 2.146742381920538e-06, + "loss": 1.2734, + "step": 27038 + }, + { + "epoch": 0.7939103881613718, + "grad_norm": 0.0, + "learning_rate": 2.1461536942772655e-06, + "loss": 1.2178, + "step": 27039 + }, + { + "epoch": 0.7939397498385108, + "grad_norm": 0.0, + "learning_rate": 2.145565077658337e-06, + "loss": 1.1416, + "step": 27040 + }, + { + "epoch": 0.7939691115156498, + "grad_norm": 0.0, + "learning_rate": 2.144976532069074e-06, + "loss": 1.2295, + "step": 27041 + }, + { + "epoch": 0.7939984731927888, + "grad_norm": 0.0, + "learning_rate": 2.144388057514801e-06, + "loss": 1.2031, + "step": 27042 + }, + { + "epoch": 0.7940278348699278, + "grad_norm": 0.0, + "learning_rate": 2.1437996540008355e-06, + "loss": 1.1826, + "step": 27043 + }, + { + "epoch": 0.7940571965470667, + "grad_norm": 0.0, + "learning_rate": 2.1432113215325046e-06, + "loss": 1.1465, + "step": 27044 + }, + { + "epoch": 0.7940865582242058, + "grad_norm": 0.0, + "learning_rate": 2.142623060115122e-06, + "loss": 1.1436, + "step": 27045 + }, + { + "epoch": 0.7941159199013448, + "grad_norm": 0.0, + "learning_rate": 2.142034869754016e-06, + "loss": 1.1641, + "step": 27046 + }, + { + "epoch": 0.7941452815784837, + "grad_norm": 0.0, + "learning_rate": 2.1414467504544966e-06, + "loss": 1.1973, + "step": 27047 + }, + { + "epoch": 0.7941746432556228, + "grad_norm": 0.0, + "learning_rate": 2.140858702221891e-06, + "loss": 1.293, + "step": 27048 + }, + { + "epoch": 0.7942040049327618, + "grad_norm": 0.0, + "learning_rate": 2.1402707250615116e-06, + "loss": 1.1426, + "step": 27049 + }, + { + "epoch": 0.7942333666099007, + "grad_norm": 0.0, + "learning_rate": 2.1396828189786777e-06, + "loss": 1.0552, + "step": 27050 + }, + { + "epoch": 0.7942627282870398, + "grad_norm": 0.0, + "learning_rate": 2.1390949839787056e-06, + "loss": 1.2598, + "step": 27051 + }, + { + "epoch": 0.7942920899641788, + "grad_norm": 0.0, + "learning_rate": 2.138507220066908e-06, + "loss": 1.269, + "step": 27052 + }, + { + "epoch": 0.7943214516413177, + "grad_norm": 0.0, + "learning_rate": 2.137919527248605e-06, + "loss": 1.1855, + "step": 27053 + }, + { + "epoch": 0.7943508133184568, + "grad_norm": 0.0, + "learning_rate": 2.1373319055291066e-06, + "loss": 1.3916, + "step": 27054 + }, + { + "epoch": 0.7943801749955958, + "grad_norm": 0.0, + "learning_rate": 2.1367443549137325e-06, + "loss": 1.3711, + "step": 27055 + }, + { + "epoch": 0.7944095366727347, + "grad_norm": 0.0, + "learning_rate": 2.13615687540779e-06, + "loss": 1.1284, + "step": 27056 + }, + { + "epoch": 0.7944388983498737, + "grad_norm": 0.0, + "learning_rate": 2.1355694670166004e-06, + "loss": 1.3232, + "step": 27057 + }, + { + "epoch": 0.7944682600270128, + "grad_norm": 0.0, + "learning_rate": 2.134982129745464e-06, + "loss": 1.3281, + "step": 27058 + }, + { + "epoch": 0.7944976217041517, + "grad_norm": 0.0, + "learning_rate": 2.1343948635997027e-06, + "loss": 1.2539, + "step": 27059 + }, + { + "epoch": 0.7945269833812907, + "grad_norm": 0.0, + "learning_rate": 2.1338076685846222e-06, + "loss": 1.2236, + "step": 27060 + }, + { + "epoch": 0.7945563450584298, + "grad_norm": 0.0, + "learning_rate": 2.133220544705531e-06, + "loss": 1.1426, + "step": 27061 + }, + { + "epoch": 0.7945857067355687, + "grad_norm": 0.0, + "learning_rate": 2.132633491967744e-06, + "loss": 1.2935, + "step": 27062 + }, + { + "epoch": 0.7946150684127077, + "grad_norm": 0.0, + "learning_rate": 2.132046510376563e-06, + "loss": 1.1587, + "step": 27063 + }, + { + "epoch": 0.7946444300898468, + "grad_norm": 0.0, + "learning_rate": 2.1314595999373046e-06, + "loss": 1.2251, + "step": 27064 + }, + { + "epoch": 0.7946737917669857, + "grad_norm": 0.0, + "learning_rate": 2.1308727606552724e-06, + "loss": 1.1357, + "step": 27065 + }, + { + "epoch": 0.7947031534441247, + "grad_norm": 0.0, + "learning_rate": 2.1302859925357722e-06, + "loss": 1.291, + "step": 27066 + }, + { + "epoch": 0.7947325151212637, + "grad_norm": 0.0, + "learning_rate": 2.129699295584109e-06, + "loss": 1.1426, + "step": 27067 + }, + { + "epoch": 0.7947618767984027, + "grad_norm": 0.0, + "learning_rate": 2.129112669805594e-06, + "loss": 1.144, + "step": 27068 + }, + { + "epoch": 0.7947912384755417, + "grad_norm": 0.0, + "learning_rate": 2.128526115205528e-06, + "loss": 1.1919, + "step": 27069 + }, + { + "epoch": 0.7948206001526807, + "grad_norm": 0.0, + "learning_rate": 2.1279396317892143e-06, + "loss": 1.2417, + "step": 27070 + }, + { + "epoch": 0.7948499618298197, + "grad_norm": 0.0, + "learning_rate": 2.1273532195619606e-06, + "loss": 1.2539, + "step": 27071 + }, + { + "epoch": 0.7948793235069587, + "grad_norm": 0.0, + "learning_rate": 2.1267668785290653e-06, + "loss": 1.3496, + "step": 27072 + }, + { + "epoch": 0.7949086851840977, + "grad_norm": 0.0, + "learning_rate": 2.126180608695836e-06, + "loss": 1.1963, + "step": 27073 + }, + { + "epoch": 0.7949380468612367, + "grad_norm": 0.0, + "learning_rate": 2.1255944100675728e-06, + "loss": 1.1392, + "step": 27074 + }, + { + "epoch": 0.7949674085383757, + "grad_norm": 0.0, + "learning_rate": 2.1250082826495753e-06, + "loss": 1.1802, + "step": 27075 + }, + { + "epoch": 0.7949967702155147, + "grad_norm": 0.0, + "learning_rate": 2.1244222264471426e-06, + "loss": 1.252, + "step": 27076 + }, + { + "epoch": 0.7950261318926537, + "grad_norm": 0.0, + "learning_rate": 2.123836241465579e-06, + "loss": 1.3301, + "step": 27077 + }, + { + "epoch": 0.7950554935697927, + "grad_norm": 0.0, + "learning_rate": 2.123250327710178e-06, + "loss": 1.2119, + "step": 27078 + }, + { + "epoch": 0.7950848552469317, + "grad_norm": 0.0, + "learning_rate": 2.1226644851862454e-06, + "loss": 1.1685, + "step": 27079 + }, + { + "epoch": 0.7951142169240707, + "grad_norm": 0.0, + "learning_rate": 2.1220787138990752e-06, + "loss": 1.1758, + "step": 27080 + }, + { + "epoch": 0.7951435786012097, + "grad_norm": 0.0, + "learning_rate": 2.121493013853961e-06, + "loss": 1.1104, + "step": 27081 + }, + { + "epoch": 0.7951729402783487, + "grad_norm": 0.0, + "learning_rate": 2.1209073850562103e-06, + "loss": 1.2998, + "step": 27082 + }, + { + "epoch": 0.7952023019554877, + "grad_norm": 0.0, + "learning_rate": 2.1203218275111047e-06, + "loss": 1.29, + "step": 27083 + }, + { + "epoch": 0.7952316636326267, + "grad_norm": 0.0, + "learning_rate": 2.1197363412239502e-06, + "loss": 1.2476, + "step": 27084 + }, + { + "epoch": 0.7952610253097657, + "grad_norm": 0.0, + "learning_rate": 2.1191509262000353e-06, + "loss": 1.2705, + "step": 27085 + }, + { + "epoch": 0.7952903869869047, + "grad_norm": 0.0, + "learning_rate": 2.1185655824446603e-06, + "loss": 1.2222, + "step": 27086 + }, + { + "epoch": 0.7953197486640436, + "grad_norm": 0.0, + "learning_rate": 2.1179803099631124e-06, + "loss": 1.2798, + "step": 27087 + }, + { + "epoch": 0.7953491103411827, + "grad_norm": 0.0, + "learning_rate": 2.1173951087606893e-06, + "loss": 1.1572, + "step": 27088 + }, + { + "epoch": 0.7953784720183217, + "grad_norm": 0.0, + "learning_rate": 2.116809978842681e-06, + "loss": 1.1387, + "step": 27089 + }, + { + "epoch": 0.7954078336954606, + "grad_norm": 0.0, + "learning_rate": 2.11622492021438e-06, + "loss": 1.3213, + "step": 27090 + }, + { + "epoch": 0.7954371953725997, + "grad_norm": 0.0, + "learning_rate": 2.115639932881075e-06, + "loss": 1.3564, + "step": 27091 + }, + { + "epoch": 0.7954665570497387, + "grad_norm": 0.0, + "learning_rate": 2.1150550168480555e-06, + "loss": 1.2012, + "step": 27092 + }, + { + "epoch": 0.7954959187268776, + "grad_norm": 0.0, + "learning_rate": 2.114470172120615e-06, + "loss": 1.1309, + "step": 27093 + }, + { + "epoch": 0.7955252804040167, + "grad_norm": 0.0, + "learning_rate": 2.1138853987040377e-06, + "loss": 1.2764, + "step": 27094 + }, + { + "epoch": 0.7955546420811557, + "grad_norm": 0.0, + "learning_rate": 2.113300696603616e-06, + "loss": 1.1865, + "step": 27095 + }, + { + "epoch": 0.7955840037582946, + "grad_norm": 0.0, + "learning_rate": 2.1127160658246347e-06, + "loss": 1.1562, + "step": 27096 + }, + { + "epoch": 0.7956133654354337, + "grad_norm": 0.0, + "learning_rate": 2.1121315063723847e-06, + "loss": 1.2627, + "step": 27097 + }, + { + "epoch": 0.7956427271125727, + "grad_norm": 0.0, + "learning_rate": 2.111547018252148e-06, + "loss": 1.1797, + "step": 27098 + }, + { + "epoch": 0.7956720887897116, + "grad_norm": 0.0, + "learning_rate": 2.110962601469213e-06, + "loss": 1.23, + "step": 27099 + }, + { + "epoch": 0.7957014504668507, + "grad_norm": 0.0, + "learning_rate": 2.1103782560288644e-06, + "loss": 1.2266, + "step": 27100 + }, + { + "epoch": 0.7957308121439897, + "grad_norm": 0.0, + "learning_rate": 2.109793981936382e-06, + "loss": 1.1387, + "step": 27101 + }, + { + "epoch": 0.7957601738211286, + "grad_norm": 0.0, + "learning_rate": 2.1092097791970555e-06, + "loss": 1.3389, + "step": 27102 + }, + { + "epoch": 0.7957895354982677, + "grad_norm": 0.0, + "learning_rate": 2.108625647816165e-06, + "loss": 1.1758, + "step": 27103 + }, + { + "epoch": 0.7958188971754067, + "grad_norm": 0.0, + "learning_rate": 2.1080415877989945e-06, + "loss": 1.1362, + "step": 27104 + }, + { + "epoch": 0.7958482588525456, + "grad_norm": 0.0, + "learning_rate": 2.107457599150824e-06, + "loss": 1.3125, + "step": 27105 + }, + { + "epoch": 0.7958776205296847, + "grad_norm": 0.0, + "learning_rate": 2.1068736818769377e-06, + "loss": 1.2158, + "step": 27106 + }, + { + "epoch": 0.7959069822068237, + "grad_norm": 0.0, + "learning_rate": 2.1062898359826134e-06, + "loss": 1.1479, + "step": 27107 + }, + { + "epoch": 0.7959363438839626, + "grad_norm": 0.0, + "learning_rate": 2.105706061473133e-06, + "loss": 1.1997, + "step": 27108 + }, + { + "epoch": 0.7959657055611017, + "grad_norm": 0.0, + "learning_rate": 2.105122358353774e-06, + "loss": 1.2256, + "step": 27109 + }, + { + "epoch": 0.7959950672382407, + "grad_norm": 0.0, + "learning_rate": 2.104538726629812e-06, + "loss": 1.3506, + "step": 27110 + }, + { + "epoch": 0.7960244289153796, + "grad_norm": 0.0, + "learning_rate": 2.1039551663065326e-06, + "loss": 1.0347, + "step": 27111 + }, + { + "epoch": 0.7960537905925187, + "grad_norm": 0.0, + "learning_rate": 2.1033716773892054e-06, + "loss": 1.3057, + "step": 27112 + }, + { + "epoch": 0.7960831522696576, + "grad_norm": 0.0, + "learning_rate": 2.102788259883113e-06, + "loss": 1.2744, + "step": 27113 + }, + { + "epoch": 0.7961125139467966, + "grad_norm": 0.0, + "learning_rate": 2.1022049137935287e-06, + "loss": 1.1782, + "step": 27114 + }, + { + "epoch": 0.7961418756239357, + "grad_norm": 0.0, + "learning_rate": 2.1016216391257282e-06, + "loss": 1.1523, + "step": 27115 + }, + { + "epoch": 0.7961712373010746, + "grad_norm": 0.0, + "learning_rate": 2.1010384358849835e-06, + "loss": 1.2227, + "step": 27116 + }, + { + "epoch": 0.7962005989782136, + "grad_norm": 0.0, + "learning_rate": 2.1004553040765727e-06, + "loss": 1.1606, + "step": 27117 + }, + { + "epoch": 0.7962299606553527, + "grad_norm": 0.0, + "learning_rate": 2.0998722437057663e-06, + "loss": 1.2295, + "step": 27118 + }, + { + "epoch": 0.7962593223324916, + "grad_norm": 0.0, + "learning_rate": 2.099289254777841e-06, + "loss": 1.167, + "step": 27119 + }, + { + "epoch": 0.7962886840096306, + "grad_norm": 0.0, + "learning_rate": 2.0987063372980666e-06, + "loss": 1.2334, + "step": 27120 + }, + { + "epoch": 0.7963180456867697, + "grad_norm": 0.0, + "learning_rate": 2.098123491271711e-06, + "loss": 1.2559, + "step": 27121 + }, + { + "epoch": 0.7963474073639086, + "grad_norm": 0.0, + "learning_rate": 2.097540716704052e-06, + "loss": 1.25, + "step": 27122 + }, + { + "epoch": 0.7963767690410476, + "grad_norm": 0.0, + "learning_rate": 2.0969580136003554e-06, + "loss": 1.1562, + "step": 27123 + }, + { + "epoch": 0.7964061307181867, + "grad_norm": 0.0, + "learning_rate": 2.0963753819658915e-06, + "loss": 1.292, + "step": 27124 + }, + { + "epoch": 0.7964354923953256, + "grad_norm": 0.0, + "learning_rate": 2.0957928218059265e-06, + "loss": 1.3096, + "step": 27125 + }, + { + "epoch": 0.7964648540724646, + "grad_norm": 0.0, + "learning_rate": 2.0952103331257335e-06, + "loss": 1.1123, + "step": 27126 + }, + { + "epoch": 0.7964942157496037, + "grad_norm": 0.0, + "learning_rate": 2.094627915930576e-06, + "loss": 1.208, + "step": 27127 + }, + { + "epoch": 0.7965235774267426, + "grad_norm": 0.0, + "learning_rate": 2.0940455702257255e-06, + "loss": 1.2251, + "step": 27128 + }, + { + "epoch": 0.7965529391038816, + "grad_norm": 0.0, + "learning_rate": 2.0934632960164458e-06, + "loss": 1.3066, + "step": 27129 + }, + { + "epoch": 0.7965823007810207, + "grad_norm": 0.0, + "learning_rate": 2.092881093307999e-06, + "loss": 1.084, + "step": 27130 + }, + { + "epoch": 0.7966116624581596, + "grad_norm": 0.0, + "learning_rate": 2.0922989621056565e-06, + "loss": 1.1953, + "step": 27131 + }, + { + "epoch": 0.7966410241352986, + "grad_norm": 0.0, + "learning_rate": 2.09171690241468e-06, + "loss": 1.2725, + "step": 27132 + }, + { + "epoch": 0.7966703858124377, + "grad_norm": 0.0, + "learning_rate": 2.0911349142403325e-06, + "loss": 1.2305, + "step": 27133 + }, + { + "epoch": 0.7966997474895766, + "grad_norm": 0.0, + "learning_rate": 2.0905529975878748e-06, + "loss": 1.2295, + "step": 27134 + }, + { + "epoch": 0.7967291091667156, + "grad_norm": 0.0, + "learning_rate": 2.0899711524625743e-06, + "loss": 1.2266, + "step": 27135 + }, + { + "epoch": 0.7967584708438546, + "grad_norm": 0.0, + "learning_rate": 2.0893893788696886e-06, + "loss": 1.189, + "step": 27136 + }, + { + "epoch": 0.7967878325209936, + "grad_norm": 0.0, + "learning_rate": 2.0888076768144837e-06, + "loss": 1.3066, + "step": 27137 + }, + { + "epoch": 0.7968171941981326, + "grad_norm": 0.0, + "learning_rate": 2.0882260463022163e-06, + "loss": 1.1875, + "step": 27138 + }, + { + "epoch": 0.7968465558752716, + "grad_norm": 0.0, + "learning_rate": 2.0876444873381464e-06, + "loss": 1.0908, + "step": 27139 + }, + { + "epoch": 0.7968759175524106, + "grad_norm": 0.0, + "learning_rate": 2.087062999927535e-06, + "loss": 1.3008, + "step": 27140 + }, + { + "epoch": 0.7969052792295496, + "grad_norm": 0.0, + "learning_rate": 2.0864815840756358e-06, + "loss": 1.2314, + "step": 27141 + }, + { + "epoch": 0.7969346409066886, + "grad_norm": 0.0, + "learning_rate": 2.085900239787714e-06, + "loss": 1.2715, + "step": 27142 + }, + { + "epoch": 0.7969640025838276, + "grad_norm": 0.0, + "learning_rate": 2.08531896706902e-06, + "loss": 1.2266, + "step": 27143 + }, + { + "epoch": 0.7969933642609666, + "grad_norm": 0.0, + "learning_rate": 2.0847377659248158e-06, + "loss": 1.1904, + "step": 27144 + }, + { + "epoch": 0.7970227259381056, + "grad_norm": 0.0, + "learning_rate": 2.0841566363603525e-06, + "loss": 1.1802, + "step": 27145 + }, + { + "epoch": 0.7970520876152446, + "grad_norm": 0.0, + "learning_rate": 2.0835755783808908e-06, + "loss": 1.3652, + "step": 27146 + }, + { + "epoch": 0.7970814492923836, + "grad_norm": 0.0, + "learning_rate": 2.082994591991684e-06, + "loss": 1.248, + "step": 27147 + }, + { + "epoch": 0.7971108109695226, + "grad_norm": 0.0, + "learning_rate": 2.082413677197983e-06, + "loss": 1.1128, + "step": 27148 + }, + { + "epoch": 0.7971401726466616, + "grad_norm": 0.0, + "learning_rate": 2.0818328340050432e-06, + "loss": 1.0542, + "step": 27149 + }, + { + "epoch": 0.7971695343238006, + "grad_norm": 0.0, + "learning_rate": 2.0812520624181144e-06, + "loss": 1.2119, + "step": 27150 + }, + { + "epoch": 0.7971988960009396, + "grad_norm": 0.0, + "learning_rate": 2.0806713624424535e-06, + "loss": 1.1675, + "step": 27151 + }, + { + "epoch": 0.7972282576780786, + "grad_norm": 0.0, + "learning_rate": 2.0800907340833077e-06, + "loss": 1.293, + "step": 27152 + }, + { + "epoch": 0.7972576193552176, + "grad_norm": 0.0, + "learning_rate": 2.0795101773459314e-06, + "loss": 1.147, + "step": 27153 + }, + { + "epoch": 0.7972869810323566, + "grad_norm": 0.0, + "learning_rate": 2.0789296922355716e-06, + "loss": 1.0874, + "step": 27154 + }, + { + "epoch": 0.7973163427094956, + "grad_norm": 0.0, + "learning_rate": 2.078349278757481e-06, + "loss": 1.2983, + "step": 27155 + }, + { + "epoch": 0.7973457043866345, + "grad_norm": 0.0, + "learning_rate": 2.0777689369169075e-06, + "loss": 1.2861, + "step": 27156 + }, + { + "epoch": 0.7973750660637735, + "grad_norm": 0.0, + "learning_rate": 2.077188666719098e-06, + "loss": 1.1963, + "step": 27157 + }, + { + "epoch": 0.7974044277409126, + "grad_norm": 0.0, + "learning_rate": 2.0766084681693e-06, + "loss": 1.1714, + "step": 27158 + }, + { + "epoch": 0.7974337894180515, + "grad_norm": 0.0, + "learning_rate": 2.0760283412727576e-06, + "loss": 1.1152, + "step": 27159 + }, + { + "epoch": 0.7974631510951905, + "grad_norm": 0.0, + "learning_rate": 2.075448286034724e-06, + "loss": 1.0869, + "step": 27160 + }, + { + "epoch": 0.7974925127723296, + "grad_norm": 0.0, + "learning_rate": 2.0748683024604376e-06, + "loss": 1.2432, + "step": 27161 + }, + { + "epoch": 0.7975218744494685, + "grad_norm": 0.0, + "learning_rate": 2.0742883905551503e-06, + "loss": 1.2549, + "step": 27162 + }, + { + "epoch": 0.7975512361266075, + "grad_norm": 0.0, + "learning_rate": 2.073708550324103e-06, + "loss": 1.1982, + "step": 27163 + }, + { + "epoch": 0.7975805978037466, + "grad_norm": 0.0, + "learning_rate": 2.0731287817725387e-06, + "loss": 1.2773, + "step": 27164 + }, + { + "epoch": 0.7976099594808855, + "grad_norm": 0.0, + "learning_rate": 2.0725490849056983e-06, + "loss": 1.1421, + "step": 27165 + }, + { + "epoch": 0.7976393211580245, + "grad_norm": 0.0, + "learning_rate": 2.0719694597288296e-06, + "loss": 1.1255, + "step": 27166 + }, + { + "epoch": 0.7976686828351636, + "grad_norm": 0.0, + "learning_rate": 2.0713899062471687e-06, + "loss": 1.2651, + "step": 27167 + }, + { + "epoch": 0.7976980445123025, + "grad_norm": 0.0, + "learning_rate": 2.070810424465962e-06, + "loss": 1.2241, + "step": 27168 + }, + { + "epoch": 0.7977274061894415, + "grad_norm": 0.0, + "learning_rate": 2.070231014390448e-06, + "loss": 1.1841, + "step": 27169 + }, + { + "epoch": 0.7977567678665806, + "grad_norm": 0.0, + "learning_rate": 2.0696516760258623e-06, + "loss": 1.1206, + "step": 27170 + }, + { + "epoch": 0.7977861295437195, + "grad_norm": 0.0, + "learning_rate": 2.0690724093774506e-06, + "loss": 1.1953, + "step": 27171 + }, + { + "epoch": 0.7978154912208585, + "grad_norm": 0.0, + "learning_rate": 2.068493214450448e-06, + "loss": 1.1323, + "step": 27172 + }, + { + "epoch": 0.7978448528979976, + "grad_norm": 0.0, + "learning_rate": 2.067914091250093e-06, + "loss": 1.2729, + "step": 27173 + }, + { + "epoch": 0.7978742145751365, + "grad_norm": 0.0, + "learning_rate": 2.067335039781618e-06, + "loss": 1.105, + "step": 27174 + }, + { + "epoch": 0.7979035762522755, + "grad_norm": 0.0, + "learning_rate": 2.066756060050269e-06, + "loss": 1.0557, + "step": 27175 + }, + { + "epoch": 0.7979329379294146, + "grad_norm": 0.0, + "learning_rate": 2.0661771520612726e-06, + "loss": 1.2827, + "step": 27176 + }, + { + "epoch": 0.7979622996065535, + "grad_norm": 0.0, + "learning_rate": 2.0655983158198712e-06, + "loss": 1.0942, + "step": 27177 + }, + { + "epoch": 0.7979916612836925, + "grad_norm": 0.0, + "learning_rate": 2.0650195513312953e-06, + "loss": 1.1914, + "step": 27178 + }, + { + "epoch": 0.7980210229608315, + "grad_norm": 0.0, + "learning_rate": 2.064440858600778e-06, + "loss": 1.2686, + "step": 27179 + }, + { + "epoch": 0.7980503846379705, + "grad_norm": 0.0, + "learning_rate": 2.063862237633557e-06, + "loss": 1.2227, + "step": 27180 + }, + { + "epoch": 0.7980797463151095, + "grad_norm": 0.0, + "learning_rate": 2.0632836884348615e-06, + "loss": 1.208, + "step": 27181 + }, + { + "epoch": 0.7981091079922485, + "grad_norm": 0.0, + "learning_rate": 2.0627052110099257e-06, + "loss": 1.0356, + "step": 27182 + }, + { + "epoch": 0.7981384696693875, + "grad_norm": 0.0, + "learning_rate": 2.0621268053639754e-06, + "loss": 1.3369, + "step": 27183 + }, + { + "epoch": 0.7981678313465265, + "grad_norm": 0.0, + "learning_rate": 2.0615484715022484e-06, + "loss": 1.1963, + "step": 27184 + }, + { + "epoch": 0.7981971930236655, + "grad_norm": 0.0, + "learning_rate": 2.060970209429969e-06, + "loss": 1.1885, + "step": 27185 + }, + { + "epoch": 0.7982265547008045, + "grad_norm": 0.0, + "learning_rate": 2.0603920191523717e-06, + "loss": 1.0933, + "step": 27186 + }, + { + "epoch": 0.7982559163779435, + "grad_norm": 0.0, + "learning_rate": 2.059813900674683e-06, + "loss": 1.3594, + "step": 27187 + }, + { + "epoch": 0.7982852780550825, + "grad_norm": 0.0, + "learning_rate": 2.0592358540021307e-06, + "loss": 1.2266, + "step": 27188 + }, + { + "epoch": 0.7983146397322215, + "grad_norm": 0.0, + "learning_rate": 2.0586578791399426e-06, + "loss": 1.2363, + "step": 27189 + }, + { + "epoch": 0.7983440014093605, + "grad_norm": 0.0, + "learning_rate": 2.0580799760933422e-06, + "loss": 1.2578, + "step": 27190 + }, + { + "epoch": 0.7983733630864995, + "grad_norm": 0.0, + "learning_rate": 2.0575021448675603e-06, + "loss": 1.2227, + "step": 27191 + }, + { + "epoch": 0.7984027247636385, + "grad_norm": 0.0, + "learning_rate": 2.056924385467819e-06, + "loss": 1.1978, + "step": 27192 + }, + { + "epoch": 0.7984320864407775, + "grad_norm": 0.0, + "learning_rate": 2.0563466978993474e-06, + "loss": 1.2432, + "step": 27193 + }, + { + "epoch": 0.7984614481179165, + "grad_norm": 0.0, + "learning_rate": 2.0557690821673637e-06, + "loss": 1.1748, + "step": 27194 + }, + { + "epoch": 0.7984908097950555, + "grad_norm": 0.0, + "learning_rate": 2.055191538277098e-06, + "loss": 1.1519, + "step": 27195 + }, + { + "epoch": 0.7985201714721945, + "grad_norm": 0.0, + "learning_rate": 2.05461406623377e-06, + "loss": 1.1797, + "step": 27196 + }, + { + "epoch": 0.7985495331493335, + "grad_norm": 0.0, + "learning_rate": 2.0540366660426015e-06, + "loss": 1.0986, + "step": 27197 + }, + { + "epoch": 0.7985788948264725, + "grad_norm": 0.0, + "learning_rate": 2.0534593377088142e-06, + "loss": 1.2949, + "step": 27198 + }, + { + "epoch": 0.7986082565036114, + "grad_norm": 0.0, + "learning_rate": 2.052882081237627e-06, + "loss": 1.2705, + "step": 27199 + }, + { + "epoch": 0.7986376181807505, + "grad_norm": 0.0, + "learning_rate": 2.052304896634264e-06, + "loss": 1.1089, + "step": 27200 + }, + { + "epoch": 0.7986669798578895, + "grad_norm": 0.0, + "learning_rate": 2.051727783903942e-06, + "loss": 1.167, + "step": 27201 + }, + { + "epoch": 0.7986963415350284, + "grad_norm": 0.0, + "learning_rate": 2.051150743051883e-06, + "loss": 1.1924, + "step": 27202 + }, + { + "epoch": 0.7987257032121675, + "grad_norm": 0.0, + "learning_rate": 2.050573774083302e-06, + "loss": 1.2666, + "step": 27203 + }, + { + "epoch": 0.7987550648893065, + "grad_norm": 0.0, + "learning_rate": 2.0499968770034198e-06, + "loss": 1.1157, + "step": 27204 + }, + { + "epoch": 0.7987844265664454, + "grad_norm": 0.0, + "learning_rate": 2.0494200518174522e-06, + "loss": 1.3164, + "step": 27205 + }, + { + "epoch": 0.7988137882435845, + "grad_norm": 0.0, + "learning_rate": 2.0488432985306152e-06, + "loss": 1.126, + "step": 27206 + }, + { + "epoch": 0.7988431499207235, + "grad_norm": 0.0, + "learning_rate": 2.0482666171481247e-06, + "loss": 1.2734, + "step": 27207 + }, + { + "epoch": 0.7988725115978624, + "grad_norm": 0.0, + "learning_rate": 2.0476900076751926e-06, + "loss": 1.2188, + "step": 27208 + }, + { + "epoch": 0.7989018732750015, + "grad_norm": 0.0, + "learning_rate": 2.0471134701170403e-06, + "loss": 1.2246, + "step": 27209 + }, + { + "epoch": 0.7989312349521405, + "grad_norm": 0.0, + "learning_rate": 2.0465370044788737e-06, + "loss": 1.1177, + "step": 27210 + }, + { + "epoch": 0.7989605966292794, + "grad_norm": 0.0, + "learning_rate": 2.0459606107659126e-06, + "loss": 1.25, + "step": 27211 + }, + { + "epoch": 0.7989899583064185, + "grad_norm": 0.0, + "learning_rate": 2.0453842889833677e-06, + "loss": 1.1265, + "step": 27212 + }, + { + "epoch": 0.7990193199835575, + "grad_norm": 0.0, + "learning_rate": 2.044808039136449e-06, + "loss": 1.1655, + "step": 27213 + }, + { + "epoch": 0.7990486816606964, + "grad_norm": 0.0, + "learning_rate": 2.044231861230366e-06, + "loss": 1.1294, + "step": 27214 + }, + { + "epoch": 0.7990780433378355, + "grad_norm": 0.0, + "learning_rate": 2.043655755270335e-06, + "loss": 1.2339, + "step": 27215 + }, + { + "epoch": 0.7991074050149745, + "grad_norm": 0.0, + "learning_rate": 2.0430797212615595e-06, + "loss": 1.2129, + "step": 27216 + }, + { + "epoch": 0.7991367666921134, + "grad_norm": 0.0, + "learning_rate": 2.0425037592092545e-06, + "loss": 1.1719, + "step": 27217 + }, + { + "epoch": 0.7991661283692525, + "grad_norm": 0.0, + "learning_rate": 2.041927869118626e-06, + "loss": 1.0913, + "step": 27218 + }, + { + "epoch": 0.7991954900463915, + "grad_norm": 0.0, + "learning_rate": 2.041352050994879e-06, + "loss": 1.1685, + "step": 27219 + }, + { + "epoch": 0.7992248517235304, + "grad_norm": 0.0, + "learning_rate": 2.0407763048432273e-06, + "loss": 1.2749, + "step": 27220 + }, + { + "epoch": 0.7992542134006695, + "grad_norm": 0.0, + "learning_rate": 2.0402006306688726e-06, + "loss": 1.3081, + "step": 27221 + }, + { + "epoch": 0.7992835750778085, + "grad_norm": 0.0, + "learning_rate": 2.0396250284770224e-06, + "loss": 1.2461, + "step": 27222 + }, + { + "epoch": 0.7993129367549474, + "grad_norm": 0.0, + "learning_rate": 2.03904949827288e-06, + "loss": 1.2705, + "step": 27223 + }, + { + "epoch": 0.7993422984320865, + "grad_norm": 0.0, + "learning_rate": 2.038474040061654e-06, + "loss": 1.1807, + "step": 27224 + }, + { + "epoch": 0.7993716601092254, + "grad_norm": 0.0, + "learning_rate": 2.0378986538485433e-06, + "loss": 1.2793, + "step": 27225 + }, + { + "epoch": 0.7994010217863644, + "grad_norm": 0.0, + "learning_rate": 2.037323339638757e-06, + "loss": 1.208, + "step": 27226 + }, + { + "epoch": 0.7994303834635035, + "grad_norm": 0.0, + "learning_rate": 2.0367480974374953e-06, + "loss": 1.208, + "step": 27227 + }, + { + "epoch": 0.7994597451406424, + "grad_norm": 0.0, + "learning_rate": 2.0361729272499563e-06, + "loss": 1.2783, + "step": 27228 + }, + { + "epoch": 0.7994891068177814, + "grad_norm": 0.0, + "learning_rate": 2.0355978290813496e-06, + "loss": 1.249, + "step": 27229 + }, + { + "epoch": 0.7995184684949205, + "grad_norm": 0.0, + "learning_rate": 2.035022802936869e-06, + "loss": 1.2852, + "step": 27230 + }, + { + "epoch": 0.7995478301720594, + "grad_norm": 0.0, + "learning_rate": 2.034447848821719e-06, + "loss": 1.1826, + "step": 27231 + }, + { + "epoch": 0.7995771918491984, + "grad_norm": 0.0, + "learning_rate": 2.033872966741094e-06, + "loss": 1.3457, + "step": 27232 + }, + { + "epoch": 0.7996065535263375, + "grad_norm": 0.0, + "learning_rate": 2.0332981567001987e-06, + "loss": 1.0903, + "step": 27233 + }, + { + "epoch": 0.7996359152034764, + "grad_norm": 0.0, + "learning_rate": 2.0327234187042254e-06, + "loss": 1.1641, + "step": 27234 + }, + { + "epoch": 0.7996652768806154, + "grad_norm": 0.0, + "learning_rate": 2.032148752758377e-06, + "loss": 1.1895, + "step": 27235 + }, + { + "epoch": 0.7996946385577545, + "grad_norm": 0.0, + "learning_rate": 2.031574158867846e-06, + "loss": 1.2754, + "step": 27236 + }, + { + "epoch": 0.7997240002348934, + "grad_norm": 0.0, + "learning_rate": 2.030999637037836e-06, + "loss": 1.2422, + "step": 27237 + }, + { + "epoch": 0.7997533619120324, + "grad_norm": 0.0, + "learning_rate": 2.0304251872735337e-06, + "loss": 1.2949, + "step": 27238 + }, + { + "epoch": 0.7997827235891715, + "grad_norm": 0.0, + "learning_rate": 2.0298508095801363e-06, + "loss": 1.1406, + "step": 27239 + }, + { + "epoch": 0.7998120852663104, + "grad_norm": 0.0, + "learning_rate": 2.0292765039628403e-06, + "loss": 1.2568, + "step": 27240 + }, + { + "epoch": 0.7998414469434494, + "grad_norm": 0.0, + "learning_rate": 2.0287022704268366e-06, + "loss": 1.2515, + "step": 27241 + }, + { + "epoch": 0.7998708086205885, + "grad_norm": 0.0, + "learning_rate": 2.028128108977322e-06, + "loss": 1.252, + "step": 27242 + }, + { + "epoch": 0.7999001702977274, + "grad_norm": 0.0, + "learning_rate": 2.027554019619483e-06, + "loss": 1.208, + "step": 27243 + }, + { + "epoch": 0.7999295319748664, + "grad_norm": 0.0, + "learning_rate": 2.0269800023585185e-06, + "loss": 1.2456, + "step": 27244 + }, + { + "epoch": 0.7999588936520055, + "grad_norm": 0.0, + "learning_rate": 2.0264060571996156e-06, + "loss": 1.2334, + "step": 27245 + }, + { + "epoch": 0.7999882553291444, + "grad_norm": 0.0, + "learning_rate": 2.0258321841479657e-06, + "loss": 1.2095, + "step": 27246 + }, + { + "epoch": 0.8000176170062834, + "grad_norm": 0.0, + "learning_rate": 2.0252583832087557e-06, + "loss": 1.1558, + "step": 27247 + }, + { + "epoch": 0.8000469786834224, + "grad_norm": 0.0, + "learning_rate": 2.0246846543871757e-06, + "loss": 1.2812, + "step": 27248 + }, + { + "epoch": 0.8000763403605614, + "grad_norm": 0.0, + "learning_rate": 2.0241109976884165e-06, + "loss": 1.2959, + "step": 27249 + }, + { + "epoch": 0.8001057020377004, + "grad_norm": 0.0, + "learning_rate": 2.023537413117661e-06, + "loss": 1.249, + "step": 27250 + }, + { + "epoch": 0.8001350637148394, + "grad_norm": 0.0, + "learning_rate": 2.022963900680103e-06, + "loss": 1.0884, + "step": 27251 + }, + { + "epoch": 0.8001644253919784, + "grad_norm": 0.0, + "learning_rate": 2.022390460380923e-06, + "loss": 1.2568, + "step": 27252 + }, + { + "epoch": 0.8001937870691174, + "grad_norm": 0.0, + "learning_rate": 2.021817092225311e-06, + "loss": 1.2432, + "step": 27253 + }, + { + "epoch": 0.8002231487462564, + "grad_norm": 0.0, + "learning_rate": 2.0212437962184505e-06, + "loss": 1.3154, + "step": 27254 + }, + { + "epoch": 0.8002525104233954, + "grad_norm": 0.0, + "learning_rate": 2.020670572365526e-06, + "loss": 1.1006, + "step": 27255 + }, + { + "epoch": 0.8002818721005344, + "grad_norm": 0.0, + "learning_rate": 2.0200974206717207e-06, + "loss": 1.2686, + "step": 27256 + }, + { + "epoch": 0.8003112337776733, + "grad_norm": 0.0, + "learning_rate": 2.019524341142216e-06, + "loss": 1.2222, + "step": 27257 + }, + { + "epoch": 0.8003405954548124, + "grad_norm": 0.0, + "learning_rate": 2.0189513337821985e-06, + "loss": 1.2559, + "step": 27258 + }, + { + "epoch": 0.8003699571319514, + "grad_norm": 0.0, + "learning_rate": 2.0183783985968463e-06, + "loss": 1.1758, + "step": 27259 + }, + { + "epoch": 0.8003993188090903, + "grad_norm": 0.0, + "learning_rate": 2.017805535591344e-06, + "loss": 1.0825, + "step": 27260 + }, + { + "epoch": 0.8004286804862294, + "grad_norm": 0.0, + "learning_rate": 2.0172327447708683e-06, + "loss": 1.1836, + "step": 27261 + }, + { + "epoch": 0.8004580421633684, + "grad_norm": 0.0, + "learning_rate": 2.0166600261406068e-06, + "loss": 1.145, + "step": 27262 + }, + { + "epoch": 0.8004874038405073, + "grad_norm": 0.0, + "learning_rate": 2.016087379705728e-06, + "loss": 1.2144, + "step": 27263 + }, + { + "epoch": 0.8005167655176464, + "grad_norm": 0.0, + "learning_rate": 2.015514805471418e-06, + "loss": 1.1738, + "step": 27264 + }, + { + "epoch": 0.8005461271947854, + "grad_norm": 0.0, + "learning_rate": 2.0149423034428506e-06, + "loss": 1.2261, + "step": 27265 + }, + { + "epoch": 0.8005754888719243, + "grad_norm": 0.0, + "learning_rate": 2.014369873625207e-06, + "loss": 1.166, + "step": 27266 + }, + { + "epoch": 0.8006048505490634, + "grad_norm": 0.0, + "learning_rate": 2.013797516023661e-06, + "loss": 1.1636, + "step": 27267 + }, + { + "epoch": 0.8006342122262023, + "grad_norm": 0.0, + "learning_rate": 2.0132252306433875e-06, + "loss": 1.2988, + "step": 27268 + }, + { + "epoch": 0.8006635739033413, + "grad_norm": 0.0, + "learning_rate": 2.0126530174895664e-06, + "loss": 1.1934, + "step": 27269 + }, + { + "epoch": 0.8006929355804804, + "grad_norm": 0.0, + "learning_rate": 2.0120808765673705e-06, + "loss": 1.2754, + "step": 27270 + }, + { + "epoch": 0.8007222972576193, + "grad_norm": 0.0, + "learning_rate": 2.011508807881971e-06, + "loss": 1.2578, + "step": 27271 + }, + { + "epoch": 0.8007516589347583, + "grad_norm": 0.0, + "learning_rate": 2.010936811438542e-06, + "loss": 1.2246, + "step": 27272 + }, + { + "epoch": 0.8007810206118974, + "grad_norm": 0.0, + "learning_rate": 2.01036488724226e-06, + "loss": 1.418, + "step": 27273 + }, + { + "epoch": 0.8008103822890363, + "grad_norm": 0.0, + "learning_rate": 2.009793035298292e-06, + "loss": 1.2861, + "step": 27274 + }, + { + "epoch": 0.8008397439661753, + "grad_norm": 0.0, + "learning_rate": 2.009221255611814e-06, + "loss": 1.1064, + "step": 27275 + }, + { + "epoch": 0.8008691056433144, + "grad_norm": 0.0, + "learning_rate": 2.008649548187994e-06, + "loss": 1.2324, + "step": 27276 + }, + { + "epoch": 0.8008984673204533, + "grad_norm": 0.0, + "learning_rate": 2.0080779130320015e-06, + "loss": 1.2627, + "step": 27277 + }, + { + "epoch": 0.8009278289975923, + "grad_norm": 0.0, + "learning_rate": 2.007506350149009e-06, + "loss": 1.2402, + "step": 27278 + }, + { + "epoch": 0.8009571906747314, + "grad_norm": 0.0, + "learning_rate": 2.006934859544184e-06, + "loss": 1.2588, + "step": 27279 + }, + { + "epoch": 0.8009865523518703, + "grad_norm": 0.0, + "learning_rate": 2.006363441222694e-06, + "loss": 1.1621, + "step": 27280 + }, + { + "epoch": 0.8010159140290093, + "grad_norm": 0.0, + "learning_rate": 2.0057920951897038e-06, + "loss": 1.2725, + "step": 27281 + }, + { + "epoch": 0.8010452757061484, + "grad_norm": 0.0, + "learning_rate": 2.0052208214503855e-06, + "loss": 1.1592, + "step": 27282 + }, + { + "epoch": 0.8010746373832873, + "grad_norm": 0.0, + "learning_rate": 2.004649620009901e-06, + "loss": 1.1982, + "step": 27283 + }, + { + "epoch": 0.8011039990604263, + "grad_norm": 0.0, + "learning_rate": 2.004078490873419e-06, + "loss": 1.2979, + "step": 27284 + }, + { + "epoch": 0.8011333607375654, + "grad_norm": 0.0, + "learning_rate": 2.0035074340461015e-06, + "loss": 1.3242, + "step": 27285 + }, + { + "epoch": 0.8011627224147043, + "grad_norm": 0.0, + "learning_rate": 2.002936449533116e-06, + "loss": 1.2354, + "step": 27286 + }, + { + "epoch": 0.8011920840918433, + "grad_norm": 0.0, + "learning_rate": 2.0023655373396267e-06, + "loss": 1.1797, + "step": 27287 + }, + { + "epoch": 0.8012214457689824, + "grad_norm": 0.0, + "learning_rate": 2.0017946974707904e-06, + "loss": 1.3066, + "step": 27288 + }, + { + "epoch": 0.8012508074461213, + "grad_norm": 0.0, + "learning_rate": 2.001223929931774e-06, + "loss": 1.1509, + "step": 27289 + }, + { + "epoch": 0.8012801691232603, + "grad_norm": 0.0, + "learning_rate": 2.000653234727736e-06, + "loss": 1.1279, + "step": 27290 + }, + { + "epoch": 0.8013095308003993, + "grad_norm": 0.0, + "learning_rate": 2.0000826118638417e-06, + "loss": 1.3623, + "step": 27291 + }, + { + "epoch": 0.8013388924775383, + "grad_norm": 0.0, + "learning_rate": 1.9995120613452478e-06, + "loss": 1.291, + "step": 27292 + }, + { + "epoch": 0.8013682541546773, + "grad_norm": 0.0, + "learning_rate": 1.9989415831771165e-06, + "loss": 1.2988, + "step": 27293 + }, + { + "epoch": 0.8013976158318163, + "grad_norm": 0.0, + "learning_rate": 1.9983711773646063e-06, + "loss": 1.1367, + "step": 27294 + }, + { + "epoch": 0.8014269775089553, + "grad_norm": 0.0, + "learning_rate": 1.997800843912875e-06, + "loss": 1.2129, + "step": 27295 + }, + { + "epoch": 0.8014563391860943, + "grad_norm": 0.0, + "learning_rate": 1.99723058282708e-06, + "loss": 1.2803, + "step": 27296 + }, + { + "epoch": 0.8014857008632333, + "grad_norm": 0.0, + "learning_rate": 1.9966603941123754e-06, + "loss": 1.1084, + "step": 27297 + }, + { + "epoch": 0.8015150625403723, + "grad_norm": 0.0, + "learning_rate": 1.996090277773922e-06, + "loss": 1.2568, + "step": 27298 + }, + { + "epoch": 0.8015444242175113, + "grad_norm": 0.0, + "learning_rate": 1.9955202338168733e-06, + "loss": 1.1807, + "step": 27299 + }, + { + "epoch": 0.8015737858946503, + "grad_norm": 0.0, + "learning_rate": 1.994950262246387e-06, + "loss": 1.2432, + "step": 27300 + }, + { + "epoch": 0.8016031475717893, + "grad_norm": 0.0, + "learning_rate": 1.9943803630676126e-06, + "loss": 1.0928, + "step": 27301 + }, + { + "epoch": 0.8016325092489283, + "grad_norm": 0.0, + "learning_rate": 1.993810536285711e-06, + "loss": 1.2065, + "step": 27302 + }, + { + "epoch": 0.8016618709260673, + "grad_norm": 0.0, + "learning_rate": 1.9932407819058296e-06, + "loss": 1.1973, + "step": 27303 + }, + { + "epoch": 0.8016912326032063, + "grad_norm": 0.0, + "learning_rate": 1.9926710999331236e-06, + "loss": 1.2188, + "step": 27304 + }, + { + "epoch": 0.8017205942803453, + "grad_norm": 0.0, + "learning_rate": 1.99210149037274e-06, + "loss": 1.2012, + "step": 27305 + }, + { + "epoch": 0.8017499559574843, + "grad_norm": 0.0, + "learning_rate": 1.991531953229836e-06, + "loss": 1.2266, + "step": 27306 + }, + { + "epoch": 0.8017793176346233, + "grad_norm": 0.0, + "learning_rate": 1.990962488509559e-06, + "loss": 1.2725, + "step": 27307 + }, + { + "epoch": 0.8018086793117623, + "grad_norm": 0.0, + "learning_rate": 1.990393096217058e-06, + "loss": 1.1406, + "step": 27308 + }, + { + "epoch": 0.8018380409889013, + "grad_norm": 0.0, + "learning_rate": 1.989823776357486e-06, + "loss": 1.2139, + "step": 27309 + }, + { + "epoch": 0.8018674026660403, + "grad_norm": 0.0, + "learning_rate": 1.9892545289359853e-06, + "loss": 1.1895, + "step": 27310 + }, + { + "epoch": 0.8018967643431792, + "grad_norm": 0.0, + "learning_rate": 1.988685353957711e-06, + "loss": 1.2686, + "step": 27311 + }, + { + "epoch": 0.8019261260203183, + "grad_norm": 0.0, + "learning_rate": 1.9881162514278074e-06, + "loss": 1.2695, + "step": 27312 + }, + { + "epoch": 0.8019554876974573, + "grad_norm": 0.0, + "learning_rate": 1.9875472213514193e-06, + "loss": 1.2559, + "step": 27313 + }, + { + "epoch": 0.8019848493745962, + "grad_norm": 0.0, + "learning_rate": 1.986978263733692e-06, + "loss": 1.2847, + "step": 27314 + }, + { + "epoch": 0.8020142110517353, + "grad_norm": 0.0, + "learning_rate": 1.9864093785797747e-06, + "loss": 1.2607, + "step": 27315 + }, + { + "epoch": 0.8020435727288743, + "grad_norm": 0.0, + "learning_rate": 1.9858405658948088e-06, + "loss": 1.1587, + "step": 27316 + }, + { + "epoch": 0.8020729344060132, + "grad_norm": 0.0, + "learning_rate": 1.9852718256839376e-06, + "loss": 1.1846, + "step": 27317 + }, + { + "epoch": 0.8021022960831523, + "grad_norm": 0.0, + "learning_rate": 1.984703157952308e-06, + "loss": 1.418, + "step": 27318 + }, + { + "epoch": 0.8021316577602913, + "grad_norm": 0.0, + "learning_rate": 1.98413456270506e-06, + "loss": 1.1436, + "step": 27319 + }, + { + "epoch": 0.8021610194374302, + "grad_norm": 0.0, + "learning_rate": 1.9835660399473366e-06, + "loss": 1.1763, + "step": 27320 + }, + { + "epoch": 0.8021903811145693, + "grad_norm": 0.0, + "learning_rate": 1.9829975896842768e-06, + "loss": 1.3398, + "step": 27321 + }, + { + "epoch": 0.8022197427917083, + "grad_norm": 0.0, + "learning_rate": 1.9824292119210243e-06, + "loss": 1.2583, + "step": 27322 + }, + { + "epoch": 0.8022491044688472, + "grad_norm": 0.0, + "learning_rate": 1.981860906662716e-06, + "loss": 1.2124, + "step": 27323 + }, + { + "epoch": 0.8022784661459863, + "grad_norm": 0.0, + "learning_rate": 1.9812926739144957e-06, + "loss": 1.1187, + "step": 27324 + }, + { + "epoch": 0.8023078278231253, + "grad_norm": 0.0, + "learning_rate": 1.980724513681498e-06, + "loss": 1.3174, + "step": 27325 + }, + { + "epoch": 0.8023371895002642, + "grad_norm": 0.0, + "learning_rate": 1.980156425968861e-06, + "loss": 1.248, + "step": 27326 + }, + { + "epoch": 0.8023665511774033, + "grad_norm": 0.0, + "learning_rate": 1.979588410781725e-06, + "loss": 1.1338, + "step": 27327 + }, + { + "epoch": 0.8023959128545423, + "grad_norm": 0.0, + "learning_rate": 1.9790204681252255e-06, + "loss": 1.1323, + "step": 27328 + }, + { + "epoch": 0.8024252745316812, + "grad_norm": 0.0, + "learning_rate": 1.978452598004499e-06, + "loss": 1.2026, + "step": 27329 + }, + { + "epoch": 0.8024546362088203, + "grad_norm": 0.0, + "learning_rate": 1.9778848004246766e-06, + "loss": 1.165, + "step": 27330 + }, + { + "epoch": 0.8024839978859593, + "grad_norm": 0.0, + "learning_rate": 1.977317075390899e-06, + "loss": 1.2373, + "step": 27331 + }, + { + "epoch": 0.8025133595630982, + "grad_norm": 0.0, + "learning_rate": 1.976749422908295e-06, + "loss": 1.1445, + "step": 27332 + }, + { + "epoch": 0.8025427212402373, + "grad_norm": 0.0, + "learning_rate": 1.9761818429820036e-06, + "loss": 1.187, + "step": 27333 + }, + { + "epoch": 0.8025720829173763, + "grad_norm": 0.0, + "learning_rate": 1.975614335617152e-06, + "loss": 1.168, + "step": 27334 + }, + { + "epoch": 0.8026014445945152, + "grad_norm": 0.0, + "learning_rate": 1.9750469008188776e-06, + "loss": 1.2441, + "step": 27335 + }, + { + "epoch": 0.8026308062716543, + "grad_norm": 0.0, + "learning_rate": 1.97447953859231e-06, + "loss": 1.3105, + "step": 27336 + }, + { + "epoch": 0.8026601679487932, + "grad_norm": 0.0, + "learning_rate": 1.9739122489425777e-06, + "loss": 1.2285, + "step": 27337 + }, + { + "epoch": 0.8026895296259322, + "grad_norm": 0.0, + "learning_rate": 1.9733450318748137e-06, + "loss": 1.3105, + "step": 27338 + }, + { + "epoch": 0.8027188913030713, + "grad_norm": 0.0, + "learning_rate": 1.972777887394144e-06, + "loss": 1.2412, + "step": 27339 + }, + { + "epoch": 0.8027482529802102, + "grad_norm": 0.0, + "learning_rate": 1.972210815505702e-06, + "loss": 1.3135, + "step": 27340 + }, + { + "epoch": 0.8027776146573492, + "grad_norm": 0.0, + "learning_rate": 1.9716438162146102e-06, + "loss": 1.146, + "step": 27341 + }, + { + "epoch": 0.8028069763344883, + "grad_norm": 0.0, + "learning_rate": 1.9710768895260024e-06, + "loss": 1.1973, + "step": 27342 + }, + { + "epoch": 0.8028363380116272, + "grad_norm": 0.0, + "learning_rate": 1.9705100354450024e-06, + "loss": 1.1196, + "step": 27343 + }, + { + "epoch": 0.8028656996887662, + "grad_norm": 0.0, + "learning_rate": 1.969943253976737e-06, + "loss": 1.1929, + "step": 27344 + }, + { + "epoch": 0.8028950613659053, + "grad_norm": 0.0, + "learning_rate": 1.9693765451263304e-06, + "loss": 1.1924, + "step": 27345 + }, + { + "epoch": 0.8029244230430442, + "grad_norm": 0.0, + "learning_rate": 1.9688099088989066e-06, + "loss": 1.2793, + "step": 27346 + }, + { + "epoch": 0.8029537847201832, + "grad_norm": 0.0, + "learning_rate": 1.9682433452995943e-06, + "loss": 1.2285, + "step": 27347 + }, + { + "epoch": 0.8029831463973223, + "grad_norm": 0.0, + "learning_rate": 1.967676854333511e-06, + "loss": 1.2075, + "step": 27348 + }, + { + "epoch": 0.8030125080744612, + "grad_norm": 0.0, + "learning_rate": 1.9671104360057856e-06, + "loss": 1.063, + "step": 27349 + }, + { + "epoch": 0.8030418697516002, + "grad_norm": 0.0, + "learning_rate": 1.966544090321536e-06, + "loss": 1.208, + "step": 27350 + }, + { + "epoch": 0.8030712314287393, + "grad_norm": 0.0, + "learning_rate": 1.965977817285888e-06, + "loss": 1.2188, + "step": 27351 + }, + { + "epoch": 0.8031005931058782, + "grad_norm": 0.0, + "learning_rate": 1.9654116169039604e-06, + "loss": 1.1733, + "step": 27352 + }, + { + "epoch": 0.8031299547830172, + "grad_norm": 0.0, + "learning_rate": 1.964845489180872e-06, + "loss": 1.1748, + "step": 27353 + }, + { + "epoch": 0.8031593164601563, + "grad_norm": 0.0, + "learning_rate": 1.964279434121743e-06, + "loss": 1.417, + "step": 27354 + }, + { + "epoch": 0.8031886781372952, + "grad_norm": 0.0, + "learning_rate": 1.9637134517316947e-06, + "loss": 1.1816, + "step": 27355 + }, + { + "epoch": 0.8032180398144342, + "grad_norm": 0.0, + "learning_rate": 1.963147542015843e-06, + "loss": 1.1714, + "step": 27356 + }, + { + "epoch": 0.8032474014915733, + "grad_norm": 0.0, + "learning_rate": 1.962581704979305e-06, + "loss": 1.1069, + "step": 27357 + }, + { + "epoch": 0.8032767631687122, + "grad_norm": 0.0, + "learning_rate": 1.962015940627201e-06, + "loss": 1.1226, + "step": 27358 + }, + { + "epoch": 0.8033061248458512, + "grad_norm": 0.0, + "learning_rate": 1.9614502489646423e-06, + "loss": 1.1279, + "step": 27359 + }, + { + "epoch": 0.8033354865229901, + "grad_norm": 0.0, + "learning_rate": 1.960884629996751e-06, + "loss": 1.1987, + "step": 27360 + }, + { + "epoch": 0.8033648482001292, + "grad_norm": 0.0, + "learning_rate": 1.960319083728639e-06, + "loss": 1.1411, + "step": 27361 + }, + { + "epoch": 0.8033942098772682, + "grad_norm": 0.0, + "learning_rate": 1.9597536101654204e-06, + "loss": 1.2065, + "step": 27362 + }, + { + "epoch": 0.8034235715544071, + "grad_norm": 0.0, + "learning_rate": 1.9591882093122063e-06, + "loss": 1.1528, + "step": 27363 + }, + { + "epoch": 0.8034529332315462, + "grad_norm": 0.0, + "learning_rate": 1.958622881174115e-06, + "loss": 1.292, + "step": 27364 + }, + { + "epoch": 0.8034822949086852, + "grad_norm": 0.0, + "learning_rate": 1.9580576257562554e-06, + "loss": 1.186, + "step": 27365 + }, + { + "epoch": 0.8035116565858241, + "grad_norm": 0.0, + "learning_rate": 1.9574924430637398e-06, + "loss": 1.2363, + "step": 27366 + }, + { + "epoch": 0.8035410182629632, + "grad_norm": 0.0, + "learning_rate": 1.9569273331016814e-06, + "loss": 1.2773, + "step": 27367 + }, + { + "epoch": 0.8035703799401022, + "grad_norm": 0.0, + "learning_rate": 1.956362295875188e-06, + "loss": 1.1675, + "step": 27368 + }, + { + "epoch": 0.8035997416172411, + "grad_norm": 0.0, + "learning_rate": 1.955797331389372e-06, + "loss": 1.1836, + "step": 27369 + }, + { + "epoch": 0.8036291032943802, + "grad_norm": 0.0, + "learning_rate": 1.955232439649337e-06, + "loss": 1.2075, + "step": 27370 + }, + { + "epoch": 0.8036584649715192, + "grad_norm": 0.0, + "learning_rate": 1.954667620660199e-06, + "loss": 1.0874, + "step": 27371 + }, + { + "epoch": 0.8036878266486581, + "grad_norm": 0.0, + "learning_rate": 1.9541028744270585e-06, + "loss": 1.2559, + "step": 27372 + }, + { + "epoch": 0.8037171883257972, + "grad_norm": 0.0, + "learning_rate": 1.9535382009550297e-06, + "loss": 1.2793, + "step": 27373 + }, + { + "epoch": 0.8037465500029362, + "grad_norm": 0.0, + "learning_rate": 1.952973600249213e-06, + "loss": 1.1172, + "step": 27374 + }, + { + "epoch": 0.8037759116800751, + "grad_norm": 0.0, + "learning_rate": 1.9524090723147193e-06, + "loss": 1.1675, + "step": 27375 + }, + { + "epoch": 0.8038052733572142, + "grad_norm": 0.0, + "learning_rate": 1.9518446171566508e-06, + "loss": 1.2568, + "step": 27376 + }, + { + "epoch": 0.8038346350343532, + "grad_norm": 0.0, + "learning_rate": 1.951280234780114e-06, + "loss": 1.252, + "step": 27377 + }, + { + "epoch": 0.8038639967114921, + "grad_norm": 0.0, + "learning_rate": 1.9507159251902096e-06, + "loss": 1.2754, + "step": 27378 + }, + { + "epoch": 0.8038933583886312, + "grad_norm": 0.0, + "learning_rate": 1.9501516883920412e-06, + "loss": 1.2437, + "step": 27379 + }, + { + "epoch": 0.8039227200657701, + "grad_norm": 0.0, + "learning_rate": 1.9495875243907147e-06, + "loss": 1.1538, + "step": 27380 + }, + { + "epoch": 0.8039520817429091, + "grad_norm": 0.0, + "learning_rate": 1.9490234331913283e-06, + "loss": 1.166, + "step": 27381 + }, + { + "epoch": 0.8039814434200482, + "grad_norm": 0.0, + "learning_rate": 1.9484594147989867e-06, + "loss": 1.2812, + "step": 27382 + }, + { + "epoch": 0.8040108050971871, + "grad_norm": 0.0, + "learning_rate": 1.9478954692187856e-06, + "loss": 1.166, + "step": 27383 + }, + { + "epoch": 0.8040401667743261, + "grad_norm": 0.0, + "learning_rate": 1.9473315964558314e-06, + "loss": 1.2354, + "step": 27384 + }, + { + "epoch": 0.8040695284514652, + "grad_norm": 0.0, + "learning_rate": 1.94676779651522e-06, + "loss": 1.2129, + "step": 27385 + }, + { + "epoch": 0.8040988901286041, + "grad_norm": 0.0, + "learning_rate": 1.9462040694020488e-06, + "loss": 1.2471, + "step": 27386 + }, + { + "epoch": 0.8041282518057431, + "grad_norm": 0.0, + "learning_rate": 1.945640415121418e-06, + "loss": 1.2017, + "step": 27387 + }, + { + "epoch": 0.8041576134828822, + "grad_norm": 0.0, + "learning_rate": 1.94507683367842e-06, + "loss": 1.3281, + "step": 27388 + }, + { + "epoch": 0.8041869751600211, + "grad_norm": 0.0, + "learning_rate": 1.944513325078159e-06, + "loss": 1.2441, + "step": 27389 + }, + { + "epoch": 0.8042163368371601, + "grad_norm": 0.0, + "learning_rate": 1.9439498893257225e-06, + "loss": 1.2646, + "step": 27390 + }, + { + "epoch": 0.8042456985142992, + "grad_norm": 0.0, + "learning_rate": 1.943386526426214e-06, + "loss": 1.1216, + "step": 27391 + }, + { + "epoch": 0.8042750601914381, + "grad_norm": 0.0, + "learning_rate": 1.9428232363847256e-06, + "loss": 1.2227, + "step": 27392 + }, + { + "epoch": 0.8043044218685771, + "grad_norm": 0.0, + "learning_rate": 1.942260019206349e-06, + "loss": 1.1421, + "step": 27393 + }, + { + "epoch": 0.8043337835457162, + "grad_norm": 0.0, + "learning_rate": 1.9416968748961785e-06, + "loss": 1.2266, + "step": 27394 + }, + { + "epoch": 0.8043631452228551, + "grad_norm": 0.0, + "learning_rate": 1.9411338034593043e-06, + "loss": 1.3252, + "step": 27395 + }, + { + "epoch": 0.8043925068999941, + "grad_norm": 0.0, + "learning_rate": 1.9405708049008243e-06, + "loss": 1.2446, + "step": 27396 + }, + { + "epoch": 0.8044218685771332, + "grad_norm": 0.0, + "learning_rate": 1.9400078792258236e-06, + "loss": 1.1748, + "step": 27397 + }, + { + "epoch": 0.8044512302542721, + "grad_norm": 0.0, + "learning_rate": 1.9394450264393984e-06, + "loss": 1.1816, + "step": 27398 + }, + { + "epoch": 0.8044805919314111, + "grad_norm": 0.0, + "learning_rate": 1.938882246546634e-06, + "loss": 1.3442, + "step": 27399 + }, + { + "epoch": 0.8045099536085502, + "grad_norm": 0.0, + "learning_rate": 1.9383195395526233e-06, + "loss": 1.1768, + "step": 27400 + }, + { + "epoch": 0.8045393152856891, + "grad_norm": 0.0, + "learning_rate": 1.9377569054624547e-06, + "loss": 1.3291, + "step": 27401 + }, + { + "epoch": 0.8045686769628281, + "grad_norm": 0.0, + "learning_rate": 1.9371943442812146e-06, + "loss": 1.2627, + "step": 27402 + }, + { + "epoch": 0.8045980386399672, + "grad_norm": 0.0, + "learning_rate": 1.9366318560139898e-06, + "loss": 1.1836, + "step": 27403 + }, + { + "epoch": 0.8046274003171061, + "grad_norm": 0.0, + "learning_rate": 1.9360694406658688e-06, + "loss": 1.1904, + "step": 27404 + }, + { + "epoch": 0.8046567619942451, + "grad_norm": 0.0, + "learning_rate": 1.9355070982419377e-06, + "loss": 1.2803, + "step": 27405 + }, + { + "epoch": 0.8046861236713841, + "grad_norm": 0.0, + "learning_rate": 1.9349448287472794e-06, + "loss": 1.2744, + "step": 27406 + }, + { + "epoch": 0.8047154853485231, + "grad_norm": 0.0, + "learning_rate": 1.934382632186983e-06, + "loss": 1.2012, + "step": 27407 + }, + { + "epoch": 0.8047448470256621, + "grad_norm": 0.0, + "learning_rate": 1.9338205085661266e-06, + "loss": 1.1538, + "step": 27408 + }, + { + "epoch": 0.8047742087028011, + "grad_norm": 0.0, + "learning_rate": 1.9332584578898006e-06, + "loss": 1.2026, + "step": 27409 + }, + { + "epoch": 0.8048035703799401, + "grad_norm": 0.0, + "learning_rate": 1.932696480163083e-06, + "loss": 1.1597, + "step": 27410 + }, + { + "epoch": 0.8048329320570791, + "grad_norm": 0.0, + "learning_rate": 1.932134575391058e-06, + "loss": 1.1826, + "step": 27411 + }, + { + "epoch": 0.8048622937342181, + "grad_norm": 0.0, + "learning_rate": 1.9315727435788045e-06, + "loss": 1.1621, + "step": 27412 + }, + { + "epoch": 0.8048916554113571, + "grad_norm": 0.0, + "learning_rate": 1.9310109847314064e-06, + "loss": 1.2158, + "step": 27413 + }, + { + "epoch": 0.8049210170884961, + "grad_norm": 0.0, + "learning_rate": 1.9304492988539435e-06, + "loss": 1.2734, + "step": 27414 + }, + { + "epoch": 0.8049503787656351, + "grad_norm": 0.0, + "learning_rate": 1.9298876859514904e-06, + "loss": 1.1846, + "step": 27415 + }, + { + "epoch": 0.8049797404427741, + "grad_norm": 0.0, + "learning_rate": 1.9293261460291334e-06, + "loss": 1.2314, + "step": 27416 + }, + { + "epoch": 0.8050091021199131, + "grad_norm": 0.0, + "learning_rate": 1.9287646790919466e-06, + "loss": 1.1704, + "step": 27417 + }, + { + "epoch": 0.8050384637970521, + "grad_norm": 0.0, + "learning_rate": 1.928203285145008e-06, + "loss": 1.2588, + "step": 27418 + }, + { + "epoch": 0.8050678254741911, + "grad_norm": 0.0, + "learning_rate": 1.9276419641933918e-06, + "loss": 1.1772, + "step": 27419 + }, + { + "epoch": 0.80509718715133, + "grad_norm": 0.0, + "learning_rate": 1.927080716242179e-06, + "loss": 1.1597, + "step": 27420 + }, + { + "epoch": 0.8051265488284691, + "grad_norm": 0.0, + "learning_rate": 1.92651954129644e-06, + "loss": 1.1699, + "step": 27421 + }, + { + "epoch": 0.8051559105056081, + "grad_norm": 0.0, + "learning_rate": 1.9259584393612553e-06, + "loss": 1.0605, + "step": 27422 + }, + { + "epoch": 0.805185272182747, + "grad_norm": 0.0, + "learning_rate": 1.9253974104416937e-06, + "loss": 1.1836, + "step": 27423 + }, + { + "epoch": 0.8052146338598861, + "grad_norm": 0.0, + "learning_rate": 1.9248364545428334e-06, + "loss": 1.2329, + "step": 27424 + }, + { + "epoch": 0.8052439955370251, + "grad_norm": 0.0, + "learning_rate": 1.924275571669746e-06, + "loss": 1.2168, + "step": 27425 + }, + { + "epoch": 0.805273357214164, + "grad_norm": 0.0, + "learning_rate": 1.923714761827502e-06, + "loss": 1.1445, + "step": 27426 + }, + { + "epoch": 0.8053027188913031, + "grad_norm": 0.0, + "learning_rate": 1.9231540250211746e-06, + "loss": 1.2021, + "step": 27427 + }, + { + "epoch": 0.8053320805684421, + "grad_norm": 0.0, + "learning_rate": 1.92259336125583e-06, + "loss": 1.1123, + "step": 27428 + }, + { + "epoch": 0.805361442245581, + "grad_norm": 0.0, + "learning_rate": 1.922032770536546e-06, + "loss": 1.167, + "step": 27429 + }, + { + "epoch": 0.8053908039227201, + "grad_norm": 0.0, + "learning_rate": 1.921472252868386e-06, + "loss": 1.249, + "step": 27430 + }, + { + "epoch": 0.8054201655998591, + "grad_norm": 0.0, + "learning_rate": 1.9209118082564237e-06, + "loss": 1.2695, + "step": 27431 + }, + { + "epoch": 0.805449527276998, + "grad_norm": 0.0, + "learning_rate": 1.920351436705722e-06, + "loss": 1.1245, + "step": 27432 + }, + { + "epoch": 0.8054788889541371, + "grad_norm": 0.0, + "learning_rate": 1.919791138221354e-06, + "loss": 1.1768, + "step": 27433 + }, + { + "epoch": 0.8055082506312761, + "grad_norm": 0.0, + "learning_rate": 1.919230912808384e-06, + "loss": 1.2891, + "step": 27434 + }, + { + "epoch": 0.805537612308415, + "grad_norm": 0.0, + "learning_rate": 1.9186707604718793e-06, + "loss": 1.1685, + "step": 27435 + }, + { + "epoch": 0.8055669739855541, + "grad_norm": 0.0, + "learning_rate": 1.9181106812169035e-06, + "loss": 1.2334, + "step": 27436 + }, + { + "epoch": 0.8055963356626931, + "grad_norm": 0.0, + "learning_rate": 1.9175506750485206e-06, + "loss": 1.2153, + "step": 27437 + }, + { + "epoch": 0.805625697339832, + "grad_norm": 0.0, + "learning_rate": 1.9169907419717994e-06, + "loss": 1.1611, + "step": 27438 + }, + { + "epoch": 0.8056550590169711, + "grad_norm": 0.0, + "learning_rate": 1.9164308819917986e-06, + "loss": 1.1528, + "step": 27439 + }, + { + "epoch": 0.8056844206941101, + "grad_norm": 0.0, + "learning_rate": 1.9158710951135863e-06, + "loss": 1.292, + "step": 27440 + }, + { + "epoch": 0.805713782371249, + "grad_norm": 0.0, + "learning_rate": 1.9153113813422185e-06, + "loss": 1.1504, + "step": 27441 + }, + { + "epoch": 0.8057431440483881, + "grad_norm": 0.0, + "learning_rate": 1.9147517406827663e-06, + "loss": 1.2803, + "step": 27442 + }, + { + "epoch": 0.805772505725527, + "grad_norm": 0.0, + "learning_rate": 1.9141921731402823e-06, + "loss": 1.1865, + "step": 27443 + }, + { + "epoch": 0.805801867402666, + "grad_norm": 0.0, + "learning_rate": 1.9136326787198267e-06, + "loss": 1.2539, + "step": 27444 + }, + { + "epoch": 0.8058312290798051, + "grad_norm": 0.0, + "learning_rate": 1.9130732574264644e-06, + "loss": 1.2637, + "step": 27445 + }, + { + "epoch": 0.805860590756944, + "grad_norm": 0.0, + "learning_rate": 1.912513909265249e-06, + "loss": 1.1357, + "step": 27446 + }, + { + "epoch": 0.805889952434083, + "grad_norm": 0.0, + "learning_rate": 1.9119546342412443e-06, + "loss": 1.2617, + "step": 27447 + }, + { + "epoch": 0.8059193141112221, + "grad_norm": 0.0, + "learning_rate": 1.911395432359502e-06, + "loss": 1.3062, + "step": 27448 + }, + { + "epoch": 0.805948675788361, + "grad_norm": 0.0, + "learning_rate": 1.9108363036250854e-06, + "loss": 1.3574, + "step": 27449 + }, + { + "epoch": 0.8059780374655, + "grad_norm": 0.0, + "learning_rate": 1.9102772480430477e-06, + "loss": 1.1592, + "step": 27450 + }, + { + "epoch": 0.8060073991426391, + "grad_norm": 0.0, + "learning_rate": 1.9097182656184444e-06, + "loss": 1.2358, + "step": 27451 + }, + { + "epoch": 0.806036760819778, + "grad_norm": 0.0, + "learning_rate": 1.9091593563563284e-06, + "loss": 1.2227, + "step": 27452 + }, + { + "epoch": 0.806066122496917, + "grad_norm": 0.0, + "learning_rate": 1.9086005202617586e-06, + "loss": 1.3184, + "step": 27453 + }, + { + "epoch": 0.8060954841740561, + "grad_norm": 0.0, + "learning_rate": 1.908041757339787e-06, + "loss": 1.2339, + "step": 27454 + }, + { + "epoch": 0.806124845851195, + "grad_norm": 0.0, + "learning_rate": 1.9074830675954638e-06, + "loss": 1.2461, + "step": 27455 + }, + { + "epoch": 0.806154207528334, + "grad_norm": 0.0, + "learning_rate": 1.9069244510338457e-06, + "loss": 1.1782, + "step": 27456 + }, + { + "epoch": 0.8061835692054731, + "grad_norm": 0.0, + "learning_rate": 1.9063659076599805e-06, + "loss": 1.251, + "step": 27457 + }, + { + "epoch": 0.806212930882612, + "grad_norm": 0.0, + "learning_rate": 1.9058074374789237e-06, + "loss": 1.2197, + "step": 27458 + }, + { + "epoch": 0.806242292559751, + "grad_norm": 0.0, + "learning_rate": 1.9052490404957235e-06, + "loss": 1.2041, + "step": 27459 + }, + { + "epoch": 0.80627165423689, + "grad_norm": 0.0, + "learning_rate": 1.9046907167154283e-06, + "loss": 1.2393, + "step": 27460 + }, + { + "epoch": 0.806301015914029, + "grad_norm": 0.0, + "learning_rate": 1.904132466143086e-06, + "loss": 1.2705, + "step": 27461 + }, + { + "epoch": 0.806330377591168, + "grad_norm": 0.0, + "learning_rate": 1.9035742887837505e-06, + "loss": 1.1455, + "step": 27462 + }, + { + "epoch": 0.806359739268307, + "grad_norm": 0.0, + "learning_rate": 1.9030161846424655e-06, + "loss": 1.2822, + "step": 27463 + }, + { + "epoch": 0.806389100945446, + "grad_norm": 0.0, + "learning_rate": 1.902458153724276e-06, + "loss": 1.3027, + "step": 27464 + }, + { + "epoch": 0.806418462622585, + "grad_norm": 0.0, + "learning_rate": 1.9019001960342343e-06, + "loss": 1.1382, + "step": 27465 + }, + { + "epoch": 0.806447824299724, + "grad_norm": 0.0, + "learning_rate": 1.901342311577381e-06, + "loss": 1.2031, + "step": 27466 + }, + { + "epoch": 0.806477185976863, + "grad_norm": 0.0, + "learning_rate": 1.9007845003587678e-06, + "loss": 1.1992, + "step": 27467 + }, + { + "epoch": 0.806506547654002, + "grad_norm": 0.0, + "learning_rate": 1.9002267623834293e-06, + "loss": 1.2568, + "step": 27468 + }, + { + "epoch": 0.8065359093311409, + "grad_norm": 0.0, + "learning_rate": 1.8996690976564169e-06, + "loss": 1.2246, + "step": 27469 + }, + { + "epoch": 0.80656527100828, + "grad_norm": 0.0, + "learning_rate": 1.8991115061827691e-06, + "loss": 1.2383, + "step": 27470 + }, + { + "epoch": 0.806594632685419, + "grad_norm": 0.0, + "learning_rate": 1.8985539879675342e-06, + "loss": 1.1372, + "step": 27471 + }, + { + "epoch": 0.8066239943625579, + "grad_norm": 0.0, + "learning_rate": 1.8979965430157465e-06, + "loss": 1.2471, + "step": 27472 + }, + { + "epoch": 0.806653356039697, + "grad_norm": 0.0, + "learning_rate": 1.8974391713324536e-06, + "loss": 1.3467, + "step": 27473 + }, + { + "epoch": 0.806682717716836, + "grad_norm": 0.0, + "learning_rate": 1.8968818729226945e-06, + "loss": 1.1807, + "step": 27474 + }, + { + "epoch": 0.8067120793939749, + "grad_norm": 0.0, + "learning_rate": 1.8963246477915064e-06, + "loss": 1.3066, + "step": 27475 + }, + { + "epoch": 0.806741441071114, + "grad_norm": 0.0, + "learning_rate": 1.895767495943931e-06, + "loss": 1.125, + "step": 27476 + }, + { + "epoch": 0.806770802748253, + "grad_norm": 0.0, + "learning_rate": 1.8952104173850028e-06, + "loss": 1.1855, + "step": 27477 + }, + { + "epoch": 0.8068001644253919, + "grad_norm": 0.0, + "learning_rate": 1.8946534121197646e-06, + "loss": 1.2607, + "step": 27478 + }, + { + "epoch": 0.806829526102531, + "grad_norm": 0.0, + "learning_rate": 1.8940964801532502e-06, + "loss": 1.1353, + "step": 27479 + }, + { + "epoch": 0.80685888777967, + "grad_norm": 0.0, + "learning_rate": 1.8935396214904988e-06, + "loss": 1.1963, + "step": 27480 + }, + { + "epoch": 0.8068882494568089, + "grad_norm": 0.0, + "learning_rate": 1.892982836136542e-06, + "loss": 1.2734, + "step": 27481 + }, + { + "epoch": 0.806917611133948, + "grad_norm": 0.0, + "learning_rate": 1.8924261240964203e-06, + "loss": 1.1914, + "step": 27482 + }, + { + "epoch": 0.806946972811087, + "grad_norm": 0.0, + "learning_rate": 1.891869485375165e-06, + "loss": 1.2734, + "step": 27483 + }, + { + "epoch": 0.8069763344882259, + "grad_norm": 0.0, + "learning_rate": 1.8913129199778114e-06, + "loss": 1.2173, + "step": 27484 + }, + { + "epoch": 0.807005696165365, + "grad_norm": 0.0, + "learning_rate": 1.8907564279093915e-06, + "loss": 1.2021, + "step": 27485 + }, + { + "epoch": 0.807035057842504, + "grad_norm": 0.0, + "learning_rate": 1.890200009174936e-06, + "loss": 1.2734, + "step": 27486 + }, + { + "epoch": 0.8070644195196429, + "grad_norm": 0.0, + "learning_rate": 1.8896436637794801e-06, + "loss": 1.2109, + "step": 27487 + }, + { + "epoch": 0.807093781196782, + "grad_norm": 0.0, + "learning_rate": 1.8890873917280517e-06, + "loss": 1.3174, + "step": 27488 + }, + { + "epoch": 0.807123142873921, + "grad_norm": 0.0, + "learning_rate": 1.8885311930256867e-06, + "loss": 1.2529, + "step": 27489 + }, + { + "epoch": 0.8071525045510599, + "grad_norm": 0.0, + "learning_rate": 1.8879750676774079e-06, + "loss": 1.2007, + "step": 27490 + }, + { + "epoch": 0.807181866228199, + "grad_norm": 0.0, + "learning_rate": 1.887419015688251e-06, + "loss": 1.186, + "step": 27491 + }, + { + "epoch": 0.807211227905338, + "grad_norm": 0.0, + "learning_rate": 1.8868630370632413e-06, + "loss": 1.144, + "step": 27492 + }, + { + "epoch": 0.8072405895824769, + "grad_norm": 0.0, + "learning_rate": 1.8863071318074066e-06, + "loss": 1.0625, + "step": 27493 + }, + { + "epoch": 0.807269951259616, + "grad_norm": 0.0, + "learning_rate": 1.8857512999257755e-06, + "loss": 1.2134, + "step": 27494 + }, + { + "epoch": 0.8072993129367549, + "grad_norm": 0.0, + "learning_rate": 1.8851955414233701e-06, + "loss": 1.2168, + "step": 27495 + }, + { + "epoch": 0.8073286746138939, + "grad_norm": 0.0, + "learning_rate": 1.8846398563052227e-06, + "loss": 1.1631, + "step": 27496 + }, + { + "epoch": 0.807358036291033, + "grad_norm": 0.0, + "learning_rate": 1.8840842445763529e-06, + "loss": 1.2295, + "step": 27497 + }, + { + "epoch": 0.8073873979681719, + "grad_norm": 0.0, + "learning_rate": 1.8835287062417896e-06, + "loss": 1.228, + "step": 27498 + }, + { + "epoch": 0.8074167596453109, + "grad_norm": 0.0, + "learning_rate": 1.882973241306556e-06, + "loss": 1.2144, + "step": 27499 + }, + { + "epoch": 0.80744612132245, + "grad_norm": 0.0, + "learning_rate": 1.8824178497756728e-06, + "loss": 1.1865, + "step": 27500 + }, + { + "epoch": 0.8074754829995889, + "grad_norm": 0.0, + "learning_rate": 1.8818625316541628e-06, + "loss": 1.2168, + "step": 27501 + }, + { + "epoch": 0.8075048446767279, + "grad_norm": 0.0, + "learning_rate": 1.8813072869470507e-06, + "loss": 1.208, + "step": 27502 + }, + { + "epoch": 0.807534206353867, + "grad_norm": 0.0, + "learning_rate": 1.8807521156593566e-06, + "loss": 1.2207, + "step": 27503 + }, + { + "epoch": 0.8075635680310059, + "grad_norm": 0.0, + "learning_rate": 1.8801970177960971e-06, + "loss": 1.2256, + "step": 27504 + }, + { + "epoch": 0.8075929297081449, + "grad_norm": 0.0, + "learning_rate": 1.879641993362299e-06, + "loss": 1.1953, + "step": 27505 + }, + { + "epoch": 0.807622291385284, + "grad_norm": 0.0, + "learning_rate": 1.8790870423629747e-06, + "loss": 1.1855, + "step": 27506 + }, + { + "epoch": 0.8076516530624229, + "grad_norm": 0.0, + "learning_rate": 1.8785321648031485e-06, + "loss": 1.291, + "step": 27507 + }, + { + "epoch": 0.8076810147395619, + "grad_norm": 0.0, + "learning_rate": 1.877977360687837e-06, + "loss": 1.1782, + "step": 27508 + }, + { + "epoch": 0.807710376416701, + "grad_norm": 0.0, + "learning_rate": 1.8774226300220554e-06, + "loss": 1.2656, + "step": 27509 + }, + { + "epoch": 0.8077397380938399, + "grad_norm": 0.0, + "learning_rate": 1.8768679728108185e-06, + "loss": 1.2358, + "step": 27510 + }, + { + "epoch": 0.8077690997709789, + "grad_norm": 0.0, + "learning_rate": 1.8763133890591478e-06, + "loss": 1.1943, + "step": 27511 + }, + { + "epoch": 0.807798461448118, + "grad_norm": 0.0, + "learning_rate": 1.875758878772056e-06, + "loss": 1.1577, + "step": 27512 + }, + { + "epoch": 0.8078278231252569, + "grad_norm": 0.0, + "learning_rate": 1.8752044419545544e-06, + "loss": 0.998, + "step": 27513 + }, + { + "epoch": 0.8078571848023959, + "grad_norm": 0.0, + "learning_rate": 1.8746500786116617e-06, + "loss": 1.3164, + "step": 27514 + }, + { + "epoch": 0.807886546479535, + "grad_norm": 0.0, + "learning_rate": 1.8740957887483879e-06, + "loss": 1.1543, + "step": 27515 + }, + { + "epoch": 0.8079159081566739, + "grad_norm": 0.0, + "learning_rate": 1.8735415723697491e-06, + "loss": 1.1758, + "step": 27516 + }, + { + "epoch": 0.8079452698338129, + "grad_norm": 0.0, + "learning_rate": 1.8729874294807549e-06, + "loss": 1.0635, + "step": 27517 + }, + { + "epoch": 0.807974631510952, + "grad_norm": 0.0, + "learning_rate": 1.8724333600864164e-06, + "loss": 1.1562, + "step": 27518 + }, + { + "epoch": 0.8080039931880909, + "grad_norm": 0.0, + "learning_rate": 1.8718793641917421e-06, + "loss": 1.2617, + "step": 27519 + }, + { + "epoch": 0.8080333548652299, + "grad_norm": 0.0, + "learning_rate": 1.8713254418017479e-06, + "loss": 1.1982, + "step": 27520 + }, + { + "epoch": 0.8080627165423689, + "grad_norm": 0.0, + "learning_rate": 1.8707715929214355e-06, + "loss": 1.2393, + "step": 27521 + }, + { + "epoch": 0.8080920782195079, + "grad_norm": 0.0, + "learning_rate": 1.8702178175558205e-06, + "loss": 1.3145, + "step": 27522 + }, + { + "epoch": 0.8081214398966469, + "grad_norm": 0.0, + "learning_rate": 1.8696641157099083e-06, + "loss": 1.1709, + "step": 27523 + }, + { + "epoch": 0.8081508015737859, + "grad_norm": 0.0, + "learning_rate": 1.8691104873887056e-06, + "loss": 1.188, + "step": 27524 + }, + { + "epoch": 0.8081801632509249, + "grad_norm": 0.0, + "learning_rate": 1.8685569325972197e-06, + "loss": 1.2002, + "step": 27525 + }, + { + "epoch": 0.8082095249280639, + "grad_norm": 0.0, + "learning_rate": 1.868003451340452e-06, + "loss": 1.165, + "step": 27526 + }, + { + "epoch": 0.8082388866052029, + "grad_norm": 0.0, + "learning_rate": 1.8674500436234155e-06, + "loss": 1.2305, + "step": 27527 + }, + { + "epoch": 0.8082682482823419, + "grad_norm": 0.0, + "learning_rate": 1.866896709451108e-06, + "loss": 1.1221, + "step": 27528 + }, + { + "epoch": 0.8082976099594809, + "grad_norm": 0.0, + "learning_rate": 1.8663434488285393e-06, + "loss": 1.0371, + "step": 27529 + }, + { + "epoch": 0.8083269716366199, + "grad_norm": 0.0, + "learning_rate": 1.865790261760707e-06, + "loss": 1.1865, + "step": 27530 + }, + { + "epoch": 0.8083563333137589, + "grad_norm": 0.0, + "learning_rate": 1.865237148252619e-06, + "loss": 1.0127, + "step": 27531 + }, + { + "epoch": 0.8083856949908979, + "grad_norm": 0.0, + "learning_rate": 1.8646841083092749e-06, + "loss": 1.2969, + "step": 27532 + }, + { + "epoch": 0.8084150566680369, + "grad_norm": 0.0, + "learning_rate": 1.8641311419356756e-06, + "loss": 1.1597, + "step": 27533 + }, + { + "epoch": 0.8084444183451759, + "grad_norm": 0.0, + "learning_rate": 1.8635782491368225e-06, + "loss": 1.2471, + "step": 27534 + }, + { + "epoch": 0.8084737800223148, + "grad_norm": 0.0, + "learning_rate": 1.863025429917713e-06, + "loss": 1.2031, + "step": 27535 + }, + { + "epoch": 0.8085031416994539, + "grad_norm": 0.0, + "learning_rate": 1.8624726842833496e-06, + "loss": 1.1494, + "step": 27536 + }, + { + "epoch": 0.8085325033765929, + "grad_norm": 0.0, + "learning_rate": 1.8619200122387283e-06, + "loss": 1.168, + "step": 27537 + }, + { + "epoch": 0.8085618650537318, + "grad_norm": 0.0, + "learning_rate": 1.8613674137888504e-06, + "loss": 1.1919, + "step": 27538 + }, + { + "epoch": 0.8085912267308709, + "grad_norm": 0.0, + "learning_rate": 1.860814888938709e-06, + "loss": 1.2173, + "step": 27539 + }, + { + "epoch": 0.8086205884080099, + "grad_norm": 0.0, + "learning_rate": 1.8602624376933064e-06, + "loss": 1.1108, + "step": 27540 + }, + { + "epoch": 0.8086499500851488, + "grad_norm": 0.0, + "learning_rate": 1.8597100600576345e-06, + "loss": 1.146, + "step": 27541 + }, + { + "epoch": 0.8086793117622879, + "grad_norm": 0.0, + "learning_rate": 1.8591577560366892e-06, + "loss": 1.1514, + "step": 27542 + }, + { + "epoch": 0.8087086734394269, + "grad_norm": 0.0, + "learning_rate": 1.8586055256354652e-06, + "loss": 1.2578, + "step": 27543 + }, + { + "epoch": 0.8087380351165658, + "grad_norm": 0.0, + "learning_rate": 1.8580533688589542e-06, + "loss": 1.2329, + "step": 27544 + }, + { + "epoch": 0.8087673967937049, + "grad_norm": 0.0, + "learning_rate": 1.8575012857121543e-06, + "loss": 1.1802, + "step": 27545 + }, + { + "epoch": 0.8087967584708439, + "grad_norm": 0.0, + "learning_rate": 1.8569492762000529e-06, + "loss": 1.1641, + "step": 27546 + }, + { + "epoch": 0.8088261201479828, + "grad_norm": 0.0, + "learning_rate": 1.856397340327647e-06, + "loss": 1.1724, + "step": 27547 + }, + { + "epoch": 0.8088554818251219, + "grad_norm": 0.0, + "learning_rate": 1.8558454780999258e-06, + "loss": 1.1826, + "step": 27548 + }, + { + "epoch": 0.8088848435022609, + "grad_norm": 0.0, + "learning_rate": 1.8552936895218798e-06, + "loss": 1.2021, + "step": 27549 + }, + { + "epoch": 0.8089142051793998, + "grad_norm": 0.0, + "learning_rate": 1.8547419745984963e-06, + "loss": 1.2139, + "step": 27550 + }, + { + "epoch": 0.8089435668565389, + "grad_norm": 0.0, + "learning_rate": 1.8541903333347688e-06, + "loss": 1.2197, + "step": 27551 + }, + { + "epoch": 0.8089729285336779, + "grad_norm": 0.0, + "learning_rate": 1.8536387657356858e-06, + "loss": 1.1812, + "step": 27552 + }, + { + "epoch": 0.8090022902108168, + "grad_norm": 0.0, + "learning_rate": 1.8530872718062299e-06, + "loss": 1.252, + "step": 27553 + }, + { + "epoch": 0.8090316518879559, + "grad_norm": 0.0, + "learning_rate": 1.8525358515513958e-06, + "loss": 1.1533, + "step": 27554 + }, + { + "epoch": 0.8090610135650949, + "grad_norm": 0.0, + "learning_rate": 1.851984504976163e-06, + "loss": 1.2002, + "step": 27555 + }, + { + "epoch": 0.8090903752422338, + "grad_norm": 0.0, + "learning_rate": 1.8514332320855243e-06, + "loss": 1.3027, + "step": 27556 + }, + { + "epoch": 0.8091197369193729, + "grad_norm": 0.0, + "learning_rate": 1.8508820328844622e-06, + "loss": 1.2734, + "step": 27557 + }, + { + "epoch": 0.8091490985965119, + "grad_norm": 0.0, + "learning_rate": 1.8503309073779608e-06, + "loss": 1.2046, + "step": 27558 + }, + { + "epoch": 0.8091784602736508, + "grad_norm": 0.0, + "learning_rate": 1.8497798555710012e-06, + "loss": 1.3467, + "step": 27559 + }, + { + "epoch": 0.8092078219507898, + "grad_norm": 0.0, + "learning_rate": 1.849228877468572e-06, + "loss": 1.3105, + "step": 27560 + }, + { + "epoch": 0.8092371836279288, + "grad_norm": 0.0, + "learning_rate": 1.848677973075651e-06, + "loss": 1.2305, + "step": 27561 + }, + { + "epoch": 0.8092665453050678, + "grad_norm": 0.0, + "learning_rate": 1.8481271423972258e-06, + "loss": 1.2764, + "step": 27562 + }, + { + "epoch": 0.8092959069822068, + "grad_norm": 0.0, + "learning_rate": 1.8475763854382734e-06, + "loss": 1.2988, + "step": 27563 + }, + { + "epoch": 0.8093252686593458, + "grad_norm": 0.0, + "learning_rate": 1.847025702203774e-06, + "loss": 1.2939, + "step": 27564 + }, + { + "epoch": 0.8093546303364848, + "grad_norm": 0.0, + "learning_rate": 1.8464750926987119e-06, + "loss": 1.2046, + "step": 27565 + }, + { + "epoch": 0.8093839920136238, + "grad_norm": 0.0, + "learning_rate": 1.8459245569280626e-06, + "loss": 1.0786, + "step": 27566 + }, + { + "epoch": 0.8094133536907628, + "grad_norm": 0.0, + "learning_rate": 1.845374094896807e-06, + "loss": 1.2041, + "step": 27567 + }, + { + "epoch": 0.8094427153679018, + "grad_norm": 0.0, + "learning_rate": 1.8448237066099195e-06, + "loss": 1.1855, + "step": 27568 + }, + { + "epoch": 0.8094720770450408, + "grad_norm": 0.0, + "learning_rate": 1.844273392072381e-06, + "loss": 1.1821, + "step": 27569 + }, + { + "epoch": 0.8095014387221798, + "grad_norm": 0.0, + "learning_rate": 1.843723151289165e-06, + "loss": 1.1636, + "step": 27570 + }, + { + "epoch": 0.8095308003993188, + "grad_norm": 0.0, + "learning_rate": 1.8431729842652524e-06, + "loss": 1.1401, + "step": 27571 + }, + { + "epoch": 0.8095601620764578, + "grad_norm": 0.0, + "learning_rate": 1.8426228910056155e-06, + "loss": 1.1816, + "step": 27572 + }, + { + "epoch": 0.8095895237535968, + "grad_norm": 0.0, + "learning_rate": 1.842072871515228e-06, + "loss": 1.2837, + "step": 27573 + }, + { + "epoch": 0.8096188854307358, + "grad_norm": 0.0, + "learning_rate": 1.8415229257990664e-06, + "loss": 0.9922, + "step": 27574 + }, + { + "epoch": 0.8096482471078748, + "grad_norm": 0.0, + "learning_rate": 1.8409730538620985e-06, + "loss": 1.2144, + "step": 27575 + }, + { + "epoch": 0.8096776087850138, + "grad_norm": 0.0, + "learning_rate": 1.8404232557093039e-06, + "loss": 1.1738, + "step": 27576 + }, + { + "epoch": 0.8097069704621528, + "grad_norm": 0.0, + "learning_rate": 1.8398735313456485e-06, + "loss": 1.1401, + "step": 27577 + }, + { + "epoch": 0.8097363321392917, + "grad_norm": 0.0, + "learning_rate": 1.8393238807761093e-06, + "loss": 1.0918, + "step": 27578 + }, + { + "epoch": 0.8097656938164308, + "grad_norm": 0.0, + "learning_rate": 1.8387743040056517e-06, + "loss": 1.3213, + "step": 27579 + }, + { + "epoch": 0.8097950554935698, + "grad_norm": 0.0, + "learning_rate": 1.8382248010392512e-06, + "loss": 1.2803, + "step": 27580 + }, + { + "epoch": 0.8098244171707087, + "grad_norm": 0.0, + "learning_rate": 1.837675371881874e-06, + "loss": 1.2832, + "step": 27581 + }, + { + "epoch": 0.8098537788478478, + "grad_norm": 0.0, + "learning_rate": 1.8371260165384886e-06, + "loss": 1.167, + "step": 27582 + }, + { + "epoch": 0.8098831405249868, + "grad_norm": 0.0, + "learning_rate": 1.8365767350140628e-06, + "loss": 1.2871, + "step": 27583 + }, + { + "epoch": 0.8099125022021257, + "grad_norm": 0.0, + "learning_rate": 1.836027527313562e-06, + "loss": 1.1782, + "step": 27584 + }, + { + "epoch": 0.8099418638792648, + "grad_norm": 0.0, + "learning_rate": 1.8354783934419573e-06, + "loss": 1.251, + "step": 27585 + }, + { + "epoch": 0.8099712255564038, + "grad_norm": 0.0, + "learning_rate": 1.8349293334042107e-06, + "loss": 1.3174, + "step": 27586 + }, + { + "epoch": 0.8100005872335427, + "grad_norm": 0.0, + "learning_rate": 1.8343803472052913e-06, + "loss": 1.1865, + "step": 27587 + }, + { + "epoch": 0.8100299489106818, + "grad_norm": 0.0, + "learning_rate": 1.8338314348501596e-06, + "loss": 1.2832, + "step": 27588 + }, + { + "epoch": 0.8100593105878208, + "grad_norm": 0.0, + "learning_rate": 1.8332825963437838e-06, + "loss": 1.1055, + "step": 27589 + }, + { + "epoch": 0.8100886722649597, + "grad_norm": 0.0, + "learning_rate": 1.8327338316911247e-06, + "loss": 1.146, + "step": 27590 + }, + { + "epoch": 0.8101180339420988, + "grad_norm": 0.0, + "learning_rate": 1.8321851408971458e-06, + "loss": 1.2139, + "step": 27591 + }, + { + "epoch": 0.8101473956192378, + "grad_norm": 0.0, + "learning_rate": 1.8316365239668076e-06, + "loss": 1.1191, + "step": 27592 + }, + { + "epoch": 0.8101767572963767, + "grad_norm": 0.0, + "learning_rate": 1.8310879809050707e-06, + "loss": 1.1357, + "step": 27593 + }, + { + "epoch": 0.8102061189735158, + "grad_norm": 0.0, + "learning_rate": 1.8305395117169e-06, + "loss": 1.1519, + "step": 27594 + }, + { + "epoch": 0.8102354806506548, + "grad_norm": 0.0, + "learning_rate": 1.8299911164072492e-06, + "loss": 1.4473, + "step": 27595 + }, + { + "epoch": 0.8102648423277937, + "grad_norm": 0.0, + "learning_rate": 1.8294427949810844e-06, + "loss": 1.2471, + "step": 27596 + }, + { + "epoch": 0.8102942040049328, + "grad_norm": 0.0, + "learning_rate": 1.8288945474433605e-06, + "loss": 1.2402, + "step": 27597 + }, + { + "epoch": 0.8103235656820718, + "grad_norm": 0.0, + "learning_rate": 1.828346373799036e-06, + "loss": 1.1895, + "step": 27598 + }, + { + "epoch": 0.8103529273592107, + "grad_norm": 0.0, + "learning_rate": 1.8277982740530653e-06, + "loss": 1.2256, + "step": 27599 + }, + { + "epoch": 0.8103822890363498, + "grad_norm": 0.0, + "learning_rate": 1.8272502482104104e-06, + "loss": 1.1685, + "step": 27600 + }, + { + "epoch": 0.8104116507134888, + "grad_norm": 0.0, + "learning_rate": 1.826702296276024e-06, + "loss": 1.3281, + "step": 27601 + }, + { + "epoch": 0.8104410123906277, + "grad_norm": 0.0, + "learning_rate": 1.8261544182548608e-06, + "loss": 1.2139, + "step": 27602 + }, + { + "epoch": 0.8104703740677668, + "grad_norm": 0.0, + "learning_rate": 1.825606614151878e-06, + "loss": 1.2051, + "step": 27603 + }, + { + "epoch": 0.8104997357449057, + "grad_norm": 0.0, + "learning_rate": 1.8250588839720261e-06, + "loss": 1.2139, + "step": 27604 + }, + { + "epoch": 0.8105290974220447, + "grad_norm": 0.0, + "learning_rate": 1.8245112277202626e-06, + "loss": 1.2539, + "step": 27605 + }, + { + "epoch": 0.8105584590991838, + "grad_norm": 0.0, + "learning_rate": 1.8239636454015374e-06, + "loss": 1.291, + "step": 27606 + }, + { + "epoch": 0.8105878207763227, + "grad_norm": 0.0, + "learning_rate": 1.8234161370208037e-06, + "loss": 1.3193, + "step": 27607 + }, + { + "epoch": 0.8106171824534617, + "grad_norm": 0.0, + "learning_rate": 1.8228687025830093e-06, + "loss": 1.167, + "step": 27608 + }, + { + "epoch": 0.8106465441306008, + "grad_norm": 0.0, + "learning_rate": 1.8223213420931097e-06, + "loss": 1.1489, + "step": 27609 + }, + { + "epoch": 0.8106759058077397, + "grad_norm": 0.0, + "learning_rate": 1.8217740555560493e-06, + "loss": 1.02, + "step": 27610 + }, + { + "epoch": 0.8107052674848787, + "grad_norm": 0.0, + "learning_rate": 1.8212268429767844e-06, + "loss": 1.1665, + "step": 27611 + }, + { + "epoch": 0.8107346291620178, + "grad_norm": 0.0, + "learning_rate": 1.8206797043602586e-06, + "loss": 1.251, + "step": 27612 + }, + { + "epoch": 0.8107639908391567, + "grad_norm": 0.0, + "learning_rate": 1.8201326397114194e-06, + "loss": 1.3076, + "step": 27613 + }, + { + "epoch": 0.8107933525162957, + "grad_norm": 0.0, + "learning_rate": 1.8195856490352182e-06, + "loss": 1.208, + "step": 27614 + }, + { + "epoch": 0.8108227141934348, + "grad_norm": 0.0, + "learning_rate": 1.8190387323365977e-06, + "loss": 1.2681, + "step": 27615 + }, + { + "epoch": 0.8108520758705737, + "grad_norm": 0.0, + "learning_rate": 1.8184918896205062e-06, + "loss": 1.374, + "step": 27616 + }, + { + "epoch": 0.8108814375477127, + "grad_norm": 0.0, + "learning_rate": 1.8179451208918852e-06, + "loss": 1.2422, + "step": 27617 + }, + { + "epoch": 0.8109107992248518, + "grad_norm": 0.0, + "learning_rate": 1.8173984261556842e-06, + "loss": 1.2939, + "step": 27618 + }, + { + "epoch": 0.8109401609019907, + "grad_norm": 0.0, + "learning_rate": 1.8168518054168426e-06, + "loss": 1.2207, + "step": 27619 + }, + { + "epoch": 0.8109695225791297, + "grad_norm": 0.0, + "learning_rate": 1.8163052586803088e-06, + "loss": 1.2539, + "step": 27620 + }, + { + "epoch": 0.8109988842562688, + "grad_norm": 0.0, + "learning_rate": 1.8157587859510216e-06, + "loss": 1.3213, + "step": 27621 + }, + { + "epoch": 0.8110282459334077, + "grad_norm": 0.0, + "learning_rate": 1.8152123872339244e-06, + "loss": 1.3794, + "step": 27622 + }, + { + "epoch": 0.8110576076105467, + "grad_norm": 0.0, + "learning_rate": 1.8146660625339574e-06, + "loss": 1.2373, + "step": 27623 + }, + { + "epoch": 0.8110869692876858, + "grad_norm": 0.0, + "learning_rate": 1.8141198118560588e-06, + "loss": 1.2583, + "step": 27624 + }, + { + "epoch": 0.8111163309648247, + "grad_norm": 0.0, + "learning_rate": 1.8135736352051737e-06, + "loss": 1.2568, + "step": 27625 + }, + { + "epoch": 0.8111456926419637, + "grad_norm": 0.0, + "learning_rate": 1.8130275325862368e-06, + "loss": 1.2305, + "step": 27626 + }, + { + "epoch": 0.8111750543191028, + "grad_norm": 0.0, + "learning_rate": 1.812481504004191e-06, + "loss": 1.2197, + "step": 27627 + }, + { + "epoch": 0.8112044159962417, + "grad_norm": 0.0, + "learning_rate": 1.811935549463969e-06, + "loss": 1.1865, + "step": 27628 + }, + { + "epoch": 0.8112337776733807, + "grad_norm": 0.0, + "learning_rate": 1.8113896689705136e-06, + "loss": 1.1797, + "step": 27629 + }, + { + "epoch": 0.8112631393505197, + "grad_norm": 0.0, + "learning_rate": 1.8108438625287584e-06, + "loss": 1.3096, + "step": 27630 + }, + { + "epoch": 0.8112925010276587, + "grad_norm": 0.0, + "learning_rate": 1.8102981301436384e-06, + "loss": 1.2422, + "step": 27631 + }, + { + "epoch": 0.8113218627047977, + "grad_norm": 0.0, + "learning_rate": 1.809752471820091e-06, + "loss": 1.3164, + "step": 27632 + }, + { + "epoch": 0.8113512243819367, + "grad_norm": 0.0, + "learning_rate": 1.8092068875630474e-06, + "loss": 1.1802, + "step": 27633 + }, + { + "epoch": 0.8113805860590757, + "grad_norm": 0.0, + "learning_rate": 1.8086613773774452e-06, + "loss": 1.3066, + "step": 27634 + }, + { + "epoch": 0.8114099477362147, + "grad_norm": 0.0, + "learning_rate": 1.8081159412682136e-06, + "loss": 1.1357, + "step": 27635 + }, + { + "epoch": 0.8114393094133537, + "grad_norm": 0.0, + "learning_rate": 1.807570579240291e-06, + "loss": 1.3252, + "step": 27636 + }, + { + "epoch": 0.8114686710904927, + "grad_norm": 0.0, + "learning_rate": 1.807025291298602e-06, + "loss": 1.3276, + "step": 27637 + }, + { + "epoch": 0.8114980327676317, + "grad_norm": 0.0, + "learning_rate": 1.8064800774480851e-06, + "loss": 1.3315, + "step": 27638 + }, + { + "epoch": 0.8115273944447707, + "grad_norm": 0.0, + "learning_rate": 1.8059349376936663e-06, + "loss": 1.3125, + "step": 27639 + }, + { + "epoch": 0.8115567561219097, + "grad_norm": 0.0, + "learning_rate": 1.8053898720402775e-06, + "loss": 1.1733, + "step": 27640 + }, + { + "epoch": 0.8115861177990487, + "grad_norm": 0.0, + "learning_rate": 1.8048448804928453e-06, + "loss": 1.2705, + "step": 27641 + }, + { + "epoch": 0.8116154794761877, + "grad_norm": 0.0, + "learning_rate": 1.8042999630562985e-06, + "loss": 1.209, + "step": 27642 + }, + { + "epoch": 0.8116448411533267, + "grad_norm": 0.0, + "learning_rate": 1.8037551197355674e-06, + "loss": 1.1997, + "step": 27643 + }, + { + "epoch": 0.8116742028304657, + "grad_norm": 0.0, + "learning_rate": 1.8032103505355758e-06, + "loss": 1.1865, + "step": 27644 + }, + { + "epoch": 0.8117035645076047, + "grad_norm": 0.0, + "learning_rate": 1.8026656554612553e-06, + "loss": 1.2876, + "step": 27645 + }, + { + "epoch": 0.8117329261847437, + "grad_norm": 0.0, + "learning_rate": 1.8021210345175254e-06, + "loss": 1.126, + "step": 27646 + }, + { + "epoch": 0.8117622878618826, + "grad_norm": 0.0, + "learning_rate": 1.80157648770932e-06, + "loss": 1.2461, + "step": 27647 + }, + { + "epoch": 0.8117916495390217, + "grad_norm": 0.0, + "learning_rate": 1.8010320150415529e-06, + "loss": 1.2231, + "step": 27648 + }, + { + "epoch": 0.8118210112161607, + "grad_norm": 0.0, + "learning_rate": 1.8004876165191566e-06, + "loss": 1.2285, + "step": 27649 + }, + { + "epoch": 0.8118503728932996, + "grad_norm": 0.0, + "learning_rate": 1.7999432921470494e-06, + "loss": 1.1299, + "step": 27650 + }, + { + "epoch": 0.8118797345704387, + "grad_norm": 0.0, + "learning_rate": 1.7993990419301522e-06, + "loss": 1.3262, + "step": 27651 + }, + { + "epoch": 0.8119090962475777, + "grad_norm": 0.0, + "learning_rate": 1.798854865873393e-06, + "loss": 1.2139, + "step": 27652 + }, + { + "epoch": 0.8119384579247166, + "grad_norm": 0.0, + "learning_rate": 1.7983107639816866e-06, + "loss": 1.2471, + "step": 27653 + }, + { + "epoch": 0.8119678196018557, + "grad_norm": 0.0, + "learning_rate": 1.7977667362599593e-06, + "loss": 1.1392, + "step": 27654 + }, + { + "epoch": 0.8119971812789947, + "grad_norm": 0.0, + "learning_rate": 1.7972227827131282e-06, + "loss": 1.2031, + "step": 27655 + }, + { + "epoch": 0.8120265429561336, + "grad_norm": 0.0, + "learning_rate": 1.796678903346113e-06, + "loss": 1.188, + "step": 27656 + }, + { + "epoch": 0.8120559046332727, + "grad_norm": 0.0, + "learning_rate": 1.796135098163827e-06, + "loss": 1.25, + "step": 27657 + }, + { + "epoch": 0.8120852663104117, + "grad_norm": 0.0, + "learning_rate": 1.7955913671711967e-06, + "loss": 1.2598, + "step": 27658 + }, + { + "epoch": 0.8121146279875506, + "grad_norm": 0.0, + "learning_rate": 1.7950477103731312e-06, + "loss": 1.1963, + "step": 27659 + }, + { + "epoch": 0.8121439896646896, + "grad_norm": 0.0, + "learning_rate": 1.7945041277745534e-06, + "loss": 1.2612, + "step": 27660 + }, + { + "epoch": 0.8121733513418287, + "grad_norm": 0.0, + "learning_rate": 1.7939606193803761e-06, + "loss": 1.1782, + "step": 27661 + }, + { + "epoch": 0.8122027130189676, + "grad_norm": 0.0, + "learning_rate": 1.7934171851955118e-06, + "loss": 1.106, + "step": 27662 + }, + { + "epoch": 0.8122320746961066, + "grad_norm": 0.0, + "learning_rate": 1.79287382522488e-06, + "loss": 1.2627, + "step": 27663 + }, + { + "epoch": 0.8122614363732457, + "grad_norm": 0.0, + "learning_rate": 1.7923305394733924e-06, + "loss": 1.2402, + "step": 27664 + }, + { + "epoch": 0.8122907980503846, + "grad_norm": 0.0, + "learning_rate": 1.791787327945962e-06, + "loss": 1.124, + "step": 27665 + }, + { + "epoch": 0.8123201597275236, + "grad_norm": 0.0, + "learning_rate": 1.7912441906474986e-06, + "loss": 1.2207, + "step": 27666 + }, + { + "epoch": 0.8123495214046627, + "grad_norm": 0.0, + "learning_rate": 1.7907011275829177e-06, + "loss": 1.2461, + "step": 27667 + }, + { + "epoch": 0.8123788830818016, + "grad_norm": 0.0, + "learning_rate": 1.7901581387571276e-06, + "loss": 1.1309, + "step": 27668 + }, + { + "epoch": 0.8124082447589406, + "grad_norm": 0.0, + "learning_rate": 1.789615224175042e-06, + "loss": 1.1631, + "step": 27669 + }, + { + "epoch": 0.8124376064360797, + "grad_norm": 0.0, + "learning_rate": 1.7890723838415692e-06, + "loss": 1.2607, + "step": 27670 + }, + { + "epoch": 0.8124669681132186, + "grad_norm": 0.0, + "learning_rate": 1.788529617761614e-06, + "loss": 1.3604, + "step": 27671 + }, + { + "epoch": 0.8124963297903576, + "grad_norm": 0.0, + "learning_rate": 1.787986925940095e-06, + "loss": 1.1982, + "step": 27672 + }, + { + "epoch": 0.8125256914674966, + "grad_norm": 0.0, + "learning_rate": 1.787444308381907e-06, + "loss": 1.2266, + "step": 27673 + }, + { + "epoch": 0.8125550531446356, + "grad_norm": 0.0, + "learning_rate": 1.7869017650919663e-06, + "loss": 1.21, + "step": 27674 + }, + { + "epoch": 0.8125844148217746, + "grad_norm": 0.0, + "learning_rate": 1.786359296075174e-06, + "loss": 1.1445, + "step": 27675 + }, + { + "epoch": 0.8126137764989136, + "grad_norm": 0.0, + "learning_rate": 1.7858169013364412e-06, + "loss": 1.1094, + "step": 27676 + }, + { + "epoch": 0.8126431381760526, + "grad_norm": 0.0, + "learning_rate": 1.7852745808806659e-06, + "loss": 1.0854, + "step": 27677 + }, + { + "epoch": 0.8126724998531916, + "grad_norm": 0.0, + "learning_rate": 1.7847323347127598e-06, + "loss": 1.1924, + "step": 27678 + }, + { + "epoch": 0.8127018615303306, + "grad_norm": 0.0, + "learning_rate": 1.7841901628376236e-06, + "loss": 1.2495, + "step": 27679 + }, + { + "epoch": 0.8127312232074696, + "grad_norm": 0.0, + "learning_rate": 1.7836480652601595e-06, + "loss": 1.2061, + "step": 27680 + }, + { + "epoch": 0.8127605848846086, + "grad_norm": 0.0, + "learning_rate": 1.7831060419852697e-06, + "loss": 1.084, + "step": 27681 + }, + { + "epoch": 0.8127899465617476, + "grad_norm": 0.0, + "learning_rate": 1.7825640930178546e-06, + "loss": 1.1543, + "step": 27682 + }, + { + "epoch": 0.8128193082388866, + "grad_norm": 0.0, + "learning_rate": 1.782022218362819e-06, + "loss": 1.2422, + "step": 27683 + }, + { + "epoch": 0.8128486699160256, + "grad_norm": 0.0, + "learning_rate": 1.7814804180250577e-06, + "loss": 1.1777, + "step": 27684 + }, + { + "epoch": 0.8128780315931646, + "grad_norm": 0.0, + "learning_rate": 1.7809386920094774e-06, + "loss": 1.1104, + "step": 27685 + }, + { + "epoch": 0.8129073932703036, + "grad_norm": 0.0, + "learning_rate": 1.7803970403209715e-06, + "loss": 1.1138, + "step": 27686 + }, + { + "epoch": 0.8129367549474426, + "grad_norm": 0.0, + "learning_rate": 1.7798554629644416e-06, + "loss": 1.2979, + "step": 27687 + }, + { + "epoch": 0.8129661166245816, + "grad_norm": 0.0, + "learning_rate": 1.779313959944784e-06, + "loss": 1.293, + "step": 27688 + }, + { + "epoch": 0.8129954783017206, + "grad_norm": 0.0, + "learning_rate": 1.7787725312668959e-06, + "loss": 1.2144, + "step": 27689 + }, + { + "epoch": 0.8130248399788595, + "grad_norm": 0.0, + "learning_rate": 1.778231176935673e-06, + "loss": 1.0747, + "step": 27690 + }, + { + "epoch": 0.8130542016559986, + "grad_norm": 0.0, + "learning_rate": 1.7776898969560096e-06, + "loss": 1.25, + "step": 27691 + }, + { + "epoch": 0.8130835633331376, + "grad_norm": 0.0, + "learning_rate": 1.777148691332804e-06, + "loss": 1.2393, + "step": 27692 + }, + { + "epoch": 0.8131129250102765, + "grad_norm": 0.0, + "learning_rate": 1.7766075600709464e-06, + "loss": 1.124, + "step": 27693 + }, + { + "epoch": 0.8131422866874156, + "grad_norm": 0.0, + "learning_rate": 1.7760665031753356e-06, + "loss": 1.21, + "step": 27694 + }, + { + "epoch": 0.8131716483645546, + "grad_norm": 0.0, + "learning_rate": 1.7755255206508582e-06, + "loss": 1.3691, + "step": 27695 + }, + { + "epoch": 0.8132010100416935, + "grad_norm": 0.0, + "learning_rate": 1.7749846125024128e-06, + "loss": 1.2656, + "step": 27696 + }, + { + "epoch": 0.8132303717188326, + "grad_norm": 0.0, + "learning_rate": 1.7744437787348878e-06, + "loss": 1.1289, + "step": 27697 + }, + { + "epoch": 0.8132597333959716, + "grad_norm": 0.0, + "learning_rate": 1.7739030193531747e-06, + "loss": 1.2305, + "step": 27698 + }, + { + "epoch": 0.8132890950731105, + "grad_norm": 0.0, + "learning_rate": 1.7733623343621631e-06, + "loss": 1.2793, + "step": 27699 + }, + { + "epoch": 0.8133184567502496, + "grad_norm": 0.0, + "learning_rate": 1.77282172376674e-06, + "loss": 1.1523, + "step": 27700 + }, + { + "epoch": 0.8133478184273886, + "grad_norm": 0.0, + "learning_rate": 1.7722811875718004e-06, + "loss": 1.2759, + "step": 27701 + }, + { + "epoch": 0.8133771801045275, + "grad_norm": 0.0, + "learning_rate": 1.7717407257822262e-06, + "loss": 1.1445, + "step": 27702 + }, + { + "epoch": 0.8134065417816666, + "grad_norm": 0.0, + "learning_rate": 1.7712003384029097e-06, + "loss": 1.146, + "step": 27703 + }, + { + "epoch": 0.8134359034588056, + "grad_norm": 0.0, + "learning_rate": 1.7706600254387364e-06, + "loss": 1.0293, + "step": 27704 + }, + { + "epoch": 0.8134652651359445, + "grad_norm": 0.0, + "learning_rate": 1.770119786894593e-06, + "loss": 1.1328, + "step": 27705 + }, + { + "epoch": 0.8134946268130836, + "grad_norm": 0.0, + "learning_rate": 1.7695796227753604e-06, + "loss": 1.3799, + "step": 27706 + }, + { + "epoch": 0.8135239884902226, + "grad_norm": 0.0, + "learning_rate": 1.76903953308593e-06, + "loss": 1.2002, + "step": 27707 + }, + { + "epoch": 0.8135533501673615, + "grad_norm": 0.0, + "learning_rate": 1.7684995178311804e-06, + "loss": 1.2129, + "step": 27708 + }, + { + "epoch": 0.8135827118445006, + "grad_norm": 0.0, + "learning_rate": 1.7679595770160007e-06, + "loss": 1.2158, + "step": 27709 + }, + { + "epoch": 0.8136120735216396, + "grad_norm": 0.0, + "learning_rate": 1.7674197106452707e-06, + "loss": 1.1689, + "step": 27710 + }, + { + "epoch": 0.8136414351987785, + "grad_norm": 0.0, + "learning_rate": 1.7668799187238695e-06, + "loss": 1.2402, + "step": 27711 + }, + { + "epoch": 0.8136707968759176, + "grad_norm": 0.0, + "learning_rate": 1.7663402012566855e-06, + "loss": 1.3027, + "step": 27712 + }, + { + "epoch": 0.8137001585530566, + "grad_norm": 0.0, + "learning_rate": 1.7658005582485949e-06, + "loss": 1.1782, + "step": 27713 + }, + { + "epoch": 0.8137295202301955, + "grad_norm": 0.0, + "learning_rate": 1.765260989704478e-06, + "loss": 1.1523, + "step": 27714 + }, + { + "epoch": 0.8137588819073346, + "grad_norm": 0.0, + "learning_rate": 1.7647214956292135e-06, + "loss": 1.1748, + "step": 27715 + }, + { + "epoch": 0.8137882435844735, + "grad_norm": 0.0, + "learning_rate": 1.764182076027684e-06, + "loss": 1.2002, + "step": 27716 + }, + { + "epoch": 0.8138176052616125, + "grad_norm": 0.0, + "learning_rate": 1.7636427309047622e-06, + "loss": 1.1274, + "step": 27717 + }, + { + "epoch": 0.8138469669387516, + "grad_norm": 0.0, + "learning_rate": 1.7631034602653318e-06, + "loss": 1.1548, + "step": 27718 + }, + { + "epoch": 0.8138763286158905, + "grad_norm": 0.0, + "learning_rate": 1.762564264114266e-06, + "loss": 1.0435, + "step": 27719 + }, + { + "epoch": 0.8139056902930295, + "grad_norm": 0.0, + "learning_rate": 1.7620251424564383e-06, + "loss": 1.1875, + "step": 27720 + }, + { + "epoch": 0.8139350519701686, + "grad_norm": 0.0, + "learning_rate": 1.7614860952967295e-06, + "loss": 1.1318, + "step": 27721 + }, + { + "epoch": 0.8139644136473075, + "grad_norm": 0.0, + "learning_rate": 1.7609471226400122e-06, + "loss": 1.2314, + "step": 27722 + }, + { + "epoch": 0.8139937753244465, + "grad_norm": 0.0, + "learning_rate": 1.7604082244911602e-06, + "loss": 1.21, + "step": 27723 + }, + { + "epoch": 0.8140231370015856, + "grad_norm": 0.0, + "learning_rate": 1.7598694008550442e-06, + "loss": 1.2734, + "step": 27724 + }, + { + "epoch": 0.8140524986787245, + "grad_norm": 0.0, + "learning_rate": 1.7593306517365417e-06, + "loss": 1.0923, + "step": 27725 + }, + { + "epoch": 0.8140818603558635, + "grad_norm": 0.0, + "learning_rate": 1.7587919771405204e-06, + "loss": 1.2603, + "step": 27726 + }, + { + "epoch": 0.8141112220330026, + "grad_norm": 0.0, + "learning_rate": 1.758253377071857e-06, + "loss": 1.3203, + "step": 27727 + }, + { + "epoch": 0.8141405837101415, + "grad_norm": 0.0, + "learning_rate": 1.757714851535418e-06, + "loss": 1.2314, + "step": 27728 + }, + { + "epoch": 0.8141699453872805, + "grad_norm": 0.0, + "learning_rate": 1.7571764005360747e-06, + "loss": 1.1533, + "step": 27729 + }, + { + "epoch": 0.8141993070644196, + "grad_norm": 0.0, + "learning_rate": 1.7566380240786963e-06, + "loss": 1.1826, + "step": 27730 + }, + { + "epoch": 0.8142286687415585, + "grad_norm": 0.0, + "learning_rate": 1.7560997221681486e-06, + "loss": 1.248, + "step": 27731 + }, + { + "epoch": 0.8142580304186975, + "grad_norm": 0.0, + "learning_rate": 1.7555614948093057e-06, + "loss": 1.2383, + "step": 27732 + }, + { + "epoch": 0.8142873920958366, + "grad_norm": 0.0, + "learning_rate": 1.7550233420070284e-06, + "loss": 1.2798, + "step": 27733 + }, + { + "epoch": 0.8143167537729755, + "grad_norm": 0.0, + "learning_rate": 1.7544852637661901e-06, + "loss": 1.2588, + "step": 27734 + }, + { + "epoch": 0.8143461154501145, + "grad_norm": 0.0, + "learning_rate": 1.7539472600916496e-06, + "loss": 1.2349, + "step": 27735 + }, + { + "epoch": 0.8143754771272536, + "grad_norm": 0.0, + "learning_rate": 1.7534093309882793e-06, + "loss": 1.0742, + "step": 27736 + }, + { + "epoch": 0.8144048388043925, + "grad_norm": 0.0, + "learning_rate": 1.75287147646094e-06, + "loss": 1.2363, + "step": 27737 + }, + { + "epoch": 0.8144342004815315, + "grad_norm": 0.0, + "learning_rate": 1.7523336965144954e-06, + "loss": 1.1655, + "step": 27738 + }, + { + "epoch": 0.8144635621586706, + "grad_norm": 0.0, + "learning_rate": 1.7517959911538107e-06, + "loss": 1.1299, + "step": 27739 + }, + { + "epoch": 0.8144929238358095, + "grad_norm": 0.0, + "learning_rate": 1.751258360383744e-06, + "loss": 1.2397, + "step": 27740 + }, + { + "epoch": 0.8145222855129485, + "grad_norm": 0.0, + "learning_rate": 1.7507208042091628e-06, + "loss": 1.1533, + "step": 27741 + }, + { + "epoch": 0.8145516471900875, + "grad_norm": 0.0, + "learning_rate": 1.750183322634924e-06, + "loss": 1.1143, + "step": 27742 + }, + { + "epoch": 0.8145810088672265, + "grad_norm": 0.0, + "learning_rate": 1.749645915665893e-06, + "loss": 1.126, + "step": 27743 + }, + { + "epoch": 0.8146103705443655, + "grad_norm": 0.0, + "learning_rate": 1.7491085833069233e-06, + "loss": 1.1816, + "step": 27744 + }, + { + "epoch": 0.8146397322215045, + "grad_norm": 0.0, + "learning_rate": 1.748571325562881e-06, + "loss": 1.2104, + "step": 27745 + }, + { + "epoch": 0.8146690938986435, + "grad_norm": 0.0, + "learning_rate": 1.7480341424386216e-06, + "loss": 1.2754, + "step": 27746 + }, + { + "epoch": 0.8146984555757825, + "grad_norm": 0.0, + "learning_rate": 1.7474970339390029e-06, + "loss": 1.4092, + "step": 27747 + }, + { + "epoch": 0.8147278172529215, + "grad_norm": 0.0, + "learning_rate": 1.7469600000688792e-06, + "loss": 1.2637, + "step": 27748 + }, + { + "epoch": 0.8147571789300605, + "grad_norm": 0.0, + "learning_rate": 1.7464230408331118e-06, + "loss": 1.2588, + "step": 27749 + }, + { + "epoch": 0.8147865406071995, + "grad_norm": 0.0, + "learning_rate": 1.7458861562365558e-06, + "loss": 1.061, + "step": 27750 + }, + { + "epoch": 0.8148159022843385, + "grad_norm": 0.0, + "learning_rate": 1.7453493462840621e-06, + "loss": 1.1729, + "step": 27751 + }, + { + "epoch": 0.8148452639614775, + "grad_norm": 0.0, + "learning_rate": 1.7448126109804908e-06, + "loss": 1.2373, + "step": 27752 + }, + { + "epoch": 0.8148746256386165, + "grad_norm": 0.0, + "learning_rate": 1.7442759503306926e-06, + "loss": 1.2949, + "step": 27753 + }, + { + "epoch": 0.8149039873157555, + "grad_norm": 0.0, + "learning_rate": 1.7437393643395228e-06, + "loss": 1.1826, + "step": 27754 + }, + { + "epoch": 0.8149333489928945, + "grad_norm": 0.0, + "learning_rate": 1.7432028530118284e-06, + "loss": 1.1445, + "step": 27755 + }, + { + "epoch": 0.8149627106700335, + "grad_norm": 0.0, + "learning_rate": 1.7426664163524686e-06, + "loss": 1.2314, + "step": 27756 + }, + { + "epoch": 0.8149920723471725, + "grad_norm": 0.0, + "learning_rate": 1.7421300543662877e-06, + "loss": 1.1538, + "step": 27757 + }, + { + "epoch": 0.8150214340243115, + "grad_norm": 0.0, + "learning_rate": 1.7415937670581428e-06, + "loss": 1.2393, + "step": 27758 + }, + { + "epoch": 0.8150507957014504, + "grad_norm": 0.0, + "learning_rate": 1.7410575544328812e-06, + "loss": 1.1875, + "step": 27759 + }, + { + "epoch": 0.8150801573785895, + "grad_norm": 0.0, + "learning_rate": 1.7405214164953477e-06, + "loss": 1.3096, + "step": 27760 + }, + { + "epoch": 0.8151095190557285, + "grad_norm": 0.0, + "learning_rate": 1.7399853532503973e-06, + "loss": 1.2256, + "step": 27761 + }, + { + "epoch": 0.8151388807328674, + "grad_norm": 0.0, + "learning_rate": 1.7394493647028742e-06, + "loss": 1.3027, + "step": 27762 + }, + { + "epoch": 0.8151682424100064, + "grad_norm": 0.0, + "learning_rate": 1.7389134508576266e-06, + "loss": 1.25, + "step": 27763 + }, + { + "epoch": 0.8151976040871455, + "grad_norm": 0.0, + "learning_rate": 1.7383776117194983e-06, + "loss": 1.1777, + "step": 27764 + }, + { + "epoch": 0.8152269657642844, + "grad_norm": 0.0, + "learning_rate": 1.737841847293339e-06, + "loss": 1.166, + "step": 27765 + }, + { + "epoch": 0.8152563274414234, + "grad_norm": 0.0, + "learning_rate": 1.7373061575839888e-06, + "loss": 1.1689, + "step": 27766 + }, + { + "epoch": 0.8152856891185625, + "grad_norm": 0.0, + "learning_rate": 1.7367705425962988e-06, + "loss": 1.1377, + "step": 27767 + }, + { + "epoch": 0.8153150507957014, + "grad_norm": 0.0, + "learning_rate": 1.7362350023351082e-06, + "loss": 1.2666, + "step": 27768 + }, + { + "epoch": 0.8153444124728404, + "grad_norm": 0.0, + "learning_rate": 1.7356995368052587e-06, + "loss": 1.3555, + "step": 27769 + }, + { + "epoch": 0.8153737741499795, + "grad_norm": 0.0, + "learning_rate": 1.7351641460115964e-06, + "loss": 1.29, + "step": 27770 + }, + { + "epoch": 0.8154031358271184, + "grad_norm": 0.0, + "learning_rate": 1.734628829958962e-06, + "loss": 1.1987, + "step": 27771 + }, + { + "epoch": 0.8154324975042574, + "grad_norm": 0.0, + "learning_rate": 1.734093588652197e-06, + "loss": 1.2705, + "step": 27772 + }, + { + "epoch": 0.8154618591813965, + "grad_norm": 0.0, + "learning_rate": 1.7335584220961365e-06, + "loss": 1.2158, + "step": 27773 + }, + { + "epoch": 0.8154912208585354, + "grad_norm": 0.0, + "learning_rate": 1.733023330295628e-06, + "loss": 1.2422, + "step": 27774 + }, + { + "epoch": 0.8155205825356744, + "grad_norm": 0.0, + "learning_rate": 1.7324883132555037e-06, + "loss": 1.166, + "step": 27775 + }, + { + "epoch": 0.8155499442128135, + "grad_norm": 0.0, + "learning_rate": 1.7319533709806068e-06, + "loss": 1.1904, + "step": 27776 + }, + { + "epoch": 0.8155793058899524, + "grad_norm": 0.0, + "learning_rate": 1.7314185034757735e-06, + "loss": 1.1367, + "step": 27777 + }, + { + "epoch": 0.8156086675670914, + "grad_norm": 0.0, + "learning_rate": 1.730883710745841e-06, + "loss": 1.2783, + "step": 27778 + }, + { + "epoch": 0.8156380292442305, + "grad_norm": 0.0, + "learning_rate": 1.7303489927956441e-06, + "loss": 1.1865, + "step": 27779 + }, + { + "epoch": 0.8156673909213694, + "grad_norm": 0.0, + "learning_rate": 1.7298143496300168e-06, + "loss": 1.2578, + "step": 27780 + }, + { + "epoch": 0.8156967525985084, + "grad_norm": 0.0, + "learning_rate": 1.7292797812537987e-06, + "loss": 1.3096, + "step": 27781 + }, + { + "epoch": 0.8157261142756475, + "grad_norm": 0.0, + "learning_rate": 1.7287452876718192e-06, + "loss": 1.2119, + "step": 27782 + }, + { + "epoch": 0.8157554759527864, + "grad_norm": 0.0, + "learning_rate": 1.7282108688889165e-06, + "loss": 1.2344, + "step": 27783 + }, + { + "epoch": 0.8157848376299254, + "grad_norm": 0.0, + "learning_rate": 1.7276765249099202e-06, + "loss": 1.2295, + "step": 27784 + }, + { + "epoch": 0.8158141993070644, + "grad_norm": 0.0, + "learning_rate": 1.7271422557396656e-06, + "loss": 1.208, + "step": 27785 + }, + { + "epoch": 0.8158435609842034, + "grad_norm": 0.0, + "learning_rate": 1.7266080613829817e-06, + "loss": 1.2427, + "step": 27786 + }, + { + "epoch": 0.8158729226613424, + "grad_norm": 0.0, + "learning_rate": 1.7260739418447003e-06, + "loss": 1.25, + "step": 27787 + }, + { + "epoch": 0.8159022843384814, + "grad_norm": 0.0, + "learning_rate": 1.7255398971296511e-06, + "loss": 1.1875, + "step": 27788 + }, + { + "epoch": 0.8159316460156204, + "grad_norm": 0.0, + "learning_rate": 1.7250059272426612e-06, + "loss": 1.3252, + "step": 27789 + }, + { + "epoch": 0.8159610076927594, + "grad_norm": 0.0, + "learning_rate": 1.7244720321885643e-06, + "loss": 1.1655, + "step": 27790 + }, + { + "epoch": 0.8159903693698984, + "grad_norm": 0.0, + "learning_rate": 1.7239382119721837e-06, + "loss": 1.2822, + "step": 27791 + }, + { + "epoch": 0.8160197310470374, + "grad_norm": 0.0, + "learning_rate": 1.723404466598353e-06, + "loss": 1.0996, + "step": 27792 + }, + { + "epoch": 0.8160490927241764, + "grad_norm": 0.0, + "learning_rate": 1.7228707960718915e-06, + "loss": 1.2314, + "step": 27793 + }, + { + "epoch": 0.8160784544013154, + "grad_norm": 0.0, + "learning_rate": 1.7223372003976325e-06, + "loss": 1.2666, + "step": 27794 + }, + { + "epoch": 0.8161078160784544, + "grad_norm": 0.0, + "learning_rate": 1.7218036795803983e-06, + "loss": 1.2168, + "step": 27795 + }, + { + "epoch": 0.8161371777555934, + "grad_norm": 0.0, + "learning_rate": 1.7212702336250132e-06, + "loss": 1.3184, + "step": 27796 + }, + { + "epoch": 0.8161665394327324, + "grad_norm": 0.0, + "learning_rate": 1.7207368625362985e-06, + "loss": 1.2188, + "step": 27797 + }, + { + "epoch": 0.8161959011098714, + "grad_norm": 0.0, + "learning_rate": 1.7202035663190831e-06, + "loss": 1.1191, + "step": 27798 + }, + { + "epoch": 0.8162252627870104, + "grad_norm": 0.0, + "learning_rate": 1.7196703449781882e-06, + "loss": 1.1982, + "step": 27799 + }, + { + "epoch": 0.8162546244641494, + "grad_norm": 0.0, + "learning_rate": 1.7191371985184325e-06, + "loss": 1.2568, + "step": 27800 + }, + { + "epoch": 0.8162839861412884, + "grad_norm": 0.0, + "learning_rate": 1.718604126944642e-06, + "loss": 1.1831, + "step": 27801 + }, + { + "epoch": 0.8163133478184273, + "grad_norm": 0.0, + "learning_rate": 1.718071130261635e-06, + "loss": 1.333, + "step": 27802 + }, + { + "epoch": 0.8163427094955664, + "grad_norm": 0.0, + "learning_rate": 1.7175382084742321e-06, + "loss": 1.2124, + "step": 27803 + }, + { + "epoch": 0.8163720711727054, + "grad_norm": 0.0, + "learning_rate": 1.7170053615872506e-06, + "loss": 1.2031, + "step": 27804 + }, + { + "epoch": 0.8164014328498443, + "grad_norm": 0.0, + "learning_rate": 1.7164725896055123e-06, + "loss": 1.1987, + "step": 27805 + }, + { + "epoch": 0.8164307945269834, + "grad_norm": 0.0, + "learning_rate": 1.7159398925338322e-06, + "loss": 1.1348, + "step": 27806 + }, + { + "epoch": 0.8164601562041224, + "grad_norm": 0.0, + "learning_rate": 1.715407270377031e-06, + "loss": 1.252, + "step": 27807 + }, + { + "epoch": 0.8164895178812613, + "grad_norm": 0.0, + "learning_rate": 1.7148747231399243e-06, + "loss": 1.1299, + "step": 27808 + }, + { + "epoch": 0.8165188795584004, + "grad_norm": 0.0, + "learning_rate": 1.7143422508273244e-06, + "loss": 1.0552, + "step": 27809 + }, + { + "epoch": 0.8165482412355394, + "grad_norm": 0.0, + "learning_rate": 1.7138098534440518e-06, + "loss": 1.2637, + "step": 27810 + }, + { + "epoch": 0.8165776029126783, + "grad_norm": 0.0, + "learning_rate": 1.713277530994919e-06, + "loss": 1.2793, + "step": 27811 + }, + { + "epoch": 0.8166069645898174, + "grad_norm": 0.0, + "learning_rate": 1.7127452834847391e-06, + "loss": 1.207, + "step": 27812 + }, + { + "epoch": 0.8166363262669564, + "grad_norm": 0.0, + "learning_rate": 1.712213110918325e-06, + "loss": 1.3203, + "step": 27813 + }, + { + "epoch": 0.8166656879440953, + "grad_norm": 0.0, + "learning_rate": 1.7116810133004914e-06, + "loss": 1.1807, + "step": 27814 + }, + { + "epoch": 0.8166950496212344, + "grad_norm": 0.0, + "learning_rate": 1.7111489906360468e-06, + "loss": 1.1895, + "step": 27815 + }, + { + "epoch": 0.8167244112983734, + "grad_norm": 0.0, + "learning_rate": 1.7106170429298075e-06, + "loss": 1.2563, + "step": 27816 + }, + { + "epoch": 0.8167537729755123, + "grad_norm": 0.0, + "learning_rate": 1.7100851701865818e-06, + "loss": 1.249, + "step": 27817 + }, + { + "epoch": 0.8167831346526514, + "grad_norm": 0.0, + "learning_rate": 1.7095533724111758e-06, + "loss": 1.0781, + "step": 27818 + }, + { + "epoch": 0.8168124963297904, + "grad_norm": 0.0, + "learning_rate": 1.7090216496084044e-06, + "loss": 1.2329, + "step": 27819 + }, + { + "epoch": 0.8168418580069293, + "grad_norm": 0.0, + "learning_rate": 1.7084900017830742e-06, + "loss": 1.166, + "step": 27820 + }, + { + "epoch": 0.8168712196840684, + "grad_norm": 0.0, + "learning_rate": 1.707958428939991e-06, + "loss": 1.2544, + "step": 27821 + }, + { + "epoch": 0.8169005813612074, + "grad_norm": 0.0, + "learning_rate": 1.707426931083962e-06, + "loss": 1.2178, + "step": 27822 + }, + { + "epoch": 0.8169299430383463, + "grad_norm": 0.0, + "learning_rate": 1.706895508219797e-06, + "loss": 1.2139, + "step": 27823 + }, + { + "epoch": 0.8169593047154854, + "grad_norm": 0.0, + "learning_rate": 1.7063641603522974e-06, + "loss": 1.2568, + "step": 27824 + }, + { + "epoch": 0.8169886663926244, + "grad_norm": 0.0, + "learning_rate": 1.705832887486274e-06, + "loss": 1.0898, + "step": 27825 + }, + { + "epoch": 0.8170180280697633, + "grad_norm": 0.0, + "learning_rate": 1.7053016896265252e-06, + "loss": 1.2173, + "step": 27826 + }, + { + "epoch": 0.8170473897469024, + "grad_norm": 0.0, + "learning_rate": 1.7047705667778614e-06, + "loss": 1.1855, + "step": 27827 + }, + { + "epoch": 0.8170767514240413, + "grad_norm": 0.0, + "learning_rate": 1.70423951894508e-06, + "loss": 1.207, + "step": 27828 + }, + { + "epoch": 0.8171061131011803, + "grad_norm": 0.0, + "learning_rate": 1.7037085461329827e-06, + "loss": 1.189, + "step": 27829 + }, + { + "epoch": 0.8171354747783194, + "grad_norm": 0.0, + "learning_rate": 1.7031776483463768e-06, + "loss": 1.1494, + "step": 27830 + }, + { + "epoch": 0.8171648364554583, + "grad_norm": 0.0, + "learning_rate": 1.7026468255900575e-06, + "loss": 1.21, + "step": 27831 + }, + { + "epoch": 0.8171941981325973, + "grad_norm": 0.0, + "learning_rate": 1.7021160778688294e-06, + "loss": 1.2871, + "step": 27832 + }, + { + "epoch": 0.8172235598097364, + "grad_norm": 0.0, + "learning_rate": 1.701585405187489e-06, + "loss": 1.2451, + "step": 27833 + }, + { + "epoch": 0.8172529214868753, + "grad_norm": 0.0, + "learning_rate": 1.7010548075508403e-06, + "loss": 1.1611, + "step": 27834 + }, + { + "epoch": 0.8172822831640143, + "grad_norm": 0.0, + "learning_rate": 1.700524284963677e-06, + "loss": 1.146, + "step": 27835 + }, + { + "epoch": 0.8173116448411534, + "grad_norm": 0.0, + "learning_rate": 1.6999938374307989e-06, + "loss": 1.2046, + "step": 27836 + }, + { + "epoch": 0.8173410065182923, + "grad_norm": 0.0, + "learning_rate": 1.6994634649570018e-06, + "loss": 1.3223, + "step": 27837 + }, + { + "epoch": 0.8173703681954313, + "grad_norm": 0.0, + "learning_rate": 1.698933167547081e-06, + "loss": 1.3384, + "step": 27838 + }, + { + "epoch": 0.8173997298725704, + "grad_norm": 0.0, + "learning_rate": 1.6984029452058348e-06, + "loss": 1.1333, + "step": 27839 + }, + { + "epoch": 0.8174290915497093, + "grad_norm": 0.0, + "learning_rate": 1.6978727979380549e-06, + "loss": 1.3047, + "step": 27840 + }, + { + "epoch": 0.8174584532268483, + "grad_norm": 0.0, + "learning_rate": 1.6973427257485397e-06, + "loss": 1.1265, + "step": 27841 + }, + { + "epoch": 0.8174878149039874, + "grad_norm": 0.0, + "learning_rate": 1.6968127286420777e-06, + "loss": 1.1475, + "step": 27842 + }, + { + "epoch": 0.8175171765811263, + "grad_norm": 0.0, + "learning_rate": 1.6962828066234682e-06, + "loss": 1.2021, + "step": 27843 + }, + { + "epoch": 0.8175465382582653, + "grad_norm": 0.0, + "learning_rate": 1.6957529596974987e-06, + "loss": 1.2725, + "step": 27844 + }, + { + "epoch": 0.8175758999354044, + "grad_norm": 0.0, + "learning_rate": 1.6952231878689629e-06, + "loss": 1.1162, + "step": 27845 + }, + { + "epoch": 0.8176052616125433, + "grad_norm": 0.0, + "learning_rate": 1.6946934911426482e-06, + "loss": 1.144, + "step": 27846 + }, + { + "epoch": 0.8176346232896823, + "grad_norm": 0.0, + "learning_rate": 1.6941638695233488e-06, + "loss": 1.3164, + "step": 27847 + }, + { + "epoch": 0.8176639849668214, + "grad_norm": 0.0, + "learning_rate": 1.6936343230158536e-06, + "loss": 1.1357, + "step": 27848 + }, + { + "epoch": 0.8176933466439603, + "grad_norm": 0.0, + "learning_rate": 1.6931048516249482e-06, + "loss": 1.3379, + "step": 27849 + }, + { + "epoch": 0.8177227083210993, + "grad_norm": 0.0, + "learning_rate": 1.692575455355424e-06, + "loss": 1.1655, + "step": 27850 + }, + { + "epoch": 0.8177520699982384, + "grad_norm": 0.0, + "learning_rate": 1.6920461342120665e-06, + "loss": 1.2715, + "step": 27851 + }, + { + "epoch": 0.8177814316753773, + "grad_norm": 0.0, + "learning_rate": 1.691516888199668e-06, + "loss": 1.1826, + "step": 27852 + }, + { + "epoch": 0.8178107933525163, + "grad_norm": 0.0, + "learning_rate": 1.690987717323005e-06, + "loss": 1.1978, + "step": 27853 + }, + { + "epoch": 0.8178401550296553, + "grad_norm": 0.0, + "learning_rate": 1.6904586215868713e-06, + "loss": 1.208, + "step": 27854 + }, + { + "epoch": 0.8178695167067943, + "grad_norm": 0.0, + "learning_rate": 1.6899296009960452e-06, + "loss": 1.2305, + "step": 27855 + }, + { + "epoch": 0.8178988783839333, + "grad_norm": 0.0, + "learning_rate": 1.6894006555553167e-06, + "loss": 1.3398, + "step": 27856 + }, + { + "epoch": 0.8179282400610723, + "grad_norm": 0.0, + "learning_rate": 1.6888717852694658e-06, + "loss": 1.2148, + "step": 27857 + }, + { + "epoch": 0.8179576017382113, + "grad_norm": 0.0, + "learning_rate": 1.6883429901432747e-06, + "loss": 1.3271, + "step": 27858 + }, + { + "epoch": 0.8179869634153503, + "grad_norm": 0.0, + "learning_rate": 1.6878142701815281e-06, + "loss": 1.1221, + "step": 27859 + }, + { + "epoch": 0.8180163250924893, + "grad_norm": 0.0, + "learning_rate": 1.6872856253890058e-06, + "loss": 1.2285, + "step": 27860 + }, + { + "epoch": 0.8180456867696283, + "grad_norm": 0.0, + "learning_rate": 1.6867570557704894e-06, + "loss": 1.1748, + "step": 27861 + }, + { + "epoch": 0.8180750484467673, + "grad_norm": 0.0, + "learning_rate": 1.6862285613307539e-06, + "loss": 1.189, + "step": 27862 + }, + { + "epoch": 0.8181044101239062, + "grad_norm": 0.0, + "learning_rate": 1.6857001420745866e-06, + "loss": 1.2412, + "step": 27863 + }, + { + "epoch": 0.8181337718010453, + "grad_norm": 0.0, + "learning_rate": 1.6851717980067595e-06, + "loss": 1.1689, + "step": 27864 + }, + { + "epoch": 0.8181631334781843, + "grad_norm": 0.0, + "learning_rate": 1.6846435291320552e-06, + "loss": 1.3369, + "step": 27865 + }, + { + "epoch": 0.8181924951553232, + "grad_norm": 0.0, + "learning_rate": 1.6841153354552475e-06, + "loss": 1.2236, + "step": 27866 + }, + { + "epoch": 0.8182218568324623, + "grad_norm": 0.0, + "learning_rate": 1.6835872169811162e-06, + "loss": 1.2002, + "step": 27867 + }, + { + "epoch": 0.8182512185096013, + "grad_norm": 0.0, + "learning_rate": 1.6830591737144353e-06, + "loss": 1.2637, + "step": 27868 + }, + { + "epoch": 0.8182805801867402, + "grad_norm": 0.0, + "learning_rate": 1.6825312056599818e-06, + "loss": 1.0786, + "step": 27869 + }, + { + "epoch": 0.8183099418638793, + "grad_norm": 0.0, + "learning_rate": 1.6820033128225278e-06, + "loss": 1.1709, + "step": 27870 + }, + { + "epoch": 0.8183393035410182, + "grad_norm": 0.0, + "learning_rate": 1.6814754952068457e-06, + "loss": 1.2344, + "step": 27871 + }, + { + "epoch": 0.8183686652181572, + "grad_norm": 0.0, + "learning_rate": 1.6809477528177132e-06, + "loss": 1.3789, + "step": 27872 + }, + { + "epoch": 0.8183980268952963, + "grad_norm": 0.0, + "learning_rate": 1.6804200856598985e-06, + "loss": 1.2783, + "step": 27873 + }, + { + "epoch": 0.8184273885724352, + "grad_norm": 0.0, + "learning_rate": 1.6798924937381778e-06, + "loss": 1.2324, + "step": 27874 + }, + { + "epoch": 0.8184567502495742, + "grad_norm": 0.0, + "learning_rate": 1.6793649770573184e-06, + "loss": 1.2339, + "step": 27875 + }, + { + "epoch": 0.8184861119267133, + "grad_norm": 0.0, + "learning_rate": 1.6788375356220943e-06, + "loss": 1.1855, + "step": 27876 + }, + { + "epoch": 0.8185154736038522, + "grad_norm": 0.0, + "learning_rate": 1.6783101694372761e-06, + "loss": 1.2666, + "step": 27877 + }, + { + "epoch": 0.8185448352809912, + "grad_norm": 0.0, + "learning_rate": 1.6777828785076256e-06, + "loss": 1.1328, + "step": 27878 + }, + { + "epoch": 0.8185741969581303, + "grad_norm": 0.0, + "learning_rate": 1.6772556628379177e-06, + "loss": 1.1792, + "step": 27879 + }, + { + "epoch": 0.8186035586352692, + "grad_norm": 0.0, + "learning_rate": 1.6767285224329165e-06, + "loss": 1.2817, + "step": 27880 + }, + { + "epoch": 0.8186329203124082, + "grad_norm": 0.0, + "learning_rate": 1.6762014572973929e-06, + "loss": 1.2266, + "step": 27881 + }, + { + "epoch": 0.8186622819895473, + "grad_norm": 0.0, + "learning_rate": 1.6756744674361092e-06, + "loss": 1.2959, + "step": 27882 + }, + { + "epoch": 0.8186916436666862, + "grad_norm": 0.0, + "learning_rate": 1.6751475528538352e-06, + "loss": 1.1738, + "step": 27883 + }, + { + "epoch": 0.8187210053438252, + "grad_norm": 0.0, + "learning_rate": 1.6746207135553337e-06, + "loss": 1.3281, + "step": 27884 + }, + { + "epoch": 0.8187503670209643, + "grad_norm": 0.0, + "learning_rate": 1.6740939495453702e-06, + "loss": 1.2686, + "step": 27885 + }, + { + "epoch": 0.8187797286981032, + "grad_norm": 0.0, + "learning_rate": 1.6735672608287067e-06, + "loss": 1.2534, + "step": 27886 + }, + { + "epoch": 0.8188090903752422, + "grad_norm": 0.0, + "learning_rate": 1.6730406474101046e-06, + "loss": 1.2041, + "step": 27887 + }, + { + "epoch": 0.8188384520523813, + "grad_norm": 0.0, + "learning_rate": 1.672514109294332e-06, + "loss": 1.1523, + "step": 27888 + }, + { + "epoch": 0.8188678137295202, + "grad_norm": 0.0, + "learning_rate": 1.6719876464861428e-06, + "loss": 1.2998, + "step": 27889 + }, + { + "epoch": 0.8188971754066592, + "grad_norm": 0.0, + "learning_rate": 1.6714612589903057e-06, + "loss": 1.2676, + "step": 27890 + }, + { + "epoch": 0.8189265370837983, + "grad_norm": 0.0, + "learning_rate": 1.6709349468115743e-06, + "loss": 1.2744, + "step": 27891 + }, + { + "epoch": 0.8189558987609372, + "grad_norm": 0.0, + "learning_rate": 1.670408709954714e-06, + "loss": 1.167, + "step": 27892 + }, + { + "epoch": 0.8189852604380762, + "grad_norm": 0.0, + "learning_rate": 1.6698825484244818e-06, + "loss": 1.2861, + "step": 27893 + }, + { + "epoch": 0.8190146221152153, + "grad_norm": 0.0, + "learning_rate": 1.6693564622256342e-06, + "loss": 1.4834, + "step": 27894 + }, + { + "epoch": 0.8190439837923542, + "grad_norm": 0.0, + "learning_rate": 1.6688304513629272e-06, + "loss": 1.1768, + "step": 27895 + }, + { + "epoch": 0.8190733454694932, + "grad_norm": 0.0, + "learning_rate": 1.6683045158411227e-06, + "loss": 1.2085, + "step": 27896 + }, + { + "epoch": 0.8191027071466322, + "grad_norm": 0.0, + "learning_rate": 1.6677786556649733e-06, + "loss": 1.1377, + "step": 27897 + }, + { + "epoch": 0.8191320688237712, + "grad_norm": 0.0, + "learning_rate": 1.6672528708392344e-06, + "loss": 1.2988, + "step": 27898 + }, + { + "epoch": 0.8191614305009102, + "grad_norm": 0.0, + "learning_rate": 1.6667271613686641e-06, + "loss": 1.2236, + "step": 27899 + }, + { + "epoch": 0.8191907921780492, + "grad_norm": 0.0, + "learning_rate": 1.6662015272580113e-06, + "loss": 1.25, + "step": 27900 + }, + { + "epoch": 0.8192201538551882, + "grad_norm": 0.0, + "learning_rate": 1.6656759685120349e-06, + "loss": 1.1494, + "step": 27901 + }, + { + "epoch": 0.8192495155323272, + "grad_norm": 0.0, + "learning_rate": 1.6651504851354848e-06, + "loss": 1.1807, + "step": 27902 + }, + { + "epoch": 0.8192788772094662, + "grad_norm": 0.0, + "learning_rate": 1.6646250771331141e-06, + "loss": 1.2988, + "step": 27903 + }, + { + "epoch": 0.8193082388866052, + "grad_norm": 0.0, + "learning_rate": 1.6640997445096707e-06, + "loss": 1.2588, + "step": 27904 + }, + { + "epoch": 0.8193376005637442, + "grad_norm": 0.0, + "learning_rate": 1.663574487269911e-06, + "loss": 1.2505, + "step": 27905 + }, + { + "epoch": 0.8193669622408832, + "grad_norm": 0.0, + "learning_rate": 1.6630493054185825e-06, + "loss": 1.3242, + "step": 27906 + }, + { + "epoch": 0.8193963239180222, + "grad_norm": 0.0, + "learning_rate": 1.6625241989604313e-06, + "loss": 1.2231, + "step": 27907 + }, + { + "epoch": 0.8194256855951612, + "grad_norm": 0.0, + "learning_rate": 1.6619991679002111e-06, + "loss": 1.2407, + "step": 27908 + }, + { + "epoch": 0.8194550472723002, + "grad_norm": 0.0, + "learning_rate": 1.6614742122426685e-06, + "loss": 1.2715, + "step": 27909 + }, + { + "epoch": 0.8194844089494392, + "grad_norm": 0.0, + "learning_rate": 1.6609493319925496e-06, + "loss": 1.2168, + "step": 27910 + }, + { + "epoch": 0.8195137706265782, + "grad_norm": 0.0, + "learning_rate": 1.6604245271545993e-06, + "loss": 1.1914, + "step": 27911 + }, + { + "epoch": 0.8195431323037172, + "grad_norm": 0.0, + "learning_rate": 1.6598997977335684e-06, + "loss": 1.124, + "step": 27912 + }, + { + "epoch": 0.8195724939808562, + "grad_norm": 0.0, + "learning_rate": 1.6593751437341977e-06, + "loss": 1.2368, + "step": 27913 + }, + { + "epoch": 0.8196018556579951, + "grad_norm": 0.0, + "learning_rate": 1.6588505651612353e-06, + "loss": 1.1543, + "step": 27914 + }, + { + "epoch": 0.8196312173351342, + "grad_norm": 0.0, + "learning_rate": 1.6583260620194207e-06, + "loss": 1.1514, + "step": 27915 + }, + { + "epoch": 0.8196605790122732, + "grad_norm": 0.0, + "learning_rate": 1.6578016343135028e-06, + "loss": 1.2969, + "step": 27916 + }, + { + "epoch": 0.8196899406894121, + "grad_norm": 0.0, + "learning_rate": 1.6572772820482207e-06, + "loss": 1.373, + "step": 27917 + }, + { + "epoch": 0.8197193023665512, + "grad_norm": 0.0, + "learning_rate": 1.6567530052283176e-06, + "loss": 1.2061, + "step": 27918 + }, + { + "epoch": 0.8197486640436902, + "grad_norm": 0.0, + "learning_rate": 1.6562288038585328e-06, + "loss": 1.123, + "step": 27919 + }, + { + "epoch": 0.8197780257208291, + "grad_norm": 0.0, + "learning_rate": 1.655704677943606e-06, + "loss": 1.2563, + "step": 27920 + }, + { + "epoch": 0.8198073873979682, + "grad_norm": 0.0, + "learning_rate": 1.655180627488282e-06, + "loss": 1.2803, + "step": 27921 + }, + { + "epoch": 0.8198367490751072, + "grad_norm": 0.0, + "learning_rate": 1.654656652497293e-06, + "loss": 1.063, + "step": 27922 + }, + { + "epoch": 0.8198661107522461, + "grad_norm": 0.0, + "learning_rate": 1.6541327529753836e-06, + "loss": 1.2715, + "step": 27923 + }, + { + "epoch": 0.8198954724293852, + "grad_norm": 0.0, + "learning_rate": 1.6536089289272871e-06, + "loss": 1.1875, + "step": 27924 + }, + { + "epoch": 0.8199248341065242, + "grad_norm": 0.0, + "learning_rate": 1.6530851803577451e-06, + "loss": 1.2104, + "step": 27925 + }, + { + "epoch": 0.8199541957836631, + "grad_norm": 0.0, + "learning_rate": 1.6525615072714917e-06, + "loss": 1.3184, + "step": 27926 + }, + { + "epoch": 0.8199835574608022, + "grad_norm": 0.0, + "learning_rate": 1.652037909673262e-06, + "loss": 1.2627, + "step": 27927 + }, + { + "epoch": 0.8200129191379412, + "grad_norm": 0.0, + "learning_rate": 1.651514387567792e-06, + "loss": 1.1797, + "step": 27928 + }, + { + "epoch": 0.8200422808150801, + "grad_norm": 0.0, + "learning_rate": 1.6509909409598123e-06, + "loss": 1.1934, + "step": 27929 + }, + { + "epoch": 0.8200716424922192, + "grad_norm": 0.0, + "learning_rate": 1.6504675698540628e-06, + "loss": 1.2979, + "step": 27930 + }, + { + "epoch": 0.8201010041693582, + "grad_norm": 0.0, + "learning_rate": 1.6499442742552707e-06, + "loss": 1.2646, + "step": 27931 + }, + { + "epoch": 0.8201303658464971, + "grad_norm": 0.0, + "learning_rate": 1.6494210541681733e-06, + "loss": 1.2305, + "step": 27932 + }, + { + "epoch": 0.8201597275236362, + "grad_norm": 0.0, + "learning_rate": 1.6488979095975e-06, + "loss": 1.2524, + "step": 27933 + }, + { + "epoch": 0.8201890892007752, + "grad_norm": 0.0, + "learning_rate": 1.6483748405479816e-06, + "loss": 1.1714, + "step": 27934 + }, + { + "epoch": 0.8202184508779141, + "grad_norm": 0.0, + "learning_rate": 1.6478518470243465e-06, + "loss": 1.2285, + "step": 27935 + }, + { + "epoch": 0.8202478125550532, + "grad_norm": 0.0, + "learning_rate": 1.6473289290313276e-06, + "loss": 1.2412, + "step": 27936 + }, + { + "epoch": 0.8202771742321922, + "grad_norm": 0.0, + "learning_rate": 1.6468060865736534e-06, + "loss": 1.1694, + "step": 27937 + }, + { + "epoch": 0.8203065359093311, + "grad_norm": 0.0, + "learning_rate": 1.646283319656048e-06, + "loss": 1.248, + "step": 27938 + }, + { + "epoch": 0.8203358975864702, + "grad_norm": 0.0, + "learning_rate": 1.645760628283245e-06, + "loss": 1.3809, + "step": 27939 + }, + { + "epoch": 0.8203652592636091, + "grad_norm": 0.0, + "learning_rate": 1.6452380124599653e-06, + "loss": 1.2559, + "step": 27940 + }, + { + "epoch": 0.8203946209407481, + "grad_norm": 0.0, + "learning_rate": 1.6447154721909398e-06, + "loss": 1.252, + "step": 27941 + }, + { + "epoch": 0.8204239826178872, + "grad_norm": 0.0, + "learning_rate": 1.6441930074808932e-06, + "loss": 1.2334, + "step": 27942 + }, + { + "epoch": 0.8204533442950261, + "grad_norm": 0.0, + "learning_rate": 1.6436706183345485e-06, + "loss": 1.1265, + "step": 27943 + }, + { + "epoch": 0.8204827059721651, + "grad_norm": 0.0, + "learning_rate": 1.6431483047566277e-06, + "loss": 1.2021, + "step": 27944 + }, + { + "epoch": 0.8205120676493042, + "grad_norm": 0.0, + "learning_rate": 1.6426260667518602e-06, + "loss": 1.1812, + "step": 27945 + }, + { + "epoch": 0.8205414293264431, + "grad_norm": 0.0, + "learning_rate": 1.6421039043249653e-06, + "loss": 1.2422, + "step": 27946 + }, + { + "epoch": 0.8205707910035821, + "grad_norm": 0.0, + "learning_rate": 1.6415818174806631e-06, + "loss": 1.2871, + "step": 27947 + }, + { + "epoch": 0.8206001526807212, + "grad_norm": 0.0, + "learning_rate": 1.6410598062236793e-06, + "loss": 1.2207, + "step": 27948 + }, + { + "epoch": 0.8206295143578601, + "grad_norm": 0.0, + "learning_rate": 1.6405378705587293e-06, + "loss": 1.2959, + "step": 27949 + }, + { + "epoch": 0.8206588760349991, + "grad_norm": 0.0, + "learning_rate": 1.6400160104905395e-06, + "loss": 1.1987, + "step": 27950 + }, + { + "epoch": 0.8206882377121382, + "grad_norm": 0.0, + "learning_rate": 1.6394942260238255e-06, + "loss": 1.1553, + "step": 27951 + }, + { + "epoch": 0.8207175993892771, + "grad_norm": 0.0, + "learning_rate": 1.6389725171633076e-06, + "loss": 1.1304, + "step": 27952 + }, + { + "epoch": 0.8207469610664161, + "grad_norm": 0.0, + "learning_rate": 1.6384508839136981e-06, + "loss": 1.2637, + "step": 27953 + }, + { + "epoch": 0.8207763227435552, + "grad_norm": 0.0, + "learning_rate": 1.6379293262797224e-06, + "loss": 1.2607, + "step": 27954 + }, + { + "epoch": 0.8208056844206941, + "grad_norm": 0.0, + "learning_rate": 1.6374078442660933e-06, + "loss": 1.2422, + "step": 27955 + }, + { + "epoch": 0.8208350460978331, + "grad_norm": 0.0, + "learning_rate": 1.636886437877524e-06, + "loss": 1.1494, + "step": 27956 + }, + { + "epoch": 0.8208644077749722, + "grad_norm": 0.0, + "learning_rate": 1.6363651071187348e-06, + "loss": 1.1011, + "step": 27957 + }, + { + "epoch": 0.8208937694521111, + "grad_norm": 0.0, + "learning_rate": 1.6358438519944387e-06, + "loss": 1.1846, + "step": 27958 + }, + { + "epoch": 0.8209231311292501, + "grad_norm": 0.0, + "learning_rate": 1.6353226725093486e-06, + "loss": 1.2539, + "step": 27959 + }, + { + "epoch": 0.8209524928063892, + "grad_norm": 0.0, + "learning_rate": 1.6348015686681739e-06, + "loss": 1.2461, + "step": 27960 + }, + { + "epoch": 0.8209818544835281, + "grad_norm": 0.0, + "learning_rate": 1.6342805404756345e-06, + "loss": 1.3047, + "step": 27961 + }, + { + "epoch": 0.8210112161606671, + "grad_norm": 0.0, + "learning_rate": 1.6337595879364366e-06, + "loss": 1.1812, + "step": 27962 + }, + { + "epoch": 0.821040577837806, + "grad_norm": 0.0, + "learning_rate": 1.633238711055295e-06, + "loss": 1.252, + "step": 27963 + }, + { + "epoch": 0.8210699395149451, + "grad_norm": 0.0, + "learning_rate": 1.6327179098369172e-06, + "loss": 1.2188, + "step": 27964 + }, + { + "epoch": 0.8210993011920841, + "grad_norm": 0.0, + "learning_rate": 1.632197184286015e-06, + "loss": 1.2246, + "step": 27965 + }, + { + "epoch": 0.821128662869223, + "grad_norm": 0.0, + "learning_rate": 1.6316765344072983e-06, + "loss": 1.3008, + "step": 27966 + }, + { + "epoch": 0.8211580245463621, + "grad_norm": 0.0, + "learning_rate": 1.6311559602054727e-06, + "loss": 1.229, + "step": 27967 + }, + { + "epoch": 0.8211873862235011, + "grad_norm": 0.0, + "learning_rate": 1.6306354616852483e-06, + "loss": 1.2007, + "step": 27968 + }, + { + "epoch": 0.82121674790064, + "grad_norm": 0.0, + "learning_rate": 1.6301150388513276e-06, + "loss": 1.2188, + "step": 27969 + }, + { + "epoch": 0.8212461095777791, + "grad_norm": 0.0, + "learning_rate": 1.6295946917084227e-06, + "loss": 1.2163, + "step": 27970 + }, + { + "epoch": 0.8212754712549181, + "grad_norm": 0.0, + "learning_rate": 1.629074420261234e-06, + "loss": 1.0508, + "step": 27971 + }, + { + "epoch": 0.821304832932057, + "grad_norm": 0.0, + "learning_rate": 1.6285542245144714e-06, + "loss": 1.2402, + "step": 27972 + }, + { + "epoch": 0.8213341946091961, + "grad_norm": 0.0, + "learning_rate": 1.6280341044728353e-06, + "loss": 1.2544, + "step": 27973 + }, + { + "epoch": 0.8213635562863351, + "grad_norm": 0.0, + "learning_rate": 1.627514060141032e-06, + "loss": 1.168, + "step": 27974 + }, + { + "epoch": 0.821392917963474, + "grad_norm": 0.0, + "learning_rate": 1.6269940915237648e-06, + "loss": 1.2666, + "step": 27975 + }, + { + "epoch": 0.8214222796406131, + "grad_norm": 0.0, + "learning_rate": 1.6264741986257337e-06, + "loss": 1.2236, + "step": 27976 + }, + { + "epoch": 0.8214516413177521, + "grad_norm": 0.0, + "learning_rate": 1.625954381451641e-06, + "loss": 1.0259, + "step": 27977 + }, + { + "epoch": 0.821481002994891, + "grad_norm": 0.0, + "learning_rate": 1.6254346400061848e-06, + "loss": 1.2773, + "step": 27978 + }, + { + "epoch": 0.8215103646720301, + "grad_norm": 0.0, + "learning_rate": 1.6249149742940706e-06, + "loss": 1.1797, + "step": 27979 + }, + { + "epoch": 0.821539726349169, + "grad_norm": 0.0, + "learning_rate": 1.6243953843199922e-06, + "loss": 1.501, + "step": 27980 + }, + { + "epoch": 0.821569088026308, + "grad_norm": 0.0, + "learning_rate": 1.6238758700886549e-06, + "loss": 1.3135, + "step": 27981 + }, + { + "epoch": 0.8215984497034471, + "grad_norm": 0.0, + "learning_rate": 1.6233564316047523e-06, + "loss": 1.2012, + "step": 27982 + }, + { + "epoch": 0.821627811380586, + "grad_norm": 0.0, + "learning_rate": 1.6228370688729832e-06, + "loss": 1.0991, + "step": 27983 + }, + { + "epoch": 0.821657173057725, + "grad_norm": 0.0, + "learning_rate": 1.6223177818980408e-06, + "loss": 1.1963, + "step": 27984 + }, + { + "epoch": 0.8216865347348641, + "grad_norm": 0.0, + "learning_rate": 1.6217985706846273e-06, + "loss": 1.2246, + "step": 27985 + }, + { + "epoch": 0.821715896412003, + "grad_norm": 0.0, + "learning_rate": 1.621279435237435e-06, + "loss": 1.3008, + "step": 27986 + }, + { + "epoch": 0.821745258089142, + "grad_norm": 0.0, + "learning_rate": 1.6207603755611567e-06, + "loss": 1.0713, + "step": 27987 + }, + { + "epoch": 0.8217746197662811, + "grad_norm": 0.0, + "learning_rate": 1.6202413916604897e-06, + "loss": 1.3062, + "step": 27988 + }, + { + "epoch": 0.82180398144342, + "grad_norm": 0.0, + "learning_rate": 1.6197224835401237e-06, + "loss": 1.1597, + "step": 27989 + }, + { + "epoch": 0.821833343120559, + "grad_norm": 0.0, + "learning_rate": 1.619203651204756e-06, + "loss": 1.0771, + "step": 27990 + }, + { + "epoch": 0.8218627047976981, + "grad_norm": 0.0, + "learning_rate": 1.618684894659076e-06, + "loss": 1.4736, + "step": 27991 + }, + { + "epoch": 0.821892066474837, + "grad_norm": 0.0, + "learning_rate": 1.6181662139077758e-06, + "loss": 1.2441, + "step": 27992 + }, + { + "epoch": 0.821921428151976, + "grad_norm": 0.0, + "learning_rate": 1.6176476089555416e-06, + "loss": 1.0884, + "step": 27993 + }, + { + "epoch": 0.8219507898291151, + "grad_norm": 0.0, + "learning_rate": 1.6171290798070704e-06, + "loss": 1.2407, + "step": 27994 + }, + { + "epoch": 0.821980151506254, + "grad_norm": 0.0, + "learning_rate": 1.6166106264670467e-06, + "loss": 1.1924, + "step": 27995 + }, + { + "epoch": 0.822009513183393, + "grad_norm": 0.0, + "learning_rate": 1.6160922489401586e-06, + "loss": 1.23, + "step": 27996 + }, + { + "epoch": 0.8220388748605321, + "grad_norm": 0.0, + "learning_rate": 1.6155739472310971e-06, + "loss": 1.0938, + "step": 27997 + }, + { + "epoch": 0.822068236537671, + "grad_norm": 0.0, + "learning_rate": 1.615055721344545e-06, + "loss": 1.2041, + "step": 27998 + }, + { + "epoch": 0.82209759821481, + "grad_norm": 0.0, + "learning_rate": 1.6145375712851951e-06, + "loss": 1.2402, + "step": 27999 + }, + { + "epoch": 0.8221269598919491, + "grad_norm": 0.0, + "learning_rate": 1.6140194970577283e-06, + "loss": 1.124, + "step": 28000 + }, + { + "epoch": 0.822156321569088, + "grad_norm": 0.0, + "learning_rate": 1.6135014986668318e-06, + "loss": 1.313, + "step": 28001 + }, + { + "epoch": 0.822185683246227, + "grad_norm": 0.0, + "learning_rate": 1.6129835761171864e-06, + "loss": 1.3174, + "step": 28002 + }, + { + "epoch": 0.8222150449233661, + "grad_norm": 0.0, + "learning_rate": 1.6124657294134805e-06, + "loss": 1.252, + "step": 28003 + }, + { + "epoch": 0.822244406600505, + "grad_norm": 0.0, + "learning_rate": 1.6119479585603926e-06, + "loss": 1.2881, + "step": 28004 + }, + { + "epoch": 0.822273768277644, + "grad_norm": 0.0, + "learning_rate": 1.61143026356261e-06, + "loss": 1.2412, + "step": 28005 + }, + { + "epoch": 0.822303129954783, + "grad_norm": 0.0, + "learning_rate": 1.6109126444248125e-06, + "loss": 1.3018, + "step": 28006 + }, + { + "epoch": 0.822332491631922, + "grad_norm": 0.0, + "learning_rate": 1.6103951011516794e-06, + "loss": 1.1704, + "step": 28007 + }, + { + "epoch": 0.822361853309061, + "grad_norm": 0.0, + "learning_rate": 1.6098776337478927e-06, + "loss": 1.2744, + "step": 28008 + }, + { + "epoch": 0.8223912149862, + "grad_norm": 0.0, + "learning_rate": 1.6093602422181298e-06, + "loss": 1.2012, + "step": 28009 + }, + { + "epoch": 0.822420576663339, + "grad_norm": 0.0, + "learning_rate": 1.6088429265670725e-06, + "loss": 1.1777, + "step": 28010 + }, + { + "epoch": 0.822449938340478, + "grad_norm": 0.0, + "learning_rate": 1.6083256867993947e-06, + "loss": 1.0713, + "step": 28011 + }, + { + "epoch": 0.822479300017617, + "grad_norm": 0.0, + "learning_rate": 1.6078085229197805e-06, + "loss": 1.0449, + "step": 28012 + }, + { + "epoch": 0.822508661694756, + "grad_norm": 0.0, + "learning_rate": 1.6072914349328993e-06, + "loss": 1.21, + "step": 28013 + }, + { + "epoch": 0.822538023371895, + "grad_norm": 0.0, + "learning_rate": 1.6067744228434345e-06, + "loss": 1.1934, + "step": 28014 + }, + { + "epoch": 0.822567385049034, + "grad_norm": 0.0, + "learning_rate": 1.6062574866560576e-06, + "loss": 1.251, + "step": 28015 + }, + { + "epoch": 0.822596746726173, + "grad_norm": 0.0, + "learning_rate": 1.6057406263754439e-06, + "loss": 1.3418, + "step": 28016 + }, + { + "epoch": 0.822626108403312, + "grad_norm": 0.0, + "learning_rate": 1.6052238420062682e-06, + "loss": 1.2783, + "step": 28017 + }, + { + "epoch": 0.822655470080451, + "grad_norm": 0.0, + "learning_rate": 1.6047071335532006e-06, + "loss": 1.3242, + "step": 28018 + }, + { + "epoch": 0.82268483175759, + "grad_norm": 0.0, + "learning_rate": 1.604190501020918e-06, + "loss": 1.1001, + "step": 28019 + }, + { + "epoch": 0.822714193434729, + "grad_norm": 0.0, + "learning_rate": 1.6036739444140891e-06, + "loss": 1.2803, + "step": 28020 + }, + { + "epoch": 0.822743555111868, + "grad_norm": 0.0, + "learning_rate": 1.6031574637373893e-06, + "loss": 1.2344, + "step": 28021 + }, + { + "epoch": 0.822772916789007, + "grad_norm": 0.0, + "learning_rate": 1.6026410589954843e-06, + "loss": 1.2246, + "step": 28022 + }, + { + "epoch": 0.822802278466146, + "grad_norm": 0.0, + "learning_rate": 1.6021247301930498e-06, + "loss": 1.1729, + "step": 28023 + }, + { + "epoch": 0.822831640143285, + "grad_norm": 0.0, + "learning_rate": 1.601608477334753e-06, + "loss": 1.1216, + "step": 28024 + }, + { + "epoch": 0.822861001820424, + "grad_norm": 0.0, + "learning_rate": 1.60109230042526e-06, + "loss": 1.146, + "step": 28025 + }, + { + "epoch": 0.822890363497563, + "grad_norm": 0.0, + "learning_rate": 1.6005761994692416e-06, + "loss": 1.332, + "step": 28026 + }, + { + "epoch": 0.822919725174702, + "grad_norm": 0.0, + "learning_rate": 1.6000601744713595e-06, + "loss": 1.1445, + "step": 28027 + }, + { + "epoch": 0.822949086851841, + "grad_norm": 0.0, + "learning_rate": 1.5995442254362882e-06, + "loss": 1.1543, + "step": 28028 + }, + { + "epoch": 0.82297844852898, + "grad_norm": 0.0, + "learning_rate": 1.5990283523686878e-06, + "loss": 1.1865, + "step": 28029 + }, + { + "epoch": 0.823007810206119, + "grad_norm": 0.0, + "learning_rate": 1.5985125552732271e-06, + "loss": 1.25, + "step": 28030 + }, + { + "epoch": 0.823037171883258, + "grad_norm": 0.0, + "learning_rate": 1.5979968341545671e-06, + "loss": 1.209, + "step": 28031 + }, + { + "epoch": 0.8230665335603969, + "grad_norm": 0.0, + "learning_rate": 1.597481189017378e-06, + "loss": 1.2637, + "step": 28032 + }, + { + "epoch": 0.823095895237536, + "grad_norm": 0.0, + "learning_rate": 1.5969656198663142e-06, + "loss": 1.2725, + "step": 28033 + }, + { + "epoch": 0.823125256914675, + "grad_norm": 0.0, + "learning_rate": 1.596450126706044e-06, + "loss": 1.1885, + "step": 28034 + }, + { + "epoch": 0.8231546185918139, + "grad_norm": 0.0, + "learning_rate": 1.595934709541228e-06, + "loss": 1.2354, + "step": 28035 + }, + { + "epoch": 0.823183980268953, + "grad_norm": 0.0, + "learning_rate": 1.595419368376524e-06, + "loss": 1.2061, + "step": 28036 + }, + { + "epoch": 0.823213341946092, + "grad_norm": 0.0, + "learning_rate": 1.5949041032165979e-06, + "loss": 1.208, + "step": 28037 + }, + { + "epoch": 0.8232427036232309, + "grad_norm": 0.0, + "learning_rate": 1.5943889140661039e-06, + "loss": 1.2261, + "step": 28038 + }, + { + "epoch": 0.82327206530037, + "grad_norm": 0.0, + "learning_rate": 1.593873800929706e-06, + "loss": 1.3799, + "step": 28039 + }, + { + "epoch": 0.823301426977509, + "grad_norm": 0.0, + "learning_rate": 1.5933587638120607e-06, + "loss": 1.3076, + "step": 28040 + }, + { + "epoch": 0.8233307886546479, + "grad_norm": 0.0, + "learning_rate": 1.5928438027178239e-06, + "loss": 1.29, + "step": 28041 + }, + { + "epoch": 0.823360150331787, + "grad_norm": 0.0, + "learning_rate": 1.592328917651652e-06, + "loss": 1.229, + "step": 28042 + }, + { + "epoch": 0.823389512008926, + "grad_norm": 0.0, + "learning_rate": 1.591814108618206e-06, + "loss": 1.0791, + "step": 28043 + }, + { + "epoch": 0.8234188736860649, + "grad_norm": 0.0, + "learning_rate": 1.5912993756221373e-06, + "loss": 1.2144, + "step": 28044 + }, + { + "epoch": 0.823448235363204, + "grad_norm": 0.0, + "learning_rate": 1.5907847186681003e-06, + "loss": 1.2793, + "step": 28045 + }, + { + "epoch": 0.823477597040343, + "grad_norm": 0.0, + "learning_rate": 1.590270137760752e-06, + "loss": 1.1792, + "step": 28046 + }, + { + "epoch": 0.8235069587174819, + "grad_norm": 0.0, + "learning_rate": 1.5897556329047436e-06, + "loss": 1.2119, + "step": 28047 + }, + { + "epoch": 0.823536320394621, + "grad_norm": 0.0, + "learning_rate": 1.58924120410473e-06, + "loss": 1.1602, + "step": 28048 + }, + { + "epoch": 0.82356568207176, + "grad_norm": 0.0, + "learning_rate": 1.588726851365363e-06, + "loss": 1.3223, + "step": 28049 + }, + { + "epoch": 0.8235950437488989, + "grad_norm": 0.0, + "learning_rate": 1.5882125746912936e-06, + "loss": 1.21, + "step": 28050 + }, + { + "epoch": 0.823624405426038, + "grad_norm": 0.0, + "learning_rate": 1.5876983740871687e-06, + "loss": 1.1514, + "step": 28051 + }, + { + "epoch": 0.823653767103177, + "grad_norm": 0.0, + "learning_rate": 1.5871842495576452e-06, + "loss": 1.0747, + "step": 28052 + }, + { + "epoch": 0.8236831287803159, + "grad_norm": 0.0, + "learning_rate": 1.5866702011073665e-06, + "loss": 1.1323, + "step": 28053 + }, + { + "epoch": 0.823712490457455, + "grad_norm": 0.0, + "learning_rate": 1.5861562287409859e-06, + "loss": 1.3037, + "step": 28054 + }, + { + "epoch": 0.8237418521345939, + "grad_norm": 0.0, + "learning_rate": 1.5856423324631487e-06, + "loss": 1.2158, + "step": 28055 + }, + { + "epoch": 0.8237712138117329, + "grad_norm": 0.0, + "learning_rate": 1.585128512278502e-06, + "loss": 1.1924, + "step": 28056 + }, + { + "epoch": 0.823800575488872, + "grad_norm": 0.0, + "learning_rate": 1.5846147681916967e-06, + "loss": 1.061, + "step": 28057 + }, + { + "epoch": 0.8238299371660109, + "grad_norm": 0.0, + "learning_rate": 1.5841011002073714e-06, + "loss": 1.1943, + "step": 28058 + }, + { + "epoch": 0.8238592988431499, + "grad_norm": 0.0, + "learning_rate": 1.5835875083301766e-06, + "loss": 1.2812, + "step": 28059 + }, + { + "epoch": 0.823888660520289, + "grad_norm": 0.0, + "learning_rate": 1.5830739925647542e-06, + "loss": 1.2637, + "step": 28060 + }, + { + "epoch": 0.8239180221974279, + "grad_norm": 0.0, + "learning_rate": 1.5825605529157506e-06, + "loss": 1.3193, + "step": 28061 + }, + { + "epoch": 0.8239473838745669, + "grad_norm": 0.0, + "learning_rate": 1.5820471893878064e-06, + "loss": 1.2246, + "step": 28062 + }, + { + "epoch": 0.8239767455517059, + "grad_norm": 0.0, + "learning_rate": 1.581533901985567e-06, + "loss": 1.2988, + "step": 28063 + }, + { + "epoch": 0.8240061072288449, + "grad_norm": 0.0, + "learning_rate": 1.5810206907136728e-06, + "loss": 1.2275, + "step": 28064 + }, + { + "epoch": 0.8240354689059839, + "grad_norm": 0.0, + "learning_rate": 1.5805075555767646e-06, + "loss": 1.1826, + "step": 28065 + }, + { + "epoch": 0.8240648305831229, + "grad_norm": 0.0, + "learning_rate": 1.5799944965794822e-06, + "loss": 1.334, + "step": 28066 + }, + { + "epoch": 0.8240941922602619, + "grad_norm": 0.0, + "learning_rate": 1.579481513726464e-06, + "loss": 1.1807, + "step": 28067 + }, + { + "epoch": 0.8241235539374009, + "grad_norm": 0.0, + "learning_rate": 1.5789686070223542e-06, + "loss": 1.25, + "step": 28068 + }, + { + "epoch": 0.8241529156145398, + "grad_norm": 0.0, + "learning_rate": 1.5784557764717845e-06, + "loss": 1.1558, + "step": 28069 + }, + { + "epoch": 0.8241822772916789, + "grad_norm": 0.0, + "learning_rate": 1.577943022079399e-06, + "loss": 1.2314, + "step": 28070 + }, + { + "epoch": 0.8242116389688179, + "grad_norm": 0.0, + "learning_rate": 1.5774303438498284e-06, + "loss": 1.1108, + "step": 28071 + }, + { + "epoch": 0.8242410006459568, + "grad_norm": 0.0, + "learning_rate": 1.5769177417877156e-06, + "loss": 1.0996, + "step": 28072 + }, + { + "epoch": 0.8242703623230959, + "grad_norm": 0.0, + "learning_rate": 1.5764052158976927e-06, + "loss": 1.2383, + "step": 28073 + }, + { + "epoch": 0.8242997240002349, + "grad_norm": 0.0, + "learning_rate": 1.5758927661843949e-06, + "loss": 1.1318, + "step": 28074 + }, + { + "epoch": 0.8243290856773738, + "grad_norm": 0.0, + "learning_rate": 1.5753803926524557e-06, + "loss": 1.1846, + "step": 28075 + }, + { + "epoch": 0.8243584473545129, + "grad_norm": 0.0, + "learning_rate": 1.5748680953065064e-06, + "loss": 1.104, + "step": 28076 + }, + { + "epoch": 0.8243878090316519, + "grad_norm": 0.0, + "learning_rate": 1.5743558741511856e-06, + "loss": 1.1836, + "step": 28077 + }, + { + "epoch": 0.8244171707087908, + "grad_norm": 0.0, + "learning_rate": 1.57384372919112e-06, + "loss": 1.2168, + "step": 28078 + }, + { + "epoch": 0.8244465323859299, + "grad_norm": 0.0, + "learning_rate": 1.5733316604309468e-06, + "loss": 1.2427, + "step": 28079 + }, + { + "epoch": 0.8244758940630689, + "grad_norm": 0.0, + "learning_rate": 1.5728196678752905e-06, + "loss": 1.2153, + "step": 28080 + }, + { + "epoch": 0.8245052557402078, + "grad_norm": 0.0, + "learning_rate": 1.5723077515287855e-06, + "loss": 1.2188, + "step": 28081 + }, + { + "epoch": 0.8245346174173469, + "grad_norm": 0.0, + "learning_rate": 1.5717959113960612e-06, + "loss": 1.3604, + "step": 28082 + }, + { + "epoch": 0.8245639790944859, + "grad_norm": 0.0, + "learning_rate": 1.5712841474817441e-06, + "loss": 1.4092, + "step": 28083 + }, + { + "epoch": 0.8245933407716248, + "grad_norm": 0.0, + "learning_rate": 1.5707724597904639e-06, + "loss": 1.2441, + "step": 28084 + }, + { + "epoch": 0.8246227024487639, + "grad_norm": 0.0, + "learning_rate": 1.5702608483268445e-06, + "loss": 1.1118, + "step": 28085 + }, + { + "epoch": 0.8246520641259029, + "grad_norm": 0.0, + "learning_rate": 1.5697493130955176e-06, + "loss": 1.2158, + "step": 28086 + }, + { + "epoch": 0.8246814258030418, + "grad_norm": 0.0, + "learning_rate": 1.5692378541011034e-06, + "loss": 1.2988, + "step": 28087 + }, + { + "epoch": 0.8247107874801809, + "grad_norm": 0.0, + "learning_rate": 1.5687264713482342e-06, + "loss": 1.2178, + "step": 28088 + }, + { + "epoch": 0.8247401491573199, + "grad_norm": 0.0, + "learning_rate": 1.56821516484153e-06, + "loss": 1.2461, + "step": 28089 + }, + { + "epoch": 0.8247695108344588, + "grad_norm": 0.0, + "learning_rate": 1.5677039345856148e-06, + "loss": 1.2969, + "step": 28090 + }, + { + "epoch": 0.8247988725115979, + "grad_norm": 0.0, + "learning_rate": 1.5671927805851118e-06, + "loss": 1.2007, + "step": 28091 + }, + { + "epoch": 0.8248282341887369, + "grad_norm": 0.0, + "learning_rate": 1.5666817028446446e-06, + "loss": 1.2422, + "step": 28092 + }, + { + "epoch": 0.8248575958658758, + "grad_norm": 0.0, + "learning_rate": 1.5661707013688355e-06, + "loss": 1.1299, + "step": 28093 + }, + { + "epoch": 0.8248869575430149, + "grad_norm": 0.0, + "learning_rate": 1.5656597761623026e-06, + "loss": 1.1802, + "step": 28094 + }, + { + "epoch": 0.8249163192201538, + "grad_norm": 0.0, + "learning_rate": 1.5651489272296704e-06, + "loss": 1.2168, + "step": 28095 + }, + { + "epoch": 0.8249456808972928, + "grad_norm": 0.0, + "learning_rate": 1.5646381545755552e-06, + "loss": 1.2427, + "step": 28096 + }, + { + "epoch": 0.8249750425744319, + "grad_norm": 0.0, + "learning_rate": 1.5641274582045784e-06, + "loss": 1.3135, + "step": 28097 + }, + { + "epoch": 0.8250044042515708, + "grad_norm": 0.0, + "learning_rate": 1.5636168381213578e-06, + "loss": 1.1328, + "step": 28098 + }, + { + "epoch": 0.8250337659287098, + "grad_norm": 0.0, + "learning_rate": 1.563106294330512e-06, + "loss": 1.2412, + "step": 28099 + }, + { + "epoch": 0.8250631276058489, + "grad_norm": 0.0, + "learning_rate": 1.5625958268366525e-06, + "loss": 1.1211, + "step": 28100 + }, + { + "epoch": 0.8250924892829878, + "grad_norm": 0.0, + "learning_rate": 1.5620854356444037e-06, + "loss": 1.042, + "step": 28101 + }, + { + "epoch": 0.8251218509601268, + "grad_norm": 0.0, + "learning_rate": 1.561575120758374e-06, + "loss": 1.1816, + "step": 28102 + }, + { + "epoch": 0.8251512126372659, + "grad_norm": 0.0, + "learning_rate": 1.5610648821831843e-06, + "loss": 1.0996, + "step": 28103 + }, + { + "epoch": 0.8251805743144048, + "grad_norm": 0.0, + "learning_rate": 1.560554719923446e-06, + "loss": 1.0317, + "step": 28104 + }, + { + "epoch": 0.8252099359915438, + "grad_norm": 0.0, + "learning_rate": 1.5600446339837704e-06, + "loss": 1.2158, + "step": 28105 + }, + { + "epoch": 0.8252392976686829, + "grad_norm": 0.0, + "learning_rate": 1.5595346243687747e-06, + "loss": 1.1064, + "step": 28106 + }, + { + "epoch": 0.8252686593458218, + "grad_norm": 0.0, + "learning_rate": 1.5590246910830697e-06, + "loss": 1.3389, + "step": 28107 + }, + { + "epoch": 0.8252980210229608, + "grad_norm": 0.0, + "learning_rate": 1.5585148341312662e-06, + "loss": 1.229, + "step": 28108 + }, + { + "epoch": 0.8253273827000999, + "grad_norm": 0.0, + "learning_rate": 1.5580050535179725e-06, + "loss": 1.2026, + "step": 28109 + }, + { + "epoch": 0.8253567443772388, + "grad_norm": 0.0, + "learning_rate": 1.557495349247803e-06, + "loss": 1.3086, + "step": 28110 + }, + { + "epoch": 0.8253861060543778, + "grad_norm": 0.0, + "learning_rate": 1.5569857213253626e-06, + "loss": 1.248, + "step": 28111 + }, + { + "epoch": 0.8254154677315169, + "grad_norm": 0.0, + "learning_rate": 1.5564761697552655e-06, + "loss": 1.2051, + "step": 28112 + }, + { + "epoch": 0.8254448294086558, + "grad_norm": 0.0, + "learning_rate": 1.555966694542117e-06, + "loss": 1.1377, + "step": 28113 + }, + { + "epoch": 0.8254741910857948, + "grad_norm": 0.0, + "learning_rate": 1.5554572956905245e-06, + "loss": 1.2778, + "step": 28114 + }, + { + "epoch": 0.8255035527629339, + "grad_norm": 0.0, + "learning_rate": 1.5549479732050931e-06, + "loss": 1.0679, + "step": 28115 + }, + { + "epoch": 0.8255329144400728, + "grad_norm": 0.0, + "learning_rate": 1.5544387270904283e-06, + "loss": 1.2588, + "step": 28116 + }, + { + "epoch": 0.8255622761172118, + "grad_norm": 0.0, + "learning_rate": 1.5539295573511403e-06, + "loss": 1.1445, + "step": 28117 + }, + { + "epoch": 0.8255916377943509, + "grad_norm": 0.0, + "learning_rate": 1.5534204639918271e-06, + "loss": 1.1631, + "step": 28118 + }, + { + "epoch": 0.8256209994714898, + "grad_norm": 0.0, + "learning_rate": 1.5529114470170981e-06, + "loss": 1.2588, + "step": 28119 + }, + { + "epoch": 0.8256503611486288, + "grad_norm": 0.0, + "learning_rate": 1.5524025064315518e-06, + "loss": 1.0117, + "step": 28120 + }, + { + "epoch": 0.8256797228257678, + "grad_norm": 0.0, + "learning_rate": 1.5518936422397956e-06, + "loss": 1.1572, + "step": 28121 + }, + { + "epoch": 0.8257090845029068, + "grad_norm": 0.0, + "learning_rate": 1.5513848544464293e-06, + "loss": 1.2598, + "step": 28122 + }, + { + "epoch": 0.8257384461800458, + "grad_norm": 0.0, + "learning_rate": 1.5508761430560526e-06, + "loss": 1.2085, + "step": 28123 + }, + { + "epoch": 0.8257678078571848, + "grad_norm": 0.0, + "learning_rate": 1.5503675080732682e-06, + "loss": 1.2871, + "step": 28124 + }, + { + "epoch": 0.8257971695343238, + "grad_norm": 0.0, + "learning_rate": 1.5498589495026707e-06, + "loss": 1.2715, + "step": 28125 + }, + { + "epoch": 0.8258265312114628, + "grad_norm": 0.0, + "learning_rate": 1.5493504673488657e-06, + "loss": 1.2407, + "step": 28126 + }, + { + "epoch": 0.8258558928886018, + "grad_norm": 0.0, + "learning_rate": 1.5488420616164468e-06, + "loss": 1.1592, + "step": 28127 + }, + { + "epoch": 0.8258852545657408, + "grad_norm": 0.0, + "learning_rate": 1.5483337323100145e-06, + "loss": 1.2803, + "step": 28128 + }, + { + "epoch": 0.8259146162428798, + "grad_norm": 0.0, + "learning_rate": 1.5478254794341629e-06, + "loss": 1.1362, + "step": 28129 + }, + { + "epoch": 0.8259439779200188, + "grad_norm": 0.0, + "learning_rate": 1.5473173029934929e-06, + "loss": 1.2324, + "step": 28130 + }, + { + "epoch": 0.8259733395971578, + "grad_norm": 0.0, + "learning_rate": 1.5468092029925973e-06, + "loss": 1.1206, + "step": 28131 + }, + { + "epoch": 0.8260027012742968, + "grad_norm": 0.0, + "learning_rate": 1.5463011794360717e-06, + "loss": 1.2158, + "step": 28132 + }, + { + "epoch": 0.8260320629514358, + "grad_norm": 0.0, + "learning_rate": 1.5457932323285085e-06, + "loss": 1.3193, + "step": 28133 + }, + { + "epoch": 0.8260614246285748, + "grad_norm": 0.0, + "learning_rate": 1.5452853616745e-06, + "loss": 1.1841, + "step": 28134 + }, + { + "epoch": 0.8260907863057138, + "grad_norm": 0.0, + "learning_rate": 1.5447775674786436e-06, + "loss": 1.229, + "step": 28135 + }, + { + "epoch": 0.8261201479828528, + "grad_norm": 0.0, + "learning_rate": 1.5442698497455266e-06, + "loss": 1.0864, + "step": 28136 + }, + { + "epoch": 0.8261495096599918, + "grad_norm": 0.0, + "learning_rate": 1.5437622084797456e-06, + "loss": 1.25, + "step": 28137 + }, + { + "epoch": 0.8261788713371307, + "grad_norm": 0.0, + "learning_rate": 1.5432546436858885e-06, + "loss": 1.166, + "step": 28138 + }, + { + "epoch": 0.8262082330142698, + "grad_norm": 0.0, + "learning_rate": 1.5427471553685459e-06, + "loss": 1.1611, + "step": 28139 + }, + { + "epoch": 0.8262375946914088, + "grad_norm": 0.0, + "learning_rate": 1.5422397435323033e-06, + "loss": 1.2236, + "step": 28140 + }, + { + "epoch": 0.8262669563685477, + "grad_norm": 0.0, + "learning_rate": 1.5417324081817564e-06, + "loss": 1.3057, + "step": 28141 + }, + { + "epoch": 0.8262963180456868, + "grad_norm": 0.0, + "learning_rate": 1.541225149321489e-06, + "loss": 1.2188, + "step": 28142 + }, + { + "epoch": 0.8263256797228258, + "grad_norm": 0.0, + "learning_rate": 1.5407179669560868e-06, + "loss": 1.3291, + "step": 28143 + }, + { + "epoch": 0.8263550413999647, + "grad_norm": 0.0, + "learning_rate": 1.54021086109014e-06, + "loss": 1.2754, + "step": 28144 + }, + { + "epoch": 0.8263844030771038, + "grad_norm": 0.0, + "learning_rate": 1.539703831728231e-06, + "loss": 1.2715, + "step": 28145 + }, + { + "epoch": 0.8264137647542428, + "grad_norm": 0.0, + "learning_rate": 1.539196878874949e-06, + "loss": 1.2222, + "step": 28146 + }, + { + "epoch": 0.8264431264313817, + "grad_norm": 0.0, + "learning_rate": 1.5386900025348773e-06, + "loss": 1.1738, + "step": 28147 + }, + { + "epoch": 0.8264724881085208, + "grad_norm": 0.0, + "learning_rate": 1.5381832027125986e-06, + "loss": 1.1802, + "step": 28148 + }, + { + "epoch": 0.8265018497856598, + "grad_norm": 0.0, + "learning_rate": 1.5376764794126942e-06, + "loss": 1.1123, + "step": 28149 + }, + { + "epoch": 0.8265312114627987, + "grad_norm": 0.0, + "learning_rate": 1.53716983263975e-06, + "loss": 1.0986, + "step": 28150 + }, + { + "epoch": 0.8265605731399378, + "grad_norm": 0.0, + "learning_rate": 1.5366632623983445e-06, + "loss": 1.1377, + "step": 28151 + }, + { + "epoch": 0.8265899348170768, + "grad_norm": 0.0, + "learning_rate": 1.5361567686930634e-06, + "loss": 1.1777, + "step": 28152 + }, + { + "epoch": 0.8266192964942157, + "grad_norm": 0.0, + "learning_rate": 1.5356503515284837e-06, + "loss": 1.1216, + "step": 28153 + }, + { + "epoch": 0.8266486581713548, + "grad_norm": 0.0, + "learning_rate": 1.535144010909183e-06, + "loss": 1.3623, + "step": 28154 + }, + { + "epoch": 0.8266780198484938, + "grad_norm": 0.0, + "learning_rate": 1.5346377468397466e-06, + "loss": 1.189, + "step": 28155 + }, + { + "epoch": 0.8267073815256327, + "grad_norm": 0.0, + "learning_rate": 1.5341315593247474e-06, + "loss": 1.2324, + "step": 28156 + }, + { + "epoch": 0.8267367432027718, + "grad_norm": 0.0, + "learning_rate": 1.5336254483687662e-06, + "loss": 1.2002, + "step": 28157 + }, + { + "epoch": 0.8267661048799108, + "grad_norm": 0.0, + "learning_rate": 1.533119413976375e-06, + "loss": 1.25, + "step": 28158 + }, + { + "epoch": 0.8267954665570497, + "grad_norm": 0.0, + "learning_rate": 1.5326134561521555e-06, + "loss": 1.1191, + "step": 28159 + }, + { + "epoch": 0.8268248282341888, + "grad_norm": 0.0, + "learning_rate": 1.5321075749006796e-06, + "loss": 1.2495, + "step": 28160 + }, + { + "epoch": 0.8268541899113278, + "grad_norm": 0.0, + "learning_rate": 1.5316017702265251e-06, + "loss": 1.123, + "step": 28161 + }, + { + "epoch": 0.8268835515884667, + "grad_norm": 0.0, + "learning_rate": 1.5310960421342658e-06, + "loss": 1.2559, + "step": 28162 + }, + { + "epoch": 0.8269129132656058, + "grad_norm": 0.0, + "learning_rate": 1.5305903906284725e-06, + "loss": 1.0195, + "step": 28163 + }, + { + "epoch": 0.8269422749427447, + "grad_norm": 0.0, + "learning_rate": 1.5300848157137204e-06, + "loss": 1.2256, + "step": 28164 + }, + { + "epoch": 0.8269716366198837, + "grad_norm": 0.0, + "learning_rate": 1.5295793173945771e-06, + "loss": 1.105, + "step": 28165 + }, + { + "epoch": 0.8270009982970227, + "grad_norm": 0.0, + "learning_rate": 1.5290738956756201e-06, + "loss": 1.1909, + "step": 28166 + }, + { + "epoch": 0.8270303599741617, + "grad_norm": 0.0, + "learning_rate": 1.5285685505614157e-06, + "loss": 1.2793, + "step": 28167 + }, + { + "epoch": 0.8270597216513007, + "grad_norm": 0.0, + "learning_rate": 1.5280632820565367e-06, + "loss": 1.2588, + "step": 28168 + }, + { + "epoch": 0.8270890833284397, + "grad_norm": 0.0, + "learning_rate": 1.5275580901655496e-06, + "loss": 1.2998, + "step": 28169 + }, + { + "epoch": 0.8271184450055787, + "grad_norm": 0.0, + "learning_rate": 1.5270529748930262e-06, + "loss": 1.2393, + "step": 28170 + }, + { + "epoch": 0.8271478066827177, + "grad_norm": 0.0, + "learning_rate": 1.5265479362435331e-06, + "loss": 1.2261, + "step": 28171 + }, + { + "epoch": 0.8271771683598567, + "grad_norm": 0.0, + "learning_rate": 1.5260429742216365e-06, + "loss": 1.2881, + "step": 28172 + }, + { + "epoch": 0.8272065300369957, + "grad_norm": 0.0, + "learning_rate": 1.5255380888319049e-06, + "loss": 1.2305, + "step": 28173 + }, + { + "epoch": 0.8272358917141347, + "grad_norm": 0.0, + "learning_rate": 1.5250332800788992e-06, + "loss": 1.2041, + "step": 28174 + }, + { + "epoch": 0.8272652533912737, + "grad_norm": 0.0, + "learning_rate": 1.5245285479671902e-06, + "loss": 1.103, + "step": 28175 + }, + { + "epoch": 0.8272946150684127, + "grad_norm": 0.0, + "learning_rate": 1.5240238925013385e-06, + "loss": 1.2607, + "step": 28176 + }, + { + "epoch": 0.8273239767455517, + "grad_norm": 0.0, + "learning_rate": 1.5235193136859116e-06, + "loss": 1.126, + "step": 28177 + }, + { + "epoch": 0.8273533384226907, + "grad_norm": 0.0, + "learning_rate": 1.5230148115254684e-06, + "loss": 1.1885, + "step": 28178 + }, + { + "epoch": 0.8273827000998297, + "grad_norm": 0.0, + "learning_rate": 1.522510386024575e-06, + "loss": 1.2378, + "step": 28179 + }, + { + "epoch": 0.8274120617769687, + "grad_norm": 0.0, + "learning_rate": 1.522006037187791e-06, + "loss": 1.124, + "step": 28180 + }, + { + "epoch": 0.8274414234541076, + "grad_norm": 0.0, + "learning_rate": 1.5215017650196784e-06, + "loss": 1.2393, + "step": 28181 + }, + { + "epoch": 0.8274707851312467, + "grad_norm": 0.0, + "learning_rate": 1.520997569524797e-06, + "loss": 1.0962, + "step": 28182 + }, + { + "epoch": 0.8275001468083857, + "grad_norm": 0.0, + "learning_rate": 1.5204934507077029e-06, + "loss": 1.2383, + "step": 28183 + }, + { + "epoch": 0.8275295084855246, + "grad_norm": 0.0, + "learning_rate": 1.5199894085729606e-06, + "loss": 1.1987, + "step": 28184 + }, + { + "epoch": 0.8275588701626637, + "grad_norm": 0.0, + "learning_rate": 1.5194854431251238e-06, + "loss": 1.2969, + "step": 28185 + }, + { + "epoch": 0.8275882318398027, + "grad_norm": 0.0, + "learning_rate": 1.5189815543687547e-06, + "loss": 1.2646, + "step": 28186 + }, + { + "epoch": 0.8276175935169416, + "grad_norm": 0.0, + "learning_rate": 1.5184777423084063e-06, + "loss": 1.2227, + "step": 28187 + }, + { + "epoch": 0.8276469551940807, + "grad_norm": 0.0, + "learning_rate": 1.5179740069486371e-06, + "loss": 1.1416, + "step": 28188 + }, + { + "epoch": 0.8276763168712197, + "grad_norm": 0.0, + "learning_rate": 1.5174703482939978e-06, + "loss": 1.2256, + "step": 28189 + }, + { + "epoch": 0.8277056785483586, + "grad_norm": 0.0, + "learning_rate": 1.5169667663490496e-06, + "loss": 1.1938, + "step": 28190 + }, + { + "epoch": 0.8277350402254977, + "grad_norm": 0.0, + "learning_rate": 1.5164632611183416e-06, + "loss": 1.1118, + "step": 28191 + }, + { + "epoch": 0.8277644019026367, + "grad_norm": 0.0, + "learning_rate": 1.5159598326064306e-06, + "loss": 1.2598, + "step": 28192 + }, + { + "epoch": 0.8277937635797756, + "grad_norm": 0.0, + "learning_rate": 1.5154564808178673e-06, + "loss": 1.21, + "step": 28193 + }, + { + "epoch": 0.8278231252569147, + "grad_norm": 0.0, + "learning_rate": 1.5149532057572024e-06, + "loss": 1.2642, + "step": 28194 + }, + { + "epoch": 0.8278524869340537, + "grad_norm": 0.0, + "learning_rate": 1.5144500074289903e-06, + "loss": 1.2354, + "step": 28195 + }, + { + "epoch": 0.8278818486111926, + "grad_norm": 0.0, + "learning_rate": 1.5139468858377803e-06, + "loss": 1.1519, + "step": 28196 + }, + { + "epoch": 0.8279112102883317, + "grad_norm": 0.0, + "learning_rate": 1.5134438409881226e-06, + "loss": 1.2051, + "step": 28197 + }, + { + "epoch": 0.8279405719654707, + "grad_norm": 0.0, + "learning_rate": 1.5129408728845628e-06, + "loss": 1.2095, + "step": 28198 + }, + { + "epoch": 0.8279699336426096, + "grad_norm": 0.0, + "learning_rate": 1.5124379815316537e-06, + "loss": 1.2646, + "step": 28199 + }, + { + "epoch": 0.8279992953197487, + "grad_norm": 0.0, + "learning_rate": 1.511935166933941e-06, + "loss": 1.3208, + "step": 28200 + }, + { + "epoch": 0.8280286569968877, + "grad_norm": 0.0, + "learning_rate": 1.5114324290959738e-06, + "loss": 1.2598, + "step": 28201 + }, + { + "epoch": 0.8280580186740266, + "grad_norm": 0.0, + "learning_rate": 1.5109297680222967e-06, + "loss": 1.2026, + "step": 28202 + }, + { + "epoch": 0.8280873803511657, + "grad_norm": 0.0, + "learning_rate": 1.5104271837174545e-06, + "loss": 1.3301, + "step": 28203 + }, + { + "epoch": 0.8281167420283047, + "grad_norm": 0.0, + "learning_rate": 1.5099246761859943e-06, + "loss": 1.3555, + "step": 28204 + }, + { + "epoch": 0.8281461037054436, + "grad_norm": 0.0, + "learning_rate": 1.5094222454324614e-06, + "loss": 1.0635, + "step": 28205 + }, + { + "epoch": 0.8281754653825827, + "grad_norm": 0.0, + "learning_rate": 1.5089198914613968e-06, + "loss": 1.0542, + "step": 28206 + }, + { + "epoch": 0.8282048270597216, + "grad_norm": 0.0, + "learning_rate": 1.508417614277341e-06, + "loss": 1.1558, + "step": 28207 + }, + { + "epoch": 0.8282341887368606, + "grad_norm": 0.0, + "learning_rate": 1.507915413884843e-06, + "loss": 1.2603, + "step": 28208 + }, + { + "epoch": 0.8282635504139997, + "grad_norm": 0.0, + "learning_rate": 1.5074132902884375e-06, + "loss": 1.1611, + "step": 28209 + }, + { + "epoch": 0.8282929120911386, + "grad_norm": 0.0, + "learning_rate": 1.5069112434926724e-06, + "loss": 1.2803, + "step": 28210 + }, + { + "epoch": 0.8283222737682776, + "grad_norm": 0.0, + "learning_rate": 1.506409273502083e-06, + "loss": 1.0742, + "step": 28211 + }, + { + "epoch": 0.8283516354454167, + "grad_norm": 0.0, + "learning_rate": 1.5059073803212099e-06, + "loss": 1.2178, + "step": 28212 + }, + { + "epoch": 0.8283809971225556, + "grad_norm": 0.0, + "learning_rate": 1.5054055639545917e-06, + "loss": 1.2148, + "step": 28213 + }, + { + "epoch": 0.8284103587996946, + "grad_norm": 0.0, + "learning_rate": 1.5049038244067637e-06, + "loss": 1.2852, + "step": 28214 + }, + { + "epoch": 0.8284397204768337, + "grad_norm": 0.0, + "learning_rate": 1.504402161682269e-06, + "loss": 1.2383, + "step": 28215 + }, + { + "epoch": 0.8284690821539726, + "grad_norm": 0.0, + "learning_rate": 1.5039005757856385e-06, + "loss": 1.2412, + "step": 28216 + }, + { + "epoch": 0.8284984438311116, + "grad_norm": 0.0, + "learning_rate": 1.5033990667214138e-06, + "loss": 1.1587, + "step": 28217 + }, + { + "epoch": 0.8285278055082507, + "grad_norm": 0.0, + "learning_rate": 1.5028976344941249e-06, + "loss": 1.2939, + "step": 28218 + }, + { + "epoch": 0.8285571671853896, + "grad_norm": 0.0, + "learning_rate": 1.5023962791083113e-06, + "loss": 1.2314, + "step": 28219 + }, + { + "epoch": 0.8285865288625286, + "grad_norm": 0.0, + "learning_rate": 1.501895000568504e-06, + "loss": 1.124, + "step": 28220 + }, + { + "epoch": 0.8286158905396677, + "grad_norm": 0.0, + "learning_rate": 1.501393798879237e-06, + "loss": 1.1987, + "step": 28221 + }, + { + "epoch": 0.8286452522168066, + "grad_norm": 0.0, + "learning_rate": 1.5008926740450425e-06, + "loss": 1.209, + "step": 28222 + }, + { + "epoch": 0.8286746138939456, + "grad_norm": 0.0, + "learning_rate": 1.5003916260704498e-06, + "loss": 1.2515, + "step": 28223 + }, + { + "epoch": 0.8287039755710847, + "grad_norm": 0.0, + "learning_rate": 1.4998906549599945e-06, + "loss": 1.1353, + "step": 28224 + }, + { + "epoch": 0.8287333372482236, + "grad_norm": 0.0, + "learning_rate": 1.499389760718203e-06, + "loss": 1.1157, + "step": 28225 + }, + { + "epoch": 0.8287626989253626, + "grad_norm": 0.0, + "learning_rate": 1.498888943349608e-06, + "loss": 1.2451, + "step": 28226 + }, + { + "epoch": 0.8287920606025017, + "grad_norm": 0.0, + "learning_rate": 1.498388202858736e-06, + "loss": 1.1782, + "step": 28227 + }, + { + "epoch": 0.8288214222796406, + "grad_norm": 0.0, + "learning_rate": 1.4978875392501192e-06, + "loss": 1.27, + "step": 28228 + }, + { + "epoch": 0.8288507839567796, + "grad_norm": 0.0, + "learning_rate": 1.4973869525282836e-06, + "loss": 1.1675, + "step": 28229 + }, + { + "epoch": 0.8288801456339187, + "grad_norm": 0.0, + "learning_rate": 1.4968864426977537e-06, + "loss": 1.1685, + "step": 28230 + }, + { + "epoch": 0.8289095073110576, + "grad_norm": 0.0, + "learning_rate": 1.4963860097630589e-06, + "loss": 1.2148, + "step": 28231 + }, + { + "epoch": 0.8289388689881966, + "grad_norm": 0.0, + "learning_rate": 1.4958856537287193e-06, + "loss": 1.209, + "step": 28232 + }, + { + "epoch": 0.8289682306653356, + "grad_norm": 0.0, + "learning_rate": 1.4953853745992676e-06, + "loss": 1.1919, + "step": 28233 + }, + { + "epoch": 0.8289975923424746, + "grad_norm": 0.0, + "learning_rate": 1.4948851723792201e-06, + "loss": 1.3354, + "step": 28234 + }, + { + "epoch": 0.8290269540196136, + "grad_norm": 0.0, + "learning_rate": 1.494385047073108e-06, + "loss": 1.3472, + "step": 28235 + }, + { + "epoch": 0.8290563156967526, + "grad_norm": 0.0, + "learning_rate": 1.4938849986854477e-06, + "loss": 1.1016, + "step": 28236 + }, + { + "epoch": 0.8290856773738916, + "grad_norm": 0.0, + "learning_rate": 1.4933850272207672e-06, + "loss": 1.2085, + "step": 28237 + }, + { + "epoch": 0.8291150390510306, + "grad_norm": 0.0, + "learning_rate": 1.4928851326835814e-06, + "loss": 1.1914, + "step": 28238 + }, + { + "epoch": 0.8291444007281696, + "grad_norm": 0.0, + "learning_rate": 1.492385315078415e-06, + "loss": 1.1978, + "step": 28239 + }, + { + "epoch": 0.8291737624053086, + "grad_norm": 0.0, + "learning_rate": 1.4918855744097848e-06, + "loss": 1.2568, + "step": 28240 + }, + { + "epoch": 0.8292031240824476, + "grad_norm": 0.0, + "learning_rate": 1.491385910682216e-06, + "loss": 1.2476, + "step": 28241 + }, + { + "epoch": 0.8292324857595866, + "grad_norm": 0.0, + "learning_rate": 1.4908863239002224e-06, + "loss": 1.2256, + "step": 28242 + }, + { + "epoch": 0.8292618474367256, + "grad_norm": 0.0, + "learning_rate": 1.4903868140683208e-06, + "loss": 1.2109, + "step": 28243 + }, + { + "epoch": 0.8292912091138646, + "grad_norm": 0.0, + "learning_rate": 1.489887381191033e-06, + "loss": 1.2485, + "step": 28244 + }, + { + "epoch": 0.8293205707910036, + "grad_norm": 0.0, + "learning_rate": 1.489388025272873e-06, + "loss": 1.3584, + "step": 28245 + }, + { + "epoch": 0.8293499324681426, + "grad_norm": 0.0, + "learning_rate": 1.4888887463183576e-06, + "loss": 1.0933, + "step": 28246 + }, + { + "epoch": 0.8293792941452816, + "grad_norm": 0.0, + "learning_rate": 1.4883895443319973e-06, + "loss": 1.1982, + "step": 28247 + }, + { + "epoch": 0.8294086558224206, + "grad_norm": 0.0, + "learning_rate": 1.487890419318313e-06, + "loss": 1.3159, + "step": 28248 + }, + { + "epoch": 0.8294380174995596, + "grad_norm": 0.0, + "learning_rate": 1.4873913712818132e-06, + "loss": 1.1958, + "step": 28249 + }, + { + "epoch": 0.8294673791766985, + "grad_norm": 0.0, + "learning_rate": 1.4868924002270168e-06, + "loss": 1.333, + "step": 28250 + }, + { + "epoch": 0.8294967408538376, + "grad_norm": 0.0, + "learning_rate": 1.4863935061584312e-06, + "loss": 1.1982, + "step": 28251 + }, + { + "epoch": 0.8295261025309766, + "grad_norm": 0.0, + "learning_rate": 1.485894689080568e-06, + "loss": 1.3154, + "step": 28252 + }, + { + "epoch": 0.8295554642081155, + "grad_norm": 0.0, + "learning_rate": 1.485395948997942e-06, + "loss": 1.2812, + "step": 28253 + }, + { + "epoch": 0.8295848258852546, + "grad_norm": 0.0, + "learning_rate": 1.48489728591506e-06, + "loss": 1.2314, + "step": 28254 + }, + { + "epoch": 0.8296141875623936, + "grad_norm": 0.0, + "learning_rate": 1.4843986998364336e-06, + "loss": 1.2363, + "step": 28255 + }, + { + "epoch": 0.8296435492395325, + "grad_norm": 0.0, + "learning_rate": 1.4839001907665673e-06, + "loss": 1.2476, + "step": 28256 + }, + { + "epoch": 0.8296729109166716, + "grad_norm": 0.0, + "learning_rate": 1.483401758709976e-06, + "loss": 1.2568, + "step": 28257 + }, + { + "epoch": 0.8297022725938106, + "grad_norm": 0.0, + "learning_rate": 1.482903403671161e-06, + "loss": 1.127, + "step": 28258 + }, + { + "epoch": 0.8297316342709495, + "grad_norm": 0.0, + "learning_rate": 1.4824051256546345e-06, + "loss": 1.3008, + "step": 28259 + }, + { + "epoch": 0.8297609959480886, + "grad_norm": 0.0, + "learning_rate": 1.4819069246648998e-06, + "loss": 1.3574, + "step": 28260 + }, + { + "epoch": 0.8297903576252276, + "grad_norm": 0.0, + "learning_rate": 1.4814088007064586e-06, + "loss": 1.208, + "step": 28261 + }, + { + "epoch": 0.8298197193023665, + "grad_norm": 0.0, + "learning_rate": 1.4809107537838253e-06, + "loss": 1.2383, + "step": 28262 + }, + { + "epoch": 0.8298490809795056, + "grad_norm": 0.0, + "learning_rate": 1.4804127839014927e-06, + "loss": 1.2959, + "step": 28263 + }, + { + "epoch": 0.8298784426566446, + "grad_norm": 0.0, + "learning_rate": 1.4799148910639717e-06, + "loss": 1.1763, + "step": 28264 + }, + { + "epoch": 0.8299078043337835, + "grad_norm": 0.0, + "learning_rate": 1.4794170752757598e-06, + "loss": 1.209, + "step": 28265 + }, + { + "epoch": 0.8299371660109225, + "grad_norm": 0.0, + "learning_rate": 1.4789193365413646e-06, + "loss": 1.1953, + "step": 28266 + }, + { + "epoch": 0.8299665276880616, + "grad_norm": 0.0, + "learning_rate": 1.4784216748652814e-06, + "loss": 1.2407, + "step": 28267 + }, + { + "epoch": 0.8299958893652005, + "grad_norm": 0.0, + "learning_rate": 1.4779240902520154e-06, + "loss": 1.1543, + "step": 28268 + }, + { + "epoch": 0.8300252510423395, + "grad_norm": 0.0, + "learning_rate": 1.4774265827060641e-06, + "loss": 1.2314, + "step": 28269 + }, + { + "epoch": 0.8300546127194786, + "grad_norm": 0.0, + "learning_rate": 1.4769291522319274e-06, + "loss": 1.2881, + "step": 28270 + }, + { + "epoch": 0.8300839743966175, + "grad_norm": 0.0, + "learning_rate": 1.4764317988341027e-06, + "loss": 1.189, + "step": 28271 + }, + { + "epoch": 0.8301133360737565, + "grad_norm": 0.0, + "learning_rate": 1.4759345225170863e-06, + "loss": 1.1602, + "step": 28272 + }, + { + "epoch": 0.8301426977508956, + "grad_norm": 0.0, + "learning_rate": 1.4754373232853792e-06, + "loss": 1.2139, + "step": 28273 + }, + { + "epoch": 0.8301720594280345, + "grad_norm": 0.0, + "learning_rate": 1.4749402011434733e-06, + "loss": 1.3037, + "step": 28274 + }, + { + "epoch": 0.8302014211051735, + "grad_norm": 0.0, + "learning_rate": 1.4744431560958683e-06, + "loss": 1.1709, + "step": 28275 + }, + { + "epoch": 0.8302307827823125, + "grad_norm": 0.0, + "learning_rate": 1.4739461881470562e-06, + "loss": 1.1582, + "step": 28276 + }, + { + "epoch": 0.8302601444594515, + "grad_norm": 0.0, + "learning_rate": 1.4734492973015335e-06, + "loss": 1.1006, + "step": 28277 + }, + { + "epoch": 0.8302895061365905, + "grad_norm": 0.0, + "learning_rate": 1.4729524835637932e-06, + "loss": 1.2441, + "step": 28278 + }, + { + "epoch": 0.8303188678137295, + "grad_norm": 0.0, + "learning_rate": 1.472455746938326e-06, + "loss": 1.1382, + "step": 28279 + }, + { + "epoch": 0.8303482294908685, + "grad_norm": 0.0, + "learning_rate": 1.4719590874296263e-06, + "loss": 1.3174, + "step": 28280 + }, + { + "epoch": 0.8303775911680075, + "grad_norm": 0.0, + "learning_rate": 1.4714625050421827e-06, + "loss": 1.2261, + "step": 28281 + }, + { + "epoch": 0.8304069528451465, + "grad_norm": 0.0, + "learning_rate": 1.4709659997804905e-06, + "loss": 1.0117, + "step": 28282 + }, + { + "epoch": 0.8304363145222855, + "grad_norm": 0.0, + "learning_rate": 1.4704695716490337e-06, + "loss": 1.3643, + "step": 28283 + }, + { + "epoch": 0.8304656761994245, + "grad_norm": 0.0, + "learning_rate": 1.4699732206523077e-06, + "loss": 1.2031, + "step": 28284 + }, + { + "epoch": 0.8304950378765635, + "grad_norm": 0.0, + "learning_rate": 1.4694769467947967e-06, + "loss": 1.3447, + "step": 28285 + }, + { + "epoch": 0.8305243995537025, + "grad_norm": 0.0, + "learning_rate": 1.4689807500809917e-06, + "loss": 1.2598, + "step": 28286 + }, + { + "epoch": 0.8305537612308415, + "grad_norm": 0.0, + "learning_rate": 1.4684846305153788e-06, + "loss": 1.1768, + "step": 28287 + }, + { + "epoch": 0.8305831229079805, + "grad_norm": 0.0, + "learning_rate": 1.4679885881024447e-06, + "loss": 1.2217, + "step": 28288 + }, + { + "epoch": 0.8306124845851195, + "grad_norm": 0.0, + "learning_rate": 1.4674926228466734e-06, + "loss": 1.2061, + "step": 28289 + }, + { + "epoch": 0.8306418462622585, + "grad_norm": 0.0, + "learning_rate": 1.4669967347525527e-06, + "loss": 1.1689, + "step": 28290 + }, + { + "epoch": 0.8306712079393975, + "grad_norm": 0.0, + "learning_rate": 1.4665009238245653e-06, + "loss": 1.3242, + "step": 28291 + }, + { + "epoch": 0.8307005696165365, + "grad_norm": 0.0, + "learning_rate": 1.4660051900671945e-06, + "loss": 1.1533, + "step": 28292 + }, + { + "epoch": 0.8307299312936754, + "grad_norm": 0.0, + "learning_rate": 1.4655095334849267e-06, + "loss": 1.2998, + "step": 28293 + }, + { + "epoch": 0.8307592929708145, + "grad_norm": 0.0, + "learning_rate": 1.4650139540822405e-06, + "loss": 1.1514, + "step": 28294 + }, + { + "epoch": 0.8307886546479535, + "grad_norm": 0.0, + "learning_rate": 1.4645184518636202e-06, + "loss": 1.0942, + "step": 28295 + }, + { + "epoch": 0.8308180163250924, + "grad_norm": 0.0, + "learning_rate": 1.464023026833543e-06, + "loss": 1.2744, + "step": 28296 + }, + { + "epoch": 0.8308473780022315, + "grad_norm": 0.0, + "learning_rate": 1.4635276789964937e-06, + "loss": 1.1704, + "step": 28297 + }, + { + "epoch": 0.8308767396793705, + "grad_norm": 0.0, + "learning_rate": 1.463032408356948e-06, + "loss": 1.1025, + "step": 28298 + }, + { + "epoch": 0.8309061013565094, + "grad_norm": 0.0, + "learning_rate": 1.4625372149193885e-06, + "loss": 1.2559, + "step": 28299 + }, + { + "epoch": 0.8309354630336485, + "grad_norm": 0.0, + "learning_rate": 1.4620420986882911e-06, + "loss": 1.3242, + "step": 28300 + }, + { + "epoch": 0.8309648247107875, + "grad_norm": 0.0, + "learning_rate": 1.4615470596681314e-06, + "loss": 1.2236, + "step": 28301 + }, + { + "epoch": 0.8309941863879264, + "grad_norm": 0.0, + "learning_rate": 1.4610520978633913e-06, + "loss": 1.1494, + "step": 28302 + }, + { + "epoch": 0.8310235480650655, + "grad_norm": 0.0, + "learning_rate": 1.460557213278543e-06, + "loss": 1.2539, + "step": 28303 + }, + { + "epoch": 0.8310529097422045, + "grad_norm": 0.0, + "learning_rate": 1.4600624059180635e-06, + "loss": 1.145, + "step": 28304 + }, + { + "epoch": 0.8310822714193434, + "grad_norm": 0.0, + "learning_rate": 1.459567675786424e-06, + "loss": 1.2524, + "step": 28305 + }, + { + "epoch": 0.8311116330964825, + "grad_norm": 0.0, + "learning_rate": 1.4590730228881044e-06, + "loss": 1.123, + "step": 28306 + }, + { + "epoch": 0.8311409947736215, + "grad_norm": 0.0, + "learning_rate": 1.458578447227572e-06, + "loss": 1.127, + "step": 28307 + }, + { + "epoch": 0.8311703564507604, + "grad_norm": 0.0, + "learning_rate": 1.4580839488093047e-06, + "loss": 1.2056, + "step": 28308 + }, + { + "epoch": 0.8311997181278995, + "grad_norm": 0.0, + "learning_rate": 1.4575895276377695e-06, + "loss": 1.0454, + "step": 28309 + }, + { + "epoch": 0.8312290798050385, + "grad_norm": 0.0, + "learning_rate": 1.4570951837174418e-06, + "loss": 1.2402, + "step": 28310 + }, + { + "epoch": 0.8312584414821774, + "grad_norm": 0.0, + "learning_rate": 1.4566009170527907e-06, + "loss": 1.1074, + "step": 28311 + }, + { + "epoch": 0.8312878031593165, + "grad_norm": 0.0, + "learning_rate": 1.4561067276482854e-06, + "loss": 1.1353, + "step": 28312 + }, + { + "epoch": 0.8313171648364555, + "grad_norm": 0.0, + "learning_rate": 1.455612615508395e-06, + "loss": 1.1919, + "step": 28313 + }, + { + "epoch": 0.8313465265135944, + "grad_norm": 0.0, + "learning_rate": 1.4551185806375855e-06, + "loss": 1.1396, + "step": 28314 + }, + { + "epoch": 0.8313758881907335, + "grad_norm": 0.0, + "learning_rate": 1.4546246230403294e-06, + "loss": 1.2632, + "step": 28315 + }, + { + "epoch": 0.8314052498678725, + "grad_norm": 0.0, + "learning_rate": 1.4541307427210894e-06, + "loss": 1.3027, + "step": 28316 + }, + { + "epoch": 0.8314346115450114, + "grad_norm": 0.0, + "learning_rate": 1.4536369396843353e-06, + "loss": 1.3076, + "step": 28317 + }, + { + "epoch": 0.8314639732221505, + "grad_norm": 0.0, + "learning_rate": 1.4531432139345313e-06, + "loss": 1.2578, + "step": 28318 + }, + { + "epoch": 0.8314933348992894, + "grad_norm": 0.0, + "learning_rate": 1.452649565476143e-06, + "loss": 1.3223, + "step": 28319 + }, + { + "epoch": 0.8315226965764284, + "grad_norm": 0.0, + "learning_rate": 1.452155994313633e-06, + "loss": 1.0874, + "step": 28320 + }, + { + "epoch": 0.8315520582535675, + "grad_norm": 0.0, + "learning_rate": 1.4516625004514628e-06, + "loss": 1.2578, + "step": 28321 + }, + { + "epoch": 0.8315814199307064, + "grad_norm": 0.0, + "learning_rate": 1.4511690838941006e-06, + "loss": 1.2207, + "step": 28322 + }, + { + "epoch": 0.8316107816078454, + "grad_norm": 0.0, + "learning_rate": 1.4506757446460041e-06, + "loss": 1.2554, + "step": 28323 + }, + { + "epoch": 0.8316401432849845, + "grad_norm": 0.0, + "learning_rate": 1.4501824827116384e-06, + "loss": 1.085, + "step": 28324 + }, + { + "epoch": 0.8316695049621234, + "grad_norm": 0.0, + "learning_rate": 1.4496892980954591e-06, + "loss": 1.3145, + "step": 28325 + }, + { + "epoch": 0.8316988666392624, + "grad_norm": 0.0, + "learning_rate": 1.4491961908019314e-06, + "loss": 1.1328, + "step": 28326 + }, + { + "epoch": 0.8317282283164015, + "grad_norm": 0.0, + "learning_rate": 1.448703160835514e-06, + "loss": 1.1284, + "step": 28327 + }, + { + "epoch": 0.8317575899935404, + "grad_norm": 0.0, + "learning_rate": 1.4482102082006622e-06, + "loss": 1.0845, + "step": 28328 + }, + { + "epoch": 0.8317869516706794, + "grad_norm": 0.0, + "learning_rate": 1.4477173329018367e-06, + "loss": 1.145, + "step": 28329 + }, + { + "epoch": 0.8318163133478185, + "grad_norm": 0.0, + "learning_rate": 1.447224534943491e-06, + "loss": 1.1816, + "step": 28330 + }, + { + "epoch": 0.8318456750249574, + "grad_norm": 0.0, + "learning_rate": 1.4467318143300869e-06, + "loss": 1.1943, + "step": 28331 + }, + { + "epoch": 0.8318750367020964, + "grad_norm": 0.0, + "learning_rate": 1.446239171066074e-06, + "loss": 1.2246, + "step": 28332 + }, + { + "epoch": 0.8319043983792355, + "grad_norm": 0.0, + "learning_rate": 1.4457466051559144e-06, + "loss": 1.1943, + "step": 28333 + }, + { + "epoch": 0.8319337600563744, + "grad_norm": 0.0, + "learning_rate": 1.4452541166040568e-06, + "loss": 1.2188, + "step": 28334 + }, + { + "epoch": 0.8319631217335134, + "grad_norm": 0.0, + "learning_rate": 1.4447617054149587e-06, + "loss": 1.2734, + "step": 28335 + }, + { + "epoch": 0.8319924834106525, + "grad_norm": 0.0, + "learning_rate": 1.4442693715930722e-06, + "loss": 1.1699, + "step": 28336 + }, + { + "epoch": 0.8320218450877914, + "grad_norm": 0.0, + "learning_rate": 1.4437771151428482e-06, + "loss": 1.1689, + "step": 28337 + }, + { + "epoch": 0.8320512067649304, + "grad_norm": 0.0, + "learning_rate": 1.4432849360687374e-06, + "loss": 1.1436, + "step": 28338 + }, + { + "epoch": 0.8320805684420695, + "grad_norm": 0.0, + "learning_rate": 1.4427928343751941e-06, + "loss": 1.1016, + "step": 28339 + }, + { + "epoch": 0.8321099301192084, + "grad_norm": 0.0, + "learning_rate": 1.442300810066667e-06, + "loss": 1.3271, + "step": 28340 + }, + { + "epoch": 0.8321392917963474, + "grad_norm": 0.0, + "learning_rate": 1.4418088631476035e-06, + "loss": 1.3008, + "step": 28341 + }, + { + "epoch": 0.8321686534734865, + "grad_norm": 0.0, + "learning_rate": 1.4413169936224557e-06, + "loss": 1.2725, + "step": 28342 + }, + { + "epoch": 0.8321980151506254, + "grad_norm": 0.0, + "learning_rate": 1.4408252014956702e-06, + "loss": 1.3242, + "step": 28343 + }, + { + "epoch": 0.8322273768277644, + "grad_norm": 0.0, + "learning_rate": 1.4403334867716946e-06, + "loss": 1.1284, + "step": 28344 + }, + { + "epoch": 0.8322567385049034, + "grad_norm": 0.0, + "learning_rate": 1.4398418494549738e-06, + "loss": 1.1611, + "step": 28345 + }, + { + "epoch": 0.8322861001820424, + "grad_norm": 0.0, + "learning_rate": 1.4393502895499579e-06, + "loss": 1.3232, + "step": 28346 + }, + { + "epoch": 0.8323154618591814, + "grad_norm": 0.0, + "learning_rate": 1.4388588070610864e-06, + "loss": 1.3838, + "step": 28347 + }, + { + "epoch": 0.8323448235363204, + "grad_norm": 0.0, + "learning_rate": 1.4383674019928095e-06, + "loss": 1.2549, + "step": 28348 + }, + { + "epoch": 0.8323741852134594, + "grad_norm": 0.0, + "learning_rate": 1.4378760743495701e-06, + "loss": 1.1611, + "step": 28349 + }, + { + "epoch": 0.8324035468905984, + "grad_norm": 0.0, + "learning_rate": 1.437384824135807e-06, + "loss": 1.2754, + "step": 28350 + }, + { + "epoch": 0.8324329085677374, + "grad_norm": 0.0, + "learning_rate": 1.4368936513559685e-06, + "loss": 1.1758, + "step": 28351 + }, + { + "epoch": 0.8324622702448764, + "grad_norm": 0.0, + "learning_rate": 1.4364025560144935e-06, + "loss": 1.1226, + "step": 28352 + }, + { + "epoch": 0.8324916319220154, + "grad_norm": 0.0, + "learning_rate": 1.4359115381158229e-06, + "loss": 1.2148, + "step": 28353 + }, + { + "epoch": 0.8325209935991544, + "grad_norm": 0.0, + "learning_rate": 1.4354205976643954e-06, + "loss": 1.1738, + "step": 28354 + }, + { + "epoch": 0.8325503552762934, + "grad_norm": 0.0, + "learning_rate": 1.4349297346646563e-06, + "loss": 1.2812, + "step": 28355 + }, + { + "epoch": 0.8325797169534324, + "grad_norm": 0.0, + "learning_rate": 1.4344389491210375e-06, + "loss": 1.2764, + "step": 28356 + }, + { + "epoch": 0.8326090786305714, + "grad_norm": 0.0, + "learning_rate": 1.4339482410379834e-06, + "loss": 1.2373, + "step": 28357 + }, + { + "epoch": 0.8326384403077104, + "grad_norm": 0.0, + "learning_rate": 1.4334576104199283e-06, + "loss": 1.1533, + "step": 28358 + }, + { + "epoch": 0.8326678019848494, + "grad_norm": 0.0, + "learning_rate": 1.4329670572713105e-06, + "loss": 1.1841, + "step": 28359 + }, + { + "epoch": 0.8326971636619884, + "grad_norm": 0.0, + "learning_rate": 1.432476581596567e-06, + "loss": 1.3091, + "step": 28360 + }, + { + "epoch": 0.8327265253391274, + "grad_norm": 0.0, + "learning_rate": 1.4319861834001313e-06, + "loss": 1.3242, + "step": 28361 + }, + { + "epoch": 0.8327558870162663, + "grad_norm": 0.0, + "learning_rate": 1.4314958626864385e-06, + "loss": 1.2153, + "step": 28362 + }, + { + "epoch": 0.8327852486934054, + "grad_norm": 0.0, + "learning_rate": 1.431005619459921e-06, + "loss": 1.3193, + "step": 28363 + }, + { + "epoch": 0.8328146103705444, + "grad_norm": 0.0, + "learning_rate": 1.4305154537250165e-06, + "loss": 1.1172, + "step": 28364 + }, + { + "epoch": 0.8328439720476833, + "grad_norm": 0.0, + "learning_rate": 1.430025365486154e-06, + "loss": 1.1401, + "step": 28365 + }, + { + "epoch": 0.8328733337248223, + "grad_norm": 0.0, + "learning_rate": 1.4295353547477674e-06, + "loss": 1.248, + "step": 28366 + }, + { + "epoch": 0.8329026954019614, + "grad_norm": 0.0, + "learning_rate": 1.429045421514289e-06, + "loss": 1.207, + "step": 28367 + }, + { + "epoch": 0.8329320570791003, + "grad_norm": 0.0, + "learning_rate": 1.4285555657901473e-06, + "loss": 1.2168, + "step": 28368 + }, + { + "epoch": 0.8329614187562393, + "grad_norm": 0.0, + "learning_rate": 1.4280657875797731e-06, + "loss": 1.1348, + "step": 28369 + }, + { + "epoch": 0.8329907804333784, + "grad_norm": 0.0, + "learning_rate": 1.4275760868875933e-06, + "loss": 1.2324, + "step": 28370 + }, + { + "epoch": 0.8330201421105173, + "grad_norm": 0.0, + "learning_rate": 1.4270864637180404e-06, + "loss": 1.0454, + "step": 28371 + }, + { + "epoch": 0.8330495037876563, + "grad_norm": 0.0, + "learning_rate": 1.4265969180755378e-06, + "loss": 1.1113, + "step": 28372 + }, + { + "epoch": 0.8330788654647954, + "grad_norm": 0.0, + "learning_rate": 1.4261074499645166e-06, + "loss": 1.2031, + "step": 28373 + }, + { + "epoch": 0.8331082271419343, + "grad_norm": 0.0, + "learning_rate": 1.4256180593893998e-06, + "loss": 1.1973, + "step": 28374 + }, + { + "epoch": 0.8331375888190733, + "grad_norm": 0.0, + "learning_rate": 1.4251287463546171e-06, + "loss": 1.2285, + "step": 28375 + }, + { + "epoch": 0.8331669504962124, + "grad_norm": 0.0, + "learning_rate": 1.4246395108645916e-06, + "loss": 1.2383, + "step": 28376 + }, + { + "epoch": 0.8331963121733513, + "grad_norm": 0.0, + "learning_rate": 1.4241503529237466e-06, + "loss": 1.3457, + "step": 28377 + }, + { + "epoch": 0.8332256738504903, + "grad_norm": 0.0, + "learning_rate": 1.423661272536505e-06, + "loss": 1.2188, + "step": 28378 + }, + { + "epoch": 0.8332550355276294, + "grad_norm": 0.0, + "learning_rate": 1.4231722697072925e-06, + "loss": 1.2969, + "step": 28379 + }, + { + "epoch": 0.8332843972047683, + "grad_norm": 0.0, + "learning_rate": 1.4226833444405296e-06, + "loss": 1.2412, + "step": 28380 + }, + { + "epoch": 0.8333137588819073, + "grad_norm": 0.0, + "learning_rate": 1.4221944967406353e-06, + "loss": 1.1191, + "step": 28381 + }, + { + "epoch": 0.8333431205590464, + "grad_norm": 0.0, + "learning_rate": 1.421705726612036e-06, + "loss": 1.1836, + "step": 28382 + }, + { + "epoch": 0.8333724822361853, + "grad_norm": 0.0, + "learning_rate": 1.4212170340591458e-06, + "loss": 1.1914, + "step": 28383 + }, + { + "epoch": 0.8334018439133243, + "grad_norm": 0.0, + "learning_rate": 1.420728419086389e-06, + "loss": 1.2563, + "step": 28384 + }, + { + "epoch": 0.8334312055904634, + "grad_norm": 0.0, + "learning_rate": 1.4202398816981833e-06, + "loss": 1.1465, + "step": 28385 + }, + { + "epoch": 0.8334605672676023, + "grad_norm": 0.0, + "learning_rate": 1.419751421898945e-06, + "loss": 1.2158, + "step": 28386 + }, + { + "epoch": 0.8334899289447413, + "grad_norm": 0.0, + "learning_rate": 1.4192630396930906e-06, + "loss": 1.3643, + "step": 28387 + }, + { + "epoch": 0.8335192906218803, + "grad_norm": 0.0, + "learning_rate": 1.41877473508504e-06, + "loss": 1.105, + "step": 28388 + }, + { + "epoch": 0.8335486522990193, + "grad_norm": 0.0, + "learning_rate": 1.4182865080792063e-06, + "loss": 1.0991, + "step": 28389 + }, + { + "epoch": 0.8335780139761583, + "grad_norm": 0.0, + "learning_rate": 1.4177983586800037e-06, + "loss": 1.1738, + "step": 28390 + }, + { + "epoch": 0.8336073756532973, + "grad_norm": 0.0, + "learning_rate": 1.417310286891851e-06, + "loss": 1.0776, + "step": 28391 + }, + { + "epoch": 0.8336367373304363, + "grad_norm": 0.0, + "learning_rate": 1.4168222927191599e-06, + "loss": 1.0244, + "step": 28392 + }, + { + "epoch": 0.8336660990075753, + "grad_norm": 0.0, + "learning_rate": 1.4163343761663427e-06, + "loss": 1.3677, + "step": 28393 + }, + { + "epoch": 0.8336954606847143, + "grad_norm": 0.0, + "learning_rate": 1.4158465372378094e-06, + "loss": 1.2549, + "step": 28394 + }, + { + "epoch": 0.8337248223618533, + "grad_norm": 0.0, + "learning_rate": 1.415358775937976e-06, + "loss": 1.4111, + "step": 28395 + }, + { + "epoch": 0.8337541840389923, + "grad_norm": 0.0, + "learning_rate": 1.4148710922712506e-06, + "loss": 1.2344, + "step": 28396 + }, + { + "epoch": 0.8337835457161313, + "grad_norm": 0.0, + "learning_rate": 1.4143834862420468e-06, + "loss": 1.3013, + "step": 28397 + }, + { + "epoch": 0.8338129073932703, + "grad_norm": 0.0, + "learning_rate": 1.4138959578547718e-06, + "loss": 1.291, + "step": 28398 + }, + { + "epoch": 0.8338422690704093, + "grad_norm": 0.0, + "learning_rate": 1.4134085071138314e-06, + "loss": 1.3271, + "step": 28399 + }, + { + "epoch": 0.8338716307475483, + "grad_norm": 0.0, + "learning_rate": 1.4129211340236403e-06, + "loss": 1.1714, + "step": 28400 + }, + { + "epoch": 0.8339009924246873, + "grad_norm": 0.0, + "learning_rate": 1.4124338385886017e-06, + "loss": 1.1733, + "step": 28401 + }, + { + "epoch": 0.8339303541018263, + "grad_norm": 0.0, + "learning_rate": 1.4119466208131238e-06, + "loss": 1.1465, + "step": 28402 + }, + { + "epoch": 0.8339597157789653, + "grad_norm": 0.0, + "learning_rate": 1.4114594807016102e-06, + "loss": 1.2695, + "step": 28403 + }, + { + "epoch": 0.8339890774561043, + "grad_norm": 0.0, + "learning_rate": 1.4109724182584694e-06, + "loss": 1.2046, + "step": 28404 + }, + { + "epoch": 0.8340184391332433, + "grad_norm": 0.0, + "learning_rate": 1.4104854334881024e-06, + "loss": 1.1216, + "step": 28405 + }, + { + "epoch": 0.8340478008103823, + "grad_norm": 0.0, + "learning_rate": 1.409998526394919e-06, + "loss": 1.1816, + "step": 28406 + }, + { + "epoch": 0.8340771624875213, + "grad_norm": 0.0, + "learning_rate": 1.4095116969833156e-06, + "loss": 1.0547, + "step": 28407 + }, + { + "epoch": 0.8341065241646602, + "grad_norm": 0.0, + "learning_rate": 1.4090249452576998e-06, + "loss": 1.2046, + "step": 28408 + }, + { + "epoch": 0.8341358858417993, + "grad_norm": 0.0, + "learning_rate": 1.4085382712224727e-06, + "loss": 1.373, + "step": 28409 + }, + { + "epoch": 0.8341652475189383, + "grad_norm": 0.0, + "learning_rate": 1.4080516748820338e-06, + "loss": 1.2549, + "step": 28410 + }, + { + "epoch": 0.8341946091960772, + "grad_norm": 0.0, + "learning_rate": 1.4075651562407832e-06, + "loss": 1.167, + "step": 28411 + }, + { + "epoch": 0.8342239708732163, + "grad_norm": 0.0, + "learning_rate": 1.4070787153031207e-06, + "loss": 1.1992, + "step": 28412 + }, + { + "epoch": 0.8342533325503553, + "grad_norm": 0.0, + "learning_rate": 1.406592352073447e-06, + "loss": 1.1738, + "step": 28413 + }, + { + "epoch": 0.8342826942274942, + "grad_norm": 0.0, + "learning_rate": 1.4061060665561588e-06, + "loss": 1.2114, + "step": 28414 + }, + { + "epoch": 0.8343120559046333, + "grad_norm": 0.0, + "learning_rate": 1.4056198587556559e-06, + "loss": 1.2212, + "step": 28415 + }, + { + "epoch": 0.8343414175817723, + "grad_norm": 0.0, + "learning_rate": 1.40513372867633e-06, + "loss": 1.3027, + "step": 28416 + }, + { + "epoch": 0.8343707792589112, + "grad_norm": 0.0, + "learning_rate": 1.404647676322587e-06, + "loss": 1.2388, + "step": 28417 + }, + { + "epoch": 0.8344001409360503, + "grad_norm": 0.0, + "learning_rate": 1.404161701698814e-06, + "loss": 1.2378, + "step": 28418 + }, + { + "epoch": 0.8344295026131893, + "grad_norm": 0.0, + "learning_rate": 1.4036758048094057e-06, + "loss": 1.272, + "step": 28419 + }, + { + "epoch": 0.8344588642903282, + "grad_norm": 0.0, + "learning_rate": 1.4031899856587606e-06, + "loss": 1.2939, + "step": 28420 + }, + { + "epoch": 0.8344882259674673, + "grad_norm": 0.0, + "learning_rate": 1.4027042442512684e-06, + "loss": 1.3184, + "step": 28421 + }, + { + "epoch": 0.8345175876446063, + "grad_norm": 0.0, + "learning_rate": 1.4022185805913257e-06, + "loss": 1.1118, + "step": 28422 + }, + { + "epoch": 0.8345469493217452, + "grad_norm": 0.0, + "learning_rate": 1.4017329946833202e-06, + "loss": 1.1895, + "step": 28423 + }, + { + "epoch": 0.8345763109988843, + "grad_norm": 0.0, + "learning_rate": 1.4012474865316471e-06, + "loss": 1.1626, + "step": 28424 + }, + { + "epoch": 0.8346056726760233, + "grad_norm": 0.0, + "learning_rate": 1.4007620561406953e-06, + "loss": 1.207, + "step": 28425 + }, + { + "epoch": 0.8346350343531622, + "grad_norm": 0.0, + "learning_rate": 1.4002767035148546e-06, + "loss": 1.3604, + "step": 28426 + }, + { + "epoch": 0.8346643960303013, + "grad_norm": 0.0, + "learning_rate": 1.3997914286585113e-06, + "loss": 1.1772, + "step": 28427 + }, + { + "epoch": 0.8346937577074403, + "grad_norm": 0.0, + "learning_rate": 1.3993062315760596e-06, + "loss": 1.1875, + "step": 28428 + }, + { + "epoch": 0.8347231193845792, + "grad_norm": 0.0, + "learning_rate": 1.3988211122718842e-06, + "loss": 1.0762, + "step": 28429 + }, + { + "epoch": 0.8347524810617183, + "grad_norm": 0.0, + "learning_rate": 1.3983360707503691e-06, + "loss": 1.2441, + "step": 28430 + }, + { + "epoch": 0.8347818427388572, + "grad_norm": 0.0, + "learning_rate": 1.3978511070159073e-06, + "loss": 1.2783, + "step": 28431 + }, + { + "epoch": 0.8348112044159962, + "grad_norm": 0.0, + "learning_rate": 1.3973662210728778e-06, + "loss": 1.1768, + "step": 28432 + }, + { + "epoch": 0.8348405660931353, + "grad_norm": 0.0, + "learning_rate": 1.3968814129256713e-06, + "loss": 1.2344, + "step": 28433 + }, + { + "epoch": 0.8348699277702742, + "grad_norm": 0.0, + "learning_rate": 1.3963966825786702e-06, + "loss": 1.3027, + "step": 28434 + }, + { + "epoch": 0.8348992894474132, + "grad_norm": 0.0, + "learning_rate": 1.395912030036257e-06, + "loss": 1.167, + "step": 28435 + }, + { + "epoch": 0.8349286511245523, + "grad_norm": 0.0, + "learning_rate": 1.3954274553028123e-06, + "loss": 1.1284, + "step": 28436 + }, + { + "epoch": 0.8349580128016912, + "grad_norm": 0.0, + "learning_rate": 1.3949429583827233e-06, + "loss": 1.2266, + "step": 28437 + }, + { + "epoch": 0.8349873744788302, + "grad_norm": 0.0, + "learning_rate": 1.394458539280369e-06, + "loss": 1.2671, + "step": 28438 + }, + { + "epoch": 0.8350167361559693, + "grad_norm": 0.0, + "learning_rate": 1.3939741980001265e-06, + "loss": 1.248, + "step": 28439 + }, + { + "epoch": 0.8350460978331082, + "grad_norm": 0.0, + "learning_rate": 1.3934899345463826e-06, + "loss": 1.3076, + "step": 28440 + }, + { + "epoch": 0.8350754595102472, + "grad_norm": 0.0, + "learning_rate": 1.3930057489235117e-06, + "loss": 1.2095, + "step": 28441 + }, + { + "epoch": 0.8351048211873863, + "grad_norm": 0.0, + "learning_rate": 1.3925216411358978e-06, + "loss": 1.1587, + "step": 28442 + }, + { + "epoch": 0.8351341828645252, + "grad_norm": 0.0, + "learning_rate": 1.3920376111879108e-06, + "loss": 1.2559, + "step": 28443 + }, + { + "epoch": 0.8351635445416642, + "grad_norm": 0.0, + "learning_rate": 1.3915536590839351e-06, + "loss": 1.248, + "step": 28444 + }, + { + "epoch": 0.8351929062188033, + "grad_norm": 0.0, + "learning_rate": 1.3910697848283417e-06, + "loss": 1.2046, + "step": 28445 + }, + { + "epoch": 0.8352222678959422, + "grad_norm": 0.0, + "learning_rate": 1.3905859884255102e-06, + "loss": 1.2764, + "step": 28446 + }, + { + "epoch": 0.8352516295730812, + "grad_norm": 0.0, + "learning_rate": 1.3901022698798162e-06, + "loss": 1.3184, + "step": 28447 + }, + { + "epoch": 0.8352809912502203, + "grad_norm": 0.0, + "learning_rate": 1.3896186291956294e-06, + "loss": 1.2764, + "step": 28448 + }, + { + "epoch": 0.8353103529273592, + "grad_norm": 0.0, + "learning_rate": 1.3891350663773285e-06, + "loss": 1.376, + "step": 28449 + }, + { + "epoch": 0.8353397146044982, + "grad_norm": 0.0, + "learning_rate": 1.3886515814292845e-06, + "loss": 1.2891, + "step": 28450 + }, + { + "epoch": 0.8353690762816373, + "grad_norm": 0.0, + "learning_rate": 1.3881681743558695e-06, + "loss": 1.1875, + "step": 28451 + }, + { + "epoch": 0.8353984379587762, + "grad_norm": 0.0, + "learning_rate": 1.3876848451614532e-06, + "loss": 1.0874, + "step": 28452 + }, + { + "epoch": 0.8354277996359152, + "grad_norm": 0.0, + "learning_rate": 1.3872015938504113e-06, + "loss": 1.2446, + "step": 28453 + }, + { + "epoch": 0.8354571613130543, + "grad_norm": 0.0, + "learning_rate": 1.3867184204271078e-06, + "loss": 1.1592, + "step": 28454 + }, + { + "epoch": 0.8354865229901932, + "grad_norm": 0.0, + "learning_rate": 1.3862353248959182e-06, + "loss": 1.2529, + "step": 28455 + }, + { + "epoch": 0.8355158846673322, + "grad_norm": 0.0, + "learning_rate": 1.3857523072612068e-06, + "loss": 1.2852, + "step": 28456 + }, + { + "epoch": 0.8355452463444712, + "grad_norm": 0.0, + "learning_rate": 1.3852693675273454e-06, + "loss": 1.1699, + "step": 28457 + }, + { + "epoch": 0.8355746080216102, + "grad_norm": 0.0, + "learning_rate": 1.3847865056986998e-06, + "loss": 1.3398, + "step": 28458 + }, + { + "epoch": 0.8356039696987492, + "grad_norm": 0.0, + "learning_rate": 1.384303721779635e-06, + "loss": 1.1421, + "step": 28459 + }, + { + "epoch": 0.8356333313758882, + "grad_norm": 0.0, + "learning_rate": 1.3838210157745192e-06, + "loss": 1.188, + "step": 28460 + }, + { + "epoch": 0.8356626930530272, + "grad_norm": 0.0, + "learning_rate": 1.3833383876877139e-06, + "loss": 1.1401, + "step": 28461 + }, + { + "epoch": 0.8356920547301662, + "grad_norm": 0.0, + "learning_rate": 1.3828558375235889e-06, + "loss": 1.2549, + "step": 28462 + }, + { + "epoch": 0.8357214164073052, + "grad_norm": 0.0, + "learning_rate": 1.3823733652865035e-06, + "loss": 1.208, + "step": 28463 + }, + { + "epoch": 0.8357507780844442, + "grad_norm": 0.0, + "learning_rate": 1.3818909709808249e-06, + "loss": 1.3086, + "step": 28464 + }, + { + "epoch": 0.8357801397615832, + "grad_norm": 0.0, + "learning_rate": 1.3814086546109107e-06, + "loss": 1.2075, + "step": 28465 + }, + { + "epoch": 0.8358095014387221, + "grad_norm": 0.0, + "learning_rate": 1.3809264161811275e-06, + "loss": 1.2832, + "step": 28466 + }, + { + "epoch": 0.8358388631158612, + "grad_norm": 0.0, + "learning_rate": 1.3804442556958364e-06, + "loss": 1.3037, + "step": 28467 + }, + { + "epoch": 0.8358682247930002, + "grad_norm": 0.0, + "learning_rate": 1.3799621731593904e-06, + "loss": 1.2744, + "step": 28468 + }, + { + "epoch": 0.8358975864701391, + "grad_norm": 0.0, + "learning_rate": 1.3794801685761573e-06, + "loss": 1.127, + "step": 28469 + }, + { + "epoch": 0.8359269481472782, + "grad_norm": 0.0, + "learning_rate": 1.3789982419504899e-06, + "loss": 1.1167, + "step": 28470 + }, + { + "epoch": 0.8359563098244172, + "grad_norm": 0.0, + "learning_rate": 1.3785163932867519e-06, + "loss": 1.1016, + "step": 28471 + }, + { + "epoch": 0.8359856715015561, + "grad_norm": 0.0, + "learning_rate": 1.378034622589296e-06, + "loss": 1.2368, + "step": 28472 + }, + { + "epoch": 0.8360150331786952, + "grad_norm": 0.0, + "learning_rate": 1.3775529298624824e-06, + "loss": 1.2383, + "step": 28473 + }, + { + "epoch": 0.8360443948558341, + "grad_norm": 0.0, + "learning_rate": 1.3770713151106673e-06, + "loss": 1.0859, + "step": 28474 + }, + { + "epoch": 0.8360737565329731, + "grad_norm": 0.0, + "learning_rate": 1.3765897783382042e-06, + "loss": 1.2705, + "step": 28475 + }, + { + "epoch": 0.8361031182101122, + "grad_norm": 0.0, + "learning_rate": 1.3761083195494451e-06, + "loss": 1.2549, + "step": 28476 + }, + { + "epoch": 0.8361324798872511, + "grad_norm": 0.0, + "learning_rate": 1.3756269387487497e-06, + "loss": 1.0796, + "step": 28477 + }, + { + "epoch": 0.8361618415643901, + "grad_norm": 0.0, + "learning_rate": 1.3751456359404692e-06, + "loss": 1.0933, + "step": 28478 + }, + { + "epoch": 0.8361912032415292, + "grad_norm": 0.0, + "learning_rate": 1.374664411128953e-06, + "loss": 1.2031, + "step": 28479 + }, + { + "epoch": 0.8362205649186681, + "grad_norm": 0.0, + "learning_rate": 1.3741832643185583e-06, + "loss": 1.1953, + "step": 28480 + }, + { + "epoch": 0.8362499265958071, + "grad_norm": 0.0, + "learning_rate": 1.373702195513631e-06, + "loss": 1.2144, + "step": 28481 + }, + { + "epoch": 0.8362792882729462, + "grad_norm": 0.0, + "learning_rate": 1.3732212047185256e-06, + "loss": 1.2393, + "step": 28482 + }, + { + "epoch": 0.8363086499500851, + "grad_norm": 0.0, + "learning_rate": 1.3727402919375898e-06, + "loss": 1.3477, + "step": 28483 + }, + { + "epoch": 0.8363380116272241, + "grad_norm": 0.0, + "learning_rate": 1.3722594571751746e-06, + "loss": 1.1963, + "step": 28484 + }, + { + "epoch": 0.8363673733043632, + "grad_norm": 0.0, + "learning_rate": 1.371778700435623e-06, + "loss": 1.3047, + "step": 28485 + }, + { + "epoch": 0.8363967349815021, + "grad_norm": 0.0, + "learning_rate": 1.3712980217232897e-06, + "loss": 1.2256, + "step": 28486 + }, + { + "epoch": 0.8364260966586411, + "grad_norm": 0.0, + "learning_rate": 1.3708174210425173e-06, + "loss": 1.1543, + "step": 28487 + }, + { + "epoch": 0.8364554583357802, + "grad_norm": 0.0, + "learning_rate": 1.3703368983976517e-06, + "loss": 1.2266, + "step": 28488 + }, + { + "epoch": 0.8364848200129191, + "grad_norm": 0.0, + "learning_rate": 1.3698564537930415e-06, + "loss": 1.2031, + "step": 28489 + }, + { + "epoch": 0.8365141816900581, + "grad_norm": 0.0, + "learning_rate": 1.3693760872330263e-06, + "loss": 1.208, + "step": 28490 + }, + { + "epoch": 0.8365435433671972, + "grad_norm": 0.0, + "learning_rate": 1.3688957987219564e-06, + "loss": 1.2783, + "step": 28491 + }, + { + "epoch": 0.8365729050443361, + "grad_norm": 0.0, + "learning_rate": 1.3684155882641725e-06, + "loss": 1.252, + "step": 28492 + }, + { + "epoch": 0.8366022667214751, + "grad_norm": 0.0, + "learning_rate": 1.3679354558640168e-06, + "loss": 1.3438, + "step": 28493 + }, + { + "epoch": 0.8366316283986142, + "grad_norm": 0.0, + "learning_rate": 1.3674554015258301e-06, + "loss": 1.0845, + "step": 28494 + }, + { + "epoch": 0.8366609900757531, + "grad_norm": 0.0, + "learning_rate": 1.3669754252539558e-06, + "loss": 1.2754, + "step": 28495 + }, + { + "epoch": 0.8366903517528921, + "grad_norm": 0.0, + "learning_rate": 1.3664955270527324e-06, + "loss": 1.1968, + "step": 28496 + }, + { + "epoch": 0.8367197134300312, + "grad_norm": 0.0, + "learning_rate": 1.3660157069265034e-06, + "loss": 1.1934, + "step": 28497 + }, + { + "epoch": 0.8367490751071701, + "grad_norm": 0.0, + "learning_rate": 1.365535964879605e-06, + "loss": 1.2578, + "step": 28498 + }, + { + "epoch": 0.8367784367843091, + "grad_norm": 0.0, + "learning_rate": 1.3650563009163765e-06, + "loss": 1.1553, + "step": 28499 + }, + { + "epoch": 0.8368077984614481, + "grad_norm": 0.0, + "learning_rate": 1.364576715041155e-06, + "loss": 1.1836, + "step": 28500 + }, + { + "epoch": 0.8368371601385871, + "grad_norm": 0.0, + "learning_rate": 1.364097207258276e-06, + "loss": 1.1626, + "step": 28501 + }, + { + "epoch": 0.8368665218157261, + "grad_norm": 0.0, + "learning_rate": 1.3636177775720794e-06, + "loss": 1.1646, + "step": 28502 + }, + { + "epoch": 0.8368958834928651, + "grad_norm": 0.0, + "learning_rate": 1.3631384259868974e-06, + "loss": 1.2715, + "step": 28503 + }, + { + "epoch": 0.8369252451700041, + "grad_norm": 0.0, + "learning_rate": 1.3626591525070688e-06, + "loss": 1.1582, + "step": 28504 + }, + { + "epoch": 0.8369546068471431, + "grad_norm": 0.0, + "learning_rate": 1.3621799571369221e-06, + "loss": 1.1426, + "step": 28505 + }, + { + "epoch": 0.8369839685242821, + "grad_norm": 0.0, + "learning_rate": 1.3617008398807974e-06, + "loss": 1.4209, + "step": 28506 + }, + { + "epoch": 0.8370133302014211, + "grad_norm": 0.0, + "learning_rate": 1.3612218007430244e-06, + "loss": 1.1504, + "step": 28507 + }, + { + "epoch": 0.8370426918785601, + "grad_norm": 0.0, + "learning_rate": 1.3607428397279355e-06, + "loss": 1.1899, + "step": 28508 + }, + { + "epoch": 0.8370720535556991, + "grad_norm": 0.0, + "learning_rate": 1.3602639568398612e-06, + "loss": 1.3423, + "step": 28509 + }, + { + "epoch": 0.8371014152328381, + "grad_norm": 0.0, + "learning_rate": 1.3597851520831295e-06, + "loss": 1.3525, + "step": 28510 + }, + { + "epoch": 0.8371307769099771, + "grad_norm": 0.0, + "learning_rate": 1.359306425462077e-06, + "loss": 1.1074, + "step": 28511 + }, + { + "epoch": 0.8371601385871161, + "grad_norm": 0.0, + "learning_rate": 1.3588277769810265e-06, + "loss": 1.2012, + "step": 28512 + }, + { + "epoch": 0.8371895002642551, + "grad_norm": 0.0, + "learning_rate": 1.3583492066443117e-06, + "loss": 1.1758, + "step": 28513 + }, + { + "epoch": 0.837218861941394, + "grad_norm": 0.0, + "learning_rate": 1.3578707144562564e-06, + "loss": 1.3027, + "step": 28514 + }, + { + "epoch": 0.8372482236185331, + "grad_norm": 0.0, + "learning_rate": 1.3573923004211909e-06, + "loss": 1.1978, + "step": 28515 + }, + { + "epoch": 0.8372775852956721, + "grad_norm": 0.0, + "learning_rate": 1.3569139645434404e-06, + "loss": 1.0776, + "step": 28516 + }, + { + "epoch": 0.837306946972811, + "grad_norm": 0.0, + "learning_rate": 1.3564357068273316e-06, + "loss": 1.2207, + "step": 28517 + }, + { + "epoch": 0.8373363086499501, + "grad_norm": 0.0, + "learning_rate": 1.3559575272771875e-06, + "loss": 1.1787, + "step": 28518 + }, + { + "epoch": 0.8373656703270891, + "grad_norm": 0.0, + "learning_rate": 1.3554794258973302e-06, + "loss": 1.3164, + "step": 28519 + }, + { + "epoch": 0.837395032004228, + "grad_norm": 0.0, + "learning_rate": 1.3550014026920898e-06, + "loss": 1.2168, + "step": 28520 + }, + { + "epoch": 0.8374243936813671, + "grad_norm": 0.0, + "learning_rate": 1.354523457665783e-06, + "loss": 1.1714, + "step": 28521 + }, + { + "epoch": 0.8374537553585061, + "grad_norm": 0.0, + "learning_rate": 1.3540455908227367e-06, + "loss": 1.2285, + "step": 28522 + }, + { + "epoch": 0.837483117035645, + "grad_norm": 0.0, + "learning_rate": 1.3535678021672716e-06, + "loss": 1.1851, + "step": 28523 + }, + { + "epoch": 0.8375124787127841, + "grad_norm": 0.0, + "learning_rate": 1.353090091703706e-06, + "loss": 1.3193, + "step": 28524 + }, + { + "epoch": 0.8375418403899231, + "grad_norm": 0.0, + "learning_rate": 1.3526124594363598e-06, + "loss": 1.1782, + "step": 28525 + }, + { + "epoch": 0.837571202067062, + "grad_norm": 0.0, + "learning_rate": 1.3521349053695554e-06, + "loss": 1.2598, + "step": 28526 + }, + { + "epoch": 0.8376005637442011, + "grad_norm": 0.0, + "learning_rate": 1.35165742950761e-06, + "loss": 1.1489, + "step": 28527 + }, + { + "epoch": 0.8376299254213401, + "grad_norm": 0.0, + "learning_rate": 1.3511800318548385e-06, + "loss": 1.0171, + "step": 28528 + }, + { + "epoch": 0.837659287098479, + "grad_norm": 0.0, + "learning_rate": 1.3507027124155647e-06, + "loss": 1.1685, + "step": 28529 + }, + { + "epoch": 0.8376886487756181, + "grad_norm": 0.0, + "learning_rate": 1.3502254711940976e-06, + "loss": 1.2334, + "step": 28530 + }, + { + "epoch": 0.8377180104527571, + "grad_norm": 0.0, + "learning_rate": 1.3497483081947594e-06, + "loss": 1.2441, + "step": 28531 + }, + { + "epoch": 0.837747372129896, + "grad_norm": 0.0, + "learning_rate": 1.349271223421863e-06, + "loss": 1.2368, + "step": 28532 + }, + { + "epoch": 0.8377767338070351, + "grad_norm": 0.0, + "learning_rate": 1.348794216879723e-06, + "loss": 1.1709, + "step": 28533 + }, + { + "epoch": 0.8378060954841741, + "grad_norm": 0.0, + "learning_rate": 1.3483172885726492e-06, + "loss": 1.2598, + "step": 28534 + }, + { + "epoch": 0.837835457161313, + "grad_norm": 0.0, + "learning_rate": 1.3478404385049604e-06, + "loss": 1.1748, + "step": 28535 + }, + { + "epoch": 0.8378648188384521, + "grad_norm": 0.0, + "learning_rate": 1.3473636666809663e-06, + "loss": 1.2373, + "step": 28536 + }, + { + "epoch": 0.8378941805155911, + "grad_norm": 0.0, + "learning_rate": 1.3468869731049773e-06, + "loss": 1.2861, + "step": 28537 + }, + { + "epoch": 0.83792354219273, + "grad_norm": 0.0, + "learning_rate": 1.3464103577813059e-06, + "loss": 1.2559, + "step": 28538 + }, + { + "epoch": 0.8379529038698691, + "grad_norm": 0.0, + "learning_rate": 1.3459338207142613e-06, + "loss": 1.1714, + "step": 28539 + }, + { + "epoch": 0.837982265547008, + "grad_norm": 0.0, + "learning_rate": 1.3454573619081545e-06, + "loss": 1.208, + "step": 28540 + }, + { + "epoch": 0.838011627224147, + "grad_norm": 0.0, + "learning_rate": 1.3449809813672942e-06, + "loss": 1.1099, + "step": 28541 + }, + { + "epoch": 0.8380409889012861, + "grad_norm": 0.0, + "learning_rate": 1.344504679095987e-06, + "loss": 1.2227, + "step": 28542 + }, + { + "epoch": 0.838070350578425, + "grad_norm": 0.0, + "learning_rate": 1.3440284550985383e-06, + "loss": 1.1069, + "step": 28543 + }, + { + "epoch": 0.838099712255564, + "grad_norm": 0.0, + "learning_rate": 1.343552309379259e-06, + "loss": 1.0586, + "step": 28544 + }, + { + "epoch": 0.8381290739327031, + "grad_norm": 0.0, + "learning_rate": 1.3430762419424525e-06, + "loss": 1.0566, + "step": 28545 + }, + { + "epoch": 0.838158435609842, + "grad_norm": 0.0, + "learning_rate": 1.3426002527924253e-06, + "loss": 1.291, + "step": 28546 + }, + { + "epoch": 0.838187797286981, + "grad_norm": 0.0, + "learning_rate": 1.3421243419334817e-06, + "loss": 1.2334, + "step": 28547 + }, + { + "epoch": 0.8382171589641201, + "grad_norm": 0.0, + "learning_rate": 1.341648509369925e-06, + "loss": 1.3086, + "step": 28548 + }, + { + "epoch": 0.838246520641259, + "grad_norm": 0.0, + "learning_rate": 1.3411727551060582e-06, + "loss": 1.2373, + "step": 28549 + }, + { + "epoch": 0.838275882318398, + "grad_norm": 0.0, + "learning_rate": 1.3406970791461816e-06, + "loss": 1.1616, + "step": 28550 + }, + { + "epoch": 0.8383052439955371, + "grad_norm": 0.0, + "learning_rate": 1.3402214814946014e-06, + "loss": 1.146, + "step": 28551 + }, + { + "epoch": 0.838334605672676, + "grad_norm": 0.0, + "learning_rate": 1.339745962155613e-06, + "loss": 1.2158, + "step": 28552 + }, + { + "epoch": 0.838363967349815, + "grad_norm": 0.0, + "learning_rate": 1.3392705211335232e-06, + "loss": 1.2432, + "step": 28553 + }, + { + "epoch": 0.8383933290269541, + "grad_norm": 0.0, + "learning_rate": 1.3387951584326253e-06, + "loss": 1.2734, + "step": 28554 + }, + { + "epoch": 0.838422690704093, + "grad_norm": 0.0, + "learning_rate": 1.3383198740572235e-06, + "loss": 1.1934, + "step": 28555 + }, + { + "epoch": 0.838452052381232, + "grad_norm": 0.0, + "learning_rate": 1.337844668011613e-06, + "loss": 1.3135, + "step": 28556 + }, + { + "epoch": 0.8384814140583711, + "grad_norm": 0.0, + "learning_rate": 1.3373695403000919e-06, + "loss": 1.1973, + "step": 28557 + }, + { + "epoch": 0.83851077573551, + "grad_norm": 0.0, + "learning_rate": 1.3368944909269566e-06, + "loss": 1.1934, + "step": 28558 + }, + { + "epoch": 0.838540137412649, + "grad_norm": 0.0, + "learning_rate": 1.3364195198965003e-06, + "loss": 1.1953, + "step": 28559 + }, + { + "epoch": 0.8385694990897881, + "grad_norm": 0.0, + "learning_rate": 1.335944627213024e-06, + "loss": 1.2725, + "step": 28560 + }, + { + "epoch": 0.838598860766927, + "grad_norm": 0.0, + "learning_rate": 1.3354698128808164e-06, + "loss": 1.248, + "step": 28561 + }, + { + "epoch": 0.838628222444066, + "grad_norm": 0.0, + "learning_rate": 1.3349950769041764e-06, + "loss": 1.2671, + "step": 28562 + }, + { + "epoch": 0.8386575841212051, + "grad_norm": 0.0, + "learning_rate": 1.334520419287394e-06, + "loss": 1.2529, + "step": 28563 + }, + { + "epoch": 0.838686945798344, + "grad_norm": 0.0, + "learning_rate": 1.3340458400347645e-06, + "loss": 1.1348, + "step": 28564 + }, + { + "epoch": 0.838716307475483, + "grad_norm": 0.0, + "learning_rate": 1.3335713391505768e-06, + "loss": 1.2788, + "step": 28565 + }, + { + "epoch": 0.838745669152622, + "grad_norm": 0.0, + "learning_rate": 1.3330969166391239e-06, + "loss": 1.1934, + "step": 28566 + }, + { + "epoch": 0.838775030829761, + "grad_norm": 0.0, + "learning_rate": 1.332622572504695e-06, + "loss": 1.1875, + "step": 28567 + }, + { + "epoch": 0.8388043925069, + "grad_norm": 0.0, + "learning_rate": 1.3321483067515783e-06, + "loss": 1.2881, + "step": 28568 + }, + { + "epoch": 0.8388337541840389, + "grad_norm": 0.0, + "learning_rate": 1.3316741193840654e-06, + "loss": 1.3623, + "step": 28569 + }, + { + "epoch": 0.838863115861178, + "grad_norm": 0.0, + "learning_rate": 1.3312000104064427e-06, + "loss": 1.2085, + "step": 28570 + }, + { + "epoch": 0.838892477538317, + "grad_norm": 0.0, + "learning_rate": 1.3307259798229987e-06, + "loss": 1.041, + "step": 28571 + }, + { + "epoch": 0.8389218392154559, + "grad_norm": 0.0, + "learning_rate": 1.3302520276380216e-06, + "loss": 1.1436, + "step": 28572 + }, + { + "epoch": 0.838951200892595, + "grad_norm": 0.0, + "learning_rate": 1.3297781538557953e-06, + "loss": 1.2988, + "step": 28573 + }, + { + "epoch": 0.838980562569734, + "grad_norm": 0.0, + "learning_rate": 1.3293043584806032e-06, + "loss": 1.1812, + "step": 28574 + }, + { + "epoch": 0.8390099242468729, + "grad_norm": 0.0, + "learning_rate": 1.3288306415167352e-06, + "loss": 1.2393, + "step": 28575 + }, + { + "epoch": 0.839039285924012, + "grad_norm": 0.0, + "learning_rate": 1.3283570029684712e-06, + "loss": 1.2251, + "step": 28576 + }, + { + "epoch": 0.839068647601151, + "grad_norm": 0.0, + "learning_rate": 1.3278834428400944e-06, + "loss": 1.208, + "step": 28577 + }, + { + "epoch": 0.8390980092782899, + "grad_norm": 0.0, + "learning_rate": 1.3274099611358904e-06, + "loss": 1.1748, + "step": 28578 + }, + { + "epoch": 0.839127370955429, + "grad_norm": 0.0, + "learning_rate": 1.3269365578601367e-06, + "loss": 1.2764, + "step": 28579 + }, + { + "epoch": 0.839156732632568, + "grad_norm": 0.0, + "learning_rate": 1.32646323301712e-06, + "loss": 1.2422, + "step": 28580 + }, + { + "epoch": 0.8391860943097069, + "grad_norm": 0.0, + "learning_rate": 1.325989986611117e-06, + "loss": 1.2246, + "step": 28581 + }, + { + "epoch": 0.839215455986846, + "grad_norm": 0.0, + "learning_rate": 1.3255168186464084e-06, + "loss": 1.2344, + "step": 28582 + }, + { + "epoch": 0.839244817663985, + "grad_norm": 0.0, + "learning_rate": 1.32504372912727e-06, + "loss": 1.2578, + "step": 28583 + }, + { + "epoch": 0.8392741793411239, + "grad_norm": 0.0, + "learning_rate": 1.3245707180579858e-06, + "loss": 1.2051, + "step": 28584 + }, + { + "epoch": 0.839303541018263, + "grad_norm": 0.0, + "learning_rate": 1.3240977854428306e-06, + "loss": 1.2578, + "step": 28585 + }, + { + "epoch": 0.839332902695402, + "grad_norm": 0.0, + "learning_rate": 1.3236249312860772e-06, + "loss": 1.2793, + "step": 28586 + }, + { + "epoch": 0.8393622643725409, + "grad_norm": 0.0, + "learning_rate": 1.323152155592009e-06, + "loss": 1.332, + "step": 28587 + }, + { + "epoch": 0.83939162604968, + "grad_norm": 0.0, + "learning_rate": 1.3226794583648962e-06, + "loss": 1.2949, + "step": 28588 + }, + { + "epoch": 0.839420987726819, + "grad_norm": 0.0, + "learning_rate": 1.322206839609017e-06, + "loss": 1.1709, + "step": 28589 + }, + { + "epoch": 0.8394503494039579, + "grad_norm": 0.0, + "learning_rate": 1.3217342993286441e-06, + "loss": 1.1592, + "step": 28590 + }, + { + "epoch": 0.839479711081097, + "grad_norm": 0.0, + "learning_rate": 1.3212618375280496e-06, + "loss": 1.0811, + "step": 28591 + }, + { + "epoch": 0.8395090727582359, + "grad_norm": 0.0, + "learning_rate": 1.3207894542115052e-06, + "loss": 1.1543, + "step": 28592 + }, + { + "epoch": 0.8395384344353749, + "grad_norm": 0.0, + "learning_rate": 1.3203171493832878e-06, + "loss": 1.2568, + "step": 28593 + }, + { + "epoch": 0.839567796112514, + "grad_norm": 0.0, + "learning_rate": 1.3198449230476618e-06, + "loss": 1.186, + "step": 28594 + }, + { + "epoch": 0.8395971577896529, + "grad_norm": 0.0, + "learning_rate": 1.3193727752089036e-06, + "loss": 1.2402, + "step": 28595 + }, + { + "epoch": 0.8396265194667919, + "grad_norm": 0.0, + "learning_rate": 1.318900705871281e-06, + "loss": 1.0869, + "step": 28596 + }, + { + "epoch": 0.839655881143931, + "grad_norm": 0.0, + "learning_rate": 1.3184287150390618e-06, + "loss": 1.1304, + "step": 28597 + }, + { + "epoch": 0.8396852428210699, + "grad_norm": 0.0, + "learning_rate": 1.3179568027165157e-06, + "loss": 1.207, + "step": 28598 + }, + { + "epoch": 0.8397146044982089, + "grad_norm": 0.0, + "learning_rate": 1.3174849689079073e-06, + "loss": 1.2642, + "step": 28599 + }, + { + "epoch": 0.839743966175348, + "grad_norm": 0.0, + "learning_rate": 1.3170132136175084e-06, + "loss": 1.209, + "step": 28600 + }, + { + "epoch": 0.8397733278524869, + "grad_norm": 0.0, + "learning_rate": 1.3165415368495803e-06, + "loss": 1.2388, + "step": 28601 + }, + { + "epoch": 0.8398026895296259, + "grad_norm": 0.0, + "learning_rate": 1.3160699386083941e-06, + "loss": 1.0889, + "step": 28602 + }, + { + "epoch": 0.839832051206765, + "grad_norm": 0.0, + "learning_rate": 1.3155984188982085e-06, + "loss": 1.2285, + "step": 28603 + }, + { + "epoch": 0.8398614128839039, + "grad_norm": 0.0, + "learning_rate": 1.3151269777232923e-06, + "loss": 1.2368, + "step": 28604 + }, + { + "epoch": 0.8398907745610429, + "grad_norm": 0.0, + "learning_rate": 1.3146556150879075e-06, + "loss": 1.2178, + "step": 28605 + }, + { + "epoch": 0.839920136238182, + "grad_norm": 0.0, + "learning_rate": 1.3141843309963165e-06, + "loss": 1.1753, + "step": 28606 + }, + { + "epoch": 0.8399494979153209, + "grad_norm": 0.0, + "learning_rate": 1.3137131254527803e-06, + "loss": 1.0264, + "step": 28607 + }, + { + "epoch": 0.8399788595924599, + "grad_norm": 0.0, + "learning_rate": 1.3132419984615595e-06, + "loss": 1.2588, + "step": 28608 + }, + { + "epoch": 0.840008221269599, + "grad_norm": 0.0, + "learning_rate": 1.312770950026918e-06, + "loss": 1.1265, + "step": 28609 + }, + { + "epoch": 0.8400375829467379, + "grad_norm": 0.0, + "learning_rate": 1.3122999801531112e-06, + "loss": 1.2031, + "step": 28610 + }, + { + "epoch": 0.8400669446238769, + "grad_norm": 0.0, + "learning_rate": 1.311829088844403e-06, + "loss": 0.9854, + "step": 28611 + }, + { + "epoch": 0.840096306301016, + "grad_norm": 0.0, + "learning_rate": 1.311358276105047e-06, + "loss": 1.2324, + "step": 28612 + }, + { + "epoch": 0.8401256679781549, + "grad_norm": 0.0, + "learning_rate": 1.3108875419393064e-06, + "loss": 1.1655, + "step": 28613 + }, + { + "epoch": 0.8401550296552939, + "grad_norm": 0.0, + "learning_rate": 1.3104168863514344e-06, + "loss": 1.3115, + "step": 28614 + }, + { + "epoch": 0.840184391332433, + "grad_norm": 0.0, + "learning_rate": 1.3099463093456876e-06, + "loss": 1.1743, + "step": 28615 + }, + { + "epoch": 0.8402137530095719, + "grad_norm": 0.0, + "learning_rate": 1.3094758109263216e-06, + "loss": 1.1519, + "step": 28616 + }, + { + "epoch": 0.8402431146867109, + "grad_norm": 0.0, + "learning_rate": 1.3090053910975907e-06, + "loss": 1.2217, + "step": 28617 + }, + { + "epoch": 0.8402724763638499, + "grad_norm": 0.0, + "learning_rate": 1.3085350498637507e-06, + "loss": 1.2979, + "step": 28618 + }, + { + "epoch": 0.8403018380409889, + "grad_norm": 0.0, + "learning_rate": 1.308064787229053e-06, + "loss": 1.3218, + "step": 28619 + }, + { + "epoch": 0.8403311997181279, + "grad_norm": 0.0, + "learning_rate": 1.3075946031977528e-06, + "loss": 1.0835, + "step": 28620 + }, + { + "epoch": 0.8403605613952669, + "grad_norm": 0.0, + "learning_rate": 1.3071244977740994e-06, + "loss": 1.2471, + "step": 28621 + }, + { + "epoch": 0.8403899230724059, + "grad_norm": 0.0, + "learning_rate": 1.3066544709623496e-06, + "loss": 1.2168, + "step": 28622 + }, + { + "epoch": 0.8404192847495449, + "grad_norm": 0.0, + "learning_rate": 1.3061845227667447e-06, + "loss": 1.0518, + "step": 28623 + }, + { + "epoch": 0.8404486464266839, + "grad_norm": 0.0, + "learning_rate": 1.305714653191542e-06, + "loss": 1.165, + "step": 28624 + }, + { + "epoch": 0.8404780081038229, + "grad_norm": 0.0, + "learning_rate": 1.3052448622409897e-06, + "loss": 1.29, + "step": 28625 + }, + { + "epoch": 0.8405073697809619, + "grad_norm": 0.0, + "learning_rate": 1.3047751499193318e-06, + "loss": 1.2954, + "step": 28626 + }, + { + "epoch": 0.8405367314581009, + "grad_norm": 0.0, + "learning_rate": 1.3043055162308206e-06, + "loss": 1.2031, + "step": 28627 + }, + { + "epoch": 0.8405660931352399, + "grad_norm": 0.0, + "learning_rate": 1.3038359611797002e-06, + "loss": 1.3164, + "step": 28628 + }, + { + "epoch": 0.8405954548123789, + "grad_norm": 0.0, + "learning_rate": 1.3033664847702209e-06, + "loss": 1.1611, + "step": 28629 + }, + { + "epoch": 0.8406248164895179, + "grad_norm": 0.0, + "learning_rate": 1.3028970870066248e-06, + "loss": 1.1846, + "step": 28630 + }, + { + "epoch": 0.8406541781666569, + "grad_norm": 0.0, + "learning_rate": 1.3024277678931586e-06, + "loss": 1.1309, + "step": 28631 + }, + { + "epoch": 0.8406835398437958, + "grad_norm": 0.0, + "learning_rate": 1.3019585274340618e-06, + "loss": 1.1904, + "step": 28632 + }, + { + "epoch": 0.8407129015209349, + "grad_norm": 0.0, + "learning_rate": 1.301489365633586e-06, + "loss": 1.1904, + "step": 28633 + }, + { + "epoch": 0.8407422631980739, + "grad_norm": 0.0, + "learning_rate": 1.3010202824959662e-06, + "loss": 1.1875, + "step": 28634 + }, + { + "epoch": 0.8407716248752128, + "grad_norm": 0.0, + "learning_rate": 1.3005512780254493e-06, + "loss": 1.1494, + "step": 28635 + }, + { + "epoch": 0.8408009865523519, + "grad_norm": 0.0, + "learning_rate": 1.3000823522262762e-06, + "loss": 1.2646, + "step": 28636 + }, + { + "epoch": 0.8408303482294909, + "grad_norm": 0.0, + "learning_rate": 1.2996135051026837e-06, + "loss": 1.1328, + "step": 28637 + }, + { + "epoch": 0.8408597099066298, + "grad_norm": 0.0, + "learning_rate": 1.2991447366589171e-06, + "loss": 1.0171, + "step": 28638 + }, + { + "epoch": 0.8408890715837689, + "grad_norm": 0.0, + "learning_rate": 1.2986760468992132e-06, + "loss": 1.1934, + "step": 28639 + }, + { + "epoch": 0.8409184332609079, + "grad_norm": 0.0, + "learning_rate": 1.2982074358278096e-06, + "loss": 1.3164, + "step": 28640 + }, + { + "epoch": 0.8409477949380468, + "grad_norm": 0.0, + "learning_rate": 1.297738903448943e-06, + "loss": 1.3213, + "step": 28641 + }, + { + "epoch": 0.8409771566151859, + "grad_norm": 0.0, + "learning_rate": 1.2972704497668542e-06, + "loss": 1.1152, + "step": 28642 + }, + { + "epoch": 0.8410065182923249, + "grad_norm": 0.0, + "learning_rate": 1.2968020747857757e-06, + "loss": 1.1465, + "step": 28643 + }, + { + "epoch": 0.8410358799694638, + "grad_norm": 0.0, + "learning_rate": 1.2963337785099461e-06, + "loss": 1.0342, + "step": 28644 + }, + { + "epoch": 0.8410652416466029, + "grad_norm": 0.0, + "learning_rate": 1.2958655609436e-06, + "loss": 1.2173, + "step": 28645 + }, + { + "epoch": 0.8410946033237419, + "grad_norm": 0.0, + "learning_rate": 1.2953974220909693e-06, + "loss": 1.2095, + "step": 28646 + }, + { + "epoch": 0.8411239650008808, + "grad_norm": 0.0, + "learning_rate": 1.294929361956292e-06, + "loss": 1.0713, + "step": 28647 + }, + { + "epoch": 0.8411533266780199, + "grad_norm": 0.0, + "learning_rate": 1.2944613805437945e-06, + "loss": 1.1099, + "step": 28648 + }, + { + "epoch": 0.8411826883551589, + "grad_norm": 0.0, + "learning_rate": 1.2939934778577134e-06, + "loss": 1.0513, + "step": 28649 + }, + { + "epoch": 0.8412120500322978, + "grad_norm": 0.0, + "learning_rate": 1.2935256539022779e-06, + "loss": 1.1772, + "step": 28650 + }, + { + "epoch": 0.8412414117094369, + "grad_norm": 0.0, + "learning_rate": 1.2930579086817209e-06, + "loss": 1.2012, + "step": 28651 + }, + { + "epoch": 0.8412707733865759, + "grad_norm": 0.0, + "learning_rate": 1.29259024220027e-06, + "loss": 1.2607, + "step": 28652 + }, + { + "epoch": 0.8413001350637148, + "grad_norm": 0.0, + "learning_rate": 1.2921226544621567e-06, + "loss": 1.2832, + "step": 28653 + }, + { + "epoch": 0.8413294967408539, + "grad_norm": 0.0, + "learning_rate": 1.2916551454716085e-06, + "loss": 1.2109, + "step": 28654 + }, + { + "epoch": 0.8413588584179928, + "grad_norm": 0.0, + "learning_rate": 1.2911877152328534e-06, + "loss": 1.1133, + "step": 28655 + }, + { + "epoch": 0.8413882200951318, + "grad_norm": 0.0, + "learning_rate": 1.2907203637501187e-06, + "loss": 1.1841, + "step": 28656 + }, + { + "epoch": 0.8414175817722709, + "grad_norm": 0.0, + "learning_rate": 1.2902530910276268e-06, + "loss": 1.3105, + "step": 28657 + }, + { + "epoch": 0.8414469434494098, + "grad_norm": 0.0, + "learning_rate": 1.2897858970696097e-06, + "loss": 1.1885, + "step": 28658 + }, + { + "epoch": 0.8414763051265488, + "grad_norm": 0.0, + "learning_rate": 1.2893187818802865e-06, + "loss": 1.1655, + "step": 28659 + }, + { + "epoch": 0.8415056668036879, + "grad_norm": 0.0, + "learning_rate": 1.2888517454638882e-06, + "loss": 1.1797, + "step": 28660 + }, + { + "epoch": 0.8415350284808268, + "grad_norm": 0.0, + "learning_rate": 1.2883847878246315e-06, + "loss": 1.1484, + "step": 28661 + }, + { + "epoch": 0.8415643901579658, + "grad_norm": 0.0, + "learning_rate": 1.2879179089667438e-06, + "loss": 1.1313, + "step": 28662 + }, + { + "epoch": 0.8415937518351049, + "grad_norm": 0.0, + "learning_rate": 1.2874511088944463e-06, + "loss": 1.1328, + "step": 28663 + }, + { + "epoch": 0.8416231135122438, + "grad_norm": 0.0, + "learning_rate": 1.2869843876119592e-06, + "loss": 1.1924, + "step": 28664 + }, + { + "epoch": 0.8416524751893828, + "grad_norm": 0.0, + "learning_rate": 1.2865177451235044e-06, + "loss": 1.2656, + "step": 28665 + }, + { + "epoch": 0.8416818368665219, + "grad_norm": 0.0, + "learning_rate": 1.2860511814332988e-06, + "loss": 1.2363, + "step": 28666 + }, + { + "epoch": 0.8417111985436608, + "grad_norm": 0.0, + "learning_rate": 1.2855846965455654e-06, + "loss": 1.1802, + "step": 28667 + }, + { + "epoch": 0.8417405602207998, + "grad_norm": 0.0, + "learning_rate": 1.28511829046452e-06, + "loss": 1.3369, + "step": 28668 + }, + { + "epoch": 0.8417699218979388, + "grad_norm": 0.0, + "learning_rate": 1.2846519631943844e-06, + "loss": 1.2031, + "step": 28669 + }, + { + "epoch": 0.8417992835750778, + "grad_norm": 0.0, + "learning_rate": 1.28418571473937e-06, + "loss": 1.2163, + "step": 28670 + }, + { + "epoch": 0.8418286452522168, + "grad_norm": 0.0, + "learning_rate": 1.2837195451036988e-06, + "loss": 1.1768, + "step": 28671 + }, + { + "epoch": 0.8418580069293558, + "grad_norm": 0.0, + "learning_rate": 1.2832534542915843e-06, + "loss": 1.3091, + "step": 28672 + }, + { + "epoch": 0.8418873686064948, + "grad_norm": 0.0, + "learning_rate": 1.2827874423072417e-06, + "loss": 1.1895, + "step": 28673 + }, + { + "epoch": 0.8419167302836338, + "grad_norm": 0.0, + "learning_rate": 1.2823215091548836e-06, + "loss": 1.1885, + "step": 28674 + }, + { + "epoch": 0.8419460919607727, + "grad_norm": 0.0, + "learning_rate": 1.281855654838724e-06, + "loss": 1.124, + "step": 28675 + }, + { + "epoch": 0.8419754536379118, + "grad_norm": 0.0, + "learning_rate": 1.2813898793629775e-06, + "loss": 1.1689, + "step": 28676 + }, + { + "epoch": 0.8420048153150508, + "grad_norm": 0.0, + "learning_rate": 1.2809241827318531e-06, + "loss": 1.0879, + "step": 28677 + }, + { + "epoch": 0.8420341769921897, + "grad_norm": 0.0, + "learning_rate": 1.280458564949567e-06, + "loss": 1.2358, + "step": 28678 + }, + { + "epoch": 0.8420635386693288, + "grad_norm": 0.0, + "learning_rate": 1.2799930260203264e-06, + "loss": 1.1953, + "step": 28679 + }, + { + "epoch": 0.8420929003464678, + "grad_norm": 0.0, + "learning_rate": 1.279527565948343e-06, + "loss": 1.2988, + "step": 28680 + }, + { + "epoch": 0.8421222620236067, + "grad_norm": 0.0, + "learning_rate": 1.2790621847378227e-06, + "loss": 1.0454, + "step": 28681 + }, + { + "epoch": 0.8421516237007458, + "grad_norm": 0.0, + "learning_rate": 1.2785968823929784e-06, + "loss": 1.2695, + "step": 28682 + }, + { + "epoch": 0.8421809853778848, + "grad_norm": 0.0, + "learning_rate": 1.2781316589180136e-06, + "loss": 1.1509, + "step": 28683 + }, + { + "epoch": 0.8422103470550237, + "grad_norm": 0.0, + "learning_rate": 1.2776665143171407e-06, + "loss": 1.251, + "step": 28684 + }, + { + "epoch": 0.8422397087321628, + "grad_norm": 0.0, + "learning_rate": 1.2772014485945627e-06, + "loss": 1.1924, + "step": 28685 + }, + { + "epoch": 0.8422690704093018, + "grad_norm": 0.0, + "learning_rate": 1.2767364617544842e-06, + "loss": 1.23, + "step": 28686 + }, + { + "epoch": 0.8422984320864407, + "grad_norm": 0.0, + "learning_rate": 1.276271553801115e-06, + "loss": 1.2407, + "step": 28687 + }, + { + "epoch": 0.8423277937635798, + "grad_norm": 0.0, + "learning_rate": 1.2758067247386553e-06, + "loss": 1.2275, + "step": 28688 + }, + { + "epoch": 0.8423571554407188, + "grad_norm": 0.0, + "learning_rate": 1.2753419745713103e-06, + "loss": 1.1943, + "step": 28689 + }, + { + "epoch": 0.8423865171178577, + "grad_norm": 0.0, + "learning_rate": 1.274877303303279e-06, + "loss": 1.0776, + "step": 28690 + }, + { + "epoch": 0.8424158787949968, + "grad_norm": 0.0, + "learning_rate": 1.2744127109387693e-06, + "loss": 1.2266, + "step": 28691 + }, + { + "epoch": 0.8424452404721358, + "grad_norm": 0.0, + "learning_rate": 1.273948197481979e-06, + "loss": 1.3838, + "step": 28692 + }, + { + "epoch": 0.8424746021492747, + "grad_norm": 0.0, + "learning_rate": 1.273483762937111e-06, + "loss": 1.124, + "step": 28693 + }, + { + "epoch": 0.8425039638264138, + "grad_norm": 0.0, + "learning_rate": 1.2730194073083646e-06, + "loss": 1.231, + "step": 28694 + }, + { + "epoch": 0.8425333255035528, + "grad_norm": 0.0, + "learning_rate": 1.2725551305999363e-06, + "loss": 1.2358, + "step": 28695 + }, + { + "epoch": 0.8425626871806917, + "grad_norm": 0.0, + "learning_rate": 1.2720909328160292e-06, + "loss": 1.1831, + "step": 28696 + }, + { + "epoch": 0.8425920488578308, + "grad_norm": 0.0, + "learning_rate": 1.27162681396084e-06, + "loss": 1.2324, + "step": 28697 + }, + { + "epoch": 0.8426214105349698, + "grad_norm": 0.0, + "learning_rate": 1.2711627740385636e-06, + "loss": 1.1548, + "step": 28698 + }, + { + "epoch": 0.8426507722121087, + "grad_norm": 0.0, + "learning_rate": 1.270698813053396e-06, + "loss": 1.207, + "step": 28699 + }, + { + "epoch": 0.8426801338892478, + "grad_norm": 0.0, + "learning_rate": 1.2702349310095375e-06, + "loss": 1.1538, + "step": 28700 + }, + { + "epoch": 0.8427094955663867, + "grad_norm": 0.0, + "learning_rate": 1.269771127911177e-06, + "loss": 1.0229, + "step": 28701 + }, + { + "epoch": 0.8427388572435257, + "grad_norm": 0.0, + "learning_rate": 1.2693074037625153e-06, + "loss": 1.2607, + "step": 28702 + }, + { + "epoch": 0.8427682189206648, + "grad_norm": 0.0, + "learning_rate": 1.2688437585677416e-06, + "loss": 1.1982, + "step": 28703 + }, + { + "epoch": 0.8427975805978037, + "grad_norm": 0.0, + "learning_rate": 1.2683801923310513e-06, + "loss": 1.2109, + "step": 28704 + }, + { + "epoch": 0.8428269422749427, + "grad_norm": 0.0, + "learning_rate": 1.2679167050566332e-06, + "loss": 1.1753, + "step": 28705 + }, + { + "epoch": 0.8428563039520818, + "grad_norm": 0.0, + "learning_rate": 1.2674532967486796e-06, + "loss": 1.2393, + "step": 28706 + }, + { + "epoch": 0.8428856656292207, + "grad_norm": 0.0, + "learning_rate": 1.266989967411385e-06, + "loss": 1.3359, + "step": 28707 + }, + { + "epoch": 0.8429150273063597, + "grad_norm": 0.0, + "learning_rate": 1.2665267170489336e-06, + "loss": 1.1665, + "step": 28708 + }, + { + "epoch": 0.8429443889834988, + "grad_norm": 0.0, + "learning_rate": 1.2660635456655202e-06, + "loss": 1.2842, + "step": 28709 + }, + { + "epoch": 0.8429737506606377, + "grad_norm": 0.0, + "learning_rate": 1.2656004532653287e-06, + "loss": 1.2139, + "step": 28710 + }, + { + "epoch": 0.8430031123377767, + "grad_norm": 0.0, + "learning_rate": 1.2651374398525517e-06, + "loss": 1.2383, + "step": 28711 + }, + { + "epoch": 0.8430324740149158, + "grad_norm": 0.0, + "learning_rate": 1.2646745054313736e-06, + "loss": 1.2217, + "step": 28712 + }, + { + "epoch": 0.8430618356920547, + "grad_norm": 0.0, + "learning_rate": 1.264211650005981e-06, + "loss": 1.168, + "step": 28713 + }, + { + "epoch": 0.8430911973691937, + "grad_norm": 0.0, + "learning_rate": 1.2637488735805602e-06, + "loss": 1.1528, + "step": 28714 + }, + { + "epoch": 0.8431205590463328, + "grad_norm": 0.0, + "learning_rate": 1.2632861761592929e-06, + "loss": 1.1436, + "step": 28715 + }, + { + "epoch": 0.8431499207234717, + "grad_norm": 0.0, + "learning_rate": 1.2628235577463688e-06, + "loss": 1.1934, + "step": 28716 + }, + { + "epoch": 0.8431792824006107, + "grad_norm": 0.0, + "learning_rate": 1.2623610183459666e-06, + "loss": 1.1709, + "step": 28717 + }, + { + "epoch": 0.8432086440777498, + "grad_norm": 0.0, + "learning_rate": 1.2618985579622744e-06, + "loss": 1.0996, + "step": 28718 + }, + { + "epoch": 0.8432380057548887, + "grad_norm": 0.0, + "learning_rate": 1.2614361765994675e-06, + "loss": 1.3525, + "step": 28719 + }, + { + "epoch": 0.8432673674320277, + "grad_norm": 0.0, + "learning_rate": 1.260973874261735e-06, + "loss": 1.2783, + "step": 28720 + }, + { + "epoch": 0.8432967291091668, + "grad_norm": 0.0, + "learning_rate": 1.2605116509532534e-06, + "loss": 1.2227, + "step": 28721 + }, + { + "epoch": 0.8433260907863057, + "grad_norm": 0.0, + "learning_rate": 1.2600495066782026e-06, + "loss": 1.3506, + "step": 28722 + }, + { + "epoch": 0.8433554524634447, + "grad_norm": 0.0, + "learning_rate": 1.2595874414407628e-06, + "loss": 1.166, + "step": 28723 + }, + { + "epoch": 0.8433848141405837, + "grad_norm": 0.0, + "learning_rate": 1.2591254552451104e-06, + "loss": 1.2397, + "step": 28724 + }, + { + "epoch": 0.8434141758177227, + "grad_norm": 0.0, + "learning_rate": 1.2586635480954268e-06, + "loss": 1.1699, + "step": 28725 + }, + { + "epoch": 0.8434435374948617, + "grad_norm": 0.0, + "learning_rate": 1.258201719995885e-06, + "loss": 1.3027, + "step": 28726 + }, + { + "epoch": 0.8434728991720007, + "grad_norm": 0.0, + "learning_rate": 1.2577399709506666e-06, + "loss": 1.0913, + "step": 28727 + }, + { + "epoch": 0.8435022608491397, + "grad_norm": 0.0, + "learning_rate": 1.2572783009639445e-06, + "loss": 1.2461, + "step": 28728 + }, + { + "epoch": 0.8435316225262787, + "grad_norm": 0.0, + "learning_rate": 1.2568167100398942e-06, + "loss": 1.2881, + "step": 28729 + }, + { + "epoch": 0.8435609842034177, + "grad_norm": 0.0, + "learning_rate": 1.256355198182687e-06, + "loss": 1.3096, + "step": 28730 + }, + { + "epoch": 0.8435903458805567, + "grad_norm": 0.0, + "learning_rate": 1.2558937653965008e-06, + "loss": 1.3164, + "step": 28731 + }, + { + "epoch": 0.8436197075576957, + "grad_norm": 0.0, + "learning_rate": 1.2554324116855044e-06, + "loss": 1.125, + "step": 28732 + }, + { + "epoch": 0.8436490692348347, + "grad_norm": 0.0, + "learning_rate": 1.2549711370538754e-06, + "loss": 1.1963, + "step": 28733 + }, + { + "epoch": 0.8436784309119737, + "grad_norm": 0.0, + "learning_rate": 1.2545099415057805e-06, + "loss": 1.1064, + "step": 28734 + }, + { + "epoch": 0.8437077925891127, + "grad_norm": 0.0, + "learning_rate": 1.2540488250453908e-06, + "loss": 1.2324, + "step": 28735 + }, + { + "epoch": 0.8437371542662517, + "grad_norm": 0.0, + "learning_rate": 1.25358778767688e-06, + "loss": 1.2725, + "step": 28736 + }, + { + "epoch": 0.8437665159433907, + "grad_norm": 0.0, + "learning_rate": 1.2531268294044141e-06, + "loss": 1.2812, + "step": 28737 + }, + { + "epoch": 0.8437958776205297, + "grad_norm": 0.0, + "learning_rate": 1.2526659502321624e-06, + "loss": 1.1865, + "step": 28738 + }, + { + "epoch": 0.8438252392976687, + "grad_norm": 0.0, + "learning_rate": 1.2522051501642906e-06, + "loss": 1.2559, + "step": 28739 + }, + { + "epoch": 0.8438546009748077, + "grad_norm": 0.0, + "learning_rate": 1.2517444292049707e-06, + "loss": 1.1074, + "step": 28740 + }, + { + "epoch": 0.8438839626519467, + "grad_norm": 0.0, + "learning_rate": 1.251283787358364e-06, + "loss": 1.2354, + "step": 28741 + }, + { + "epoch": 0.8439133243290857, + "grad_norm": 0.0, + "learning_rate": 1.25082322462864e-06, + "loss": 1.1982, + "step": 28742 + }, + { + "epoch": 0.8439426860062247, + "grad_norm": 0.0, + "learning_rate": 1.2503627410199637e-06, + "loss": 1.2305, + "step": 28743 + }, + { + "epoch": 0.8439720476833636, + "grad_norm": 0.0, + "learning_rate": 1.2499023365364949e-06, + "loss": 1.0503, + "step": 28744 + }, + { + "epoch": 0.8440014093605027, + "grad_norm": 0.0, + "learning_rate": 1.2494420111824023e-06, + "loss": 1.2773, + "step": 28745 + }, + { + "epoch": 0.8440307710376417, + "grad_norm": 0.0, + "learning_rate": 1.2489817649618463e-06, + "loss": 1.251, + "step": 28746 + }, + { + "epoch": 0.8440601327147806, + "grad_norm": 0.0, + "learning_rate": 1.24852159787899e-06, + "loss": 1.0005, + "step": 28747 + }, + { + "epoch": 0.8440894943919197, + "grad_norm": 0.0, + "learning_rate": 1.2480615099379922e-06, + "loss": 1.1528, + "step": 28748 + }, + { + "epoch": 0.8441188560690587, + "grad_norm": 0.0, + "learning_rate": 1.2476015011430177e-06, + "loss": 1.2715, + "step": 28749 + }, + { + "epoch": 0.8441482177461976, + "grad_norm": 0.0, + "learning_rate": 1.2471415714982216e-06, + "loss": 1.2715, + "step": 28750 + }, + { + "epoch": 0.8441775794233367, + "grad_norm": 0.0, + "learning_rate": 1.2466817210077687e-06, + "loss": 1.1523, + "step": 28751 + }, + { + "epoch": 0.8442069411004757, + "grad_norm": 0.0, + "learning_rate": 1.2462219496758143e-06, + "loss": 1.1572, + "step": 28752 + }, + { + "epoch": 0.8442363027776146, + "grad_norm": 0.0, + "learning_rate": 1.2457622575065164e-06, + "loss": 1.1504, + "step": 28753 + }, + { + "epoch": 0.8442656644547537, + "grad_norm": 0.0, + "learning_rate": 1.245302644504033e-06, + "loss": 1.2646, + "step": 28754 + }, + { + "epoch": 0.8442950261318927, + "grad_norm": 0.0, + "learning_rate": 1.2448431106725167e-06, + "loss": 1.2075, + "step": 28755 + }, + { + "epoch": 0.8443243878090316, + "grad_norm": 0.0, + "learning_rate": 1.2443836560161293e-06, + "loss": 1.1553, + "step": 28756 + }, + { + "epoch": 0.8443537494861707, + "grad_norm": 0.0, + "learning_rate": 1.2439242805390217e-06, + "loss": 1.1729, + "step": 28757 + }, + { + "epoch": 0.8443831111633097, + "grad_norm": 0.0, + "learning_rate": 1.2434649842453506e-06, + "loss": 1.1328, + "step": 28758 + }, + { + "epoch": 0.8444124728404486, + "grad_norm": 0.0, + "learning_rate": 1.2430057671392658e-06, + "loss": 1.1587, + "step": 28759 + }, + { + "epoch": 0.8444418345175877, + "grad_norm": 0.0, + "learning_rate": 1.2425466292249243e-06, + "loss": 1.0908, + "step": 28760 + }, + { + "epoch": 0.8444711961947267, + "grad_norm": 0.0, + "learning_rate": 1.2420875705064772e-06, + "loss": 1.145, + "step": 28761 + }, + { + "epoch": 0.8445005578718656, + "grad_norm": 0.0, + "learning_rate": 1.2416285909880754e-06, + "loss": 1.1597, + "step": 28762 + }, + { + "epoch": 0.8445299195490047, + "grad_norm": 0.0, + "learning_rate": 1.2411696906738679e-06, + "loss": 1.229, + "step": 28763 + }, + { + "epoch": 0.8445592812261437, + "grad_norm": 0.0, + "learning_rate": 1.2407108695680059e-06, + "loss": 1.251, + "step": 28764 + }, + { + "epoch": 0.8445886429032826, + "grad_norm": 0.0, + "learning_rate": 1.2402521276746394e-06, + "loss": 1.2305, + "step": 28765 + }, + { + "epoch": 0.8446180045804217, + "grad_norm": 0.0, + "learning_rate": 1.239793464997915e-06, + "loss": 1.2764, + "step": 28766 + }, + { + "epoch": 0.8446473662575607, + "grad_norm": 0.0, + "learning_rate": 1.2393348815419837e-06, + "loss": 1.2471, + "step": 28767 + }, + { + "epoch": 0.8446767279346996, + "grad_norm": 0.0, + "learning_rate": 1.238876377310988e-06, + "loss": 1.1143, + "step": 28768 + }, + { + "epoch": 0.8447060896118386, + "grad_norm": 0.0, + "learning_rate": 1.23841795230908e-06, + "loss": 1.0488, + "step": 28769 + }, + { + "epoch": 0.8447354512889776, + "grad_norm": 0.0, + "learning_rate": 1.237959606540402e-06, + "loss": 1.2012, + "step": 28770 + }, + { + "epoch": 0.8447648129661166, + "grad_norm": 0.0, + "learning_rate": 1.2375013400090997e-06, + "loss": 1.1499, + "step": 28771 + }, + { + "epoch": 0.8447941746432556, + "grad_norm": 0.0, + "learning_rate": 1.237043152719316e-06, + "loss": 1.2368, + "step": 28772 + }, + { + "epoch": 0.8448235363203946, + "grad_norm": 0.0, + "learning_rate": 1.2365850446751938e-06, + "loss": 1.0728, + "step": 28773 + }, + { + "epoch": 0.8448528979975336, + "grad_norm": 0.0, + "learning_rate": 1.236127015880879e-06, + "loss": 1.1284, + "step": 28774 + }, + { + "epoch": 0.8448822596746726, + "grad_norm": 0.0, + "learning_rate": 1.23566906634051e-06, + "loss": 1.3223, + "step": 28775 + }, + { + "epoch": 0.8449116213518116, + "grad_norm": 0.0, + "learning_rate": 1.2352111960582325e-06, + "loss": 1.208, + "step": 28776 + }, + { + "epoch": 0.8449409830289506, + "grad_norm": 0.0, + "learning_rate": 1.234753405038185e-06, + "loss": 1.2803, + "step": 28777 + }, + { + "epoch": 0.8449703447060896, + "grad_norm": 0.0, + "learning_rate": 1.2342956932845073e-06, + "loss": 1.3047, + "step": 28778 + }, + { + "epoch": 0.8449997063832286, + "grad_norm": 0.0, + "learning_rate": 1.2338380608013367e-06, + "loss": 1.2705, + "step": 28779 + }, + { + "epoch": 0.8450290680603676, + "grad_norm": 0.0, + "learning_rate": 1.233380507592815e-06, + "loss": 1.1787, + "step": 28780 + }, + { + "epoch": 0.8450584297375066, + "grad_norm": 0.0, + "learning_rate": 1.2329230336630772e-06, + "loss": 1.2051, + "step": 28781 + }, + { + "epoch": 0.8450877914146456, + "grad_norm": 0.0, + "learning_rate": 1.2324656390162626e-06, + "loss": 1.1138, + "step": 28782 + }, + { + "epoch": 0.8451171530917846, + "grad_norm": 0.0, + "learning_rate": 1.2320083236565073e-06, + "loss": 1.3057, + "step": 28783 + }, + { + "epoch": 0.8451465147689236, + "grad_norm": 0.0, + "learning_rate": 1.2315510875879443e-06, + "loss": 1.1758, + "step": 28784 + }, + { + "epoch": 0.8451758764460626, + "grad_norm": 0.0, + "learning_rate": 1.2310939308147129e-06, + "loss": 1.2363, + "step": 28785 + }, + { + "epoch": 0.8452052381232016, + "grad_norm": 0.0, + "learning_rate": 1.230636853340944e-06, + "loss": 1.1167, + "step": 28786 + }, + { + "epoch": 0.8452345998003405, + "grad_norm": 0.0, + "learning_rate": 1.2301798551707722e-06, + "loss": 1.165, + "step": 28787 + }, + { + "epoch": 0.8452639614774796, + "grad_norm": 0.0, + "learning_rate": 1.2297229363083285e-06, + "loss": 1.1138, + "step": 28788 + }, + { + "epoch": 0.8452933231546186, + "grad_norm": 0.0, + "learning_rate": 1.2292660967577475e-06, + "loss": 1.3135, + "step": 28789 + }, + { + "epoch": 0.8453226848317575, + "grad_norm": 0.0, + "learning_rate": 1.228809336523158e-06, + "loss": 1.3291, + "step": 28790 + }, + { + "epoch": 0.8453520465088966, + "grad_norm": 0.0, + "learning_rate": 1.2283526556086934e-06, + "loss": 1.1895, + "step": 28791 + }, + { + "epoch": 0.8453814081860356, + "grad_norm": 0.0, + "learning_rate": 1.2278960540184825e-06, + "loss": 1.3203, + "step": 28792 + }, + { + "epoch": 0.8454107698631745, + "grad_norm": 0.0, + "learning_rate": 1.2274395317566522e-06, + "loss": 1.1592, + "step": 28793 + }, + { + "epoch": 0.8454401315403136, + "grad_norm": 0.0, + "learning_rate": 1.226983088827336e-06, + "loss": 1.2847, + "step": 28794 + }, + { + "epoch": 0.8454694932174526, + "grad_norm": 0.0, + "learning_rate": 1.2265267252346568e-06, + "loss": 1.252, + "step": 28795 + }, + { + "epoch": 0.8454988548945915, + "grad_norm": 0.0, + "learning_rate": 1.226070440982744e-06, + "loss": 1.3105, + "step": 28796 + }, + { + "epoch": 0.8455282165717306, + "grad_norm": 0.0, + "learning_rate": 1.2256142360757217e-06, + "loss": 1.2461, + "step": 28797 + }, + { + "epoch": 0.8455575782488696, + "grad_norm": 0.0, + "learning_rate": 1.225158110517719e-06, + "loss": 1.2461, + "step": 28798 + }, + { + "epoch": 0.8455869399260085, + "grad_norm": 0.0, + "learning_rate": 1.224702064312856e-06, + "loss": 1.2832, + "step": 28799 + }, + { + "epoch": 0.8456163016031476, + "grad_norm": 0.0, + "learning_rate": 1.2242460974652615e-06, + "loss": 1.0747, + "step": 28800 + }, + { + "epoch": 0.8456456632802866, + "grad_norm": 0.0, + "learning_rate": 1.2237902099790555e-06, + "loss": 1.1519, + "step": 28801 + }, + { + "epoch": 0.8456750249574255, + "grad_norm": 0.0, + "learning_rate": 1.223334401858367e-06, + "loss": 1.0811, + "step": 28802 + }, + { + "epoch": 0.8457043866345646, + "grad_norm": 0.0, + "learning_rate": 1.2228786731073106e-06, + "loss": 1.1899, + "step": 28803 + }, + { + "epoch": 0.8457337483117036, + "grad_norm": 0.0, + "learning_rate": 1.2224230237300083e-06, + "loss": 1.2588, + "step": 28804 + }, + { + "epoch": 0.8457631099888425, + "grad_norm": 0.0, + "learning_rate": 1.2219674537305836e-06, + "loss": 1.2905, + "step": 28805 + }, + { + "epoch": 0.8457924716659816, + "grad_norm": 0.0, + "learning_rate": 1.2215119631131545e-06, + "loss": 1.2607, + "step": 28806 + }, + { + "epoch": 0.8458218333431206, + "grad_norm": 0.0, + "learning_rate": 1.221056551881843e-06, + "loss": 1.0889, + "step": 28807 + }, + { + "epoch": 0.8458511950202595, + "grad_norm": 0.0, + "learning_rate": 1.2206012200407624e-06, + "loss": 1.1797, + "step": 28808 + }, + { + "epoch": 0.8458805566973986, + "grad_norm": 0.0, + "learning_rate": 1.2201459675940364e-06, + "loss": 1.2158, + "step": 28809 + }, + { + "epoch": 0.8459099183745376, + "grad_norm": 0.0, + "learning_rate": 1.219690794545778e-06, + "loss": 1.168, + "step": 28810 + }, + { + "epoch": 0.8459392800516765, + "grad_norm": 0.0, + "learning_rate": 1.2192357009001055e-06, + "loss": 1.3086, + "step": 28811 + }, + { + "epoch": 0.8459686417288156, + "grad_norm": 0.0, + "learning_rate": 1.2187806866611328e-06, + "loss": 1.29, + "step": 28812 + }, + { + "epoch": 0.8459980034059545, + "grad_norm": 0.0, + "learning_rate": 1.2183257518329728e-06, + "loss": 1.209, + "step": 28813 + }, + { + "epoch": 0.8460273650830935, + "grad_norm": 0.0, + "learning_rate": 1.217870896419745e-06, + "loss": 1.0903, + "step": 28814 + }, + { + "epoch": 0.8460567267602326, + "grad_norm": 0.0, + "learning_rate": 1.2174161204255564e-06, + "loss": 1.3125, + "step": 28815 + }, + { + "epoch": 0.8460860884373715, + "grad_norm": 0.0, + "learning_rate": 1.216961423854527e-06, + "loss": 1.1699, + "step": 28816 + }, + { + "epoch": 0.8461154501145105, + "grad_norm": 0.0, + "learning_rate": 1.216506806710761e-06, + "loss": 1.1748, + "step": 28817 + }, + { + "epoch": 0.8461448117916496, + "grad_norm": 0.0, + "learning_rate": 1.2160522689983767e-06, + "loss": 1.3115, + "step": 28818 + }, + { + "epoch": 0.8461741734687885, + "grad_norm": 0.0, + "learning_rate": 1.2155978107214806e-06, + "loss": 1.1226, + "step": 28819 + }, + { + "epoch": 0.8462035351459275, + "grad_norm": 0.0, + "learning_rate": 1.2151434318841837e-06, + "loss": 1.1875, + "step": 28820 + }, + { + "epoch": 0.8462328968230666, + "grad_norm": 0.0, + "learning_rate": 1.214689132490592e-06, + "loss": 1.2754, + "step": 28821 + }, + { + "epoch": 0.8462622585002055, + "grad_norm": 0.0, + "learning_rate": 1.2142349125448194e-06, + "loss": 1.3545, + "step": 28822 + }, + { + "epoch": 0.8462916201773445, + "grad_norm": 0.0, + "learning_rate": 1.2137807720509698e-06, + "loss": 1.2183, + "step": 28823 + }, + { + "epoch": 0.8463209818544836, + "grad_norm": 0.0, + "learning_rate": 1.2133267110131486e-06, + "loss": 1.1382, + "step": 28824 + }, + { + "epoch": 0.8463503435316225, + "grad_norm": 0.0, + "learning_rate": 1.212872729435467e-06, + "loss": 1.0703, + "step": 28825 + }, + { + "epoch": 0.8463797052087615, + "grad_norm": 0.0, + "learning_rate": 1.212418827322025e-06, + "loss": 1.2744, + "step": 28826 + }, + { + "epoch": 0.8464090668859006, + "grad_norm": 0.0, + "learning_rate": 1.2119650046769349e-06, + "loss": 1.1602, + "step": 28827 + }, + { + "epoch": 0.8464384285630395, + "grad_norm": 0.0, + "learning_rate": 1.2115112615042913e-06, + "loss": 1.1719, + "step": 28828 + }, + { + "epoch": 0.8464677902401785, + "grad_norm": 0.0, + "learning_rate": 1.211057597808204e-06, + "loss": 1.3652, + "step": 28829 + }, + { + "epoch": 0.8464971519173176, + "grad_norm": 0.0, + "learning_rate": 1.210604013592772e-06, + "loss": 1.0864, + "step": 28830 + }, + { + "epoch": 0.8465265135944565, + "grad_norm": 0.0, + "learning_rate": 1.2101505088621014e-06, + "loss": 1.3066, + "step": 28831 + }, + { + "epoch": 0.8465558752715955, + "grad_norm": 0.0, + "learning_rate": 1.2096970836202904e-06, + "loss": 1.1616, + "step": 28832 + }, + { + "epoch": 0.8465852369487346, + "grad_norm": 0.0, + "learning_rate": 1.2092437378714382e-06, + "loss": 1.2158, + "step": 28833 + }, + { + "epoch": 0.8466145986258735, + "grad_norm": 0.0, + "learning_rate": 1.2087904716196474e-06, + "loss": 1.0498, + "step": 28834 + }, + { + "epoch": 0.8466439603030125, + "grad_norm": 0.0, + "learning_rate": 1.2083372848690167e-06, + "loss": 1.1289, + "step": 28835 + }, + { + "epoch": 0.8466733219801515, + "grad_norm": 0.0, + "learning_rate": 1.2078841776236428e-06, + "loss": 1.1772, + "step": 28836 + }, + { + "epoch": 0.8467026836572905, + "grad_norm": 0.0, + "learning_rate": 1.2074311498876222e-06, + "loss": 1.2119, + "step": 28837 + }, + { + "epoch": 0.8467320453344295, + "grad_norm": 0.0, + "learning_rate": 1.2069782016650567e-06, + "loss": 1.2773, + "step": 28838 + }, + { + "epoch": 0.8467614070115685, + "grad_norm": 0.0, + "learning_rate": 1.2065253329600356e-06, + "loss": 1.1445, + "step": 28839 + }, + { + "epoch": 0.8467907686887075, + "grad_norm": 0.0, + "learning_rate": 1.2060725437766606e-06, + "loss": 1.2549, + "step": 28840 + }, + { + "epoch": 0.8468201303658465, + "grad_norm": 0.0, + "learning_rate": 1.205619834119024e-06, + "loss": 1.1406, + "step": 28841 + }, + { + "epoch": 0.8468494920429855, + "grad_norm": 0.0, + "learning_rate": 1.2051672039912166e-06, + "loss": 1.1807, + "step": 28842 + }, + { + "epoch": 0.8468788537201245, + "grad_norm": 0.0, + "learning_rate": 1.2047146533973374e-06, + "loss": 1.2227, + "step": 28843 + }, + { + "epoch": 0.8469082153972635, + "grad_norm": 0.0, + "learning_rate": 1.2042621823414757e-06, + "loss": 1.2344, + "step": 28844 + }, + { + "epoch": 0.8469375770744025, + "grad_norm": 0.0, + "learning_rate": 1.2038097908277246e-06, + "loss": 1.1963, + "step": 28845 + }, + { + "epoch": 0.8469669387515415, + "grad_norm": 0.0, + "learning_rate": 1.203357478860171e-06, + "loss": 1.1709, + "step": 28846 + }, + { + "epoch": 0.8469963004286805, + "grad_norm": 0.0, + "learning_rate": 1.2029052464429113e-06, + "loss": 1.2666, + "step": 28847 + }, + { + "epoch": 0.8470256621058195, + "grad_norm": 0.0, + "learning_rate": 1.2024530935800305e-06, + "loss": 1.3193, + "step": 28848 + }, + { + "epoch": 0.8470550237829585, + "grad_norm": 0.0, + "learning_rate": 1.2020010202756215e-06, + "loss": 1.1602, + "step": 28849 + }, + { + "epoch": 0.8470843854600975, + "grad_norm": 0.0, + "learning_rate": 1.2015490265337682e-06, + "loss": 1.3506, + "step": 28850 + }, + { + "epoch": 0.8471137471372365, + "grad_norm": 0.0, + "learning_rate": 1.2010971123585623e-06, + "loss": 1.0767, + "step": 28851 + }, + { + "epoch": 0.8471431088143755, + "grad_norm": 0.0, + "learning_rate": 1.2006452777540912e-06, + "loss": 1.4092, + "step": 28852 + }, + { + "epoch": 0.8471724704915145, + "grad_norm": 0.0, + "learning_rate": 1.2001935227244332e-06, + "loss": 1.2002, + "step": 28853 + }, + { + "epoch": 0.8472018321686535, + "grad_norm": 0.0, + "learning_rate": 1.199741847273682e-06, + "loss": 1.1265, + "step": 28854 + }, + { + "epoch": 0.8472311938457925, + "grad_norm": 0.0, + "learning_rate": 1.1992902514059168e-06, + "loss": 1.2178, + "step": 28855 + }, + { + "epoch": 0.8472605555229314, + "grad_norm": 0.0, + "learning_rate": 1.198838735125225e-06, + "loss": 1.2241, + "step": 28856 + }, + { + "epoch": 0.8472899172000705, + "grad_norm": 0.0, + "learning_rate": 1.198387298435687e-06, + "loss": 1.2451, + "step": 28857 + }, + { + "epoch": 0.8473192788772095, + "grad_norm": 0.0, + "learning_rate": 1.1979359413413893e-06, + "loss": 1.2148, + "step": 28858 + }, + { + "epoch": 0.8473486405543484, + "grad_norm": 0.0, + "learning_rate": 1.1974846638464111e-06, + "loss": 1.3916, + "step": 28859 + }, + { + "epoch": 0.8473780022314875, + "grad_norm": 0.0, + "learning_rate": 1.1970334659548332e-06, + "loss": 1.2617, + "step": 28860 + }, + { + "epoch": 0.8474073639086265, + "grad_norm": 0.0, + "learning_rate": 1.196582347670736e-06, + "loss": 1.0649, + "step": 28861 + }, + { + "epoch": 0.8474367255857654, + "grad_norm": 0.0, + "learning_rate": 1.1961313089981973e-06, + "loss": 1.3125, + "step": 28862 + }, + { + "epoch": 0.8474660872629045, + "grad_norm": 0.0, + "learning_rate": 1.1956803499412995e-06, + "loss": 1.1953, + "step": 28863 + }, + { + "epoch": 0.8474954489400435, + "grad_norm": 0.0, + "learning_rate": 1.1952294705041178e-06, + "loss": 1.1978, + "step": 28864 + }, + { + "epoch": 0.8475248106171824, + "grad_norm": 0.0, + "learning_rate": 1.1947786706907315e-06, + "loss": 1.1714, + "step": 28865 + }, + { + "epoch": 0.8475541722943215, + "grad_norm": 0.0, + "learning_rate": 1.1943279505052163e-06, + "loss": 1.0581, + "step": 28866 + }, + { + "epoch": 0.8475835339714605, + "grad_norm": 0.0, + "learning_rate": 1.1938773099516499e-06, + "loss": 1.3809, + "step": 28867 + }, + { + "epoch": 0.8476128956485994, + "grad_norm": 0.0, + "learning_rate": 1.1934267490341067e-06, + "loss": 1.2393, + "step": 28868 + }, + { + "epoch": 0.8476422573257384, + "grad_norm": 0.0, + "learning_rate": 1.1929762677566603e-06, + "loss": 1.1567, + "step": 28869 + }, + { + "epoch": 0.8476716190028775, + "grad_norm": 0.0, + "learning_rate": 1.1925258661233842e-06, + "loss": 1.2842, + "step": 28870 + }, + { + "epoch": 0.8477009806800164, + "grad_norm": 0.0, + "learning_rate": 1.1920755441383525e-06, + "loss": 1.1611, + "step": 28871 + }, + { + "epoch": 0.8477303423571554, + "grad_norm": 0.0, + "learning_rate": 1.191625301805639e-06, + "loss": 1.2461, + "step": 28872 + }, + { + "epoch": 0.8477597040342945, + "grad_norm": 0.0, + "learning_rate": 1.1911751391293113e-06, + "loss": 1.2969, + "step": 28873 + }, + { + "epoch": 0.8477890657114334, + "grad_norm": 0.0, + "learning_rate": 1.1907250561134443e-06, + "loss": 1.1997, + "step": 28874 + }, + { + "epoch": 0.8478184273885724, + "grad_norm": 0.0, + "learning_rate": 1.1902750527621043e-06, + "loss": 1.2363, + "step": 28875 + }, + { + "epoch": 0.8478477890657115, + "grad_norm": 0.0, + "learning_rate": 1.1898251290793662e-06, + "loss": 1.23, + "step": 28876 + }, + { + "epoch": 0.8478771507428504, + "grad_norm": 0.0, + "learning_rate": 1.1893752850692952e-06, + "loss": 1.2354, + "step": 28877 + }, + { + "epoch": 0.8479065124199894, + "grad_norm": 0.0, + "learning_rate": 1.1889255207359607e-06, + "loss": 1.2422, + "step": 28878 + }, + { + "epoch": 0.8479358740971285, + "grad_norm": 0.0, + "learning_rate": 1.188475836083427e-06, + "loss": 1.0942, + "step": 28879 + }, + { + "epoch": 0.8479652357742674, + "grad_norm": 0.0, + "learning_rate": 1.1880262311157643e-06, + "loss": 1.2085, + "step": 28880 + }, + { + "epoch": 0.8479945974514064, + "grad_norm": 0.0, + "learning_rate": 1.187576705837038e-06, + "loss": 1.1035, + "step": 28881 + }, + { + "epoch": 0.8480239591285454, + "grad_norm": 0.0, + "learning_rate": 1.1871272602513096e-06, + "loss": 1.1885, + "step": 28882 + }, + { + "epoch": 0.8480533208056844, + "grad_norm": 0.0, + "learning_rate": 1.186677894362649e-06, + "loss": 1.2168, + "step": 28883 + }, + { + "epoch": 0.8480826824828234, + "grad_norm": 0.0, + "learning_rate": 1.1862286081751173e-06, + "loss": 1.2056, + "step": 28884 + }, + { + "epoch": 0.8481120441599624, + "grad_norm": 0.0, + "learning_rate": 1.185779401692777e-06, + "loss": 1.2764, + "step": 28885 + }, + { + "epoch": 0.8481414058371014, + "grad_norm": 0.0, + "learning_rate": 1.1853302749196893e-06, + "loss": 1.1138, + "step": 28886 + }, + { + "epoch": 0.8481707675142404, + "grad_norm": 0.0, + "learning_rate": 1.1848812278599198e-06, + "loss": 1.3379, + "step": 28887 + }, + { + "epoch": 0.8482001291913794, + "grad_norm": 0.0, + "learning_rate": 1.1844322605175252e-06, + "loss": 1.2539, + "step": 28888 + }, + { + "epoch": 0.8482294908685184, + "grad_norm": 0.0, + "learning_rate": 1.1839833728965677e-06, + "loss": 1.2168, + "step": 28889 + }, + { + "epoch": 0.8482588525456574, + "grad_norm": 0.0, + "learning_rate": 1.1835345650011076e-06, + "loss": 1.207, + "step": 28890 + }, + { + "epoch": 0.8482882142227964, + "grad_norm": 0.0, + "learning_rate": 1.1830858368352006e-06, + "loss": 1.2114, + "step": 28891 + }, + { + "epoch": 0.8483175758999354, + "grad_norm": 0.0, + "learning_rate": 1.1826371884029074e-06, + "loss": 1.293, + "step": 28892 + }, + { + "epoch": 0.8483469375770744, + "grad_norm": 0.0, + "learning_rate": 1.1821886197082856e-06, + "loss": 1.0454, + "step": 28893 + }, + { + "epoch": 0.8483762992542134, + "grad_norm": 0.0, + "learning_rate": 1.18174013075539e-06, + "loss": 1.1631, + "step": 28894 + }, + { + "epoch": 0.8484056609313524, + "grad_norm": 0.0, + "learning_rate": 1.1812917215482744e-06, + "loss": 1.3311, + "step": 28895 + }, + { + "epoch": 0.8484350226084914, + "grad_norm": 0.0, + "learning_rate": 1.1808433920909979e-06, + "loss": 1.1387, + "step": 28896 + }, + { + "epoch": 0.8484643842856304, + "grad_norm": 0.0, + "learning_rate": 1.1803951423876115e-06, + "loss": 1.3057, + "step": 28897 + }, + { + "epoch": 0.8484937459627694, + "grad_norm": 0.0, + "learning_rate": 1.179946972442173e-06, + "loss": 1.1973, + "step": 28898 + }, + { + "epoch": 0.8485231076399083, + "grad_norm": 0.0, + "learning_rate": 1.1794988822587305e-06, + "loss": 1.1914, + "step": 28899 + }, + { + "epoch": 0.8485524693170474, + "grad_norm": 0.0, + "learning_rate": 1.1790508718413407e-06, + "loss": 1.1196, + "step": 28900 + }, + { + "epoch": 0.8485818309941864, + "grad_norm": 0.0, + "learning_rate": 1.1786029411940546e-06, + "loss": 1.2393, + "step": 28901 + }, + { + "epoch": 0.8486111926713253, + "grad_norm": 0.0, + "learning_rate": 1.1781550903209183e-06, + "loss": 1.2988, + "step": 28902 + }, + { + "epoch": 0.8486405543484644, + "grad_norm": 0.0, + "learning_rate": 1.1777073192259858e-06, + "loss": 1.2969, + "step": 28903 + }, + { + "epoch": 0.8486699160256034, + "grad_norm": 0.0, + "learning_rate": 1.177259627913303e-06, + "loss": 1.1514, + "step": 28904 + }, + { + "epoch": 0.8486992777027423, + "grad_norm": 0.0, + "learning_rate": 1.1768120163869245e-06, + "loss": 1.2754, + "step": 28905 + }, + { + "epoch": 0.8487286393798814, + "grad_norm": 0.0, + "learning_rate": 1.1763644846508915e-06, + "loss": 1.2148, + "step": 28906 + }, + { + "epoch": 0.8487580010570204, + "grad_norm": 0.0, + "learning_rate": 1.1759170327092563e-06, + "loss": 1.1899, + "step": 28907 + }, + { + "epoch": 0.8487873627341593, + "grad_norm": 0.0, + "learning_rate": 1.175469660566063e-06, + "loss": 1.2031, + "step": 28908 + }, + { + "epoch": 0.8488167244112984, + "grad_norm": 0.0, + "learning_rate": 1.1750223682253593e-06, + "loss": 1.1782, + "step": 28909 + }, + { + "epoch": 0.8488460860884374, + "grad_norm": 0.0, + "learning_rate": 1.1745751556911866e-06, + "loss": 1.2256, + "step": 28910 + }, + { + "epoch": 0.8488754477655763, + "grad_norm": 0.0, + "learning_rate": 1.17412802296759e-06, + "loss": 1.1499, + "step": 28911 + }, + { + "epoch": 0.8489048094427154, + "grad_norm": 0.0, + "learning_rate": 1.173680970058615e-06, + "loss": 1.2168, + "step": 28912 + }, + { + "epoch": 0.8489341711198544, + "grad_norm": 0.0, + "learning_rate": 1.1732339969683015e-06, + "loss": 1.0586, + "step": 28913 + }, + { + "epoch": 0.8489635327969933, + "grad_norm": 0.0, + "learning_rate": 1.1727871037006965e-06, + "loss": 1.1621, + "step": 28914 + }, + { + "epoch": 0.8489928944741324, + "grad_norm": 0.0, + "learning_rate": 1.1723402902598358e-06, + "loss": 1.2354, + "step": 28915 + }, + { + "epoch": 0.8490222561512714, + "grad_norm": 0.0, + "learning_rate": 1.1718935566497648e-06, + "loss": 1.0005, + "step": 28916 + }, + { + "epoch": 0.8490516178284103, + "grad_norm": 0.0, + "learning_rate": 1.1714469028745213e-06, + "loss": 1.2803, + "step": 28917 + }, + { + "epoch": 0.8490809795055494, + "grad_norm": 0.0, + "learning_rate": 1.1710003289381445e-06, + "loss": 1.2031, + "step": 28918 + }, + { + "epoch": 0.8491103411826884, + "grad_norm": 0.0, + "learning_rate": 1.170553834844671e-06, + "loss": 1.0439, + "step": 28919 + }, + { + "epoch": 0.8491397028598273, + "grad_norm": 0.0, + "learning_rate": 1.1701074205981421e-06, + "loss": 1.2197, + "step": 28920 + }, + { + "epoch": 0.8491690645369664, + "grad_norm": 0.0, + "learning_rate": 1.1696610862025936e-06, + "loss": 1.2651, + "step": 28921 + }, + { + "epoch": 0.8491984262141054, + "grad_norm": 0.0, + "learning_rate": 1.1692148316620588e-06, + "loss": 1.2988, + "step": 28922 + }, + { + "epoch": 0.8492277878912443, + "grad_norm": 0.0, + "learning_rate": 1.1687686569805778e-06, + "loss": 1.0347, + "step": 28923 + }, + { + "epoch": 0.8492571495683834, + "grad_norm": 0.0, + "learning_rate": 1.1683225621621829e-06, + "loss": 1.21, + "step": 28924 + }, + { + "epoch": 0.8492865112455223, + "grad_norm": 0.0, + "learning_rate": 1.16787654721091e-06, + "loss": 1.3174, + "step": 28925 + }, + { + "epoch": 0.8493158729226613, + "grad_norm": 0.0, + "learning_rate": 1.167430612130791e-06, + "loss": 1.1904, + "step": 28926 + }, + { + "epoch": 0.8493452345998004, + "grad_norm": 0.0, + "learning_rate": 1.1669847569258597e-06, + "loss": 1.2666, + "step": 28927 + }, + { + "epoch": 0.8493745962769393, + "grad_norm": 0.0, + "learning_rate": 1.1665389816001449e-06, + "loss": 1.335, + "step": 28928 + }, + { + "epoch": 0.8494039579540783, + "grad_norm": 0.0, + "learning_rate": 1.1660932861576823e-06, + "loss": 1.124, + "step": 28929 + }, + { + "epoch": 0.8494333196312174, + "grad_norm": 0.0, + "learning_rate": 1.1656476706025011e-06, + "loss": 1.3613, + "step": 28930 + }, + { + "epoch": 0.8494626813083563, + "grad_norm": 0.0, + "learning_rate": 1.1652021349386277e-06, + "loss": 1.2881, + "step": 28931 + }, + { + "epoch": 0.8494920429854953, + "grad_norm": 0.0, + "learning_rate": 1.164756679170097e-06, + "loss": 1.3311, + "step": 28932 + }, + { + "epoch": 0.8495214046626344, + "grad_norm": 0.0, + "learning_rate": 1.1643113033009335e-06, + "loss": 1.1855, + "step": 28933 + }, + { + "epoch": 0.8495507663397733, + "grad_norm": 0.0, + "learning_rate": 1.1638660073351659e-06, + "loss": 1.1333, + "step": 28934 + }, + { + "epoch": 0.8495801280169123, + "grad_norm": 0.0, + "learning_rate": 1.1634207912768191e-06, + "loss": 1.25, + "step": 28935 + }, + { + "epoch": 0.8496094896940514, + "grad_norm": 0.0, + "learning_rate": 1.1629756551299243e-06, + "loss": 1.186, + "step": 28936 + }, + { + "epoch": 0.8496388513711903, + "grad_norm": 0.0, + "learning_rate": 1.1625305988985003e-06, + "loss": 1.2139, + "step": 28937 + }, + { + "epoch": 0.8496682130483293, + "grad_norm": 0.0, + "learning_rate": 1.1620856225865784e-06, + "loss": 1.1421, + "step": 28938 + }, + { + "epoch": 0.8496975747254684, + "grad_norm": 0.0, + "learning_rate": 1.1616407261981767e-06, + "loss": 1.2236, + "step": 28939 + }, + { + "epoch": 0.8497269364026073, + "grad_norm": 0.0, + "learning_rate": 1.1611959097373238e-06, + "loss": 1.1982, + "step": 28940 + }, + { + "epoch": 0.8497562980797463, + "grad_norm": 0.0, + "learning_rate": 1.1607511732080402e-06, + "loss": 1.2363, + "step": 28941 + }, + { + "epoch": 0.8497856597568854, + "grad_norm": 0.0, + "learning_rate": 1.1603065166143467e-06, + "loss": 1.2021, + "step": 28942 + }, + { + "epoch": 0.8498150214340243, + "grad_norm": 0.0, + "learning_rate": 1.1598619399602662e-06, + "loss": 1.1001, + "step": 28943 + }, + { + "epoch": 0.8498443831111633, + "grad_norm": 0.0, + "learning_rate": 1.1594174432498162e-06, + "loss": 1.1904, + "step": 28944 + }, + { + "epoch": 0.8498737447883024, + "grad_norm": 0.0, + "learning_rate": 1.1589730264870203e-06, + "loss": 1.208, + "step": 28945 + }, + { + "epoch": 0.8499031064654413, + "grad_norm": 0.0, + "learning_rate": 1.158528689675893e-06, + "loss": 1.2056, + "step": 28946 + }, + { + "epoch": 0.8499324681425803, + "grad_norm": 0.0, + "learning_rate": 1.1580844328204578e-06, + "loss": 1.249, + "step": 28947 + }, + { + "epoch": 0.8499618298197193, + "grad_norm": 0.0, + "learning_rate": 1.1576402559247268e-06, + "loss": 1.1484, + "step": 28948 + }, + { + "epoch": 0.8499911914968583, + "grad_norm": 0.0, + "learning_rate": 1.1571961589927217e-06, + "loss": 1.2344, + "step": 28949 + }, + { + "epoch": 0.8500205531739973, + "grad_norm": 0.0, + "learning_rate": 1.1567521420284566e-06, + "loss": 1.2363, + "step": 28950 + }, + { + "epoch": 0.8500499148511363, + "grad_norm": 0.0, + "learning_rate": 1.1563082050359464e-06, + "loss": 1.209, + "step": 28951 + }, + { + "epoch": 0.8500792765282753, + "grad_norm": 0.0, + "learning_rate": 1.1558643480192068e-06, + "loss": 1.1724, + "step": 28952 + }, + { + "epoch": 0.8501086382054143, + "grad_norm": 0.0, + "learning_rate": 1.1554205709822474e-06, + "loss": 1.1924, + "step": 28953 + }, + { + "epoch": 0.8501379998825533, + "grad_norm": 0.0, + "learning_rate": 1.154976873929089e-06, + "loss": 1.1826, + "step": 28954 + }, + { + "epoch": 0.8501673615596923, + "grad_norm": 0.0, + "learning_rate": 1.1545332568637357e-06, + "loss": 1.1846, + "step": 28955 + }, + { + "epoch": 0.8501967232368313, + "grad_norm": 0.0, + "learning_rate": 1.1540897197902067e-06, + "loss": 1.0796, + "step": 28956 + }, + { + "epoch": 0.8502260849139703, + "grad_norm": 0.0, + "learning_rate": 1.15364626271251e-06, + "loss": 1.1948, + "step": 28957 + }, + { + "epoch": 0.8502554465911093, + "grad_norm": 0.0, + "learning_rate": 1.1532028856346555e-06, + "loss": 1.1738, + "step": 28958 + }, + { + "epoch": 0.8502848082682483, + "grad_norm": 0.0, + "learning_rate": 1.1527595885606536e-06, + "loss": 1.0059, + "step": 28959 + }, + { + "epoch": 0.8503141699453873, + "grad_norm": 0.0, + "learning_rate": 1.1523163714945096e-06, + "loss": 1.2339, + "step": 28960 + }, + { + "epoch": 0.8503435316225263, + "grad_norm": 0.0, + "learning_rate": 1.1518732344402372e-06, + "loss": 1.2363, + "step": 28961 + }, + { + "epoch": 0.8503728932996653, + "grad_norm": 0.0, + "learning_rate": 1.1514301774018388e-06, + "loss": 1.0869, + "step": 28962 + }, + { + "epoch": 0.8504022549768043, + "grad_norm": 0.0, + "learning_rate": 1.1509872003833256e-06, + "loss": 1.188, + "step": 28963 + }, + { + "epoch": 0.8504316166539433, + "grad_norm": 0.0, + "learning_rate": 1.1505443033886987e-06, + "loss": 1.2031, + "step": 28964 + }, + { + "epoch": 0.8504609783310823, + "grad_norm": 0.0, + "learning_rate": 1.1501014864219695e-06, + "loss": 1.1094, + "step": 28965 + }, + { + "epoch": 0.8504903400082213, + "grad_norm": 0.0, + "learning_rate": 1.1496587494871381e-06, + "loss": 1.2578, + "step": 28966 + }, + { + "epoch": 0.8505197016853603, + "grad_norm": 0.0, + "learning_rate": 1.149216092588209e-06, + "loss": 1.2158, + "step": 28967 + }, + { + "epoch": 0.8505490633624992, + "grad_norm": 0.0, + "learning_rate": 1.1487735157291847e-06, + "loss": 1.2466, + "step": 28968 + }, + { + "epoch": 0.8505784250396383, + "grad_norm": 0.0, + "learning_rate": 1.1483310189140694e-06, + "loss": 1.2334, + "step": 28969 + }, + { + "epoch": 0.8506077867167773, + "grad_norm": 0.0, + "learning_rate": 1.1478886021468639e-06, + "loss": 1.3223, + "step": 28970 + }, + { + "epoch": 0.8506371483939162, + "grad_norm": 0.0, + "learning_rate": 1.1474462654315665e-06, + "loss": 1.1748, + "step": 28971 + }, + { + "epoch": 0.8506665100710552, + "grad_norm": 0.0, + "learning_rate": 1.1470040087721823e-06, + "loss": 1.3125, + "step": 28972 + }, + { + "epoch": 0.8506958717481943, + "grad_norm": 0.0, + "learning_rate": 1.1465618321727056e-06, + "loss": 1.1885, + "step": 28973 + }, + { + "epoch": 0.8507252334253332, + "grad_norm": 0.0, + "learning_rate": 1.1461197356371389e-06, + "loss": 1.2139, + "step": 28974 + }, + { + "epoch": 0.8507545951024722, + "grad_norm": 0.0, + "learning_rate": 1.1456777191694801e-06, + "loss": 1.1992, + "step": 28975 + }, + { + "epoch": 0.8507839567796113, + "grad_norm": 0.0, + "learning_rate": 1.1452357827737248e-06, + "loss": 1.1494, + "step": 28976 + }, + { + "epoch": 0.8508133184567502, + "grad_norm": 0.0, + "learning_rate": 1.1447939264538689e-06, + "loss": 1.1875, + "step": 28977 + }, + { + "epoch": 0.8508426801338892, + "grad_norm": 0.0, + "learning_rate": 1.144352150213911e-06, + "loss": 1.1895, + "step": 28978 + }, + { + "epoch": 0.8508720418110283, + "grad_norm": 0.0, + "learning_rate": 1.1439104540578439e-06, + "loss": 1.3271, + "step": 28979 + }, + { + "epoch": 0.8509014034881672, + "grad_norm": 0.0, + "learning_rate": 1.1434688379896609e-06, + "loss": 1.127, + "step": 28980 + }, + { + "epoch": 0.8509307651653062, + "grad_norm": 0.0, + "learning_rate": 1.1430273020133597e-06, + "loss": 1.1699, + "step": 28981 + }, + { + "epoch": 0.8509601268424453, + "grad_norm": 0.0, + "learning_rate": 1.1425858461329308e-06, + "loss": 1.0894, + "step": 28982 + }, + { + "epoch": 0.8509894885195842, + "grad_norm": 0.0, + "learning_rate": 1.1421444703523666e-06, + "loss": 1.2061, + "step": 28983 + }, + { + "epoch": 0.8510188501967232, + "grad_norm": 0.0, + "learning_rate": 1.1417031746756569e-06, + "loss": 1.2949, + "step": 28984 + }, + { + "epoch": 0.8510482118738623, + "grad_norm": 0.0, + "learning_rate": 1.1412619591067953e-06, + "loss": 1.1396, + "step": 28985 + }, + { + "epoch": 0.8510775735510012, + "grad_norm": 0.0, + "learning_rate": 1.1408208236497687e-06, + "loss": 1.2373, + "step": 28986 + }, + { + "epoch": 0.8511069352281402, + "grad_norm": 0.0, + "learning_rate": 1.1403797683085705e-06, + "loss": 1.2666, + "step": 28987 + }, + { + "epoch": 0.8511362969052793, + "grad_norm": 0.0, + "learning_rate": 1.1399387930871852e-06, + "loss": 1.1182, + "step": 28988 + }, + { + "epoch": 0.8511656585824182, + "grad_norm": 0.0, + "learning_rate": 1.1394978979896042e-06, + "loss": 1.2271, + "step": 28989 + }, + { + "epoch": 0.8511950202595572, + "grad_norm": 0.0, + "learning_rate": 1.1390570830198122e-06, + "loss": 1.0825, + "step": 28990 + }, + { + "epoch": 0.8512243819366963, + "grad_norm": 0.0, + "learning_rate": 1.138616348181797e-06, + "loss": 1.1934, + "step": 28991 + }, + { + "epoch": 0.8512537436138352, + "grad_norm": 0.0, + "learning_rate": 1.138175693479544e-06, + "loss": 1.2568, + "step": 28992 + }, + { + "epoch": 0.8512831052909742, + "grad_norm": 0.0, + "learning_rate": 1.137735118917035e-06, + "loss": 1.3701, + "step": 28993 + }, + { + "epoch": 0.8513124669681132, + "grad_norm": 0.0, + "learning_rate": 1.1372946244982596e-06, + "loss": 1.0957, + "step": 28994 + }, + { + "epoch": 0.8513418286452522, + "grad_norm": 0.0, + "learning_rate": 1.1368542102271963e-06, + "loss": 1.1367, + "step": 28995 + }, + { + "epoch": 0.8513711903223912, + "grad_norm": 0.0, + "learning_rate": 1.1364138761078326e-06, + "loss": 1.1602, + "step": 28996 + }, + { + "epoch": 0.8514005519995302, + "grad_norm": 0.0, + "learning_rate": 1.1359736221441464e-06, + "loss": 1.3018, + "step": 28997 + }, + { + "epoch": 0.8514299136766692, + "grad_norm": 0.0, + "learning_rate": 1.1355334483401227e-06, + "loss": 1.1953, + "step": 28998 + }, + { + "epoch": 0.8514592753538082, + "grad_norm": 0.0, + "learning_rate": 1.135093354699739e-06, + "loss": 1.3037, + "step": 28999 + }, + { + "epoch": 0.8514886370309472, + "grad_norm": 0.0, + "learning_rate": 1.1346533412269779e-06, + "loss": 1.2095, + "step": 29000 + }, + { + "epoch": 0.8515179987080862, + "grad_norm": 0.0, + "learning_rate": 1.134213407925816e-06, + "loss": 1.2207, + "step": 29001 + }, + { + "epoch": 0.8515473603852252, + "grad_norm": 0.0, + "learning_rate": 1.1337735548002315e-06, + "loss": 1.2158, + "step": 29002 + }, + { + "epoch": 0.8515767220623642, + "grad_norm": 0.0, + "learning_rate": 1.1333337818542057e-06, + "loss": 1.2324, + "step": 29003 + }, + { + "epoch": 0.8516060837395032, + "grad_norm": 0.0, + "learning_rate": 1.1328940890917106e-06, + "loss": 1.2568, + "step": 29004 + }, + { + "epoch": 0.8516354454166422, + "grad_norm": 0.0, + "learning_rate": 1.1324544765167278e-06, + "loss": 1.252, + "step": 29005 + }, + { + "epoch": 0.8516648070937812, + "grad_norm": 0.0, + "learning_rate": 1.1320149441332285e-06, + "loss": 1.1641, + "step": 29006 + }, + { + "epoch": 0.8516941687709202, + "grad_norm": 0.0, + "learning_rate": 1.1315754919451927e-06, + "loss": 1.1069, + "step": 29007 + }, + { + "epoch": 0.8517235304480592, + "grad_norm": 0.0, + "learning_rate": 1.1311361199565862e-06, + "loss": 1.123, + "step": 29008 + }, + { + "epoch": 0.8517528921251982, + "grad_norm": 0.0, + "learning_rate": 1.1306968281713903e-06, + "loss": 1.2026, + "step": 29009 + }, + { + "epoch": 0.8517822538023372, + "grad_norm": 0.0, + "learning_rate": 1.130257616593573e-06, + "loss": 1.2036, + "step": 29010 + }, + { + "epoch": 0.8518116154794761, + "grad_norm": 0.0, + "learning_rate": 1.1298184852271065e-06, + "loss": 1.0435, + "step": 29011 + }, + { + "epoch": 0.8518409771566152, + "grad_norm": 0.0, + "learning_rate": 1.1293794340759656e-06, + "loss": 1.2646, + "step": 29012 + }, + { + "epoch": 0.8518703388337542, + "grad_norm": 0.0, + "learning_rate": 1.128940463144116e-06, + "loss": 1.3022, + "step": 29013 + }, + { + "epoch": 0.8518997005108931, + "grad_norm": 0.0, + "learning_rate": 1.128501572435532e-06, + "loss": 1.3066, + "step": 29014 + }, + { + "epoch": 0.8519290621880322, + "grad_norm": 0.0, + "learning_rate": 1.1280627619541796e-06, + "loss": 0.9834, + "step": 29015 + }, + { + "epoch": 0.8519584238651712, + "grad_norm": 0.0, + "learning_rate": 1.1276240317040276e-06, + "loss": 1.0303, + "step": 29016 + }, + { + "epoch": 0.8519877855423101, + "grad_norm": 0.0, + "learning_rate": 1.127185381689042e-06, + "loss": 1.2295, + "step": 29017 + }, + { + "epoch": 0.8520171472194492, + "grad_norm": 0.0, + "learning_rate": 1.126746811913194e-06, + "loss": 1.1665, + "step": 29018 + }, + { + "epoch": 0.8520465088965882, + "grad_norm": 0.0, + "learning_rate": 1.1263083223804462e-06, + "loss": 1.2305, + "step": 29019 + }, + { + "epoch": 0.8520758705737271, + "grad_norm": 0.0, + "learning_rate": 1.1258699130947636e-06, + "loss": 1.2202, + "step": 29020 + }, + { + "epoch": 0.8521052322508662, + "grad_norm": 0.0, + "learning_rate": 1.1254315840601138e-06, + "loss": 1.0977, + "step": 29021 + }, + { + "epoch": 0.8521345939280052, + "grad_norm": 0.0, + "learning_rate": 1.1249933352804565e-06, + "loss": 1.1543, + "step": 29022 + }, + { + "epoch": 0.8521639556051441, + "grad_norm": 0.0, + "learning_rate": 1.1245551667597597e-06, + "loss": 1.2852, + "step": 29023 + }, + { + "epoch": 0.8521933172822832, + "grad_norm": 0.0, + "learning_rate": 1.124117078501984e-06, + "loss": 1.2129, + "step": 29024 + }, + { + "epoch": 0.8522226789594222, + "grad_norm": 0.0, + "learning_rate": 1.12367907051109e-06, + "loss": 1.2109, + "step": 29025 + }, + { + "epoch": 0.8522520406365611, + "grad_norm": 0.0, + "learning_rate": 1.1232411427910384e-06, + "loss": 1.2729, + "step": 29026 + }, + { + "epoch": 0.8522814023137002, + "grad_norm": 0.0, + "learning_rate": 1.1228032953457913e-06, + "loss": 1.2705, + "step": 29027 + }, + { + "epoch": 0.8523107639908392, + "grad_norm": 0.0, + "learning_rate": 1.1223655281793077e-06, + "loss": 1.0596, + "step": 29028 + }, + { + "epoch": 0.8523401256679781, + "grad_norm": 0.0, + "learning_rate": 1.1219278412955447e-06, + "loss": 1.2266, + "step": 29029 + }, + { + "epoch": 0.8523694873451172, + "grad_norm": 0.0, + "learning_rate": 1.1214902346984646e-06, + "loss": 1.332, + "step": 29030 + }, + { + "epoch": 0.8523988490222562, + "grad_norm": 0.0, + "learning_rate": 1.1210527083920186e-06, + "loss": 1.3047, + "step": 29031 + }, + { + "epoch": 0.8524282106993951, + "grad_norm": 0.0, + "learning_rate": 1.1206152623801725e-06, + "loss": 1.1602, + "step": 29032 + }, + { + "epoch": 0.8524575723765342, + "grad_norm": 0.0, + "learning_rate": 1.120177896666872e-06, + "loss": 1.1797, + "step": 29033 + }, + { + "epoch": 0.8524869340536732, + "grad_norm": 0.0, + "learning_rate": 1.1197406112560783e-06, + "loss": 1.1968, + "step": 29034 + }, + { + "epoch": 0.8525162957308121, + "grad_norm": 0.0, + "learning_rate": 1.1193034061517427e-06, + "loss": 1.2671, + "step": 29035 + }, + { + "epoch": 0.8525456574079512, + "grad_norm": 0.0, + "learning_rate": 1.1188662813578233e-06, + "loss": 1.209, + "step": 29036 + }, + { + "epoch": 0.8525750190850901, + "grad_norm": 0.0, + "learning_rate": 1.1184292368782678e-06, + "loss": 1.1826, + "step": 29037 + }, + { + "epoch": 0.8526043807622291, + "grad_norm": 0.0, + "learning_rate": 1.1179922727170344e-06, + "loss": 1.1792, + "step": 29038 + }, + { + "epoch": 0.8526337424393682, + "grad_norm": 0.0, + "learning_rate": 1.117555388878071e-06, + "loss": 1.2793, + "step": 29039 + }, + { + "epoch": 0.8526631041165071, + "grad_norm": 0.0, + "learning_rate": 1.1171185853653287e-06, + "loss": 1.2388, + "step": 29040 + }, + { + "epoch": 0.8526924657936461, + "grad_norm": 0.0, + "learning_rate": 1.1166818621827591e-06, + "loss": 1.1782, + "step": 29041 + }, + { + "epoch": 0.8527218274707852, + "grad_norm": 0.0, + "learning_rate": 1.1162452193343076e-06, + "loss": 1.2725, + "step": 29042 + }, + { + "epoch": 0.8527511891479241, + "grad_norm": 0.0, + "learning_rate": 1.1158086568239278e-06, + "loss": 1.1167, + "step": 29043 + }, + { + "epoch": 0.8527805508250631, + "grad_norm": 0.0, + "learning_rate": 1.1153721746555635e-06, + "loss": 1.186, + "step": 29044 + }, + { + "epoch": 0.8528099125022022, + "grad_norm": 0.0, + "learning_rate": 1.1149357728331656e-06, + "loss": 1.1216, + "step": 29045 + }, + { + "epoch": 0.8528392741793411, + "grad_norm": 0.0, + "learning_rate": 1.1144994513606777e-06, + "loss": 1.1553, + "step": 29046 + }, + { + "epoch": 0.8528686358564801, + "grad_norm": 0.0, + "learning_rate": 1.114063210242049e-06, + "loss": 1.1294, + "step": 29047 + }, + { + "epoch": 0.8528979975336192, + "grad_norm": 0.0, + "learning_rate": 1.1136270494812219e-06, + "loss": 1.0718, + "step": 29048 + }, + { + "epoch": 0.8529273592107581, + "grad_norm": 0.0, + "learning_rate": 1.1131909690821418e-06, + "loss": 1.1357, + "step": 29049 + }, + { + "epoch": 0.8529567208878971, + "grad_norm": 0.0, + "learning_rate": 1.1127549690487516e-06, + "loss": 1.1567, + "step": 29050 + }, + { + "epoch": 0.8529860825650362, + "grad_norm": 0.0, + "learning_rate": 1.1123190493849922e-06, + "loss": 1.3105, + "step": 29051 + }, + { + "epoch": 0.8530154442421751, + "grad_norm": 0.0, + "learning_rate": 1.1118832100948096e-06, + "loss": 1.3584, + "step": 29052 + }, + { + "epoch": 0.8530448059193141, + "grad_norm": 0.0, + "learning_rate": 1.1114474511821417e-06, + "loss": 1.0044, + "step": 29053 + }, + { + "epoch": 0.8530741675964532, + "grad_norm": 0.0, + "learning_rate": 1.1110117726509317e-06, + "loss": 1.2119, + "step": 29054 + }, + { + "epoch": 0.8531035292735921, + "grad_norm": 0.0, + "learning_rate": 1.110576174505118e-06, + "loss": 1.21, + "step": 29055 + }, + { + "epoch": 0.8531328909507311, + "grad_norm": 0.0, + "learning_rate": 1.1101406567486427e-06, + "loss": 1.3369, + "step": 29056 + }, + { + "epoch": 0.8531622526278702, + "grad_norm": 0.0, + "learning_rate": 1.1097052193854418e-06, + "loss": 1.147, + "step": 29057 + }, + { + "epoch": 0.8531916143050091, + "grad_norm": 0.0, + "learning_rate": 1.1092698624194531e-06, + "loss": 1.1235, + "step": 29058 + }, + { + "epoch": 0.8532209759821481, + "grad_norm": 0.0, + "learning_rate": 1.1088345858546146e-06, + "loss": 1.2432, + "step": 29059 + }, + { + "epoch": 0.8532503376592872, + "grad_norm": 0.0, + "learning_rate": 1.1083993896948597e-06, + "loss": 1.1758, + "step": 29060 + }, + { + "epoch": 0.8532796993364261, + "grad_norm": 0.0, + "learning_rate": 1.1079642739441287e-06, + "loss": 1.0117, + "step": 29061 + }, + { + "epoch": 0.8533090610135651, + "grad_norm": 0.0, + "learning_rate": 1.1075292386063508e-06, + "loss": 1.4092, + "step": 29062 + }, + { + "epoch": 0.8533384226907041, + "grad_norm": 0.0, + "learning_rate": 1.107094283685467e-06, + "loss": 1.103, + "step": 29063 + }, + { + "epoch": 0.8533677843678431, + "grad_norm": 0.0, + "learning_rate": 1.1066594091854055e-06, + "loss": 1.2607, + "step": 29064 + }, + { + "epoch": 0.8533971460449821, + "grad_norm": 0.0, + "learning_rate": 1.106224615110102e-06, + "loss": 1.2075, + "step": 29065 + }, + { + "epoch": 0.8534265077221211, + "grad_norm": 0.0, + "learning_rate": 1.1057899014634843e-06, + "loss": 1.1821, + "step": 29066 + }, + { + "epoch": 0.8534558693992601, + "grad_norm": 0.0, + "learning_rate": 1.1053552682494885e-06, + "loss": 1.1719, + "step": 29067 + }, + { + "epoch": 0.8534852310763991, + "grad_norm": 0.0, + "learning_rate": 1.104920715472043e-06, + "loss": 1.2275, + "step": 29068 + }, + { + "epoch": 0.8535145927535381, + "grad_norm": 0.0, + "learning_rate": 1.1044862431350756e-06, + "loss": 1.3252, + "step": 29069 + }, + { + "epoch": 0.8535439544306771, + "grad_norm": 0.0, + "learning_rate": 1.1040518512425192e-06, + "loss": 1.2109, + "step": 29070 + }, + { + "epoch": 0.8535733161078161, + "grad_norm": 0.0, + "learning_rate": 1.1036175397982984e-06, + "loss": 1.1987, + "step": 29071 + }, + { + "epoch": 0.853602677784955, + "grad_norm": 0.0, + "learning_rate": 1.1031833088063437e-06, + "loss": 1.0977, + "step": 29072 + }, + { + "epoch": 0.8536320394620941, + "grad_norm": 0.0, + "learning_rate": 1.1027491582705818e-06, + "loss": 1.249, + "step": 29073 + }, + { + "epoch": 0.8536614011392331, + "grad_norm": 0.0, + "learning_rate": 1.1023150881949373e-06, + "loss": 1.2295, + "step": 29074 + }, + { + "epoch": 0.853690762816372, + "grad_norm": 0.0, + "learning_rate": 1.101881098583335e-06, + "loss": 1.2422, + "step": 29075 + }, + { + "epoch": 0.8537201244935111, + "grad_norm": 0.0, + "learning_rate": 1.1014471894397017e-06, + "loss": 1.2261, + "step": 29076 + }, + { + "epoch": 0.85374948617065, + "grad_norm": 0.0, + "learning_rate": 1.1010133607679596e-06, + "loss": 1.2021, + "step": 29077 + }, + { + "epoch": 0.853778847847789, + "grad_norm": 0.0, + "learning_rate": 1.1005796125720336e-06, + "loss": 1.3281, + "step": 29078 + }, + { + "epoch": 0.8538082095249281, + "grad_norm": 0.0, + "learning_rate": 1.100145944855846e-06, + "loss": 1.1699, + "step": 29079 + }, + { + "epoch": 0.853837571202067, + "grad_norm": 0.0, + "learning_rate": 1.099712357623316e-06, + "loss": 1.3008, + "step": 29080 + }, + { + "epoch": 0.853866932879206, + "grad_norm": 0.0, + "learning_rate": 1.099278850878368e-06, + "loss": 1.3262, + "step": 29081 + }, + { + "epoch": 0.8538962945563451, + "grad_norm": 0.0, + "learning_rate": 1.0988454246249214e-06, + "loss": 1.2524, + "step": 29082 + }, + { + "epoch": 0.853925656233484, + "grad_norm": 0.0, + "learning_rate": 1.0984120788668951e-06, + "loss": 1.2852, + "step": 29083 + }, + { + "epoch": 0.853955017910623, + "grad_norm": 0.0, + "learning_rate": 1.0979788136082059e-06, + "loss": 1.1978, + "step": 29084 + }, + { + "epoch": 0.8539843795877621, + "grad_norm": 0.0, + "learning_rate": 1.0975456288527765e-06, + "loss": 1.1865, + "step": 29085 + }, + { + "epoch": 0.854013741264901, + "grad_norm": 0.0, + "learning_rate": 1.09711252460452e-06, + "loss": 1.1997, + "step": 29086 + }, + { + "epoch": 0.85404310294204, + "grad_norm": 0.0, + "learning_rate": 1.0966795008673558e-06, + "loss": 1.2275, + "step": 29087 + }, + { + "epoch": 0.8540724646191791, + "grad_norm": 0.0, + "learning_rate": 1.0962465576452009e-06, + "loss": 1.3457, + "step": 29088 + }, + { + "epoch": 0.854101826296318, + "grad_norm": 0.0, + "learning_rate": 1.0958136949419674e-06, + "loss": 1.2822, + "step": 29089 + }, + { + "epoch": 0.854131187973457, + "grad_norm": 0.0, + "learning_rate": 1.095380912761571e-06, + "loss": 1.2207, + "step": 29090 + }, + { + "epoch": 0.8541605496505961, + "grad_norm": 0.0, + "learning_rate": 1.0949482111079234e-06, + "loss": 1.0938, + "step": 29091 + }, + { + "epoch": 0.854189911327735, + "grad_norm": 0.0, + "learning_rate": 1.0945155899849424e-06, + "loss": 1.2646, + "step": 29092 + }, + { + "epoch": 0.854219273004874, + "grad_norm": 0.0, + "learning_rate": 1.094083049396536e-06, + "loss": 1.2119, + "step": 29093 + }, + { + "epoch": 0.8542486346820131, + "grad_norm": 0.0, + "learning_rate": 1.0936505893466188e-06, + "loss": 1.2744, + "step": 29094 + }, + { + "epoch": 0.854277996359152, + "grad_norm": 0.0, + "learning_rate": 1.0932182098390986e-06, + "loss": 1.3042, + "step": 29095 + }, + { + "epoch": 0.854307358036291, + "grad_norm": 0.0, + "learning_rate": 1.0927859108778883e-06, + "loss": 1.1533, + "step": 29096 + }, + { + "epoch": 0.8543367197134301, + "grad_norm": 0.0, + "learning_rate": 1.0923536924668975e-06, + "loss": 1.0869, + "step": 29097 + }, + { + "epoch": 0.854366081390569, + "grad_norm": 0.0, + "learning_rate": 1.0919215546100326e-06, + "loss": 1.0425, + "step": 29098 + }, + { + "epoch": 0.854395443067708, + "grad_norm": 0.0, + "learning_rate": 1.0914894973112022e-06, + "loss": 1.2974, + "step": 29099 + }, + { + "epoch": 0.854424804744847, + "grad_norm": 0.0, + "learning_rate": 1.0910575205743124e-06, + "loss": 1.1411, + "step": 29100 + }, + { + "epoch": 0.854454166421986, + "grad_norm": 0.0, + "learning_rate": 1.0906256244032732e-06, + "loss": 1.1777, + "step": 29101 + }, + { + "epoch": 0.854483528099125, + "grad_norm": 0.0, + "learning_rate": 1.090193808801986e-06, + "loss": 1.1787, + "step": 29102 + }, + { + "epoch": 0.854512889776264, + "grad_norm": 0.0, + "learning_rate": 1.0897620737743597e-06, + "loss": 1.2056, + "step": 29103 + }, + { + "epoch": 0.854542251453403, + "grad_norm": 0.0, + "learning_rate": 1.0893304193242959e-06, + "loss": 1.4277, + "step": 29104 + }, + { + "epoch": 0.854571613130542, + "grad_norm": 0.0, + "learning_rate": 1.0888988454557003e-06, + "loss": 1.3076, + "step": 29105 + }, + { + "epoch": 0.854600974807681, + "grad_norm": 0.0, + "learning_rate": 1.0884673521724743e-06, + "loss": 1.1582, + "step": 29106 + }, + { + "epoch": 0.85463033648482, + "grad_norm": 0.0, + "learning_rate": 1.0880359394785211e-06, + "loss": 0.9883, + "step": 29107 + }, + { + "epoch": 0.854659698161959, + "grad_norm": 0.0, + "learning_rate": 1.0876046073777414e-06, + "loss": 1.1387, + "step": 29108 + }, + { + "epoch": 0.854689059839098, + "grad_norm": 0.0, + "learning_rate": 1.0871733558740327e-06, + "loss": 1.2197, + "step": 29109 + }, + { + "epoch": 0.854718421516237, + "grad_norm": 0.0, + "learning_rate": 1.0867421849713012e-06, + "loss": 1.2197, + "step": 29110 + }, + { + "epoch": 0.854747783193376, + "grad_norm": 0.0, + "learning_rate": 1.08631109467344e-06, + "loss": 1.1079, + "step": 29111 + }, + { + "epoch": 0.854777144870515, + "grad_norm": 0.0, + "learning_rate": 1.085880084984353e-06, + "loss": 1.1274, + "step": 29112 + }, + { + "epoch": 0.854806506547654, + "grad_norm": 0.0, + "learning_rate": 1.085449155907935e-06, + "loss": 1.2246, + "step": 29113 + }, + { + "epoch": 0.854835868224793, + "grad_norm": 0.0, + "learning_rate": 1.0850183074480826e-06, + "loss": 1.1987, + "step": 29114 + }, + { + "epoch": 0.854865229901932, + "grad_norm": 0.0, + "learning_rate": 1.0845875396086914e-06, + "loss": 1.2354, + "step": 29115 + }, + { + "epoch": 0.854894591579071, + "grad_norm": 0.0, + "learning_rate": 1.0841568523936608e-06, + "loss": 1.1997, + "step": 29116 + }, + { + "epoch": 0.85492395325621, + "grad_norm": 0.0, + "learning_rate": 1.083726245806883e-06, + "loss": 1.1855, + "step": 29117 + }, + { + "epoch": 0.854953314933349, + "grad_norm": 0.0, + "learning_rate": 1.0832957198522497e-06, + "loss": 1.1523, + "step": 29118 + }, + { + "epoch": 0.854982676610488, + "grad_norm": 0.0, + "learning_rate": 1.0828652745336598e-06, + "loss": 1.1187, + "step": 29119 + }, + { + "epoch": 0.855012038287627, + "grad_norm": 0.0, + "learning_rate": 1.0824349098550003e-06, + "loss": 1.2681, + "step": 29120 + }, + { + "epoch": 0.855041399964766, + "grad_norm": 0.0, + "learning_rate": 1.0820046258201677e-06, + "loss": 1.3164, + "step": 29121 + }, + { + "epoch": 0.855070761641905, + "grad_norm": 0.0, + "learning_rate": 1.0815744224330504e-06, + "loss": 1.3193, + "step": 29122 + }, + { + "epoch": 0.855100123319044, + "grad_norm": 0.0, + "learning_rate": 1.0811442996975407e-06, + "loss": 1.1938, + "step": 29123 + }, + { + "epoch": 0.855129484996183, + "grad_norm": 0.0, + "learning_rate": 1.0807142576175256e-06, + "loss": 1.1724, + "step": 29124 + }, + { + "epoch": 0.855158846673322, + "grad_norm": 0.0, + "learning_rate": 1.0802842961968973e-06, + "loss": 1.1787, + "step": 29125 + }, + { + "epoch": 0.8551882083504609, + "grad_norm": 0.0, + "learning_rate": 1.0798544154395395e-06, + "loss": 1.2026, + "step": 29126 + }, + { + "epoch": 0.8552175700276, + "grad_norm": 0.0, + "learning_rate": 1.0794246153493458e-06, + "loss": 1.21, + "step": 29127 + }, + { + "epoch": 0.855246931704739, + "grad_norm": 0.0, + "learning_rate": 1.0789948959302e-06, + "loss": 1.1841, + "step": 29128 + }, + { + "epoch": 0.8552762933818779, + "grad_norm": 0.0, + "learning_rate": 1.078565257185985e-06, + "loss": 1.0591, + "step": 29129 + }, + { + "epoch": 0.855305655059017, + "grad_norm": 0.0, + "learning_rate": 1.0781356991205926e-06, + "loss": 1.2119, + "step": 29130 + }, + { + "epoch": 0.855335016736156, + "grad_norm": 0.0, + "learning_rate": 1.077706221737903e-06, + "loss": 1.1372, + "step": 29131 + }, + { + "epoch": 0.8553643784132949, + "grad_norm": 0.0, + "learning_rate": 1.077276825041802e-06, + "loss": 1.1675, + "step": 29132 + }, + { + "epoch": 0.855393740090434, + "grad_norm": 0.0, + "learning_rate": 1.0768475090361686e-06, + "loss": 1.2754, + "step": 29133 + }, + { + "epoch": 0.855423101767573, + "grad_norm": 0.0, + "learning_rate": 1.0764182737248907e-06, + "loss": 1.1724, + "step": 29134 + }, + { + "epoch": 0.8554524634447119, + "grad_norm": 0.0, + "learning_rate": 1.0759891191118466e-06, + "loss": 1.2769, + "step": 29135 + }, + { + "epoch": 0.855481825121851, + "grad_norm": 0.0, + "learning_rate": 1.0755600452009195e-06, + "loss": 1.2871, + "step": 29136 + }, + { + "epoch": 0.85551118679899, + "grad_norm": 0.0, + "learning_rate": 1.075131051995989e-06, + "loss": 1.2734, + "step": 29137 + }, + { + "epoch": 0.8555405484761289, + "grad_norm": 0.0, + "learning_rate": 1.0747021395009339e-06, + "loss": 1.2368, + "step": 29138 + }, + { + "epoch": 0.855569910153268, + "grad_norm": 0.0, + "learning_rate": 1.0742733077196331e-06, + "loss": 1.2139, + "step": 29139 + }, + { + "epoch": 0.855599271830407, + "grad_norm": 0.0, + "learning_rate": 1.073844556655963e-06, + "loss": 1.3594, + "step": 29140 + }, + { + "epoch": 0.8556286335075459, + "grad_norm": 0.0, + "learning_rate": 1.0734158863138045e-06, + "loss": 1.2085, + "step": 29141 + }, + { + "epoch": 0.855657995184685, + "grad_norm": 0.0, + "learning_rate": 1.07298729669703e-06, + "loss": 1.1934, + "step": 29142 + }, + { + "epoch": 0.855687356861824, + "grad_norm": 0.0, + "learning_rate": 1.072558787809519e-06, + "loss": 1.2783, + "step": 29143 + }, + { + "epoch": 0.8557167185389629, + "grad_norm": 0.0, + "learning_rate": 1.0721303596551446e-06, + "loss": 1.1816, + "step": 29144 + }, + { + "epoch": 0.855746080216102, + "grad_norm": 0.0, + "learning_rate": 1.071702012237783e-06, + "loss": 1.25, + "step": 29145 + }, + { + "epoch": 0.855775441893241, + "grad_norm": 0.0, + "learning_rate": 1.0712737455613065e-06, + "loss": 1.3467, + "step": 29146 + }, + { + "epoch": 0.8558048035703799, + "grad_norm": 0.0, + "learning_rate": 1.0708455596295885e-06, + "loss": 1.1582, + "step": 29147 + }, + { + "epoch": 0.855834165247519, + "grad_norm": 0.0, + "learning_rate": 1.0704174544465007e-06, + "loss": 1.2881, + "step": 29148 + }, + { + "epoch": 0.855863526924658, + "grad_norm": 0.0, + "learning_rate": 1.0699894300159131e-06, + "loss": 1.1729, + "step": 29149 + }, + { + "epoch": 0.8558928886017969, + "grad_norm": 0.0, + "learning_rate": 1.0695614863417002e-06, + "loss": 1.1221, + "step": 29150 + }, + { + "epoch": 0.855922250278936, + "grad_norm": 0.0, + "learning_rate": 1.0691336234277283e-06, + "loss": 1.2134, + "step": 29151 + }, + { + "epoch": 0.8559516119560749, + "grad_norm": 0.0, + "learning_rate": 1.0687058412778695e-06, + "loss": 1.1396, + "step": 29152 + }, + { + "epoch": 0.8559809736332139, + "grad_norm": 0.0, + "learning_rate": 1.0682781398959885e-06, + "loss": 1.1523, + "step": 29153 + }, + { + "epoch": 0.856010335310353, + "grad_norm": 0.0, + "learning_rate": 1.067850519285959e-06, + "loss": 1.1865, + "step": 29154 + }, + { + "epoch": 0.8560396969874919, + "grad_norm": 0.0, + "learning_rate": 1.0674229794516445e-06, + "loss": 1.2192, + "step": 29155 + }, + { + "epoch": 0.8560690586646309, + "grad_norm": 0.0, + "learning_rate": 1.0669955203969118e-06, + "loss": 1.1558, + "step": 29156 + }, + { + "epoch": 0.85609842034177, + "grad_norm": 0.0, + "learning_rate": 1.0665681421256257e-06, + "loss": 1.1309, + "step": 29157 + }, + { + "epoch": 0.8561277820189089, + "grad_norm": 0.0, + "learning_rate": 1.0661408446416499e-06, + "loss": 1.2202, + "step": 29158 + }, + { + "epoch": 0.8561571436960479, + "grad_norm": 0.0, + "learning_rate": 1.0657136279488533e-06, + "loss": 1.2056, + "step": 29159 + }, + { + "epoch": 0.856186505373187, + "grad_norm": 0.0, + "learning_rate": 1.065286492051093e-06, + "loss": 1.23, + "step": 29160 + }, + { + "epoch": 0.8562158670503259, + "grad_norm": 0.0, + "learning_rate": 1.064859436952238e-06, + "loss": 1.207, + "step": 29161 + }, + { + "epoch": 0.8562452287274649, + "grad_norm": 0.0, + "learning_rate": 1.0644324626561474e-06, + "loss": 1.291, + "step": 29162 + }, + { + "epoch": 0.856274590404604, + "grad_norm": 0.0, + "learning_rate": 1.0640055691666828e-06, + "loss": 1.2998, + "step": 29163 + }, + { + "epoch": 0.8563039520817429, + "grad_norm": 0.0, + "learning_rate": 1.063578756487702e-06, + "loss": 1.2588, + "step": 29164 + }, + { + "epoch": 0.8563333137588819, + "grad_norm": 0.0, + "learning_rate": 1.0631520246230697e-06, + "loss": 1.2568, + "step": 29165 + }, + { + "epoch": 0.856362675436021, + "grad_norm": 0.0, + "learning_rate": 1.0627253735766419e-06, + "loss": 1.1343, + "step": 29166 + }, + { + "epoch": 0.8563920371131599, + "grad_norm": 0.0, + "learning_rate": 1.0622988033522753e-06, + "loss": 1.2202, + "step": 29167 + }, + { + "epoch": 0.8564213987902989, + "grad_norm": 0.0, + "learning_rate": 1.0618723139538324e-06, + "loss": 1.1836, + "step": 29168 + }, + { + "epoch": 0.856450760467438, + "grad_norm": 0.0, + "learning_rate": 1.0614459053851644e-06, + "loss": 1.21, + "step": 29169 + }, + { + "epoch": 0.8564801221445769, + "grad_norm": 0.0, + "learning_rate": 1.061019577650132e-06, + "loss": 1.2344, + "step": 29170 + }, + { + "epoch": 0.8565094838217159, + "grad_norm": 0.0, + "learning_rate": 1.0605933307525896e-06, + "loss": 1.167, + "step": 29171 + }, + { + "epoch": 0.8565388454988548, + "grad_norm": 0.0, + "learning_rate": 1.0601671646963906e-06, + "loss": 1.189, + "step": 29172 + }, + { + "epoch": 0.8565682071759939, + "grad_norm": 0.0, + "learning_rate": 1.059741079485388e-06, + "loss": 1.0732, + "step": 29173 + }, + { + "epoch": 0.8565975688531329, + "grad_norm": 0.0, + "learning_rate": 1.0593150751234382e-06, + "loss": 1.2207, + "step": 29174 + }, + { + "epoch": 0.8566269305302718, + "grad_norm": 0.0, + "learning_rate": 1.0588891516143905e-06, + "loss": 1.2085, + "step": 29175 + }, + { + "epoch": 0.8566562922074109, + "grad_norm": 0.0, + "learning_rate": 1.0584633089620988e-06, + "loss": 1.2637, + "step": 29176 + }, + { + "epoch": 0.8566856538845499, + "grad_norm": 0.0, + "learning_rate": 1.0580375471704141e-06, + "loss": 1.252, + "step": 29177 + }, + { + "epoch": 0.8567150155616888, + "grad_norm": 0.0, + "learning_rate": 1.0576118662431845e-06, + "loss": 1.2988, + "step": 29178 + }, + { + "epoch": 0.8567443772388279, + "grad_norm": 0.0, + "learning_rate": 1.0571862661842624e-06, + "loss": 1.2153, + "step": 29179 + }, + { + "epoch": 0.8567737389159669, + "grad_norm": 0.0, + "learning_rate": 1.056760746997495e-06, + "loss": 1.2598, + "step": 29180 + }, + { + "epoch": 0.8568031005931058, + "grad_norm": 0.0, + "learning_rate": 1.05633530868673e-06, + "loss": 1.251, + "step": 29181 + }, + { + "epoch": 0.8568324622702449, + "grad_norm": 0.0, + "learning_rate": 1.0559099512558147e-06, + "loss": 1.2383, + "step": 29182 + }, + { + "epoch": 0.8568618239473839, + "grad_norm": 0.0, + "learning_rate": 1.0554846747085979e-06, + "loss": 1.1836, + "step": 29183 + }, + { + "epoch": 0.8568911856245228, + "grad_norm": 0.0, + "learning_rate": 1.055059479048921e-06, + "loss": 1.2744, + "step": 29184 + }, + { + "epoch": 0.8569205473016619, + "grad_norm": 0.0, + "learning_rate": 1.0546343642806345e-06, + "loss": 1.1929, + "step": 29185 + }, + { + "epoch": 0.8569499089788009, + "grad_norm": 0.0, + "learning_rate": 1.0542093304075806e-06, + "loss": 1.1191, + "step": 29186 + }, + { + "epoch": 0.8569792706559398, + "grad_norm": 0.0, + "learning_rate": 1.053784377433603e-06, + "loss": 1.1885, + "step": 29187 + }, + { + "epoch": 0.8570086323330789, + "grad_norm": 0.0, + "learning_rate": 1.0533595053625444e-06, + "loss": 1.2588, + "step": 29188 + }, + { + "epoch": 0.8570379940102179, + "grad_norm": 0.0, + "learning_rate": 1.052934714198245e-06, + "loss": 1.2109, + "step": 29189 + }, + { + "epoch": 0.8570673556873568, + "grad_norm": 0.0, + "learning_rate": 1.0525100039445502e-06, + "loss": 1.2812, + "step": 29190 + }, + { + "epoch": 0.8570967173644959, + "grad_norm": 0.0, + "learning_rate": 1.0520853746052973e-06, + "loss": 1.1782, + "step": 29191 + }, + { + "epoch": 0.8571260790416348, + "grad_norm": 0.0, + "learning_rate": 1.0516608261843298e-06, + "loss": 1.2852, + "step": 29192 + }, + { + "epoch": 0.8571554407187738, + "grad_norm": 0.0, + "learning_rate": 1.0512363586854823e-06, + "loss": 1.2368, + "step": 29193 + }, + { + "epoch": 0.8571848023959129, + "grad_norm": 0.0, + "learning_rate": 1.0508119721125986e-06, + "loss": 1.1099, + "step": 29194 + }, + { + "epoch": 0.8572141640730518, + "grad_norm": 0.0, + "learning_rate": 1.0503876664695145e-06, + "loss": 1.1582, + "step": 29195 + }, + { + "epoch": 0.8572435257501908, + "grad_norm": 0.0, + "learning_rate": 1.0499634417600668e-06, + "loss": 1.2559, + "step": 29196 + }, + { + "epoch": 0.8572728874273299, + "grad_norm": 0.0, + "learning_rate": 1.0495392979880925e-06, + "loss": 1.0801, + "step": 29197 + }, + { + "epoch": 0.8573022491044688, + "grad_norm": 0.0, + "learning_rate": 1.049115235157423e-06, + "loss": 1.2065, + "step": 29198 + }, + { + "epoch": 0.8573316107816078, + "grad_norm": 0.0, + "learning_rate": 1.0486912532718996e-06, + "loss": 1.1963, + "step": 29199 + }, + { + "epoch": 0.8573609724587469, + "grad_norm": 0.0, + "learning_rate": 1.0482673523353515e-06, + "loss": 1.2583, + "step": 29200 + }, + { + "epoch": 0.8573903341358858, + "grad_norm": 0.0, + "learning_rate": 1.0478435323516168e-06, + "loss": 1.2344, + "step": 29201 + }, + { + "epoch": 0.8574196958130248, + "grad_norm": 0.0, + "learning_rate": 1.0474197933245233e-06, + "loss": 1.2393, + "step": 29202 + }, + { + "epoch": 0.8574490574901639, + "grad_norm": 0.0, + "learning_rate": 1.0469961352579073e-06, + "loss": 1.085, + "step": 29203 + }, + { + "epoch": 0.8574784191673028, + "grad_norm": 0.0, + "learning_rate": 1.0465725581555985e-06, + "loss": 1.3398, + "step": 29204 + }, + { + "epoch": 0.8575077808444418, + "grad_norm": 0.0, + "learning_rate": 1.0461490620214277e-06, + "loss": 1.0874, + "step": 29205 + }, + { + "epoch": 0.8575371425215809, + "grad_norm": 0.0, + "learning_rate": 1.0457256468592225e-06, + "loss": 1.0806, + "step": 29206 + }, + { + "epoch": 0.8575665041987198, + "grad_norm": 0.0, + "learning_rate": 1.0453023126728123e-06, + "loss": 1.1924, + "step": 29207 + }, + { + "epoch": 0.8575958658758588, + "grad_norm": 0.0, + "learning_rate": 1.0448790594660285e-06, + "loss": 1.1943, + "step": 29208 + }, + { + "epoch": 0.8576252275529979, + "grad_norm": 0.0, + "learning_rate": 1.0444558872426947e-06, + "loss": 1.4131, + "step": 29209 + }, + { + "epoch": 0.8576545892301368, + "grad_norm": 0.0, + "learning_rate": 1.044032796006642e-06, + "loss": 1.3711, + "step": 29210 + }, + { + "epoch": 0.8576839509072758, + "grad_norm": 0.0, + "learning_rate": 1.0436097857616934e-06, + "loss": 1.1699, + "step": 29211 + }, + { + "epoch": 0.8577133125844149, + "grad_norm": 0.0, + "learning_rate": 1.0431868565116776e-06, + "loss": 1.2861, + "step": 29212 + }, + { + "epoch": 0.8577426742615538, + "grad_norm": 0.0, + "learning_rate": 1.042764008260414e-06, + "loss": 1.2207, + "step": 29213 + }, + { + "epoch": 0.8577720359386928, + "grad_norm": 0.0, + "learning_rate": 1.0423412410117307e-06, + "loss": 1.2295, + "step": 29214 + }, + { + "epoch": 0.8578013976158319, + "grad_norm": 0.0, + "learning_rate": 1.04191855476945e-06, + "loss": 1.2461, + "step": 29215 + }, + { + "epoch": 0.8578307592929708, + "grad_norm": 0.0, + "learning_rate": 1.0414959495373921e-06, + "loss": 1.2676, + "step": 29216 + }, + { + "epoch": 0.8578601209701098, + "grad_norm": 0.0, + "learning_rate": 1.0410734253193822e-06, + "loss": 1.2314, + "step": 29217 + }, + { + "epoch": 0.8578894826472488, + "grad_norm": 0.0, + "learning_rate": 1.040650982119239e-06, + "loss": 1.1016, + "step": 29218 + }, + { + "epoch": 0.8579188443243878, + "grad_norm": 0.0, + "learning_rate": 1.040228619940784e-06, + "loss": 1.3203, + "step": 29219 + }, + { + "epoch": 0.8579482060015268, + "grad_norm": 0.0, + "learning_rate": 1.0398063387878365e-06, + "loss": 1.2583, + "step": 29220 + }, + { + "epoch": 0.8579775676786658, + "grad_norm": 0.0, + "learning_rate": 1.0393841386642157e-06, + "loss": 1.0386, + "step": 29221 + }, + { + "epoch": 0.8580069293558048, + "grad_norm": 0.0, + "learning_rate": 1.038962019573737e-06, + "loss": 1.333, + "step": 29222 + }, + { + "epoch": 0.8580362910329438, + "grad_norm": 0.0, + "learning_rate": 1.0385399815202213e-06, + "loss": 1.3008, + "step": 29223 + }, + { + "epoch": 0.8580656527100828, + "grad_norm": 0.0, + "learning_rate": 1.0381180245074818e-06, + "loss": 1.2627, + "step": 29224 + }, + { + "epoch": 0.8580950143872218, + "grad_norm": 0.0, + "learning_rate": 1.0376961485393377e-06, + "loss": 1.2334, + "step": 29225 + }, + { + "epoch": 0.8581243760643608, + "grad_norm": 0.0, + "learning_rate": 1.0372743536196027e-06, + "loss": 1.2969, + "step": 29226 + }, + { + "epoch": 0.8581537377414998, + "grad_norm": 0.0, + "learning_rate": 1.0368526397520895e-06, + "loss": 1.2354, + "step": 29227 + }, + { + "epoch": 0.8581830994186388, + "grad_norm": 0.0, + "learning_rate": 1.0364310069406148e-06, + "loss": 1.2212, + "step": 29228 + }, + { + "epoch": 0.8582124610957778, + "grad_norm": 0.0, + "learning_rate": 1.036009455188991e-06, + "loss": 1.2764, + "step": 29229 + }, + { + "epoch": 0.8582418227729168, + "grad_norm": 0.0, + "learning_rate": 1.0355879845010286e-06, + "loss": 1.1377, + "step": 29230 + }, + { + "epoch": 0.8582711844500558, + "grad_norm": 0.0, + "learning_rate": 1.035166594880539e-06, + "loss": 1.1875, + "step": 29231 + }, + { + "epoch": 0.8583005461271948, + "grad_norm": 0.0, + "learning_rate": 1.0347452863313345e-06, + "loss": 1.2075, + "step": 29232 + }, + { + "epoch": 0.8583299078043338, + "grad_norm": 0.0, + "learning_rate": 1.0343240588572223e-06, + "loss": 1.2119, + "step": 29233 + }, + { + "epoch": 0.8583592694814728, + "grad_norm": 0.0, + "learning_rate": 1.033902912462017e-06, + "loss": 1.2041, + "step": 29234 + }, + { + "epoch": 0.8583886311586117, + "grad_norm": 0.0, + "learning_rate": 1.0334818471495234e-06, + "loss": 1.1265, + "step": 29235 + }, + { + "epoch": 0.8584179928357508, + "grad_norm": 0.0, + "learning_rate": 1.0330608629235471e-06, + "loss": 1.165, + "step": 29236 + }, + { + "epoch": 0.8584473545128898, + "grad_norm": 0.0, + "learning_rate": 1.0326399597879033e-06, + "loss": 1.2861, + "step": 29237 + }, + { + "epoch": 0.8584767161900287, + "grad_norm": 0.0, + "learning_rate": 1.0322191377463875e-06, + "loss": 1.1006, + "step": 29238 + }, + { + "epoch": 0.8585060778671678, + "grad_norm": 0.0, + "learning_rate": 1.0317983968028133e-06, + "loss": 1.2012, + "step": 29239 + }, + { + "epoch": 0.8585354395443068, + "grad_norm": 0.0, + "learning_rate": 1.0313777369609812e-06, + "loss": 1.2754, + "step": 29240 + }, + { + "epoch": 0.8585648012214457, + "grad_norm": 0.0, + "learning_rate": 1.0309571582246992e-06, + "loss": 1.2178, + "step": 29241 + }, + { + "epoch": 0.8585941628985848, + "grad_norm": 0.0, + "learning_rate": 1.0305366605977663e-06, + "loss": 1.0977, + "step": 29242 + }, + { + "epoch": 0.8586235245757238, + "grad_norm": 0.0, + "learning_rate": 1.0301162440839896e-06, + "loss": 1.1416, + "step": 29243 + }, + { + "epoch": 0.8586528862528627, + "grad_norm": 0.0, + "learning_rate": 1.0296959086871695e-06, + "loss": 1.1768, + "step": 29244 + }, + { + "epoch": 0.8586822479300018, + "grad_norm": 0.0, + "learning_rate": 1.029275654411106e-06, + "loss": 1.23, + "step": 29245 + }, + { + "epoch": 0.8587116096071408, + "grad_norm": 0.0, + "learning_rate": 1.0288554812595997e-06, + "loss": 1.2192, + "step": 29246 + }, + { + "epoch": 0.8587409712842797, + "grad_norm": 0.0, + "learning_rate": 1.0284353892364496e-06, + "loss": 1.208, + "step": 29247 + }, + { + "epoch": 0.8587703329614188, + "grad_norm": 0.0, + "learning_rate": 1.0280153783454571e-06, + "loss": 1.2637, + "step": 29248 + }, + { + "epoch": 0.8587996946385578, + "grad_norm": 0.0, + "learning_rate": 1.0275954485904183e-06, + "loss": 1.2104, + "step": 29249 + }, + { + "epoch": 0.8588290563156967, + "grad_norm": 0.0, + "learning_rate": 1.0271755999751331e-06, + "loss": 1.1807, + "step": 29250 + }, + { + "epoch": 0.8588584179928358, + "grad_norm": 0.0, + "learning_rate": 1.0267558325033954e-06, + "loss": 1.1035, + "step": 29251 + }, + { + "epoch": 0.8588877796699748, + "grad_norm": 0.0, + "learning_rate": 1.0263361461790044e-06, + "loss": 1.209, + "step": 29252 + }, + { + "epoch": 0.8589171413471137, + "grad_norm": 0.0, + "learning_rate": 1.0259165410057536e-06, + "loss": 1.2363, + "step": 29253 + }, + { + "epoch": 0.8589465030242528, + "grad_norm": 0.0, + "learning_rate": 1.025497016987439e-06, + "loss": 1.1519, + "step": 29254 + }, + { + "epoch": 0.8589758647013918, + "grad_norm": 0.0, + "learning_rate": 1.025077574127852e-06, + "loss": 1.2715, + "step": 29255 + }, + { + "epoch": 0.8590052263785307, + "grad_norm": 0.0, + "learning_rate": 1.0246582124307859e-06, + "loss": 1.3477, + "step": 29256 + }, + { + "epoch": 0.8590345880556698, + "grad_norm": 0.0, + "learning_rate": 1.0242389319000356e-06, + "loss": 1.1577, + "step": 29257 + }, + { + "epoch": 0.8590639497328088, + "grad_norm": 0.0, + "learning_rate": 1.0238197325393896e-06, + "loss": 1.165, + "step": 29258 + }, + { + "epoch": 0.8590933114099477, + "grad_norm": 0.0, + "learning_rate": 1.023400614352642e-06, + "loss": 1.1152, + "step": 29259 + }, + { + "epoch": 0.8591226730870868, + "grad_norm": 0.0, + "learning_rate": 1.0229815773435802e-06, + "loss": 1.2168, + "step": 29260 + }, + { + "epoch": 0.8591520347642257, + "grad_norm": 0.0, + "learning_rate": 1.0225626215159968e-06, + "loss": 1.0737, + "step": 29261 + }, + { + "epoch": 0.8591813964413647, + "grad_norm": 0.0, + "learning_rate": 1.0221437468736795e-06, + "loss": 1.1953, + "step": 29262 + }, + { + "epoch": 0.8592107581185038, + "grad_norm": 0.0, + "learning_rate": 1.0217249534204143e-06, + "loss": 1.1187, + "step": 29263 + }, + { + "epoch": 0.8592401197956427, + "grad_norm": 0.0, + "learning_rate": 1.0213062411599884e-06, + "loss": 1.1958, + "step": 29264 + }, + { + "epoch": 0.8592694814727817, + "grad_norm": 0.0, + "learning_rate": 1.0208876100961918e-06, + "loss": 1.1934, + "step": 29265 + }, + { + "epoch": 0.8592988431499208, + "grad_norm": 0.0, + "learning_rate": 1.0204690602328083e-06, + "loss": 1.1821, + "step": 29266 + }, + { + "epoch": 0.8593282048270597, + "grad_norm": 0.0, + "learning_rate": 1.0200505915736202e-06, + "loss": 1.3242, + "step": 29267 + }, + { + "epoch": 0.8593575665041987, + "grad_norm": 0.0, + "learning_rate": 1.019632204122417e-06, + "loss": 1.1953, + "step": 29268 + }, + { + "epoch": 0.8593869281813378, + "grad_norm": 0.0, + "learning_rate": 1.0192138978829792e-06, + "loss": 1.2188, + "step": 29269 + }, + { + "epoch": 0.8594162898584767, + "grad_norm": 0.0, + "learning_rate": 1.0187956728590897e-06, + "loss": 1.4033, + "step": 29270 + }, + { + "epoch": 0.8594456515356157, + "grad_norm": 0.0, + "learning_rate": 1.0183775290545294e-06, + "loss": 1.2261, + "step": 29271 + }, + { + "epoch": 0.8594750132127547, + "grad_norm": 0.0, + "learning_rate": 1.0179594664730842e-06, + "loss": 1.1797, + "step": 29272 + }, + { + "epoch": 0.8595043748898937, + "grad_norm": 0.0, + "learning_rate": 1.0175414851185294e-06, + "loss": 1.1714, + "step": 29273 + }, + { + "epoch": 0.8595337365670327, + "grad_norm": 0.0, + "learning_rate": 1.0171235849946492e-06, + "loss": 1.25, + "step": 29274 + }, + { + "epoch": 0.8595630982441717, + "grad_norm": 0.0, + "learning_rate": 1.0167057661052204e-06, + "loss": 1.251, + "step": 29275 + }, + { + "epoch": 0.8595924599213107, + "grad_norm": 0.0, + "learning_rate": 1.016288028454021e-06, + "loss": 1.189, + "step": 29276 + }, + { + "epoch": 0.8596218215984497, + "grad_norm": 0.0, + "learning_rate": 1.0158703720448315e-06, + "loss": 1.019, + "step": 29277 + }, + { + "epoch": 0.8596511832755886, + "grad_norm": 0.0, + "learning_rate": 1.0154527968814264e-06, + "loss": 1.2314, + "step": 29278 + }, + { + "epoch": 0.8596805449527277, + "grad_norm": 0.0, + "learning_rate": 1.015035302967583e-06, + "loss": 1.0483, + "step": 29279 + }, + { + "epoch": 0.8597099066298667, + "grad_norm": 0.0, + "learning_rate": 1.0146178903070747e-06, + "loss": 1.0947, + "step": 29280 + }, + { + "epoch": 0.8597392683070056, + "grad_norm": 0.0, + "learning_rate": 1.0142005589036796e-06, + "loss": 1.2158, + "step": 29281 + }, + { + "epoch": 0.8597686299841447, + "grad_norm": 0.0, + "learning_rate": 1.0137833087611692e-06, + "loss": 1.1909, + "step": 29282 + }, + { + "epoch": 0.8597979916612837, + "grad_norm": 0.0, + "learning_rate": 1.0133661398833184e-06, + "loss": 1.2891, + "step": 29283 + }, + { + "epoch": 0.8598273533384226, + "grad_norm": 0.0, + "learning_rate": 1.0129490522739005e-06, + "loss": 1.2656, + "step": 29284 + }, + { + "epoch": 0.8598567150155617, + "grad_norm": 0.0, + "learning_rate": 1.0125320459366828e-06, + "loss": 1.0732, + "step": 29285 + }, + { + "epoch": 0.8598860766927007, + "grad_norm": 0.0, + "learning_rate": 1.0121151208754444e-06, + "loss": 1.2363, + "step": 29286 + }, + { + "epoch": 0.8599154383698396, + "grad_norm": 0.0, + "learning_rate": 1.0116982770939455e-06, + "loss": 1.2754, + "step": 29287 + }, + { + "epoch": 0.8599448000469787, + "grad_norm": 0.0, + "learning_rate": 1.0112815145959641e-06, + "loss": 1.0518, + "step": 29288 + }, + { + "epoch": 0.8599741617241177, + "grad_norm": 0.0, + "learning_rate": 1.0108648333852633e-06, + "loss": 1.2695, + "step": 29289 + }, + { + "epoch": 0.8600035234012566, + "grad_norm": 0.0, + "learning_rate": 1.0104482334656162e-06, + "loss": 1.3359, + "step": 29290 + }, + { + "epoch": 0.8600328850783957, + "grad_norm": 0.0, + "learning_rate": 1.0100317148407868e-06, + "loss": 1.2461, + "step": 29291 + }, + { + "epoch": 0.8600622467555347, + "grad_norm": 0.0, + "learning_rate": 1.009615277514544e-06, + "loss": 1.0859, + "step": 29292 + }, + { + "epoch": 0.8600916084326736, + "grad_norm": 0.0, + "learning_rate": 1.0091989214906527e-06, + "loss": 1.1963, + "step": 29293 + }, + { + "epoch": 0.8601209701098127, + "grad_norm": 0.0, + "learning_rate": 1.008782646772879e-06, + "loss": 1.1074, + "step": 29294 + }, + { + "epoch": 0.8601503317869517, + "grad_norm": 0.0, + "learning_rate": 1.0083664533649862e-06, + "loss": 1.2803, + "step": 29295 + }, + { + "epoch": 0.8601796934640906, + "grad_norm": 0.0, + "learning_rate": 1.007950341270736e-06, + "loss": 1.0332, + "step": 29296 + }, + { + "epoch": 0.8602090551412297, + "grad_norm": 0.0, + "learning_rate": 1.007534310493895e-06, + "loss": 1.2485, + "step": 29297 + }, + { + "epoch": 0.8602384168183687, + "grad_norm": 0.0, + "learning_rate": 1.0071183610382241e-06, + "loss": 1.1152, + "step": 29298 + }, + { + "epoch": 0.8602677784955076, + "grad_norm": 0.0, + "learning_rate": 1.0067024929074854e-06, + "loss": 1.1616, + "step": 29299 + }, + { + "epoch": 0.8602971401726467, + "grad_norm": 0.0, + "learning_rate": 1.0062867061054383e-06, + "loss": 1.2832, + "step": 29300 + }, + { + "epoch": 0.8603265018497857, + "grad_norm": 0.0, + "learning_rate": 1.0058710006358452e-06, + "loss": 1.2256, + "step": 29301 + }, + { + "epoch": 0.8603558635269246, + "grad_norm": 0.0, + "learning_rate": 1.0054553765024645e-06, + "loss": 1.2051, + "step": 29302 + }, + { + "epoch": 0.8603852252040637, + "grad_norm": 0.0, + "learning_rate": 1.005039833709054e-06, + "loss": 1.2705, + "step": 29303 + }, + { + "epoch": 0.8604145868812026, + "grad_norm": 0.0, + "learning_rate": 1.0046243722593718e-06, + "loss": 0.9722, + "step": 29304 + }, + { + "epoch": 0.8604439485583416, + "grad_norm": 0.0, + "learning_rate": 1.004208992157174e-06, + "loss": 1.1807, + "step": 29305 + }, + { + "epoch": 0.8604733102354807, + "grad_norm": 0.0, + "learning_rate": 1.0037936934062187e-06, + "loss": 1.2012, + "step": 29306 + }, + { + "epoch": 0.8605026719126196, + "grad_norm": 0.0, + "learning_rate": 1.0033784760102606e-06, + "loss": 1.208, + "step": 29307 + }, + { + "epoch": 0.8605320335897586, + "grad_norm": 0.0, + "learning_rate": 1.0029633399730554e-06, + "loss": 1.1748, + "step": 29308 + }, + { + "epoch": 0.8605613952668977, + "grad_norm": 0.0, + "learning_rate": 1.002548285298356e-06, + "loss": 1.123, + "step": 29309 + }, + { + "epoch": 0.8605907569440366, + "grad_norm": 0.0, + "learning_rate": 1.002133311989919e-06, + "loss": 1.1567, + "step": 29310 + }, + { + "epoch": 0.8606201186211756, + "grad_norm": 0.0, + "learning_rate": 1.001718420051494e-06, + "loss": 1.2085, + "step": 29311 + }, + { + "epoch": 0.8606494802983147, + "grad_norm": 0.0, + "learning_rate": 1.0013036094868344e-06, + "loss": 1.2202, + "step": 29312 + }, + { + "epoch": 0.8606788419754536, + "grad_norm": 0.0, + "learning_rate": 1.0008888802996886e-06, + "loss": 1.1733, + "step": 29313 + }, + { + "epoch": 0.8607082036525926, + "grad_norm": 0.0, + "learning_rate": 1.000474232493811e-06, + "loss": 1.1265, + "step": 29314 + }, + { + "epoch": 0.8607375653297317, + "grad_norm": 0.0, + "learning_rate": 1.00005966607295e-06, + "loss": 1.1768, + "step": 29315 + }, + { + "epoch": 0.8607669270068706, + "grad_norm": 0.0, + "learning_rate": 9.996451810408524e-07, + "loss": 1.2051, + "step": 29316 + }, + { + "epoch": 0.8607962886840096, + "grad_norm": 0.0, + "learning_rate": 9.992307774012711e-07, + "loss": 1.1753, + "step": 29317 + }, + { + "epoch": 0.8608256503611487, + "grad_norm": 0.0, + "learning_rate": 9.988164551579504e-07, + "loss": 1.2832, + "step": 29318 + }, + { + "epoch": 0.8608550120382876, + "grad_norm": 0.0, + "learning_rate": 9.984022143146376e-07, + "loss": 1.1152, + "step": 29319 + }, + { + "epoch": 0.8608843737154266, + "grad_norm": 0.0, + "learning_rate": 9.979880548750775e-07, + "loss": 1.2822, + "step": 29320 + }, + { + "epoch": 0.8609137353925657, + "grad_norm": 0.0, + "learning_rate": 9.975739768430183e-07, + "loss": 1.2148, + "step": 29321 + }, + { + "epoch": 0.8609430970697046, + "grad_norm": 0.0, + "learning_rate": 9.97159980222201e-07, + "loss": 1.2725, + "step": 29322 + }, + { + "epoch": 0.8609724587468436, + "grad_norm": 0.0, + "learning_rate": 9.96746065016374e-07, + "loss": 1.1484, + "step": 29323 + }, + { + "epoch": 0.8610018204239827, + "grad_norm": 0.0, + "learning_rate": 9.96332231229279e-07, + "loss": 1.2354, + "step": 29324 + }, + { + "epoch": 0.8610311821011216, + "grad_norm": 0.0, + "learning_rate": 9.959184788646537e-07, + "loss": 1.3203, + "step": 29325 + }, + { + "epoch": 0.8610605437782606, + "grad_norm": 0.0, + "learning_rate": 9.955048079262464e-07, + "loss": 1.1748, + "step": 29326 + }, + { + "epoch": 0.8610899054553997, + "grad_norm": 0.0, + "learning_rate": 9.950912184177953e-07, + "loss": 1.1406, + "step": 29327 + }, + { + "epoch": 0.8611192671325386, + "grad_norm": 0.0, + "learning_rate": 9.946777103430405e-07, + "loss": 1.1504, + "step": 29328 + }, + { + "epoch": 0.8611486288096776, + "grad_norm": 0.0, + "learning_rate": 9.942642837057204e-07, + "loss": 1.1733, + "step": 29329 + }, + { + "epoch": 0.8611779904868166, + "grad_norm": 0.0, + "learning_rate": 9.93850938509575e-07, + "loss": 1.2539, + "step": 29330 + }, + { + "epoch": 0.8612073521639556, + "grad_norm": 0.0, + "learning_rate": 9.934376747583407e-07, + "loss": 1.1357, + "step": 29331 + }, + { + "epoch": 0.8612367138410946, + "grad_norm": 0.0, + "learning_rate": 9.930244924557585e-07, + "loss": 1.0967, + "step": 29332 + }, + { + "epoch": 0.8612660755182336, + "grad_norm": 0.0, + "learning_rate": 9.926113916055624e-07, + "loss": 1.188, + "step": 29333 + }, + { + "epoch": 0.8612954371953726, + "grad_norm": 0.0, + "learning_rate": 9.92198372211487e-07, + "loss": 1.1738, + "step": 29334 + }, + { + "epoch": 0.8613247988725116, + "grad_norm": 0.0, + "learning_rate": 9.917854342772693e-07, + "loss": 1.2139, + "step": 29335 + }, + { + "epoch": 0.8613541605496506, + "grad_norm": 0.0, + "learning_rate": 9.913725778066442e-07, + "loss": 1.2109, + "step": 29336 + }, + { + "epoch": 0.8613835222267896, + "grad_norm": 0.0, + "learning_rate": 9.90959802803343e-07, + "loss": 1.2832, + "step": 29337 + }, + { + "epoch": 0.8614128839039286, + "grad_norm": 0.0, + "learning_rate": 9.905471092710984e-07, + "loss": 1.374, + "step": 29338 + }, + { + "epoch": 0.8614422455810676, + "grad_norm": 0.0, + "learning_rate": 9.901344972136452e-07, + "loss": 1.3281, + "step": 29339 + }, + { + "epoch": 0.8614716072582066, + "grad_norm": 0.0, + "learning_rate": 9.897219666347113e-07, + "loss": 1.1841, + "step": 29340 + }, + { + "epoch": 0.8615009689353456, + "grad_norm": 0.0, + "learning_rate": 9.893095175380308e-07, + "loss": 1.2109, + "step": 29341 + }, + { + "epoch": 0.8615303306124846, + "grad_norm": 0.0, + "learning_rate": 9.888971499273326e-07, + "loss": 1.2202, + "step": 29342 + }, + { + "epoch": 0.8615596922896236, + "grad_norm": 0.0, + "learning_rate": 9.884848638063449e-07, + "loss": 1.291, + "step": 29343 + }, + { + "epoch": 0.8615890539667626, + "grad_norm": 0.0, + "learning_rate": 9.88072659178797e-07, + "loss": 1.2808, + "step": 29344 + }, + { + "epoch": 0.8616184156439016, + "grad_norm": 0.0, + "learning_rate": 9.876605360484127e-07, + "loss": 1.0967, + "step": 29345 + }, + { + "epoch": 0.8616477773210406, + "grad_norm": 0.0, + "learning_rate": 9.872484944189253e-07, + "loss": 1.1807, + "step": 29346 + }, + { + "epoch": 0.8616771389981795, + "grad_norm": 0.0, + "learning_rate": 9.868365342940566e-07, + "loss": 1.1768, + "step": 29347 + }, + { + "epoch": 0.8617065006753186, + "grad_norm": 0.0, + "learning_rate": 9.864246556775348e-07, + "loss": 1.2041, + "step": 29348 + }, + { + "epoch": 0.8617358623524576, + "grad_norm": 0.0, + "learning_rate": 9.860128585730811e-07, + "loss": 1.2539, + "step": 29349 + }, + { + "epoch": 0.8617652240295965, + "grad_norm": 0.0, + "learning_rate": 9.856011429844226e-07, + "loss": 1.2803, + "step": 29350 + }, + { + "epoch": 0.8617945857067356, + "grad_norm": 0.0, + "learning_rate": 9.85189508915283e-07, + "loss": 1.2036, + "step": 29351 + }, + { + "epoch": 0.8618239473838746, + "grad_norm": 0.0, + "learning_rate": 9.847779563693828e-07, + "loss": 1.2051, + "step": 29352 + }, + { + "epoch": 0.8618533090610135, + "grad_norm": 0.0, + "learning_rate": 9.843664853504454e-07, + "loss": 1.2383, + "step": 29353 + }, + { + "epoch": 0.8618826707381526, + "grad_norm": 0.0, + "learning_rate": 9.83955095862188e-07, + "loss": 1.2881, + "step": 29354 + }, + { + "epoch": 0.8619120324152916, + "grad_norm": 0.0, + "learning_rate": 9.835437879083354e-07, + "loss": 1.1606, + "step": 29355 + }, + { + "epoch": 0.8619413940924305, + "grad_norm": 0.0, + "learning_rate": 9.831325614926035e-07, + "loss": 1.311, + "step": 29356 + }, + { + "epoch": 0.8619707557695696, + "grad_norm": 0.0, + "learning_rate": 9.82721416618716e-07, + "loss": 1.083, + "step": 29357 + }, + { + "epoch": 0.8620001174467086, + "grad_norm": 0.0, + "learning_rate": 9.823103532903854e-07, + "loss": 1.1846, + "step": 29358 + }, + { + "epoch": 0.8620294791238475, + "grad_norm": 0.0, + "learning_rate": 9.818993715113335e-07, + "loss": 1.186, + "step": 29359 + }, + { + "epoch": 0.8620588408009866, + "grad_norm": 0.0, + "learning_rate": 9.814884712852757e-07, + "loss": 1.2612, + "step": 29360 + }, + { + "epoch": 0.8620882024781256, + "grad_norm": 0.0, + "learning_rate": 9.81077652615927e-07, + "loss": 1.1602, + "step": 29361 + }, + { + "epoch": 0.8621175641552645, + "grad_norm": 0.0, + "learning_rate": 9.806669155070015e-07, + "loss": 1.1328, + "step": 29362 + }, + { + "epoch": 0.8621469258324036, + "grad_norm": 0.0, + "learning_rate": 9.802562599622157e-07, + "loss": 1.2139, + "step": 29363 + }, + { + "epoch": 0.8621762875095426, + "grad_norm": 0.0, + "learning_rate": 9.798456859852835e-07, + "loss": 1.2397, + "step": 29364 + }, + { + "epoch": 0.8622056491866815, + "grad_norm": 0.0, + "learning_rate": 9.794351935799151e-07, + "loss": 1.2227, + "step": 29365 + }, + { + "epoch": 0.8622350108638206, + "grad_norm": 0.0, + "learning_rate": 9.790247827498255e-07, + "loss": 1.2842, + "step": 29366 + }, + { + "epoch": 0.8622643725409596, + "grad_norm": 0.0, + "learning_rate": 9.786144534987252e-07, + "loss": 1.2139, + "step": 29367 + }, + { + "epoch": 0.8622937342180985, + "grad_norm": 0.0, + "learning_rate": 9.782042058303253e-07, + "loss": 1.2046, + "step": 29368 + }, + { + "epoch": 0.8623230958952376, + "grad_norm": 0.0, + "learning_rate": 9.777940397483331e-07, + "loss": 1.0864, + "step": 29369 + }, + { + "epoch": 0.8623524575723766, + "grad_norm": 0.0, + "learning_rate": 9.773839552564624e-07, + "loss": 1.2212, + "step": 29370 + }, + { + "epoch": 0.8623818192495155, + "grad_norm": 0.0, + "learning_rate": 9.769739523584176e-07, + "loss": 1.3467, + "step": 29371 + }, + { + "epoch": 0.8624111809266546, + "grad_norm": 0.0, + "learning_rate": 9.765640310579095e-07, + "loss": 1.1816, + "step": 29372 + }, + { + "epoch": 0.8624405426037935, + "grad_norm": 0.0, + "learning_rate": 9.761541913586437e-07, + "loss": 1.1021, + "step": 29373 + }, + { + "epoch": 0.8624699042809325, + "grad_norm": 0.0, + "learning_rate": 9.75744433264325e-07, + "loss": 1.2627, + "step": 29374 + }, + { + "epoch": 0.8624992659580715, + "grad_norm": 0.0, + "learning_rate": 9.75334756778662e-07, + "loss": 1.2217, + "step": 29375 + }, + { + "epoch": 0.8625286276352105, + "grad_norm": 0.0, + "learning_rate": 9.749251619053578e-07, + "loss": 1.1484, + "step": 29376 + }, + { + "epoch": 0.8625579893123495, + "grad_norm": 0.0, + "learning_rate": 9.745156486481178e-07, + "loss": 1.2515, + "step": 29377 + }, + { + "epoch": 0.8625873509894885, + "grad_norm": 0.0, + "learning_rate": 9.741062170106418e-07, + "loss": 1.1997, + "step": 29378 + }, + { + "epoch": 0.8626167126666275, + "grad_norm": 0.0, + "learning_rate": 9.736968669966363e-07, + "loss": 1.1489, + "step": 29379 + }, + { + "epoch": 0.8626460743437665, + "grad_norm": 0.0, + "learning_rate": 9.732875986098012e-07, + "loss": 1.2236, + "step": 29380 + }, + { + "epoch": 0.8626754360209055, + "grad_norm": 0.0, + "learning_rate": 9.728784118538382e-07, + "loss": 1.1665, + "step": 29381 + }, + { + "epoch": 0.8627047976980445, + "grad_norm": 0.0, + "learning_rate": 9.724693067324476e-07, + "loss": 1.2334, + "step": 29382 + }, + { + "epoch": 0.8627341593751835, + "grad_norm": 0.0, + "learning_rate": 9.720602832493299e-07, + "loss": 1.1108, + "step": 29383 + }, + { + "epoch": 0.8627635210523225, + "grad_norm": 0.0, + "learning_rate": 9.71651341408183e-07, + "loss": 1.3311, + "step": 29384 + }, + { + "epoch": 0.8627928827294615, + "grad_norm": 0.0, + "learning_rate": 9.712424812127052e-07, + "loss": 1.2021, + "step": 29385 + }, + { + "epoch": 0.8628222444066005, + "grad_norm": 0.0, + "learning_rate": 9.708337026665937e-07, + "loss": 1.2573, + "step": 29386 + }, + { + "epoch": 0.8628516060837395, + "grad_norm": 0.0, + "learning_rate": 9.704250057735442e-07, + "loss": 1.2764, + "step": 29387 + }, + { + "epoch": 0.8628809677608785, + "grad_norm": 0.0, + "learning_rate": 9.70016390537255e-07, + "loss": 1.1372, + "step": 29388 + }, + { + "epoch": 0.8629103294380175, + "grad_norm": 0.0, + "learning_rate": 9.696078569614198e-07, + "loss": 1.2637, + "step": 29389 + }, + { + "epoch": 0.8629396911151564, + "grad_norm": 0.0, + "learning_rate": 9.691994050497344e-07, + "loss": 1.0537, + "step": 29390 + }, + { + "epoch": 0.8629690527922955, + "grad_norm": 0.0, + "learning_rate": 9.687910348058894e-07, + "loss": 1.1094, + "step": 29391 + }, + { + "epoch": 0.8629984144694345, + "grad_norm": 0.0, + "learning_rate": 9.683827462335837e-07, + "loss": 1.2598, + "step": 29392 + }, + { + "epoch": 0.8630277761465734, + "grad_norm": 0.0, + "learning_rate": 9.679745393365048e-07, + "loss": 1.2998, + "step": 29393 + }, + { + "epoch": 0.8630571378237125, + "grad_norm": 0.0, + "learning_rate": 9.675664141183428e-07, + "loss": 1.2598, + "step": 29394 + }, + { + "epoch": 0.8630864995008515, + "grad_norm": 0.0, + "learning_rate": 9.671583705827936e-07, + "loss": 1.1943, + "step": 29395 + }, + { + "epoch": 0.8631158611779904, + "grad_norm": 0.0, + "learning_rate": 9.66750408733541e-07, + "loss": 1.1289, + "step": 29396 + }, + { + "epoch": 0.8631452228551295, + "grad_norm": 0.0, + "learning_rate": 9.663425285742812e-07, + "loss": 1.1958, + "step": 29397 + }, + { + "epoch": 0.8631745845322685, + "grad_norm": 0.0, + "learning_rate": 9.659347301086973e-07, + "loss": 1.249, + "step": 29398 + }, + { + "epoch": 0.8632039462094074, + "grad_norm": 0.0, + "learning_rate": 9.655270133404816e-07, + "loss": 1.2153, + "step": 29399 + }, + { + "epoch": 0.8632333078865465, + "grad_norm": 0.0, + "learning_rate": 9.651193782733181e-07, + "loss": 1.1978, + "step": 29400 + }, + { + "epoch": 0.8632626695636855, + "grad_norm": 0.0, + "learning_rate": 9.647118249108945e-07, + "loss": 1.251, + "step": 29401 + }, + { + "epoch": 0.8632920312408244, + "grad_norm": 0.0, + "learning_rate": 9.643043532568952e-07, + "loss": 1.269, + "step": 29402 + }, + { + "epoch": 0.8633213929179635, + "grad_norm": 0.0, + "learning_rate": 9.638969633150042e-07, + "loss": 1.2803, + "step": 29403 + }, + { + "epoch": 0.8633507545951025, + "grad_norm": 0.0, + "learning_rate": 9.634896550889095e-07, + "loss": 1.1807, + "step": 29404 + }, + { + "epoch": 0.8633801162722414, + "grad_norm": 0.0, + "learning_rate": 9.630824285822893e-07, + "loss": 1.1201, + "step": 29405 + }, + { + "epoch": 0.8634094779493805, + "grad_norm": 0.0, + "learning_rate": 9.626752837988318e-07, + "loss": 1.3105, + "step": 29406 + }, + { + "epoch": 0.8634388396265195, + "grad_norm": 0.0, + "learning_rate": 9.62268220742214e-07, + "loss": 1.0874, + "step": 29407 + }, + { + "epoch": 0.8634682013036584, + "grad_norm": 0.0, + "learning_rate": 9.618612394161208e-07, + "loss": 1.2842, + "step": 29408 + }, + { + "epoch": 0.8634975629807975, + "grad_norm": 0.0, + "learning_rate": 9.614543398242315e-07, + "loss": 1.1426, + "step": 29409 + }, + { + "epoch": 0.8635269246579365, + "grad_norm": 0.0, + "learning_rate": 9.610475219702264e-07, + "loss": 1.2285, + "step": 29410 + }, + { + "epoch": 0.8635562863350754, + "grad_norm": 0.0, + "learning_rate": 9.606407858577805e-07, + "loss": 1.2085, + "step": 29411 + }, + { + "epoch": 0.8635856480122145, + "grad_norm": 0.0, + "learning_rate": 9.60234131490576e-07, + "loss": 1.1567, + "step": 29412 + }, + { + "epoch": 0.8636150096893535, + "grad_norm": 0.0, + "learning_rate": 9.598275588722905e-07, + "loss": 1.189, + "step": 29413 + }, + { + "epoch": 0.8636443713664924, + "grad_norm": 0.0, + "learning_rate": 9.594210680065974e-07, + "loss": 1.042, + "step": 29414 + }, + { + "epoch": 0.8636737330436315, + "grad_norm": 0.0, + "learning_rate": 9.590146588971772e-07, + "loss": 1.2559, + "step": 29415 + }, + { + "epoch": 0.8637030947207704, + "grad_norm": 0.0, + "learning_rate": 9.586083315477013e-07, + "loss": 1.2021, + "step": 29416 + }, + { + "epoch": 0.8637324563979094, + "grad_norm": 0.0, + "learning_rate": 9.582020859618491e-07, + "loss": 1.208, + "step": 29417 + }, + { + "epoch": 0.8637618180750485, + "grad_norm": 0.0, + "learning_rate": 9.577959221432864e-07, + "loss": 1.29, + "step": 29418 + }, + { + "epoch": 0.8637911797521874, + "grad_norm": 0.0, + "learning_rate": 9.573898400956939e-07, + "loss": 1.2783, + "step": 29419 + }, + { + "epoch": 0.8638205414293264, + "grad_norm": 0.0, + "learning_rate": 9.569838398227394e-07, + "loss": 1.2114, + "step": 29420 + }, + { + "epoch": 0.8638499031064655, + "grad_norm": 0.0, + "learning_rate": 9.565779213280969e-07, + "loss": 1.2217, + "step": 29421 + }, + { + "epoch": 0.8638792647836044, + "grad_norm": 0.0, + "learning_rate": 9.561720846154366e-07, + "loss": 1.1826, + "step": 29422 + }, + { + "epoch": 0.8639086264607434, + "grad_norm": 0.0, + "learning_rate": 9.557663296884256e-07, + "loss": 1.2451, + "step": 29423 + }, + { + "epoch": 0.8639379881378825, + "grad_norm": 0.0, + "learning_rate": 9.55360656550739e-07, + "loss": 1.1611, + "step": 29424 + }, + { + "epoch": 0.8639673498150214, + "grad_norm": 0.0, + "learning_rate": 9.549550652060424e-07, + "loss": 1.1216, + "step": 29425 + }, + { + "epoch": 0.8639967114921604, + "grad_norm": 0.0, + "learning_rate": 9.545495556580032e-07, + "loss": 1.1128, + "step": 29426 + }, + { + "epoch": 0.8640260731692995, + "grad_norm": 0.0, + "learning_rate": 9.54144127910287e-07, + "loss": 1.2148, + "step": 29427 + }, + { + "epoch": 0.8640554348464384, + "grad_norm": 0.0, + "learning_rate": 9.537387819665644e-07, + "loss": 1.1572, + "step": 29428 + }, + { + "epoch": 0.8640847965235774, + "grad_norm": 0.0, + "learning_rate": 9.533335178304959e-07, + "loss": 1.1694, + "step": 29429 + }, + { + "epoch": 0.8641141582007165, + "grad_norm": 0.0, + "learning_rate": 9.529283355057517e-07, + "loss": 1.332, + "step": 29430 + }, + { + "epoch": 0.8641435198778554, + "grad_norm": 0.0, + "learning_rate": 9.525232349959923e-07, + "loss": 1.3525, + "step": 29431 + }, + { + "epoch": 0.8641728815549944, + "grad_norm": 0.0, + "learning_rate": 9.521182163048837e-07, + "loss": 1.1372, + "step": 29432 + }, + { + "epoch": 0.8642022432321335, + "grad_norm": 0.0, + "learning_rate": 9.517132794360873e-07, + "loss": 1.1895, + "step": 29433 + }, + { + "epoch": 0.8642316049092724, + "grad_norm": 0.0, + "learning_rate": 9.513084243932657e-07, + "loss": 1.2676, + "step": 29434 + }, + { + "epoch": 0.8642609665864114, + "grad_norm": 0.0, + "learning_rate": 9.509036511800784e-07, + "loss": 1.2432, + "step": 29435 + }, + { + "epoch": 0.8642903282635505, + "grad_norm": 0.0, + "learning_rate": 9.504989598001856e-07, + "loss": 1.2197, + "step": 29436 + }, + { + "epoch": 0.8643196899406894, + "grad_norm": 0.0, + "learning_rate": 9.500943502572502e-07, + "loss": 1.1587, + "step": 29437 + }, + { + "epoch": 0.8643490516178284, + "grad_norm": 0.0, + "learning_rate": 9.496898225549267e-07, + "loss": 1.1611, + "step": 29438 + }, + { + "epoch": 0.8643784132949675, + "grad_norm": 0.0, + "learning_rate": 9.492853766968791e-07, + "loss": 1.21, + "step": 29439 + }, + { + "epoch": 0.8644077749721064, + "grad_norm": 0.0, + "learning_rate": 9.488810126867587e-07, + "loss": 1.1455, + "step": 29440 + }, + { + "epoch": 0.8644371366492454, + "grad_norm": 0.0, + "learning_rate": 9.484767305282272e-07, + "loss": 1.2051, + "step": 29441 + }, + { + "epoch": 0.8644664983263844, + "grad_norm": 0.0, + "learning_rate": 9.480725302249416e-07, + "loss": 1.2402, + "step": 29442 + }, + { + "epoch": 0.8644958600035234, + "grad_norm": 0.0, + "learning_rate": 9.476684117805501e-07, + "loss": 1.1899, + "step": 29443 + }, + { + "epoch": 0.8645252216806624, + "grad_norm": 0.0, + "learning_rate": 9.472643751987132e-07, + "loss": 1.2637, + "step": 29444 + }, + { + "epoch": 0.8645545833578014, + "grad_norm": 0.0, + "learning_rate": 9.468604204830811e-07, + "loss": 1.1738, + "step": 29445 + }, + { + "epoch": 0.8645839450349404, + "grad_norm": 0.0, + "learning_rate": 9.464565476373111e-07, + "loss": 1.1655, + "step": 29446 + }, + { + "epoch": 0.8646133067120794, + "grad_norm": 0.0, + "learning_rate": 9.460527566650513e-07, + "loss": 1.2393, + "step": 29447 + }, + { + "epoch": 0.8646426683892184, + "grad_norm": 0.0, + "learning_rate": 9.456490475699564e-07, + "loss": 1.1387, + "step": 29448 + }, + { + "epoch": 0.8646720300663574, + "grad_norm": 0.0, + "learning_rate": 9.45245420355676e-07, + "loss": 1.2188, + "step": 29449 + }, + { + "epoch": 0.8647013917434964, + "grad_norm": 0.0, + "learning_rate": 9.448418750258603e-07, + "loss": 1.2314, + "step": 29450 + }, + { + "epoch": 0.8647307534206354, + "grad_norm": 0.0, + "learning_rate": 9.444384115841577e-07, + "loss": 1.0781, + "step": 29451 + }, + { + "epoch": 0.8647601150977744, + "grad_norm": 0.0, + "learning_rate": 9.440350300342182e-07, + "loss": 1.1904, + "step": 29452 + }, + { + "epoch": 0.8647894767749134, + "grad_norm": 0.0, + "learning_rate": 9.436317303796904e-07, + "loss": 1.2871, + "step": 29453 + }, + { + "epoch": 0.8648188384520524, + "grad_norm": 0.0, + "learning_rate": 9.43228512624218e-07, + "loss": 1.2617, + "step": 29454 + }, + { + "epoch": 0.8648482001291914, + "grad_norm": 0.0, + "learning_rate": 9.428253767714513e-07, + "loss": 1.2754, + "step": 29455 + }, + { + "epoch": 0.8648775618063304, + "grad_norm": 0.0, + "learning_rate": 9.424223228250329e-07, + "loss": 1.1099, + "step": 29456 + }, + { + "epoch": 0.8649069234834694, + "grad_norm": 0.0, + "learning_rate": 9.420193507886099e-07, + "loss": 1.209, + "step": 29457 + }, + { + "epoch": 0.8649362851606084, + "grad_norm": 0.0, + "learning_rate": 9.416164606658262e-07, + "loss": 1.2568, + "step": 29458 + }, + { + "epoch": 0.8649656468377473, + "grad_norm": 0.0, + "learning_rate": 9.412136524603255e-07, + "loss": 1.1353, + "step": 29459 + }, + { + "epoch": 0.8649950085148864, + "grad_norm": 0.0, + "learning_rate": 9.408109261757469e-07, + "loss": 1.2764, + "step": 29460 + }, + { + "epoch": 0.8650243701920254, + "grad_norm": 0.0, + "learning_rate": 9.404082818157378e-07, + "loss": 1.2656, + "step": 29461 + }, + { + "epoch": 0.8650537318691643, + "grad_norm": 0.0, + "learning_rate": 9.400057193839362e-07, + "loss": 1.3379, + "step": 29462 + }, + { + "epoch": 0.8650830935463034, + "grad_norm": 0.0, + "learning_rate": 9.396032388839815e-07, + "loss": 1.2637, + "step": 29463 + }, + { + "epoch": 0.8651124552234424, + "grad_norm": 0.0, + "learning_rate": 9.392008403195163e-07, + "loss": 1.1152, + "step": 29464 + }, + { + "epoch": 0.8651418169005813, + "grad_norm": 0.0, + "learning_rate": 9.387985236941777e-07, + "loss": 1.2373, + "step": 29465 + }, + { + "epoch": 0.8651711785777204, + "grad_norm": 0.0, + "learning_rate": 9.383962890116082e-07, + "loss": 1.3086, + "step": 29466 + }, + { + "epoch": 0.8652005402548594, + "grad_norm": 0.0, + "learning_rate": 9.379941362754374e-07, + "loss": 1.2891, + "step": 29467 + }, + { + "epoch": 0.8652299019319983, + "grad_norm": 0.0, + "learning_rate": 9.37592065489309e-07, + "loss": 1.2427, + "step": 29468 + }, + { + "epoch": 0.8652592636091374, + "grad_norm": 0.0, + "learning_rate": 9.371900766568543e-07, + "loss": 1.2432, + "step": 29469 + }, + { + "epoch": 0.8652886252862764, + "grad_norm": 0.0, + "learning_rate": 9.367881697817128e-07, + "loss": 1.2876, + "step": 29470 + }, + { + "epoch": 0.8653179869634153, + "grad_norm": 0.0, + "learning_rate": 9.363863448675159e-07, + "loss": 1.3184, + "step": 29471 + }, + { + "epoch": 0.8653473486405544, + "grad_norm": 0.0, + "learning_rate": 9.359846019178975e-07, + "loss": 1.2363, + "step": 29472 + }, + { + "epoch": 0.8653767103176934, + "grad_norm": 0.0, + "learning_rate": 9.355829409364936e-07, + "loss": 1.1807, + "step": 29473 + }, + { + "epoch": 0.8654060719948323, + "grad_norm": 0.0, + "learning_rate": 9.351813619269357e-07, + "loss": 1.1978, + "step": 29474 + }, + { + "epoch": 0.8654354336719713, + "grad_norm": 0.0, + "learning_rate": 9.34779864892853e-07, + "loss": 1.2246, + "step": 29475 + }, + { + "epoch": 0.8654647953491104, + "grad_norm": 0.0, + "learning_rate": 9.34378449837875e-07, + "loss": 1.1221, + "step": 29476 + }, + { + "epoch": 0.8654941570262493, + "grad_norm": 0.0, + "learning_rate": 9.339771167656375e-07, + "loss": 1.1919, + "step": 29477 + }, + { + "epoch": 0.8655235187033883, + "grad_norm": 0.0, + "learning_rate": 9.335758656797644e-07, + "loss": 1.082, + "step": 29478 + }, + { + "epoch": 0.8655528803805274, + "grad_norm": 0.0, + "learning_rate": 9.331746965838895e-07, + "loss": 1.2007, + "step": 29479 + }, + { + "epoch": 0.8655822420576663, + "grad_norm": 0.0, + "learning_rate": 9.327736094816353e-07, + "loss": 1.2246, + "step": 29480 + }, + { + "epoch": 0.8656116037348053, + "grad_norm": 0.0, + "learning_rate": 9.323726043766334e-07, + "loss": 1.1279, + "step": 29481 + }, + { + "epoch": 0.8656409654119444, + "grad_norm": 0.0, + "learning_rate": 9.319716812725077e-07, + "loss": 1.1875, + "step": 29482 + }, + { + "epoch": 0.8656703270890833, + "grad_norm": 0.0, + "learning_rate": 9.315708401728851e-07, + "loss": 1.2754, + "step": 29483 + }, + { + "epoch": 0.8656996887662223, + "grad_norm": 0.0, + "learning_rate": 9.311700810813895e-07, + "loss": 1.2012, + "step": 29484 + }, + { + "epoch": 0.8657290504433613, + "grad_norm": 0.0, + "learning_rate": 9.307694040016435e-07, + "loss": 1.1709, + "step": 29485 + }, + { + "epoch": 0.8657584121205003, + "grad_norm": 0.0, + "learning_rate": 9.303688089372754e-07, + "loss": 1.2188, + "step": 29486 + }, + { + "epoch": 0.8657877737976393, + "grad_norm": 0.0, + "learning_rate": 9.29968295891902e-07, + "loss": 1.2344, + "step": 29487 + }, + { + "epoch": 0.8658171354747783, + "grad_norm": 0.0, + "learning_rate": 9.295678648691498e-07, + "loss": 1.2559, + "step": 29488 + }, + { + "epoch": 0.8658464971519173, + "grad_norm": 0.0, + "learning_rate": 9.291675158726376e-07, + "loss": 1.335, + "step": 29489 + }, + { + "epoch": 0.8658758588290563, + "grad_norm": 0.0, + "learning_rate": 9.287672489059874e-07, + "loss": 1.2549, + "step": 29490 + }, + { + "epoch": 0.8659052205061953, + "grad_norm": 0.0, + "learning_rate": 9.283670639728204e-07, + "loss": 1.2588, + "step": 29491 + }, + { + "epoch": 0.8659345821833343, + "grad_norm": 0.0, + "learning_rate": 9.279669610767506e-07, + "loss": 1.2686, + "step": 29492 + }, + { + "epoch": 0.8659639438604733, + "grad_norm": 0.0, + "learning_rate": 9.275669402213993e-07, + "loss": 1.1973, + "step": 29493 + }, + { + "epoch": 0.8659933055376123, + "grad_norm": 0.0, + "learning_rate": 9.271670014103828e-07, + "loss": 1.2236, + "step": 29494 + }, + { + "epoch": 0.8660226672147513, + "grad_norm": 0.0, + "learning_rate": 9.267671446473204e-07, + "loss": 1.124, + "step": 29495 + }, + { + "epoch": 0.8660520288918903, + "grad_norm": 0.0, + "learning_rate": 9.263673699358233e-07, + "loss": 1.1152, + "step": 29496 + }, + { + "epoch": 0.8660813905690293, + "grad_norm": 0.0, + "learning_rate": 9.259676772795134e-07, + "loss": 1.2588, + "step": 29497 + }, + { + "epoch": 0.8661107522461683, + "grad_norm": 0.0, + "learning_rate": 9.25568066682001e-07, + "loss": 1.2148, + "step": 29498 + }, + { + "epoch": 0.8661401139233073, + "grad_norm": 0.0, + "learning_rate": 9.251685381469e-07, + "loss": 1.0796, + "step": 29499 + }, + { + "epoch": 0.8661694756004463, + "grad_norm": 0.0, + "learning_rate": 9.247690916778229e-07, + "loss": 1.1719, + "step": 29500 + }, + { + "epoch": 0.8661988372775853, + "grad_norm": 0.0, + "learning_rate": 9.243697272783847e-07, + "loss": 1.1904, + "step": 29501 + }, + { + "epoch": 0.8662281989547242, + "grad_norm": 0.0, + "learning_rate": 9.239704449521958e-07, + "loss": 1.1572, + "step": 29502 + }, + { + "epoch": 0.8662575606318633, + "grad_norm": 0.0, + "learning_rate": 9.235712447028645e-07, + "loss": 1.0576, + "step": 29503 + }, + { + "epoch": 0.8662869223090023, + "grad_norm": 0.0, + "learning_rate": 9.231721265340043e-07, + "loss": 1.2012, + "step": 29504 + }, + { + "epoch": 0.8663162839861412, + "grad_norm": 0.0, + "learning_rate": 9.227730904492216e-07, + "loss": 1.0698, + "step": 29505 + }, + { + "epoch": 0.8663456456632803, + "grad_norm": 0.0, + "learning_rate": 9.223741364521288e-07, + "loss": 1.2437, + "step": 29506 + }, + { + "epoch": 0.8663750073404193, + "grad_norm": 0.0, + "learning_rate": 9.219752645463309e-07, + "loss": 1.1826, + "step": 29507 + }, + { + "epoch": 0.8664043690175582, + "grad_norm": 0.0, + "learning_rate": 9.215764747354361e-07, + "loss": 1.1816, + "step": 29508 + }, + { + "epoch": 0.8664337306946973, + "grad_norm": 0.0, + "learning_rate": 9.211777670230482e-07, + "loss": 1.0864, + "step": 29509 + }, + { + "epoch": 0.8664630923718363, + "grad_norm": 0.0, + "learning_rate": 9.207791414127765e-07, + "loss": 1.064, + "step": 29510 + }, + { + "epoch": 0.8664924540489752, + "grad_norm": 0.0, + "learning_rate": 9.203805979082248e-07, + "loss": 1.1357, + "step": 29511 + }, + { + "epoch": 0.8665218157261143, + "grad_norm": 0.0, + "learning_rate": 9.199821365129946e-07, + "loss": 1.189, + "step": 29512 + }, + { + "epoch": 0.8665511774032533, + "grad_norm": 0.0, + "learning_rate": 9.195837572306931e-07, + "loss": 1.3066, + "step": 29513 + }, + { + "epoch": 0.8665805390803922, + "grad_norm": 0.0, + "learning_rate": 9.191854600649186e-07, + "loss": 1.2036, + "step": 29514 + }, + { + "epoch": 0.8666099007575313, + "grad_norm": 0.0, + "learning_rate": 9.187872450192792e-07, + "loss": 1.1123, + "step": 29515 + }, + { + "epoch": 0.8666392624346703, + "grad_norm": 0.0, + "learning_rate": 9.18389112097371e-07, + "loss": 1.1572, + "step": 29516 + }, + { + "epoch": 0.8666686241118092, + "grad_norm": 0.0, + "learning_rate": 9.179910613027965e-07, + "loss": 1.1528, + "step": 29517 + }, + { + "epoch": 0.8666979857889483, + "grad_norm": 0.0, + "learning_rate": 9.17593092639153e-07, + "loss": 1.1572, + "step": 29518 + }, + { + "epoch": 0.8667273474660873, + "grad_norm": 0.0, + "learning_rate": 9.171952061100431e-07, + "loss": 1.2129, + "step": 29519 + }, + { + "epoch": 0.8667567091432262, + "grad_norm": 0.0, + "learning_rate": 9.167974017190628e-07, + "loss": 1.3555, + "step": 29520 + }, + { + "epoch": 0.8667860708203653, + "grad_norm": 0.0, + "learning_rate": 9.163996794698072e-07, + "loss": 1.2202, + "step": 29521 + }, + { + "epoch": 0.8668154324975043, + "grad_norm": 0.0, + "learning_rate": 9.160020393658786e-07, + "loss": 1.2285, + "step": 29522 + }, + { + "epoch": 0.8668447941746432, + "grad_norm": 0.0, + "learning_rate": 9.1560448141087e-07, + "loss": 1.1787, + "step": 29523 + }, + { + "epoch": 0.8668741558517823, + "grad_norm": 0.0, + "learning_rate": 9.152070056083762e-07, + "loss": 1.1943, + "step": 29524 + }, + { + "epoch": 0.8669035175289213, + "grad_norm": 0.0, + "learning_rate": 9.148096119619909e-07, + "loss": 1.2051, + "step": 29525 + }, + { + "epoch": 0.8669328792060602, + "grad_norm": 0.0, + "learning_rate": 9.144123004753102e-07, + "loss": 1.2676, + "step": 29526 + }, + { + "epoch": 0.8669622408831993, + "grad_norm": 0.0, + "learning_rate": 9.140150711519247e-07, + "loss": 1.3174, + "step": 29527 + }, + { + "epoch": 0.8669916025603382, + "grad_norm": 0.0, + "learning_rate": 9.13617923995429e-07, + "loss": 1.2754, + "step": 29528 + }, + { + "epoch": 0.8670209642374772, + "grad_norm": 0.0, + "learning_rate": 9.132208590094116e-07, + "loss": 1.1118, + "step": 29529 + }, + { + "epoch": 0.8670503259146163, + "grad_norm": 0.0, + "learning_rate": 9.128238761974673e-07, + "loss": 1.3252, + "step": 29530 + }, + { + "epoch": 0.8670796875917552, + "grad_norm": 0.0, + "learning_rate": 9.124269755631842e-07, + "loss": 1.2573, + "step": 29531 + }, + { + "epoch": 0.8671090492688942, + "grad_norm": 0.0, + "learning_rate": 9.120301571101508e-07, + "loss": 1.2598, + "step": 29532 + }, + { + "epoch": 0.8671384109460333, + "grad_norm": 0.0, + "learning_rate": 9.116334208419563e-07, + "loss": 1.186, + "step": 29533 + }, + { + "epoch": 0.8671677726231722, + "grad_norm": 0.0, + "learning_rate": 9.112367667621868e-07, + "loss": 1.1455, + "step": 29534 + }, + { + "epoch": 0.8671971343003112, + "grad_norm": 0.0, + "learning_rate": 9.108401948744328e-07, + "loss": 1.2231, + "step": 29535 + }, + { + "epoch": 0.8672264959774503, + "grad_norm": 0.0, + "learning_rate": 9.104437051822768e-07, + "loss": 1.0757, + "step": 29536 + }, + { + "epoch": 0.8672558576545892, + "grad_norm": 0.0, + "learning_rate": 9.100472976893071e-07, + "loss": 1.1885, + "step": 29537 + }, + { + "epoch": 0.8672852193317282, + "grad_norm": 0.0, + "learning_rate": 9.096509723991064e-07, + "loss": 1.1895, + "step": 29538 + }, + { + "epoch": 0.8673145810088673, + "grad_norm": 0.0, + "learning_rate": 9.092547293152631e-07, + "loss": 1.165, + "step": 29539 + }, + { + "epoch": 0.8673439426860062, + "grad_norm": 0.0, + "learning_rate": 9.088585684413564e-07, + "loss": 1.0254, + "step": 29540 + }, + { + "epoch": 0.8673733043631452, + "grad_norm": 0.0, + "learning_rate": 9.084624897809691e-07, + "loss": 1.3115, + "step": 29541 + }, + { + "epoch": 0.8674026660402843, + "grad_norm": 0.0, + "learning_rate": 9.080664933376849e-07, + "loss": 1.2153, + "step": 29542 + }, + { + "epoch": 0.8674320277174232, + "grad_norm": 0.0, + "learning_rate": 9.076705791150819e-07, + "loss": 1.1611, + "step": 29543 + }, + { + "epoch": 0.8674613893945622, + "grad_norm": 0.0, + "learning_rate": 9.072747471167443e-07, + "loss": 1.2207, + "step": 29544 + }, + { + "epoch": 0.8674907510717013, + "grad_norm": 0.0, + "learning_rate": 9.068789973462477e-07, + "loss": 1.2964, + "step": 29545 + }, + { + "epoch": 0.8675201127488402, + "grad_norm": 0.0, + "learning_rate": 9.064833298071762e-07, + "loss": 1.1714, + "step": 29546 + }, + { + "epoch": 0.8675494744259792, + "grad_norm": 0.0, + "learning_rate": 9.060877445031036e-07, + "loss": 1.2661, + "step": 29547 + }, + { + "epoch": 0.8675788361031183, + "grad_norm": 0.0, + "learning_rate": 9.056922414376091e-07, + "loss": 1.251, + "step": 29548 + }, + { + "epoch": 0.8676081977802572, + "grad_norm": 0.0, + "learning_rate": 9.052968206142665e-07, + "loss": 1.1602, + "step": 29549 + }, + { + "epoch": 0.8676375594573962, + "grad_norm": 0.0, + "learning_rate": 9.049014820366553e-07, + "loss": 1.1807, + "step": 29550 + }, + { + "epoch": 0.8676669211345353, + "grad_norm": 0.0, + "learning_rate": 9.045062257083504e-07, + "loss": 1.2812, + "step": 29551 + }, + { + "epoch": 0.8676962828116742, + "grad_norm": 0.0, + "learning_rate": 9.041110516329221e-07, + "loss": 1.2588, + "step": 29552 + }, + { + "epoch": 0.8677256444888132, + "grad_norm": 0.0, + "learning_rate": 9.0371595981395e-07, + "loss": 1.1333, + "step": 29553 + }, + { + "epoch": 0.8677550061659522, + "grad_norm": 0.0, + "learning_rate": 9.033209502550011e-07, + "loss": 1.0649, + "step": 29554 + }, + { + "epoch": 0.8677843678430912, + "grad_norm": 0.0, + "learning_rate": 9.029260229596537e-07, + "loss": 1.2656, + "step": 29555 + }, + { + "epoch": 0.8678137295202302, + "grad_norm": 0.0, + "learning_rate": 9.025311779314749e-07, + "loss": 1.2129, + "step": 29556 + }, + { + "epoch": 0.8678430911973692, + "grad_norm": 0.0, + "learning_rate": 9.021364151740375e-07, + "loss": 1.146, + "step": 29557 + }, + { + "epoch": 0.8678724528745082, + "grad_norm": 0.0, + "learning_rate": 9.017417346909074e-07, + "loss": 1.0317, + "step": 29558 + }, + { + "epoch": 0.8679018145516472, + "grad_norm": 0.0, + "learning_rate": 9.013471364856596e-07, + "loss": 1.2705, + "step": 29559 + }, + { + "epoch": 0.8679311762287862, + "grad_norm": 0.0, + "learning_rate": 9.009526205618601e-07, + "loss": 1.168, + "step": 29560 + }, + { + "epoch": 0.8679605379059252, + "grad_norm": 0.0, + "learning_rate": 9.00558186923074e-07, + "loss": 1.2822, + "step": 29561 + }, + { + "epoch": 0.8679898995830642, + "grad_norm": 0.0, + "learning_rate": 9.001638355728726e-07, + "loss": 1.144, + "step": 29562 + }, + { + "epoch": 0.8680192612602032, + "grad_norm": 0.0, + "learning_rate": 8.997695665148188e-07, + "loss": 1.2793, + "step": 29563 + }, + { + "epoch": 0.8680486229373422, + "grad_norm": 0.0, + "learning_rate": 8.993753797524806e-07, + "loss": 1.2373, + "step": 29564 + }, + { + "epoch": 0.8680779846144812, + "grad_norm": 0.0, + "learning_rate": 8.989812752894211e-07, + "loss": 1.2861, + "step": 29565 + }, + { + "epoch": 0.8681073462916202, + "grad_norm": 0.0, + "learning_rate": 8.985872531292062e-07, + "loss": 1.1851, + "step": 29566 + }, + { + "epoch": 0.8681367079687592, + "grad_norm": 0.0, + "learning_rate": 8.98193313275395e-07, + "loss": 1.3281, + "step": 29567 + }, + { + "epoch": 0.8681660696458982, + "grad_norm": 0.0, + "learning_rate": 8.977994557315539e-07, + "loss": 1.2378, + "step": 29568 + }, + { + "epoch": 0.8681954313230372, + "grad_norm": 0.0, + "learning_rate": 8.974056805012421e-07, + "loss": 1.2246, + "step": 29569 + }, + { + "epoch": 0.8682247930001762, + "grad_norm": 0.0, + "learning_rate": 8.970119875880246e-07, + "loss": 1.2612, + "step": 29570 + }, + { + "epoch": 0.8682541546773151, + "grad_norm": 0.0, + "learning_rate": 8.966183769954572e-07, + "loss": 1.2158, + "step": 29571 + }, + { + "epoch": 0.8682835163544542, + "grad_norm": 0.0, + "learning_rate": 8.962248487271031e-07, + "loss": 1.2793, + "step": 29572 + }, + { + "epoch": 0.8683128780315932, + "grad_norm": 0.0, + "learning_rate": 8.958314027865178e-07, + "loss": 1.0991, + "step": 29573 + }, + { + "epoch": 0.8683422397087321, + "grad_norm": 0.0, + "learning_rate": 8.954380391772599e-07, + "loss": 1.0898, + "step": 29574 + }, + { + "epoch": 0.8683716013858711, + "grad_norm": 0.0, + "learning_rate": 8.950447579028887e-07, + "loss": 1.3164, + "step": 29575 + }, + { + "epoch": 0.8684009630630102, + "grad_norm": 0.0, + "learning_rate": 8.94651558966958e-07, + "loss": 1.166, + "step": 29576 + }, + { + "epoch": 0.8684303247401491, + "grad_norm": 0.0, + "learning_rate": 8.942584423730271e-07, + "loss": 1.3047, + "step": 29577 + }, + { + "epoch": 0.8684596864172881, + "grad_norm": 0.0, + "learning_rate": 8.938654081246478e-07, + "loss": 1.2549, + "step": 29578 + }, + { + "epoch": 0.8684890480944272, + "grad_norm": 0.0, + "learning_rate": 8.934724562253772e-07, + "loss": 1.3281, + "step": 29579 + }, + { + "epoch": 0.8685184097715661, + "grad_norm": 0.0, + "learning_rate": 8.930795866787667e-07, + "loss": 1.3174, + "step": 29580 + }, + { + "epoch": 0.8685477714487051, + "grad_norm": 0.0, + "learning_rate": 8.926867994883703e-07, + "loss": 1.3486, + "step": 29581 + }, + { + "epoch": 0.8685771331258442, + "grad_norm": 0.0, + "learning_rate": 8.922940946577396e-07, + "loss": 1.2598, + "step": 29582 + }, + { + "epoch": 0.8686064948029831, + "grad_norm": 0.0, + "learning_rate": 8.91901472190424e-07, + "loss": 1.2051, + "step": 29583 + }, + { + "epoch": 0.8686358564801221, + "grad_norm": 0.0, + "learning_rate": 8.915089320899772e-07, + "loss": 1.2998, + "step": 29584 + }, + { + "epoch": 0.8686652181572612, + "grad_norm": 0.0, + "learning_rate": 8.911164743599477e-07, + "loss": 1.1304, + "step": 29585 + }, + { + "epoch": 0.8686945798344001, + "grad_norm": 0.0, + "learning_rate": 8.907240990038857e-07, + "loss": 1.2656, + "step": 29586 + }, + { + "epoch": 0.8687239415115391, + "grad_norm": 0.0, + "learning_rate": 8.903318060253363e-07, + "loss": 1.2539, + "step": 29587 + }, + { + "epoch": 0.8687533031886782, + "grad_norm": 0.0, + "learning_rate": 8.899395954278512e-07, + "loss": 1.1807, + "step": 29588 + }, + { + "epoch": 0.8687826648658171, + "grad_norm": 0.0, + "learning_rate": 8.895474672149762e-07, + "loss": 1.1777, + "step": 29589 + }, + { + "epoch": 0.8688120265429561, + "grad_norm": 0.0, + "learning_rate": 8.891554213902564e-07, + "loss": 1.0605, + "step": 29590 + }, + { + "epoch": 0.8688413882200952, + "grad_norm": 0.0, + "learning_rate": 8.887634579572369e-07, + "loss": 1.2754, + "step": 29591 + }, + { + "epoch": 0.8688707498972341, + "grad_norm": 0.0, + "learning_rate": 8.883715769194612e-07, + "loss": 1.2656, + "step": 29592 + }, + { + "epoch": 0.8689001115743731, + "grad_norm": 0.0, + "learning_rate": 8.879797782804777e-07, + "loss": 1.1641, + "step": 29593 + }, + { + "epoch": 0.8689294732515122, + "grad_norm": 0.0, + "learning_rate": 8.875880620438238e-07, + "loss": 1.1323, + "step": 29594 + }, + { + "epoch": 0.8689588349286511, + "grad_norm": 0.0, + "learning_rate": 8.871964282130474e-07, + "loss": 1.2764, + "step": 29595 + }, + { + "epoch": 0.8689881966057901, + "grad_norm": 0.0, + "learning_rate": 8.868048767916848e-07, + "loss": 1.2998, + "step": 29596 + }, + { + "epoch": 0.8690175582829291, + "grad_norm": 0.0, + "learning_rate": 8.864134077832842e-07, + "loss": 1.1714, + "step": 29597 + }, + { + "epoch": 0.8690469199600681, + "grad_norm": 0.0, + "learning_rate": 8.860220211913761e-07, + "loss": 1.2881, + "step": 29598 + }, + { + "epoch": 0.8690762816372071, + "grad_norm": 0.0, + "learning_rate": 8.856307170195078e-07, + "loss": 1.292, + "step": 29599 + }, + { + "epoch": 0.8691056433143461, + "grad_norm": 0.0, + "learning_rate": 8.852394952712151e-07, + "loss": 1.1245, + "step": 29600 + }, + { + "epoch": 0.8691350049914851, + "grad_norm": 0.0, + "learning_rate": 8.848483559500342e-07, + "loss": 1.022, + "step": 29601 + }, + { + "epoch": 0.8691643666686241, + "grad_norm": 0.0, + "learning_rate": 8.844572990595057e-07, + "loss": 1.0879, + "step": 29602 + }, + { + "epoch": 0.8691937283457631, + "grad_norm": 0.0, + "learning_rate": 8.840663246031633e-07, + "loss": 1.166, + "step": 29603 + }, + { + "epoch": 0.8692230900229021, + "grad_norm": 0.0, + "learning_rate": 8.836754325845453e-07, + "loss": 1.2627, + "step": 29604 + }, + { + "epoch": 0.8692524517000411, + "grad_norm": 0.0, + "learning_rate": 8.832846230071845e-07, + "loss": 1.1924, + "step": 29605 + }, + { + "epoch": 0.8692818133771801, + "grad_norm": 0.0, + "learning_rate": 8.828938958746159e-07, + "loss": 1.2959, + "step": 29606 + }, + { + "epoch": 0.8693111750543191, + "grad_norm": 0.0, + "learning_rate": 8.825032511903708e-07, + "loss": 1.1064, + "step": 29607 + }, + { + "epoch": 0.8693405367314581, + "grad_norm": 0.0, + "learning_rate": 8.821126889579856e-07, + "loss": 1.2822, + "step": 29608 + }, + { + "epoch": 0.8693698984085971, + "grad_norm": 0.0, + "learning_rate": 8.817222091809907e-07, + "loss": 1.207, + "step": 29609 + }, + { + "epoch": 0.8693992600857361, + "grad_norm": 0.0, + "learning_rate": 8.813318118629155e-07, + "loss": 1.2354, + "step": 29610 + }, + { + "epoch": 0.869428621762875, + "grad_norm": 0.0, + "learning_rate": 8.809414970072938e-07, + "loss": 1.1982, + "step": 29611 + }, + { + "epoch": 0.8694579834400141, + "grad_norm": 0.0, + "learning_rate": 8.805512646176506e-07, + "loss": 1.1836, + "step": 29612 + }, + { + "epoch": 0.8694873451171531, + "grad_norm": 0.0, + "learning_rate": 8.801611146975209e-07, + "loss": 1.1768, + "step": 29613 + }, + { + "epoch": 0.869516706794292, + "grad_norm": 0.0, + "learning_rate": 8.797710472504295e-07, + "loss": 1.1074, + "step": 29614 + }, + { + "epoch": 0.8695460684714311, + "grad_norm": 0.0, + "learning_rate": 8.793810622799048e-07, + "loss": 1.1914, + "step": 29615 + }, + { + "epoch": 0.8695754301485701, + "grad_norm": 0.0, + "learning_rate": 8.789911597894707e-07, + "loss": 1.2803, + "step": 29616 + }, + { + "epoch": 0.869604791825709, + "grad_norm": 0.0, + "learning_rate": 8.786013397826565e-07, + "loss": 1.2842, + "step": 29617 + }, + { + "epoch": 0.8696341535028481, + "grad_norm": 0.0, + "learning_rate": 8.782116022629861e-07, + "loss": 1.2148, + "step": 29618 + }, + { + "epoch": 0.8696635151799871, + "grad_norm": 0.0, + "learning_rate": 8.778219472339855e-07, + "loss": 1.0449, + "step": 29619 + }, + { + "epoch": 0.869692876857126, + "grad_norm": 0.0, + "learning_rate": 8.774323746991764e-07, + "loss": 1.3066, + "step": 29620 + }, + { + "epoch": 0.8697222385342651, + "grad_norm": 0.0, + "learning_rate": 8.770428846620816e-07, + "loss": 1.2104, + "step": 29621 + }, + { + "epoch": 0.8697516002114041, + "grad_norm": 0.0, + "learning_rate": 8.76653477126228e-07, + "loss": 1.249, + "step": 29622 + }, + { + "epoch": 0.869780961888543, + "grad_norm": 0.0, + "learning_rate": 8.762641520951298e-07, + "loss": 1.106, + "step": 29623 + }, + { + "epoch": 0.8698103235656821, + "grad_norm": 0.0, + "learning_rate": 8.75874909572314e-07, + "loss": 1.084, + "step": 29624 + }, + { + "epoch": 0.8698396852428211, + "grad_norm": 0.0, + "learning_rate": 8.754857495612956e-07, + "loss": 1.2446, + "step": 29625 + }, + { + "epoch": 0.86986904691996, + "grad_norm": 0.0, + "learning_rate": 8.750966720655985e-07, + "loss": 1.146, + "step": 29626 + }, + { + "epoch": 0.8698984085970991, + "grad_norm": 0.0, + "learning_rate": 8.747076770887364e-07, + "loss": 1.3271, + "step": 29627 + }, + { + "epoch": 0.8699277702742381, + "grad_norm": 0.0, + "learning_rate": 8.743187646342333e-07, + "loss": 1.1162, + "step": 29628 + }, + { + "epoch": 0.869957131951377, + "grad_norm": 0.0, + "learning_rate": 8.739299347056007e-07, + "loss": 1.1851, + "step": 29629 + }, + { + "epoch": 0.8699864936285161, + "grad_norm": 0.0, + "learning_rate": 8.735411873063581e-07, + "loss": 1.2559, + "step": 29630 + }, + { + "epoch": 0.8700158553056551, + "grad_norm": 0.0, + "learning_rate": 8.731525224400206e-07, + "loss": 1.2637, + "step": 29631 + }, + { + "epoch": 0.870045216982794, + "grad_norm": 0.0, + "learning_rate": 8.727639401100996e-07, + "loss": 1.248, + "step": 29632 + }, + { + "epoch": 0.8700745786599331, + "grad_norm": 0.0, + "learning_rate": 8.723754403201134e-07, + "loss": 1.1777, + "step": 29633 + }, + { + "epoch": 0.8701039403370721, + "grad_norm": 0.0, + "learning_rate": 8.719870230735716e-07, + "loss": 1.2539, + "step": 29634 + }, + { + "epoch": 0.870133302014211, + "grad_norm": 0.0, + "learning_rate": 8.715986883739914e-07, + "loss": 1.2344, + "step": 29635 + }, + { + "epoch": 0.8701626636913501, + "grad_norm": 0.0, + "learning_rate": 8.712104362248808e-07, + "loss": 1.1504, + "step": 29636 + }, + { + "epoch": 0.870192025368489, + "grad_norm": 0.0, + "learning_rate": 8.708222666297528e-07, + "loss": 1.3105, + "step": 29637 + }, + { + "epoch": 0.870221387045628, + "grad_norm": 0.0, + "learning_rate": 8.704341795921178e-07, + "loss": 1.2549, + "step": 29638 + }, + { + "epoch": 0.8702507487227671, + "grad_norm": 0.0, + "learning_rate": 8.700461751154843e-07, + "loss": 1.1348, + "step": 29639 + }, + { + "epoch": 0.870280110399906, + "grad_norm": 0.0, + "learning_rate": 8.696582532033615e-07, + "loss": 1.251, + "step": 29640 + }, + { + "epoch": 0.870309472077045, + "grad_norm": 0.0, + "learning_rate": 8.692704138592555e-07, + "loss": 1.21, + "step": 29641 + }, + { + "epoch": 0.8703388337541841, + "grad_norm": 0.0, + "learning_rate": 8.68882657086677e-07, + "loss": 1.1899, + "step": 29642 + }, + { + "epoch": 0.870368195431323, + "grad_norm": 0.0, + "learning_rate": 8.684949828891298e-07, + "loss": 1.3662, + "step": 29643 + }, + { + "epoch": 0.870397557108462, + "grad_norm": 0.0, + "learning_rate": 8.681073912701233e-07, + "loss": 1.2627, + "step": 29644 + }, + { + "epoch": 0.8704269187856011, + "grad_norm": 0.0, + "learning_rate": 8.677198822331579e-07, + "loss": 1.2324, + "step": 29645 + }, + { + "epoch": 0.87045628046274, + "grad_norm": 0.0, + "learning_rate": 8.67332455781743e-07, + "loss": 1.1914, + "step": 29646 + }, + { + "epoch": 0.870485642139879, + "grad_norm": 0.0, + "learning_rate": 8.669451119193783e-07, + "loss": 1.3066, + "step": 29647 + }, + { + "epoch": 0.8705150038170181, + "grad_norm": 0.0, + "learning_rate": 8.665578506495697e-07, + "loss": 1.2266, + "step": 29648 + }, + { + "epoch": 0.870544365494157, + "grad_norm": 0.0, + "learning_rate": 8.661706719758156e-07, + "loss": 1.2871, + "step": 29649 + }, + { + "epoch": 0.870573727171296, + "grad_norm": 0.0, + "learning_rate": 8.657835759016187e-07, + "loss": 1.3428, + "step": 29650 + }, + { + "epoch": 0.8706030888484351, + "grad_norm": 0.0, + "learning_rate": 8.653965624304827e-07, + "loss": 1.25, + "step": 29651 + }, + { + "epoch": 0.870632450525574, + "grad_norm": 0.0, + "learning_rate": 8.650096315659018e-07, + "loss": 1.1982, + "step": 29652 + }, + { + "epoch": 0.870661812202713, + "grad_norm": 0.0, + "learning_rate": 8.646227833113808e-07, + "loss": 1.2109, + "step": 29653 + }, + { + "epoch": 0.8706911738798521, + "grad_norm": 0.0, + "learning_rate": 8.642360176704157e-07, + "loss": 1.3203, + "step": 29654 + }, + { + "epoch": 0.870720535556991, + "grad_norm": 0.0, + "learning_rate": 8.638493346465037e-07, + "loss": 1.2891, + "step": 29655 + }, + { + "epoch": 0.87074989723413, + "grad_norm": 0.0, + "learning_rate": 8.634627342431401e-07, + "loss": 1.2285, + "step": 29656 + }, + { + "epoch": 0.8707792589112691, + "grad_norm": 0.0, + "learning_rate": 8.630762164638251e-07, + "loss": 1.292, + "step": 29657 + }, + { + "epoch": 0.870808620588408, + "grad_norm": 0.0, + "learning_rate": 8.626897813120516e-07, + "loss": 1.3027, + "step": 29658 + }, + { + "epoch": 0.870837982265547, + "grad_norm": 0.0, + "learning_rate": 8.623034287913123e-07, + "loss": 1.2627, + "step": 29659 + }, + { + "epoch": 0.8708673439426861, + "grad_norm": 0.0, + "learning_rate": 8.619171589051057e-07, + "loss": 1.0161, + "step": 29660 + }, + { + "epoch": 0.870896705619825, + "grad_norm": 0.0, + "learning_rate": 8.615309716569209e-07, + "loss": 1.2744, + "step": 29661 + }, + { + "epoch": 0.870926067296964, + "grad_norm": 0.0, + "learning_rate": 8.611448670502531e-07, + "loss": 1.1934, + "step": 29662 + }, + { + "epoch": 0.870955428974103, + "grad_norm": 0.0, + "learning_rate": 8.607588450885928e-07, + "loss": 1.1719, + "step": 29663 + }, + { + "epoch": 0.870984790651242, + "grad_norm": 0.0, + "learning_rate": 8.603729057754318e-07, + "loss": 1.146, + "step": 29664 + }, + { + "epoch": 0.871014152328381, + "grad_norm": 0.0, + "learning_rate": 8.599870491142559e-07, + "loss": 1.1699, + "step": 29665 + }, + { + "epoch": 0.87104351400552, + "grad_norm": 0.0, + "learning_rate": 8.596012751085614e-07, + "loss": 1.3037, + "step": 29666 + }, + { + "epoch": 0.871072875682659, + "grad_norm": 0.0, + "learning_rate": 8.592155837618299e-07, + "loss": 1.1113, + "step": 29667 + }, + { + "epoch": 0.871102237359798, + "grad_norm": 0.0, + "learning_rate": 8.588299750775564e-07, + "loss": 1.2695, + "step": 29668 + }, + { + "epoch": 0.871131599036937, + "grad_norm": 0.0, + "learning_rate": 8.584444490592237e-07, + "loss": 1.2832, + "step": 29669 + }, + { + "epoch": 0.871160960714076, + "grad_norm": 0.0, + "learning_rate": 8.580590057103167e-07, + "loss": 1.3379, + "step": 29670 + }, + { + "epoch": 0.871190322391215, + "grad_norm": 0.0, + "learning_rate": 8.576736450343282e-07, + "loss": 1.2378, + "step": 29671 + }, + { + "epoch": 0.871219684068354, + "grad_norm": 0.0, + "learning_rate": 8.572883670347343e-07, + "loss": 1.1567, + "step": 29672 + }, + { + "epoch": 0.871249045745493, + "grad_norm": 0.0, + "learning_rate": 8.569031717150244e-07, + "loss": 1.106, + "step": 29673 + }, + { + "epoch": 0.871278407422632, + "grad_norm": 0.0, + "learning_rate": 8.565180590786792e-07, + "loss": 1.1826, + "step": 29674 + }, + { + "epoch": 0.8713077690997709, + "grad_norm": 0.0, + "learning_rate": 8.561330291291847e-07, + "loss": 1.2515, + "step": 29675 + }, + { + "epoch": 0.87133713077691, + "grad_norm": 0.0, + "learning_rate": 8.557480818700192e-07, + "loss": 1.2148, + "step": 29676 + }, + { + "epoch": 0.871366492454049, + "grad_norm": 0.0, + "learning_rate": 8.553632173046678e-07, + "loss": 1.1382, + "step": 29677 + }, + { + "epoch": 0.8713958541311879, + "grad_norm": 0.0, + "learning_rate": 8.549784354366086e-07, + "loss": 1.1318, + "step": 29678 + }, + { + "epoch": 0.871425215808327, + "grad_norm": 0.0, + "learning_rate": 8.545937362693213e-07, + "loss": 1.2949, + "step": 29679 + }, + { + "epoch": 0.871454577485466, + "grad_norm": 0.0, + "learning_rate": 8.542091198062852e-07, + "loss": 1.2588, + "step": 29680 + }, + { + "epoch": 0.8714839391626049, + "grad_norm": 0.0, + "learning_rate": 8.538245860509764e-07, + "loss": 1.2275, + "step": 29681 + }, + { + "epoch": 0.871513300839744, + "grad_norm": 0.0, + "learning_rate": 8.534401350068755e-07, + "loss": 1.2812, + "step": 29682 + }, + { + "epoch": 0.871542662516883, + "grad_norm": 0.0, + "learning_rate": 8.530557666774575e-07, + "loss": 1.0771, + "step": 29683 + }, + { + "epoch": 0.8715720241940219, + "grad_norm": 0.0, + "learning_rate": 8.526714810661995e-07, + "loss": 1.2705, + "step": 29684 + }, + { + "epoch": 0.871601385871161, + "grad_norm": 0.0, + "learning_rate": 8.522872781765745e-07, + "loss": 1.2168, + "step": 29685 + }, + { + "epoch": 0.8716307475483, + "grad_norm": 0.0, + "learning_rate": 8.519031580120596e-07, + "loss": 1.2822, + "step": 29686 + }, + { + "epoch": 0.8716601092254389, + "grad_norm": 0.0, + "learning_rate": 8.515191205761275e-07, + "loss": 1.1797, + "step": 29687 + }, + { + "epoch": 0.871689470902578, + "grad_norm": 0.0, + "learning_rate": 8.511351658722522e-07, + "loss": 1.1836, + "step": 29688 + }, + { + "epoch": 0.8717188325797169, + "grad_norm": 0.0, + "learning_rate": 8.507512939039031e-07, + "loss": 1.2119, + "step": 29689 + }, + { + "epoch": 0.8717481942568559, + "grad_norm": 0.0, + "learning_rate": 8.503675046745518e-07, + "loss": 1.0659, + "step": 29690 + }, + { + "epoch": 0.871777555933995, + "grad_norm": 0.0, + "learning_rate": 8.499837981876724e-07, + "loss": 1.3555, + "step": 29691 + }, + { + "epoch": 0.8718069176111339, + "grad_norm": 0.0, + "learning_rate": 8.496001744467319e-07, + "loss": 1.2041, + "step": 29692 + }, + { + "epoch": 0.8718362792882729, + "grad_norm": 0.0, + "learning_rate": 8.49216633455201e-07, + "loss": 1.2388, + "step": 29693 + }, + { + "epoch": 0.871865640965412, + "grad_norm": 0.0, + "learning_rate": 8.488331752165468e-07, + "loss": 1.1621, + "step": 29694 + }, + { + "epoch": 0.8718950026425509, + "grad_norm": 0.0, + "learning_rate": 8.484497997342389e-07, + "loss": 1.2402, + "step": 29695 + }, + { + "epoch": 0.8719243643196899, + "grad_norm": 0.0, + "learning_rate": 8.480665070117433e-07, + "loss": 1.1812, + "step": 29696 + }, + { + "epoch": 0.871953725996829, + "grad_norm": 0.0, + "learning_rate": 8.476832970525261e-07, + "loss": 1.1577, + "step": 29697 + }, + { + "epoch": 0.8719830876739679, + "grad_norm": 0.0, + "learning_rate": 8.473001698600535e-07, + "loss": 1.1924, + "step": 29698 + }, + { + "epoch": 0.8720124493511069, + "grad_norm": 0.0, + "learning_rate": 8.469171254377872e-07, + "loss": 1.1704, + "step": 29699 + }, + { + "epoch": 0.872041811028246, + "grad_norm": 0.0, + "learning_rate": 8.465341637891955e-07, + "loss": 1.2246, + "step": 29700 + }, + { + "epoch": 0.8720711727053849, + "grad_norm": 0.0, + "learning_rate": 8.461512849177378e-07, + "loss": 1.2617, + "step": 29701 + }, + { + "epoch": 0.8721005343825239, + "grad_norm": 0.0, + "learning_rate": 8.457684888268791e-07, + "loss": 1.2578, + "step": 29702 + }, + { + "epoch": 0.872129896059663, + "grad_norm": 0.0, + "learning_rate": 8.453857755200811e-07, + "loss": 1.3994, + "step": 29703 + }, + { + "epoch": 0.8721592577368019, + "grad_norm": 0.0, + "learning_rate": 8.450031450008045e-07, + "loss": 1.21, + "step": 29704 + }, + { + "epoch": 0.8721886194139409, + "grad_norm": 0.0, + "learning_rate": 8.446205972725074e-07, + "loss": 1.2031, + "step": 29705 + }, + { + "epoch": 0.87221798109108, + "grad_norm": 0.0, + "learning_rate": 8.442381323386517e-07, + "loss": 1.2148, + "step": 29706 + }, + { + "epoch": 0.8722473427682189, + "grad_norm": 0.0, + "learning_rate": 8.438557502026945e-07, + "loss": 1.1777, + "step": 29707 + }, + { + "epoch": 0.8722767044453579, + "grad_norm": 0.0, + "learning_rate": 8.434734508680964e-07, + "loss": 1.1973, + "step": 29708 + }, + { + "epoch": 0.872306066122497, + "grad_norm": 0.0, + "learning_rate": 8.430912343383125e-07, + "loss": 1.1016, + "step": 29709 + }, + { + "epoch": 0.8723354277996359, + "grad_norm": 0.0, + "learning_rate": 8.427091006167987e-07, + "loss": 1.1406, + "step": 29710 + }, + { + "epoch": 0.8723647894767749, + "grad_norm": 0.0, + "learning_rate": 8.423270497070124e-07, + "loss": 1.0767, + "step": 29711 + }, + { + "epoch": 0.8723941511539139, + "grad_norm": 0.0, + "learning_rate": 8.419450816124086e-07, + "loss": 1.2163, + "step": 29712 + }, + { + "epoch": 0.8724235128310529, + "grad_norm": 0.0, + "learning_rate": 8.415631963364412e-07, + "loss": 1.2524, + "step": 29713 + }, + { + "epoch": 0.8724528745081919, + "grad_norm": 0.0, + "learning_rate": 8.411813938825608e-07, + "loss": 1.2007, + "step": 29714 + }, + { + "epoch": 0.8724822361853309, + "grad_norm": 0.0, + "learning_rate": 8.407996742542246e-07, + "loss": 1.2173, + "step": 29715 + }, + { + "epoch": 0.8725115978624699, + "grad_norm": 0.0, + "learning_rate": 8.40418037454881e-07, + "loss": 1.291, + "step": 29716 + }, + { + "epoch": 0.8725409595396089, + "grad_norm": 0.0, + "learning_rate": 8.400364834879848e-07, + "loss": 1.2056, + "step": 29717 + }, + { + "epoch": 0.8725703212167479, + "grad_norm": 0.0, + "learning_rate": 8.396550123569836e-07, + "loss": 1.0952, + "step": 29718 + }, + { + "epoch": 0.8725996828938869, + "grad_norm": 0.0, + "learning_rate": 8.392736240653276e-07, + "loss": 1.3711, + "step": 29719 + }, + { + "epoch": 0.8726290445710259, + "grad_norm": 0.0, + "learning_rate": 8.388923186164677e-07, + "loss": 1.2832, + "step": 29720 + }, + { + "epoch": 0.8726584062481649, + "grad_norm": 0.0, + "learning_rate": 8.385110960138509e-07, + "loss": 1.2773, + "step": 29721 + }, + { + "epoch": 0.8726877679253039, + "grad_norm": 0.0, + "learning_rate": 8.381299562609235e-07, + "loss": 1.3066, + "step": 29722 + }, + { + "epoch": 0.8727171296024429, + "grad_norm": 0.0, + "learning_rate": 8.377488993611327e-07, + "loss": 1.1577, + "step": 29723 + }, + { + "epoch": 0.8727464912795819, + "grad_norm": 0.0, + "learning_rate": 8.373679253179268e-07, + "loss": 1.2676, + "step": 29724 + }, + { + "epoch": 0.8727758529567209, + "grad_norm": 0.0, + "learning_rate": 8.369870341347475e-07, + "loss": 1.1099, + "step": 29725 + }, + { + "epoch": 0.8728052146338598, + "grad_norm": 0.0, + "learning_rate": 8.36606225815042e-07, + "loss": 1.1006, + "step": 29726 + }, + { + "epoch": 0.8728345763109989, + "grad_norm": 0.0, + "learning_rate": 8.362255003622544e-07, + "loss": 1.1025, + "step": 29727 + }, + { + "epoch": 0.8728639379881379, + "grad_norm": 0.0, + "learning_rate": 8.358448577798262e-07, + "loss": 1.1558, + "step": 29728 + }, + { + "epoch": 0.8728932996652768, + "grad_norm": 0.0, + "learning_rate": 8.354642980711991e-07, + "loss": 1.2227, + "step": 29729 + }, + { + "epoch": 0.8729226613424159, + "grad_norm": 0.0, + "learning_rate": 8.350838212398138e-07, + "loss": 1.209, + "step": 29730 + }, + { + "epoch": 0.8729520230195549, + "grad_norm": 0.0, + "learning_rate": 8.347034272891153e-07, + "loss": 1.1855, + "step": 29731 + }, + { + "epoch": 0.8729813846966938, + "grad_norm": 0.0, + "learning_rate": 8.343231162225385e-07, + "loss": 1.1611, + "step": 29732 + }, + { + "epoch": 0.8730107463738329, + "grad_norm": 0.0, + "learning_rate": 8.339428880435274e-07, + "loss": 1.0234, + "step": 29733 + }, + { + "epoch": 0.8730401080509719, + "grad_norm": 0.0, + "learning_rate": 8.33562742755516e-07, + "loss": 1.1299, + "step": 29734 + }, + { + "epoch": 0.8730694697281108, + "grad_norm": 0.0, + "learning_rate": 8.331826803619469e-07, + "loss": 1.2617, + "step": 29735 + }, + { + "epoch": 0.8730988314052499, + "grad_norm": 0.0, + "learning_rate": 8.328027008662531e-07, + "loss": 1.1406, + "step": 29736 + }, + { + "epoch": 0.8731281930823889, + "grad_norm": 0.0, + "learning_rate": 8.324228042718729e-07, + "loss": 1.1074, + "step": 29737 + }, + { + "epoch": 0.8731575547595278, + "grad_norm": 0.0, + "learning_rate": 8.320429905822414e-07, + "loss": 1.1128, + "step": 29738 + }, + { + "epoch": 0.8731869164366669, + "grad_norm": 0.0, + "learning_rate": 8.316632598007912e-07, + "loss": 1.208, + "step": 29739 + }, + { + "epoch": 0.8732162781138059, + "grad_norm": 0.0, + "learning_rate": 8.312836119309598e-07, + "loss": 1.2207, + "step": 29740 + }, + { + "epoch": 0.8732456397909448, + "grad_norm": 0.0, + "learning_rate": 8.309040469761775e-07, + "loss": 1.1797, + "step": 29741 + }, + { + "epoch": 0.8732750014680839, + "grad_norm": 0.0, + "learning_rate": 8.305245649398796e-07, + "loss": 1.1431, + "step": 29742 + }, + { + "epoch": 0.8733043631452229, + "grad_norm": 0.0, + "learning_rate": 8.301451658254944e-07, + "loss": 1.3682, + "step": 29743 + }, + { + "epoch": 0.8733337248223618, + "grad_norm": 0.0, + "learning_rate": 8.297658496364569e-07, + "loss": 1.2207, + "step": 29744 + }, + { + "epoch": 0.8733630864995009, + "grad_norm": 0.0, + "learning_rate": 8.293866163761955e-07, + "loss": 1.1138, + "step": 29745 + }, + { + "epoch": 0.8733924481766399, + "grad_norm": 0.0, + "learning_rate": 8.290074660481395e-07, + "loss": 1.1064, + "step": 29746 + }, + { + "epoch": 0.8734218098537788, + "grad_norm": 0.0, + "learning_rate": 8.286283986557175e-07, + "loss": 1.2725, + "step": 29747 + }, + { + "epoch": 0.8734511715309179, + "grad_norm": 0.0, + "learning_rate": 8.282494142023556e-07, + "loss": 1.2783, + "step": 29748 + }, + { + "epoch": 0.8734805332080569, + "grad_norm": 0.0, + "learning_rate": 8.278705126914854e-07, + "loss": 1.2676, + "step": 29749 + }, + { + "epoch": 0.8735098948851958, + "grad_norm": 0.0, + "learning_rate": 8.274916941265298e-07, + "loss": 1.2246, + "step": 29750 + }, + { + "epoch": 0.8735392565623349, + "grad_norm": 0.0, + "learning_rate": 8.271129585109172e-07, + "loss": 1.125, + "step": 29751 + }, + { + "epoch": 0.8735686182394738, + "grad_norm": 0.0, + "learning_rate": 8.267343058480715e-07, + "loss": 1.2656, + "step": 29752 + }, + { + "epoch": 0.8735979799166128, + "grad_norm": 0.0, + "learning_rate": 8.263557361414165e-07, + "loss": 1.3232, + "step": 29753 + }, + { + "epoch": 0.8736273415937519, + "grad_norm": 0.0, + "learning_rate": 8.259772493943741e-07, + "loss": 1.1543, + "step": 29754 + }, + { + "epoch": 0.8736567032708908, + "grad_norm": 0.0, + "learning_rate": 8.255988456103714e-07, + "loss": 1.2202, + "step": 29755 + }, + { + "epoch": 0.8736860649480298, + "grad_norm": 0.0, + "learning_rate": 8.252205247928258e-07, + "loss": 1.2949, + "step": 29756 + }, + { + "epoch": 0.8737154266251689, + "grad_norm": 0.0, + "learning_rate": 8.248422869451622e-07, + "loss": 1.168, + "step": 29757 + }, + { + "epoch": 0.8737447883023078, + "grad_norm": 0.0, + "learning_rate": 8.244641320708002e-07, + "loss": 1.1772, + "step": 29758 + }, + { + "epoch": 0.8737741499794468, + "grad_norm": 0.0, + "learning_rate": 8.240860601731571e-07, + "loss": 1.2637, + "step": 29759 + }, + { + "epoch": 0.8738035116565859, + "grad_norm": 0.0, + "learning_rate": 8.237080712556544e-07, + "loss": 1.1982, + "step": 29760 + }, + { + "epoch": 0.8738328733337248, + "grad_norm": 0.0, + "learning_rate": 8.233301653217107e-07, + "loss": 1.0703, + "step": 29761 + }, + { + "epoch": 0.8738622350108638, + "grad_norm": 0.0, + "learning_rate": 8.229523423747421e-07, + "loss": 1.2114, + "step": 29762 + }, + { + "epoch": 0.8738915966880029, + "grad_norm": 0.0, + "learning_rate": 8.225746024181647e-07, + "loss": 1.1855, + "step": 29763 + }, + { + "epoch": 0.8739209583651418, + "grad_norm": 0.0, + "learning_rate": 8.221969454553969e-07, + "loss": 1.1885, + "step": 29764 + }, + { + "epoch": 0.8739503200422808, + "grad_norm": 0.0, + "learning_rate": 8.218193714898504e-07, + "loss": 1.1084, + "step": 29765 + }, + { + "epoch": 0.8739796817194199, + "grad_norm": 0.0, + "learning_rate": 8.214418805249436e-07, + "loss": 1.2188, + "step": 29766 + }, + { + "epoch": 0.8740090433965588, + "grad_norm": 0.0, + "learning_rate": 8.210644725640893e-07, + "loss": 1.0703, + "step": 29767 + }, + { + "epoch": 0.8740384050736978, + "grad_norm": 0.0, + "learning_rate": 8.206871476106982e-07, + "loss": 1.1743, + "step": 29768 + }, + { + "epoch": 0.8740677667508369, + "grad_norm": 0.0, + "learning_rate": 8.203099056681852e-07, + "loss": 1.2188, + "step": 29769 + }, + { + "epoch": 0.8740971284279758, + "grad_norm": 0.0, + "learning_rate": 8.199327467399598e-07, + "loss": 1.3242, + "step": 29770 + }, + { + "epoch": 0.8741264901051148, + "grad_norm": 0.0, + "learning_rate": 8.19555670829435e-07, + "loss": 1.2354, + "step": 29771 + }, + { + "epoch": 0.8741558517822539, + "grad_norm": 0.0, + "learning_rate": 8.191786779400168e-07, + "loss": 1.2295, + "step": 29772 + }, + { + "epoch": 0.8741852134593928, + "grad_norm": 0.0, + "learning_rate": 8.188017680751193e-07, + "loss": 1.1807, + "step": 29773 + }, + { + "epoch": 0.8742145751365318, + "grad_norm": 0.0, + "learning_rate": 8.184249412381474e-07, + "loss": 1.2212, + "step": 29774 + }, + { + "epoch": 0.8742439368136709, + "grad_norm": 0.0, + "learning_rate": 8.180481974325116e-07, + "loss": 1.165, + "step": 29775 + }, + { + "epoch": 0.8742732984908098, + "grad_norm": 0.0, + "learning_rate": 8.176715366616172e-07, + "loss": 1.105, + "step": 29776 + }, + { + "epoch": 0.8743026601679488, + "grad_norm": 0.0, + "learning_rate": 8.172949589288703e-07, + "loss": 1.1504, + "step": 29777 + }, + { + "epoch": 0.8743320218450877, + "grad_norm": 0.0, + "learning_rate": 8.16918464237677e-07, + "loss": 1.25, + "step": 29778 + }, + { + "epoch": 0.8743613835222268, + "grad_norm": 0.0, + "learning_rate": 8.165420525914414e-07, + "loss": 1.2358, + "step": 29779 + }, + { + "epoch": 0.8743907451993658, + "grad_norm": 0.0, + "learning_rate": 8.161657239935684e-07, + "loss": 1.2188, + "step": 29780 + }, + { + "epoch": 0.8744201068765047, + "grad_norm": 0.0, + "learning_rate": 8.157894784474596e-07, + "loss": 1.1982, + "step": 29781 + }, + { + "epoch": 0.8744494685536438, + "grad_norm": 0.0, + "learning_rate": 8.154133159565203e-07, + "loss": 1.1929, + "step": 29782 + }, + { + "epoch": 0.8744788302307828, + "grad_norm": 0.0, + "learning_rate": 8.150372365241488e-07, + "loss": 1.2285, + "step": 29783 + }, + { + "epoch": 0.8745081919079217, + "grad_norm": 0.0, + "learning_rate": 8.14661240153749e-07, + "loss": 1.1553, + "step": 29784 + }, + { + "epoch": 0.8745375535850608, + "grad_norm": 0.0, + "learning_rate": 8.142853268487206e-07, + "loss": 1.2446, + "step": 29785 + }, + { + "epoch": 0.8745669152621998, + "grad_norm": 0.0, + "learning_rate": 8.139094966124627e-07, + "loss": 1.1797, + "step": 29786 + }, + { + "epoch": 0.8745962769393387, + "grad_norm": 0.0, + "learning_rate": 8.13533749448373e-07, + "loss": 1.2754, + "step": 29787 + }, + { + "epoch": 0.8746256386164778, + "grad_norm": 0.0, + "learning_rate": 8.131580853598498e-07, + "loss": 1.2744, + "step": 29788 + }, + { + "epoch": 0.8746550002936168, + "grad_norm": 0.0, + "learning_rate": 8.127825043502912e-07, + "loss": 1.1904, + "step": 29789 + }, + { + "epoch": 0.8746843619707557, + "grad_norm": 0.0, + "learning_rate": 8.124070064230927e-07, + "loss": 1.3994, + "step": 29790 + }, + { + "epoch": 0.8747137236478948, + "grad_norm": 0.0, + "learning_rate": 8.120315915816512e-07, + "loss": 1.1533, + "step": 29791 + }, + { + "epoch": 0.8747430853250338, + "grad_norm": 0.0, + "learning_rate": 8.116562598293609e-07, + "loss": 1.1689, + "step": 29792 + }, + { + "epoch": 0.8747724470021727, + "grad_norm": 0.0, + "learning_rate": 8.112810111696168e-07, + "loss": 1.1807, + "step": 29793 + }, + { + "epoch": 0.8748018086793118, + "grad_norm": 0.0, + "learning_rate": 8.109058456058117e-07, + "loss": 1.1934, + "step": 29794 + }, + { + "epoch": 0.8748311703564507, + "grad_norm": 0.0, + "learning_rate": 8.105307631413395e-07, + "loss": 1.2119, + "step": 29795 + }, + { + "epoch": 0.8748605320335897, + "grad_norm": 0.0, + "learning_rate": 8.101557637795898e-07, + "loss": 1.165, + "step": 29796 + }, + { + "epoch": 0.8748898937107288, + "grad_norm": 0.0, + "learning_rate": 8.097808475239544e-07, + "loss": 1.1089, + "step": 29797 + }, + { + "epoch": 0.8749192553878677, + "grad_norm": 0.0, + "learning_rate": 8.094060143778248e-07, + "loss": 1.1934, + "step": 29798 + }, + { + "epoch": 0.8749486170650067, + "grad_norm": 0.0, + "learning_rate": 8.090312643445896e-07, + "loss": 1.1934, + "step": 29799 + }, + { + "epoch": 0.8749779787421458, + "grad_norm": 0.0, + "learning_rate": 8.086565974276406e-07, + "loss": 1.1523, + "step": 29800 + }, + { + "epoch": 0.8750073404192847, + "grad_norm": 0.0, + "learning_rate": 8.082820136303615e-07, + "loss": 1.2715, + "step": 29801 + }, + { + "epoch": 0.8750367020964237, + "grad_norm": 0.0, + "learning_rate": 8.079075129561464e-07, + "loss": 1.0635, + "step": 29802 + }, + { + "epoch": 0.8750660637735628, + "grad_norm": 0.0, + "learning_rate": 8.075330954083737e-07, + "loss": 1.1494, + "step": 29803 + }, + { + "epoch": 0.8750954254507017, + "grad_norm": 0.0, + "learning_rate": 8.071587609904352e-07, + "loss": 1.2783, + "step": 29804 + }, + { + "epoch": 0.8751247871278407, + "grad_norm": 0.0, + "learning_rate": 8.067845097057126e-07, + "loss": 1.1748, + "step": 29805 + }, + { + "epoch": 0.8751541488049798, + "grad_norm": 0.0, + "learning_rate": 8.064103415575941e-07, + "loss": 1.1729, + "step": 29806 + }, + { + "epoch": 0.8751835104821187, + "grad_norm": 0.0, + "learning_rate": 8.060362565494606e-07, + "loss": 1.2739, + "step": 29807 + }, + { + "epoch": 0.8752128721592577, + "grad_norm": 0.0, + "learning_rate": 8.056622546846948e-07, + "loss": 1.1885, + "step": 29808 + }, + { + "epoch": 0.8752422338363968, + "grad_norm": 0.0, + "learning_rate": 8.052883359666818e-07, + "loss": 1.2036, + "step": 29809 + }, + { + "epoch": 0.8752715955135357, + "grad_norm": 0.0, + "learning_rate": 8.049145003988012e-07, + "loss": 1.0986, + "step": 29810 + }, + { + "epoch": 0.8753009571906747, + "grad_norm": 0.0, + "learning_rate": 8.045407479844336e-07, + "loss": 1.1924, + "step": 29811 + }, + { + "epoch": 0.8753303188678138, + "grad_norm": 0.0, + "learning_rate": 8.041670787269573e-07, + "loss": 1.1777, + "step": 29812 + }, + { + "epoch": 0.8753596805449527, + "grad_norm": 0.0, + "learning_rate": 8.037934926297541e-07, + "loss": 1.2559, + "step": 29813 + }, + { + "epoch": 0.8753890422220917, + "grad_norm": 0.0, + "learning_rate": 8.034199896962003e-07, + "loss": 1.0977, + "step": 29814 + }, + { + "epoch": 0.8754184038992308, + "grad_norm": 0.0, + "learning_rate": 8.030465699296775e-07, + "loss": 1.1787, + "step": 29815 + }, + { + "epoch": 0.8754477655763697, + "grad_norm": 0.0, + "learning_rate": 8.026732333335586e-07, + "loss": 1.209, + "step": 29816 + }, + { + "epoch": 0.8754771272535087, + "grad_norm": 0.0, + "learning_rate": 8.022999799112197e-07, + "loss": 1.2998, + "step": 29817 + }, + { + "epoch": 0.8755064889306478, + "grad_norm": 0.0, + "learning_rate": 8.019268096660404e-07, + "loss": 1.2031, + "step": 29818 + }, + { + "epoch": 0.8755358506077867, + "grad_norm": 0.0, + "learning_rate": 8.015537226013914e-07, + "loss": 1.2021, + "step": 29819 + }, + { + "epoch": 0.8755652122849257, + "grad_norm": 0.0, + "learning_rate": 8.011807187206477e-07, + "loss": 1.2383, + "step": 29820 + }, + { + "epoch": 0.8755945739620647, + "grad_norm": 0.0, + "learning_rate": 8.008077980271822e-07, + "loss": 1.2432, + "step": 29821 + }, + { + "epoch": 0.8756239356392037, + "grad_norm": 0.0, + "learning_rate": 8.004349605243688e-07, + "loss": 1.2129, + "step": 29822 + }, + { + "epoch": 0.8756532973163427, + "grad_norm": 0.0, + "learning_rate": 8.00062206215576e-07, + "loss": 1.1748, + "step": 29823 + }, + { + "epoch": 0.8756826589934817, + "grad_norm": 0.0, + "learning_rate": 7.996895351041789e-07, + "loss": 1.1865, + "step": 29824 + }, + { + "epoch": 0.8757120206706207, + "grad_norm": 0.0, + "learning_rate": 7.993169471935436e-07, + "loss": 1.2461, + "step": 29825 + }, + { + "epoch": 0.8757413823477597, + "grad_norm": 0.0, + "learning_rate": 7.989444424870429e-07, + "loss": 1.2798, + "step": 29826 + }, + { + "epoch": 0.8757707440248987, + "grad_norm": 0.0, + "learning_rate": 7.985720209880466e-07, + "loss": 1.1597, + "step": 29827 + }, + { + "epoch": 0.8758001057020377, + "grad_norm": 0.0, + "learning_rate": 7.981996826999161e-07, + "loss": 1.1665, + "step": 29828 + }, + { + "epoch": 0.8758294673791767, + "grad_norm": 0.0, + "learning_rate": 7.978274276260245e-07, + "loss": 1.2178, + "step": 29829 + }, + { + "epoch": 0.8758588290563157, + "grad_norm": 0.0, + "learning_rate": 7.974552557697346e-07, + "loss": 1.0649, + "step": 29830 + }, + { + "epoch": 0.8758881907334547, + "grad_norm": 0.0, + "learning_rate": 7.970831671344148e-07, + "loss": 1.3125, + "step": 29831 + }, + { + "epoch": 0.8759175524105937, + "grad_norm": 0.0, + "learning_rate": 7.967111617234269e-07, + "loss": 1.2363, + "step": 29832 + }, + { + "epoch": 0.8759469140877327, + "grad_norm": 0.0, + "learning_rate": 7.963392395401393e-07, + "loss": 1.1255, + "step": 29833 + }, + { + "epoch": 0.8759762757648717, + "grad_norm": 0.0, + "learning_rate": 7.959674005879137e-07, + "loss": 1.1436, + "step": 29834 + }, + { + "epoch": 0.8760056374420107, + "grad_norm": 0.0, + "learning_rate": 7.955956448701107e-07, + "loss": 1.2119, + "step": 29835 + }, + { + "epoch": 0.8760349991191497, + "grad_norm": 0.0, + "learning_rate": 7.952239723900945e-07, + "loss": 1.3711, + "step": 29836 + }, + { + "epoch": 0.8760643607962887, + "grad_norm": 0.0, + "learning_rate": 7.948523831512234e-07, + "loss": 1.1348, + "step": 29837 + }, + { + "epoch": 0.8760937224734276, + "grad_norm": 0.0, + "learning_rate": 7.944808771568602e-07, + "loss": 1.1333, + "step": 29838 + }, + { + "epoch": 0.8761230841505667, + "grad_norm": 0.0, + "learning_rate": 7.941094544103634e-07, + "loss": 1.1392, + "step": 29839 + }, + { + "epoch": 0.8761524458277057, + "grad_norm": 0.0, + "learning_rate": 7.937381149150947e-07, + "loss": 1.3135, + "step": 29840 + }, + { + "epoch": 0.8761818075048446, + "grad_norm": 0.0, + "learning_rate": 7.933668586744069e-07, + "loss": 1.1514, + "step": 29841 + }, + { + "epoch": 0.8762111691819837, + "grad_norm": 0.0, + "learning_rate": 7.929956856916632e-07, + "loss": 1.1851, + "step": 29842 + }, + { + "epoch": 0.8762405308591227, + "grad_norm": 0.0, + "learning_rate": 7.926245959702173e-07, + "loss": 1.2402, + "step": 29843 + }, + { + "epoch": 0.8762698925362616, + "grad_norm": 0.0, + "learning_rate": 7.922535895134253e-07, + "loss": 1.209, + "step": 29844 + }, + { + "epoch": 0.8762992542134007, + "grad_norm": 0.0, + "learning_rate": 7.918826663246415e-07, + "loss": 1.0654, + "step": 29845 + }, + { + "epoch": 0.8763286158905397, + "grad_norm": 0.0, + "learning_rate": 7.915118264072196e-07, + "loss": 1.2471, + "step": 29846 + }, + { + "epoch": 0.8763579775676786, + "grad_norm": 0.0, + "learning_rate": 7.911410697645173e-07, + "loss": 1.2744, + "step": 29847 + }, + { + "epoch": 0.8763873392448177, + "grad_norm": 0.0, + "learning_rate": 7.907703963998814e-07, + "loss": 1.25, + "step": 29848 + }, + { + "epoch": 0.8764167009219567, + "grad_norm": 0.0, + "learning_rate": 7.903998063166695e-07, + "loss": 1.2495, + "step": 29849 + }, + { + "epoch": 0.8764460625990956, + "grad_norm": 0.0, + "learning_rate": 7.900292995182301e-07, + "loss": 1.2168, + "step": 29850 + }, + { + "epoch": 0.8764754242762347, + "grad_norm": 0.0, + "learning_rate": 7.896588760079172e-07, + "loss": 1.2803, + "step": 29851 + }, + { + "epoch": 0.8765047859533737, + "grad_norm": 0.0, + "learning_rate": 7.892885357890745e-07, + "loss": 1.249, + "step": 29852 + }, + { + "epoch": 0.8765341476305126, + "grad_norm": 0.0, + "learning_rate": 7.889182788650562e-07, + "loss": 1.0635, + "step": 29853 + }, + { + "epoch": 0.8765635093076517, + "grad_norm": 0.0, + "learning_rate": 7.885481052392063e-07, + "loss": 1.2251, + "step": 29854 + }, + { + "epoch": 0.8765928709847907, + "grad_norm": 0.0, + "learning_rate": 7.881780149148776e-07, + "loss": 1.2529, + "step": 29855 + }, + { + "epoch": 0.8766222326619296, + "grad_norm": 0.0, + "learning_rate": 7.878080078954142e-07, + "loss": 1.1558, + "step": 29856 + }, + { + "epoch": 0.8766515943390687, + "grad_norm": 0.0, + "learning_rate": 7.874380841841611e-07, + "loss": 1.1196, + "step": 29857 + }, + { + "epoch": 0.8766809560162077, + "grad_norm": 0.0, + "learning_rate": 7.870682437844656e-07, + "loss": 1.1553, + "step": 29858 + }, + { + "epoch": 0.8767103176933466, + "grad_norm": 0.0, + "learning_rate": 7.866984866996719e-07, + "loss": 1.1753, + "step": 29859 + }, + { + "epoch": 0.8767396793704857, + "grad_norm": 0.0, + "learning_rate": 7.863288129331225e-07, + "loss": 1.2051, + "step": 29860 + }, + { + "epoch": 0.8767690410476247, + "grad_norm": 0.0, + "learning_rate": 7.859592224881607e-07, + "loss": 1.2324, + "step": 29861 + }, + { + "epoch": 0.8767984027247636, + "grad_norm": 0.0, + "learning_rate": 7.855897153681313e-07, + "loss": 1.1914, + "step": 29862 + }, + { + "epoch": 0.8768277644019027, + "grad_norm": 0.0, + "learning_rate": 7.852202915763707e-07, + "loss": 1.1631, + "step": 29863 + }, + { + "epoch": 0.8768571260790416, + "grad_norm": 0.0, + "learning_rate": 7.848509511162261e-07, + "loss": 1.1597, + "step": 29864 + }, + { + "epoch": 0.8768864877561806, + "grad_norm": 0.0, + "learning_rate": 7.844816939910338e-07, + "loss": 1.0747, + "step": 29865 + }, + { + "epoch": 0.8769158494333197, + "grad_norm": 0.0, + "learning_rate": 7.841125202041322e-07, + "loss": 1.3711, + "step": 29866 + }, + { + "epoch": 0.8769452111104586, + "grad_norm": 0.0, + "learning_rate": 7.837434297588631e-07, + "loss": 1.2627, + "step": 29867 + }, + { + "epoch": 0.8769745727875976, + "grad_norm": 0.0, + "learning_rate": 7.833744226585616e-07, + "loss": 1.2705, + "step": 29868 + }, + { + "epoch": 0.8770039344647367, + "grad_norm": 0.0, + "learning_rate": 7.830054989065661e-07, + "loss": 1.1704, + "step": 29869 + }, + { + "epoch": 0.8770332961418756, + "grad_norm": 0.0, + "learning_rate": 7.826366585062106e-07, + "loss": 1.1279, + "step": 29870 + }, + { + "epoch": 0.8770626578190146, + "grad_norm": 0.0, + "learning_rate": 7.822679014608348e-07, + "loss": 1.0957, + "step": 29871 + }, + { + "epoch": 0.8770920194961537, + "grad_norm": 0.0, + "learning_rate": 7.818992277737692e-07, + "loss": 1.2822, + "step": 29872 + }, + { + "epoch": 0.8771213811732926, + "grad_norm": 0.0, + "learning_rate": 7.815306374483511e-07, + "loss": 1.1421, + "step": 29873 + }, + { + "epoch": 0.8771507428504316, + "grad_norm": 0.0, + "learning_rate": 7.811621304879102e-07, + "loss": 0.9956, + "step": 29874 + }, + { + "epoch": 0.8771801045275707, + "grad_norm": 0.0, + "learning_rate": 7.807937068957839e-07, + "loss": 1.1855, + "step": 29875 + }, + { + "epoch": 0.8772094662047096, + "grad_norm": 0.0, + "learning_rate": 7.804253666753014e-07, + "loss": 1.0576, + "step": 29876 + }, + { + "epoch": 0.8772388278818486, + "grad_norm": 0.0, + "learning_rate": 7.800571098297916e-07, + "loss": 1.1299, + "step": 29877 + }, + { + "epoch": 0.8772681895589876, + "grad_norm": 0.0, + "learning_rate": 7.79688936362587e-07, + "loss": 1.146, + "step": 29878 + }, + { + "epoch": 0.8772975512361266, + "grad_norm": 0.0, + "learning_rate": 7.793208462770153e-07, + "loss": 1.2666, + "step": 29879 + }, + { + "epoch": 0.8773269129132656, + "grad_norm": 0.0, + "learning_rate": 7.78952839576409e-07, + "loss": 1.2402, + "step": 29880 + }, + { + "epoch": 0.8773562745904046, + "grad_norm": 0.0, + "learning_rate": 7.785849162640913e-07, + "loss": 1.3564, + "step": 29881 + }, + { + "epoch": 0.8773856362675436, + "grad_norm": 0.0, + "learning_rate": 7.782170763433939e-07, + "loss": 1.2002, + "step": 29882 + }, + { + "epoch": 0.8774149979446826, + "grad_norm": 0.0, + "learning_rate": 7.778493198176418e-07, + "loss": 1.1821, + "step": 29883 + }, + { + "epoch": 0.8774443596218215, + "grad_norm": 0.0, + "learning_rate": 7.774816466901591e-07, + "loss": 1.1895, + "step": 29884 + }, + { + "epoch": 0.8774737212989606, + "grad_norm": 0.0, + "learning_rate": 7.771140569642722e-07, + "loss": 1.2852, + "step": 29885 + }, + { + "epoch": 0.8775030829760996, + "grad_norm": 0.0, + "learning_rate": 7.767465506433036e-07, + "loss": 1.2061, + "step": 29886 + }, + { + "epoch": 0.8775324446532385, + "grad_norm": 0.0, + "learning_rate": 7.763791277305788e-07, + "loss": 1.2402, + "step": 29887 + }, + { + "epoch": 0.8775618063303776, + "grad_norm": 0.0, + "learning_rate": 7.760117882294193e-07, + "loss": 1.1831, + "step": 29888 + }, + { + "epoch": 0.8775911680075166, + "grad_norm": 0.0, + "learning_rate": 7.756445321431483e-07, + "loss": 1.2012, + "step": 29889 + }, + { + "epoch": 0.8776205296846555, + "grad_norm": 0.0, + "learning_rate": 7.75277359475085e-07, + "loss": 1.2085, + "step": 29890 + }, + { + "epoch": 0.8776498913617946, + "grad_norm": 0.0, + "learning_rate": 7.749102702285526e-07, + "loss": 1.2251, + "step": 29891 + }, + { + "epoch": 0.8776792530389336, + "grad_norm": 0.0, + "learning_rate": 7.745432644068684e-07, + "loss": 1.2588, + "step": 29892 + }, + { + "epoch": 0.8777086147160725, + "grad_norm": 0.0, + "learning_rate": 7.74176342013353e-07, + "loss": 1.2266, + "step": 29893 + }, + { + "epoch": 0.8777379763932116, + "grad_norm": 0.0, + "learning_rate": 7.738095030513215e-07, + "loss": 1.1797, + "step": 29894 + }, + { + "epoch": 0.8777673380703506, + "grad_norm": 0.0, + "learning_rate": 7.734427475240957e-07, + "loss": 1.1597, + "step": 29895 + }, + { + "epoch": 0.8777966997474895, + "grad_norm": 0.0, + "learning_rate": 7.730760754349897e-07, + "loss": 1.3916, + "step": 29896 + }, + { + "epoch": 0.8778260614246286, + "grad_norm": 0.0, + "learning_rate": 7.727094867873186e-07, + "loss": 1.2998, + "step": 29897 + }, + { + "epoch": 0.8778554231017676, + "grad_norm": 0.0, + "learning_rate": 7.723429815844008e-07, + "loss": 1.2134, + "step": 29898 + }, + { + "epoch": 0.8778847847789065, + "grad_norm": 0.0, + "learning_rate": 7.719765598295459e-07, + "loss": 1.0688, + "step": 29899 + }, + { + "epoch": 0.8779141464560456, + "grad_norm": 0.0, + "learning_rate": 7.716102215260723e-07, + "loss": 1.2441, + "step": 29900 + }, + { + "epoch": 0.8779435081331846, + "grad_norm": 0.0, + "learning_rate": 7.712439666772909e-07, + "loss": 1.3018, + "step": 29901 + }, + { + "epoch": 0.8779728698103235, + "grad_norm": 0.0, + "learning_rate": 7.708777952865143e-07, + "loss": 1.3252, + "step": 29902 + }, + { + "epoch": 0.8780022314874626, + "grad_norm": 0.0, + "learning_rate": 7.705117073570512e-07, + "loss": 1.1274, + "step": 29903 + }, + { + "epoch": 0.8780315931646016, + "grad_norm": 0.0, + "learning_rate": 7.701457028922166e-07, + "loss": 1.0713, + "step": 29904 + }, + { + "epoch": 0.8780609548417405, + "grad_norm": 0.0, + "learning_rate": 7.697797818953179e-07, + "loss": 1.1567, + "step": 29905 + }, + { + "epoch": 0.8780903165188796, + "grad_norm": 0.0, + "learning_rate": 7.694139443696624e-07, + "loss": 1.2183, + "step": 29906 + }, + { + "epoch": 0.8781196781960185, + "grad_norm": 0.0, + "learning_rate": 7.690481903185631e-07, + "loss": 1.3184, + "step": 29907 + }, + { + "epoch": 0.8781490398731575, + "grad_norm": 0.0, + "learning_rate": 7.68682519745324e-07, + "loss": 1.2017, + "step": 29908 + }, + { + "epoch": 0.8781784015502966, + "grad_norm": 0.0, + "learning_rate": 7.683169326532547e-07, + "loss": 1.0747, + "step": 29909 + }, + { + "epoch": 0.8782077632274355, + "grad_norm": 0.0, + "learning_rate": 7.679514290456569e-07, + "loss": 1.29, + "step": 29910 + }, + { + "epoch": 0.8782371249045745, + "grad_norm": 0.0, + "learning_rate": 7.675860089258402e-07, + "loss": 1.0625, + "step": 29911 + }, + { + "epoch": 0.8782664865817136, + "grad_norm": 0.0, + "learning_rate": 7.672206722971065e-07, + "loss": 1.2749, + "step": 29912 + }, + { + "epoch": 0.8782958482588525, + "grad_norm": 0.0, + "learning_rate": 7.668554191627619e-07, + "loss": 1.2158, + "step": 29913 + }, + { + "epoch": 0.8783252099359915, + "grad_norm": 0.0, + "learning_rate": 7.664902495261095e-07, + "loss": 1.2588, + "step": 29914 + }, + { + "epoch": 0.8783545716131306, + "grad_norm": 0.0, + "learning_rate": 7.661251633904476e-07, + "loss": 1.1309, + "step": 29915 + }, + { + "epoch": 0.8783839332902695, + "grad_norm": 0.0, + "learning_rate": 7.657601607590826e-07, + "loss": 1.2109, + "step": 29916 + }, + { + "epoch": 0.8784132949674085, + "grad_norm": 0.0, + "learning_rate": 7.653952416353139e-07, + "loss": 1.1943, + "step": 29917 + }, + { + "epoch": 0.8784426566445476, + "grad_norm": 0.0, + "learning_rate": 7.650304060224411e-07, + "loss": 1.1855, + "step": 29918 + }, + { + "epoch": 0.8784720183216865, + "grad_norm": 0.0, + "learning_rate": 7.646656539237618e-07, + "loss": 1.1968, + "step": 29919 + }, + { + "epoch": 0.8785013799988255, + "grad_norm": 0.0, + "learning_rate": 7.643009853425764e-07, + "loss": 1.1768, + "step": 29920 + }, + { + "epoch": 0.8785307416759646, + "grad_norm": 0.0, + "learning_rate": 7.639364002821825e-07, + "loss": 1.0991, + "step": 29921 + }, + { + "epoch": 0.8785601033531035, + "grad_norm": 0.0, + "learning_rate": 7.635718987458785e-07, + "loss": 1.207, + "step": 29922 + }, + { + "epoch": 0.8785894650302425, + "grad_norm": 0.0, + "learning_rate": 7.632074807369572e-07, + "loss": 1.231, + "step": 29923 + }, + { + "epoch": 0.8786188267073816, + "grad_norm": 0.0, + "learning_rate": 7.628431462587182e-07, + "loss": 1.1851, + "step": 29924 + }, + { + "epoch": 0.8786481883845205, + "grad_norm": 0.0, + "learning_rate": 7.624788953144546e-07, + "loss": 1.2842, + "step": 29925 + }, + { + "epoch": 0.8786775500616595, + "grad_norm": 0.0, + "learning_rate": 7.621147279074593e-07, + "loss": 1.1328, + "step": 29926 + }, + { + "epoch": 0.8787069117387986, + "grad_norm": 0.0, + "learning_rate": 7.617506440410272e-07, + "loss": 1.1699, + "step": 29927 + }, + { + "epoch": 0.8787362734159375, + "grad_norm": 0.0, + "learning_rate": 7.61386643718447e-07, + "loss": 1.2402, + "step": 29928 + }, + { + "epoch": 0.8787656350930765, + "grad_norm": 0.0, + "learning_rate": 7.610227269430159e-07, + "loss": 1.2256, + "step": 29929 + }, + { + "epoch": 0.8787949967702156, + "grad_norm": 0.0, + "learning_rate": 7.606588937180215e-07, + "loss": 1.1011, + "step": 29930 + }, + { + "epoch": 0.8788243584473545, + "grad_norm": 0.0, + "learning_rate": 7.602951440467554e-07, + "loss": 1.2646, + "step": 29931 + }, + { + "epoch": 0.8788537201244935, + "grad_norm": 0.0, + "learning_rate": 7.599314779325073e-07, + "loss": 1.2046, + "step": 29932 + }, + { + "epoch": 0.8788830818016325, + "grad_norm": 0.0, + "learning_rate": 7.595678953785657e-07, + "loss": 1.1553, + "step": 29933 + }, + { + "epoch": 0.8789124434787715, + "grad_norm": 0.0, + "learning_rate": 7.592043963882179e-07, + "loss": 1.3018, + "step": 29934 + }, + { + "epoch": 0.8789418051559105, + "grad_norm": 0.0, + "learning_rate": 7.58840980964749e-07, + "loss": 1.2021, + "step": 29935 + }, + { + "epoch": 0.8789711668330495, + "grad_norm": 0.0, + "learning_rate": 7.584776491114498e-07, + "loss": 1.1958, + "step": 29936 + }, + { + "epoch": 0.8790005285101885, + "grad_norm": 0.0, + "learning_rate": 7.581144008316022e-07, + "loss": 1.0933, + "step": 29937 + }, + { + "epoch": 0.8790298901873275, + "grad_norm": 0.0, + "learning_rate": 7.577512361284945e-07, + "loss": 1.2061, + "step": 29938 + }, + { + "epoch": 0.8790592518644665, + "grad_norm": 0.0, + "learning_rate": 7.573881550054085e-07, + "loss": 1.208, + "step": 29939 + }, + { + "epoch": 0.8790886135416055, + "grad_norm": 0.0, + "learning_rate": 7.570251574656296e-07, + "loss": 1.335, + "step": 29940 + }, + { + "epoch": 0.8791179752187445, + "grad_norm": 0.0, + "learning_rate": 7.566622435124393e-07, + "loss": 1.1626, + "step": 29941 + }, + { + "epoch": 0.8791473368958835, + "grad_norm": 0.0, + "learning_rate": 7.562994131491197e-07, + "loss": 1.2754, + "step": 29942 + }, + { + "epoch": 0.8791766985730225, + "grad_norm": 0.0, + "learning_rate": 7.559366663789502e-07, + "loss": 1.2739, + "step": 29943 + }, + { + "epoch": 0.8792060602501615, + "grad_norm": 0.0, + "learning_rate": 7.555740032052151e-07, + "loss": 1.312, + "step": 29944 + }, + { + "epoch": 0.8792354219273005, + "grad_norm": 0.0, + "learning_rate": 7.552114236311902e-07, + "loss": 1.3008, + "step": 29945 + }, + { + "epoch": 0.8792647836044395, + "grad_norm": 0.0, + "learning_rate": 7.548489276601557e-07, + "loss": 1.1753, + "step": 29946 + }, + { + "epoch": 0.8792941452815785, + "grad_norm": 0.0, + "learning_rate": 7.544865152953918e-07, + "loss": 1.2227, + "step": 29947 + }, + { + "epoch": 0.8793235069587175, + "grad_norm": 0.0, + "learning_rate": 7.541241865401716e-07, + "loss": 1.2236, + "step": 29948 + }, + { + "epoch": 0.8793528686358565, + "grad_norm": 0.0, + "learning_rate": 7.53761941397777e-07, + "loss": 1.123, + "step": 29949 + }, + { + "epoch": 0.8793822303129954, + "grad_norm": 0.0, + "learning_rate": 7.533997798714798e-07, + "loss": 1.2119, + "step": 29950 + }, + { + "epoch": 0.8794115919901345, + "grad_norm": 0.0, + "learning_rate": 7.530377019645574e-07, + "loss": 1.3018, + "step": 29951 + }, + { + "epoch": 0.8794409536672735, + "grad_norm": 0.0, + "learning_rate": 7.526757076802816e-07, + "loss": 1.252, + "step": 29952 + }, + { + "epoch": 0.8794703153444124, + "grad_norm": 0.0, + "learning_rate": 7.523137970219296e-07, + "loss": 1.2939, + "step": 29953 + }, + { + "epoch": 0.8794996770215515, + "grad_norm": 0.0, + "learning_rate": 7.519519699927713e-07, + "loss": 1.1475, + "step": 29954 + }, + { + "epoch": 0.8795290386986905, + "grad_norm": 0.0, + "learning_rate": 7.515902265960795e-07, + "loss": 1.1768, + "step": 29955 + }, + { + "epoch": 0.8795584003758294, + "grad_norm": 0.0, + "learning_rate": 7.51228566835126e-07, + "loss": 1.1396, + "step": 29956 + }, + { + "epoch": 0.8795877620529685, + "grad_norm": 0.0, + "learning_rate": 7.508669907131827e-07, + "loss": 1.1284, + "step": 29957 + }, + { + "epoch": 0.8796171237301075, + "grad_norm": 0.0, + "learning_rate": 7.505054982335169e-07, + "loss": 1.1885, + "step": 29958 + }, + { + "epoch": 0.8796464854072464, + "grad_norm": 0.0, + "learning_rate": 7.501440893993972e-07, + "loss": 1.292, + "step": 29959 + }, + { + "epoch": 0.8796758470843855, + "grad_norm": 0.0, + "learning_rate": 7.497827642140954e-07, + "loss": 1.0869, + "step": 29960 + }, + { + "epoch": 0.8797052087615245, + "grad_norm": 0.0, + "learning_rate": 7.494215226808754e-07, + "loss": 1.1484, + "step": 29961 + }, + { + "epoch": 0.8797345704386634, + "grad_norm": 0.0, + "learning_rate": 7.49060364803007e-07, + "loss": 1.2588, + "step": 29962 + }, + { + "epoch": 0.8797639321158025, + "grad_norm": 0.0, + "learning_rate": 7.486992905837553e-07, + "loss": 1.1548, + "step": 29963 + }, + { + "epoch": 0.8797932937929415, + "grad_norm": 0.0, + "learning_rate": 7.483383000263833e-07, + "loss": 1.2021, + "step": 29964 + }, + { + "epoch": 0.8798226554700804, + "grad_norm": 0.0, + "learning_rate": 7.479773931341593e-07, + "loss": 1.21, + "step": 29965 + }, + { + "epoch": 0.8798520171472195, + "grad_norm": 0.0, + "learning_rate": 7.476165699103443e-07, + "loss": 1.1382, + "step": 29966 + }, + { + "epoch": 0.8798813788243585, + "grad_norm": 0.0, + "learning_rate": 7.472558303582033e-07, + "loss": 1.2031, + "step": 29967 + }, + { + "epoch": 0.8799107405014974, + "grad_norm": 0.0, + "learning_rate": 7.46895174480996e-07, + "loss": 1.2939, + "step": 29968 + }, + { + "epoch": 0.8799401021786365, + "grad_norm": 0.0, + "learning_rate": 7.465346022819864e-07, + "loss": 1.2061, + "step": 29969 + }, + { + "epoch": 0.8799694638557755, + "grad_norm": 0.0, + "learning_rate": 7.461741137644329e-07, + "loss": 1.2119, + "step": 29970 + }, + { + "epoch": 0.8799988255329144, + "grad_norm": 0.0, + "learning_rate": 7.458137089315975e-07, + "loss": 1.1777, + "step": 29971 + }, + { + "epoch": 0.8800281872100535, + "grad_norm": 0.0, + "learning_rate": 7.454533877867387e-07, + "loss": 1.1719, + "step": 29972 + }, + { + "epoch": 0.8800575488871925, + "grad_norm": 0.0, + "learning_rate": 7.45093150333116e-07, + "loss": 1.1577, + "step": 29973 + }, + { + "epoch": 0.8800869105643314, + "grad_norm": 0.0, + "learning_rate": 7.447329965739858e-07, + "loss": 1.2646, + "step": 29974 + }, + { + "epoch": 0.8801162722414705, + "grad_norm": 0.0, + "learning_rate": 7.443729265126053e-07, + "loss": 1.1162, + "step": 29975 + }, + { + "epoch": 0.8801456339186094, + "grad_norm": 0.0, + "learning_rate": 7.440129401522311e-07, + "loss": 1.2578, + "step": 29976 + }, + { + "epoch": 0.8801749955957484, + "grad_norm": 0.0, + "learning_rate": 7.43653037496117e-07, + "loss": 1.1885, + "step": 29977 + }, + { + "epoch": 0.8802043572728874, + "grad_norm": 0.0, + "learning_rate": 7.432932185475205e-07, + "loss": 1.1724, + "step": 29978 + }, + { + "epoch": 0.8802337189500264, + "grad_norm": 0.0, + "learning_rate": 7.429334833096924e-07, + "loss": 1.3232, + "step": 29979 + }, + { + "epoch": 0.8802630806271654, + "grad_norm": 0.0, + "learning_rate": 7.425738317858899e-07, + "loss": 1.3076, + "step": 29980 + }, + { + "epoch": 0.8802924423043044, + "grad_norm": 0.0, + "learning_rate": 7.422142639793606e-07, + "loss": 1.188, + "step": 29981 + }, + { + "epoch": 0.8803218039814434, + "grad_norm": 0.0, + "learning_rate": 7.418547798933628e-07, + "loss": 1.2314, + "step": 29982 + }, + { + "epoch": 0.8803511656585824, + "grad_norm": 0.0, + "learning_rate": 7.414953795311408e-07, + "loss": 1.2227, + "step": 29983 + }, + { + "epoch": 0.8803805273357214, + "grad_norm": 0.0, + "learning_rate": 7.411360628959463e-07, + "loss": 1.2148, + "step": 29984 + }, + { + "epoch": 0.8804098890128604, + "grad_norm": 0.0, + "learning_rate": 7.407768299910301e-07, + "loss": 1.1338, + "step": 29985 + }, + { + "epoch": 0.8804392506899994, + "grad_norm": 0.0, + "learning_rate": 7.404176808196395e-07, + "loss": 1.2217, + "step": 29986 + }, + { + "epoch": 0.8804686123671384, + "grad_norm": 0.0, + "learning_rate": 7.400586153850253e-07, + "loss": 1.2529, + "step": 29987 + }, + { + "epoch": 0.8804979740442774, + "grad_norm": 0.0, + "learning_rate": 7.396996336904306e-07, + "loss": 1.292, + "step": 29988 + }, + { + "epoch": 0.8805273357214164, + "grad_norm": 0.0, + "learning_rate": 7.393407357391047e-07, + "loss": 1.1831, + "step": 29989 + }, + { + "epoch": 0.8805566973985554, + "grad_norm": 0.0, + "learning_rate": 7.38981921534293e-07, + "loss": 1.1406, + "step": 29990 + }, + { + "epoch": 0.8805860590756944, + "grad_norm": 0.0, + "learning_rate": 7.386231910792397e-07, + "loss": 1.2124, + "step": 29991 + }, + { + "epoch": 0.8806154207528334, + "grad_norm": 0.0, + "learning_rate": 7.382645443771863e-07, + "loss": 1.1279, + "step": 29992 + }, + { + "epoch": 0.8806447824299724, + "grad_norm": 0.0, + "learning_rate": 7.379059814313805e-07, + "loss": 1.2129, + "step": 29993 + }, + { + "epoch": 0.8806741441071114, + "grad_norm": 0.0, + "learning_rate": 7.375475022450629e-07, + "loss": 1.2061, + "step": 29994 + }, + { + "epoch": 0.8807035057842504, + "grad_norm": 0.0, + "learning_rate": 7.371891068214742e-07, + "loss": 1.2422, + "step": 29995 + }, + { + "epoch": 0.8807328674613893, + "grad_norm": 0.0, + "learning_rate": 7.368307951638576e-07, + "loss": 1.2168, + "step": 29996 + }, + { + "epoch": 0.8807622291385284, + "grad_norm": 0.0, + "learning_rate": 7.364725672754514e-07, + "loss": 1.2041, + "step": 29997 + }, + { + "epoch": 0.8807915908156674, + "grad_norm": 0.0, + "learning_rate": 7.361144231594974e-07, + "loss": 1.2188, + "step": 29998 + }, + { + "epoch": 0.8808209524928063, + "grad_norm": 0.0, + "learning_rate": 7.357563628192344e-07, + "loss": 1.3691, + "step": 29999 + }, + { + "epoch": 0.8808503141699454, + "grad_norm": 0.0, + "learning_rate": 7.353983862578972e-07, + "loss": 1.1533, + "step": 30000 + }, + { + "epoch": 0.8808796758470844, + "grad_norm": 0.0, + "learning_rate": 7.350404934787248e-07, + "loss": 1.1592, + "step": 30001 + }, + { + "epoch": 0.8809090375242233, + "grad_norm": 0.0, + "learning_rate": 7.346826844849553e-07, + "loss": 1.1748, + "step": 30002 + }, + { + "epoch": 0.8809383992013624, + "grad_norm": 0.0, + "learning_rate": 7.34324959279824e-07, + "loss": 1.0786, + "step": 30003 + }, + { + "epoch": 0.8809677608785014, + "grad_norm": 0.0, + "learning_rate": 7.339673178665618e-07, + "loss": 1.1475, + "step": 30004 + }, + { + "epoch": 0.8809971225556403, + "grad_norm": 0.0, + "learning_rate": 7.336097602484093e-07, + "loss": 1.0146, + "step": 30005 + }, + { + "epoch": 0.8810264842327794, + "grad_norm": 0.0, + "learning_rate": 7.332522864285941e-07, + "loss": 1.3145, + "step": 30006 + }, + { + "epoch": 0.8810558459099184, + "grad_norm": 0.0, + "learning_rate": 7.328948964103555e-07, + "loss": 1.0767, + "step": 30007 + }, + { + "epoch": 0.8810852075870573, + "grad_norm": 0.0, + "learning_rate": 7.325375901969189e-07, + "loss": 1.2583, + "step": 30008 + }, + { + "epoch": 0.8811145692641964, + "grad_norm": 0.0, + "learning_rate": 7.321803677915196e-07, + "loss": 0.9824, + "step": 30009 + }, + { + "epoch": 0.8811439309413354, + "grad_norm": 0.0, + "learning_rate": 7.318232291973848e-07, + "loss": 1.2554, + "step": 30010 + }, + { + "epoch": 0.8811732926184743, + "grad_norm": 0.0, + "learning_rate": 7.314661744177487e-07, + "loss": 1.2002, + "step": 30011 + }, + { + "epoch": 0.8812026542956134, + "grad_norm": 0.0, + "learning_rate": 7.311092034558353e-07, + "loss": 1.1396, + "step": 30012 + }, + { + "epoch": 0.8812320159727524, + "grad_norm": 0.0, + "learning_rate": 7.307523163148766e-07, + "loss": 1.1353, + "step": 30013 + }, + { + "epoch": 0.8812613776498913, + "grad_norm": 0.0, + "learning_rate": 7.303955129980989e-07, + "loss": 1.292, + "step": 30014 + }, + { + "epoch": 0.8812907393270304, + "grad_norm": 0.0, + "learning_rate": 7.300387935087283e-07, + "loss": 1.1802, + "step": 30015 + }, + { + "epoch": 0.8813201010041694, + "grad_norm": 0.0, + "learning_rate": 7.296821578499913e-07, + "loss": 1.2461, + "step": 30016 + }, + { + "epoch": 0.8813494626813083, + "grad_norm": 0.0, + "learning_rate": 7.293256060251108e-07, + "loss": 1.1807, + "step": 30017 + }, + { + "epoch": 0.8813788243584474, + "grad_norm": 0.0, + "learning_rate": 7.289691380373154e-07, + "loss": 1.1553, + "step": 30018 + }, + { + "epoch": 0.8814081860355863, + "grad_norm": 0.0, + "learning_rate": 7.286127538898246e-07, + "loss": 1.1729, + "step": 30019 + }, + { + "epoch": 0.8814375477127253, + "grad_norm": 0.0, + "learning_rate": 7.282564535858639e-07, + "loss": 1.2656, + "step": 30020 + }, + { + "epoch": 0.8814669093898644, + "grad_norm": 0.0, + "learning_rate": 7.279002371286537e-07, + "loss": 1.3291, + "step": 30021 + }, + { + "epoch": 0.8814962710670033, + "grad_norm": 0.0, + "learning_rate": 7.275441045214183e-07, + "loss": 1.2935, + "step": 30022 + }, + { + "epoch": 0.8815256327441423, + "grad_norm": 0.0, + "learning_rate": 7.27188055767375e-07, + "loss": 1.335, + "step": 30023 + }, + { + "epoch": 0.8815549944212814, + "grad_norm": 0.0, + "learning_rate": 7.268320908697457e-07, + "loss": 1.1782, + "step": 30024 + }, + { + "epoch": 0.8815843560984203, + "grad_norm": 0.0, + "learning_rate": 7.26476209831749e-07, + "loss": 1.2959, + "step": 30025 + }, + { + "epoch": 0.8816137177755593, + "grad_norm": 0.0, + "learning_rate": 7.261204126566013e-07, + "loss": 1.1992, + "step": 30026 + }, + { + "epoch": 0.8816430794526984, + "grad_norm": 0.0, + "learning_rate": 7.257646993475232e-07, + "loss": 1.2217, + "step": 30027 + }, + { + "epoch": 0.8816724411298373, + "grad_norm": 0.0, + "learning_rate": 7.254090699077277e-07, + "loss": 1.2617, + "step": 30028 + }, + { + "epoch": 0.8817018028069763, + "grad_norm": 0.0, + "learning_rate": 7.250535243404355e-07, + "loss": 1.0547, + "step": 30029 + }, + { + "epoch": 0.8817311644841154, + "grad_norm": 0.0, + "learning_rate": 7.246980626488576e-07, + "loss": 1.2344, + "step": 30030 + }, + { + "epoch": 0.8817605261612543, + "grad_norm": 0.0, + "learning_rate": 7.243426848362145e-07, + "loss": 1.1924, + "step": 30031 + }, + { + "epoch": 0.8817898878383933, + "grad_norm": 0.0, + "learning_rate": 7.239873909057138e-07, + "loss": 1.2666, + "step": 30032 + }, + { + "epoch": 0.8818192495155324, + "grad_norm": 0.0, + "learning_rate": 7.236321808605695e-07, + "loss": 1.1553, + "step": 30033 + }, + { + "epoch": 0.8818486111926713, + "grad_norm": 0.0, + "learning_rate": 7.232770547039969e-07, + "loss": 1.1475, + "step": 30034 + }, + { + "epoch": 0.8818779728698103, + "grad_norm": 0.0, + "learning_rate": 7.229220124392045e-07, + "loss": 1.1719, + "step": 30035 + }, + { + "epoch": 0.8819073345469494, + "grad_norm": 0.0, + "learning_rate": 7.225670540694052e-07, + "loss": 1.2793, + "step": 30036 + }, + { + "epoch": 0.8819366962240883, + "grad_norm": 0.0, + "learning_rate": 7.222121795978054e-07, + "loss": 1.0742, + "step": 30037 + }, + { + "epoch": 0.8819660579012273, + "grad_norm": 0.0, + "learning_rate": 7.218573890276204e-07, + "loss": 1.2461, + "step": 30038 + }, + { + "epoch": 0.8819954195783664, + "grad_norm": 0.0, + "learning_rate": 7.215026823620542e-07, + "loss": 1.248, + "step": 30039 + }, + { + "epoch": 0.8820247812555053, + "grad_norm": 0.0, + "learning_rate": 7.211480596043153e-07, + "loss": 1.1001, + "step": 30040 + }, + { + "epoch": 0.8820541429326443, + "grad_norm": 0.0, + "learning_rate": 7.2079352075761e-07, + "loss": 1.3467, + "step": 30041 + }, + { + "epoch": 0.8820835046097834, + "grad_norm": 0.0, + "learning_rate": 7.204390658251459e-07, + "loss": 1.1514, + "step": 30042 + }, + { + "epoch": 0.8821128662869223, + "grad_norm": 0.0, + "learning_rate": 7.200846948101293e-07, + "loss": 1.0684, + "step": 30043 + }, + { + "epoch": 0.8821422279640613, + "grad_norm": 0.0, + "learning_rate": 7.197304077157608e-07, + "loss": 1.1582, + "step": 30044 + }, + { + "epoch": 0.8821715896412003, + "grad_norm": 0.0, + "learning_rate": 7.193762045452479e-07, + "loss": 1.25, + "step": 30045 + }, + { + "epoch": 0.8822009513183393, + "grad_norm": 0.0, + "learning_rate": 7.190220853017926e-07, + "loss": 1.373, + "step": 30046 + }, + { + "epoch": 0.8822303129954783, + "grad_norm": 0.0, + "learning_rate": 7.186680499885978e-07, + "loss": 1.1792, + "step": 30047 + }, + { + "epoch": 0.8822596746726173, + "grad_norm": 0.0, + "learning_rate": 7.183140986088666e-07, + "loss": 1.2285, + "step": 30048 + }, + { + "epoch": 0.8822890363497563, + "grad_norm": 0.0, + "learning_rate": 7.179602311657963e-07, + "loss": 1.2402, + "step": 30049 + }, + { + "epoch": 0.8823183980268953, + "grad_norm": 0.0, + "learning_rate": 7.176064476625877e-07, + "loss": 1.1411, + "step": 30050 + }, + { + "epoch": 0.8823477597040343, + "grad_norm": 0.0, + "learning_rate": 7.172527481024439e-07, + "loss": 1.2427, + "step": 30051 + }, + { + "epoch": 0.8823771213811733, + "grad_norm": 0.0, + "learning_rate": 7.168991324885599e-07, + "loss": 1.2158, + "step": 30052 + }, + { + "epoch": 0.8824064830583123, + "grad_norm": 0.0, + "learning_rate": 7.165456008241323e-07, + "loss": 1.3052, + "step": 30053 + }, + { + "epoch": 0.8824358447354513, + "grad_norm": 0.0, + "learning_rate": 7.161921531123628e-07, + "loss": 1.2412, + "step": 30054 + }, + { + "epoch": 0.8824652064125903, + "grad_norm": 0.0, + "learning_rate": 7.158387893564433e-07, + "loss": 1.2725, + "step": 30055 + }, + { + "epoch": 0.8824945680897293, + "grad_norm": 0.0, + "learning_rate": 7.154855095595748e-07, + "loss": 1.2002, + "step": 30056 + }, + { + "epoch": 0.8825239297668683, + "grad_norm": 0.0, + "learning_rate": 7.151323137249455e-07, + "loss": 1.2754, + "step": 30057 + }, + { + "epoch": 0.8825532914440073, + "grad_norm": 0.0, + "learning_rate": 7.147792018557543e-07, + "loss": 1.1431, + "step": 30058 + }, + { + "epoch": 0.8825826531211463, + "grad_norm": 0.0, + "learning_rate": 7.144261739551905e-07, + "loss": 1.2754, + "step": 30059 + }, + { + "epoch": 0.8826120147982853, + "grad_norm": 0.0, + "learning_rate": 7.140732300264508e-07, + "loss": 1.1797, + "step": 30060 + }, + { + "epoch": 0.8826413764754243, + "grad_norm": 0.0, + "learning_rate": 7.137203700727235e-07, + "loss": 1.2383, + "step": 30061 + }, + { + "epoch": 0.8826707381525633, + "grad_norm": 0.0, + "learning_rate": 7.13367594097203e-07, + "loss": 1.1978, + "step": 30062 + }, + { + "epoch": 0.8827000998297023, + "grad_norm": 0.0, + "learning_rate": 7.130149021030774e-07, + "loss": 1.2065, + "step": 30063 + }, + { + "epoch": 0.8827294615068413, + "grad_norm": 0.0, + "learning_rate": 7.126622940935368e-07, + "loss": 1.1758, + "step": 30064 + }, + { + "epoch": 0.8827588231839802, + "grad_norm": 0.0, + "learning_rate": 7.123097700717696e-07, + "loss": 1.2109, + "step": 30065 + }, + { + "epoch": 0.8827881848611193, + "grad_norm": 0.0, + "learning_rate": 7.119573300409621e-07, + "loss": 1.1577, + "step": 30066 + }, + { + "epoch": 0.8828175465382583, + "grad_norm": 0.0, + "learning_rate": 7.11604974004304e-07, + "loss": 1.2803, + "step": 30067 + }, + { + "epoch": 0.8828469082153972, + "grad_norm": 0.0, + "learning_rate": 7.112527019649796e-07, + "loss": 1.2207, + "step": 30068 + }, + { + "epoch": 0.8828762698925363, + "grad_norm": 0.0, + "learning_rate": 7.109005139261782e-07, + "loss": 1.1968, + "step": 30069 + }, + { + "epoch": 0.8829056315696753, + "grad_norm": 0.0, + "learning_rate": 7.105484098910808e-07, + "loss": 1.144, + "step": 30070 + }, + { + "epoch": 0.8829349932468142, + "grad_norm": 0.0, + "learning_rate": 7.101963898628739e-07, + "loss": 1.2192, + "step": 30071 + }, + { + "epoch": 0.8829643549239533, + "grad_norm": 0.0, + "learning_rate": 7.098444538447414e-07, + "loss": 1.2188, + "step": 30072 + }, + { + "epoch": 0.8829937166010923, + "grad_norm": 0.0, + "learning_rate": 7.09492601839864e-07, + "loss": 1.1943, + "step": 30073 + }, + { + "epoch": 0.8830230782782312, + "grad_norm": 0.0, + "learning_rate": 7.091408338514239e-07, + "loss": 1.2051, + "step": 30074 + }, + { + "epoch": 0.8830524399553703, + "grad_norm": 0.0, + "learning_rate": 7.087891498826016e-07, + "loss": 1.2852, + "step": 30075 + }, + { + "epoch": 0.8830818016325093, + "grad_norm": 0.0, + "learning_rate": 7.084375499365803e-07, + "loss": 1.1748, + "step": 30076 + }, + { + "epoch": 0.8831111633096482, + "grad_norm": 0.0, + "learning_rate": 7.080860340165352e-07, + "loss": 1.2598, + "step": 30077 + }, + { + "epoch": 0.8831405249867872, + "grad_norm": 0.0, + "learning_rate": 7.077346021256504e-07, + "loss": 1.1318, + "step": 30078 + }, + { + "epoch": 0.8831698866639263, + "grad_norm": 0.0, + "learning_rate": 7.073832542671e-07, + "loss": 1.1914, + "step": 30079 + }, + { + "epoch": 0.8831992483410652, + "grad_norm": 0.0, + "learning_rate": 7.070319904440637e-07, + "loss": 1.2383, + "step": 30080 + }, + { + "epoch": 0.8832286100182042, + "grad_norm": 0.0, + "learning_rate": 7.066808106597179e-07, + "loss": 1.3057, + "step": 30081 + }, + { + "epoch": 0.8832579716953433, + "grad_norm": 0.0, + "learning_rate": 7.063297149172365e-07, + "loss": 1.3086, + "step": 30082 + }, + { + "epoch": 0.8832873333724822, + "grad_norm": 0.0, + "learning_rate": 7.059787032197962e-07, + "loss": 1.3047, + "step": 30083 + }, + { + "epoch": 0.8833166950496212, + "grad_norm": 0.0, + "learning_rate": 7.056277755705687e-07, + "loss": 1.2197, + "step": 30084 + }, + { + "epoch": 0.8833460567267603, + "grad_norm": 0.0, + "learning_rate": 7.052769319727315e-07, + "loss": 1.2476, + "step": 30085 + }, + { + "epoch": 0.8833754184038992, + "grad_norm": 0.0, + "learning_rate": 7.049261724294543e-07, + "loss": 1.167, + "step": 30086 + }, + { + "epoch": 0.8834047800810382, + "grad_norm": 0.0, + "learning_rate": 7.045754969439111e-07, + "loss": 1.1777, + "step": 30087 + }, + { + "epoch": 0.8834341417581772, + "grad_norm": 0.0, + "learning_rate": 7.042249055192718e-07, + "loss": 1.1777, + "step": 30088 + }, + { + "epoch": 0.8834635034353162, + "grad_norm": 0.0, + "learning_rate": 7.038743981587082e-07, + "loss": 1.3076, + "step": 30089 + }, + { + "epoch": 0.8834928651124552, + "grad_norm": 0.0, + "learning_rate": 7.035239748653877e-07, + "loss": 1.2129, + "step": 30090 + }, + { + "epoch": 0.8835222267895942, + "grad_norm": 0.0, + "learning_rate": 7.031736356424823e-07, + "loss": 1.3057, + "step": 30091 + }, + { + "epoch": 0.8835515884667332, + "grad_norm": 0.0, + "learning_rate": 7.028233804931583e-07, + "loss": 1.106, + "step": 30092 + }, + { + "epoch": 0.8835809501438722, + "grad_norm": 0.0, + "learning_rate": 7.024732094205822e-07, + "loss": 1.291, + "step": 30093 + }, + { + "epoch": 0.8836103118210112, + "grad_norm": 0.0, + "learning_rate": 7.021231224279234e-07, + "loss": 1.249, + "step": 30094 + }, + { + "epoch": 0.8836396734981502, + "grad_norm": 0.0, + "learning_rate": 7.017731195183453e-07, + "loss": 1.3281, + "step": 30095 + }, + { + "epoch": 0.8836690351752892, + "grad_norm": 0.0, + "learning_rate": 7.01423200695015e-07, + "loss": 1.1934, + "step": 30096 + }, + { + "epoch": 0.8836983968524282, + "grad_norm": 0.0, + "learning_rate": 7.010733659610968e-07, + "loss": 1.1807, + "step": 30097 + }, + { + "epoch": 0.8837277585295672, + "grad_norm": 0.0, + "learning_rate": 7.007236153197527e-07, + "loss": 1.2046, + "step": 30098 + }, + { + "epoch": 0.8837571202067062, + "grad_norm": 0.0, + "learning_rate": 7.003739487741457e-07, + "loss": 1.249, + "step": 30099 + }, + { + "epoch": 0.8837864818838452, + "grad_norm": 0.0, + "learning_rate": 7.000243663274409e-07, + "loss": 1.1816, + "step": 30100 + }, + { + "epoch": 0.8838158435609842, + "grad_norm": 0.0, + "learning_rate": 6.996748679827969e-07, + "loss": 1.2134, + "step": 30101 + }, + { + "epoch": 0.8838452052381232, + "grad_norm": 0.0, + "learning_rate": 6.993254537433724e-07, + "loss": 1.1582, + "step": 30102 + }, + { + "epoch": 0.8838745669152622, + "grad_norm": 0.0, + "learning_rate": 6.989761236123327e-07, + "loss": 1.2148, + "step": 30103 + }, + { + "epoch": 0.8839039285924012, + "grad_norm": 0.0, + "learning_rate": 6.986268775928317e-07, + "loss": 1.3574, + "step": 30104 + }, + { + "epoch": 0.8839332902695402, + "grad_norm": 0.0, + "learning_rate": 6.982777156880316e-07, + "loss": 1.082, + "step": 30105 + }, + { + "epoch": 0.8839626519466792, + "grad_norm": 0.0, + "learning_rate": 6.979286379010885e-07, + "loss": 1.2031, + "step": 30106 + }, + { + "epoch": 0.8839920136238182, + "grad_norm": 0.0, + "learning_rate": 6.97579644235159e-07, + "loss": 1.1904, + "step": 30107 + }, + { + "epoch": 0.8840213753009571, + "grad_norm": 0.0, + "learning_rate": 6.972307346933971e-07, + "loss": 1.2383, + "step": 30108 + }, + { + "epoch": 0.8840507369780962, + "grad_norm": 0.0, + "learning_rate": 6.968819092789625e-07, + "loss": 1.1348, + "step": 30109 + }, + { + "epoch": 0.8840800986552352, + "grad_norm": 0.0, + "learning_rate": 6.965331679950049e-07, + "loss": 1.1289, + "step": 30110 + }, + { + "epoch": 0.8841094603323741, + "grad_norm": 0.0, + "learning_rate": 6.96184510844683e-07, + "loss": 1.1143, + "step": 30111 + }, + { + "epoch": 0.8841388220095132, + "grad_norm": 0.0, + "learning_rate": 6.958359378311475e-07, + "loss": 1.1602, + "step": 30112 + }, + { + "epoch": 0.8841681836866522, + "grad_norm": 0.0, + "learning_rate": 6.954874489575492e-07, + "loss": 1.1753, + "step": 30113 + }, + { + "epoch": 0.8841975453637911, + "grad_norm": 0.0, + "learning_rate": 6.951390442270423e-07, + "loss": 1.1694, + "step": 30114 + }, + { + "epoch": 0.8842269070409302, + "grad_norm": 0.0, + "learning_rate": 6.947907236427731e-07, + "loss": 1.1069, + "step": 30115 + }, + { + "epoch": 0.8842562687180692, + "grad_norm": 0.0, + "learning_rate": 6.94442487207897e-07, + "loss": 1.2354, + "step": 30116 + }, + { + "epoch": 0.8842856303952081, + "grad_norm": 0.0, + "learning_rate": 6.94094334925558e-07, + "loss": 1.1772, + "step": 30117 + }, + { + "epoch": 0.8843149920723472, + "grad_norm": 0.0, + "learning_rate": 6.937462667989103e-07, + "loss": 1.1777, + "step": 30118 + }, + { + "epoch": 0.8843443537494862, + "grad_norm": 0.0, + "learning_rate": 6.933982828310959e-07, + "loss": 1.1567, + "step": 30119 + }, + { + "epoch": 0.8843737154266251, + "grad_norm": 0.0, + "learning_rate": 6.930503830252655e-07, + "loss": 1.252, + "step": 30120 + }, + { + "epoch": 0.8844030771037642, + "grad_norm": 0.0, + "learning_rate": 6.927025673845655e-07, + "loss": 1.2046, + "step": 30121 + }, + { + "epoch": 0.8844324387809032, + "grad_norm": 0.0, + "learning_rate": 6.923548359121391e-07, + "loss": 1.207, + "step": 30122 + }, + { + "epoch": 0.8844618004580421, + "grad_norm": 0.0, + "learning_rate": 6.920071886111312e-07, + "loss": 1.3311, + "step": 30123 + }, + { + "epoch": 0.8844911621351812, + "grad_norm": 0.0, + "learning_rate": 6.916596254846842e-07, + "loss": 1.2432, + "step": 30124 + }, + { + "epoch": 0.8845205238123202, + "grad_norm": 0.0, + "learning_rate": 6.913121465359451e-07, + "loss": 1.2139, + "step": 30125 + }, + { + "epoch": 0.8845498854894591, + "grad_norm": 0.0, + "learning_rate": 6.909647517680529e-07, + "loss": 1.2549, + "step": 30126 + }, + { + "epoch": 0.8845792471665982, + "grad_norm": 0.0, + "learning_rate": 6.906174411841526e-07, + "loss": 1.335, + "step": 30127 + }, + { + "epoch": 0.8846086088437372, + "grad_norm": 0.0, + "learning_rate": 6.902702147873808e-07, + "loss": 1.1582, + "step": 30128 + }, + { + "epoch": 0.8846379705208761, + "grad_norm": 0.0, + "learning_rate": 6.899230725808814e-07, + "loss": 1.1494, + "step": 30129 + }, + { + "epoch": 0.8846673321980152, + "grad_norm": 0.0, + "learning_rate": 6.895760145677921e-07, + "loss": 1.1685, + "step": 30130 + }, + { + "epoch": 0.8846966938751541, + "grad_norm": 0.0, + "learning_rate": 6.892290407512504e-07, + "loss": 1.1494, + "step": 30131 + }, + { + "epoch": 0.8847260555522931, + "grad_norm": 0.0, + "learning_rate": 6.888821511343957e-07, + "loss": 1.1689, + "step": 30132 + }, + { + "epoch": 0.8847554172294322, + "grad_norm": 0.0, + "learning_rate": 6.885353457203637e-07, + "loss": 1.2656, + "step": 30133 + }, + { + "epoch": 0.8847847789065711, + "grad_norm": 0.0, + "learning_rate": 6.881886245122915e-07, + "loss": 1.3311, + "step": 30134 + }, + { + "epoch": 0.8848141405837101, + "grad_norm": 0.0, + "learning_rate": 6.878419875133136e-07, + "loss": 1.0889, + "step": 30135 + }, + { + "epoch": 0.8848435022608492, + "grad_norm": 0.0, + "learning_rate": 6.874954347265661e-07, + "loss": 1.29, + "step": 30136 + }, + { + "epoch": 0.8848728639379881, + "grad_norm": 0.0, + "learning_rate": 6.871489661551833e-07, + "loss": 1.126, + "step": 30137 + }, + { + "epoch": 0.8849022256151271, + "grad_norm": 0.0, + "learning_rate": 6.868025818022972e-07, + "loss": 1.2119, + "step": 30138 + }, + { + "epoch": 0.8849315872922662, + "grad_norm": 0.0, + "learning_rate": 6.864562816710385e-07, + "loss": 1.1553, + "step": 30139 + }, + { + "epoch": 0.8849609489694051, + "grad_norm": 0.0, + "learning_rate": 6.861100657645425e-07, + "loss": 1.2764, + "step": 30140 + }, + { + "epoch": 0.8849903106465441, + "grad_norm": 0.0, + "learning_rate": 6.85763934085939e-07, + "loss": 1.2422, + "step": 30141 + }, + { + "epoch": 0.8850196723236832, + "grad_norm": 0.0, + "learning_rate": 6.854178866383565e-07, + "loss": 1.29, + "step": 30142 + }, + { + "epoch": 0.8850490340008221, + "grad_norm": 0.0, + "learning_rate": 6.850719234249259e-07, + "loss": 1.3213, + "step": 30143 + }, + { + "epoch": 0.8850783956779611, + "grad_norm": 0.0, + "learning_rate": 6.847260444487747e-07, + "loss": 1.2285, + "step": 30144 + }, + { + "epoch": 0.8851077573551002, + "grad_norm": 0.0, + "learning_rate": 6.843802497130325e-07, + "loss": 1.2134, + "step": 30145 + }, + { + "epoch": 0.8851371190322391, + "grad_norm": 0.0, + "learning_rate": 6.840345392208259e-07, + "loss": 1.2021, + "step": 30146 + }, + { + "epoch": 0.8851664807093781, + "grad_norm": 0.0, + "learning_rate": 6.836889129752811e-07, + "loss": 1.252, + "step": 30147 + }, + { + "epoch": 0.8851958423865172, + "grad_norm": 0.0, + "learning_rate": 6.833433709795212e-07, + "loss": 1.1758, + "step": 30148 + }, + { + "epoch": 0.8852252040636561, + "grad_norm": 0.0, + "learning_rate": 6.82997913236676e-07, + "loss": 1.2422, + "step": 30149 + }, + { + "epoch": 0.8852545657407951, + "grad_norm": 0.0, + "learning_rate": 6.826525397498651e-07, + "loss": 1.2222, + "step": 30150 + }, + { + "epoch": 0.8852839274179342, + "grad_norm": 0.0, + "learning_rate": 6.823072505222128e-07, + "loss": 1.1592, + "step": 30151 + }, + { + "epoch": 0.8853132890950731, + "grad_norm": 0.0, + "learning_rate": 6.819620455568443e-07, + "loss": 1.25, + "step": 30152 + }, + { + "epoch": 0.8853426507722121, + "grad_norm": 0.0, + "learning_rate": 6.816169248568771e-07, + "loss": 1.2451, + "step": 30153 + }, + { + "epoch": 0.8853720124493512, + "grad_norm": 0.0, + "learning_rate": 6.812718884254365e-07, + "loss": 1.1445, + "step": 30154 + }, + { + "epoch": 0.8854013741264901, + "grad_norm": 0.0, + "learning_rate": 6.809269362656412e-07, + "loss": 1.2617, + "step": 30155 + }, + { + "epoch": 0.8854307358036291, + "grad_norm": 0.0, + "learning_rate": 6.805820683806097e-07, + "loss": 1.2373, + "step": 30156 + }, + { + "epoch": 0.8854600974807681, + "grad_norm": 0.0, + "learning_rate": 6.802372847734596e-07, + "loss": 1.2407, + "step": 30157 + }, + { + "epoch": 0.8854894591579071, + "grad_norm": 0.0, + "learning_rate": 6.798925854473115e-07, + "loss": 1.1997, + "step": 30158 + }, + { + "epoch": 0.8855188208350461, + "grad_norm": 0.0, + "learning_rate": 6.79547970405281e-07, + "loss": 1.1694, + "step": 30159 + }, + { + "epoch": 0.8855481825121851, + "grad_norm": 0.0, + "learning_rate": 6.792034396504866e-07, + "loss": 1.1523, + "step": 30160 + }, + { + "epoch": 0.8855775441893241, + "grad_norm": 0.0, + "learning_rate": 6.788589931860435e-07, + "loss": 1.1211, + "step": 30161 + }, + { + "epoch": 0.8856069058664631, + "grad_norm": 0.0, + "learning_rate": 6.785146310150648e-07, + "loss": 1.2144, + "step": 30162 + }, + { + "epoch": 0.8856362675436021, + "grad_norm": 0.0, + "learning_rate": 6.781703531406647e-07, + "loss": 1.2773, + "step": 30163 + }, + { + "epoch": 0.8856656292207411, + "grad_norm": 0.0, + "learning_rate": 6.778261595659575e-07, + "loss": 1.2139, + "step": 30164 + }, + { + "epoch": 0.8856949908978801, + "grad_norm": 0.0, + "learning_rate": 6.77482050294056e-07, + "loss": 1.2393, + "step": 30165 + }, + { + "epoch": 0.8857243525750191, + "grad_norm": 0.0, + "learning_rate": 6.771380253280712e-07, + "loss": 1.1592, + "step": 30166 + }, + { + "epoch": 0.8857537142521581, + "grad_norm": 0.0, + "learning_rate": 6.767940846711163e-07, + "loss": 1.1528, + "step": 30167 + }, + { + "epoch": 0.8857830759292971, + "grad_norm": 0.0, + "learning_rate": 6.764502283262986e-07, + "loss": 1.1763, + "step": 30168 + }, + { + "epoch": 0.8858124376064361, + "grad_norm": 0.0, + "learning_rate": 6.761064562967301e-07, + "loss": 1.1963, + "step": 30169 + }, + { + "epoch": 0.8858417992835751, + "grad_norm": 0.0, + "learning_rate": 6.757627685855195e-07, + "loss": 1.1875, + "step": 30170 + }, + { + "epoch": 0.885871160960714, + "grad_norm": 0.0, + "learning_rate": 6.754191651957742e-07, + "loss": 1.1812, + "step": 30171 + }, + { + "epoch": 0.8859005226378531, + "grad_norm": 0.0, + "learning_rate": 6.750756461306019e-07, + "loss": 1.2529, + "step": 30172 + }, + { + "epoch": 0.8859298843149921, + "grad_norm": 0.0, + "learning_rate": 6.747322113931065e-07, + "loss": 1.1284, + "step": 30173 + }, + { + "epoch": 0.885959245992131, + "grad_norm": 0.0, + "learning_rate": 6.743888609863969e-07, + "loss": 1.1816, + "step": 30174 + }, + { + "epoch": 0.8859886076692701, + "grad_norm": 0.0, + "learning_rate": 6.740455949135772e-07, + "loss": 1.3887, + "step": 30175 + }, + { + "epoch": 0.8860179693464091, + "grad_norm": 0.0, + "learning_rate": 6.737024131777514e-07, + "loss": 1.252, + "step": 30176 + }, + { + "epoch": 0.886047331023548, + "grad_norm": 0.0, + "learning_rate": 6.733593157820228e-07, + "loss": 1.2305, + "step": 30177 + }, + { + "epoch": 0.8860766927006871, + "grad_norm": 0.0, + "learning_rate": 6.730163027294956e-07, + "loss": 1.1118, + "step": 30178 + }, + { + "epoch": 0.8861060543778261, + "grad_norm": 0.0, + "learning_rate": 6.726733740232704e-07, + "loss": 1.1304, + "step": 30179 + }, + { + "epoch": 0.886135416054965, + "grad_norm": 0.0, + "learning_rate": 6.723305296664495e-07, + "loss": 1.269, + "step": 30180 + }, + { + "epoch": 0.886164777732104, + "grad_norm": 0.0, + "learning_rate": 6.719877696621324e-07, + "loss": 1.1367, + "step": 30181 + }, + { + "epoch": 0.8861941394092431, + "grad_norm": 0.0, + "learning_rate": 6.716450940134167e-07, + "loss": 1.2505, + "step": 30182 + }, + { + "epoch": 0.886223501086382, + "grad_norm": 0.0, + "learning_rate": 6.713025027234044e-07, + "loss": 1.2002, + "step": 30183 + }, + { + "epoch": 0.886252862763521, + "grad_norm": 0.0, + "learning_rate": 6.709599957951917e-07, + "loss": 1.1523, + "step": 30184 + }, + { + "epoch": 0.8862822244406601, + "grad_norm": 0.0, + "learning_rate": 6.706175732318798e-07, + "loss": 1.207, + "step": 30185 + }, + { + "epoch": 0.886311586117799, + "grad_norm": 0.0, + "learning_rate": 6.702752350365593e-07, + "loss": 1.291, + "step": 30186 + }, + { + "epoch": 0.886340947794938, + "grad_norm": 0.0, + "learning_rate": 6.699329812123334e-07, + "loss": 1.3203, + "step": 30187 + }, + { + "epoch": 0.8863703094720771, + "grad_norm": 0.0, + "learning_rate": 6.695908117622885e-07, + "loss": 1.2031, + "step": 30188 + }, + { + "epoch": 0.886399671149216, + "grad_norm": 0.0, + "learning_rate": 6.692487266895265e-07, + "loss": 1.2998, + "step": 30189 + }, + { + "epoch": 0.886429032826355, + "grad_norm": 0.0, + "learning_rate": 6.689067259971371e-07, + "loss": 1.2002, + "step": 30190 + }, + { + "epoch": 0.8864583945034941, + "grad_norm": 0.0, + "learning_rate": 6.685648096882124e-07, + "loss": 1.2734, + "step": 30191 + }, + { + "epoch": 0.886487756180633, + "grad_norm": 0.0, + "learning_rate": 6.682229777658477e-07, + "loss": 1.2305, + "step": 30192 + }, + { + "epoch": 0.886517117857772, + "grad_norm": 0.0, + "learning_rate": 6.678812302331306e-07, + "loss": 1.2158, + "step": 30193 + }, + { + "epoch": 0.8865464795349111, + "grad_norm": 0.0, + "learning_rate": 6.67539567093155e-07, + "loss": 1.2646, + "step": 30194 + }, + { + "epoch": 0.88657584121205, + "grad_norm": 0.0, + "learning_rate": 6.671979883490098e-07, + "loss": 1.1162, + "step": 30195 + }, + { + "epoch": 0.886605202889189, + "grad_norm": 0.0, + "learning_rate": 6.668564940037836e-07, + "loss": 1.2046, + "step": 30196 + }, + { + "epoch": 0.886634564566328, + "grad_norm": 0.0, + "learning_rate": 6.665150840605617e-07, + "loss": 1.2588, + "step": 30197 + }, + { + "epoch": 0.886663926243467, + "grad_norm": 0.0, + "learning_rate": 6.661737585224371e-07, + "loss": 1.1123, + "step": 30198 + }, + { + "epoch": 0.886693287920606, + "grad_norm": 0.0, + "learning_rate": 6.658325173924907e-07, + "loss": 1.2402, + "step": 30199 + }, + { + "epoch": 0.886722649597745, + "grad_norm": 0.0, + "learning_rate": 6.654913606738134e-07, + "loss": 1.29, + "step": 30200 + }, + { + "epoch": 0.886752011274884, + "grad_norm": 0.0, + "learning_rate": 6.651502883694894e-07, + "loss": 1.1919, + "step": 30201 + }, + { + "epoch": 0.886781372952023, + "grad_norm": 0.0, + "learning_rate": 6.648093004825995e-07, + "loss": 1.1138, + "step": 30202 + }, + { + "epoch": 0.886810734629162, + "grad_norm": 0.0, + "learning_rate": 6.644683970162314e-07, + "loss": 1.1963, + "step": 30203 + }, + { + "epoch": 0.886840096306301, + "grad_norm": 0.0, + "learning_rate": 6.641275779734668e-07, + "loss": 1.2554, + "step": 30204 + }, + { + "epoch": 0.88686945798344, + "grad_norm": 0.0, + "learning_rate": 6.637868433573868e-07, + "loss": 1.2715, + "step": 30205 + }, + { + "epoch": 0.886898819660579, + "grad_norm": 0.0, + "learning_rate": 6.634461931710723e-07, + "loss": 1.1504, + "step": 30206 + }, + { + "epoch": 0.886928181337718, + "grad_norm": 0.0, + "learning_rate": 6.631056274176062e-07, + "loss": 1.2432, + "step": 30207 + }, + { + "epoch": 0.886957543014857, + "grad_norm": 0.0, + "learning_rate": 6.627651461000662e-07, + "loss": 1.2495, + "step": 30208 + }, + { + "epoch": 0.886986904691996, + "grad_norm": 0.0, + "learning_rate": 6.624247492215342e-07, + "loss": 1.1123, + "step": 30209 + }, + { + "epoch": 0.887016266369135, + "grad_norm": 0.0, + "learning_rate": 6.620844367850854e-07, + "loss": 1.1948, + "step": 30210 + }, + { + "epoch": 0.887045628046274, + "grad_norm": 0.0, + "learning_rate": 6.617442087937986e-07, + "loss": 1.1855, + "step": 30211 + }, + { + "epoch": 0.887074989723413, + "grad_norm": 0.0, + "learning_rate": 6.614040652507514e-07, + "loss": 1.1118, + "step": 30212 + }, + { + "epoch": 0.887104351400552, + "grad_norm": 0.0, + "learning_rate": 6.610640061590168e-07, + "loss": 1.1758, + "step": 30213 + }, + { + "epoch": 0.887133713077691, + "grad_norm": 0.0, + "learning_rate": 6.607240315216734e-07, + "loss": 1.1382, + "step": 30214 + }, + { + "epoch": 0.88716307475483, + "grad_norm": 0.0, + "learning_rate": 6.603841413417933e-07, + "loss": 1.2764, + "step": 30215 + }, + { + "epoch": 0.887192436431969, + "grad_norm": 0.0, + "learning_rate": 6.600443356224529e-07, + "loss": 1.3955, + "step": 30216 + }, + { + "epoch": 0.887221798109108, + "grad_norm": 0.0, + "learning_rate": 6.597046143667219e-07, + "loss": 1.1299, + "step": 30217 + }, + { + "epoch": 0.887251159786247, + "grad_norm": 0.0, + "learning_rate": 6.593649775776756e-07, + "loss": 1.1494, + "step": 30218 + }, + { + "epoch": 0.887280521463386, + "grad_norm": 0.0, + "learning_rate": 6.590254252583849e-07, + "loss": 1.207, + "step": 30219 + }, + { + "epoch": 0.887309883140525, + "grad_norm": 0.0, + "learning_rate": 6.586859574119186e-07, + "loss": 1.1934, + "step": 30220 + }, + { + "epoch": 0.887339244817664, + "grad_norm": 0.0, + "learning_rate": 6.583465740413475e-07, + "loss": 1.0464, + "step": 30221 + }, + { + "epoch": 0.887368606494803, + "grad_norm": 0.0, + "learning_rate": 6.58007275149739e-07, + "loss": 1.2129, + "step": 30222 + }, + { + "epoch": 0.8873979681719419, + "grad_norm": 0.0, + "learning_rate": 6.576680607401653e-07, + "loss": 1.2319, + "step": 30223 + }, + { + "epoch": 0.887427329849081, + "grad_norm": 0.0, + "learning_rate": 6.573289308156905e-07, + "loss": 1.1143, + "step": 30224 + }, + { + "epoch": 0.88745669152622, + "grad_norm": 0.0, + "learning_rate": 6.569898853793843e-07, + "loss": 1.1167, + "step": 30225 + }, + { + "epoch": 0.8874860532033589, + "grad_norm": 0.0, + "learning_rate": 6.566509244343089e-07, + "loss": 1.25, + "step": 30226 + }, + { + "epoch": 0.887515414880498, + "grad_norm": 0.0, + "learning_rate": 6.56312047983535e-07, + "loss": 1.2793, + "step": 30227 + }, + { + "epoch": 0.887544776557637, + "grad_norm": 0.0, + "learning_rate": 6.559732560301235e-07, + "loss": 1.167, + "step": 30228 + }, + { + "epoch": 0.8875741382347759, + "grad_norm": 0.0, + "learning_rate": 6.556345485771387e-07, + "loss": 1.2109, + "step": 30229 + }, + { + "epoch": 0.887603499911915, + "grad_norm": 0.0, + "learning_rate": 6.552959256276448e-07, + "loss": 1.2305, + "step": 30230 + }, + { + "epoch": 0.887632861589054, + "grad_norm": 0.0, + "learning_rate": 6.549573871847004e-07, + "loss": 1.1826, + "step": 30231 + }, + { + "epoch": 0.8876622232661929, + "grad_norm": 0.0, + "learning_rate": 6.54618933251372e-07, + "loss": 1.2139, + "step": 30232 + }, + { + "epoch": 0.887691584943332, + "grad_norm": 0.0, + "learning_rate": 6.54280563830716e-07, + "loss": 1.0498, + "step": 30233 + }, + { + "epoch": 0.887720946620471, + "grad_norm": 0.0, + "learning_rate": 6.539422789257954e-07, + "loss": 1.1265, + "step": 30234 + }, + { + "epoch": 0.8877503082976099, + "grad_norm": 0.0, + "learning_rate": 6.536040785396669e-07, + "loss": 1.1357, + "step": 30235 + }, + { + "epoch": 0.887779669974749, + "grad_norm": 0.0, + "learning_rate": 6.532659626753946e-07, + "loss": 1.0977, + "step": 30236 + }, + { + "epoch": 0.887809031651888, + "grad_norm": 0.0, + "learning_rate": 6.529279313360282e-07, + "loss": 1.1348, + "step": 30237 + }, + { + "epoch": 0.8878383933290269, + "grad_norm": 0.0, + "learning_rate": 6.525899845246298e-07, + "loss": 1.3994, + "step": 30238 + }, + { + "epoch": 0.887867755006166, + "grad_norm": 0.0, + "learning_rate": 6.522521222442535e-07, + "loss": 1.3027, + "step": 30239 + }, + { + "epoch": 0.887897116683305, + "grad_norm": 0.0, + "learning_rate": 6.519143444979548e-07, + "loss": 1.3208, + "step": 30240 + }, + { + "epoch": 0.8879264783604439, + "grad_norm": 0.0, + "learning_rate": 6.515766512887889e-07, + "loss": 1.3271, + "step": 30241 + }, + { + "epoch": 0.887955840037583, + "grad_norm": 0.0, + "learning_rate": 6.51239042619809e-07, + "loss": 1.2314, + "step": 30242 + }, + { + "epoch": 0.887985201714722, + "grad_norm": 0.0, + "learning_rate": 6.509015184940704e-07, + "loss": 1.2075, + "step": 30243 + }, + { + "epoch": 0.8880145633918609, + "grad_norm": 0.0, + "learning_rate": 6.505640789146228e-07, + "loss": 1.1899, + "step": 30244 + }, + { + "epoch": 0.888043925069, + "grad_norm": 0.0, + "learning_rate": 6.502267238845194e-07, + "loss": 1.3247, + "step": 30245 + }, + { + "epoch": 0.888073286746139, + "grad_norm": 0.0, + "learning_rate": 6.498894534068089e-07, + "loss": 1.2275, + "step": 30246 + }, + { + "epoch": 0.8881026484232779, + "grad_norm": 0.0, + "learning_rate": 6.495522674845434e-07, + "loss": 1.2612, + "step": 30247 + }, + { + "epoch": 0.888132010100417, + "grad_norm": 0.0, + "learning_rate": 6.492151661207702e-07, + "loss": 1.2588, + "step": 30248 + }, + { + "epoch": 0.8881613717775559, + "grad_norm": 0.0, + "learning_rate": 6.488781493185414e-07, + "loss": 1.1846, + "step": 30249 + }, + { + "epoch": 0.8881907334546949, + "grad_norm": 0.0, + "learning_rate": 6.485412170809013e-07, + "loss": 1.145, + "step": 30250 + }, + { + "epoch": 0.888220095131834, + "grad_norm": 0.0, + "learning_rate": 6.482043694108964e-07, + "loss": 1.0703, + "step": 30251 + }, + { + "epoch": 0.8882494568089729, + "grad_norm": 0.0, + "learning_rate": 6.478676063115763e-07, + "loss": 1.0967, + "step": 30252 + }, + { + "epoch": 0.8882788184861119, + "grad_norm": 0.0, + "learning_rate": 6.475309277859854e-07, + "loss": 1.104, + "step": 30253 + }, + { + "epoch": 0.888308180163251, + "grad_norm": 0.0, + "learning_rate": 6.471943338371667e-07, + "loss": 1.2192, + "step": 30254 + }, + { + "epoch": 0.8883375418403899, + "grad_norm": 0.0, + "learning_rate": 6.468578244681634e-07, + "loss": 1.2432, + "step": 30255 + }, + { + "epoch": 0.8883669035175289, + "grad_norm": 0.0, + "learning_rate": 6.465213996820208e-07, + "loss": 1.0703, + "step": 30256 + }, + { + "epoch": 0.888396265194668, + "grad_norm": 0.0, + "learning_rate": 6.461850594817809e-07, + "loss": 1.1846, + "step": 30257 + }, + { + "epoch": 0.8884256268718069, + "grad_norm": 0.0, + "learning_rate": 6.458488038704847e-07, + "loss": 1.2295, + "step": 30258 + }, + { + "epoch": 0.8884549885489459, + "grad_norm": 0.0, + "learning_rate": 6.455126328511751e-07, + "loss": 1.1709, + "step": 30259 + }, + { + "epoch": 0.888484350226085, + "grad_norm": 0.0, + "learning_rate": 6.451765464268878e-07, + "loss": 1.2207, + "step": 30260 + }, + { + "epoch": 0.8885137119032239, + "grad_norm": 0.0, + "learning_rate": 6.448405446006689e-07, + "loss": 1.2451, + "step": 30261 + }, + { + "epoch": 0.8885430735803629, + "grad_norm": 0.0, + "learning_rate": 6.445046273755485e-07, + "loss": 1.2275, + "step": 30262 + }, + { + "epoch": 0.888572435257502, + "grad_norm": 0.0, + "learning_rate": 6.441687947545716e-07, + "loss": 1.1914, + "step": 30263 + }, + { + "epoch": 0.8886017969346409, + "grad_norm": 0.0, + "learning_rate": 6.438330467407694e-07, + "loss": 1.1465, + "step": 30264 + }, + { + "epoch": 0.8886311586117799, + "grad_norm": 0.0, + "learning_rate": 6.434973833371838e-07, + "loss": 1.2324, + "step": 30265 + }, + { + "epoch": 0.888660520288919, + "grad_norm": 0.0, + "learning_rate": 6.431618045468457e-07, + "loss": 1.186, + "step": 30266 + }, + { + "epoch": 0.8886898819660579, + "grad_norm": 0.0, + "learning_rate": 6.428263103727927e-07, + "loss": 1.168, + "step": 30267 + }, + { + "epoch": 0.8887192436431969, + "grad_norm": 0.0, + "learning_rate": 6.424909008180591e-07, + "loss": 1.2393, + "step": 30268 + }, + { + "epoch": 0.888748605320336, + "grad_norm": 0.0, + "learning_rate": 6.421555758856757e-07, + "loss": 1.1377, + "step": 30269 + }, + { + "epoch": 0.8887779669974749, + "grad_norm": 0.0, + "learning_rate": 6.418203355786757e-07, + "loss": 1.1885, + "step": 30270 + }, + { + "epoch": 0.8888073286746139, + "grad_norm": 0.0, + "learning_rate": 6.414851799000909e-07, + "loss": 1.293, + "step": 30271 + }, + { + "epoch": 0.888836690351753, + "grad_norm": 0.0, + "learning_rate": 6.411501088529526e-07, + "loss": 1.3408, + "step": 30272 + }, + { + "epoch": 0.8888660520288919, + "grad_norm": 0.0, + "learning_rate": 6.408151224402904e-07, + "loss": 1.1792, + "step": 30273 + }, + { + "epoch": 0.8888954137060309, + "grad_norm": 0.0, + "learning_rate": 6.40480220665134e-07, + "loss": 1.208, + "step": 30274 + }, + { + "epoch": 0.8889247753831699, + "grad_norm": 0.0, + "learning_rate": 6.40145403530511e-07, + "loss": 1.2109, + "step": 30275 + }, + { + "epoch": 0.8889541370603089, + "grad_norm": 0.0, + "learning_rate": 6.398106710394525e-07, + "loss": 1.2188, + "step": 30276 + }, + { + "epoch": 0.8889834987374479, + "grad_norm": 0.0, + "learning_rate": 6.394760231949826e-07, + "loss": 1.1426, + "step": 30277 + }, + { + "epoch": 0.8890128604145869, + "grad_norm": 0.0, + "learning_rate": 6.391414600001288e-07, + "loss": 1.3242, + "step": 30278 + }, + { + "epoch": 0.8890422220917259, + "grad_norm": 0.0, + "learning_rate": 6.388069814579157e-07, + "loss": 1.2627, + "step": 30279 + }, + { + "epoch": 0.8890715837688649, + "grad_norm": 0.0, + "learning_rate": 6.384725875713671e-07, + "loss": 1.252, + "step": 30280 + }, + { + "epoch": 0.8891009454460038, + "grad_norm": 0.0, + "learning_rate": 6.381382783435097e-07, + "loss": 1.29, + "step": 30281 + }, + { + "epoch": 0.8891303071231429, + "grad_norm": 0.0, + "learning_rate": 6.378040537773644e-07, + "loss": 1.1055, + "step": 30282 + }, + { + "epoch": 0.8891596688002819, + "grad_norm": 0.0, + "learning_rate": 6.374699138759555e-07, + "loss": 1.0815, + "step": 30283 + }, + { + "epoch": 0.8891890304774208, + "grad_norm": 0.0, + "learning_rate": 6.371358586423026e-07, + "loss": 1.0747, + "step": 30284 + }, + { + "epoch": 0.8892183921545599, + "grad_norm": 0.0, + "learning_rate": 6.368018880794303e-07, + "loss": 1.2383, + "step": 30285 + }, + { + "epoch": 0.8892477538316989, + "grad_norm": 0.0, + "learning_rate": 6.364680021903558e-07, + "loss": 1.2402, + "step": 30286 + }, + { + "epoch": 0.8892771155088378, + "grad_norm": 0.0, + "learning_rate": 6.361342009780991e-07, + "loss": 1.167, + "step": 30287 + }, + { + "epoch": 0.8893064771859769, + "grad_norm": 0.0, + "learning_rate": 6.3580048444568e-07, + "loss": 1.2539, + "step": 30288 + }, + { + "epoch": 0.8893358388631158, + "grad_norm": 0.0, + "learning_rate": 6.354668525961127e-07, + "loss": 1.0396, + "step": 30289 + }, + { + "epoch": 0.8893652005402548, + "grad_norm": 0.0, + "learning_rate": 6.351333054324194e-07, + "loss": 1.1216, + "step": 30290 + }, + { + "epoch": 0.8893945622173939, + "grad_norm": 0.0, + "learning_rate": 6.347998429576118e-07, + "loss": 1.1904, + "step": 30291 + }, + { + "epoch": 0.8894239238945328, + "grad_norm": 0.0, + "learning_rate": 6.34466465174709e-07, + "loss": 1.2695, + "step": 30292 + }, + { + "epoch": 0.8894532855716718, + "grad_norm": 0.0, + "learning_rate": 6.34133172086725e-07, + "loss": 1.1265, + "step": 30293 + }, + { + "epoch": 0.8894826472488109, + "grad_norm": 0.0, + "learning_rate": 6.33799963696673e-07, + "loss": 1.2637, + "step": 30294 + }, + { + "epoch": 0.8895120089259498, + "grad_norm": 0.0, + "learning_rate": 6.33466840007565e-07, + "loss": 1.1982, + "step": 30295 + }, + { + "epoch": 0.8895413706030888, + "grad_norm": 0.0, + "learning_rate": 6.331338010224175e-07, + "loss": 1.2031, + "step": 30296 + }, + { + "epoch": 0.8895707322802279, + "grad_norm": 0.0, + "learning_rate": 6.32800846744237e-07, + "loss": 1.2969, + "step": 30297 + }, + { + "epoch": 0.8896000939573668, + "grad_norm": 0.0, + "learning_rate": 6.3246797717604e-07, + "loss": 1.2148, + "step": 30298 + }, + { + "epoch": 0.8896294556345058, + "grad_norm": 0.0, + "learning_rate": 6.321351923208341e-07, + "loss": 1.2861, + "step": 30299 + }, + { + "epoch": 0.8896588173116449, + "grad_norm": 0.0, + "learning_rate": 6.318024921816268e-07, + "loss": 1.1685, + "step": 30300 + }, + { + "epoch": 0.8896881789887838, + "grad_norm": 0.0, + "learning_rate": 6.314698767614314e-07, + "loss": 1.2861, + "step": 30301 + }, + { + "epoch": 0.8897175406659228, + "grad_norm": 0.0, + "learning_rate": 6.311373460632519e-07, + "loss": 1.1562, + "step": 30302 + }, + { + "epoch": 0.8897469023430619, + "grad_norm": 0.0, + "learning_rate": 6.308049000900974e-07, + "loss": 1.2051, + "step": 30303 + }, + { + "epoch": 0.8897762640202008, + "grad_norm": 0.0, + "learning_rate": 6.304725388449728e-07, + "loss": 1.2383, + "step": 30304 + }, + { + "epoch": 0.8898056256973398, + "grad_norm": 0.0, + "learning_rate": 6.30140262330885e-07, + "loss": 1.2207, + "step": 30305 + }, + { + "epoch": 0.8898349873744789, + "grad_norm": 0.0, + "learning_rate": 6.298080705508381e-07, + "loss": 1.1533, + "step": 30306 + }, + { + "epoch": 0.8898643490516178, + "grad_norm": 0.0, + "learning_rate": 6.294759635078373e-07, + "loss": 1.291, + "step": 30307 + }, + { + "epoch": 0.8898937107287568, + "grad_norm": 0.0, + "learning_rate": 6.29143941204885e-07, + "loss": 1.2471, + "step": 30308 + }, + { + "epoch": 0.8899230724058959, + "grad_norm": 0.0, + "learning_rate": 6.288120036449829e-07, + "loss": 1.0439, + "step": 30309 + }, + { + "epoch": 0.8899524340830348, + "grad_norm": 0.0, + "learning_rate": 6.284801508311344e-07, + "loss": 1.1758, + "step": 30310 + }, + { + "epoch": 0.8899817957601738, + "grad_norm": 0.0, + "learning_rate": 6.281483827663414e-07, + "loss": 1.2305, + "step": 30311 + }, + { + "epoch": 0.8900111574373128, + "grad_norm": 0.0, + "learning_rate": 6.278166994536017e-07, + "loss": 1.1787, + "step": 30312 + }, + { + "epoch": 0.8900405191144518, + "grad_norm": 0.0, + "learning_rate": 6.274851008959149e-07, + "loss": 1.1899, + "step": 30313 + }, + { + "epoch": 0.8900698807915908, + "grad_norm": 0.0, + "learning_rate": 6.271535870962819e-07, + "loss": 1.2822, + "step": 30314 + }, + { + "epoch": 0.8900992424687298, + "grad_norm": 0.0, + "learning_rate": 6.268221580576983e-07, + "loss": 1.292, + "step": 30315 + }, + { + "epoch": 0.8901286041458688, + "grad_norm": 0.0, + "learning_rate": 6.264908137831638e-07, + "loss": 1.2344, + "step": 30316 + }, + { + "epoch": 0.8901579658230078, + "grad_norm": 0.0, + "learning_rate": 6.261595542756727e-07, + "loss": 1.0566, + "step": 30317 + }, + { + "epoch": 0.8901873275001468, + "grad_norm": 0.0, + "learning_rate": 6.258283795382226e-07, + "loss": 1.2061, + "step": 30318 + }, + { + "epoch": 0.8902166891772858, + "grad_norm": 0.0, + "learning_rate": 6.254972895738065e-07, + "loss": 1.1001, + "step": 30319 + }, + { + "epoch": 0.8902460508544248, + "grad_norm": 0.0, + "learning_rate": 6.251662843854178e-07, + "loss": 1.2461, + "step": 30320 + }, + { + "epoch": 0.8902754125315638, + "grad_norm": 0.0, + "learning_rate": 6.248353639760529e-07, + "loss": 1.2451, + "step": 30321 + }, + { + "epoch": 0.8903047742087028, + "grad_norm": 0.0, + "learning_rate": 6.245045283487017e-07, + "loss": 1.0522, + "step": 30322 + }, + { + "epoch": 0.8903341358858418, + "grad_norm": 0.0, + "learning_rate": 6.241737775063584e-07, + "loss": 1.1914, + "step": 30323 + }, + { + "epoch": 0.8903634975629808, + "grad_norm": 0.0, + "learning_rate": 6.238431114520105e-07, + "loss": 1.2168, + "step": 30324 + }, + { + "epoch": 0.8903928592401198, + "grad_norm": 0.0, + "learning_rate": 6.235125301886536e-07, + "loss": 1.2012, + "step": 30325 + }, + { + "epoch": 0.8904222209172588, + "grad_norm": 0.0, + "learning_rate": 6.231820337192729e-07, + "loss": 1.1792, + "step": 30326 + }, + { + "epoch": 0.8904515825943978, + "grad_norm": 0.0, + "learning_rate": 6.228516220468594e-07, + "loss": 1.3379, + "step": 30327 + }, + { + "epoch": 0.8904809442715368, + "grad_norm": 0.0, + "learning_rate": 6.225212951743998e-07, + "loss": 1.2056, + "step": 30328 + }, + { + "epoch": 0.8905103059486758, + "grad_norm": 0.0, + "learning_rate": 6.221910531048803e-07, + "loss": 1.3047, + "step": 30329 + }, + { + "epoch": 0.8905396676258148, + "grad_norm": 0.0, + "learning_rate": 6.218608958412897e-07, + "loss": 1.2158, + "step": 30330 + }, + { + "epoch": 0.8905690293029538, + "grad_norm": 0.0, + "learning_rate": 6.215308233866124e-07, + "loss": 1.1895, + "step": 30331 + }, + { + "epoch": 0.8905983909800927, + "grad_norm": 0.0, + "learning_rate": 6.212008357438348e-07, + "loss": 1.1396, + "step": 30332 + }, + { + "epoch": 0.8906277526572318, + "grad_norm": 0.0, + "learning_rate": 6.208709329159379e-07, + "loss": 1.2412, + "step": 30333 + }, + { + "epoch": 0.8906571143343708, + "grad_norm": 0.0, + "learning_rate": 6.205411149059092e-07, + "loss": 1.1553, + "step": 30334 + }, + { + "epoch": 0.8906864760115097, + "grad_norm": 0.0, + "learning_rate": 6.2021138171673e-07, + "loss": 1.248, + "step": 30335 + }, + { + "epoch": 0.8907158376886488, + "grad_norm": 0.0, + "learning_rate": 6.198817333513807e-07, + "loss": 1.2246, + "step": 30336 + }, + { + "epoch": 0.8907451993657878, + "grad_norm": 0.0, + "learning_rate": 6.195521698128415e-07, + "loss": 1.3066, + "step": 30337 + }, + { + "epoch": 0.8907745610429267, + "grad_norm": 0.0, + "learning_rate": 6.192226911040966e-07, + "loss": 1.2959, + "step": 30338 + }, + { + "epoch": 0.8908039227200658, + "grad_norm": 0.0, + "learning_rate": 6.188932972281225e-07, + "loss": 1.2344, + "step": 30339 + }, + { + "epoch": 0.8908332843972048, + "grad_norm": 0.0, + "learning_rate": 6.18563988187898e-07, + "loss": 1.2051, + "step": 30340 + }, + { + "epoch": 0.8908626460743437, + "grad_norm": 0.0, + "learning_rate": 6.182347639864039e-07, + "loss": 1.21, + "step": 30341 + }, + { + "epoch": 0.8908920077514828, + "grad_norm": 0.0, + "learning_rate": 6.179056246266147e-07, + "loss": 1.1953, + "step": 30342 + }, + { + "epoch": 0.8909213694286218, + "grad_norm": 0.0, + "learning_rate": 6.175765701115077e-07, + "loss": 1.2188, + "step": 30343 + }, + { + "epoch": 0.8909507311057607, + "grad_norm": 0.0, + "learning_rate": 6.172476004440575e-07, + "loss": 1.3242, + "step": 30344 + }, + { + "epoch": 0.8909800927828998, + "grad_norm": 0.0, + "learning_rate": 6.169187156272415e-07, + "loss": 1.2861, + "step": 30345 + }, + { + "epoch": 0.8910094544600388, + "grad_norm": 0.0, + "learning_rate": 6.16589915664031e-07, + "loss": 1.1748, + "step": 30346 + }, + { + "epoch": 0.8910388161371777, + "grad_norm": 0.0, + "learning_rate": 6.162612005574032e-07, + "loss": 1.3545, + "step": 30347 + }, + { + "epoch": 0.8910681778143168, + "grad_norm": 0.0, + "learning_rate": 6.159325703103281e-07, + "loss": 0.9995, + "step": 30348 + }, + { + "epoch": 0.8910975394914558, + "grad_norm": 0.0, + "learning_rate": 6.156040249257767e-07, + "loss": 1.0537, + "step": 30349 + }, + { + "epoch": 0.8911269011685947, + "grad_norm": 0.0, + "learning_rate": 6.152755644067221e-07, + "loss": 1.1602, + "step": 30350 + }, + { + "epoch": 0.8911562628457338, + "grad_norm": 0.0, + "learning_rate": 6.149471887561353e-07, + "loss": 1.1992, + "step": 30351 + }, + { + "epoch": 0.8911856245228728, + "grad_norm": 0.0, + "learning_rate": 6.146188979769841e-07, + "loss": 1.1309, + "step": 30352 + }, + { + "epoch": 0.8912149862000117, + "grad_norm": 0.0, + "learning_rate": 6.142906920722358e-07, + "loss": 1.002, + "step": 30353 + }, + { + "epoch": 0.8912443478771508, + "grad_norm": 0.0, + "learning_rate": 6.139625710448627e-07, + "loss": 1.1963, + "step": 30354 + }, + { + "epoch": 0.8912737095542898, + "grad_norm": 0.0, + "learning_rate": 6.136345348978279e-07, + "loss": 1.2354, + "step": 30355 + }, + { + "epoch": 0.8913030712314287, + "grad_norm": 0.0, + "learning_rate": 6.133065836341024e-07, + "loss": 1.2271, + "step": 30356 + }, + { + "epoch": 0.8913324329085678, + "grad_norm": 0.0, + "learning_rate": 6.129787172566481e-07, + "loss": 1.1426, + "step": 30357 + }, + { + "epoch": 0.8913617945857067, + "grad_norm": 0.0, + "learning_rate": 6.126509357684307e-07, + "loss": 1.146, + "step": 30358 + }, + { + "epoch": 0.8913911562628457, + "grad_norm": 0.0, + "learning_rate": 6.123232391724166e-07, + "loss": 1.1411, + "step": 30359 + }, + { + "epoch": 0.8914205179399848, + "grad_norm": 0.0, + "learning_rate": 6.119956274715666e-07, + "loss": 1.2158, + "step": 30360 + }, + { + "epoch": 0.8914498796171237, + "grad_norm": 0.0, + "learning_rate": 6.116681006688452e-07, + "loss": 1.2637, + "step": 30361 + }, + { + "epoch": 0.8914792412942627, + "grad_norm": 0.0, + "learning_rate": 6.113406587672121e-07, + "loss": 1.2021, + "step": 30362 + }, + { + "epoch": 0.8915086029714018, + "grad_norm": 0.0, + "learning_rate": 6.110133017696318e-07, + "loss": 1.3379, + "step": 30363 + }, + { + "epoch": 0.8915379646485407, + "grad_norm": 0.0, + "learning_rate": 6.106860296790607e-07, + "loss": 1.3047, + "step": 30364 + }, + { + "epoch": 0.8915673263256797, + "grad_norm": 0.0, + "learning_rate": 6.103588424984618e-07, + "loss": 1.2271, + "step": 30365 + }, + { + "epoch": 0.8915966880028188, + "grad_norm": 0.0, + "learning_rate": 6.100317402307909e-07, + "loss": 1.0684, + "step": 30366 + }, + { + "epoch": 0.8916260496799577, + "grad_norm": 0.0, + "learning_rate": 6.097047228790121e-07, + "loss": 1.2124, + "step": 30367 + }, + { + "epoch": 0.8916554113570967, + "grad_norm": 0.0, + "learning_rate": 6.093777904460751e-07, + "loss": 1.1592, + "step": 30368 + }, + { + "epoch": 0.8916847730342358, + "grad_norm": 0.0, + "learning_rate": 6.090509429349389e-07, + "loss": 1.1416, + "step": 30369 + }, + { + "epoch": 0.8917141347113747, + "grad_norm": 0.0, + "learning_rate": 6.087241803485621e-07, + "loss": 1.2588, + "step": 30370 + }, + { + "epoch": 0.8917434963885137, + "grad_norm": 0.0, + "learning_rate": 6.083975026898959e-07, + "loss": 1.2065, + "step": 30371 + }, + { + "epoch": 0.8917728580656528, + "grad_norm": 0.0, + "learning_rate": 6.080709099618976e-07, + "loss": 1.2568, + "step": 30372 + }, + { + "epoch": 0.8918022197427917, + "grad_norm": 0.0, + "learning_rate": 6.077444021675183e-07, + "loss": 1.1909, + "step": 30373 + }, + { + "epoch": 0.8918315814199307, + "grad_norm": 0.0, + "learning_rate": 6.074179793097135e-07, + "loss": 1.0815, + "step": 30374 + }, + { + "epoch": 0.8918609430970698, + "grad_norm": 0.0, + "learning_rate": 6.070916413914329e-07, + "loss": 1.1753, + "step": 30375 + }, + { + "epoch": 0.8918903047742087, + "grad_norm": 0.0, + "learning_rate": 6.067653884156277e-07, + "loss": 1.2461, + "step": 30376 + }, + { + "epoch": 0.8919196664513477, + "grad_norm": 0.0, + "learning_rate": 6.064392203852499e-07, + "loss": 1.2998, + "step": 30377 + }, + { + "epoch": 0.8919490281284868, + "grad_norm": 0.0, + "learning_rate": 6.061131373032447e-07, + "loss": 1.1069, + "step": 30378 + }, + { + "epoch": 0.8919783898056257, + "grad_norm": 0.0, + "learning_rate": 6.057871391725678e-07, + "loss": 1.2617, + "step": 30379 + }, + { + "epoch": 0.8920077514827647, + "grad_norm": 0.0, + "learning_rate": 6.054612259961612e-07, + "loss": 1.231, + "step": 30380 + }, + { + "epoch": 0.8920371131599036, + "grad_norm": 0.0, + "learning_rate": 6.051353977769758e-07, + "loss": 1.2578, + "step": 30381 + }, + { + "epoch": 0.8920664748370427, + "grad_norm": 0.0, + "learning_rate": 6.048096545179561e-07, + "loss": 1.1357, + "step": 30382 + }, + { + "epoch": 0.8920958365141817, + "grad_norm": 0.0, + "learning_rate": 6.044839962220494e-07, + "loss": 1.2124, + "step": 30383 + }, + { + "epoch": 0.8921251981913206, + "grad_norm": 0.0, + "learning_rate": 6.041584228922015e-07, + "loss": 1.144, + "step": 30384 + }, + { + "epoch": 0.8921545598684597, + "grad_norm": 0.0, + "learning_rate": 6.038329345313542e-07, + "loss": 1.0645, + "step": 30385 + }, + { + "epoch": 0.8921839215455987, + "grad_norm": 0.0, + "learning_rate": 6.03507531142451e-07, + "loss": 1.1152, + "step": 30386 + }, + { + "epoch": 0.8922132832227376, + "grad_norm": 0.0, + "learning_rate": 6.031822127284381e-07, + "loss": 1.0781, + "step": 30387 + }, + { + "epoch": 0.8922426448998767, + "grad_norm": 0.0, + "learning_rate": 6.028569792922534e-07, + "loss": 1.1792, + "step": 30388 + }, + { + "epoch": 0.8922720065770157, + "grad_norm": 0.0, + "learning_rate": 6.025318308368389e-07, + "loss": 1.1343, + "step": 30389 + }, + { + "epoch": 0.8923013682541546, + "grad_norm": 0.0, + "learning_rate": 6.022067673651377e-07, + "loss": 1.1328, + "step": 30390 + }, + { + "epoch": 0.8923307299312937, + "grad_norm": 0.0, + "learning_rate": 6.018817888800855e-07, + "loss": 1.167, + "step": 30391 + }, + { + "epoch": 0.8923600916084327, + "grad_norm": 0.0, + "learning_rate": 6.015568953846262e-07, + "loss": 1.1895, + "step": 30392 + }, + { + "epoch": 0.8923894532855716, + "grad_norm": 0.0, + "learning_rate": 6.012320868816924e-07, + "loss": 1.1831, + "step": 30393 + }, + { + "epoch": 0.8924188149627107, + "grad_norm": 0.0, + "learning_rate": 6.009073633742246e-07, + "loss": 1.207, + "step": 30394 + }, + { + "epoch": 0.8924481766398497, + "grad_norm": 0.0, + "learning_rate": 6.005827248651575e-07, + "loss": 1.0977, + "step": 30395 + }, + { + "epoch": 0.8924775383169886, + "grad_norm": 0.0, + "learning_rate": 6.002581713574296e-07, + "loss": 1.248, + "step": 30396 + }, + { + "epoch": 0.8925068999941277, + "grad_norm": 0.0, + "learning_rate": 5.999337028539742e-07, + "loss": 1.2783, + "step": 30397 + }, + { + "epoch": 0.8925362616712667, + "grad_norm": 0.0, + "learning_rate": 5.996093193577246e-07, + "loss": 1.1938, + "step": 30398 + }, + { + "epoch": 0.8925656233484056, + "grad_norm": 0.0, + "learning_rate": 5.992850208716172e-07, + "loss": 1.209, + "step": 30399 + }, + { + "epoch": 0.8925949850255447, + "grad_norm": 0.0, + "learning_rate": 5.98960807398582e-07, + "loss": 1.0938, + "step": 30400 + }, + { + "epoch": 0.8926243467026836, + "grad_norm": 0.0, + "learning_rate": 5.986366789415521e-07, + "loss": 1.2598, + "step": 30401 + }, + { + "epoch": 0.8926537083798226, + "grad_norm": 0.0, + "learning_rate": 5.983126355034574e-07, + "loss": 1.2568, + "step": 30402 + }, + { + "epoch": 0.8926830700569617, + "grad_norm": 0.0, + "learning_rate": 5.9798867708723e-07, + "loss": 1.2134, + "step": 30403 + }, + { + "epoch": 0.8927124317341006, + "grad_norm": 0.0, + "learning_rate": 5.976648036957988e-07, + "loss": 1.1479, + "step": 30404 + }, + { + "epoch": 0.8927417934112396, + "grad_norm": 0.0, + "learning_rate": 5.973410153320936e-07, + "loss": 1.251, + "step": 30405 + }, + { + "epoch": 0.8927711550883787, + "grad_norm": 0.0, + "learning_rate": 5.970173119990408e-07, + "loss": 1.1313, + "step": 30406 + }, + { + "epoch": 0.8928005167655176, + "grad_norm": 0.0, + "learning_rate": 5.966936936995683e-07, + "loss": 1.1577, + "step": 30407 + }, + { + "epoch": 0.8928298784426566, + "grad_norm": 0.0, + "learning_rate": 5.963701604366034e-07, + "loss": 1.2285, + "step": 30408 + }, + { + "epoch": 0.8928592401197957, + "grad_norm": 0.0, + "learning_rate": 5.96046712213072e-07, + "loss": 1.1108, + "step": 30409 + }, + { + "epoch": 0.8928886017969346, + "grad_norm": 0.0, + "learning_rate": 5.957233490318981e-07, + "loss": 1.2324, + "step": 30410 + }, + { + "epoch": 0.8929179634740736, + "grad_norm": 0.0, + "learning_rate": 5.954000708960062e-07, + "loss": 1.2666, + "step": 30411 + }, + { + "epoch": 0.8929473251512127, + "grad_norm": 0.0, + "learning_rate": 5.950768778083205e-07, + "loss": 1.2402, + "step": 30412 + }, + { + "epoch": 0.8929766868283516, + "grad_norm": 0.0, + "learning_rate": 5.94753769771761e-07, + "loss": 1.2236, + "step": 30413 + }, + { + "epoch": 0.8930060485054906, + "grad_norm": 0.0, + "learning_rate": 5.944307467892552e-07, + "loss": 1.2612, + "step": 30414 + }, + { + "epoch": 0.8930354101826297, + "grad_norm": 0.0, + "learning_rate": 5.941078088637176e-07, + "loss": 1.2881, + "step": 30415 + }, + { + "epoch": 0.8930647718597686, + "grad_norm": 0.0, + "learning_rate": 5.937849559980757e-07, + "loss": 1.1865, + "step": 30416 + }, + { + "epoch": 0.8930941335369076, + "grad_norm": 0.0, + "learning_rate": 5.93462188195244e-07, + "loss": 1.1978, + "step": 30417 + }, + { + "epoch": 0.8931234952140467, + "grad_norm": 0.0, + "learning_rate": 5.931395054581424e-07, + "loss": 1.2354, + "step": 30418 + }, + { + "epoch": 0.8931528568911856, + "grad_norm": 0.0, + "learning_rate": 5.928169077896894e-07, + "loss": 1.1875, + "step": 30419 + }, + { + "epoch": 0.8931822185683246, + "grad_norm": 0.0, + "learning_rate": 5.924943951928008e-07, + "loss": 1.3086, + "step": 30420 + }, + { + "epoch": 0.8932115802454637, + "grad_norm": 0.0, + "learning_rate": 5.921719676703974e-07, + "loss": 1.2168, + "step": 30421 + }, + { + "epoch": 0.8932409419226026, + "grad_norm": 0.0, + "learning_rate": 5.918496252253903e-07, + "loss": 1.2383, + "step": 30422 + }, + { + "epoch": 0.8932703035997416, + "grad_norm": 0.0, + "learning_rate": 5.91527367860697e-07, + "loss": 1.3037, + "step": 30423 + }, + { + "epoch": 0.8932996652768807, + "grad_norm": 0.0, + "learning_rate": 5.91205195579232e-07, + "loss": 1.1934, + "step": 30424 + }, + { + "epoch": 0.8933290269540196, + "grad_norm": 0.0, + "learning_rate": 5.908831083839084e-07, + "loss": 1.0586, + "step": 30425 + }, + { + "epoch": 0.8933583886311586, + "grad_norm": 0.0, + "learning_rate": 5.905611062776384e-07, + "loss": 1.2158, + "step": 30426 + }, + { + "epoch": 0.8933877503082976, + "grad_norm": 0.0, + "learning_rate": 5.902391892633319e-07, + "loss": 1.2891, + "step": 30427 + }, + { + "epoch": 0.8934171119854366, + "grad_norm": 0.0, + "learning_rate": 5.899173573439043e-07, + "loss": 1.2969, + "step": 30428 + }, + { + "epoch": 0.8934464736625756, + "grad_norm": 0.0, + "learning_rate": 5.895956105222622e-07, + "loss": 1.1055, + "step": 30429 + }, + { + "epoch": 0.8934758353397146, + "grad_norm": 0.0, + "learning_rate": 5.892739488013189e-07, + "loss": 1.2432, + "step": 30430 + }, + { + "epoch": 0.8935051970168536, + "grad_norm": 0.0, + "learning_rate": 5.889523721839796e-07, + "loss": 1.1528, + "step": 30431 + }, + { + "epoch": 0.8935345586939926, + "grad_norm": 0.0, + "learning_rate": 5.886308806731566e-07, + "loss": 1.2363, + "step": 30432 + }, + { + "epoch": 0.8935639203711316, + "grad_norm": 0.0, + "learning_rate": 5.883094742717543e-07, + "loss": 1.1055, + "step": 30433 + }, + { + "epoch": 0.8935932820482706, + "grad_norm": 0.0, + "learning_rate": 5.879881529826792e-07, + "loss": 1.2705, + "step": 30434 + }, + { + "epoch": 0.8936226437254096, + "grad_norm": 0.0, + "learning_rate": 5.876669168088367e-07, + "loss": 1.1084, + "step": 30435 + }, + { + "epoch": 0.8936520054025486, + "grad_norm": 0.0, + "learning_rate": 5.873457657531345e-07, + "loss": 1.1729, + "step": 30436 + }, + { + "epoch": 0.8936813670796876, + "grad_norm": 0.0, + "learning_rate": 5.870246998184748e-07, + "loss": 1.1826, + "step": 30437 + }, + { + "epoch": 0.8937107287568266, + "grad_norm": 0.0, + "learning_rate": 5.867037190077595e-07, + "loss": 1.1714, + "step": 30438 + }, + { + "epoch": 0.8937400904339656, + "grad_norm": 0.0, + "learning_rate": 5.863828233238955e-07, + "loss": 1.1348, + "step": 30439 + }, + { + "epoch": 0.8937694521111046, + "grad_norm": 0.0, + "learning_rate": 5.860620127697814e-07, + "loss": 1.2207, + "step": 30440 + }, + { + "epoch": 0.8937988137882436, + "grad_norm": 0.0, + "learning_rate": 5.857412873483215e-07, + "loss": 1.1284, + "step": 30441 + }, + { + "epoch": 0.8938281754653826, + "grad_norm": 0.0, + "learning_rate": 5.854206470624113e-07, + "loss": 1.2207, + "step": 30442 + }, + { + "epoch": 0.8938575371425216, + "grad_norm": 0.0, + "learning_rate": 5.851000919149552e-07, + "loss": 1.1362, + "step": 30443 + }, + { + "epoch": 0.8938868988196605, + "grad_norm": 0.0, + "learning_rate": 5.847796219088486e-07, + "loss": 1.3369, + "step": 30444 + }, + { + "epoch": 0.8939162604967996, + "grad_norm": 0.0, + "learning_rate": 5.844592370469926e-07, + "loss": 1.1475, + "step": 30445 + }, + { + "epoch": 0.8939456221739386, + "grad_norm": 0.0, + "learning_rate": 5.841389373322826e-07, + "loss": 1.2549, + "step": 30446 + }, + { + "epoch": 0.8939749838510775, + "grad_norm": 0.0, + "learning_rate": 5.838187227676151e-07, + "loss": 1.1191, + "step": 30447 + }, + { + "epoch": 0.8940043455282166, + "grad_norm": 0.0, + "learning_rate": 5.834985933558868e-07, + "loss": 1.0127, + "step": 30448 + }, + { + "epoch": 0.8940337072053556, + "grad_norm": 0.0, + "learning_rate": 5.831785490999931e-07, + "loss": 1.0718, + "step": 30449 + }, + { + "epoch": 0.8940630688824945, + "grad_norm": 0.0, + "learning_rate": 5.828585900028272e-07, + "loss": 1.2461, + "step": 30450 + }, + { + "epoch": 0.8940924305596336, + "grad_norm": 0.0, + "learning_rate": 5.825387160672813e-07, + "loss": 1.1201, + "step": 30451 + }, + { + "epoch": 0.8941217922367726, + "grad_norm": 0.0, + "learning_rate": 5.822189272962509e-07, + "loss": 1.189, + "step": 30452 + }, + { + "epoch": 0.8941511539139115, + "grad_norm": 0.0, + "learning_rate": 5.818992236926258e-07, + "loss": 1.2842, + "step": 30453 + }, + { + "epoch": 0.8941805155910506, + "grad_norm": 0.0, + "learning_rate": 5.815796052592992e-07, + "loss": 1.1401, + "step": 30454 + }, + { + "epoch": 0.8942098772681896, + "grad_norm": 0.0, + "learning_rate": 5.812600719991591e-07, + "loss": 1.3018, + "step": 30455 + }, + { + "epoch": 0.8942392389453285, + "grad_norm": 0.0, + "learning_rate": 5.809406239150972e-07, + "loss": 1.2559, + "step": 30456 + }, + { + "epoch": 0.8942686006224676, + "grad_norm": 0.0, + "learning_rate": 5.806212610100015e-07, + "loss": 1.0708, + "step": 30457 + }, + { + "epoch": 0.8942979622996066, + "grad_norm": 0.0, + "learning_rate": 5.803019832867606e-07, + "loss": 1.2754, + "step": 30458 + }, + { + "epoch": 0.8943273239767455, + "grad_norm": 0.0, + "learning_rate": 5.799827907482614e-07, + "loss": 1.3057, + "step": 30459 + }, + { + "epoch": 0.8943566856538846, + "grad_norm": 0.0, + "learning_rate": 5.79663683397389e-07, + "loss": 1.2173, + "step": 30460 + }, + { + "epoch": 0.8943860473310236, + "grad_norm": 0.0, + "learning_rate": 5.793446612370313e-07, + "loss": 1.1621, + "step": 30461 + }, + { + "epoch": 0.8944154090081625, + "grad_norm": 0.0, + "learning_rate": 5.790257242700714e-07, + "loss": 1.2871, + "step": 30462 + }, + { + "epoch": 0.8944447706853016, + "grad_norm": 0.0, + "learning_rate": 5.78706872499396e-07, + "loss": 1.2334, + "step": 30463 + }, + { + "epoch": 0.8944741323624406, + "grad_norm": 0.0, + "learning_rate": 5.783881059278851e-07, + "loss": 1.2061, + "step": 30464 + }, + { + "epoch": 0.8945034940395795, + "grad_norm": 0.0, + "learning_rate": 5.780694245584262e-07, + "loss": 1.168, + "step": 30465 + }, + { + "epoch": 0.8945328557167186, + "grad_norm": 0.0, + "learning_rate": 5.777508283938993e-07, + "loss": 1.106, + "step": 30466 + }, + { + "epoch": 0.8945622173938576, + "grad_norm": 0.0, + "learning_rate": 5.77432317437181e-07, + "loss": 1.1982, + "step": 30467 + }, + { + "epoch": 0.8945915790709965, + "grad_norm": 0.0, + "learning_rate": 5.771138916911579e-07, + "loss": 1.0386, + "step": 30468 + }, + { + "epoch": 0.8946209407481356, + "grad_norm": 0.0, + "learning_rate": 5.767955511587054e-07, + "loss": 1.0776, + "step": 30469 + }, + { + "epoch": 0.8946503024252745, + "grad_norm": 0.0, + "learning_rate": 5.764772958427056e-07, + "loss": 1.106, + "step": 30470 + }, + { + "epoch": 0.8946796641024135, + "grad_norm": 0.0, + "learning_rate": 5.761591257460331e-07, + "loss": 1.1626, + "step": 30471 + }, + { + "epoch": 0.8947090257795526, + "grad_norm": 0.0, + "learning_rate": 5.758410408715687e-07, + "loss": 1.1943, + "step": 30472 + }, + { + "epoch": 0.8947383874566915, + "grad_norm": 0.0, + "learning_rate": 5.755230412221879e-07, + "loss": 1.269, + "step": 30473 + }, + { + "epoch": 0.8947677491338305, + "grad_norm": 0.0, + "learning_rate": 5.752051268007652e-07, + "loss": 1.1758, + "step": 30474 + }, + { + "epoch": 0.8947971108109696, + "grad_norm": 0.0, + "learning_rate": 5.748872976101771e-07, + "loss": 1.167, + "step": 30475 + }, + { + "epoch": 0.8948264724881085, + "grad_norm": 0.0, + "learning_rate": 5.745695536532947e-07, + "loss": 1.1831, + "step": 30476 + }, + { + "epoch": 0.8948558341652475, + "grad_norm": 0.0, + "learning_rate": 5.742518949329956e-07, + "loss": 1.1748, + "step": 30477 + }, + { + "epoch": 0.8948851958423866, + "grad_norm": 0.0, + "learning_rate": 5.739343214521498e-07, + "loss": 1.2749, + "step": 30478 + }, + { + "epoch": 0.8949145575195255, + "grad_norm": 0.0, + "learning_rate": 5.736168332136305e-07, + "loss": 1.2588, + "step": 30479 + }, + { + "epoch": 0.8949439191966645, + "grad_norm": 0.0, + "learning_rate": 5.732994302203065e-07, + "loss": 1.1641, + "step": 30480 + }, + { + "epoch": 0.8949732808738035, + "grad_norm": 0.0, + "learning_rate": 5.729821124750523e-07, + "loss": 1.2378, + "step": 30481 + }, + { + "epoch": 0.8950026425509425, + "grad_norm": 0.0, + "learning_rate": 5.726648799807344e-07, + "loss": 1.0977, + "step": 30482 + }, + { + "epoch": 0.8950320042280815, + "grad_norm": 0.0, + "learning_rate": 5.723477327402227e-07, + "loss": 1.2363, + "step": 30483 + }, + { + "epoch": 0.8950613659052205, + "grad_norm": 0.0, + "learning_rate": 5.720306707563839e-07, + "loss": 1.2432, + "step": 30484 + }, + { + "epoch": 0.8950907275823595, + "grad_norm": 0.0, + "learning_rate": 5.717136940320866e-07, + "loss": 1.1885, + "step": 30485 + }, + { + "epoch": 0.8951200892594985, + "grad_norm": 0.0, + "learning_rate": 5.713968025701989e-07, + "loss": 1.1953, + "step": 30486 + }, + { + "epoch": 0.8951494509366374, + "grad_norm": 0.0, + "learning_rate": 5.710799963735813e-07, + "loss": 1.2588, + "step": 30487 + }, + { + "epoch": 0.8951788126137765, + "grad_norm": 0.0, + "learning_rate": 5.707632754451043e-07, + "loss": 1.2852, + "step": 30488 + }, + { + "epoch": 0.8952081742909155, + "grad_norm": 0.0, + "learning_rate": 5.704466397876285e-07, + "loss": 1.3057, + "step": 30489 + }, + { + "epoch": 0.8952375359680544, + "grad_norm": 0.0, + "learning_rate": 5.701300894040196e-07, + "loss": 1.1479, + "step": 30490 + }, + { + "epoch": 0.8952668976451935, + "grad_norm": 0.0, + "learning_rate": 5.698136242971397e-07, + "loss": 1.0977, + "step": 30491 + }, + { + "epoch": 0.8952962593223325, + "grad_norm": 0.0, + "learning_rate": 5.69497244469851e-07, + "loss": 1.2607, + "step": 30492 + }, + { + "epoch": 0.8953256209994714, + "grad_norm": 0.0, + "learning_rate": 5.691809499250111e-07, + "loss": 1.1401, + "step": 30493 + }, + { + "epoch": 0.8953549826766105, + "grad_norm": 0.0, + "learning_rate": 5.688647406654857e-07, + "loss": 1.103, + "step": 30494 + }, + { + "epoch": 0.8953843443537495, + "grad_norm": 0.0, + "learning_rate": 5.685486166941312e-07, + "loss": 1.2012, + "step": 30495 + }, + { + "epoch": 0.8954137060308884, + "grad_norm": 0.0, + "learning_rate": 5.682325780138054e-07, + "loss": 1.0947, + "step": 30496 + }, + { + "epoch": 0.8954430677080275, + "grad_norm": 0.0, + "learning_rate": 5.679166246273693e-07, + "loss": 1.2178, + "step": 30497 + }, + { + "epoch": 0.8954724293851665, + "grad_norm": 0.0, + "learning_rate": 5.676007565376795e-07, + "loss": 1.166, + "step": 30498 + }, + { + "epoch": 0.8955017910623054, + "grad_norm": 0.0, + "learning_rate": 5.672849737475916e-07, + "loss": 1.2319, + "step": 30499 + }, + { + "epoch": 0.8955311527394445, + "grad_norm": 0.0, + "learning_rate": 5.669692762599599e-07, + "loss": 1.2373, + "step": 30500 + }, + { + "epoch": 0.8955605144165835, + "grad_norm": 0.0, + "learning_rate": 5.666536640776421e-07, + "loss": 1.0288, + "step": 30501 + }, + { + "epoch": 0.8955898760937224, + "grad_norm": 0.0, + "learning_rate": 5.663381372034893e-07, + "loss": 1.3125, + "step": 30502 + }, + { + "epoch": 0.8956192377708615, + "grad_norm": 0.0, + "learning_rate": 5.660226956403591e-07, + "loss": 1.0791, + "step": 30503 + }, + { + "epoch": 0.8956485994480005, + "grad_norm": 0.0, + "learning_rate": 5.657073393910994e-07, + "loss": 1.1416, + "step": 30504 + }, + { + "epoch": 0.8956779611251394, + "grad_norm": 0.0, + "learning_rate": 5.653920684585657e-07, + "loss": 1.1953, + "step": 30505 + }, + { + "epoch": 0.8957073228022785, + "grad_norm": 0.0, + "learning_rate": 5.650768828456088e-07, + "loss": 1.104, + "step": 30506 + }, + { + "epoch": 0.8957366844794175, + "grad_norm": 0.0, + "learning_rate": 5.647617825550778e-07, + "loss": 1.3145, + "step": 30507 + }, + { + "epoch": 0.8957660461565564, + "grad_norm": 0.0, + "learning_rate": 5.644467675898213e-07, + "loss": 1.1079, + "step": 30508 + }, + { + "epoch": 0.8957954078336955, + "grad_norm": 0.0, + "learning_rate": 5.641318379526883e-07, + "loss": 1.2612, + "step": 30509 + }, + { + "epoch": 0.8958247695108345, + "grad_norm": 0.0, + "learning_rate": 5.638169936465299e-07, + "loss": 1.2271, + "step": 30510 + }, + { + "epoch": 0.8958541311879734, + "grad_norm": 0.0, + "learning_rate": 5.635022346741891e-07, + "loss": 1.2251, + "step": 30511 + }, + { + "epoch": 0.8958834928651125, + "grad_norm": 0.0, + "learning_rate": 5.631875610385152e-07, + "loss": 1.1558, + "step": 30512 + }, + { + "epoch": 0.8959128545422514, + "grad_norm": 0.0, + "learning_rate": 5.628729727423521e-07, + "loss": 1.1421, + "step": 30513 + }, + { + "epoch": 0.8959422162193904, + "grad_norm": 0.0, + "learning_rate": 5.625584697885466e-07, + "loss": 1.1226, + "step": 30514 + }, + { + "epoch": 0.8959715778965295, + "grad_norm": 0.0, + "learning_rate": 5.622440521799421e-07, + "loss": 1.248, + "step": 30515 + }, + { + "epoch": 0.8960009395736684, + "grad_norm": 0.0, + "learning_rate": 5.619297199193818e-07, + "loss": 1.1543, + "step": 30516 + }, + { + "epoch": 0.8960303012508074, + "grad_norm": 0.0, + "learning_rate": 5.61615473009709e-07, + "loss": 1.2441, + "step": 30517 + }, + { + "epoch": 0.8960596629279465, + "grad_norm": 0.0, + "learning_rate": 5.613013114537625e-07, + "loss": 1.0464, + "step": 30518 + }, + { + "epoch": 0.8960890246050854, + "grad_norm": 0.0, + "learning_rate": 5.609872352543866e-07, + "loss": 1.0923, + "step": 30519 + }, + { + "epoch": 0.8961183862822244, + "grad_norm": 0.0, + "learning_rate": 5.606732444144191e-07, + "loss": 1.1421, + "step": 30520 + }, + { + "epoch": 0.8961477479593635, + "grad_norm": 0.0, + "learning_rate": 5.603593389367023e-07, + "loss": 1.1406, + "step": 30521 + }, + { + "epoch": 0.8961771096365024, + "grad_norm": 0.0, + "learning_rate": 5.600455188240739e-07, + "loss": 1.229, + "step": 30522 + }, + { + "epoch": 0.8962064713136414, + "grad_norm": 0.0, + "learning_rate": 5.597317840793714e-07, + "loss": 1.1992, + "step": 30523 + }, + { + "epoch": 0.8962358329907805, + "grad_norm": 0.0, + "learning_rate": 5.594181347054306e-07, + "loss": 1.2085, + "step": 30524 + }, + { + "epoch": 0.8962651946679194, + "grad_norm": 0.0, + "learning_rate": 5.591045707050902e-07, + "loss": 1.1851, + "step": 30525 + }, + { + "epoch": 0.8962945563450584, + "grad_norm": 0.0, + "learning_rate": 5.587910920811856e-07, + "loss": 1.2402, + "step": 30526 + }, + { + "epoch": 0.8963239180221975, + "grad_norm": 0.0, + "learning_rate": 5.584776988365503e-07, + "loss": 1.1934, + "step": 30527 + }, + { + "epoch": 0.8963532796993364, + "grad_norm": 0.0, + "learning_rate": 5.581643909740197e-07, + "loss": 1.1704, + "step": 30528 + }, + { + "epoch": 0.8963826413764754, + "grad_norm": 0.0, + "learning_rate": 5.578511684964249e-07, + "loss": 1.2241, + "step": 30529 + }, + { + "epoch": 0.8964120030536145, + "grad_norm": 0.0, + "learning_rate": 5.575380314066026e-07, + "loss": 1.4258, + "step": 30530 + }, + { + "epoch": 0.8964413647307534, + "grad_norm": 0.0, + "learning_rate": 5.572249797073815e-07, + "loss": 1.083, + "step": 30531 + }, + { + "epoch": 0.8964707264078924, + "grad_norm": 0.0, + "learning_rate": 5.569120134015938e-07, + "loss": 1.2373, + "step": 30532 + }, + { + "epoch": 0.8965000880850315, + "grad_norm": 0.0, + "learning_rate": 5.565991324920672e-07, + "loss": 1.124, + "step": 30533 + }, + { + "epoch": 0.8965294497621704, + "grad_norm": 0.0, + "learning_rate": 5.562863369816341e-07, + "loss": 1.2285, + "step": 30534 + }, + { + "epoch": 0.8965588114393094, + "grad_norm": 0.0, + "learning_rate": 5.55973626873123e-07, + "loss": 1.2422, + "step": 30535 + }, + { + "epoch": 0.8965881731164485, + "grad_norm": 0.0, + "learning_rate": 5.556610021693598e-07, + "loss": 1.1313, + "step": 30536 + }, + { + "epoch": 0.8966175347935874, + "grad_norm": 0.0, + "learning_rate": 5.553484628731742e-07, + "loss": 1.1924, + "step": 30537 + }, + { + "epoch": 0.8966468964707264, + "grad_norm": 0.0, + "learning_rate": 5.550360089873896e-07, + "loss": 1.2559, + "step": 30538 + }, + { + "epoch": 0.8966762581478654, + "grad_norm": 0.0, + "learning_rate": 5.547236405148338e-07, + "loss": 1.1914, + "step": 30539 + }, + { + "epoch": 0.8967056198250044, + "grad_norm": 0.0, + "learning_rate": 5.544113574583321e-07, + "loss": 1.0864, + "step": 30540 + }, + { + "epoch": 0.8967349815021434, + "grad_norm": 0.0, + "learning_rate": 5.540991598207079e-07, + "loss": 1.1582, + "step": 30541 + }, + { + "epoch": 0.8967643431792824, + "grad_norm": 0.0, + "learning_rate": 5.537870476047824e-07, + "loss": 1.2666, + "step": 30542 + }, + { + "epoch": 0.8967937048564214, + "grad_norm": 0.0, + "learning_rate": 5.53475020813381e-07, + "loss": 1.2637, + "step": 30543 + }, + { + "epoch": 0.8968230665335604, + "grad_norm": 0.0, + "learning_rate": 5.531630794493237e-07, + "loss": 1.1924, + "step": 30544 + }, + { + "epoch": 0.8968524282106994, + "grad_norm": 0.0, + "learning_rate": 5.528512235154315e-07, + "loss": 1.1992, + "step": 30545 + }, + { + "epoch": 0.8968817898878384, + "grad_norm": 0.0, + "learning_rate": 5.525394530145267e-07, + "loss": 1.1924, + "step": 30546 + }, + { + "epoch": 0.8969111515649774, + "grad_norm": 0.0, + "learning_rate": 5.52227767949427e-07, + "loss": 1.2568, + "step": 30547 + }, + { + "epoch": 0.8969405132421164, + "grad_norm": 0.0, + "learning_rate": 5.519161683229501e-07, + "loss": 1.1807, + "step": 30548 + }, + { + "epoch": 0.8969698749192554, + "grad_norm": 0.0, + "learning_rate": 5.51604654137915e-07, + "loss": 1.2124, + "step": 30549 + }, + { + "epoch": 0.8969992365963944, + "grad_norm": 0.0, + "learning_rate": 5.512932253971392e-07, + "loss": 1.0127, + "step": 30550 + }, + { + "epoch": 0.8970285982735334, + "grad_norm": 0.0, + "learning_rate": 5.509818821034374e-07, + "loss": 1.1377, + "step": 30551 + }, + { + "epoch": 0.8970579599506724, + "grad_norm": 0.0, + "learning_rate": 5.506706242596282e-07, + "loss": 1.084, + "step": 30552 + }, + { + "epoch": 0.8970873216278114, + "grad_norm": 0.0, + "learning_rate": 5.50359451868523e-07, + "loss": 1.3008, + "step": 30553 + }, + { + "epoch": 0.8971166833049504, + "grad_norm": 0.0, + "learning_rate": 5.500483649329392e-07, + "loss": 1.2212, + "step": 30554 + }, + { + "epoch": 0.8971460449820894, + "grad_norm": 0.0, + "learning_rate": 5.49737363455688e-07, + "loss": 1.2607, + "step": 30555 + }, + { + "epoch": 0.8971754066592283, + "grad_norm": 0.0, + "learning_rate": 5.494264474395816e-07, + "loss": 1.29, + "step": 30556 + }, + { + "epoch": 0.8972047683363674, + "grad_norm": 0.0, + "learning_rate": 5.491156168874324e-07, + "loss": 1.1099, + "step": 30557 + }, + { + "epoch": 0.8972341300135064, + "grad_norm": 0.0, + "learning_rate": 5.4880487180205e-07, + "loss": 1.2817, + "step": 30558 + }, + { + "epoch": 0.8972634916906453, + "grad_norm": 0.0, + "learning_rate": 5.484942121862458e-07, + "loss": 1.1392, + "step": 30559 + }, + { + "epoch": 0.8972928533677844, + "grad_norm": 0.0, + "learning_rate": 5.481836380428285e-07, + "loss": 1.1392, + "step": 30560 + }, + { + "epoch": 0.8973222150449234, + "grad_norm": 0.0, + "learning_rate": 5.478731493746092e-07, + "loss": 1.292, + "step": 30561 + }, + { + "epoch": 0.8973515767220623, + "grad_norm": 0.0, + "learning_rate": 5.475627461843914e-07, + "loss": 1.2012, + "step": 30562 + }, + { + "epoch": 0.8973809383992014, + "grad_norm": 0.0, + "learning_rate": 5.472524284749869e-07, + "loss": 1.042, + "step": 30563 + }, + { + "epoch": 0.8974103000763404, + "grad_norm": 0.0, + "learning_rate": 5.469421962491983e-07, + "loss": 1.0439, + "step": 30564 + }, + { + "epoch": 0.8974396617534793, + "grad_norm": 0.0, + "learning_rate": 5.466320495098331e-07, + "loss": 1.1147, + "step": 30565 + }, + { + "epoch": 0.8974690234306184, + "grad_norm": 0.0, + "learning_rate": 5.463219882596949e-07, + "loss": 1.2852, + "step": 30566 + }, + { + "epoch": 0.8974983851077574, + "grad_norm": 0.0, + "learning_rate": 5.460120125015877e-07, + "loss": 1.1465, + "step": 30567 + }, + { + "epoch": 0.8975277467848963, + "grad_norm": 0.0, + "learning_rate": 5.457021222383153e-07, + "loss": 1.1289, + "step": 30568 + }, + { + "epoch": 0.8975571084620354, + "grad_norm": 0.0, + "learning_rate": 5.453923174726794e-07, + "loss": 1.127, + "step": 30569 + }, + { + "epoch": 0.8975864701391744, + "grad_norm": 0.0, + "learning_rate": 5.450825982074826e-07, + "loss": 1.1821, + "step": 30570 + }, + { + "epoch": 0.8976158318163133, + "grad_norm": 0.0, + "learning_rate": 5.447729644455246e-07, + "loss": 1.3027, + "step": 30571 + }, + { + "epoch": 0.8976451934934524, + "grad_norm": 0.0, + "learning_rate": 5.44463416189609e-07, + "loss": 1.1553, + "step": 30572 + }, + { + "epoch": 0.8976745551705914, + "grad_norm": 0.0, + "learning_rate": 5.441539534425288e-07, + "loss": 1.1318, + "step": 30573 + }, + { + "epoch": 0.8977039168477303, + "grad_norm": 0.0, + "learning_rate": 5.438445762070888e-07, + "loss": 1.187, + "step": 30574 + }, + { + "epoch": 0.8977332785248694, + "grad_norm": 0.0, + "learning_rate": 5.43535284486083e-07, + "loss": 1.1841, + "step": 30575 + }, + { + "epoch": 0.8977626402020084, + "grad_norm": 0.0, + "learning_rate": 5.432260782823084e-07, + "loss": 1.0581, + "step": 30576 + }, + { + "epoch": 0.8977920018791473, + "grad_norm": 0.0, + "learning_rate": 5.429169575985638e-07, + "loss": 1.0762, + "step": 30577 + }, + { + "epoch": 0.8978213635562864, + "grad_norm": 0.0, + "learning_rate": 5.426079224376412e-07, + "loss": 1.2373, + "step": 30578 + }, + { + "epoch": 0.8978507252334254, + "grad_norm": 0.0, + "learning_rate": 5.422989728023387e-07, + "loss": 1.0776, + "step": 30579 + }, + { + "epoch": 0.8978800869105643, + "grad_norm": 0.0, + "learning_rate": 5.419901086954493e-07, + "loss": 1.2119, + "step": 30580 + }, + { + "epoch": 0.8979094485877034, + "grad_norm": 0.0, + "learning_rate": 5.416813301197655e-07, + "loss": 1.2319, + "step": 30581 + }, + { + "epoch": 0.8979388102648423, + "grad_norm": 0.0, + "learning_rate": 5.413726370780781e-07, + "loss": 1.3047, + "step": 30582 + }, + { + "epoch": 0.8979681719419813, + "grad_norm": 0.0, + "learning_rate": 5.410640295731817e-07, + "loss": 1.1377, + "step": 30583 + }, + { + "epoch": 0.8979975336191203, + "grad_norm": 0.0, + "learning_rate": 5.407555076078664e-07, + "loss": 1.0894, + "step": 30584 + }, + { + "epoch": 0.8980268952962593, + "grad_norm": 0.0, + "learning_rate": 5.404470711849197e-07, + "loss": 1.2686, + "step": 30585 + }, + { + "epoch": 0.8980562569733983, + "grad_norm": 0.0, + "learning_rate": 5.40138720307134e-07, + "loss": 1.2021, + "step": 30586 + }, + { + "epoch": 0.8980856186505373, + "grad_norm": 0.0, + "learning_rate": 5.398304549772948e-07, + "loss": 1.1748, + "step": 30587 + }, + { + "epoch": 0.8981149803276763, + "grad_norm": 0.0, + "learning_rate": 5.395222751981932e-07, + "loss": 1.1035, + "step": 30588 + }, + { + "epoch": 0.8981443420048153, + "grad_norm": 0.0, + "learning_rate": 5.392141809726148e-07, + "loss": 1.2158, + "step": 30589 + }, + { + "epoch": 0.8981737036819543, + "grad_norm": 0.0, + "learning_rate": 5.389061723033462e-07, + "loss": 1.1455, + "step": 30590 + }, + { + "epoch": 0.8982030653590933, + "grad_norm": 0.0, + "learning_rate": 5.385982491931696e-07, + "loss": 1.2124, + "step": 30591 + }, + { + "epoch": 0.8982324270362323, + "grad_norm": 0.0, + "learning_rate": 5.38290411644874e-07, + "loss": 1.2031, + "step": 30592 + }, + { + "epoch": 0.8982617887133713, + "grad_norm": 0.0, + "learning_rate": 5.379826596612414e-07, + "loss": 1.1504, + "step": 30593 + }, + { + "epoch": 0.8982911503905103, + "grad_norm": 0.0, + "learning_rate": 5.376749932450532e-07, + "loss": 1.1855, + "step": 30594 + }, + { + "epoch": 0.8983205120676493, + "grad_norm": 0.0, + "learning_rate": 5.373674123990946e-07, + "loss": 1.1416, + "step": 30595 + }, + { + "epoch": 0.8983498737447883, + "grad_norm": 0.0, + "learning_rate": 5.37059917126147e-07, + "loss": 1.2734, + "step": 30596 + }, + { + "epoch": 0.8983792354219273, + "grad_norm": 0.0, + "learning_rate": 5.367525074289892e-07, + "loss": 1.2925, + "step": 30597 + }, + { + "epoch": 0.8984085970990663, + "grad_norm": 0.0, + "learning_rate": 5.364451833104012e-07, + "loss": 1.2197, + "step": 30598 + }, + { + "epoch": 0.8984379587762052, + "grad_norm": 0.0, + "learning_rate": 5.361379447731641e-07, + "loss": 1.0986, + "step": 30599 + }, + { + "epoch": 0.8984673204533443, + "grad_norm": 0.0, + "learning_rate": 5.358307918200534e-07, + "loss": 1.2686, + "step": 30600 + }, + { + "epoch": 0.8984966821304833, + "grad_norm": 0.0, + "learning_rate": 5.355237244538503e-07, + "loss": 1.0254, + "step": 30601 + }, + { + "epoch": 0.8985260438076222, + "grad_norm": 0.0, + "learning_rate": 5.352167426773292e-07, + "loss": 1.2139, + "step": 30602 + }, + { + "epoch": 0.8985554054847613, + "grad_norm": 0.0, + "learning_rate": 5.349098464932678e-07, + "loss": 1.1885, + "step": 30603 + }, + { + "epoch": 0.8985847671619003, + "grad_norm": 0.0, + "learning_rate": 5.346030359044407e-07, + "loss": 1.1133, + "step": 30604 + }, + { + "epoch": 0.8986141288390392, + "grad_norm": 0.0, + "learning_rate": 5.342963109136235e-07, + "loss": 1.2256, + "step": 30605 + }, + { + "epoch": 0.8986434905161783, + "grad_norm": 0.0, + "learning_rate": 5.339896715235881e-07, + "loss": 1.146, + "step": 30606 + }, + { + "epoch": 0.8986728521933173, + "grad_norm": 0.0, + "learning_rate": 5.336831177371072e-07, + "loss": 1.2256, + "step": 30607 + }, + { + "epoch": 0.8987022138704562, + "grad_norm": 0.0, + "learning_rate": 5.333766495569548e-07, + "loss": 1.21, + "step": 30608 + }, + { + "epoch": 0.8987315755475953, + "grad_norm": 0.0, + "learning_rate": 5.330702669859011e-07, + "loss": 1.0981, + "step": 30609 + }, + { + "epoch": 0.8987609372247343, + "grad_norm": 0.0, + "learning_rate": 5.327639700267195e-07, + "loss": 1.1138, + "step": 30610 + }, + { + "epoch": 0.8987902989018732, + "grad_norm": 0.0, + "learning_rate": 5.324577586821755e-07, + "loss": 1.1953, + "step": 30611 + }, + { + "epoch": 0.8988196605790123, + "grad_norm": 0.0, + "learning_rate": 5.321516329550425e-07, + "loss": 1.1265, + "step": 30612 + }, + { + "epoch": 0.8988490222561513, + "grad_norm": 0.0, + "learning_rate": 5.31845592848087e-07, + "loss": 1.1587, + "step": 30613 + }, + { + "epoch": 0.8988783839332902, + "grad_norm": 0.0, + "learning_rate": 5.315396383640769e-07, + "loss": 1.187, + "step": 30614 + }, + { + "epoch": 0.8989077456104293, + "grad_norm": 0.0, + "learning_rate": 5.31233769505779e-07, + "loss": 1.3486, + "step": 30615 + }, + { + "epoch": 0.8989371072875683, + "grad_norm": 0.0, + "learning_rate": 5.309279862759576e-07, + "loss": 1.2349, + "step": 30616 + }, + { + "epoch": 0.8989664689647072, + "grad_norm": 0.0, + "learning_rate": 5.306222886773804e-07, + "loss": 1.3535, + "step": 30617 + }, + { + "epoch": 0.8989958306418463, + "grad_norm": 0.0, + "learning_rate": 5.303166767128098e-07, + "loss": 1.2529, + "step": 30618 + }, + { + "epoch": 0.8990251923189853, + "grad_norm": 0.0, + "learning_rate": 5.300111503850114e-07, + "loss": 1.1948, + "step": 30619 + }, + { + "epoch": 0.8990545539961242, + "grad_norm": 0.0, + "learning_rate": 5.297057096967473e-07, + "loss": 1.0952, + "step": 30620 + }, + { + "epoch": 0.8990839156732633, + "grad_norm": 0.0, + "learning_rate": 5.29400354650782e-07, + "loss": 1.207, + "step": 30621 + }, + { + "epoch": 0.8991132773504023, + "grad_norm": 0.0, + "learning_rate": 5.290950852498722e-07, + "loss": 1.1558, + "step": 30622 + }, + { + "epoch": 0.8991426390275412, + "grad_norm": 0.0, + "learning_rate": 5.287899014967812e-07, + "loss": 1.0552, + "step": 30623 + }, + { + "epoch": 0.8991720007046803, + "grad_norm": 0.0, + "learning_rate": 5.284848033942703e-07, + "loss": 1.2524, + "step": 30624 + }, + { + "epoch": 0.8992013623818192, + "grad_norm": 0.0, + "learning_rate": 5.281797909450947e-07, + "loss": 1.1079, + "step": 30625 + }, + { + "epoch": 0.8992307240589582, + "grad_norm": 0.0, + "learning_rate": 5.27874864152017e-07, + "loss": 1.105, + "step": 30626 + }, + { + "epoch": 0.8992600857360973, + "grad_norm": 0.0, + "learning_rate": 5.275700230177915e-07, + "loss": 1.2744, + "step": 30627 + }, + { + "epoch": 0.8992894474132362, + "grad_norm": 0.0, + "learning_rate": 5.272652675451772e-07, + "loss": 1.1895, + "step": 30628 + }, + { + "epoch": 0.8993188090903752, + "grad_norm": 0.0, + "learning_rate": 5.269605977369297e-07, + "loss": 1.2119, + "step": 30629 + }, + { + "epoch": 0.8993481707675143, + "grad_norm": 0.0, + "learning_rate": 5.266560135958032e-07, + "loss": 1.1504, + "step": 30630 + }, + { + "epoch": 0.8993775324446532, + "grad_norm": 0.0, + "learning_rate": 5.263515151245502e-07, + "loss": 1.0918, + "step": 30631 + }, + { + "epoch": 0.8994068941217922, + "grad_norm": 0.0, + "learning_rate": 5.260471023259295e-07, + "loss": 1.1309, + "step": 30632 + }, + { + "epoch": 0.8994362557989313, + "grad_norm": 0.0, + "learning_rate": 5.257427752026901e-07, + "loss": 1.1631, + "step": 30633 + }, + { + "epoch": 0.8994656174760702, + "grad_norm": 0.0, + "learning_rate": 5.254385337575851e-07, + "loss": 1.2349, + "step": 30634 + }, + { + "epoch": 0.8994949791532092, + "grad_norm": 0.0, + "learning_rate": 5.25134377993366e-07, + "loss": 1.0952, + "step": 30635 + }, + { + "epoch": 0.8995243408303483, + "grad_norm": 0.0, + "learning_rate": 5.248303079127826e-07, + "loss": 1.249, + "step": 30636 + }, + { + "epoch": 0.8995537025074872, + "grad_norm": 0.0, + "learning_rate": 5.24526323518586e-07, + "loss": 1.1616, + "step": 30637 + }, + { + "epoch": 0.8995830641846262, + "grad_norm": 0.0, + "learning_rate": 5.242224248135253e-07, + "loss": 1.1562, + "step": 30638 + }, + { + "epoch": 0.8996124258617653, + "grad_norm": 0.0, + "learning_rate": 5.239186118003481e-07, + "loss": 1.2227, + "step": 30639 + }, + { + "epoch": 0.8996417875389042, + "grad_norm": 0.0, + "learning_rate": 5.236148844818001e-07, + "loss": 1.0742, + "step": 30640 + }, + { + "epoch": 0.8996711492160432, + "grad_norm": 0.0, + "learning_rate": 5.233112428606302e-07, + "loss": 1.1934, + "step": 30641 + }, + { + "epoch": 0.8997005108931823, + "grad_norm": 0.0, + "learning_rate": 5.230076869395839e-07, + "loss": 1.1182, + "step": 30642 + }, + { + "epoch": 0.8997298725703212, + "grad_norm": 0.0, + "learning_rate": 5.227042167214058e-07, + "loss": 1.1724, + "step": 30643 + }, + { + "epoch": 0.8997592342474602, + "grad_norm": 0.0, + "learning_rate": 5.224008322088426e-07, + "loss": 1.1177, + "step": 30644 + }, + { + "epoch": 0.8997885959245993, + "grad_norm": 0.0, + "learning_rate": 5.22097533404633e-07, + "loss": 1.2148, + "step": 30645 + }, + { + "epoch": 0.8998179576017382, + "grad_norm": 0.0, + "learning_rate": 5.217943203115261e-07, + "loss": 1.1318, + "step": 30646 + }, + { + "epoch": 0.8998473192788772, + "grad_norm": 0.0, + "learning_rate": 5.214911929322575e-07, + "loss": 1.2461, + "step": 30647 + }, + { + "epoch": 0.8998766809560163, + "grad_norm": 0.0, + "learning_rate": 5.211881512695738e-07, + "loss": 1.2441, + "step": 30648 + }, + { + "epoch": 0.8999060426331552, + "grad_norm": 0.0, + "learning_rate": 5.208851953262106e-07, + "loss": 1.2158, + "step": 30649 + }, + { + "epoch": 0.8999354043102942, + "grad_norm": 0.0, + "learning_rate": 5.205823251049124e-07, + "loss": 1.3105, + "step": 30650 + }, + { + "epoch": 0.8999647659874332, + "grad_norm": 0.0, + "learning_rate": 5.202795406084138e-07, + "loss": 1.0786, + "step": 30651 + }, + { + "epoch": 0.8999941276645722, + "grad_norm": 0.0, + "learning_rate": 5.19976841839458e-07, + "loss": 1.2046, + "step": 30652 + }, + { + "epoch": 0.9000234893417112, + "grad_norm": 0.0, + "learning_rate": 5.196742288007784e-07, + "loss": 1.2217, + "step": 30653 + }, + { + "epoch": 0.9000528510188502, + "grad_norm": 0.0, + "learning_rate": 5.193717014951128e-07, + "loss": 1.1426, + "step": 30654 + }, + { + "epoch": 0.9000822126959892, + "grad_norm": 0.0, + "learning_rate": 5.190692599251967e-07, + "loss": 1.1523, + "step": 30655 + }, + { + "epoch": 0.9001115743731282, + "grad_norm": 0.0, + "learning_rate": 5.187669040937637e-07, + "loss": 1.2559, + "step": 30656 + }, + { + "epoch": 0.9001409360502672, + "grad_norm": 0.0, + "learning_rate": 5.184646340035526e-07, + "loss": 1.2178, + "step": 30657 + }, + { + "epoch": 0.9001702977274062, + "grad_norm": 0.0, + "learning_rate": 5.181624496572912e-07, + "loss": 1.3555, + "step": 30658 + }, + { + "epoch": 0.9001996594045452, + "grad_norm": 0.0, + "learning_rate": 5.178603510577174e-07, + "loss": 1.1582, + "step": 30659 + }, + { + "epoch": 0.9002290210816842, + "grad_norm": 0.0, + "learning_rate": 5.175583382075588e-07, + "loss": 1.2373, + "step": 30660 + }, + { + "epoch": 0.9002583827588232, + "grad_norm": 0.0, + "learning_rate": 5.172564111095501e-07, + "loss": 1.0923, + "step": 30661 + }, + { + "epoch": 0.9002877444359622, + "grad_norm": 0.0, + "learning_rate": 5.169545697664213e-07, + "loss": 1.1348, + "step": 30662 + }, + { + "epoch": 0.9003171061131012, + "grad_norm": 0.0, + "learning_rate": 5.166528141809002e-07, + "loss": 1.1963, + "step": 30663 + }, + { + "epoch": 0.9003464677902402, + "grad_norm": 0.0, + "learning_rate": 5.163511443557167e-07, + "loss": 1.1763, + "step": 30664 + }, + { + "epoch": 0.9003758294673792, + "grad_norm": 0.0, + "learning_rate": 5.160495602935967e-07, + "loss": 1.0234, + "step": 30665 + }, + { + "epoch": 0.9004051911445182, + "grad_norm": 0.0, + "learning_rate": 5.15748061997271e-07, + "loss": 1.1875, + "step": 30666 + }, + { + "epoch": 0.9004345528216572, + "grad_norm": 0.0, + "learning_rate": 5.154466494694632e-07, + "loss": 1.0996, + "step": 30667 + }, + { + "epoch": 0.9004639144987961, + "grad_norm": 0.0, + "learning_rate": 5.151453227129011e-07, + "loss": 1.0635, + "step": 30668 + }, + { + "epoch": 0.9004932761759352, + "grad_norm": 0.0, + "learning_rate": 5.148440817303091e-07, + "loss": 1.2129, + "step": 30669 + }, + { + "epoch": 0.9005226378530742, + "grad_norm": 0.0, + "learning_rate": 5.145429265244117e-07, + "loss": 1.3164, + "step": 30670 + }, + { + "epoch": 0.9005519995302131, + "grad_norm": 0.0, + "learning_rate": 5.142418570979313e-07, + "loss": 1.2534, + "step": 30671 + }, + { + "epoch": 0.9005813612073522, + "grad_norm": 0.0, + "learning_rate": 5.139408734535922e-07, + "loss": 1.2529, + "step": 30672 + }, + { + "epoch": 0.9006107228844912, + "grad_norm": 0.0, + "learning_rate": 5.136399755941146e-07, + "loss": 1.2773, + "step": 30673 + }, + { + "epoch": 0.9006400845616301, + "grad_norm": 0.0, + "learning_rate": 5.133391635222185e-07, + "loss": 1.2871, + "step": 30674 + }, + { + "epoch": 0.9006694462387692, + "grad_norm": 0.0, + "learning_rate": 5.130384372406283e-07, + "loss": 1.3018, + "step": 30675 + }, + { + "epoch": 0.9006988079159082, + "grad_norm": 0.0, + "learning_rate": 5.127377967520586e-07, + "loss": 1.1099, + "step": 30676 + }, + { + "epoch": 0.9007281695930471, + "grad_norm": 0.0, + "learning_rate": 5.124372420592328e-07, + "loss": 1.2666, + "step": 30677 + }, + { + "epoch": 0.9007575312701862, + "grad_norm": 0.0, + "learning_rate": 5.121367731648675e-07, + "loss": 1.1924, + "step": 30678 + }, + { + "epoch": 0.9007868929473252, + "grad_norm": 0.0, + "learning_rate": 5.118363900716782e-07, + "loss": 1.1187, + "step": 30679 + }, + { + "epoch": 0.9008162546244641, + "grad_norm": 0.0, + "learning_rate": 5.115360927823809e-07, + "loss": 1.2646, + "step": 30680 + }, + { + "epoch": 0.9008456163016032, + "grad_norm": 0.0, + "learning_rate": 5.112358812996942e-07, + "loss": 1.1689, + "step": 30681 + }, + { + "epoch": 0.9008749779787422, + "grad_norm": 0.0, + "learning_rate": 5.109357556263328e-07, + "loss": 1.2236, + "step": 30682 + }, + { + "epoch": 0.9009043396558811, + "grad_norm": 0.0, + "learning_rate": 5.106357157650066e-07, + "loss": 1.2373, + "step": 30683 + }, + { + "epoch": 0.9009337013330201, + "grad_norm": 0.0, + "learning_rate": 5.103357617184345e-07, + "loss": 1.1582, + "step": 30684 + }, + { + "epoch": 0.9009630630101592, + "grad_norm": 0.0, + "learning_rate": 5.100358934893235e-07, + "loss": 1.2109, + "step": 30685 + }, + { + "epoch": 0.9009924246872981, + "grad_norm": 0.0, + "learning_rate": 5.097361110803911e-07, + "loss": 1.0596, + "step": 30686 + }, + { + "epoch": 0.9010217863644371, + "grad_norm": 0.0, + "learning_rate": 5.094364144943454e-07, + "loss": 1.0601, + "step": 30687 + }, + { + "epoch": 0.9010511480415762, + "grad_norm": 0.0, + "learning_rate": 5.091368037338961e-07, + "loss": 1.1431, + "step": 30688 + }, + { + "epoch": 0.9010805097187151, + "grad_norm": 0.0, + "learning_rate": 5.088372788017526e-07, + "loss": 1.1963, + "step": 30689 + }, + { + "epoch": 0.9011098713958541, + "grad_norm": 0.0, + "learning_rate": 5.085378397006258e-07, + "loss": 1.1382, + "step": 30690 + }, + { + "epoch": 0.9011392330729932, + "grad_norm": 0.0, + "learning_rate": 5.082384864332202e-07, + "loss": 1.2559, + "step": 30691 + }, + { + "epoch": 0.9011685947501321, + "grad_norm": 0.0, + "learning_rate": 5.079392190022458e-07, + "loss": 1.1396, + "step": 30692 + }, + { + "epoch": 0.9011979564272711, + "grad_norm": 0.0, + "learning_rate": 5.076400374104084e-07, + "loss": 1.2373, + "step": 30693 + }, + { + "epoch": 0.9012273181044101, + "grad_norm": 0.0, + "learning_rate": 5.073409416604125e-07, + "loss": 1.2983, + "step": 30694 + }, + { + "epoch": 0.9012566797815491, + "grad_norm": 0.0, + "learning_rate": 5.070419317549646e-07, + "loss": 1.2451, + "step": 30695 + }, + { + "epoch": 0.9012860414586881, + "grad_norm": 0.0, + "learning_rate": 5.067430076967672e-07, + "loss": 1.2588, + "step": 30696 + }, + { + "epoch": 0.9013154031358271, + "grad_norm": 0.0, + "learning_rate": 5.064441694885247e-07, + "loss": 1.333, + "step": 30697 + }, + { + "epoch": 0.9013447648129661, + "grad_norm": 0.0, + "learning_rate": 5.061454171329372e-07, + "loss": 1.2261, + "step": 30698 + }, + { + "epoch": 0.9013741264901051, + "grad_norm": 0.0, + "learning_rate": 5.058467506327091e-07, + "loss": 1.1479, + "step": 30699 + }, + { + "epoch": 0.9014034881672441, + "grad_norm": 0.0, + "learning_rate": 5.055481699905396e-07, + "loss": 1.1909, + "step": 30700 + }, + { + "epoch": 0.9014328498443831, + "grad_norm": 0.0, + "learning_rate": 5.052496752091307e-07, + "loss": 1.2412, + "step": 30701 + }, + { + "epoch": 0.9014622115215221, + "grad_norm": 0.0, + "learning_rate": 5.049512662911805e-07, + "loss": 1.2095, + "step": 30702 + }, + { + "epoch": 0.9014915731986611, + "grad_norm": 0.0, + "learning_rate": 5.046529432393876e-07, + "loss": 1.2852, + "step": 30703 + }, + { + "epoch": 0.9015209348758001, + "grad_norm": 0.0, + "learning_rate": 5.043547060564502e-07, + "loss": 1.1943, + "step": 30704 + }, + { + "epoch": 0.9015502965529391, + "grad_norm": 0.0, + "learning_rate": 5.040565547450638e-07, + "loss": 1.167, + "step": 30705 + }, + { + "epoch": 0.9015796582300781, + "grad_norm": 0.0, + "learning_rate": 5.037584893079272e-07, + "loss": 1.1201, + "step": 30706 + }, + { + "epoch": 0.9016090199072171, + "grad_norm": 0.0, + "learning_rate": 5.03460509747733e-07, + "loss": 1.2168, + "step": 30707 + }, + { + "epoch": 0.901638381584356, + "grad_norm": 0.0, + "learning_rate": 5.031626160671787e-07, + "loss": 1.2607, + "step": 30708 + }, + { + "epoch": 0.9016677432614951, + "grad_norm": 0.0, + "learning_rate": 5.028648082689557e-07, + "loss": 1.0903, + "step": 30709 + }, + { + "epoch": 0.9016971049386341, + "grad_norm": 0.0, + "learning_rate": 5.025670863557608e-07, + "loss": 1.2764, + "step": 30710 + }, + { + "epoch": 0.901726466615773, + "grad_norm": 0.0, + "learning_rate": 5.022694503302828e-07, + "loss": 1.2002, + "step": 30711 + }, + { + "epoch": 0.9017558282929121, + "grad_norm": 0.0, + "learning_rate": 5.01971900195215e-07, + "loss": 1.1348, + "step": 30712 + }, + { + "epoch": 0.9017851899700511, + "grad_norm": 0.0, + "learning_rate": 5.016744359532489e-07, + "loss": 1.1035, + "step": 30713 + }, + { + "epoch": 0.90181455164719, + "grad_norm": 0.0, + "learning_rate": 5.013770576070709e-07, + "loss": 1.1831, + "step": 30714 + }, + { + "epoch": 0.9018439133243291, + "grad_norm": 0.0, + "learning_rate": 5.010797651593746e-07, + "loss": 1.1934, + "step": 30715 + }, + { + "epoch": 0.9018732750014681, + "grad_norm": 0.0, + "learning_rate": 5.007825586128445e-07, + "loss": 1.2617, + "step": 30716 + }, + { + "epoch": 0.901902636678607, + "grad_norm": 0.0, + "learning_rate": 5.004854379701729e-07, + "loss": 1.2471, + "step": 30717 + }, + { + "epoch": 0.9019319983557461, + "grad_norm": 0.0, + "learning_rate": 5.00188403234042e-07, + "loss": 1.2412, + "step": 30718 + }, + { + "epoch": 0.9019613600328851, + "grad_norm": 0.0, + "learning_rate": 4.998914544071431e-07, + "loss": 1.1875, + "step": 30719 + }, + { + "epoch": 0.901990721710024, + "grad_norm": 0.0, + "learning_rate": 4.995945914921574e-07, + "loss": 1.2554, + "step": 30720 + }, + { + "epoch": 0.9020200833871631, + "grad_norm": 0.0, + "learning_rate": 4.992978144917715e-07, + "loss": 1.2227, + "step": 30721 + }, + { + "epoch": 0.9020494450643021, + "grad_norm": 0.0, + "learning_rate": 4.990011234086689e-07, + "loss": 1.2803, + "step": 30722 + }, + { + "epoch": 0.902078806741441, + "grad_norm": 0.0, + "learning_rate": 4.987045182455308e-07, + "loss": 1.1416, + "step": 30723 + }, + { + "epoch": 0.9021081684185801, + "grad_norm": 0.0, + "learning_rate": 4.984079990050416e-07, + "loss": 1.0796, + "step": 30724 + }, + { + "epoch": 0.9021375300957191, + "grad_norm": 0.0, + "learning_rate": 4.981115656898816e-07, + "loss": 1.3125, + "step": 30725 + }, + { + "epoch": 0.902166891772858, + "grad_norm": 0.0, + "learning_rate": 4.978152183027341e-07, + "loss": 1.144, + "step": 30726 + }, + { + "epoch": 0.9021962534499971, + "grad_norm": 0.0, + "learning_rate": 4.975189568462768e-07, + "loss": 1.3076, + "step": 30727 + }, + { + "epoch": 0.9022256151271361, + "grad_norm": 0.0, + "learning_rate": 4.972227813231889e-07, + "loss": 1.0942, + "step": 30728 + }, + { + "epoch": 0.902254976804275, + "grad_norm": 0.0, + "learning_rate": 4.969266917361482e-07, + "loss": 1.207, + "step": 30729 + }, + { + "epoch": 0.9022843384814141, + "grad_norm": 0.0, + "learning_rate": 4.966306880878336e-07, + "loss": 1.209, + "step": 30730 + }, + { + "epoch": 0.9023137001585531, + "grad_norm": 0.0, + "learning_rate": 4.963347703809218e-07, + "loss": 1.145, + "step": 30731 + }, + { + "epoch": 0.902343061835692, + "grad_norm": 0.0, + "learning_rate": 4.960389386180875e-07, + "loss": 1.168, + "step": 30732 + }, + { + "epoch": 0.9023724235128311, + "grad_norm": 0.0, + "learning_rate": 4.957431928020084e-07, + "loss": 1.2129, + "step": 30733 + }, + { + "epoch": 0.90240178518997, + "grad_norm": 0.0, + "learning_rate": 4.95447532935357e-07, + "loss": 1.2671, + "step": 30734 + }, + { + "epoch": 0.902431146867109, + "grad_norm": 0.0, + "learning_rate": 4.951519590208087e-07, + "loss": 1.1021, + "step": 30735 + }, + { + "epoch": 0.9024605085442481, + "grad_norm": 0.0, + "learning_rate": 4.948564710610348e-07, + "loss": 1.1436, + "step": 30736 + }, + { + "epoch": 0.902489870221387, + "grad_norm": 0.0, + "learning_rate": 4.945610690587088e-07, + "loss": 1.1577, + "step": 30737 + }, + { + "epoch": 0.902519231898526, + "grad_norm": 0.0, + "learning_rate": 4.942657530164996e-07, + "loss": 1.2183, + "step": 30738 + }, + { + "epoch": 0.9025485935756651, + "grad_norm": 0.0, + "learning_rate": 4.939705229370817e-07, + "loss": 1.2686, + "step": 30739 + }, + { + "epoch": 0.902577955252804, + "grad_norm": 0.0, + "learning_rate": 4.936753788231208e-07, + "loss": 1.2656, + "step": 30740 + }, + { + "epoch": 0.902607316929943, + "grad_norm": 0.0, + "learning_rate": 4.933803206772903e-07, + "loss": 1.1714, + "step": 30741 + }, + { + "epoch": 0.9026366786070821, + "grad_norm": 0.0, + "learning_rate": 4.930853485022558e-07, + "loss": 1.2598, + "step": 30742 + }, + { + "epoch": 0.902666040284221, + "grad_norm": 0.0, + "learning_rate": 4.927904623006829e-07, + "loss": 1.1787, + "step": 30743 + }, + { + "epoch": 0.90269540196136, + "grad_norm": 0.0, + "learning_rate": 4.92495662075243e-07, + "loss": 1.2305, + "step": 30744 + }, + { + "epoch": 0.9027247636384991, + "grad_norm": 0.0, + "learning_rate": 4.922009478285994e-07, + "loss": 1.2378, + "step": 30745 + }, + { + "epoch": 0.902754125315638, + "grad_norm": 0.0, + "learning_rate": 4.919063195634188e-07, + "loss": 1.2148, + "step": 30746 + }, + { + "epoch": 0.902783486992777, + "grad_norm": 0.0, + "learning_rate": 4.916117772823614e-07, + "loss": 1.2588, + "step": 30747 + }, + { + "epoch": 0.9028128486699161, + "grad_norm": 0.0, + "learning_rate": 4.913173209880961e-07, + "loss": 1.1079, + "step": 30748 + }, + { + "epoch": 0.902842210347055, + "grad_norm": 0.0, + "learning_rate": 4.91022950683282e-07, + "loss": 1.25, + "step": 30749 + }, + { + "epoch": 0.902871572024194, + "grad_norm": 0.0, + "learning_rate": 4.907286663705835e-07, + "loss": 1.1436, + "step": 30750 + }, + { + "epoch": 0.9029009337013331, + "grad_norm": 0.0, + "learning_rate": 4.904344680526618e-07, + "loss": 1.2402, + "step": 30751 + }, + { + "epoch": 0.902930295378472, + "grad_norm": 0.0, + "learning_rate": 4.90140355732176e-07, + "loss": 1.1284, + "step": 30752 + }, + { + "epoch": 0.902959657055611, + "grad_norm": 0.0, + "learning_rate": 4.898463294117873e-07, + "loss": 1.1602, + "step": 30753 + }, + { + "epoch": 0.9029890187327501, + "grad_norm": 0.0, + "learning_rate": 4.895523890941511e-07, + "loss": 1.1255, + "step": 30754 + }, + { + "epoch": 0.903018380409889, + "grad_norm": 0.0, + "learning_rate": 4.892585347819312e-07, + "loss": 1.3184, + "step": 30755 + }, + { + "epoch": 0.903047742087028, + "grad_norm": 0.0, + "learning_rate": 4.889647664777797e-07, + "loss": 1.1274, + "step": 30756 + }, + { + "epoch": 0.903077103764167, + "grad_norm": 0.0, + "learning_rate": 4.886710841843578e-07, + "loss": 1.1885, + "step": 30757 + }, + { + "epoch": 0.903106465441306, + "grad_norm": 0.0, + "learning_rate": 4.883774879043169e-07, + "loss": 1.2451, + "step": 30758 + }, + { + "epoch": 0.903135827118445, + "grad_norm": 0.0, + "learning_rate": 4.880839776403168e-07, + "loss": 1.2256, + "step": 30759 + }, + { + "epoch": 0.903165188795584, + "grad_norm": 0.0, + "learning_rate": 4.877905533950078e-07, + "loss": 1.27, + "step": 30760 + }, + { + "epoch": 0.903194550472723, + "grad_norm": 0.0, + "learning_rate": 4.874972151710466e-07, + "loss": 1.2979, + "step": 30761 + }, + { + "epoch": 0.903223912149862, + "grad_norm": 0.0, + "learning_rate": 4.872039629710834e-07, + "loss": 1.1826, + "step": 30762 + }, + { + "epoch": 0.903253273827001, + "grad_norm": 0.0, + "learning_rate": 4.869107967977693e-07, + "loss": 1.2842, + "step": 30763 + }, + { + "epoch": 0.90328263550414, + "grad_norm": 0.0, + "learning_rate": 4.866177166537589e-07, + "loss": 1.2803, + "step": 30764 + }, + { + "epoch": 0.903311997181279, + "grad_norm": 0.0, + "learning_rate": 4.863247225417e-07, + "loss": 1.207, + "step": 30765 + }, + { + "epoch": 0.903341358858418, + "grad_norm": 0.0, + "learning_rate": 4.860318144642439e-07, + "loss": 1.2456, + "step": 30766 + }, + { + "epoch": 0.903370720535557, + "grad_norm": 0.0, + "learning_rate": 4.857389924240375e-07, + "loss": 1.2964, + "step": 30767 + }, + { + "epoch": 0.903400082212696, + "grad_norm": 0.0, + "learning_rate": 4.854462564237316e-07, + "loss": 1.2188, + "step": 30768 + }, + { + "epoch": 0.903429443889835, + "grad_norm": 0.0, + "learning_rate": 4.851536064659712e-07, + "loss": 1.1836, + "step": 30769 + }, + { + "epoch": 0.903458805566974, + "grad_norm": 0.0, + "learning_rate": 4.848610425534051e-07, + "loss": 1.0771, + "step": 30770 + }, + { + "epoch": 0.903488167244113, + "grad_norm": 0.0, + "learning_rate": 4.845685646886756e-07, + "loss": 1.1934, + "step": 30771 + }, + { + "epoch": 0.903517528921252, + "grad_norm": 0.0, + "learning_rate": 4.842761728744294e-07, + "loss": 1.2246, + "step": 30772 + }, + { + "epoch": 0.903546890598391, + "grad_norm": 0.0, + "learning_rate": 4.839838671133113e-07, + "loss": 1.1631, + "step": 30773 + }, + { + "epoch": 0.90357625227553, + "grad_norm": 0.0, + "learning_rate": 4.836916474079634e-07, + "loss": 1.1401, + "step": 30774 + }, + { + "epoch": 0.903605613952669, + "grad_norm": 0.0, + "learning_rate": 4.833995137610303e-07, + "loss": 1.3008, + "step": 30775 + }, + { + "epoch": 0.903634975629808, + "grad_norm": 0.0, + "learning_rate": 4.831074661751533e-07, + "loss": 1.0518, + "step": 30776 + }, + { + "epoch": 0.903664337306947, + "grad_norm": 0.0, + "learning_rate": 4.828155046529714e-07, + "loss": 1.209, + "step": 30777 + }, + { + "epoch": 0.903693698984086, + "grad_norm": 0.0, + "learning_rate": 4.825236291971258e-07, + "loss": 1.2021, + "step": 30778 + }, + { + "epoch": 0.903723060661225, + "grad_norm": 0.0, + "learning_rate": 4.822318398102576e-07, + "loss": 1.2305, + "step": 30779 + }, + { + "epoch": 0.903752422338364, + "grad_norm": 0.0, + "learning_rate": 4.819401364950049e-07, + "loss": 1.2139, + "step": 30780 + }, + { + "epoch": 0.903781784015503, + "grad_norm": 0.0, + "learning_rate": 4.816485192540033e-07, + "loss": 1.1279, + "step": 30781 + }, + { + "epoch": 0.903811145692642, + "grad_norm": 0.0, + "learning_rate": 4.813569880898927e-07, + "loss": 1.2065, + "step": 30782 + }, + { + "epoch": 0.9038405073697809, + "grad_norm": 0.0, + "learning_rate": 4.810655430053079e-07, + "loss": 1.2046, + "step": 30783 + }, + { + "epoch": 0.9038698690469199, + "grad_norm": 0.0, + "learning_rate": 4.807741840028868e-07, + "loss": 1.2607, + "step": 30784 + }, + { + "epoch": 0.903899230724059, + "grad_norm": 0.0, + "learning_rate": 4.804829110852627e-07, + "loss": 1.1406, + "step": 30785 + }, + { + "epoch": 0.9039285924011979, + "grad_norm": 0.0, + "learning_rate": 4.801917242550691e-07, + "loss": 1.144, + "step": 30786 + }, + { + "epoch": 0.9039579540783369, + "grad_norm": 0.0, + "learning_rate": 4.799006235149384e-07, + "loss": 1.1426, + "step": 30787 + }, + { + "epoch": 0.903987315755476, + "grad_norm": 0.0, + "learning_rate": 4.796096088675062e-07, + "loss": 1.0664, + "step": 30788 + }, + { + "epoch": 0.9040166774326149, + "grad_norm": 0.0, + "learning_rate": 4.793186803154015e-07, + "loss": 1.1479, + "step": 30789 + }, + { + "epoch": 0.9040460391097539, + "grad_norm": 0.0, + "learning_rate": 4.790278378612567e-07, + "loss": 1.2588, + "step": 30790 + }, + { + "epoch": 0.904075400786893, + "grad_norm": 0.0, + "learning_rate": 4.787370815077019e-07, + "loss": 1.3096, + "step": 30791 + }, + { + "epoch": 0.9041047624640319, + "grad_norm": 0.0, + "learning_rate": 4.78446411257365e-07, + "loss": 1.1948, + "step": 30792 + }, + { + "epoch": 0.9041341241411709, + "grad_norm": 0.0, + "learning_rate": 4.781558271128772e-07, + "loss": 1.2656, + "step": 30793 + }, + { + "epoch": 0.90416348581831, + "grad_norm": 0.0, + "learning_rate": 4.778653290768642e-07, + "loss": 1.2021, + "step": 30794 + }, + { + "epoch": 0.9041928474954489, + "grad_norm": 0.0, + "learning_rate": 4.77574917151954e-07, + "loss": 1.21, + "step": 30795 + }, + { + "epoch": 0.9042222091725879, + "grad_norm": 0.0, + "learning_rate": 4.77284591340772e-07, + "loss": 1.2441, + "step": 30796 + }, + { + "epoch": 0.904251570849727, + "grad_norm": 0.0, + "learning_rate": 4.769943516459441e-07, + "loss": 1.1494, + "step": 30797 + }, + { + "epoch": 0.9042809325268659, + "grad_norm": 0.0, + "learning_rate": 4.7670419807009484e-07, + "loss": 1.2734, + "step": 30798 + }, + { + "epoch": 0.9043102942040049, + "grad_norm": 0.0, + "learning_rate": 4.764141306158498e-07, + "loss": 1.231, + "step": 30799 + }, + { + "epoch": 0.904339655881144, + "grad_norm": 0.0, + "learning_rate": 4.761241492858304e-07, + "loss": 1.1709, + "step": 30800 + }, + { + "epoch": 0.9043690175582829, + "grad_norm": 0.0, + "learning_rate": 4.7583425408265995e-07, + "loss": 1.167, + "step": 30801 + }, + { + "epoch": 0.9043983792354219, + "grad_norm": 0.0, + "learning_rate": 4.755444450089586e-07, + "loss": 1.332, + "step": 30802 + }, + { + "epoch": 0.904427740912561, + "grad_norm": 0.0, + "learning_rate": 4.752547220673476e-07, + "loss": 1.2402, + "step": 30803 + }, + { + "epoch": 0.9044571025896999, + "grad_norm": 0.0, + "learning_rate": 4.7496508526044816e-07, + "loss": 1.1709, + "step": 30804 + }, + { + "epoch": 0.9044864642668389, + "grad_norm": 0.0, + "learning_rate": 4.7467553459087823e-07, + "loss": 1.21, + "step": 30805 + }, + { + "epoch": 0.904515825943978, + "grad_norm": 0.0, + "learning_rate": 4.7438607006125683e-07, + "loss": 1.2285, + "step": 30806 + }, + { + "epoch": 0.9045451876211169, + "grad_norm": 0.0, + "learning_rate": 4.740966916742018e-07, + "loss": 1.0908, + "step": 30807 + }, + { + "epoch": 0.9045745492982559, + "grad_norm": 0.0, + "learning_rate": 4.7380739943233114e-07, + "loss": 1.2266, + "step": 30808 + }, + { + "epoch": 0.9046039109753949, + "grad_norm": 0.0, + "learning_rate": 4.735181933382593e-07, + "loss": 1.3232, + "step": 30809 + }, + { + "epoch": 0.9046332726525339, + "grad_norm": 0.0, + "learning_rate": 4.732290733946032e-07, + "loss": 1.1831, + "step": 30810 + }, + { + "epoch": 0.9046626343296729, + "grad_norm": 0.0, + "learning_rate": 4.729400396039752e-07, + "loss": 1.165, + "step": 30811 + }, + { + "epoch": 0.9046919960068119, + "grad_norm": 0.0, + "learning_rate": 4.7265109196898973e-07, + "loss": 1.1885, + "step": 30812 + }, + { + "epoch": 0.9047213576839509, + "grad_norm": 0.0, + "learning_rate": 4.723622304922615e-07, + "loss": 1.3145, + "step": 30813 + }, + { + "epoch": 0.9047507193610899, + "grad_norm": 0.0, + "learning_rate": 4.720734551764006e-07, + "loss": 1.2646, + "step": 30814 + }, + { + "epoch": 0.9047800810382289, + "grad_norm": 0.0, + "learning_rate": 4.717847660240216e-07, + "loss": 1.2109, + "step": 30815 + }, + { + "epoch": 0.9048094427153679, + "grad_norm": 0.0, + "learning_rate": 4.7149616303773014e-07, + "loss": 1.1143, + "step": 30816 + }, + { + "epoch": 0.9048388043925069, + "grad_norm": 0.0, + "learning_rate": 4.7120764622014204e-07, + "loss": 1.1846, + "step": 30817 + }, + { + "epoch": 0.9048681660696459, + "grad_norm": 0.0, + "learning_rate": 4.709192155738629e-07, + "loss": 1.1611, + "step": 30818 + }, + { + "epoch": 0.9048975277467849, + "grad_norm": 0.0, + "learning_rate": 4.706308711015017e-07, + "loss": 1.2139, + "step": 30819 + }, + { + "epoch": 0.9049268894239239, + "grad_norm": 0.0, + "learning_rate": 4.703426128056654e-07, + "loss": 1.1733, + "step": 30820 + }, + { + "epoch": 0.9049562511010629, + "grad_norm": 0.0, + "learning_rate": 4.700544406889618e-07, + "loss": 1.2334, + "step": 30821 + }, + { + "epoch": 0.9049856127782019, + "grad_norm": 0.0, + "learning_rate": 4.6976635475399656e-07, + "loss": 1.2051, + "step": 30822 + }, + { + "epoch": 0.9050149744553408, + "grad_norm": 0.0, + "learning_rate": 4.6947835500337326e-07, + "loss": 1.125, + "step": 30823 + }, + { + "epoch": 0.9050443361324799, + "grad_norm": 0.0, + "learning_rate": 4.6919044143970083e-07, + "loss": 1.248, + "step": 30824 + }, + { + "epoch": 0.9050736978096189, + "grad_norm": 0.0, + "learning_rate": 4.6890261406557727e-07, + "loss": 1.1938, + "step": 30825 + }, + { + "epoch": 0.9051030594867578, + "grad_norm": 0.0, + "learning_rate": 4.6861487288361154e-07, + "loss": 1.2041, + "step": 30826 + }, + { + "epoch": 0.9051324211638969, + "grad_norm": 0.0, + "learning_rate": 4.6832721789640044e-07, + "loss": 1.2803, + "step": 30827 + }, + { + "epoch": 0.9051617828410359, + "grad_norm": 0.0, + "learning_rate": 4.6803964910654864e-07, + "loss": 1.2783, + "step": 30828 + }, + { + "epoch": 0.9051911445181748, + "grad_norm": 0.0, + "learning_rate": 4.677521665166529e-07, + "loss": 1.1743, + "step": 30829 + }, + { + "epoch": 0.9052205061953139, + "grad_norm": 0.0, + "learning_rate": 4.674647701293178e-07, + "loss": 1.2637, + "step": 30830 + }, + { + "epoch": 0.9052498678724529, + "grad_norm": 0.0, + "learning_rate": 4.6717745994714014e-07, + "loss": 1.2129, + "step": 30831 + }, + { + "epoch": 0.9052792295495918, + "grad_norm": 0.0, + "learning_rate": 4.668902359727168e-07, + "loss": 1.2832, + "step": 30832 + }, + { + "epoch": 0.9053085912267309, + "grad_norm": 0.0, + "learning_rate": 4.666030982086478e-07, + "loss": 1.291, + "step": 30833 + }, + { + "epoch": 0.9053379529038699, + "grad_norm": 0.0, + "learning_rate": 4.663160466575289e-07, + "loss": 1.1436, + "step": 30834 + }, + { + "epoch": 0.9053673145810088, + "grad_norm": 0.0, + "learning_rate": 4.660290813219548e-07, + "loss": 1.1582, + "step": 30835 + }, + { + "epoch": 0.9053966762581479, + "grad_norm": 0.0, + "learning_rate": 4.6574220220451995e-07, + "loss": 1.3311, + "step": 30836 + }, + { + "epoch": 0.9054260379352869, + "grad_norm": 0.0, + "learning_rate": 4.654554093078223e-07, + "loss": 1.147, + "step": 30837 + }, + { + "epoch": 0.9054553996124258, + "grad_norm": 0.0, + "learning_rate": 4.65168702634452e-07, + "loss": 1.2031, + "step": 30838 + }, + { + "epoch": 0.9054847612895649, + "grad_norm": 0.0, + "learning_rate": 4.648820821870037e-07, + "loss": 1.291, + "step": 30839 + }, + { + "epoch": 0.9055141229667039, + "grad_norm": 0.0, + "learning_rate": 4.645955479680686e-07, + "loss": 1.1807, + "step": 30840 + }, + { + "epoch": 0.9055434846438428, + "grad_norm": 0.0, + "learning_rate": 4.643090999802369e-07, + "loss": 1.1514, + "step": 30841 + }, + { + "epoch": 0.9055728463209819, + "grad_norm": 0.0, + "learning_rate": 4.6402273822610197e-07, + "loss": 1.1587, + "step": 30842 + }, + { + "epoch": 0.9056022079981209, + "grad_norm": 0.0, + "learning_rate": 4.637364627082508e-07, + "loss": 1.2676, + "step": 30843 + }, + { + "epoch": 0.9056315696752598, + "grad_norm": 0.0, + "learning_rate": 4.634502734292734e-07, + "loss": 1.1489, + "step": 30844 + }, + { + "epoch": 0.9056609313523989, + "grad_norm": 0.0, + "learning_rate": 4.6316417039175667e-07, + "loss": 1.2129, + "step": 30845 + }, + { + "epoch": 0.9056902930295379, + "grad_norm": 0.0, + "learning_rate": 4.628781535982896e-07, + "loss": 1.2461, + "step": 30846 + }, + { + "epoch": 0.9057196547066768, + "grad_norm": 0.0, + "learning_rate": 4.625922230514568e-07, + "loss": 1.2139, + "step": 30847 + }, + { + "epoch": 0.9057490163838159, + "grad_norm": 0.0, + "learning_rate": 4.623063787538462e-07, + "loss": 1.1567, + "step": 30848 + }, + { + "epoch": 0.9057783780609548, + "grad_norm": 0.0, + "learning_rate": 4.6202062070804133e-07, + "loss": 1.2769, + "step": 30849 + }, + { + "epoch": 0.9058077397380938, + "grad_norm": 0.0, + "learning_rate": 4.6173494891662565e-07, + "loss": 1.2861, + "step": 30850 + }, + { + "epoch": 0.9058371014152329, + "grad_norm": 0.0, + "learning_rate": 4.6144936338218705e-07, + "loss": 1.2998, + "step": 30851 + }, + { + "epoch": 0.9058664630923718, + "grad_norm": 0.0, + "learning_rate": 4.611638641073013e-07, + "loss": 1.2334, + "step": 30852 + }, + { + "epoch": 0.9058958247695108, + "grad_norm": 0.0, + "learning_rate": 4.608784510945552e-07, + "loss": 1.2056, + "step": 30853 + }, + { + "epoch": 0.9059251864466499, + "grad_norm": 0.0, + "learning_rate": 4.605931243465267e-07, + "loss": 1.123, + "step": 30854 + }, + { + "epoch": 0.9059545481237888, + "grad_norm": 0.0, + "learning_rate": 4.603078838657993e-07, + "loss": 1.3027, + "step": 30855 + }, + { + "epoch": 0.9059839098009278, + "grad_norm": 0.0, + "learning_rate": 4.6002272965494865e-07, + "loss": 1.1587, + "step": 30856 + }, + { + "epoch": 0.9060132714780669, + "grad_norm": 0.0, + "learning_rate": 4.597376617165583e-07, + "loss": 1.2266, + "step": 30857 + }, + { + "epoch": 0.9060426331552058, + "grad_norm": 0.0, + "learning_rate": 4.5945268005320286e-07, + "loss": 1.1802, + "step": 30858 + }, + { + "epoch": 0.9060719948323448, + "grad_norm": 0.0, + "learning_rate": 4.591677846674603e-07, + "loss": 1.1616, + "step": 30859 + }, + { + "epoch": 0.9061013565094839, + "grad_norm": 0.0, + "learning_rate": 4.588829755619073e-07, + "loss": 1.3115, + "step": 30860 + }, + { + "epoch": 0.9061307181866228, + "grad_norm": 0.0, + "learning_rate": 4.585982527391164e-07, + "loss": 1.2793, + "step": 30861 + }, + { + "epoch": 0.9061600798637618, + "grad_norm": 0.0, + "learning_rate": 4.583136162016677e-07, + "loss": 0.9692, + "step": 30862 + }, + { + "epoch": 0.9061894415409009, + "grad_norm": 0.0, + "learning_rate": 4.580290659521314e-07, + "loss": 1.248, + "step": 30863 + }, + { + "epoch": 0.9062188032180398, + "grad_norm": 0.0, + "learning_rate": 4.577446019930831e-07, + "loss": 1.1748, + "step": 30864 + }, + { + "epoch": 0.9062481648951788, + "grad_norm": 0.0, + "learning_rate": 4.5746022432709203e-07, + "loss": 1.1753, + "step": 30865 + }, + { + "epoch": 0.9062775265723179, + "grad_norm": 0.0, + "learning_rate": 4.571759329567338e-07, + "loss": 1.1523, + "step": 30866 + }, + { + "epoch": 0.9063068882494568, + "grad_norm": 0.0, + "learning_rate": 4.5689172788457856e-07, + "loss": 1.1777, + "step": 30867 + }, + { + "epoch": 0.9063362499265958, + "grad_norm": 0.0, + "learning_rate": 4.566076091131943e-07, + "loss": 1.1934, + "step": 30868 + }, + { + "epoch": 0.9063656116037349, + "grad_norm": 0.0, + "learning_rate": 4.563235766451513e-07, + "loss": 1.2388, + "step": 30869 + }, + { + "epoch": 0.9063949732808738, + "grad_norm": 0.0, + "learning_rate": 4.5603963048301835e-07, + "loss": 1.2842, + "step": 30870 + }, + { + "epoch": 0.9064243349580128, + "grad_norm": 0.0, + "learning_rate": 4.557557706293636e-07, + "loss": 1.252, + "step": 30871 + }, + { + "epoch": 0.9064536966351519, + "grad_norm": 0.0, + "learning_rate": 4.554719970867527e-07, + "loss": 1.3359, + "step": 30872 + }, + { + "epoch": 0.9064830583122908, + "grad_norm": 0.0, + "learning_rate": 4.5518830985775363e-07, + "loss": 1.3223, + "step": 30873 + }, + { + "epoch": 0.9065124199894298, + "grad_norm": 0.0, + "learning_rate": 4.54904708944931e-07, + "loss": 1.2681, + "step": 30874 + }, + { + "epoch": 0.9065417816665688, + "grad_norm": 0.0, + "learning_rate": 4.546211943508505e-07, + "loss": 1.208, + "step": 30875 + }, + { + "epoch": 0.9065711433437078, + "grad_norm": 0.0, + "learning_rate": 4.5433776607807455e-07, + "loss": 1.1982, + "step": 30876 + }, + { + "epoch": 0.9066005050208468, + "grad_norm": 0.0, + "learning_rate": 4.540544241291678e-07, + "loss": 1.3496, + "step": 30877 + }, + { + "epoch": 0.9066298666979858, + "grad_norm": 0.0, + "learning_rate": 4.5377116850668925e-07, + "loss": 1.1865, + "step": 30878 + }, + { + "epoch": 0.9066592283751248, + "grad_norm": 0.0, + "learning_rate": 4.534879992132046e-07, + "loss": 1.084, + "step": 30879 + }, + { + "epoch": 0.9066885900522638, + "grad_norm": 0.0, + "learning_rate": 4.5320491625127307e-07, + "loss": 1.0962, + "step": 30880 + }, + { + "epoch": 0.9067179517294028, + "grad_norm": 0.0, + "learning_rate": 4.5292191962345355e-07, + "loss": 1.1685, + "step": 30881 + }, + { + "epoch": 0.9067473134065418, + "grad_norm": 0.0, + "learning_rate": 4.526390093323063e-07, + "loss": 1.2075, + "step": 30882 + }, + { + "epoch": 0.9067766750836808, + "grad_norm": 0.0, + "learning_rate": 4.523561853803904e-07, + "loss": 1.2021, + "step": 30883 + }, + { + "epoch": 0.9068060367608197, + "grad_norm": 0.0, + "learning_rate": 4.5207344777026265e-07, + "loss": 1.1411, + "step": 30884 + }, + { + "epoch": 0.9068353984379588, + "grad_norm": 0.0, + "learning_rate": 4.5179079650447877e-07, + "loss": 1.23, + "step": 30885 + }, + { + "epoch": 0.9068647601150978, + "grad_norm": 0.0, + "learning_rate": 4.5150823158559786e-07, + "loss": 1.2305, + "step": 30886 + }, + { + "epoch": 0.9068941217922367, + "grad_norm": 0.0, + "learning_rate": 4.5122575301617123e-07, + "loss": 1.1855, + "step": 30887 + }, + { + "epoch": 0.9069234834693758, + "grad_norm": 0.0, + "learning_rate": 4.509433607987568e-07, + "loss": 1.209, + "step": 30888 + }, + { + "epoch": 0.9069528451465148, + "grad_norm": 0.0, + "learning_rate": 4.5066105493590804e-07, + "loss": 1.2715, + "step": 30889 + }, + { + "epoch": 0.9069822068236537, + "grad_norm": 0.0, + "learning_rate": 4.5037883543017415e-07, + "loss": 1.1304, + "step": 30890 + }, + { + "epoch": 0.9070115685007928, + "grad_norm": 0.0, + "learning_rate": 4.5009670228411294e-07, + "loss": 1.123, + "step": 30891 + }, + { + "epoch": 0.9070409301779317, + "grad_norm": 0.0, + "learning_rate": 4.4981465550027246e-07, + "loss": 1.1904, + "step": 30892 + }, + { + "epoch": 0.9070702918550707, + "grad_norm": 0.0, + "learning_rate": 4.49532695081204e-07, + "loss": 1.2158, + "step": 30893 + }, + { + "epoch": 0.9070996535322098, + "grad_norm": 0.0, + "learning_rate": 4.492508210294555e-07, + "loss": 1.1562, + "step": 30894 + }, + { + "epoch": 0.9071290152093487, + "grad_norm": 0.0, + "learning_rate": 4.4896903334757935e-07, + "loss": 1.1436, + "step": 30895 + }, + { + "epoch": 0.9071583768864877, + "grad_norm": 0.0, + "learning_rate": 4.4868733203812134e-07, + "loss": 1.0864, + "step": 30896 + }, + { + "epoch": 0.9071877385636268, + "grad_norm": 0.0, + "learning_rate": 4.484057171036316e-07, + "loss": 1.2891, + "step": 30897 + }, + { + "epoch": 0.9072171002407657, + "grad_norm": 0.0, + "learning_rate": 4.4812418854665253e-07, + "loss": 1.1377, + "step": 30898 + }, + { + "epoch": 0.9072464619179047, + "grad_norm": 0.0, + "learning_rate": 4.4784274636973436e-07, + "loss": 1.2139, + "step": 30899 + }, + { + "epoch": 0.9072758235950438, + "grad_norm": 0.0, + "learning_rate": 4.475613905754217e-07, + "loss": 1.3652, + "step": 30900 + }, + { + "epoch": 0.9073051852721827, + "grad_norm": 0.0, + "learning_rate": 4.47280121166257e-07, + "loss": 1.2666, + "step": 30901 + }, + { + "epoch": 0.9073345469493217, + "grad_norm": 0.0, + "learning_rate": 4.469989381447848e-07, + "loss": 1.1089, + "step": 30902 + }, + { + "epoch": 0.9073639086264608, + "grad_norm": 0.0, + "learning_rate": 4.4671784151354646e-07, + "loss": 1.1167, + "step": 30903 + }, + { + "epoch": 0.9073932703035997, + "grad_norm": 0.0, + "learning_rate": 4.464368312750855e-07, + "loss": 1.2183, + "step": 30904 + }, + { + "epoch": 0.9074226319807387, + "grad_norm": 0.0, + "learning_rate": 4.461559074319433e-07, + "loss": 1.2319, + "step": 30905 + }, + { + "epoch": 0.9074519936578778, + "grad_norm": 0.0, + "learning_rate": 4.4587506998666095e-07, + "loss": 1.1064, + "step": 30906 + }, + { + "epoch": 0.9074813553350167, + "grad_norm": 0.0, + "learning_rate": 4.455943189417766e-07, + "loss": 1.127, + "step": 30907 + }, + { + "epoch": 0.9075107170121557, + "grad_norm": 0.0, + "learning_rate": 4.453136542998304e-07, + "loss": 1.1255, + "step": 30908 + }, + { + "epoch": 0.9075400786892948, + "grad_norm": 0.0, + "learning_rate": 4.450330760633592e-07, + "loss": 1.3125, + "step": 30909 + }, + { + "epoch": 0.9075694403664337, + "grad_norm": 0.0, + "learning_rate": 4.447525842348999e-07, + "loss": 1.1045, + "step": 30910 + }, + { + "epoch": 0.9075988020435727, + "grad_norm": 0.0, + "learning_rate": 4.4447217881699146e-07, + "loss": 1.1719, + "step": 30911 + }, + { + "epoch": 0.9076281637207118, + "grad_norm": 0.0, + "learning_rate": 4.441918598121675e-07, + "loss": 1.2441, + "step": 30912 + }, + { + "epoch": 0.9076575253978507, + "grad_norm": 0.0, + "learning_rate": 4.439116272229638e-07, + "loss": 1.2529, + "step": 30913 + }, + { + "epoch": 0.9076868870749897, + "grad_norm": 0.0, + "learning_rate": 4.436314810519149e-07, + "loss": 1.3467, + "step": 30914 + }, + { + "epoch": 0.9077162487521288, + "grad_norm": 0.0, + "learning_rate": 4.433514213015544e-07, + "loss": 1.1948, + "step": 30915 + }, + { + "epoch": 0.9077456104292677, + "grad_norm": 0.0, + "learning_rate": 4.430714479744147e-07, + "loss": 1.1934, + "step": 30916 + }, + { + "epoch": 0.9077749721064067, + "grad_norm": 0.0, + "learning_rate": 4.427915610730271e-07, + "loss": 1.1846, + "step": 30917 + }, + { + "epoch": 0.9078043337835457, + "grad_norm": 0.0, + "learning_rate": 4.4251176059992293e-07, + "loss": 1.1187, + "step": 30918 + }, + { + "epoch": 0.9078336954606847, + "grad_norm": 0.0, + "learning_rate": 4.422320465576324e-07, + "loss": 1.1885, + "step": 30919 + }, + { + "epoch": 0.9078630571378237, + "grad_norm": 0.0, + "learning_rate": 4.419524189486857e-07, + "loss": 1.1582, + "step": 30920 + }, + { + "epoch": 0.9078924188149627, + "grad_norm": 0.0, + "learning_rate": 4.416728777756096e-07, + "loss": 1.0332, + "step": 30921 + }, + { + "epoch": 0.9079217804921017, + "grad_norm": 0.0, + "learning_rate": 4.4139342304093555e-07, + "loss": 1.1797, + "step": 30922 + }, + { + "epoch": 0.9079511421692407, + "grad_norm": 0.0, + "learning_rate": 4.4111405474718703e-07, + "loss": 1.1313, + "step": 30923 + }, + { + "epoch": 0.9079805038463797, + "grad_norm": 0.0, + "learning_rate": 4.4083477289689313e-07, + "loss": 1.1455, + "step": 30924 + }, + { + "epoch": 0.9080098655235187, + "grad_norm": 0.0, + "learning_rate": 4.405555774925796e-07, + "loss": 1.1436, + "step": 30925 + }, + { + "epoch": 0.9080392272006577, + "grad_norm": 0.0, + "learning_rate": 4.402764685367689e-07, + "loss": 1.2612, + "step": 30926 + }, + { + "epoch": 0.9080685888777967, + "grad_norm": 0.0, + "learning_rate": 4.399974460319867e-07, + "loss": 1.2275, + "step": 30927 + }, + { + "epoch": 0.9080979505549357, + "grad_norm": 0.0, + "learning_rate": 4.397185099807555e-07, + "loss": 1.1689, + "step": 30928 + }, + { + "epoch": 0.9081273122320747, + "grad_norm": 0.0, + "learning_rate": 4.3943966038559995e-07, + "loss": 1.0093, + "step": 30929 + }, + { + "epoch": 0.9081566739092137, + "grad_norm": 0.0, + "learning_rate": 4.391608972490369e-07, + "loss": 1.3311, + "step": 30930 + }, + { + "epoch": 0.9081860355863527, + "grad_norm": 0.0, + "learning_rate": 4.388822205735932e-07, + "loss": 0.9922, + "step": 30931 + }, + { + "epoch": 0.9082153972634917, + "grad_norm": 0.0, + "learning_rate": 4.386036303617858e-07, + "loss": 1.2461, + "step": 30932 + }, + { + "epoch": 0.9082447589406307, + "grad_norm": 0.0, + "learning_rate": 4.383251266161348e-07, + "loss": 1.1787, + "step": 30933 + }, + { + "epoch": 0.9082741206177697, + "grad_norm": 0.0, + "learning_rate": 4.380467093391561e-07, + "loss": 1.2686, + "step": 30934 + }, + { + "epoch": 0.9083034822949086, + "grad_norm": 0.0, + "learning_rate": 4.3776837853337196e-07, + "loss": 1.2656, + "step": 30935 + }, + { + "epoch": 0.9083328439720477, + "grad_norm": 0.0, + "learning_rate": 4.3749013420129605e-07, + "loss": 1.0947, + "step": 30936 + }, + { + "epoch": 0.9083622056491867, + "grad_norm": 0.0, + "learning_rate": 4.372119763454463e-07, + "loss": 1.1235, + "step": 30937 + }, + { + "epoch": 0.9083915673263256, + "grad_norm": 0.0, + "learning_rate": 4.3693390496833856e-07, + "loss": 1.1523, + "step": 30938 + }, + { + "epoch": 0.9084209290034647, + "grad_norm": 0.0, + "learning_rate": 4.366559200724851e-07, + "loss": 1.1602, + "step": 30939 + }, + { + "epoch": 0.9084502906806037, + "grad_norm": 0.0, + "learning_rate": 4.36378021660403e-07, + "loss": 1.147, + "step": 30940 + }, + { + "epoch": 0.9084796523577426, + "grad_norm": 0.0, + "learning_rate": 4.361002097346034e-07, + "loss": 1.2695, + "step": 30941 + }, + { + "epoch": 0.9085090140348817, + "grad_norm": 0.0, + "learning_rate": 4.358224842975989e-07, + "loss": 1.1592, + "step": 30942 + }, + { + "epoch": 0.9085383757120207, + "grad_norm": 0.0, + "learning_rate": 4.355448453519007e-07, + "loss": 1.3242, + "step": 30943 + }, + { + "epoch": 0.9085677373891596, + "grad_norm": 0.0, + "learning_rate": 4.3526729290002013e-07, + "loss": 1.1621, + "step": 30944 + }, + { + "epoch": 0.9085970990662987, + "grad_norm": 0.0, + "learning_rate": 4.3498982694446636e-07, + "loss": 1.25, + "step": 30945 + }, + { + "epoch": 0.9086264607434377, + "grad_norm": 0.0, + "learning_rate": 4.3471244748774956e-07, + "loss": 1.2471, + "step": 30946 + }, + { + "epoch": 0.9086558224205766, + "grad_norm": 0.0, + "learning_rate": 4.3443515453237775e-07, + "loss": 1.2388, + "step": 30947 + }, + { + "epoch": 0.9086851840977157, + "grad_norm": 0.0, + "learning_rate": 4.3415794808085886e-07, + "loss": 1.1709, + "step": 30948 + }, + { + "epoch": 0.9087145457748547, + "grad_norm": 0.0, + "learning_rate": 4.3388082813569987e-07, + "loss": 1.1924, + "step": 30949 + }, + { + "epoch": 0.9087439074519936, + "grad_norm": 0.0, + "learning_rate": 4.336037946994065e-07, + "loss": 1.2256, + "step": 30950 + }, + { + "epoch": 0.9087732691291327, + "grad_norm": 0.0, + "learning_rate": 4.3332684777448455e-07, + "loss": 1.2202, + "step": 30951 + }, + { + "epoch": 0.9088026308062717, + "grad_norm": 0.0, + "learning_rate": 4.330499873634353e-07, + "loss": 1.2773, + "step": 30952 + }, + { + "epoch": 0.9088319924834106, + "grad_norm": 0.0, + "learning_rate": 4.3277321346876787e-07, + "loss": 1.1362, + "step": 30953 + }, + { + "epoch": 0.9088613541605497, + "grad_norm": 0.0, + "learning_rate": 4.3249652609298033e-07, + "loss": 1.2197, + "step": 30954 + }, + { + "epoch": 0.9088907158376887, + "grad_norm": 0.0, + "learning_rate": 4.3221992523857835e-07, + "loss": 1.2881, + "step": 30955 + }, + { + "epoch": 0.9089200775148276, + "grad_norm": 0.0, + "learning_rate": 4.319434109080611e-07, + "loss": 1.146, + "step": 30956 + }, + { + "epoch": 0.9089494391919667, + "grad_norm": 0.0, + "learning_rate": 4.316669831039322e-07, + "loss": 1.2358, + "step": 30957 + }, + { + "epoch": 0.9089788008691057, + "grad_norm": 0.0, + "learning_rate": 4.313906418286884e-07, + "loss": 1.2285, + "step": 30958 + }, + { + "epoch": 0.9090081625462446, + "grad_norm": 0.0, + "learning_rate": 4.311143870848278e-07, + "loss": 1.1162, + "step": 30959 + }, + { + "epoch": 0.9090375242233837, + "grad_norm": 0.0, + "learning_rate": 4.308382188748517e-07, + "loss": 1.1479, + "step": 30960 + }, + { + "epoch": 0.9090668859005226, + "grad_norm": 0.0, + "learning_rate": 4.3056213720125586e-07, + "loss": 1.1562, + "step": 30961 + }, + { + "epoch": 0.9090962475776616, + "grad_norm": 0.0, + "learning_rate": 4.302861420665383e-07, + "loss": 1.2314, + "step": 30962 + }, + { + "epoch": 0.9091256092548007, + "grad_norm": 0.0, + "learning_rate": 4.300102334731915e-07, + "loss": 1.2246, + "step": 30963 + }, + { + "epoch": 0.9091549709319396, + "grad_norm": 0.0, + "learning_rate": 4.297344114237156e-07, + "loss": 1.2354, + "step": 30964 + }, + { + "epoch": 0.9091843326090786, + "grad_norm": 0.0, + "learning_rate": 4.294586759206021e-07, + "loss": 1.2197, + "step": 30965 + }, + { + "epoch": 0.9092136942862177, + "grad_norm": 0.0, + "learning_rate": 4.2918302696634553e-07, + "loss": 1.2129, + "step": 30966 + }, + { + "epoch": 0.9092430559633566, + "grad_norm": 0.0, + "learning_rate": 4.28907464563435e-07, + "loss": 1.1865, + "step": 30967 + }, + { + "epoch": 0.9092724176404956, + "grad_norm": 0.0, + "learning_rate": 4.2863198871436864e-07, + "loss": 1.1572, + "step": 30968 + }, + { + "epoch": 0.9093017793176347, + "grad_norm": 0.0, + "learning_rate": 4.283565994216332e-07, + "loss": 1.2129, + "step": 30969 + }, + { + "epoch": 0.9093311409947736, + "grad_norm": 0.0, + "learning_rate": 4.28081296687719e-07, + "loss": 1.2383, + "step": 30970 + }, + { + "epoch": 0.9093605026719126, + "grad_norm": 0.0, + "learning_rate": 4.2780608051511963e-07, + "loss": 1.2603, + "step": 30971 + }, + { + "epoch": 0.9093898643490517, + "grad_norm": 0.0, + "learning_rate": 4.2753095090631855e-07, + "loss": 1.2124, + "step": 30972 + }, + { + "epoch": 0.9094192260261906, + "grad_norm": 0.0, + "learning_rate": 4.2725590786380833e-07, + "loss": 1.2939, + "step": 30973 + }, + { + "epoch": 0.9094485877033296, + "grad_norm": 0.0, + "learning_rate": 4.2698095139007576e-07, + "loss": 1.1313, + "step": 30974 + }, + { + "epoch": 0.9094779493804687, + "grad_norm": 0.0, + "learning_rate": 4.267060814876045e-07, + "loss": 1.2695, + "step": 30975 + }, + { + "epoch": 0.9095073110576076, + "grad_norm": 0.0, + "learning_rate": 4.264312981588814e-07, + "loss": 1.2085, + "step": 30976 + }, + { + "epoch": 0.9095366727347466, + "grad_norm": 0.0, + "learning_rate": 4.261566014063934e-07, + "loss": 1.2344, + "step": 30977 + }, + { + "epoch": 0.9095660344118857, + "grad_norm": 0.0, + "learning_rate": 4.2588199123262285e-07, + "loss": 1.1333, + "step": 30978 + }, + { + "epoch": 0.9095953960890246, + "grad_norm": 0.0, + "learning_rate": 4.2560746764005233e-07, + "loss": 1.1895, + "step": 30979 + }, + { + "epoch": 0.9096247577661636, + "grad_norm": 0.0, + "learning_rate": 4.253330306311665e-07, + "loss": 1.1138, + "step": 30980 + }, + { + "epoch": 0.9096541194433027, + "grad_norm": 0.0, + "learning_rate": 4.2505868020844554e-07, + "loss": 1.0386, + "step": 30981 + }, + { + "epoch": 0.9096834811204416, + "grad_norm": 0.0, + "learning_rate": 4.2478441637437194e-07, + "loss": 1.189, + "step": 30982 + }, + { + "epoch": 0.9097128427975806, + "grad_norm": 0.0, + "learning_rate": 4.2451023913142373e-07, + "loss": 1.064, + "step": 30983 + }, + { + "epoch": 0.9097422044747197, + "grad_norm": 0.0, + "learning_rate": 4.242361484820834e-07, + "loss": 1.1484, + "step": 30984 + }, + { + "epoch": 0.9097715661518586, + "grad_norm": 0.0, + "learning_rate": 4.2396214442882667e-07, + "loss": 1.1221, + "step": 30985 + }, + { + "epoch": 0.9098009278289976, + "grad_norm": 0.0, + "learning_rate": 4.236882269741327e-07, + "loss": 1.248, + "step": 30986 + }, + { + "epoch": 0.9098302895061365, + "grad_norm": 0.0, + "learning_rate": 4.234143961204795e-07, + "loss": 1.2314, + "step": 30987 + }, + { + "epoch": 0.9098596511832756, + "grad_norm": 0.0, + "learning_rate": 4.231406518703418e-07, + "loss": 1.002, + "step": 30988 + }, + { + "epoch": 0.9098890128604146, + "grad_norm": 0.0, + "learning_rate": 4.2286699422619647e-07, + "loss": 1.0698, + "step": 30989 + }, + { + "epoch": 0.9099183745375535, + "grad_norm": 0.0, + "learning_rate": 4.225934231905182e-07, + "loss": 1.2285, + "step": 30990 + }, + { + "epoch": 0.9099477362146926, + "grad_norm": 0.0, + "learning_rate": 4.223199387657795e-07, + "loss": 1.1958, + "step": 30991 + }, + { + "epoch": 0.9099770978918316, + "grad_norm": 0.0, + "learning_rate": 4.2204654095445387e-07, + "loss": 1.2715, + "step": 30992 + }, + { + "epoch": 0.9100064595689705, + "grad_norm": 0.0, + "learning_rate": 4.21773229759016e-07, + "loss": 1.3164, + "step": 30993 + }, + { + "epoch": 0.9100358212461096, + "grad_norm": 0.0, + "learning_rate": 4.21500005181934e-07, + "loss": 1.0425, + "step": 30994 + }, + { + "epoch": 0.9100651829232486, + "grad_norm": 0.0, + "learning_rate": 4.212268672256814e-07, + "loss": 1.2393, + "step": 30995 + }, + { + "epoch": 0.9100945446003875, + "grad_norm": 0.0, + "learning_rate": 4.2095381589272735e-07, + "loss": 1.2241, + "step": 30996 + }, + { + "epoch": 0.9101239062775266, + "grad_norm": 0.0, + "learning_rate": 4.206808511855409e-07, + "loss": 1.2139, + "step": 30997 + }, + { + "epoch": 0.9101532679546656, + "grad_norm": 0.0, + "learning_rate": 4.204079731065913e-07, + "loss": 1.1611, + "step": 30998 + }, + { + "epoch": 0.9101826296318045, + "grad_norm": 0.0, + "learning_rate": 4.2013518165834656e-07, + "loss": 1.2134, + "step": 30999 + }, + { + "epoch": 0.9102119913089436, + "grad_norm": 0.0, + "learning_rate": 4.198624768432724e-07, + "loss": 1.1113, + "step": 31000 + }, + { + "epoch": 0.9102413529860826, + "grad_norm": 0.0, + "learning_rate": 4.195898586638336e-07, + "loss": 1.3193, + "step": 31001 + }, + { + "epoch": 0.9102707146632215, + "grad_norm": 0.0, + "learning_rate": 4.193173271224993e-07, + "loss": 1.2119, + "step": 31002 + }, + { + "epoch": 0.9103000763403606, + "grad_norm": 0.0, + "learning_rate": 4.190448822217308e-07, + "loss": 1.1997, + "step": 31003 + }, + { + "epoch": 0.9103294380174995, + "grad_norm": 0.0, + "learning_rate": 4.1877252396399393e-07, + "loss": 1.3389, + "step": 31004 + }, + { + "epoch": 0.9103587996946385, + "grad_norm": 0.0, + "learning_rate": 4.185002523517501e-07, + "loss": 1.292, + "step": 31005 + }, + { + "epoch": 0.9103881613717776, + "grad_norm": 0.0, + "learning_rate": 4.1822806738746393e-07, + "loss": 1.2041, + "step": 31006 + }, + { + "epoch": 0.9104175230489165, + "grad_norm": 0.0, + "learning_rate": 4.1795596907359457e-07, + "loss": 1.1724, + "step": 31007 + }, + { + "epoch": 0.9104468847260555, + "grad_norm": 0.0, + "learning_rate": 4.176839574126024e-07, + "loss": 1.2529, + "step": 31008 + }, + { + "epoch": 0.9104762464031946, + "grad_norm": 0.0, + "learning_rate": 4.174120324069497e-07, + "loss": 1.1729, + "step": 31009 + }, + { + "epoch": 0.9105056080803335, + "grad_norm": 0.0, + "learning_rate": 4.1714019405909246e-07, + "loss": 1.1514, + "step": 31010 + }, + { + "epoch": 0.9105349697574725, + "grad_norm": 0.0, + "learning_rate": 4.1686844237149193e-07, + "loss": 1.3008, + "step": 31011 + }, + { + "epoch": 0.9105643314346116, + "grad_norm": 0.0, + "learning_rate": 4.165967773466029e-07, + "loss": 1.2788, + "step": 31012 + }, + { + "epoch": 0.9105936931117505, + "grad_norm": 0.0, + "learning_rate": 4.163251989868855e-07, + "loss": 1.2148, + "step": 31013 + }, + { + "epoch": 0.9106230547888895, + "grad_norm": 0.0, + "learning_rate": 4.160537072947923e-07, + "loss": 1.1953, + "step": 31014 + }, + { + "epoch": 0.9106524164660286, + "grad_norm": 0.0, + "learning_rate": 4.1578230227278137e-07, + "loss": 1.0879, + "step": 31015 + }, + { + "epoch": 0.9106817781431675, + "grad_norm": 0.0, + "learning_rate": 4.1551098392330404e-07, + "loss": 1.1924, + "step": 31016 + }, + { + "epoch": 0.9107111398203065, + "grad_norm": 0.0, + "learning_rate": 4.152397522488161e-07, + "loss": 1.2266, + "step": 31017 + }, + { + "epoch": 0.9107405014974456, + "grad_norm": 0.0, + "learning_rate": 4.149686072517711e-07, + "loss": 1.1411, + "step": 31018 + }, + { + "epoch": 0.9107698631745845, + "grad_norm": 0.0, + "learning_rate": 4.1469754893461724e-07, + "loss": 1.3975, + "step": 31019 + }, + { + "epoch": 0.9107992248517235, + "grad_norm": 0.0, + "learning_rate": 4.144265772998102e-07, + "loss": 1.1782, + "step": 31020 + }, + { + "epoch": 0.9108285865288626, + "grad_norm": 0.0, + "learning_rate": 4.141556923497969e-07, + "loss": 1.2207, + "step": 31021 + }, + { + "epoch": 0.9108579482060015, + "grad_norm": 0.0, + "learning_rate": 4.138848940870299e-07, + "loss": 1.1626, + "step": 31022 + }, + { + "epoch": 0.9108873098831405, + "grad_norm": 0.0, + "learning_rate": 4.1361418251395724e-07, + "loss": 1.1436, + "step": 31023 + }, + { + "epoch": 0.9109166715602796, + "grad_norm": 0.0, + "learning_rate": 4.133435576330258e-07, + "loss": 1.2031, + "step": 31024 + }, + { + "epoch": 0.9109460332374185, + "grad_norm": 0.0, + "learning_rate": 4.1307301944668364e-07, + "loss": 1.1392, + "step": 31025 + }, + { + "epoch": 0.9109753949145575, + "grad_norm": 0.0, + "learning_rate": 4.128025679573777e-07, + "loss": 1.125, + "step": 31026 + }, + { + "epoch": 0.9110047565916966, + "grad_norm": 0.0, + "learning_rate": 4.125322031675538e-07, + "loss": 1.3394, + "step": 31027 + }, + { + "epoch": 0.9110341182688355, + "grad_norm": 0.0, + "learning_rate": 4.1226192507965557e-07, + "loss": 1.252, + "step": 31028 + }, + { + "epoch": 0.9110634799459745, + "grad_norm": 0.0, + "learning_rate": 4.1199173369612986e-07, + "loss": 1.2393, + "step": 31029 + }, + { + "epoch": 0.9110928416231135, + "grad_norm": 0.0, + "learning_rate": 4.117216290194159e-07, + "loss": 0.9697, + "step": 31030 + }, + { + "epoch": 0.9111222033002525, + "grad_norm": 0.0, + "learning_rate": 4.1145161105196284e-07, + "loss": 1.1431, + "step": 31031 + }, + { + "epoch": 0.9111515649773915, + "grad_norm": 0.0, + "learning_rate": 4.1118167979620426e-07, + "loss": 1.2852, + "step": 31032 + }, + { + "epoch": 0.9111809266545305, + "grad_norm": 0.0, + "learning_rate": 4.109118352545882e-07, + "loss": 1.0322, + "step": 31033 + }, + { + "epoch": 0.9112102883316695, + "grad_norm": 0.0, + "learning_rate": 4.106420774295505e-07, + "loss": 1.1284, + "step": 31034 + }, + { + "epoch": 0.9112396500088085, + "grad_norm": 0.0, + "learning_rate": 4.103724063235337e-07, + "loss": 1.0142, + "step": 31035 + }, + { + "epoch": 0.9112690116859475, + "grad_norm": 0.0, + "learning_rate": 4.101028219389747e-07, + "loss": 1.0908, + "step": 31036 + }, + { + "epoch": 0.9112983733630865, + "grad_norm": 0.0, + "learning_rate": 4.0983332427831147e-07, + "loss": 1.2798, + "step": 31037 + }, + { + "epoch": 0.9113277350402255, + "grad_norm": 0.0, + "learning_rate": 4.095639133439822e-07, + "loss": 1.2725, + "step": 31038 + }, + { + "epoch": 0.9113570967173645, + "grad_norm": 0.0, + "learning_rate": 4.0929458913842255e-07, + "loss": 1.2749, + "step": 31039 + }, + { + "epoch": 0.9113864583945035, + "grad_norm": 0.0, + "learning_rate": 4.090253516640685e-07, + "loss": 1.2231, + "step": 31040 + }, + { + "epoch": 0.9114158200716425, + "grad_norm": 0.0, + "learning_rate": 4.0875620092335257e-07, + "loss": 1.2686, + "step": 31041 + }, + { + "epoch": 0.9114451817487815, + "grad_norm": 0.0, + "learning_rate": 4.0848713691871265e-07, + "loss": 1.2021, + "step": 31042 + }, + { + "epoch": 0.9114745434259205, + "grad_norm": 0.0, + "learning_rate": 4.0821815965257915e-07, + "loss": 1.1758, + "step": 31043 + }, + { + "epoch": 0.9115039051030595, + "grad_norm": 0.0, + "learning_rate": 4.0794926912738565e-07, + "loss": 1.2266, + "step": 31044 + }, + { + "epoch": 0.9115332667801985, + "grad_norm": 0.0, + "learning_rate": 4.076804653455624e-07, + "loss": 1.25, + "step": 31045 + }, + { + "epoch": 0.9115626284573375, + "grad_norm": 0.0, + "learning_rate": 4.07411748309543e-07, + "loss": 1.2344, + "step": 31046 + }, + { + "epoch": 0.9115919901344764, + "grad_norm": 0.0, + "learning_rate": 4.071431180217555e-07, + "loss": 1.1289, + "step": 31047 + }, + { + "epoch": 0.9116213518116155, + "grad_norm": 0.0, + "learning_rate": 4.0687457448462917e-07, + "loss": 1.1895, + "step": 31048 + }, + { + "epoch": 0.9116507134887545, + "grad_norm": 0.0, + "learning_rate": 4.066061177005931e-07, + "loss": 1.2617, + "step": 31049 + }, + { + "epoch": 0.9116800751658934, + "grad_norm": 0.0, + "learning_rate": 4.063377476720742e-07, + "loss": 1.1851, + "step": 31050 + }, + { + "epoch": 0.9117094368430325, + "grad_norm": 0.0, + "learning_rate": 4.0606946440150066e-07, + "loss": 1.2129, + "step": 31051 + }, + { + "epoch": 0.9117387985201715, + "grad_norm": 0.0, + "learning_rate": 4.05801267891297e-07, + "loss": 1.1113, + "step": 31052 + }, + { + "epoch": 0.9117681601973104, + "grad_norm": 0.0, + "learning_rate": 4.055331581438904e-07, + "loss": 1.3076, + "step": 31053 + }, + { + "epoch": 0.9117975218744495, + "grad_norm": 0.0, + "learning_rate": 4.052651351617043e-07, + "loss": 1.2705, + "step": 31054 + }, + { + "epoch": 0.9118268835515885, + "grad_norm": 0.0, + "learning_rate": 4.049971989471635e-07, + "loss": 1.2832, + "step": 31055 + }, + { + "epoch": 0.9118562452287274, + "grad_norm": 0.0, + "learning_rate": 4.0472934950269163e-07, + "loss": 1.0933, + "step": 31056 + }, + { + "epoch": 0.9118856069058665, + "grad_norm": 0.0, + "learning_rate": 4.044615868307067e-07, + "loss": 1.2744, + "step": 31057 + }, + { + "epoch": 0.9119149685830055, + "grad_norm": 0.0, + "learning_rate": 4.0419391093363457e-07, + "loss": 1.2397, + "step": 31058 + }, + { + "epoch": 0.9119443302601444, + "grad_norm": 0.0, + "learning_rate": 4.0392632181389335e-07, + "loss": 1.1431, + "step": 31059 + }, + { + "epoch": 0.9119736919372835, + "grad_norm": 0.0, + "learning_rate": 4.036588194739055e-07, + "loss": 1.2139, + "step": 31060 + }, + { + "epoch": 0.9120030536144225, + "grad_norm": 0.0, + "learning_rate": 4.033914039160869e-07, + "loss": 1.1919, + "step": 31061 + }, + { + "epoch": 0.9120324152915614, + "grad_norm": 0.0, + "learning_rate": 4.0312407514286e-07, + "loss": 1.2876, + "step": 31062 + }, + { + "epoch": 0.9120617769687005, + "grad_norm": 0.0, + "learning_rate": 4.0285683315663846e-07, + "loss": 1.0645, + "step": 31063 + }, + { + "epoch": 0.9120911386458395, + "grad_norm": 0.0, + "learning_rate": 4.0258967795984037e-07, + "loss": 1.1855, + "step": 31064 + }, + { + "epoch": 0.9121205003229784, + "grad_norm": 0.0, + "learning_rate": 4.0232260955488155e-07, + "loss": 1.1934, + "step": 31065 + }, + { + "epoch": 0.9121498620001175, + "grad_norm": 0.0, + "learning_rate": 4.0205562794417784e-07, + "loss": 1.2441, + "step": 31066 + }, + { + "epoch": 0.9121792236772565, + "grad_norm": 0.0, + "learning_rate": 4.01788733130144e-07, + "loss": 1.1997, + "step": 31067 + }, + { + "epoch": 0.9122085853543954, + "grad_norm": 0.0, + "learning_rate": 4.015219251151903e-07, + "loss": 1.1318, + "step": 31068 + }, + { + "epoch": 0.9122379470315345, + "grad_norm": 0.0, + "learning_rate": 4.0125520390173476e-07, + "loss": 1.1362, + "step": 31069 + }, + { + "epoch": 0.9122673087086735, + "grad_norm": 0.0, + "learning_rate": 4.0098856949218447e-07, + "loss": 1.0605, + "step": 31070 + }, + { + "epoch": 0.9122966703858124, + "grad_norm": 0.0, + "learning_rate": 4.00722021888954e-07, + "loss": 1.2471, + "step": 31071 + }, + { + "epoch": 0.9123260320629515, + "grad_norm": 0.0, + "learning_rate": 4.004555610944527e-07, + "loss": 1.3252, + "step": 31072 + }, + { + "epoch": 0.9123553937400904, + "grad_norm": 0.0, + "learning_rate": 4.001891871110908e-07, + "loss": 1.2676, + "step": 31073 + }, + { + "epoch": 0.9123847554172294, + "grad_norm": 0.0, + "learning_rate": 3.999228999412752e-07, + "loss": 1.2539, + "step": 31074 + }, + { + "epoch": 0.9124141170943685, + "grad_norm": 0.0, + "learning_rate": 3.9965669958741625e-07, + "loss": 1.2212, + "step": 31075 + }, + { + "epoch": 0.9124434787715074, + "grad_norm": 0.0, + "learning_rate": 3.9939058605192094e-07, + "loss": 1.1509, + "step": 31076 + }, + { + "epoch": 0.9124728404486464, + "grad_norm": 0.0, + "learning_rate": 3.99124559337194e-07, + "loss": 1.105, + "step": 31077 + }, + { + "epoch": 0.9125022021257855, + "grad_norm": 0.0, + "learning_rate": 3.988586194456445e-07, + "loss": 1.2002, + "step": 31078 + }, + { + "epoch": 0.9125315638029244, + "grad_norm": 0.0, + "learning_rate": 3.9859276637967294e-07, + "loss": 1.1973, + "step": 31079 + }, + { + "epoch": 0.9125609254800634, + "grad_norm": 0.0, + "learning_rate": 3.9832700014168724e-07, + "loss": 1.2012, + "step": 31080 + }, + { + "epoch": 0.9125902871572025, + "grad_norm": 0.0, + "learning_rate": 3.9806132073409e-07, + "loss": 1.1758, + "step": 31081 + }, + { + "epoch": 0.9126196488343414, + "grad_norm": 0.0, + "learning_rate": 3.977957281592837e-07, + "loss": 1.1538, + "step": 31082 + }, + { + "epoch": 0.9126490105114804, + "grad_norm": 0.0, + "learning_rate": 3.9753022241966755e-07, + "loss": 1.0776, + "step": 31083 + }, + { + "epoch": 0.9126783721886195, + "grad_norm": 0.0, + "learning_rate": 3.972648035176463e-07, + "loss": 1.21, + "step": 31084 + }, + { + "epoch": 0.9127077338657584, + "grad_norm": 0.0, + "learning_rate": 3.96999471455618e-07, + "loss": 1.1572, + "step": 31085 + }, + { + "epoch": 0.9127370955428974, + "grad_norm": 0.0, + "learning_rate": 3.9673422623598413e-07, + "loss": 1.2383, + "step": 31086 + }, + { + "epoch": 0.9127664572200364, + "grad_norm": 0.0, + "learning_rate": 3.964690678611416e-07, + "loss": 1.1733, + "step": 31087 + }, + { + "epoch": 0.9127958188971754, + "grad_norm": 0.0, + "learning_rate": 3.9620399633348847e-07, + "loss": 1.25, + "step": 31088 + }, + { + "epoch": 0.9128251805743144, + "grad_norm": 0.0, + "learning_rate": 3.959390116554229e-07, + "loss": 1.1152, + "step": 31089 + }, + { + "epoch": 0.9128545422514533, + "grad_norm": 0.0, + "learning_rate": 3.9567411382933963e-07, + "loss": 1.248, + "step": 31090 + }, + { + "epoch": 0.9128839039285924, + "grad_norm": 0.0, + "learning_rate": 3.9540930285763555e-07, + "loss": 1.1733, + "step": 31091 + }, + { + "epoch": 0.9129132656057314, + "grad_norm": 0.0, + "learning_rate": 3.951445787427033e-07, + "loss": 1.1372, + "step": 31092 + }, + { + "epoch": 0.9129426272828703, + "grad_norm": 0.0, + "learning_rate": 3.9487994148694087e-07, + "loss": 1.0459, + "step": 31093 + }, + { + "epoch": 0.9129719889600094, + "grad_norm": 0.0, + "learning_rate": 3.946153910927364e-07, + "loss": 1.022, + "step": 31094 + }, + { + "epoch": 0.9130013506371484, + "grad_norm": 0.0, + "learning_rate": 3.9435092756248685e-07, + "loss": 1.1328, + "step": 31095 + }, + { + "epoch": 0.9130307123142873, + "grad_norm": 0.0, + "learning_rate": 3.940865508985825e-07, + "loss": 1.2031, + "step": 31096 + }, + { + "epoch": 0.9130600739914264, + "grad_norm": 0.0, + "learning_rate": 3.9382226110341257e-07, + "loss": 1.1914, + "step": 31097 + }, + { + "epoch": 0.9130894356685654, + "grad_norm": 0.0, + "learning_rate": 3.935580581793685e-07, + "loss": 1.252, + "step": 31098 + }, + { + "epoch": 0.9131187973457043, + "grad_norm": 0.0, + "learning_rate": 3.932939421288384e-07, + "loss": 1.1758, + "step": 31099 + }, + { + "epoch": 0.9131481590228434, + "grad_norm": 0.0, + "learning_rate": 3.9302991295421254e-07, + "loss": 1.2109, + "step": 31100 + }, + { + "epoch": 0.9131775206999824, + "grad_norm": 0.0, + "learning_rate": 3.927659706578757e-07, + "loss": 1.1089, + "step": 31101 + }, + { + "epoch": 0.9132068823771213, + "grad_norm": 0.0, + "learning_rate": 3.925021152422182e-07, + "loss": 1.2085, + "step": 31102 + }, + { + "epoch": 0.9132362440542604, + "grad_norm": 0.0, + "learning_rate": 3.9223834670962356e-07, + "loss": 1.1357, + "step": 31103 + }, + { + "epoch": 0.9132656057313994, + "grad_norm": 0.0, + "learning_rate": 3.9197466506247897e-07, + "loss": 1.2559, + "step": 31104 + }, + { + "epoch": 0.9132949674085383, + "grad_norm": 0.0, + "learning_rate": 3.91711070303169e-07, + "loss": 1.1274, + "step": 31105 + }, + { + "epoch": 0.9133243290856774, + "grad_norm": 0.0, + "learning_rate": 3.9144756243407524e-07, + "loss": 1.2314, + "step": 31106 + }, + { + "epoch": 0.9133536907628164, + "grad_norm": 0.0, + "learning_rate": 3.911841414575834e-07, + "loss": 1.1646, + "step": 31107 + }, + { + "epoch": 0.9133830524399553, + "grad_norm": 0.0, + "learning_rate": 3.9092080737607175e-07, + "loss": 1.2725, + "step": 31108 + }, + { + "epoch": 0.9134124141170944, + "grad_norm": 0.0, + "learning_rate": 3.9065756019192604e-07, + "loss": 1.2695, + "step": 31109 + }, + { + "epoch": 0.9134417757942334, + "grad_norm": 0.0, + "learning_rate": 3.9039439990752324e-07, + "loss": 1.2002, + "step": 31110 + }, + { + "epoch": 0.9134711374713723, + "grad_norm": 0.0, + "learning_rate": 3.901313265252471e-07, + "loss": 1.1929, + "step": 31111 + }, + { + "epoch": 0.9135004991485114, + "grad_norm": 0.0, + "learning_rate": 3.8986834004747344e-07, + "loss": 1.2534, + "step": 31112 + }, + { + "epoch": 0.9135298608256504, + "grad_norm": 0.0, + "learning_rate": 3.8960544047658254e-07, + "loss": 1.2568, + "step": 31113 + }, + { + "epoch": 0.9135592225027893, + "grad_norm": 0.0, + "learning_rate": 3.8934262781494926e-07, + "loss": 1.2715, + "step": 31114 + }, + { + "epoch": 0.9135885841799284, + "grad_norm": 0.0, + "learning_rate": 3.890799020649527e-07, + "loss": 1.0693, + "step": 31115 + }, + { + "epoch": 0.9136179458570673, + "grad_norm": 0.0, + "learning_rate": 3.8881726322896887e-07, + "loss": 1.0894, + "step": 31116 + }, + { + "epoch": 0.9136473075342063, + "grad_norm": 0.0, + "learning_rate": 3.8855471130937017e-07, + "loss": 1.1904, + "step": 31117 + }, + { + "epoch": 0.9136766692113454, + "grad_norm": 0.0, + "learning_rate": 3.882922463085337e-07, + "loss": 1.1377, + "step": 31118 + }, + { + "epoch": 0.9137060308884843, + "grad_norm": 0.0, + "learning_rate": 3.8802986822883194e-07, + "loss": 1.0786, + "step": 31119 + }, + { + "epoch": 0.9137353925656233, + "grad_norm": 0.0, + "learning_rate": 3.8776757707263745e-07, + "loss": 1.1587, + "step": 31120 + }, + { + "epoch": 0.9137647542427624, + "grad_norm": 0.0, + "learning_rate": 3.875053728423239e-07, + "loss": 1.2319, + "step": 31121 + }, + { + "epoch": 0.9137941159199013, + "grad_norm": 0.0, + "learning_rate": 3.8724325554026056e-07, + "loss": 1.3359, + "step": 31122 + }, + { + "epoch": 0.9138234775970403, + "grad_norm": 0.0, + "learning_rate": 3.869812251688165e-07, + "loss": 1.1685, + "step": 31123 + }, + { + "epoch": 0.9138528392741794, + "grad_norm": 0.0, + "learning_rate": 3.8671928173036554e-07, + "loss": 1.1045, + "step": 31124 + }, + { + "epoch": 0.9138822009513183, + "grad_norm": 0.0, + "learning_rate": 3.8645742522727346e-07, + "loss": 1.1343, + "step": 31125 + }, + { + "epoch": 0.9139115626284573, + "grad_norm": 0.0, + "learning_rate": 3.861956556619073e-07, + "loss": 1.2256, + "step": 31126 + }, + { + "epoch": 0.9139409243055964, + "grad_norm": 0.0, + "learning_rate": 3.859339730366374e-07, + "loss": 1.1118, + "step": 31127 + }, + { + "epoch": 0.9139702859827353, + "grad_norm": 0.0, + "learning_rate": 3.8567237735382735e-07, + "loss": 1.3154, + "step": 31128 + }, + { + "epoch": 0.9139996476598743, + "grad_norm": 0.0, + "learning_rate": 3.854108686158464e-07, + "loss": 1.3613, + "step": 31129 + }, + { + "epoch": 0.9140290093370134, + "grad_norm": 0.0, + "learning_rate": 3.851494468250572e-07, + "loss": 1.2119, + "step": 31130 + }, + { + "epoch": 0.9140583710141523, + "grad_norm": 0.0, + "learning_rate": 3.8488811198382324e-07, + "loss": 1.2358, + "step": 31131 + }, + { + "epoch": 0.9140877326912913, + "grad_norm": 0.0, + "learning_rate": 3.846268640945072e-07, + "loss": 1.0503, + "step": 31132 + }, + { + "epoch": 0.9141170943684304, + "grad_norm": 0.0, + "learning_rate": 3.84365703159475e-07, + "loss": 1.2046, + "step": 31133 + }, + { + "epoch": 0.9141464560455693, + "grad_norm": 0.0, + "learning_rate": 3.841046291810846e-07, + "loss": 1.1914, + "step": 31134 + }, + { + "epoch": 0.9141758177227083, + "grad_norm": 0.0, + "learning_rate": 3.8384364216169976e-07, + "loss": 1.166, + "step": 31135 + }, + { + "epoch": 0.9142051793998474, + "grad_norm": 0.0, + "learning_rate": 3.8358274210367974e-07, + "loss": 1.1602, + "step": 31136 + }, + { + "epoch": 0.9142345410769863, + "grad_norm": 0.0, + "learning_rate": 3.833219290093848e-07, + "loss": 1.2305, + "step": 31137 + }, + { + "epoch": 0.9142639027541253, + "grad_norm": 0.0, + "learning_rate": 3.8306120288117087e-07, + "loss": 1.1924, + "step": 31138 + }, + { + "epoch": 0.9142932644312644, + "grad_norm": 0.0, + "learning_rate": 3.828005637213972e-07, + "loss": 1.3008, + "step": 31139 + }, + { + "epoch": 0.9143226261084033, + "grad_norm": 0.0, + "learning_rate": 3.825400115324218e-07, + "loss": 1.2168, + "step": 31140 + }, + { + "epoch": 0.9143519877855423, + "grad_norm": 0.0, + "learning_rate": 3.822795463165996e-07, + "loss": 1.1538, + "step": 31141 + }, + { + "epoch": 0.9143813494626813, + "grad_norm": 0.0, + "learning_rate": 3.8201916807628747e-07, + "loss": 1.1646, + "step": 31142 + }, + { + "epoch": 0.9144107111398203, + "grad_norm": 0.0, + "learning_rate": 3.8175887681383804e-07, + "loss": 1.1655, + "step": 31143 + }, + { + "epoch": 0.9144400728169593, + "grad_norm": 0.0, + "learning_rate": 3.814986725316072e-07, + "loss": 1.1211, + "step": 31144 + }, + { + "epoch": 0.9144694344940983, + "grad_norm": 0.0, + "learning_rate": 3.812385552319475e-07, + "loss": 1.1104, + "step": 31145 + }, + { + "epoch": 0.9144987961712373, + "grad_norm": 0.0, + "learning_rate": 3.8097852491721043e-07, + "loss": 1.1528, + "step": 31146 + }, + { + "epoch": 0.9145281578483763, + "grad_norm": 0.0, + "learning_rate": 3.807185815897485e-07, + "loss": 1.2134, + "step": 31147 + }, + { + "epoch": 0.9145575195255153, + "grad_norm": 0.0, + "learning_rate": 3.80458725251911e-07, + "loss": 1.0776, + "step": 31148 + }, + { + "epoch": 0.9145868812026543, + "grad_norm": 0.0, + "learning_rate": 3.801989559060493e-07, + "loss": 1.2217, + "step": 31149 + }, + { + "epoch": 0.9146162428797933, + "grad_norm": 0.0, + "learning_rate": 3.7993927355451156e-07, + "loss": 1.2476, + "step": 31150 + }, + { + "epoch": 0.9146456045569323, + "grad_norm": 0.0, + "learning_rate": 3.796796781996481e-07, + "loss": 1.1445, + "step": 31151 + }, + { + "epoch": 0.9146749662340713, + "grad_norm": 0.0, + "learning_rate": 3.794201698438038e-07, + "loss": 1.1851, + "step": 31152 + }, + { + "epoch": 0.9147043279112103, + "grad_norm": 0.0, + "learning_rate": 3.7916074848932673e-07, + "loss": 1.2617, + "step": 31153 + }, + { + "epoch": 0.9147336895883493, + "grad_norm": 0.0, + "learning_rate": 3.7890141413856493e-07, + "loss": 1.2573, + "step": 31154 + }, + { + "epoch": 0.9147630512654883, + "grad_norm": 0.0, + "learning_rate": 3.7864216679386e-07, + "loss": 1.0459, + "step": 31155 + }, + { + "epoch": 0.9147924129426273, + "grad_norm": 0.0, + "learning_rate": 3.7838300645755886e-07, + "loss": 1.2432, + "step": 31156 + }, + { + "epoch": 0.9148217746197663, + "grad_norm": 0.0, + "learning_rate": 3.78123933132003e-07, + "loss": 1.1167, + "step": 31157 + }, + { + "epoch": 0.9148511362969053, + "grad_norm": 0.0, + "learning_rate": 3.778649468195372e-07, + "loss": 1.2344, + "step": 31158 + }, + { + "epoch": 0.9148804979740442, + "grad_norm": 0.0, + "learning_rate": 3.77606047522503e-07, + "loss": 1.1597, + "step": 31159 + }, + { + "epoch": 0.9149098596511833, + "grad_norm": 0.0, + "learning_rate": 3.7734723524324166e-07, + "loss": 1.3506, + "step": 31160 + }, + { + "epoch": 0.9149392213283223, + "grad_norm": 0.0, + "learning_rate": 3.7708850998409483e-07, + "loss": 1.229, + "step": 31161 + }, + { + "epoch": 0.9149685830054612, + "grad_norm": 0.0, + "learning_rate": 3.768298717473995e-07, + "loss": 1.168, + "step": 31162 + }, + { + "epoch": 0.9149979446826003, + "grad_norm": 0.0, + "learning_rate": 3.7657132053549594e-07, + "loss": 1.2427, + "step": 31163 + }, + { + "epoch": 0.9150273063597393, + "grad_norm": 0.0, + "learning_rate": 3.7631285635072346e-07, + "loss": 1.1973, + "step": 31164 + }, + { + "epoch": 0.9150566680368782, + "grad_norm": 0.0, + "learning_rate": 3.7605447919541905e-07, + "loss": 1.2275, + "step": 31165 + }, + { + "epoch": 0.9150860297140173, + "grad_norm": 0.0, + "learning_rate": 3.757961890719175e-07, + "loss": 1.2144, + "step": 31166 + }, + { + "epoch": 0.9151153913911563, + "grad_norm": 0.0, + "learning_rate": 3.75537985982557e-07, + "loss": 1.1025, + "step": 31167 + }, + { + "epoch": 0.9151447530682952, + "grad_norm": 0.0, + "learning_rate": 3.752798699296689e-07, + "loss": 1.1206, + "step": 31168 + }, + { + "epoch": 0.9151741147454343, + "grad_norm": 0.0, + "learning_rate": 3.7502184091559257e-07, + "loss": 1.2197, + "step": 31169 + }, + { + "epoch": 0.9152034764225733, + "grad_norm": 0.0, + "learning_rate": 3.747638989426583e-07, + "loss": 1.1978, + "step": 31170 + }, + { + "epoch": 0.9152328380997122, + "grad_norm": 0.0, + "learning_rate": 3.7450604401319866e-07, + "loss": 1.2173, + "step": 31171 + }, + { + "epoch": 0.9152621997768513, + "grad_norm": 0.0, + "learning_rate": 3.7424827612954517e-07, + "loss": 1.2646, + "step": 31172 + }, + { + "epoch": 0.9152915614539903, + "grad_norm": 0.0, + "learning_rate": 3.7399059529403035e-07, + "loss": 1.1807, + "step": 31173 + }, + { + "epoch": 0.9153209231311292, + "grad_norm": 0.0, + "learning_rate": 3.737330015089846e-07, + "loss": 1.2783, + "step": 31174 + }, + { + "epoch": 0.9153502848082683, + "grad_norm": 0.0, + "learning_rate": 3.734754947767349e-07, + "loss": 1.3848, + "step": 31175 + }, + { + "epoch": 0.9153796464854073, + "grad_norm": 0.0, + "learning_rate": 3.7321807509961394e-07, + "loss": 1.2656, + "step": 31176 + }, + { + "epoch": 0.9154090081625462, + "grad_norm": 0.0, + "learning_rate": 3.729607424799453e-07, + "loss": 1.3252, + "step": 31177 + }, + { + "epoch": 0.9154383698396853, + "grad_norm": 0.0, + "learning_rate": 3.7270349692005934e-07, + "loss": 1.2383, + "step": 31178 + }, + { + "epoch": 0.9154677315168243, + "grad_norm": 0.0, + "learning_rate": 3.7244633842228095e-07, + "loss": 1.166, + "step": 31179 + }, + { + "epoch": 0.9154970931939632, + "grad_norm": 0.0, + "learning_rate": 3.721892669889371e-07, + "loss": 1.1924, + "step": 31180 + }, + { + "epoch": 0.9155264548711023, + "grad_norm": 0.0, + "learning_rate": 3.7193228262234924e-07, + "loss": 1.2817, + "step": 31181 + }, + { + "epoch": 0.9155558165482413, + "grad_norm": 0.0, + "learning_rate": 3.716753853248445e-07, + "loss": 1.1113, + "step": 31182 + }, + { + "epoch": 0.9155851782253802, + "grad_norm": 0.0, + "learning_rate": 3.7141857509874423e-07, + "loss": 1.1655, + "step": 31183 + }, + { + "epoch": 0.9156145399025193, + "grad_norm": 0.0, + "learning_rate": 3.7116185194637333e-07, + "loss": 1.0591, + "step": 31184 + }, + { + "epoch": 0.9156439015796582, + "grad_norm": 0.0, + "learning_rate": 3.709052158700521e-07, + "loss": 1.144, + "step": 31185 + }, + { + "epoch": 0.9156732632567972, + "grad_norm": 0.0, + "learning_rate": 3.7064866687209986e-07, + "loss": 1.1177, + "step": 31186 + }, + { + "epoch": 0.9157026249339362, + "grad_norm": 0.0, + "learning_rate": 3.703922049548392e-07, + "loss": 1.0708, + "step": 31187 + }, + { + "epoch": 0.9157319866110752, + "grad_norm": 0.0, + "learning_rate": 3.7013583012058596e-07, + "loss": 1.3486, + "step": 31188 + }, + { + "epoch": 0.9157613482882142, + "grad_norm": 0.0, + "learning_rate": 3.698795423716617e-07, + "loss": 1.1865, + "step": 31189 + }, + { + "epoch": 0.9157907099653532, + "grad_norm": 0.0, + "learning_rate": 3.696233417103823e-07, + "loss": 1.2861, + "step": 31190 + }, + { + "epoch": 0.9158200716424922, + "grad_norm": 0.0, + "learning_rate": 3.69367228139067e-07, + "loss": 1.2188, + "step": 31191 + }, + { + "epoch": 0.9158494333196312, + "grad_norm": 0.0, + "learning_rate": 3.6911120166002847e-07, + "loss": 1.2202, + "step": 31192 + }, + { + "epoch": 0.9158787949967702, + "grad_norm": 0.0, + "learning_rate": 3.6885526227558476e-07, + "loss": 1.2114, + "step": 31193 + }, + { + "epoch": 0.9159081566739092, + "grad_norm": 0.0, + "learning_rate": 3.6859940998804967e-07, + "loss": 1.1836, + "step": 31194 + }, + { + "epoch": 0.9159375183510482, + "grad_norm": 0.0, + "learning_rate": 3.683436447997368e-07, + "loss": 1.1094, + "step": 31195 + }, + { + "epoch": 0.9159668800281872, + "grad_norm": 0.0, + "learning_rate": 3.6808796671296e-07, + "loss": 1.1621, + "step": 31196 + }, + { + "epoch": 0.9159962417053262, + "grad_norm": 0.0, + "learning_rate": 3.678323757300284e-07, + "loss": 1.2295, + "step": 31197 + }, + { + "epoch": 0.9160256033824652, + "grad_norm": 0.0, + "learning_rate": 3.675768718532557e-07, + "loss": 1.2578, + "step": 31198 + }, + { + "epoch": 0.9160549650596042, + "grad_norm": 0.0, + "learning_rate": 3.6732145508495245e-07, + "loss": 1.1572, + "step": 31199 + }, + { + "epoch": 0.9160843267367432, + "grad_norm": 0.0, + "learning_rate": 3.670661254274288e-07, + "loss": 1.1743, + "step": 31200 + }, + { + "epoch": 0.9161136884138822, + "grad_norm": 0.0, + "learning_rate": 3.6681088288299305e-07, + "loss": 1.1963, + "step": 31201 + }, + { + "epoch": 0.9161430500910211, + "grad_norm": 0.0, + "learning_rate": 3.665557274539533e-07, + "loss": 1.1577, + "step": 31202 + }, + { + "epoch": 0.9161724117681602, + "grad_norm": 0.0, + "learning_rate": 3.663006591426188e-07, + "loss": 1.1973, + "step": 31203 + }, + { + "epoch": 0.9162017734452992, + "grad_norm": 0.0, + "learning_rate": 3.660456779512933e-07, + "loss": 1.2422, + "step": 31204 + }, + { + "epoch": 0.9162311351224381, + "grad_norm": 0.0, + "learning_rate": 3.6579078388228495e-07, + "loss": 1.1172, + "step": 31205 + }, + { + "epoch": 0.9162604967995772, + "grad_norm": 0.0, + "learning_rate": 3.6553597693789633e-07, + "loss": 1.2217, + "step": 31206 + }, + { + "epoch": 0.9162898584767162, + "grad_norm": 0.0, + "learning_rate": 3.652812571204345e-07, + "loss": 1.2979, + "step": 31207 + }, + { + "epoch": 0.9163192201538551, + "grad_norm": 0.0, + "learning_rate": 3.6502662443220096e-07, + "loss": 1.1694, + "step": 31208 + }, + { + "epoch": 0.9163485818309942, + "grad_norm": 0.0, + "learning_rate": 3.6477207887549937e-07, + "loss": 1.3008, + "step": 31209 + }, + { + "epoch": 0.9163779435081332, + "grad_norm": 0.0, + "learning_rate": 3.645176204526313e-07, + "loss": 1.1953, + "step": 31210 + }, + { + "epoch": 0.9164073051852721, + "grad_norm": 0.0, + "learning_rate": 3.642632491658993e-07, + "loss": 1.2256, + "step": 31211 + }, + { + "epoch": 0.9164366668624112, + "grad_norm": 0.0, + "learning_rate": 3.6400896501760155e-07, + "loss": 1.2627, + "step": 31212 + }, + { + "epoch": 0.9164660285395502, + "grad_norm": 0.0, + "learning_rate": 3.637547680100384e-07, + "loss": 1.0317, + "step": 31213 + }, + { + "epoch": 0.9164953902166891, + "grad_norm": 0.0, + "learning_rate": 3.6350065814551025e-07, + "loss": 1.0952, + "step": 31214 + }, + { + "epoch": 0.9165247518938282, + "grad_norm": 0.0, + "learning_rate": 3.632466354263109e-07, + "loss": 1.3184, + "step": 31215 + }, + { + "epoch": 0.9165541135709672, + "grad_norm": 0.0, + "learning_rate": 3.6299269985474285e-07, + "loss": 1.1885, + "step": 31216 + }, + { + "epoch": 0.9165834752481061, + "grad_norm": 0.0, + "learning_rate": 3.627388514330976e-07, + "loss": 1.3115, + "step": 31217 + }, + { + "epoch": 0.9166128369252452, + "grad_norm": 0.0, + "learning_rate": 3.624850901636745e-07, + "loss": 1.0527, + "step": 31218 + }, + { + "epoch": 0.9166421986023842, + "grad_norm": 0.0, + "learning_rate": 3.622314160487672e-07, + "loss": 1.1787, + "step": 31219 + }, + { + "epoch": 0.9166715602795231, + "grad_norm": 0.0, + "learning_rate": 3.6197782909066945e-07, + "loss": 1.0806, + "step": 31220 + }, + { + "epoch": 0.9167009219566622, + "grad_norm": 0.0, + "learning_rate": 3.617243292916728e-07, + "loss": 1.1929, + "step": 31221 + }, + { + "epoch": 0.9167302836338012, + "grad_norm": 0.0, + "learning_rate": 3.614709166540731e-07, + "loss": 1.1113, + "step": 31222 + }, + { + "epoch": 0.9167596453109401, + "grad_norm": 0.0, + "learning_rate": 3.6121759118016073e-07, + "loss": 1.2188, + "step": 31223 + }, + { + "epoch": 0.9167890069880792, + "grad_norm": 0.0, + "learning_rate": 3.6096435287222395e-07, + "loss": 1.1235, + "step": 31224 + }, + { + "epoch": 0.9168183686652182, + "grad_norm": 0.0, + "learning_rate": 3.607112017325576e-07, + "loss": 1.188, + "step": 31225 + }, + { + "epoch": 0.9168477303423571, + "grad_norm": 0.0, + "learning_rate": 3.604581377634464e-07, + "loss": 1.1123, + "step": 31226 + }, + { + "epoch": 0.9168770920194962, + "grad_norm": 0.0, + "learning_rate": 3.602051609671831e-07, + "loss": 1.0991, + "step": 31227 + }, + { + "epoch": 0.9169064536966351, + "grad_norm": 0.0, + "learning_rate": 3.5995227134605237e-07, + "loss": 1.3027, + "step": 31228 + }, + { + "epoch": 0.9169358153737741, + "grad_norm": 0.0, + "learning_rate": 3.596994689023425e-07, + "loss": 1.2715, + "step": 31229 + }, + { + "epoch": 0.9169651770509132, + "grad_norm": 0.0, + "learning_rate": 3.5944675363833724e-07, + "loss": 1.1978, + "step": 31230 + }, + { + "epoch": 0.9169945387280521, + "grad_norm": 0.0, + "learning_rate": 3.591941255563258e-07, + "loss": 1.2646, + "step": 31231 + }, + { + "epoch": 0.9170239004051911, + "grad_norm": 0.0, + "learning_rate": 3.5894158465858975e-07, + "loss": 1.2217, + "step": 31232 + }, + { + "epoch": 0.9170532620823302, + "grad_norm": 0.0, + "learning_rate": 3.586891309474161e-07, + "loss": 0.9712, + "step": 31233 + }, + { + "epoch": 0.9170826237594691, + "grad_norm": 0.0, + "learning_rate": 3.584367644250852e-07, + "loss": 1.2529, + "step": 31234 + }, + { + "epoch": 0.9171119854366081, + "grad_norm": 0.0, + "learning_rate": 3.581844850938776e-07, + "loss": 1.1934, + "step": 31235 + }, + { + "epoch": 0.9171413471137472, + "grad_norm": 0.0, + "learning_rate": 3.579322929560802e-07, + "loss": 1.1538, + "step": 31236 + }, + { + "epoch": 0.9171707087908861, + "grad_norm": 0.0, + "learning_rate": 3.576801880139691e-07, + "loss": 1.2744, + "step": 31237 + }, + { + "epoch": 0.9172000704680251, + "grad_norm": 0.0, + "learning_rate": 3.574281702698257e-07, + "loss": 1.1533, + "step": 31238 + }, + { + "epoch": 0.9172294321451642, + "grad_norm": 0.0, + "learning_rate": 3.571762397259271e-07, + "loss": 1.1675, + "step": 31239 + }, + { + "epoch": 0.9172587938223031, + "grad_norm": 0.0, + "learning_rate": 3.569243963845559e-07, + "loss": 1.1626, + "step": 31240 + }, + { + "epoch": 0.9172881554994421, + "grad_norm": 0.0, + "learning_rate": 3.5667264024798475e-07, + "loss": 1.2129, + "step": 31241 + }, + { + "epoch": 0.9173175171765812, + "grad_norm": 0.0, + "learning_rate": 3.5642097131849406e-07, + "loss": 1.1689, + "step": 31242 + }, + { + "epoch": 0.9173468788537201, + "grad_norm": 0.0, + "learning_rate": 3.561693895983587e-07, + "loss": 1.2241, + "step": 31243 + }, + { + "epoch": 0.9173762405308591, + "grad_norm": 0.0, + "learning_rate": 3.559178950898523e-07, + "loss": 1.3467, + "step": 31244 + }, + { + "epoch": 0.9174056022079982, + "grad_norm": 0.0, + "learning_rate": 3.5566648779525093e-07, + "loss": 1.2588, + "step": 31245 + }, + { + "epoch": 0.9174349638851371, + "grad_norm": 0.0, + "learning_rate": 3.554151677168272e-07, + "loss": 1.2012, + "step": 31246 + }, + { + "epoch": 0.9174643255622761, + "grad_norm": 0.0, + "learning_rate": 3.5516393485685365e-07, + "loss": 1.1299, + "step": 31247 + }, + { + "epoch": 0.9174936872394152, + "grad_norm": 0.0, + "learning_rate": 3.54912789217603e-07, + "loss": 1.2295, + "step": 31248 + }, + { + "epoch": 0.9175230489165541, + "grad_norm": 0.0, + "learning_rate": 3.546617308013467e-07, + "loss": 1.1069, + "step": 31249 + }, + { + "epoch": 0.9175524105936931, + "grad_norm": 0.0, + "learning_rate": 3.54410759610353e-07, + "loss": 1.3037, + "step": 31250 + }, + { + "epoch": 0.9175817722708322, + "grad_norm": 0.0, + "learning_rate": 3.541598756468945e-07, + "loss": 1.2822, + "step": 31251 + }, + { + "epoch": 0.9176111339479711, + "grad_norm": 0.0, + "learning_rate": 3.539090789132382e-07, + "loss": 1.1504, + "step": 31252 + }, + { + "epoch": 0.9176404956251101, + "grad_norm": 0.0, + "learning_rate": 3.5365836941165353e-07, + "loss": 1.3223, + "step": 31253 + }, + { + "epoch": 0.9176698573022491, + "grad_norm": 0.0, + "learning_rate": 3.534077471444064e-07, + "loss": 1.1396, + "step": 31254 + }, + { + "epoch": 0.9176992189793881, + "grad_norm": 0.0, + "learning_rate": 3.531572121137616e-07, + "loss": 1.1699, + "step": 31255 + }, + { + "epoch": 0.9177285806565271, + "grad_norm": 0.0, + "learning_rate": 3.529067643219886e-07, + "loss": 1.2017, + "step": 31256 + }, + { + "epoch": 0.9177579423336661, + "grad_norm": 0.0, + "learning_rate": 3.526564037713487e-07, + "loss": 1.2432, + "step": 31257 + }, + { + "epoch": 0.9177873040108051, + "grad_norm": 0.0, + "learning_rate": 3.524061304641091e-07, + "loss": 1.1714, + "step": 31258 + }, + { + "epoch": 0.9178166656879441, + "grad_norm": 0.0, + "learning_rate": 3.521559444025313e-07, + "loss": 1.2246, + "step": 31259 + }, + { + "epoch": 0.9178460273650831, + "grad_norm": 0.0, + "learning_rate": 3.5190584558887795e-07, + "loss": 1.1172, + "step": 31260 + }, + { + "epoch": 0.9178753890422221, + "grad_norm": 0.0, + "learning_rate": 3.516558340254117e-07, + "loss": 1.0718, + "step": 31261 + }, + { + "epoch": 0.9179047507193611, + "grad_norm": 0.0, + "learning_rate": 3.514059097143929e-07, + "loss": 1.2422, + "step": 31262 + }, + { + "epoch": 0.9179341123965001, + "grad_norm": 0.0, + "learning_rate": 3.5115607265808093e-07, + "loss": 1.2705, + "step": 31263 + }, + { + "epoch": 0.9179634740736391, + "grad_norm": 0.0, + "learning_rate": 3.5090632285873503e-07, + "loss": 1.1553, + "step": 31264 + }, + { + "epoch": 0.9179928357507781, + "grad_norm": 0.0, + "learning_rate": 3.5065666031861457e-07, + "loss": 1.2812, + "step": 31265 + }, + { + "epoch": 0.9180221974279171, + "grad_norm": 0.0, + "learning_rate": 3.5040708503997655e-07, + "loss": 1.2305, + "step": 31266 + }, + { + "epoch": 0.9180515591050561, + "grad_norm": 0.0, + "learning_rate": 3.5015759702508035e-07, + "loss": 1.1621, + "step": 31267 + }, + { + "epoch": 0.918080920782195, + "grad_norm": 0.0, + "learning_rate": 3.4990819627617965e-07, + "loss": 1.1211, + "step": 31268 + }, + { + "epoch": 0.9181102824593341, + "grad_norm": 0.0, + "learning_rate": 3.4965888279553164e-07, + "loss": 1.1812, + "step": 31269 + }, + { + "epoch": 0.9181396441364731, + "grad_norm": 0.0, + "learning_rate": 3.494096565853877e-07, + "loss": 1.2148, + "step": 31270 + }, + { + "epoch": 0.918169005813612, + "grad_norm": 0.0, + "learning_rate": 3.491605176480051e-07, + "loss": 1.252, + "step": 31271 + }, + { + "epoch": 0.9181983674907511, + "grad_norm": 0.0, + "learning_rate": 3.489114659856341e-07, + "loss": 1.2207, + "step": 31272 + }, + { + "epoch": 0.9182277291678901, + "grad_norm": 0.0, + "learning_rate": 3.486625016005296e-07, + "loss": 1.2656, + "step": 31273 + }, + { + "epoch": 0.918257090845029, + "grad_norm": 0.0, + "learning_rate": 3.484136244949421e-07, + "loss": 1.2715, + "step": 31274 + }, + { + "epoch": 0.9182864525221681, + "grad_norm": 0.0, + "learning_rate": 3.4816483467112085e-07, + "loss": 1.1582, + "step": 31275 + }, + { + "epoch": 0.9183158141993071, + "grad_norm": 0.0, + "learning_rate": 3.479161321313185e-07, + "loss": 1.1177, + "step": 31276 + }, + { + "epoch": 0.918345175876446, + "grad_norm": 0.0, + "learning_rate": 3.4766751687778213e-07, + "loss": 1.2793, + "step": 31277 + }, + { + "epoch": 0.9183745375535851, + "grad_norm": 0.0, + "learning_rate": 3.4741898891275996e-07, + "loss": 1.3672, + "step": 31278 + }, + { + "epoch": 0.9184038992307241, + "grad_norm": 0.0, + "learning_rate": 3.4717054823849905e-07, + "loss": 1.248, + "step": 31279 + }, + { + "epoch": 0.918433260907863, + "grad_norm": 0.0, + "learning_rate": 3.4692219485724875e-07, + "loss": 1.3184, + "step": 31280 + }, + { + "epoch": 0.9184626225850021, + "grad_norm": 0.0, + "learning_rate": 3.4667392877125174e-07, + "loss": 1.1997, + "step": 31281 + }, + { + "epoch": 0.9184919842621411, + "grad_norm": 0.0, + "learning_rate": 3.4642574998275505e-07, + "loss": 1.3193, + "step": 31282 + }, + { + "epoch": 0.91852134593928, + "grad_norm": 0.0, + "learning_rate": 3.461776584940035e-07, + "loss": 1.1255, + "step": 31283 + }, + { + "epoch": 0.9185507076164191, + "grad_norm": 0.0, + "learning_rate": 3.459296543072388e-07, + "loss": 1.0068, + "step": 31284 + }, + { + "epoch": 0.9185800692935581, + "grad_norm": 0.0, + "learning_rate": 3.4568173742470455e-07, + "loss": 1.1113, + "step": 31285 + }, + { + "epoch": 0.918609430970697, + "grad_norm": 0.0, + "learning_rate": 3.454339078486435e-07, + "loss": 1.1958, + "step": 31286 + }, + { + "epoch": 0.9186387926478361, + "grad_norm": 0.0, + "learning_rate": 3.4518616558129716e-07, + "loss": 1.3115, + "step": 31287 + }, + { + "epoch": 0.9186681543249751, + "grad_norm": 0.0, + "learning_rate": 3.449385106249026e-07, + "loss": 1.2188, + "step": 31288 + }, + { + "epoch": 0.918697516002114, + "grad_norm": 0.0, + "learning_rate": 3.446909429817036e-07, + "loss": 1.1943, + "step": 31289 + }, + { + "epoch": 0.918726877679253, + "grad_norm": 0.0, + "learning_rate": 3.4444346265393505e-07, + "loss": 1.1777, + "step": 31290 + }, + { + "epoch": 0.9187562393563921, + "grad_norm": 0.0, + "learning_rate": 3.4419606964383954e-07, + "loss": 1.2036, + "step": 31291 + }, + { + "epoch": 0.918785601033531, + "grad_norm": 0.0, + "learning_rate": 3.4394876395365096e-07, + "loss": 1.1475, + "step": 31292 + }, + { + "epoch": 0.91881496271067, + "grad_norm": 0.0, + "learning_rate": 3.4370154558560743e-07, + "loss": 1.2568, + "step": 31293 + }, + { + "epoch": 0.918844324387809, + "grad_norm": 0.0, + "learning_rate": 3.4345441454194274e-07, + "loss": 1.335, + "step": 31294 + }, + { + "epoch": 0.918873686064948, + "grad_norm": 0.0, + "learning_rate": 3.432073708248929e-07, + "loss": 1.1733, + "step": 31295 + }, + { + "epoch": 0.918903047742087, + "grad_norm": 0.0, + "learning_rate": 3.429604144366927e-07, + "loss": 1.1123, + "step": 31296 + }, + { + "epoch": 0.918932409419226, + "grad_norm": 0.0, + "learning_rate": 3.427135453795727e-07, + "loss": 1.2432, + "step": 31297 + }, + { + "epoch": 0.918961771096365, + "grad_norm": 0.0, + "learning_rate": 3.424667636557699e-07, + "loss": 1.1323, + "step": 31298 + }, + { + "epoch": 0.918991132773504, + "grad_norm": 0.0, + "learning_rate": 3.422200692675115e-07, + "loss": 1.2832, + "step": 31299 + }, + { + "epoch": 0.919020494450643, + "grad_norm": 0.0, + "learning_rate": 3.419734622170312e-07, + "loss": 1.2949, + "step": 31300 + }, + { + "epoch": 0.919049856127782, + "grad_norm": 0.0, + "learning_rate": 3.417269425065595e-07, + "loss": 1.3086, + "step": 31301 + }, + { + "epoch": 0.919079217804921, + "grad_norm": 0.0, + "learning_rate": 3.414805101383234e-07, + "loss": 1.1807, + "step": 31302 + }, + { + "epoch": 0.91910857948206, + "grad_norm": 0.0, + "learning_rate": 3.4123416511455344e-07, + "loss": 1.2695, + "step": 31303 + }, + { + "epoch": 0.919137941159199, + "grad_norm": 0.0, + "learning_rate": 3.4098790743747443e-07, + "loss": 1.0889, + "step": 31304 + }, + { + "epoch": 0.919167302836338, + "grad_norm": 0.0, + "learning_rate": 3.4074173710931804e-07, + "loss": 1.0479, + "step": 31305 + }, + { + "epoch": 0.919196664513477, + "grad_norm": 0.0, + "learning_rate": 3.4049565413230567e-07, + "loss": 1.2285, + "step": 31306 + }, + { + "epoch": 0.919226026190616, + "grad_norm": 0.0, + "learning_rate": 3.4024965850866676e-07, + "loss": 1.2324, + "step": 31307 + }, + { + "epoch": 0.919255387867755, + "grad_norm": 0.0, + "learning_rate": 3.400037502406228e-07, + "loss": 1.2002, + "step": 31308 + }, + { + "epoch": 0.919284749544894, + "grad_norm": 0.0, + "learning_rate": 3.3975792933039984e-07, + "loss": 1.1577, + "step": 31309 + }, + { + "epoch": 0.919314111222033, + "grad_norm": 0.0, + "learning_rate": 3.3951219578021943e-07, + "loss": 1.2607, + "step": 31310 + }, + { + "epoch": 0.919343472899172, + "grad_norm": 0.0, + "learning_rate": 3.3926654959230533e-07, + "loss": 1.2012, + "step": 31311 + }, + { + "epoch": 0.919372834576311, + "grad_norm": 0.0, + "learning_rate": 3.3902099076887687e-07, + "loss": 1.1279, + "step": 31312 + }, + { + "epoch": 0.91940219625345, + "grad_norm": 0.0, + "learning_rate": 3.387755193121556e-07, + "loss": 1.3916, + "step": 31313 + }, + { + "epoch": 0.919431557930589, + "grad_norm": 0.0, + "learning_rate": 3.3853013522436305e-07, + "loss": 1.2002, + "step": 31314 + }, + { + "epoch": 0.919460919607728, + "grad_norm": 0.0, + "learning_rate": 3.382848385077142e-07, + "loss": 1.3848, + "step": 31315 + }, + { + "epoch": 0.919490281284867, + "grad_norm": 0.0, + "learning_rate": 3.3803962916443276e-07, + "loss": 1.291, + "step": 31316 + }, + { + "epoch": 0.919519642962006, + "grad_norm": 0.0, + "learning_rate": 3.3779450719673256e-07, + "loss": 1.2861, + "step": 31317 + }, + { + "epoch": 0.919549004639145, + "grad_norm": 0.0, + "learning_rate": 3.375494726068307e-07, + "loss": 1.0752, + "step": 31318 + }, + { + "epoch": 0.919578366316284, + "grad_norm": 0.0, + "learning_rate": 3.3730452539694313e-07, + "loss": 1.1592, + "step": 31319 + }, + { + "epoch": 0.9196077279934229, + "grad_norm": 0.0, + "learning_rate": 3.3705966556928595e-07, + "loss": 1.1372, + "step": 31320 + }, + { + "epoch": 0.919637089670562, + "grad_norm": 0.0, + "learning_rate": 3.3681489312607283e-07, + "loss": 1.0947, + "step": 31321 + }, + { + "epoch": 0.919666451347701, + "grad_norm": 0.0, + "learning_rate": 3.365702080695177e-07, + "loss": 1.25, + "step": 31322 + }, + { + "epoch": 0.9196958130248399, + "grad_norm": 0.0, + "learning_rate": 3.3632561040183307e-07, + "loss": 1.1948, + "step": 31323 + }, + { + "epoch": 0.919725174701979, + "grad_norm": 0.0, + "learning_rate": 3.3608110012522953e-07, + "loss": 1.2393, + "step": 31324 + }, + { + "epoch": 0.919754536379118, + "grad_norm": 0.0, + "learning_rate": 3.358366772419208e-07, + "loss": 1.1562, + "step": 31325 + }, + { + "epoch": 0.9197838980562569, + "grad_norm": 0.0, + "learning_rate": 3.3559234175411626e-07, + "loss": 1.1938, + "step": 31326 + }, + { + "epoch": 0.919813259733396, + "grad_norm": 0.0, + "learning_rate": 3.353480936640252e-07, + "loss": 1.145, + "step": 31327 + }, + { + "epoch": 0.919842621410535, + "grad_norm": 0.0, + "learning_rate": 3.351039329738559e-07, + "loss": 1.1602, + "step": 31328 + }, + { + "epoch": 0.9198719830876739, + "grad_norm": 0.0, + "learning_rate": 3.348598596858177e-07, + "loss": 1.3203, + "step": 31329 + }, + { + "epoch": 0.919901344764813, + "grad_norm": 0.0, + "learning_rate": 3.3461587380211545e-07, + "loss": 1.1265, + "step": 31330 + }, + { + "epoch": 0.919930706441952, + "grad_norm": 0.0, + "learning_rate": 3.343719753249586e-07, + "loss": 1.2422, + "step": 31331 + }, + { + "epoch": 0.9199600681190909, + "grad_norm": 0.0, + "learning_rate": 3.3412816425655193e-07, + "loss": 1.0811, + "step": 31332 + }, + { + "epoch": 0.91998942979623, + "grad_norm": 0.0, + "learning_rate": 3.338844405990982e-07, + "loss": 1.293, + "step": 31333 + }, + { + "epoch": 0.920018791473369, + "grad_norm": 0.0, + "learning_rate": 3.336408043548045e-07, + "loss": 1.3257, + "step": 31334 + }, + { + "epoch": 0.9200481531505079, + "grad_norm": 0.0, + "learning_rate": 3.333972555258713e-07, + "loss": 1.1895, + "step": 31335 + }, + { + "epoch": 0.920077514827647, + "grad_norm": 0.0, + "learning_rate": 3.3315379411450354e-07, + "loss": 1.1729, + "step": 31336 + }, + { + "epoch": 0.920106876504786, + "grad_norm": 0.0, + "learning_rate": 3.3291042012290053e-07, + "loss": 1.1538, + "step": 31337 + }, + { + "epoch": 0.9201362381819249, + "grad_norm": 0.0, + "learning_rate": 3.32667133553265e-07, + "loss": 1.3164, + "step": 31338 + }, + { + "epoch": 0.920165599859064, + "grad_norm": 0.0, + "learning_rate": 3.324239344077962e-07, + "loss": 1.1558, + "step": 31339 + }, + { + "epoch": 0.920194961536203, + "grad_norm": 0.0, + "learning_rate": 3.3218082268869355e-07, + "loss": 1.2148, + "step": 31340 + }, + { + "epoch": 0.9202243232133419, + "grad_norm": 0.0, + "learning_rate": 3.319377983981564e-07, + "loss": 1.1274, + "step": 31341 + }, + { + "epoch": 0.920253684890481, + "grad_norm": 0.0, + "learning_rate": 3.3169486153838196e-07, + "loss": 1.2266, + "step": 31342 + }, + { + "epoch": 0.92028304656762, + "grad_norm": 0.0, + "learning_rate": 3.3145201211156606e-07, + "loss": 1.2461, + "step": 31343 + }, + { + "epoch": 0.9203124082447589, + "grad_norm": 0.0, + "learning_rate": 3.3120925011990603e-07, + "loss": 1.2402, + "step": 31344 + }, + { + "epoch": 0.920341769921898, + "grad_norm": 0.0, + "learning_rate": 3.3096657556559664e-07, + "loss": 1.1602, + "step": 31345 + }, + { + "epoch": 0.9203711315990369, + "grad_norm": 0.0, + "learning_rate": 3.3072398845083286e-07, + "loss": 1.2471, + "step": 31346 + }, + { + "epoch": 0.9204004932761759, + "grad_norm": 0.0, + "learning_rate": 3.304814887778085e-07, + "loss": 1.2646, + "step": 31347 + }, + { + "epoch": 0.920429854953315, + "grad_norm": 0.0, + "learning_rate": 3.302390765487162e-07, + "loss": 1.1387, + "step": 31348 + }, + { + "epoch": 0.9204592166304539, + "grad_norm": 0.0, + "learning_rate": 3.2999675176574985e-07, + "loss": 1.2397, + "step": 31349 + }, + { + "epoch": 0.9204885783075929, + "grad_norm": 0.0, + "learning_rate": 3.2975451443109876e-07, + "loss": 1.2148, + "step": 31350 + }, + { + "epoch": 0.920517939984732, + "grad_norm": 0.0, + "learning_rate": 3.2951236454695336e-07, + "loss": 1.1143, + "step": 31351 + }, + { + "epoch": 0.9205473016618709, + "grad_norm": 0.0, + "learning_rate": 3.2927030211550524e-07, + "loss": 1.2451, + "step": 31352 + }, + { + "epoch": 0.9205766633390099, + "grad_norm": 0.0, + "learning_rate": 3.290283271389416e-07, + "loss": 1.1758, + "step": 31353 + }, + { + "epoch": 0.920606025016149, + "grad_norm": 0.0, + "learning_rate": 3.287864396194518e-07, + "loss": 1.293, + "step": 31354 + }, + { + "epoch": 0.9206353866932879, + "grad_norm": 0.0, + "learning_rate": 3.285446395592229e-07, + "loss": 1.2085, + "step": 31355 + }, + { + "epoch": 0.9206647483704269, + "grad_norm": 0.0, + "learning_rate": 3.283029269604421e-07, + "loss": 1.2402, + "step": 31356 + }, + { + "epoch": 0.920694110047566, + "grad_norm": 0.0, + "learning_rate": 3.2806130182529427e-07, + "loss": 1.1621, + "step": 31357 + }, + { + "epoch": 0.9207234717247049, + "grad_norm": 0.0, + "learning_rate": 3.278197641559655e-07, + "loss": 1.3369, + "step": 31358 + }, + { + "epoch": 0.9207528334018439, + "grad_norm": 0.0, + "learning_rate": 3.275783139546407e-07, + "loss": 1.2432, + "step": 31359 + }, + { + "epoch": 0.920782195078983, + "grad_norm": 0.0, + "learning_rate": 3.273369512235014e-07, + "loss": 1.1357, + "step": 31360 + }, + { + "epoch": 0.9208115567561219, + "grad_norm": 0.0, + "learning_rate": 3.2709567596473145e-07, + "loss": 1.1279, + "step": 31361 + }, + { + "epoch": 0.9208409184332609, + "grad_norm": 0.0, + "learning_rate": 3.268544881805125e-07, + "loss": 1.2842, + "step": 31362 + }, + { + "epoch": 0.9208702801104, + "grad_norm": 0.0, + "learning_rate": 3.26613387873026e-07, + "loss": 1.2197, + "step": 31363 + }, + { + "epoch": 0.9208996417875389, + "grad_norm": 0.0, + "learning_rate": 3.263723750444503e-07, + "loss": 1.1689, + "step": 31364 + }, + { + "epoch": 0.9209290034646779, + "grad_norm": 0.0, + "learning_rate": 3.261314496969681e-07, + "loss": 1.1924, + "step": 31365 + }, + { + "epoch": 0.920958365141817, + "grad_norm": 0.0, + "learning_rate": 3.258906118327576e-07, + "loss": 1.2236, + "step": 31366 + }, + { + "epoch": 0.9209877268189559, + "grad_norm": 0.0, + "learning_rate": 3.256498614539949e-07, + "loss": 1.0854, + "step": 31367 + }, + { + "epoch": 0.9210170884960949, + "grad_norm": 0.0, + "learning_rate": 3.2540919856285827e-07, + "loss": 1.228, + "step": 31368 + }, + { + "epoch": 0.9210464501732339, + "grad_norm": 0.0, + "learning_rate": 3.2516862316152365e-07, + "loss": 1.1558, + "step": 31369 + }, + { + "epoch": 0.9210758118503729, + "grad_norm": 0.0, + "learning_rate": 3.2492813525216716e-07, + "loss": 1.2363, + "step": 31370 + }, + { + "epoch": 0.9211051735275119, + "grad_norm": 0.0, + "learning_rate": 3.246877348369637e-07, + "loss": 1.335, + "step": 31371 + }, + { + "epoch": 0.9211345352046509, + "grad_norm": 0.0, + "learning_rate": 3.24447421918086e-07, + "loss": 1.2139, + "step": 31372 + }, + { + "epoch": 0.9211638968817899, + "grad_norm": 0.0, + "learning_rate": 3.2420719649770895e-07, + "loss": 1.3486, + "step": 31373 + }, + { + "epoch": 0.9211932585589289, + "grad_norm": 0.0, + "learning_rate": 3.2396705857800414e-07, + "loss": 1.1123, + "step": 31374 + }, + { + "epoch": 0.9212226202360679, + "grad_norm": 0.0, + "learning_rate": 3.237270081611432e-07, + "loss": 1.0918, + "step": 31375 + }, + { + "epoch": 0.9212519819132069, + "grad_norm": 0.0, + "learning_rate": 3.2348704524929775e-07, + "loss": 1.2695, + "step": 31376 + }, + { + "epoch": 0.9212813435903459, + "grad_norm": 0.0, + "learning_rate": 3.23247169844636e-07, + "loss": 1.2461, + "step": 31377 + }, + { + "epoch": 0.9213107052674849, + "grad_norm": 0.0, + "learning_rate": 3.2300738194932955e-07, + "loss": 1.3037, + "step": 31378 + }, + { + "epoch": 0.9213400669446239, + "grad_norm": 0.0, + "learning_rate": 3.227676815655434e-07, + "loss": 1.373, + "step": 31379 + }, + { + "epoch": 0.9213694286217629, + "grad_norm": 0.0, + "learning_rate": 3.2252806869545015e-07, + "loss": 1.3428, + "step": 31380 + }, + { + "epoch": 0.9213987902989019, + "grad_norm": 0.0, + "learning_rate": 3.2228854334121263e-07, + "loss": 1.2529, + "step": 31381 + }, + { + "epoch": 0.9214281519760409, + "grad_norm": 0.0, + "learning_rate": 3.220491055049979e-07, + "loss": 1.1353, + "step": 31382 + }, + { + "epoch": 0.9214575136531798, + "grad_norm": 0.0, + "learning_rate": 3.2180975518897316e-07, + "loss": 1.106, + "step": 31383 + }, + { + "epoch": 0.9214868753303189, + "grad_norm": 0.0, + "learning_rate": 3.2157049239530114e-07, + "loss": 1.1357, + "step": 31384 + }, + { + "epoch": 0.9215162370074579, + "grad_norm": 0.0, + "learning_rate": 3.2133131712614564e-07, + "loss": 1.3076, + "step": 31385 + }, + { + "epoch": 0.9215455986845968, + "grad_norm": 0.0, + "learning_rate": 3.2109222938366824e-07, + "loss": 1.1489, + "step": 31386 + }, + { + "epoch": 0.9215749603617359, + "grad_norm": 0.0, + "learning_rate": 3.208532291700339e-07, + "loss": 1.1685, + "step": 31387 + }, + { + "epoch": 0.9216043220388749, + "grad_norm": 0.0, + "learning_rate": 3.20614316487402e-07, + "loss": 1.1343, + "step": 31388 + }, + { + "epoch": 0.9216336837160138, + "grad_norm": 0.0, + "learning_rate": 3.2037549133793535e-07, + "loss": 1.1768, + "step": 31389 + }, + { + "epoch": 0.9216630453931528, + "grad_norm": 0.0, + "learning_rate": 3.2013675372378985e-07, + "loss": 1.3164, + "step": 31390 + }, + { + "epoch": 0.9216924070702919, + "grad_norm": 0.0, + "learning_rate": 3.1989810364712936e-07, + "loss": 1.2593, + "step": 31391 + }, + { + "epoch": 0.9217217687474308, + "grad_norm": 0.0, + "learning_rate": 3.196595411101089e-07, + "loss": 1.3828, + "step": 31392 + }, + { + "epoch": 0.9217511304245698, + "grad_norm": 0.0, + "learning_rate": 3.1942106611488444e-07, + "loss": 1.251, + "step": 31393 + }, + { + "epoch": 0.9217804921017089, + "grad_norm": 0.0, + "learning_rate": 3.1918267866361654e-07, + "loss": 1.2456, + "step": 31394 + }, + { + "epoch": 0.9218098537788478, + "grad_norm": 0.0, + "learning_rate": 3.189443787584567e-07, + "loss": 1.2217, + "step": 31395 + }, + { + "epoch": 0.9218392154559868, + "grad_norm": 0.0, + "learning_rate": 3.187061664015645e-07, + "loss": 1.2134, + "step": 31396 + }, + { + "epoch": 0.9218685771331259, + "grad_norm": 0.0, + "learning_rate": 3.1846804159509026e-07, + "loss": 1.168, + "step": 31397 + }, + { + "epoch": 0.9218979388102648, + "grad_norm": 0.0, + "learning_rate": 3.1823000434119014e-07, + "loss": 1.2725, + "step": 31398 + }, + { + "epoch": 0.9219273004874038, + "grad_norm": 0.0, + "learning_rate": 3.1799205464201565e-07, + "loss": 1.1572, + "step": 31399 + }, + { + "epoch": 0.9219566621645429, + "grad_norm": 0.0, + "learning_rate": 3.1775419249971853e-07, + "loss": 1.1514, + "step": 31400 + }, + { + "epoch": 0.9219860238416818, + "grad_norm": 0.0, + "learning_rate": 3.175164179164492e-07, + "loss": 1.2427, + "step": 31401 + }, + { + "epoch": 0.9220153855188208, + "grad_norm": 0.0, + "learning_rate": 3.172787308943581e-07, + "loss": 1.1768, + "step": 31402 + }, + { + "epoch": 0.9220447471959599, + "grad_norm": 0.0, + "learning_rate": 3.1704113143559703e-07, + "loss": 1.3125, + "step": 31403 + }, + { + "epoch": 0.9220741088730988, + "grad_norm": 0.0, + "learning_rate": 3.1680361954231076e-07, + "loss": 1.1533, + "step": 31404 + }, + { + "epoch": 0.9221034705502378, + "grad_norm": 0.0, + "learning_rate": 3.16566195216651e-07, + "loss": 1.0908, + "step": 31405 + }, + { + "epoch": 0.9221328322273769, + "grad_norm": 0.0, + "learning_rate": 3.1632885846076153e-07, + "loss": 1.2739, + "step": 31406 + }, + { + "epoch": 0.9221621939045158, + "grad_norm": 0.0, + "learning_rate": 3.160916092767918e-07, + "loss": 1.1279, + "step": 31407 + }, + { + "epoch": 0.9221915555816548, + "grad_norm": 0.0, + "learning_rate": 3.1585444766688566e-07, + "loss": 1.2412, + "step": 31408 + }, + { + "epoch": 0.9222209172587938, + "grad_norm": 0.0, + "learning_rate": 3.156173736331869e-07, + "loss": 1.1904, + "step": 31409 + }, + { + "epoch": 0.9222502789359328, + "grad_norm": 0.0, + "learning_rate": 3.153803871778405e-07, + "loss": 1.1987, + "step": 31410 + }, + { + "epoch": 0.9222796406130718, + "grad_norm": 0.0, + "learning_rate": 3.1514348830299025e-07, + "loss": 1.2046, + "step": 31411 + }, + { + "epoch": 0.9223090022902108, + "grad_norm": 0.0, + "learning_rate": 3.1490667701077783e-07, + "loss": 1.2646, + "step": 31412 + }, + { + "epoch": 0.9223383639673498, + "grad_norm": 0.0, + "learning_rate": 3.146699533033437e-07, + "loss": 1.0332, + "step": 31413 + }, + { + "epoch": 0.9223677256444888, + "grad_norm": 0.0, + "learning_rate": 3.1443331718283064e-07, + "loss": 1.0947, + "step": 31414 + }, + { + "epoch": 0.9223970873216278, + "grad_norm": 0.0, + "learning_rate": 3.141967686513758e-07, + "loss": 1.1245, + "step": 31415 + }, + { + "epoch": 0.9224264489987668, + "grad_norm": 0.0, + "learning_rate": 3.1396030771112306e-07, + "loss": 1.2314, + "step": 31416 + }, + { + "epoch": 0.9224558106759058, + "grad_norm": 0.0, + "learning_rate": 3.1372393436420623e-07, + "loss": 1.1748, + "step": 31417 + }, + { + "epoch": 0.9224851723530448, + "grad_norm": 0.0, + "learning_rate": 3.1348764861276473e-07, + "loss": 1.1602, + "step": 31418 + }, + { + "epoch": 0.9225145340301838, + "grad_norm": 0.0, + "learning_rate": 3.1325145045893457e-07, + "loss": 1.2388, + "step": 31419 + }, + { + "epoch": 0.9225438957073228, + "grad_norm": 0.0, + "learning_rate": 3.13015339904853e-07, + "loss": 1.2471, + "step": 31420 + }, + { + "epoch": 0.9225732573844618, + "grad_norm": 0.0, + "learning_rate": 3.1277931695265497e-07, + "loss": 1.165, + "step": 31421 + }, + { + "epoch": 0.9226026190616008, + "grad_norm": 0.0, + "learning_rate": 3.1254338160447315e-07, + "loss": 1.1621, + "step": 31422 + }, + { + "epoch": 0.9226319807387398, + "grad_norm": 0.0, + "learning_rate": 3.1230753386244484e-07, + "loss": 1.2188, + "step": 31423 + }, + { + "epoch": 0.9226613424158788, + "grad_norm": 0.0, + "learning_rate": 3.1207177372869936e-07, + "loss": 1.2236, + "step": 31424 + }, + { + "epoch": 0.9226907040930178, + "grad_norm": 0.0, + "learning_rate": 3.1183610120536945e-07, + "loss": 1.1343, + "step": 31425 + }, + { + "epoch": 0.9227200657701567, + "grad_norm": 0.0, + "learning_rate": 3.1160051629458676e-07, + "loss": 1.3779, + "step": 31426 + }, + { + "epoch": 0.9227494274472958, + "grad_norm": 0.0, + "learning_rate": 3.113650189984829e-07, + "loss": 1.2305, + "step": 31427 + }, + { + "epoch": 0.9227787891244348, + "grad_norm": 0.0, + "learning_rate": 3.111296093191851e-07, + "loss": 1.1953, + "step": 31428 + }, + { + "epoch": 0.9228081508015737, + "grad_norm": 0.0, + "learning_rate": 3.108942872588239e-07, + "loss": 1.3271, + "step": 31429 + }, + { + "epoch": 0.9228375124787128, + "grad_norm": 0.0, + "learning_rate": 3.106590528195286e-07, + "loss": 1.2393, + "step": 31430 + }, + { + "epoch": 0.9228668741558518, + "grad_norm": 0.0, + "learning_rate": 3.104239060034231e-07, + "loss": 1.2617, + "step": 31431 + }, + { + "epoch": 0.9228962358329907, + "grad_norm": 0.0, + "learning_rate": 3.1018884681263684e-07, + "loss": 1.2461, + "step": 31432 + }, + { + "epoch": 0.9229255975101298, + "grad_norm": 0.0, + "learning_rate": 3.099538752492937e-07, + "loss": 1.168, + "step": 31433 + }, + { + "epoch": 0.9229549591872688, + "grad_norm": 0.0, + "learning_rate": 3.0971899131552077e-07, + "loss": 1.2622, + "step": 31434 + }, + { + "epoch": 0.9229843208644077, + "grad_norm": 0.0, + "learning_rate": 3.094841950134386e-07, + "loss": 1.2661, + "step": 31435 + }, + { + "epoch": 0.9230136825415468, + "grad_norm": 0.0, + "learning_rate": 3.092494863451745e-07, + "loss": 1.1377, + "step": 31436 + }, + { + "epoch": 0.9230430442186858, + "grad_norm": 0.0, + "learning_rate": 3.0901486531284665e-07, + "loss": 1.1514, + "step": 31437 + }, + { + "epoch": 0.9230724058958247, + "grad_norm": 0.0, + "learning_rate": 3.0878033191858227e-07, + "loss": 1.1768, + "step": 31438 + }, + { + "epoch": 0.9231017675729638, + "grad_norm": 0.0, + "learning_rate": 3.0854588616449634e-07, + "loss": 1.208, + "step": 31439 + }, + { + "epoch": 0.9231311292501028, + "grad_norm": 0.0, + "learning_rate": 3.083115280527149e-07, + "loss": 1.2227, + "step": 31440 + }, + { + "epoch": 0.9231604909272417, + "grad_norm": 0.0, + "learning_rate": 3.0807725758535413e-07, + "loss": 1.1265, + "step": 31441 + }, + { + "epoch": 0.9231898526043808, + "grad_norm": 0.0, + "learning_rate": 3.078430747645311e-07, + "loss": 1.3145, + "step": 31442 + }, + { + "epoch": 0.9232192142815198, + "grad_norm": 0.0, + "learning_rate": 3.0760897959236645e-07, + "loss": 1.0835, + "step": 31443 + }, + { + "epoch": 0.9232485759586587, + "grad_norm": 0.0, + "learning_rate": 3.0737497207097513e-07, + "loss": 1.3984, + "step": 31444 + }, + { + "epoch": 0.9232779376357978, + "grad_norm": 0.0, + "learning_rate": 3.071410522024754e-07, + "loss": 1.2773, + "step": 31445 + }, + { + "epoch": 0.9233072993129368, + "grad_norm": 0.0, + "learning_rate": 3.0690721998898e-07, + "loss": 1.1807, + "step": 31446 + }, + { + "epoch": 0.9233366609900757, + "grad_norm": 0.0, + "learning_rate": 3.066734754326062e-07, + "loss": 1.2617, + "step": 31447 + }, + { + "epoch": 0.9233660226672148, + "grad_norm": 0.0, + "learning_rate": 3.0643981853546666e-07, + "loss": 1.1465, + "step": 31448 + }, + { + "epoch": 0.9233953843443538, + "grad_norm": 0.0, + "learning_rate": 3.0620624929967426e-07, + "loss": 1.2852, + "step": 31449 + }, + { + "epoch": 0.9234247460214927, + "grad_norm": 0.0, + "learning_rate": 3.0597276772734164e-07, + "loss": 1.1558, + "step": 31450 + }, + { + "epoch": 0.9234541076986318, + "grad_norm": 0.0, + "learning_rate": 3.057393738205794e-07, + "loss": 1.1816, + "step": 31451 + }, + { + "epoch": 0.9234834693757707, + "grad_norm": 0.0, + "learning_rate": 3.0550606758149914e-07, + "loss": 1.1689, + "step": 31452 + }, + { + "epoch": 0.9235128310529097, + "grad_norm": 0.0, + "learning_rate": 3.0527284901220923e-07, + "loss": 1.2949, + "step": 31453 + }, + { + "epoch": 0.9235421927300488, + "grad_norm": 0.0, + "learning_rate": 3.0503971811482126e-07, + "loss": 1.1792, + "step": 31454 + }, + { + "epoch": 0.9235715544071877, + "grad_norm": 0.0, + "learning_rate": 3.0480667489144023e-07, + "loss": 1.3242, + "step": 31455 + }, + { + "epoch": 0.9236009160843267, + "grad_norm": 0.0, + "learning_rate": 3.045737193441778e-07, + "loss": 1.3818, + "step": 31456 + }, + { + "epoch": 0.9236302777614658, + "grad_norm": 0.0, + "learning_rate": 3.0434085147513673e-07, + "loss": 1.1812, + "step": 31457 + }, + { + "epoch": 0.9236596394386047, + "grad_norm": 0.0, + "learning_rate": 3.041080712864253e-07, + "loss": 1.1245, + "step": 31458 + }, + { + "epoch": 0.9236890011157437, + "grad_norm": 0.0, + "learning_rate": 3.0387537878014626e-07, + "loss": 1.231, + "step": 31459 + }, + { + "epoch": 0.9237183627928828, + "grad_norm": 0.0, + "learning_rate": 3.036427739584069e-07, + "loss": 1.3154, + "step": 31460 + }, + { + "epoch": 0.9237477244700217, + "grad_norm": 0.0, + "learning_rate": 3.0341025682330884e-07, + "loss": 1.2939, + "step": 31461 + }, + { + "epoch": 0.9237770861471607, + "grad_norm": 0.0, + "learning_rate": 3.031778273769548e-07, + "loss": 1.1089, + "step": 31462 + }, + { + "epoch": 0.9238064478242998, + "grad_norm": 0.0, + "learning_rate": 3.0294548562144756e-07, + "loss": 1.2173, + "step": 31463 + }, + { + "epoch": 0.9238358095014387, + "grad_norm": 0.0, + "learning_rate": 3.0271323155888654e-07, + "loss": 1.2349, + "step": 31464 + }, + { + "epoch": 0.9238651711785777, + "grad_norm": 0.0, + "learning_rate": 3.0248106519137454e-07, + "loss": 1.1416, + "step": 31465 + }, + { + "epoch": 0.9238945328557168, + "grad_norm": 0.0, + "learning_rate": 3.022489865210099e-07, + "loss": 1.2754, + "step": 31466 + }, + { + "epoch": 0.9239238945328557, + "grad_norm": 0.0, + "learning_rate": 3.0201699554989194e-07, + "loss": 1.1611, + "step": 31467 + }, + { + "epoch": 0.9239532562099947, + "grad_norm": 0.0, + "learning_rate": 3.0178509228011574e-07, + "loss": 1.2061, + "step": 31468 + }, + { + "epoch": 0.9239826178871338, + "grad_norm": 0.0, + "learning_rate": 3.015532767137819e-07, + "loss": 1.2539, + "step": 31469 + }, + { + "epoch": 0.9240119795642727, + "grad_norm": 0.0, + "learning_rate": 3.013215488529864e-07, + "loss": 1.2427, + "step": 31470 + }, + { + "epoch": 0.9240413412414117, + "grad_norm": 0.0, + "learning_rate": 3.01089908699822e-07, + "loss": 1.1426, + "step": 31471 + }, + { + "epoch": 0.9240707029185508, + "grad_norm": 0.0, + "learning_rate": 3.0085835625638716e-07, + "loss": 1.1138, + "step": 31472 + }, + { + "epoch": 0.9241000645956897, + "grad_norm": 0.0, + "learning_rate": 3.006268915247745e-07, + "loss": 1.1064, + "step": 31473 + }, + { + "epoch": 0.9241294262728287, + "grad_norm": 0.0, + "learning_rate": 3.0039551450707585e-07, + "loss": 1.2427, + "step": 31474 + }, + { + "epoch": 0.9241587879499678, + "grad_norm": 0.0, + "learning_rate": 3.0016422520538493e-07, + "loss": 1.2119, + "step": 31475 + }, + { + "epoch": 0.9241881496271067, + "grad_norm": 0.0, + "learning_rate": 2.9993302362179346e-07, + "loss": 1.2319, + "step": 31476 + }, + { + "epoch": 0.9242175113042457, + "grad_norm": 0.0, + "learning_rate": 2.9970190975839085e-07, + "loss": 1.209, + "step": 31477 + }, + { + "epoch": 0.9242468729813847, + "grad_norm": 0.0, + "learning_rate": 2.9947088361726885e-07, + "loss": 1.2998, + "step": 31478 + }, + { + "epoch": 0.9242762346585237, + "grad_norm": 0.0, + "learning_rate": 2.992399452005157e-07, + "loss": 1.2393, + "step": 31479 + }, + { + "epoch": 0.9243055963356627, + "grad_norm": 0.0, + "learning_rate": 2.9900909451021974e-07, + "loss": 1.1021, + "step": 31480 + }, + { + "epoch": 0.9243349580128017, + "grad_norm": 0.0, + "learning_rate": 2.9877833154847047e-07, + "loss": 1.1782, + "step": 31481 + }, + { + "epoch": 0.9243643196899407, + "grad_norm": 0.0, + "learning_rate": 2.985476563173528e-07, + "loss": 1.1997, + "step": 31482 + }, + { + "epoch": 0.9243936813670797, + "grad_norm": 0.0, + "learning_rate": 2.9831706881895295e-07, + "loss": 1.25, + "step": 31483 + }, + { + "epoch": 0.9244230430442187, + "grad_norm": 0.0, + "learning_rate": 2.980865690553558e-07, + "loss": 1.1221, + "step": 31484 + }, + { + "epoch": 0.9244524047213577, + "grad_norm": 0.0, + "learning_rate": 2.978561570286476e-07, + "loss": 1.2275, + "step": 31485 + }, + { + "epoch": 0.9244817663984967, + "grad_norm": 0.0, + "learning_rate": 2.9762583274090985e-07, + "loss": 1.2949, + "step": 31486 + }, + { + "epoch": 0.9245111280756357, + "grad_norm": 0.0, + "learning_rate": 2.9739559619422763e-07, + "loss": 1.0293, + "step": 31487 + }, + { + "epoch": 0.9245404897527747, + "grad_norm": 0.0, + "learning_rate": 2.971654473906804e-07, + "loss": 1.1094, + "step": 31488 + }, + { + "epoch": 0.9245698514299137, + "grad_norm": 0.0, + "learning_rate": 2.9693538633235207e-07, + "loss": 1.0278, + "step": 31489 + }, + { + "epoch": 0.9245992131070526, + "grad_norm": 0.0, + "learning_rate": 2.9670541302132204e-07, + "loss": 1.106, + "step": 31490 + }, + { + "epoch": 0.9246285747841917, + "grad_norm": 0.0, + "learning_rate": 2.964755274596709e-07, + "loss": 1.1543, + "step": 31491 + }, + { + "epoch": 0.9246579364613307, + "grad_norm": 0.0, + "learning_rate": 2.9624572964947584e-07, + "loss": 1.2539, + "step": 31492 + }, + { + "epoch": 0.9246872981384696, + "grad_norm": 0.0, + "learning_rate": 2.960160195928152e-07, + "loss": 1.1982, + "step": 31493 + }, + { + "epoch": 0.9247166598156087, + "grad_norm": 0.0, + "learning_rate": 2.957863972917674e-07, + "loss": 1.27, + "step": 31494 + }, + { + "epoch": 0.9247460214927476, + "grad_norm": 0.0, + "learning_rate": 2.955568627484073e-07, + "loss": 1.1562, + "step": 31495 + }, + { + "epoch": 0.9247753831698866, + "grad_norm": 0.0, + "learning_rate": 2.953274159648134e-07, + "loss": 1.1846, + "step": 31496 + }, + { + "epoch": 0.9248047448470257, + "grad_norm": 0.0, + "learning_rate": 2.9509805694305946e-07, + "loss": 1.3076, + "step": 31497 + }, + { + "epoch": 0.9248341065241646, + "grad_norm": 0.0, + "learning_rate": 2.9486878568521835e-07, + "loss": 1.2178, + "step": 31498 + }, + { + "epoch": 0.9248634682013036, + "grad_norm": 0.0, + "learning_rate": 2.9463960219336506e-07, + "loss": 1.2051, + "step": 31499 + }, + { + "epoch": 0.9248928298784427, + "grad_norm": 0.0, + "learning_rate": 2.9441050646957017e-07, + "loss": 1.1714, + "step": 31500 + }, + { + "epoch": 0.9249221915555816, + "grad_norm": 0.0, + "learning_rate": 2.941814985159075e-07, + "loss": 1.2271, + "step": 31501 + }, + { + "epoch": 0.9249515532327206, + "grad_norm": 0.0, + "learning_rate": 2.9395257833444655e-07, + "loss": 1.2354, + "step": 31502 + }, + { + "epoch": 0.9249809149098597, + "grad_norm": 0.0, + "learning_rate": 2.9372374592725903e-07, + "loss": 1.2549, + "step": 31503 + }, + { + "epoch": 0.9250102765869986, + "grad_norm": 0.0, + "learning_rate": 2.934950012964133e-07, + "loss": 1.1694, + "step": 31504 + }, + { + "epoch": 0.9250396382641376, + "grad_norm": 0.0, + "learning_rate": 2.932663444439776e-07, + "loss": 1.1538, + "step": 31505 + }, + { + "epoch": 0.9250689999412767, + "grad_norm": 0.0, + "learning_rate": 2.930377753720215e-07, + "loss": 1.1846, + "step": 31506 + }, + { + "epoch": 0.9250983616184156, + "grad_norm": 0.0, + "learning_rate": 2.928092940826099e-07, + "loss": 1.2568, + "step": 31507 + }, + { + "epoch": 0.9251277232955546, + "grad_norm": 0.0, + "learning_rate": 2.9258090057781017e-07, + "loss": 1.1533, + "step": 31508 + }, + { + "epoch": 0.9251570849726937, + "grad_norm": 0.0, + "learning_rate": 2.9235259485968723e-07, + "loss": 1.2178, + "step": 31509 + }, + { + "epoch": 0.9251864466498326, + "grad_norm": 0.0, + "learning_rate": 2.921243769303062e-07, + "loss": 1.2617, + "step": 31510 + }, + { + "epoch": 0.9252158083269716, + "grad_norm": 0.0, + "learning_rate": 2.918962467917297e-07, + "loss": 1.1338, + "step": 31511 + }, + { + "epoch": 0.9252451700041107, + "grad_norm": 0.0, + "learning_rate": 2.9166820444602284e-07, + "loss": 1.1772, + "step": 31512 + }, + { + "epoch": 0.9252745316812496, + "grad_norm": 0.0, + "learning_rate": 2.914402498952462e-07, + "loss": 1.1445, + "step": 31513 + }, + { + "epoch": 0.9253038933583886, + "grad_norm": 0.0, + "learning_rate": 2.912123831414615e-07, + "loss": 1.1313, + "step": 31514 + }, + { + "epoch": 0.9253332550355277, + "grad_norm": 0.0, + "learning_rate": 2.909846041867315e-07, + "loss": 1.0718, + "step": 31515 + }, + { + "epoch": 0.9253626167126666, + "grad_norm": 0.0, + "learning_rate": 2.907569130331123e-07, + "loss": 1.248, + "step": 31516 + }, + { + "epoch": 0.9253919783898056, + "grad_norm": 0.0, + "learning_rate": 2.905293096826656e-07, + "loss": 1.1304, + "step": 31517 + }, + { + "epoch": 0.9254213400669447, + "grad_norm": 0.0, + "learning_rate": 2.903017941374486e-07, + "loss": 1.2466, + "step": 31518 + }, + { + "epoch": 0.9254507017440836, + "grad_norm": 0.0, + "learning_rate": 2.900743663995198e-07, + "loss": 1.2251, + "step": 31519 + }, + { + "epoch": 0.9254800634212226, + "grad_norm": 0.0, + "learning_rate": 2.898470264709341e-07, + "loss": 1.165, + "step": 31520 + }, + { + "epoch": 0.9255094250983616, + "grad_norm": 0.0, + "learning_rate": 2.8961977435374986e-07, + "loss": 1.2656, + "step": 31521 + }, + { + "epoch": 0.9255387867755006, + "grad_norm": 0.0, + "learning_rate": 2.8939261005002107e-07, + "loss": 1.3906, + "step": 31522 + }, + { + "epoch": 0.9255681484526396, + "grad_norm": 0.0, + "learning_rate": 2.891655335618016e-07, + "loss": 1.2646, + "step": 31523 + }, + { + "epoch": 0.9255975101297786, + "grad_norm": 0.0, + "learning_rate": 2.8893854489114415e-07, + "loss": 1.2666, + "step": 31524 + }, + { + "epoch": 0.9256268718069176, + "grad_norm": 0.0, + "learning_rate": 2.8871164404010275e-07, + "loss": 1.2217, + "step": 31525 + }, + { + "epoch": 0.9256562334840566, + "grad_norm": 0.0, + "learning_rate": 2.884848310107291e-07, + "loss": 1.2422, + "step": 31526 + }, + { + "epoch": 0.9256855951611956, + "grad_norm": 0.0, + "learning_rate": 2.882581058050749e-07, + "loss": 1.2432, + "step": 31527 + }, + { + "epoch": 0.9257149568383346, + "grad_norm": 0.0, + "learning_rate": 2.8803146842518835e-07, + "loss": 1.1406, + "step": 31528 + }, + { + "epoch": 0.9257443185154736, + "grad_norm": 0.0, + "learning_rate": 2.8780491887312243e-07, + "loss": 1.2783, + "step": 31529 + }, + { + "epoch": 0.9257736801926126, + "grad_norm": 0.0, + "learning_rate": 2.8757845715092327e-07, + "loss": 1.1021, + "step": 31530 + }, + { + "epoch": 0.9258030418697516, + "grad_norm": 0.0, + "learning_rate": 2.873520832606402e-07, + "loss": 1.1885, + "step": 31531 + }, + { + "epoch": 0.9258324035468906, + "grad_norm": 0.0, + "learning_rate": 2.8712579720431843e-07, + "loss": 1.2383, + "step": 31532 + }, + { + "epoch": 0.9258617652240296, + "grad_norm": 0.0, + "learning_rate": 2.868995989840062e-07, + "loss": 1.2979, + "step": 31533 + }, + { + "epoch": 0.9258911269011686, + "grad_norm": 0.0, + "learning_rate": 2.866734886017486e-07, + "loss": 1.0859, + "step": 31534 + }, + { + "epoch": 0.9259204885783076, + "grad_norm": 0.0, + "learning_rate": 2.864474660595895e-07, + "loss": 1.2305, + "step": 31535 + }, + { + "epoch": 0.9259498502554466, + "grad_norm": 0.0, + "learning_rate": 2.86221531359574e-07, + "loss": 1.2549, + "step": 31536 + }, + { + "epoch": 0.9259792119325856, + "grad_norm": 0.0, + "learning_rate": 2.859956845037448e-07, + "loss": 0.9907, + "step": 31537 + }, + { + "epoch": 0.9260085736097246, + "grad_norm": 0.0, + "learning_rate": 2.857699254941448e-07, + "loss": 1.2334, + "step": 31538 + }, + { + "epoch": 0.9260379352868636, + "grad_norm": 0.0, + "learning_rate": 2.8554425433281574e-07, + "loss": 1.2354, + "step": 31539 + }, + { + "epoch": 0.9260672969640026, + "grad_norm": 0.0, + "learning_rate": 2.85318671021797e-07, + "loss": 1.1978, + "step": 31540 + }, + { + "epoch": 0.9260966586411415, + "grad_norm": 0.0, + "learning_rate": 2.850931755631303e-07, + "loss": 1.0815, + "step": 31541 + }, + { + "epoch": 0.9261260203182806, + "grad_norm": 0.0, + "learning_rate": 2.84867767958853e-07, + "loss": 1.249, + "step": 31542 + }, + { + "epoch": 0.9261553819954196, + "grad_norm": 0.0, + "learning_rate": 2.846424482110044e-07, + "loss": 1.1709, + "step": 31543 + }, + { + "epoch": 0.9261847436725585, + "grad_norm": 0.0, + "learning_rate": 2.84417216321623e-07, + "loss": 1.1318, + "step": 31544 + }, + { + "epoch": 0.9262141053496976, + "grad_norm": 0.0, + "learning_rate": 2.841920722927449e-07, + "loss": 1.2324, + "step": 31545 + }, + { + "epoch": 0.9262434670268366, + "grad_norm": 0.0, + "learning_rate": 2.839670161264063e-07, + "loss": 1.2725, + "step": 31546 + }, + { + "epoch": 0.9262728287039755, + "grad_norm": 0.0, + "learning_rate": 2.837420478246422e-07, + "loss": 1.0859, + "step": 31547 + }, + { + "epoch": 0.9263021903811146, + "grad_norm": 0.0, + "learning_rate": 2.8351716738948765e-07, + "loss": 1.1719, + "step": 31548 + }, + { + "epoch": 0.9263315520582536, + "grad_norm": 0.0, + "learning_rate": 2.832923748229743e-07, + "loss": 1.1865, + "step": 31549 + }, + { + "epoch": 0.9263609137353925, + "grad_norm": 0.0, + "learning_rate": 2.8306767012713734e-07, + "loss": 1.1606, + "step": 31550 + }, + { + "epoch": 0.9263902754125316, + "grad_norm": 0.0, + "learning_rate": 2.828430533040072e-07, + "loss": 1.2051, + "step": 31551 + }, + { + "epoch": 0.9264196370896706, + "grad_norm": 0.0, + "learning_rate": 2.8261852435561785e-07, + "loss": 1.3809, + "step": 31552 + }, + { + "epoch": 0.9264489987668095, + "grad_norm": 0.0, + "learning_rate": 2.8239408328399555e-07, + "loss": 1.1416, + "step": 31553 + }, + { + "epoch": 0.9264783604439486, + "grad_norm": 0.0, + "learning_rate": 2.821697300911741e-07, + "loss": 1.3018, + "step": 31554 + }, + { + "epoch": 0.9265077221210876, + "grad_norm": 0.0, + "learning_rate": 2.8194546477917975e-07, + "loss": 1.1704, + "step": 31555 + }, + { + "epoch": 0.9265370837982265, + "grad_norm": 0.0, + "learning_rate": 2.817212873500419e-07, + "loss": 1.3555, + "step": 31556 + }, + { + "epoch": 0.9265664454753656, + "grad_norm": 0.0, + "learning_rate": 2.8149719780578675e-07, + "loss": 1.2012, + "step": 31557 + }, + { + "epoch": 0.9265958071525046, + "grad_norm": 0.0, + "learning_rate": 2.812731961484416e-07, + "loss": 1.1001, + "step": 31558 + }, + { + "epoch": 0.9266251688296435, + "grad_norm": 0.0, + "learning_rate": 2.810492823800326e-07, + "loss": 1.3008, + "step": 31559 + }, + { + "epoch": 0.9266545305067826, + "grad_norm": 0.0, + "learning_rate": 2.8082545650258254e-07, + "loss": 1.2129, + "step": 31560 + }, + { + "epoch": 0.9266838921839216, + "grad_norm": 0.0, + "learning_rate": 2.8060171851811756e-07, + "loss": 1.1865, + "step": 31561 + }, + { + "epoch": 0.9267132538610605, + "grad_norm": 0.0, + "learning_rate": 2.8037806842866056e-07, + "loss": 1.2266, + "step": 31562 + }, + { + "epoch": 0.9267426155381996, + "grad_norm": 0.0, + "learning_rate": 2.801545062362343e-07, + "loss": 1.2632, + "step": 31563 + }, + { + "epoch": 0.9267719772153385, + "grad_norm": 0.0, + "learning_rate": 2.799310319428594e-07, + "loss": 1.2764, + "step": 31564 + }, + { + "epoch": 0.9268013388924775, + "grad_norm": 0.0, + "learning_rate": 2.7970764555055875e-07, + "loss": 1.1533, + "step": 31565 + }, + { + "epoch": 0.9268307005696166, + "grad_norm": 0.0, + "learning_rate": 2.7948434706134954e-07, + "loss": 1.2568, + "step": 31566 + }, + { + "epoch": 0.9268600622467555, + "grad_norm": 0.0, + "learning_rate": 2.792611364772535e-07, + "loss": 1.1196, + "step": 31567 + }, + { + "epoch": 0.9268894239238945, + "grad_norm": 0.0, + "learning_rate": 2.7903801380029016e-07, + "loss": 1.1094, + "step": 31568 + }, + { + "epoch": 0.9269187856010336, + "grad_norm": 0.0, + "learning_rate": 2.7881497903247345e-07, + "loss": 1.2583, + "step": 31569 + }, + { + "epoch": 0.9269481472781725, + "grad_norm": 0.0, + "learning_rate": 2.785920321758229e-07, + "loss": 1.1602, + "step": 31570 + }, + { + "epoch": 0.9269775089553115, + "grad_norm": 0.0, + "learning_rate": 2.783691732323557e-07, + "loss": 1.2539, + "step": 31571 + }, + { + "epoch": 0.9270068706324506, + "grad_norm": 0.0, + "learning_rate": 2.781464022040847e-07, + "loss": 1.2021, + "step": 31572 + }, + { + "epoch": 0.9270362323095895, + "grad_norm": 0.0, + "learning_rate": 2.7792371909302506e-07, + "loss": 1.1021, + "step": 31573 + }, + { + "epoch": 0.9270655939867285, + "grad_norm": 0.0, + "learning_rate": 2.7770112390119176e-07, + "loss": 1.1436, + "step": 31574 + }, + { + "epoch": 0.9270949556638676, + "grad_norm": 0.0, + "learning_rate": 2.774786166305965e-07, + "loss": 1.2617, + "step": 31575 + }, + { + "epoch": 0.9271243173410065, + "grad_norm": 0.0, + "learning_rate": 2.772561972832533e-07, + "loss": 1.2539, + "step": 31576 + }, + { + "epoch": 0.9271536790181455, + "grad_norm": 0.0, + "learning_rate": 2.7703386586117155e-07, + "loss": 1.1162, + "step": 31577 + }, + { + "epoch": 0.9271830406952846, + "grad_norm": 0.0, + "learning_rate": 2.7681162236636304e-07, + "loss": 1.1992, + "step": 31578 + }, + { + "epoch": 0.9272124023724235, + "grad_norm": 0.0, + "learning_rate": 2.765894668008373e-07, + "loss": 1.209, + "step": 31579 + }, + { + "epoch": 0.9272417640495625, + "grad_norm": 0.0, + "learning_rate": 2.763673991666027e-07, + "loss": 1.1982, + "step": 31580 + }, + { + "epoch": 0.9272711257267016, + "grad_norm": 0.0, + "learning_rate": 2.7614541946566875e-07, + "loss": 1.1997, + "step": 31581 + }, + { + "epoch": 0.9273004874038405, + "grad_norm": 0.0, + "learning_rate": 2.7592352770004047e-07, + "loss": 1.2578, + "step": 31582 + }, + { + "epoch": 0.9273298490809795, + "grad_norm": 0.0, + "learning_rate": 2.7570172387172747e-07, + "loss": 1.1309, + "step": 31583 + }, + { + "epoch": 0.9273592107581186, + "grad_norm": 0.0, + "learning_rate": 2.7548000798273356e-07, + "loss": 1.2627, + "step": 31584 + }, + { + "epoch": 0.9273885724352575, + "grad_norm": 0.0, + "learning_rate": 2.75258380035065e-07, + "loss": 1.2832, + "step": 31585 + }, + { + "epoch": 0.9274179341123965, + "grad_norm": 0.0, + "learning_rate": 2.750368400307246e-07, + "loss": 1.3486, + "step": 31586 + }, + { + "epoch": 0.9274472957895356, + "grad_norm": 0.0, + "learning_rate": 2.748153879717175e-07, + "loss": 1.2788, + "step": 31587 + }, + { + "epoch": 0.9274766574666745, + "grad_norm": 0.0, + "learning_rate": 2.745940238600453e-07, + "loss": 1.1172, + "step": 31588 + }, + { + "epoch": 0.9275060191438135, + "grad_norm": 0.0, + "learning_rate": 2.7437274769770983e-07, + "loss": 1.1099, + "step": 31589 + }, + { + "epoch": 0.9275353808209524, + "grad_norm": 0.0, + "learning_rate": 2.741515594867128e-07, + "loss": 1.0718, + "step": 31590 + }, + { + "epoch": 0.9275647424980915, + "grad_norm": 0.0, + "learning_rate": 2.739304592290537e-07, + "loss": 1.207, + "step": 31591 + }, + { + "epoch": 0.9275941041752305, + "grad_norm": 0.0, + "learning_rate": 2.7370944692673206e-07, + "loss": 1.123, + "step": 31592 + }, + { + "epoch": 0.9276234658523694, + "grad_norm": 0.0, + "learning_rate": 2.734885225817474e-07, + "loss": 1.1519, + "step": 31593 + }, + { + "epoch": 0.9276528275295085, + "grad_norm": 0.0, + "learning_rate": 2.73267686196097e-07, + "loss": 1.2686, + "step": 31594 + }, + { + "epoch": 0.9276821892066475, + "grad_norm": 0.0, + "learning_rate": 2.7304693777177707e-07, + "loss": 1.2539, + "step": 31595 + }, + { + "epoch": 0.9277115508837864, + "grad_norm": 0.0, + "learning_rate": 2.72826277310787e-07, + "loss": 1.2202, + "step": 31596 + }, + { + "epoch": 0.9277409125609255, + "grad_norm": 0.0, + "learning_rate": 2.7260570481511763e-07, + "loss": 1.1523, + "step": 31597 + }, + { + "epoch": 0.9277702742380645, + "grad_norm": 0.0, + "learning_rate": 2.723852202867672e-07, + "loss": 1.1943, + "step": 31598 + }, + { + "epoch": 0.9277996359152034, + "grad_norm": 0.0, + "learning_rate": 2.7216482372772856e-07, + "loss": 1.1841, + "step": 31599 + }, + { + "epoch": 0.9278289975923425, + "grad_norm": 0.0, + "learning_rate": 2.7194451513999353e-07, + "loss": 1.2236, + "step": 31600 + }, + { + "epoch": 0.9278583592694815, + "grad_norm": 0.0, + "learning_rate": 2.7172429452555717e-07, + "loss": 1.209, + "step": 31601 + }, + { + "epoch": 0.9278877209466204, + "grad_norm": 0.0, + "learning_rate": 2.715041618864078e-07, + "loss": 1.2651, + "step": 31602 + }, + { + "epoch": 0.9279170826237595, + "grad_norm": 0.0, + "learning_rate": 2.712841172245395e-07, + "loss": 1.2725, + "step": 31603 + }, + { + "epoch": 0.9279464443008985, + "grad_norm": 0.0, + "learning_rate": 2.710641605419406e-07, + "loss": 1.2764, + "step": 31604 + }, + { + "epoch": 0.9279758059780374, + "grad_norm": 0.0, + "learning_rate": 2.708442918405996e-07, + "loss": 1.2188, + "step": 31605 + }, + { + "epoch": 0.9280051676551765, + "grad_norm": 0.0, + "learning_rate": 2.7062451112250476e-07, + "loss": 1.3213, + "step": 31606 + }, + { + "epoch": 0.9280345293323154, + "grad_norm": 0.0, + "learning_rate": 2.704048183896446e-07, + "loss": 1.3003, + "step": 31607 + }, + { + "epoch": 0.9280638910094544, + "grad_norm": 0.0, + "learning_rate": 2.701852136440064e-07, + "loss": 1.1968, + "step": 31608 + }, + { + "epoch": 0.9280932526865935, + "grad_norm": 0.0, + "learning_rate": 2.6996569688757304e-07, + "loss": 1.0913, + "step": 31609 + }, + { + "epoch": 0.9281226143637324, + "grad_norm": 0.0, + "learning_rate": 2.6974626812233396e-07, + "loss": 1.3057, + "step": 31610 + }, + { + "epoch": 0.9281519760408714, + "grad_norm": 0.0, + "learning_rate": 2.6952692735026875e-07, + "loss": 1.0693, + "step": 31611 + }, + { + "epoch": 0.9281813377180105, + "grad_norm": 0.0, + "learning_rate": 2.693076745733658e-07, + "loss": 1.1802, + "step": 31612 + }, + { + "epoch": 0.9282106993951494, + "grad_norm": 0.0, + "learning_rate": 2.690885097936058e-07, + "loss": 1.1953, + "step": 31613 + }, + { + "epoch": 0.9282400610722884, + "grad_norm": 0.0, + "learning_rate": 2.688694330129693e-07, + "loss": 1.29, + "step": 31614 + }, + { + "epoch": 0.9282694227494275, + "grad_norm": 0.0, + "learning_rate": 2.6865044423343813e-07, + "loss": 1.2397, + "step": 31615 + }, + { + "epoch": 0.9282987844265664, + "grad_norm": 0.0, + "learning_rate": 2.684315434569951e-07, + "loss": 1.2803, + "step": 31616 + }, + { + "epoch": 0.9283281461037054, + "grad_norm": 0.0, + "learning_rate": 2.6821273068561747e-07, + "loss": 1.1787, + "step": 31617 + }, + { + "epoch": 0.9283575077808445, + "grad_norm": 0.0, + "learning_rate": 2.6799400592128267e-07, + "loss": 1.1816, + "step": 31618 + }, + { + "epoch": 0.9283868694579834, + "grad_norm": 0.0, + "learning_rate": 2.6777536916597236e-07, + "loss": 1.1753, + "step": 31619 + }, + { + "epoch": 0.9284162311351224, + "grad_norm": 0.0, + "learning_rate": 2.675568204216605e-07, + "loss": 1.2017, + "step": 31620 + }, + { + "epoch": 0.9284455928122615, + "grad_norm": 0.0, + "learning_rate": 2.6733835969032676e-07, + "loss": 1.1479, + "step": 31621 + }, + { + "epoch": 0.9284749544894004, + "grad_norm": 0.0, + "learning_rate": 2.671199869739438e-07, + "loss": 1.1694, + "step": 31622 + }, + { + "epoch": 0.9285043161665394, + "grad_norm": 0.0, + "learning_rate": 2.6690170227448687e-07, + "loss": 1.1812, + "step": 31623 + }, + { + "epoch": 0.9285336778436785, + "grad_norm": 0.0, + "learning_rate": 2.66683505593931e-07, + "loss": 1.1548, + "step": 31624 + }, + { + "epoch": 0.9285630395208174, + "grad_norm": 0.0, + "learning_rate": 2.66465396934249e-07, + "loss": 1.1172, + "step": 31625 + }, + { + "epoch": 0.9285924011979564, + "grad_norm": 0.0, + "learning_rate": 2.662473762974127e-07, + "loss": 1.1768, + "step": 31626 + }, + { + "epoch": 0.9286217628750955, + "grad_norm": 0.0, + "learning_rate": 2.6602944368539497e-07, + "loss": 1.1909, + "step": 31627 + }, + { + "epoch": 0.9286511245522344, + "grad_norm": 0.0, + "learning_rate": 2.658115991001653e-07, + "loss": 1.1436, + "step": 31628 + }, + { + "epoch": 0.9286804862293734, + "grad_norm": 0.0, + "learning_rate": 2.6559384254369435e-07, + "loss": 1.251, + "step": 31629 + }, + { + "epoch": 0.9287098479065125, + "grad_norm": 0.0, + "learning_rate": 2.653761740179528e-07, + "loss": 1.1201, + "step": 31630 + }, + { + "epoch": 0.9287392095836514, + "grad_norm": 0.0, + "learning_rate": 2.651585935249046e-07, + "loss": 1.3662, + "step": 31631 + }, + { + "epoch": 0.9287685712607904, + "grad_norm": 0.0, + "learning_rate": 2.649411010665226e-07, + "loss": 1.2607, + "step": 31632 + }, + { + "epoch": 0.9287979329379294, + "grad_norm": 0.0, + "learning_rate": 2.647236966447708e-07, + "loss": 1.1855, + "step": 31633 + }, + { + "epoch": 0.9288272946150684, + "grad_norm": 0.0, + "learning_rate": 2.6450638026161547e-07, + "loss": 1.2539, + "step": 31634 + }, + { + "epoch": 0.9288566562922074, + "grad_norm": 0.0, + "learning_rate": 2.6428915191902274e-07, + "loss": 1.2773, + "step": 31635 + }, + { + "epoch": 0.9288860179693464, + "grad_norm": 0.0, + "learning_rate": 2.6407201161895657e-07, + "loss": 1.2349, + "step": 31636 + }, + { + "epoch": 0.9289153796464854, + "grad_norm": 0.0, + "learning_rate": 2.63854959363381e-07, + "loss": 1.1973, + "step": 31637 + }, + { + "epoch": 0.9289447413236244, + "grad_norm": 0.0, + "learning_rate": 2.636379951542578e-07, + "loss": 1.1475, + "step": 31638 + }, + { + "epoch": 0.9289741030007634, + "grad_norm": 0.0, + "learning_rate": 2.6342111899355095e-07, + "loss": 1.0698, + "step": 31639 + }, + { + "epoch": 0.9290034646779024, + "grad_norm": 0.0, + "learning_rate": 2.6320433088321884e-07, + "loss": 1.064, + "step": 31640 + }, + { + "epoch": 0.9290328263550414, + "grad_norm": 0.0, + "learning_rate": 2.6298763082522437e-07, + "loss": 1.1714, + "step": 31641 + }, + { + "epoch": 0.9290621880321804, + "grad_norm": 0.0, + "learning_rate": 2.62771018821526e-07, + "loss": 1.0386, + "step": 31642 + }, + { + "epoch": 0.9290915497093194, + "grad_norm": 0.0, + "learning_rate": 2.6255449487408324e-07, + "loss": 1.145, + "step": 31643 + }, + { + "epoch": 0.9291209113864584, + "grad_norm": 0.0, + "learning_rate": 2.623380589848534e-07, + "loss": 1.1553, + "step": 31644 + }, + { + "epoch": 0.9291502730635974, + "grad_norm": 0.0, + "learning_rate": 2.62121711155795e-07, + "loss": 1.1826, + "step": 31645 + }, + { + "epoch": 0.9291796347407364, + "grad_norm": 0.0, + "learning_rate": 2.6190545138886415e-07, + "loss": 1.0903, + "step": 31646 + }, + { + "epoch": 0.9292089964178754, + "grad_norm": 0.0, + "learning_rate": 2.616892796860171e-07, + "loss": 1.2168, + "step": 31647 + }, + { + "epoch": 0.9292383580950144, + "grad_norm": 0.0, + "learning_rate": 2.614731960492067e-07, + "loss": 1.25, + "step": 31648 + }, + { + "epoch": 0.9292677197721534, + "grad_norm": 0.0, + "learning_rate": 2.612572004803882e-07, + "loss": 1.1992, + "step": 31649 + }, + { + "epoch": 0.9292970814492924, + "grad_norm": 0.0, + "learning_rate": 2.6104129298151535e-07, + "loss": 1.207, + "step": 31650 + }, + { + "epoch": 0.9293264431264314, + "grad_norm": 0.0, + "learning_rate": 2.60825473554539e-07, + "loss": 1.2783, + "step": 31651 + }, + { + "epoch": 0.9293558048035704, + "grad_norm": 0.0, + "learning_rate": 2.6060974220141424e-07, + "loss": 1.2607, + "step": 31652 + }, + { + "epoch": 0.9293851664807093, + "grad_norm": 0.0, + "learning_rate": 2.603940989240894e-07, + "loss": 1.2969, + "step": 31653 + }, + { + "epoch": 0.9294145281578484, + "grad_norm": 0.0, + "learning_rate": 2.6017854372451413e-07, + "loss": 1.2236, + "step": 31654 + }, + { + "epoch": 0.9294438898349874, + "grad_norm": 0.0, + "learning_rate": 2.5996307660463906e-07, + "loss": 1.2305, + "step": 31655 + }, + { + "epoch": 0.9294732515121263, + "grad_norm": 0.0, + "learning_rate": 2.597476975664126e-07, + "loss": 1.251, + "step": 31656 + }, + { + "epoch": 0.9295026131892654, + "grad_norm": 0.0, + "learning_rate": 2.5953240661178213e-07, + "loss": 1.1436, + "step": 31657 + }, + { + "epoch": 0.9295319748664044, + "grad_norm": 0.0, + "learning_rate": 2.593172037426939e-07, + "loss": 1.2998, + "step": 31658 + }, + { + "epoch": 0.9295613365435433, + "grad_norm": 0.0, + "learning_rate": 2.5910208896109624e-07, + "loss": 1.1416, + "step": 31659 + }, + { + "epoch": 0.9295906982206824, + "grad_norm": 0.0, + "learning_rate": 2.58887062268931e-07, + "loss": 1.0137, + "step": 31660 + }, + { + "epoch": 0.9296200598978214, + "grad_norm": 0.0, + "learning_rate": 2.5867212366814663e-07, + "loss": 1.228, + "step": 31661 + }, + { + "epoch": 0.9296494215749603, + "grad_norm": 0.0, + "learning_rate": 2.584572731606849e-07, + "loss": 1.1772, + "step": 31662 + }, + { + "epoch": 0.9296787832520994, + "grad_norm": 0.0, + "learning_rate": 2.582425107484887e-07, + "loss": 1.2285, + "step": 31663 + }, + { + "epoch": 0.9297081449292384, + "grad_norm": 0.0, + "learning_rate": 2.5802783643349984e-07, + "loss": 1.1333, + "step": 31664 + }, + { + "epoch": 0.9297375066063773, + "grad_norm": 0.0, + "learning_rate": 2.5781325021766114e-07, + "loss": 1.2471, + "step": 31665 + }, + { + "epoch": 0.9297668682835164, + "grad_norm": 0.0, + "learning_rate": 2.575987521029122e-07, + "loss": 1.2012, + "step": 31666 + }, + { + "epoch": 0.9297962299606554, + "grad_norm": 0.0, + "learning_rate": 2.5738434209119143e-07, + "loss": 1.1528, + "step": 31667 + }, + { + "epoch": 0.9298255916377943, + "grad_norm": 0.0, + "learning_rate": 2.571700201844396e-07, + "loss": 1.1763, + "step": 31668 + }, + { + "epoch": 0.9298549533149334, + "grad_norm": 0.0, + "learning_rate": 2.5695578638459505e-07, + "loss": 1.0293, + "step": 31669 + }, + { + "epoch": 0.9298843149920724, + "grad_norm": 0.0, + "learning_rate": 2.5674164069359407e-07, + "loss": 1.0718, + "step": 31670 + }, + { + "epoch": 0.9299136766692113, + "grad_norm": 0.0, + "learning_rate": 2.565275831133751e-07, + "loss": 1.1089, + "step": 31671 + }, + { + "epoch": 0.9299430383463504, + "grad_norm": 0.0, + "learning_rate": 2.56313613645871e-07, + "loss": 1.207, + "step": 31672 + }, + { + "epoch": 0.9299724000234894, + "grad_norm": 0.0, + "learning_rate": 2.560997322930181e-07, + "loss": 1.2539, + "step": 31673 + }, + { + "epoch": 0.9300017617006283, + "grad_norm": 0.0, + "learning_rate": 2.558859390567525e-07, + "loss": 1.2095, + "step": 31674 + }, + { + "epoch": 0.9300311233777674, + "grad_norm": 0.0, + "learning_rate": 2.5567223393900385e-07, + "loss": 1.2314, + "step": 31675 + }, + { + "epoch": 0.9300604850549063, + "grad_norm": 0.0, + "learning_rate": 2.5545861694170836e-07, + "loss": 1.1406, + "step": 31676 + }, + { + "epoch": 0.9300898467320453, + "grad_norm": 0.0, + "learning_rate": 2.552450880667956e-07, + "loss": 1.0767, + "step": 31677 + }, + { + "epoch": 0.9301192084091844, + "grad_norm": 0.0, + "learning_rate": 2.550316473161973e-07, + "loss": 1.1226, + "step": 31678 + }, + { + "epoch": 0.9301485700863233, + "grad_norm": 0.0, + "learning_rate": 2.548182946918432e-07, + "loss": 1.2119, + "step": 31679 + }, + { + "epoch": 0.9301779317634623, + "grad_norm": 0.0, + "learning_rate": 2.5460503019566265e-07, + "loss": 1.21, + "step": 31680 + }, + { + "epoch": 0.9302072934406014, + "grad_norm": 0.0, + "learning_rate": 2.5439185382958533e-07, + "loss": 1.1885, + "step": 31681 + }, + { + "epoch": 0.9302366551177403, + "grad_norm": 0.0, + "learning_rate": 2.5417876559553747e-07, + "loss": 1.0933, + "step": 31682 + }, + { + "epoch": 0.9302660167948793, + "grad_norm": 0.0, + "learning_rate": 2.539657654954486e-07, + "loss": 1.0923, + "step": 31683 + }, + { + "epoch": 0.9302953784720184, + "grad_norm": 0.0, + "learning_rate": 2.537528535312417e-07, + "loss": 1.1401, + "step": 31684 + }, + { + "epoch": 0.9303247401491573, + "grad_norm": 0.0, + "learning_rate": 2.5354002970484516e-07, + "loss": 1.1475, + "step": 31685 + }, + { + "epoch": 0.9303541018262963, + "grad_norm": 0.0, + "learning_rate": 2.533272940181819e-07, + "loss": 1.0898, + "step": 31686 + }, + { + "epoch": 0.9303834635034354, + "grad_norm": 0.0, + "learning_rate": 2.5311464647317594e-07, + "loss": 1.1904, + "step": 31687 + }, + { + "epoch": 0.9304128251805743, + "grad_norm": 0.0, + "learning_rate": 2.5290208707175025e-07, + "loss": 1.2109, + "step": 31688 + }, + { + "epoch": 0.9304421868577133, + "grad_norm": 0.0, + "learning_rate": 2.526896158158265e-07, + "loss": 1.248, + "step": 31689 + }, + { + "epoch": 0.9304715485348524, + "grad_norm": 0.0, + "learning_rate": 2.524772327073288e-07, + "loss": 1.1865, + "step": 31690 + }, + { + "epoch": 0.9305009102119913, + "grad_norm": 0.0, + "learning_rate": 2.522649377481734e-07, + "loss": 1.1826, + "step": 31691 + }, + { + "epoch": 0.9305302718891303, + "grad_norm": 0.0, + "learning_rate": 2.520527309402843e-07, + "loss": 1.2178, + "step": 31692 + }, + { + "epoch": 0.9305596335662693, + "grad_norm": 0.0, + "learning_rate": 2.5184061228557764e-07, + "loss": 1.2383, + "step": 31693 + }, + { + "epoch": 0.9305889952434083, + "grad_norm": 0.0, + "learning_rate": 2.516285817859743e-07, + "loss": 1.1685, + "step": 31694 + }, + { + "epoch": 0.9306183569205473, + "grad_norm": 0.0, + "learning_rate": 2.5141663944339035e-07, + "loss": 1.0088, + "step": 31695 + }, + { + "epoch": 0.9306477185976862, + "grad_norm": 0.0, + "learning_rate": 2.5120478525974214e-07, + "loss": 1.1685, + "step": 31696 + }, + { + "epoch": 0.9306770802748253, + "grad_norm": 0.0, + "learning_rate": 2.5099301923694587e-07, + "loss": 1.1768, + "step": 31697 + }, + { + "epoch": 0.9307064419519643, + "grad_norm": 0.0, + "learning_rate": 2.5078134137691444e-07, + "loss": 1.2109, + "step": 31698 + }, + { + "epoch": 0.9307358036291032, + "grad_norm": 0.0, + "learning_rate": 2.505697516815664e-07, + "loss": 1.2373, + "step": 31699 + }, + { + "epoch": 0.9307651653062423, + "grad_norm": 0.0, + "learning_rate": 2.5035825015281123e-07, + "loss": 1.125, + "step": 31700 + }, + { + "epoch": 0.9307945269833813, + "grad_norm": 0.0, + "learning_rate": 2.5014683679256525e-07, + "loss": 1.0986, + "step": 31701 + }, + { + "epoch": 0.9308238886605202, + "grad_norm": 0.0, + "learning_rate": 2.49935511602738e-07, + "loss": 1.228, + "step": 31702 + }, + { + "epoch": 0.9308532503376593, + "grad_norm": 0.0, + "learning_rate": 2.497242745852402e-07, + "loss": 1.3086, + "step": 31703 + }, + { + "epoch": 0.9308826120147983, + "grad_norm": 0.0, + "learning_rate": 2.495131257419825e-07, + "loss": 1.1943, + "step": 31704 + }, + { + "epoch": 0.9309119736919372, + "grad_norm": 0.0, + "learning_rate": 2.4930206507487566e-07, + "loss": 1.2461, + "step": 31705 + }, + { + "epoch": 0.9309413353690763, + "grad_norm": 0.0, + "learning_rate": 2.490910925858281e-07, + "loss": 1.1733, + "step": 31706 + }, + { + "epoch": 0.9309706970462153, + "grad_norm": 0.0, + "learning_rate": 2.48880208276745e-07, + "loss": 1.2207, + "step": 31707 + }, + { + "epoch": 0.9310000587233542, + "grad_norm": 0.0, + "learning_rate": 2.48669412149537e-07, + "loss": 1.1616, + "step": 31708 + }, + { + "epoch": 0.9310294204004933, + "grad_norm": 0.0, + "learning_rate": 2.4845870420610704e-07, + "loss": 1.2852, + "step": 31709 + }, + { + "epoch": 0.9310587820776323, + "grad_norm": 0.0, + "learning_rate": 2.4824808444836477e-07, + "loss": 1.1196, + "step": 31710 + }, + { + "epoch": 0.9310881437547712, + "grad_norm": 0.0, + "learning_rate": 2.4803755287821197e-07, + "loss": 1.189, + "step": 31711 + }, + { + "epoch": 0.9311175054319103, + "grad_norm": 0.0, + "learning_rate": 2.478271094975526e-07, + "loss": 1.2822, + "step": 31712 + }, + { + "epoch": 0.9311468671090493, + "grad_norm": 0.0, + "learning_rate": 2.476167543082897e-07, + "loss": 1.0654, + "step": 31713 + }, + { + "epoch": 0.9311762287861882, + "grad_norm": 0.0, + "learning_rate": 2.474064873123272e-07, + "loss": 1.1392, + "step": 31714 + }, + { + "epoch": 0.9312055904633273, + "grad_norm": 0.0, + "learning_rate": 2.4719630851156363e-07, + "loss": 1.1133, + "step": 31715 + }, + { + "epoch": 0.9312349521404663, + "grad_norm": 0.0, + "learning_rate": 2.4698621790790413e-07, + "loss": 1.1768, + "step": 31716 + }, + { + "epoch": 0.9312643138176052, + "grad_norm": 0.0, + "learning_rate": 2.4677621550324496e-07, + "loss": 1.2881, + "step": 31717 + }, + { + "epoch": 0.9312936754947443, + "grad_norm": 0.0, + "learning_rate": 2.4656630129948564e-07, + "loss": 1.2656, + "step": 31718 + }, + { + "epoch": 0.9313230371718833, + "grad_norm": 0.0, + "learning_rate": 2.463564752985259e-07, + "loss": 1.0176, + "step": 31719 + }, + { + "epoch": 0.9313523988490222, + "grad_norm": 0.0, + "learning_rate": 2.4614673750226305e-07, + "loss": 1.208, + "step": 31720 + }, + { + "epoch": 0.9313817605261613, + "grad_norm": 0.0, + "learning_rate": 2.459370879125922e-07, + "loss": 1.2119, + "step": 31721 + }, + { + "epoch": 0.9314111222033002, + "grad_norm": 0.0, + "learning_rate": 2.457275265314107e-07, + "loss": 1.1724, + "step": 31722 + }, + { + "epoch": 0.9314404838804392, + "grad_norm": 0.0, + "learning_rate": 2.4551805336061276e-07, + "loss": 1.2676, + "step": 31723 + }, + { + "epoch": 0.9314698455575783, + "grad_norm": 0.0, + "learning_rate": 2.453086684020933e-07, + "loss": 1.1797, + "step": 31724 + }, + { + "epoch": 0.9314992072347172, + "grad_norm": 0.0, + "learning_rate": 2.4509937165774653e-07, + "loss": 1.2617, + "step": 31725 + }, + { + "epoch": 0.9315285689118562, + "grad_norm": 0.0, + "learning_rate": 2.4489016312946425e-07, + "loss": 1.1943, + "step": 31726 + }, + { + "epoch": 0.9315579305889953, + "grad_norm": 0.0, + "learning_rate": 2.446810428191393e-07, + "loss": 1.1533, + "step": 31727 + }, + { + "epoch": 0.9315872922661342, + "grad_norm": 0.0, + "learning_rate": 2.444720107286613e-07, + "loss": 1.2173, + "step": 31728 + }, + { + "epoch": 0.9316166539432732, + "grad_norm": 0.0, + "learning_rate": 2.4426306685991997e-07, + "loss": 1.2637, + "step": 31729 + }, + { + "epoch": 0.9316460156204123, + "grad_norm": 0.0, + "learning_rate": 2.440542112148081e-07, + "loss": 1.2891, + "step": 31730 + }, + { + "epoch": 0.9316753772975512, + "grad_norm": 0.0, + "learning_rate": 2.4384544379521093e-07, + "loss": 1.2354, + "step": 31731 + }, + { + "epoch": 0.9317047389746902, + "grad_norm": 0.0, + "learning_rate": 2.436367646030191e-07, + "loss": 1.084, + "step": 31732 + }, + { + "epoch": 0.9317341006518293, + "grad_norm": 0.0, + "learning_rate": 2.434281736401178e-07, + "loss": 1.2456, + "step": 31733 + }, + { + "epoch": 0.9317634623289682, + "grad_norm": 0.0, + "learning_rate": 2.432196709083956e-07, + "loss": 1.2949, + "step": 31734 + }, + { + "epoch": 0.9317928240061072, + "grad_norm": 0.0, + "learning_rate": 2.430112564097364e-07, + "loss": 1.1846, + "step": 31735 + }, + { + "epoch": 0.9318221856832463, + "grad_norm": 0.0, + "learning_rate": 2.428029301460244e-07, + "loss": 1.1924, + "step": 31736 + }, + { + "epoch": 0.9318515473603852, + "grad_norm": 0.0, + "learning_rate": 2.4259469211914575e-07, + "loss": 1.1235, + "step": 31737 + }, + { + "epoch": 0.9318809090375242, + "grad_norm": 0.0, + "learning_rate": 2.4238654233098013e-07, + "loss": 1.2178, + "step": 31738 + }, + { + "epoch": 0.9319102707146633, + "grad_norm": 0.0, + "learning_rate": 2.421784807834138e-07, + "loss": 1.1787, + "step": 31739 + }, + { + "epoch": 0.9319396323918022, + "grad_norm": 0.0, + "learning_rate": 2.419705074783252e-07, + "loss": 1.3018, + "step": 31740 + }, + { + "epoch": 0.9319689940689412, + "grad_norm": 0.0, + "learning_rate": 2.417626224175962e-07, + "loss": 1.1748, + "step": 31741 + }, + { + "epoch": 0.9319983557460803, + "grad_norm": 0.0, + "learning_rate": 2.415548256031075e-07, + "loss": 1.2061, + "step": 31742 + }, + { + "epoch": 0.9320277174232192, + "grad_norm": 0.0, + "learning_rate": 2.413471170367376e-07, + "loss": 1.2979, + "step": 31743 + }, + { + "epoch": 0.9320570791003582, + "grad_norm": 0.0, + "learning_rate": 2.411394967203662e-07, + "loss": 0.9946, + "step": 31744 + }, + { + "epoch": 0.9320864407774972, + "grad_norm": 0.0, + "learning_rate": 2.409319646558683e-07, + "loss": 1.2646, + "step": 31745 + }, + { + "epoch": 0.9321158024546362, + "grad_norm": 0.0, + "learning_rate": 2.4072452084512256e-07, + "loss": 1.0576, + "step": 31746 + }, + { + "epoch": 0.9321451641317752, + "grad_norm": 0.0, + "learning_rate": 2.4051716529000293e-07, + "loss": 1.3047, + "step": 31747 + }, + { + "epoch": 0.9321745258089142, + "grad_norm": 0.0, + "learning_rate": 2.403098979923879e-07, + "loss": 1.335, + "step": 31748 + }, + { + "epoch": 0.9322038874860532, + "grad_norm": 0.0, + "learning_rate": 2.4010271895414826e-07, + "loss": 1.1494, + "step": 31749 + }, + { + "epoch": 0.9322332491631922, + "grad_norm": 0.0, + "learning_rate": 2.3989562817716027e-07, + "loss": 1.1582, + "step": 31750 + }, + { + "epoch": 0.9322626108403312, + "grad_norm": 0.0, + "learning_rate": 2.396886256632969e-07, + "loss": 1.1475, + "step": 31751 + }, + { + "epoch": 0.9322919725174702, + "grad_norm": 0.0, + "learning_rate": 2.394817114144277e-07, + "loss": 1.1279, + "step": 31752 + }, + { + "epoch": 0.9323213341946092, + "grad_norm": 0.0, + "learning_rate": 2.392748854324245e-07, + "loss": 1.1475, + "step": 31753 + }, + { + "epoch": 0.9323506958717482, + "grad_norm": 0.0, + "learning_rate": 2.390681477191592e-07, + "loss": 1.0898, + "step": 31754 + }, + { + "epoch": 0.9323800575488872, + "grad_norm": 0.0, + "learning_rate": 2.388614982765003e-07, + "loss": 1.1738, + "step": 31755 + }, + { + "epoch": 0.9324094192260262, + "grad_norm": 0.0, + "learning_rate": 2.386549371063152e-07, + "loss": 1.2529, + "step": 31756 + }, + { + "epoch": 0.9324387809031652, + "grad_norm": 0.0, + "learning_rate": 2.384484642104756e-07, + "loss": 1.3467, + "step": 31757 + }, + { + "epoch": 0.9324681425803042, + "grad_norm": 0.0, + "learning_rate": 2.3824207959084467e-07, + "loss": 1.2256, + "step": 31758 + }, + { + "epoch": 0.9324975042574432, + "grad_norm": 0.0, + "learning_rate": 2.380357832492919e-07, + "loss": 1.1274, + "step": 31759 + }, + { + "epoch": 0.9325268659345822, + "grad_norm": 0.0, + "learning_rate": 2.3782957518768135e-07, + "loss": 1.1777, + "step": 31760 + }, + { + "epoch": 0.9325562276117212, + "grad_norm": 0.0, + "learning_rate": 2.3762345540787934e-07, + "loss": 1.1787, + "step": 31761 + }, + { + "epoch": 0.9325855892888602, + "grad_norm": 0.0, + "learning_rate": 2.374174239117466e-07, + "loss": 1.0898, + "step": 31762 + }, + { + "epoch": 0.9326149509659992, + "grad_norm": 0.0, + "learning_rate": 2.372114807011494e-07, + "loss": 1.1953, + "step": 31763 + }, + { + "epoch": 0.9326443126431382, + "grad_norm": 0.0, + "learning_rate": 2.370056257779474e-07, + "loss": 1.2305, + "step": 31764 + }, + { + "epoch": 0.9326736743202771, + "grad_norm": 0.0, + "learning_rate": 2.367998591440057e-07, + "loss": 1.2065, + "step": 31765 + }, + { + "epoch": 0.9327030359974162, + "grad_norm": 0.0, + "learning_rate": 2.365941808011829e-07, + "loss": 1.1387, + "step": 31766 + }, + { + "epoch": 0.9327323976745552, + "grad_norm": 0.0, + "learning_rate": 2.363885907513386e-07, + "loss": 1.2046, + "step": 31767 + }, + { + "epoch": 0.9327617593516941, + "grad_norm": 0.0, + "learning_rate": 2.3618308899633346e-07, + "loss": 1.1055, + "step": 31768 + }, + { + "epoch": 0.9327911210288332, + "grad_norm": 0.0, + "learning_rate": 2.3597767553802607e-07, + "loss": 1.1787, + "step": 31769 + }, + { + "epoch": 0.9328204827059722, + "grad_norm": 0.0, + "learning_rate": 2.3577235037827274e-07, + "loss": 1.2178, + "step": 31770 + }, + { + "epoch": 0.9328498443831111, + "grad_norm": 0.0, + "learning_rate": 2.355671135189286e-07, + "loss": 1.293, + "step": 31771 + }, + { + "epoch": 0.9328792060602502, + "grad_norm": 0.0, + "learning_rate": 2.3536196496185437e-07, + "loss": 1.1616, + "step": 31772 + }, + { + "epoch": 0.9329085677373892, + "grad_norm": 0.0, + "learning_rate": 2.3515690470890084e-07, + "loss": 1.2246, + "step": 31773 + }, + { + "epoch": 0.9329379294145281, + "grad_norm": 0.0, + "learning_rate": 2.349519327619254e-07, + "loss": 1.3174, + "step": 31774 + }, + { + "epoch": 0.9329672910916672, + "grad_norm": 0.0, + "learning_rate": 2.347470491227799e-07, + "loss": 1.2812, + "step": 31775 + }, + { + "epoch": 0.9329966527688062, + "grad_norm": 0.0, + "learning_rate": 2.345422537933184e-07, + "loss": 1.1948, + "step": 31776 + }, + { + "epoch": 0.9330260144459451, + "grad_norm": 0.0, + "learning_rate": 2.3433754677539276e-07, + "loss": 1.249, + "step": 31777 + }, + { + "epoch": 0.9330553761230842, + "grad_norm": 0.0, + "learning_rate": 2.3413292807085153e-07, + "loss": 1.2207, + "step": 31778 + }, + { + "epoch": 0.9330847378002232, + "grad_norm": 0.0, + "learning_rate": 2.3392839768154874e-07, + "loss": 1.126, + "step": 31779 + }, + { + "epoch": 0.9331140994773621, + "grad_norm": 0.0, + "learning_rate": 2.3372395560933182e-07, + "loss": 1.1118, + "step": 31780 + }, + { + "epoch": 0.9331434611545012, + "grad_norm": 0.0, + "learning_rate": 2.335196018560515e-07, + "loss": 1.1489, + "step": 31781 + }, + { + "epoch": 0.9331728228316402, + "grad_norm": 0.0, + "learning_rate": 2.3331533642355408e-07, + "loss": 1.126, + "step": 31782 + }, + { + "epoch": 0.9332021845087791, + "grad_norm": 0.0, + "learning_rate": 2.33111159313687e-07, + "loss": 1.1426, + "step": 31783 + }, + { + "epoch": 0.9332315461859182, + "grad_norm": 0.0, + "learning_rate": 2.3290707052829765e-07, + "loss": 1.0737, + "step": 31784 + }, + { + "epoch": 0.9332609078630572, + "grad_norm": 0.0, + "learning_rate": 2.327030700692312e-07, + "loss": 1.3174, + "step": 31785 + }, + { + "epoch": 0.9332902695401961, + "grad_norm": 0.0, + "learning_rate": 2.324991579383329e-07, + "loss": 1.1245, + "step": 31786 + }, + { + "epoch": 0.9333196312173352, + "grad_norm": 0.0, + "learning_rate": 2.3229533413744454e-07, + "loss": 1.1943, + "step": 31787 + }, + { + "epoch": 0.9333489928944741, + "grad_norm": 0.0, + "learning_rate": 2.3209159866841136e-07, + "loss": 1.165, + "step": 31788 + }, + { + "epoch": 0.9333783545716131, + "grad_norm": 0.0, + "learning_rate": 2.318879515330752e-07, + "loss": 1.2246, + "step": 31789 + }, + { + "epoch": 0.9334077162487522, + "grad_norm": 0.0, + "learning_rate": 2.3168439273327903e-07, + "loss": 1.3184, + "step": 31790 + }, + { + "epoch": 0.9334370779258911, + "grad_norm": 0.0, + "learning_rate": 2.3148092227086138e-07, + "loss": 1.3174, + "step": 31791 + }, + { + "epoch": 0.9334664396030301, + "grad_norm": 0.0, + "learning_rate": 2.3127754014766523e-07, + "loss": 1.1494, + "step": 31792 + }, + { + "epoch": 0.9334958012801691, + "grad_norm": 0.0, + "learning_rate": 2.310742463655269e-07, + "loss": 1.1885, + "step": 31793 + }, + { + "epoch": 0.9335251629573081, + "grad_norm": 0.0, + "learning_rate": 2.3087104092628708e-07, + "loss": 1.207, + "step": 31794 + }, + { + "epoch": 0.9335545246344471, + "grad_norm": 0.0, + "learning_rate": 2.3066792383178106e-07, + "loss": 1.1729, + "step": 31795 + }, + { + "epoch": 0.9335838863115861, + "grad_norm": 0.0, + "learning_rate": 2.304648950838473e-07, + "loss": 1.2095, + "step": 31796 + }, + { + "epoch": 0.9336132479887251, + "grad_norm": 0.0, + "learning_rate": 2.3026195468432212e-07, + "loss": 1.3379, + "step": 31797 + }, + { + "epoch": 0.9336426096658641, + "grad_norm": 0.0, + "learning_rate": 2.300591026350385e-07, + "loss": 1.3154, + "step": 31798 + }, + { + "epoch": 0.9336719713430031, + "grad_norm": 0.0, + "learning_rate": 2.2985633893783387e-07, + "loss": 1.2266, + "step": 31799 + }, + { + "epoch": 0.9337013330201421, + "grad_norm": 0.0, + "learning_rate": 2.2965366359454012e-07, + "loss": 1.1577, + "step": 31800 + }, + { + "epoch": 0.9337306946972811, + "grad_norm": 0.0, + "learning_rate": 2.294510766069924e-07, + "loss": 1.2271, + "step": 31801 + }, + { + "epoch": 0.93376005637442, + "grad_norm": 0.0, + "learning_rate": 2.2924857797701816e-07, + "loss": 1.1162, + "step": 31802 + }, + { + "epoch": 0.9337894180515591, + "grad_norm": 0.0, + "learning_rate": 2.290461677064526e-07, + "loss": 1.1919, + "step": 31803 + }, + { + "epoch": 0.9338187797286981, + "grad_norm": 0.0, + "learning_rate": 2.2884384579712537e-07, + "loss": 1.2622, + "step": 31804 + }, + { + "epoch": 0.933848141405837, + "grad_norm": 0.0, + "learning_rate": 2.286416122508639e-07, + "loss": 1.2334, + "step": 31805 + }, + { + "epoch": 0.9338775030829761, + "grad_norm": 0.0, + "learning_rate": 2.2843946706950114e-07, + "loss": 1.1904, + "step": 31806 + }, + { + "epoch": 0.9339068647601151, + "grad_norm": 0.0, + "learning_rate": 2.2823741025486124e-07, + "loss": 1.249, + "step": 31807 + }, + { + "epoch": 0.933936226437254, + "grad_norm": 0.0, + "learning_rate": 2.2803544180877268e-07, + "loss": 1.1929, + "step": 31808 + }, + { + "epoch": 0.9339655881143931, + "grad_norm": 0.0, + "learning_rate": 2.2783356173306403e-07, + "loss": 1.2246, + "step": 31809 + }, + { + "epoch": 0.9339949497915321, + "grad_norm": 0.0, + "learning_rate": 2.2763177002955827e-07, + "loss": 1.1523, + "step": 31810 + }, + { + "epoch": 0.934024311468671, + "grad_norm": 0.0, + "learning_rate": 2.274300667000806e-07, + "loss": 1.2363, + "step": 31811 + }, + { + "epoch": 0.9340536731458101, + "grad_norm": 0.0, + "learning_rate": 2.2722845174645625e-07, + "loss": 1.168, + "step": 31812 + }, + { + "epoch": 0.9340830348229491, + "grad_norm": 0.0, + "learning_rate": 2.2702692517050708e-07, + "loss": 1.3154, + "step": 31813 + }, + { + "epoch": 0.934112396500088, + "grad_norm": 0.0, + "learning_rate": 2.2682548697405714e-07, + "loss": 1.0537, + "step": 31814 + }, + { + "epoch": 0.9341417581772271, + "grad_norm": 0.0, + "learning_rate": 2.2662413715892728e-07, + "loss": 1.2603, + "step": 31815 + }, + { + "epoch": 0.9341711198543661, + "grad_norm": 0.0, + "learning_rate": 2.264228757269371e-07, + "loss": 1.1216, + "step": 31816 + }, + { + "epoch": 0.934200481531505, + "grad_norm": 0.0, + "learning_rate": 2.2622170267990962e-07, + "loss": 1.1904, + "step": 31817 + }, + { + "epoch": 0.9342298432086441, + "grad_norm": 0.0, + "learning_rate": 2.2602061801966223e-07, + "loss": 1.166, + "step": 31818 + }, + { + "epoch": 0.9342592048857831, + "grad_norm": 0.0, + "learning_rate": 2.2581962174801352e-07, + "loss": 1.3213, + "step": 31819 + }, + { + "epoch": 0.934288566562922, + "grad_norm": 0.0, + "learning_rate": 2.256187138667798e-07, + "loss": 1.1421, + "step": 31820 + }, + { + "epoch": 0.9343179282400611, + "grad_norm": 0.0, + "learning_rate": 2.2541789437778072e-07, + "loss": 1.2783, + "step": 31821 + }, + { + "epoch": 0.9343472899172001, + "grad_norm": 0.0, + "learning_rate": 2.2521716328282928e-07, + "loss": 1.0845, + "step": 31822 + }, + { + "epoch": 0.934376651594339, + "grad_norm": 0.0, + "learning_rate": 2.2501652058374402e-07, + "loss": 1.1816, + "step": 31823 + }, + { + "epoch": 0.9344060132714781, + "grad_norm": 0.0, + "learning_rate": 2.248159662823368e-07, + "loss": 1.0894, + "step": 31824 + }, + { + "epoch": 0.9344353749486171, + "grad_norm": 0.0, + "learning_rate": 2.246155003804229e-07, + "loss": 1.1494, + "step": 31825 + }, + { + "epoch": 0.934464736625756, + "grad_norm": 0.0, + "learning_rate": 2.2441512287981527e-07, + "loss": 1.1196, + "step": 31826 + }, + { + "epoch": 0.9344940983028951, + "grad_norm": 0.0, + "learning_rate": 2.2421483378232355e-07, + "loss": 1.1147, + "step": 31827 + }, + { + "epoch": 0.934523459980034, + "grad_norm": 0.0, + "learning_rate": 2.240146330897619e-07, + "loss": 1.1958, + "step": 31828 + }, + { + "epoch": 0.934552821657173, + "grad_norm": 0.0, + "learning_rate": 2.2381452080393885e-07, + "loss": 1.2354, + "step": 31829 + }, + { + "epoch": 0.9345821833343121, + "grad_norm": 0.0, + "learning_rate": 2.2361449692666514e-07, + "loss": 1.2549, + "step": 31830 + }, + { + "epoch": 0.934611545011451, + "grad_norm": 0.0, + "learning_rate": 2.2341456145974827e-07, + "loss": 1.209, + "step": 31831 + }, + { + "epoch": 0.93464090668859, + "grad_norm": 0.0, + "learning_rate": 2.2321471440499898e-07, + "loss": 1.1377, + "step": 31832 + }, + { + "epoch": 0.9346702683657291, + "grad_norm": 0.0, + "learning_rate": 2.230149557642225e-07, + "loss": 1.1138, + "step": 31833 + }, + { + "epoch": 0.934699630042868, + "grad_norm": 0.0, + "learning_rate": 2.2281528553922626e-07, + "loss": 1.1572, + "step": 31834 + }, + { + "epoch": 0.934728991720007, + "grad_norm": 0.0, + "learning_rate": 2.226157037318144e-07, + "loss": 1.1309, + "step": 31835 + }, + { + "epoch": 0.9347583533971461, + "grad_norm": 0.0, + "learning_rate": 2.2241621034379214e-07, + "loss": 1.1968, + "step": 31836 + }, + { + "epoch": 0.934787715074285, + "grad_norm": 0.0, + "learning_rate": 2.2221680537696578e-07, + "loss": 1.1279, + "step": 31837 + }, + { + "epoch": 0.934817076751424, + "grad_norm": 0.0, + "learning_rate": 2.22017488833135e-07, + "loss": 1.041, + "step": 31838 + }, + { + "epoch": 0.9348464384285631, + "grad_norm": 0.0, + "learning_rate": 2.2181826071410616e-07, + "loss": 1.2661, + "step": 31839 + }, + { + "epoch": 0.934875800105702, + "grad_norm": 0.0, + "learning_rate": 2.216191210216778e-07, + "loss": 1.1992, + "step": 31840 + }, + { + "epoch": 0.934905161782841, + "grad_norm": 0.0, + "learning_rate": 2.2142006975765185e-07, + "loss": 1.2319, + "step": 31841 + }, + { + "epoch": 0.9349345234599801, + "grad_norm": 0.0, + "learning_rate": 2.2122110692383015e-07, + "loss": 1.1255, + "step": 31842 + }, + { + "epoch": 0.934963885137119, + "grad_norm": 0.0, + "learning_rate": 2.2102223252200904e-07, + "loss": 1.2139, + "step": 31843 + }, + { + "epoch": 0.934993246814258, + "grad_norm": 0.0, + "learning_rate": 2.2082344655398935e-07, + "loss": 1.2715, + "step": 31844 + }, + { + "epoch": 0.9350226084913971, + "grad_norm": 0.0, + "learning_rate": 2.2062474902156627e-07, + "loss": 1.1831, + "step": 31845 + }, + { + "epoch": 0.935051970168536, + "grad_norm": 0.0, + "learning_rate": 2.204261399265395e-07, + "loss": 1.2842, + "step": 31846 + }, + { + "epoch": 0.935081331845675, + "grad_norm": 0.0, + "learning_rate": 2.2022761927070201e-07, + "loss": 1.1055, + "step": 31847 + }, + { + "epoch": 0.9351106935228141, + "grad_norm": 0.0, + "learning_rate": 2.200291870558524e-07, + "loss": 1.2393, + "step": 31848 + }, + { + "epoch": 0.935140055199953, + "grad_norm": 0.0, + "learning_rate": 2.1983084328378258e-07, + "loss": 1.1406, + "step": 31849 + }, + { + "epoch": 0.935169416877092, + "grad_norm": 0.0, + "learning_rate": 2.1963258795628773e-07, + "loss": 1.2129, + "step": 31850 + }, + { + "epoch": 0.9351987785542311, + "grad_norm": 0.0, + "learning_rate": 2.1943442107516088e-07, + "loss": 1.0454, + "step": 31851 + }, + { + "epoch": 0.93522814023137, + "grad_norm": 0.0, + "learning_rate": 2.1923634264219284e-07, + "loss": 1.1255, + "step": 31852 + }, + { + "epoch": 0.935257501908509, + "grad_norm": 0.0, + "learning_rate": 2.190383526591755e-07, + "loss": 1.1309, + "step": 31853 + }, + { + "epoch": 0.935286863585648, + "grad_norm": 0.0, + "learning_rate": 2.1884045112789852e-07, + "loss": 1.1333, + "step": 31854 + }, + { + "epoch": 0.935316225262787, + "grad_norm": 0.0, + "learning_rate": 2.1864263805015273e-07, + "loss": 1.2178, + "step": 31855 + }, + { + "epoch": 0.935345586939926, + "grad_norm": 0.0, + "learning_rate": 2.1844491342772556e-07, + "loss": 1.1562, + "step": 31856 + }, + { + "epoch": 0.935374948617065, + "grad_norm": 0.0, + "learning_rate": 2.182472772624078e-07, + "loss": 1.1606, + "step": 31857 + }, + { + "epoch": 0.935404310294204, + "grad_norm": 0.0, + "learning_rate": 2.180497295559847e-07, + "loss": 1.2959, + "step": 31858 + }, + { + "epoch": 0.935433671971343, + "grad_norm": 0.0, + "learning_rate": 2.178522703102437e-07, + "loss": 1.0103, + "step": 31859 + }, + { + "epoch": 0.935463033648482, + "grad_norm": 0.0, + "learning_rate": 2.1765489952696784e-07, + "loss": 1.2197, + "step": 31860 + }, + { + "epoch": 0.935492395325621, + "grad_norm": 0.0, + "learning_rate": 2.1745761720794566e-07, + "loss": 1.2583, + "step": 31861 + }, + { + "epoch": 0.93552175700276, + "grad_norm": 0.0, + "learning_rate": 2.172604233549591e-07, + "loss": 1.2197, + "step": 31862 + }, + { + "epoch": 0.935551118679899, + "grad_norm": 0.0, + "learning_rate": 2.170633179697923e-07, + "loss": 1.1221, + "step": 31863 + }, + { + "epoch": 0.935580480357038, + "grad_norm": 0.0, + "learning_rate": 2.1686630105422711e-07, + "loss": 1.1143, + "step": 31864 + }, + { + "epoch": 0.935609842034177, + "grad_norm": 0.0, + "learning_rate": 2.166693726100444e-07, + "loss": 1.1924, + "step": 31865 + }, + { + "epoch": 0.935639203711316, + "grad_norm": 0.0, + "learning_rate": 2.1647253263902712e-07, + "loss": 1.127, + "step": 31866 + }, + { + "epoch": 0.935668565388455, + "grad_norm": 0.0, + "learning_rate": 2.16275781142955e-07, + "loss": 1.1758, + "step": 31867 + }, + { + "epoch": 0.935697927065594, + "grad_norm": 0.0, + "learning_rate": 2.160791181236066e-07, + "loss": 1.1787, + "step": 31868 + }, + { + "epoch": 0.935727288742733, + "grad_norm": 0.0, + "learning_rate": 2.158825435827594e-07, + "loss": 1.3184, + "step": 31869 + }, + { + "epoch": 0.935756650419872, + "grad_norm": 0.0, + "learning_rate": 2.1568605752219196e-07, + "loss": 1.2773, + "step": 31870 + }, + { + "epoch": 0.935786012097011, + "grad_norm": 0.0, + "learning_rate": 2.1548965994368175e-07, + "loss": 1.2383, + "step": 31871 + }, + { + "epoch": 0.93581537377415, + "grad_norm": 0.0, + "learning_rate": 2.1529335084900515e-07, + "loss": 1.3369, + "step": 31872 + }, + { + "epoch": 0.935844735451289, + "grad_norm": 0.0, + "learning_rate": 2.1509713023993628e-07, + "loss": 1.2578, + "step": 31873 + }, + { + "epoch": 0.935874097128428, + "grad_norm": 0.0, + "learning_rate": 2.1490099811824926e-07, + "loss": 1.3213, + "step": 31874 + }, + { + "epoch": 0.935903458805567, + "grad_norm": 0.0, + "learning_rate": 2.1470495448571827e-07, + "loss": 1.0688, + "step": 31875 + }, + { + "epoch": 0.935932820482706, + "grad_norm": 0.0, + "learning_rate": 2.1450899934411852e-07, + "loss": 1.2148, + "step": 31876 + }, + { + "epoch": 0.935962182159845, + "grad_norm": 0.0, + "learning_rate": 2.143131326952186e-07, + "loss": 1.25, + "step": 31877 + }, + { + "epoch": 0.935991543836984, + "grad_norm": 0.0, + "learning_rate": 2.141173545407904e-07, + "loss": 1.1328, + "step": 31878 + }, + { + "epoch": 0.936020905514123, + "grad_norm": 0.0, + "learning_rate": 2.139216648826059e-07, + "loss": 1.21, + "step": 31879 + }, + { + "epoch": 0.9360502671912619, + "grad_norm": 0.0, + "learning_rate": 2.1372606372243364e-07, + "loss": 1.2222, + "step": 31880 + }, + { + "epoch": 0.936079628868401, + "grad_norm": 0.0, + "learning_rate": 2.1353055106204336e-07, + "loss": 1.2002, + "step": 31881 + }, + { + "epoch": 0.93610899054554, + "grad_norm": 0.0, + "learning_rate": 2.1333512690320357e-07, + "loss": 1.1611, + "step": 31882 + }, + { + "epoch": 0.9361383522226789, + "grad_norm": 0.0, + "learning_rate": 2.1313979124767958e-07, + "loss": 1.2402, + "step": 31883 + }, + { + "epoch": 0.936167713899818, + "grad_norm": 0.0, + "learning_rate": 2.1294454409723996e-07, + "loss": 1.2485, + "step": 31884 + }, + { + "epoch": 0.936197075576957, + "grad_norm": 0.0, + "learning_rate": 2.1274938545364775e-07, + "loss": 1.1035, + "step": 31885 + }, + { + "epoch": 0.9362264372540959, + "grad_norm": 0.0, + "learning_rate": 2.1255431531867043e-07, + "loss": 1.1851, + "step": 31886 + }, + { + "epoch": 0.936255798931235, + "grad_norm": 0.0, + "learning_rate": 2.123593336940699e-07, + "loss": 1.1274, + "step": 31887 + }, + { + "epoch": 0.936285160608374, + "grad_norm": 0.0, + "learning_rate": 2.1216444058161144e-07, + "loss": 1.2412, + "step": 31888 + }, + { + "epoch": 0.9363145222855129, + "grad_norm": 0.0, + "learning_rate": 2.1196963598305586e-07, + "loss": 1.2153, + "step": 31889 + }, + { + "epoch": 0.936343883962652, + "grad_norm": 0.0, + "learning_rate": 2.117749199001673e-07, + "loss": 1.1733, + "step": 31890 + }, + { + "epoch": 0.936373245639791, + "grad_norm": 0.0, + "learning_rate": 2.1158029233470324e-07, + "loss": 1.0835, + "step": 31891 + }, + { + "epoch": 0.9364026073169299, + "grad_norm": 0.0, + "learning_rate": 2.113857532884267e-07, + "loss": 1.375, + "step": 31892 + }, + { + "epoch": 0.9364319689940689, + "grad_norm": 0.0, + "learning_rate": 2.111913027630952e-07, + "loss": 1.1099, + "step": 31893 + }, + { + "epoch": 0.936461330671208, + "grad_norm": 0.0, + "learning_rate": 2.1099694076046728e-07, + "loss": 1.1641, + "step": 31894 + }, + { + "epoch": 0.9364906923483469, + "grad_norm": 0.0, + "learning_rate": 2.108026672823016e-07, + "loss": 1.2949, + "step": 31895 + }, + { + "epoch": 0.9365200540254859, + "grad_norm": 0.0, + "learning_rate": 2.1060848233035447e-07, + "loss": 1.2222, + "step": 31896 + }, + { + "epoch": 0.936549415702625, + "grad_norm": 0.0, + "learning_rate": 2.104143859063812e-07, + "loss": 1.1841, + "step": 31897 + }, + { + "epoch": 0.9365787773797639, + "grad_norm": 0.0, + "learning_rate": 2.1022037801213812e-07, + "loss": 1.1514, + "step": 31898 + }, + { + "epoch": 0.9366081390569029, + "grad_norm": 0.0, + "learning_rate": 2.1002645864937942e-07, + "loss": 1.0894, + "step": 31899 + }, + { + "epoch": 0.936637500734042, + "grad_norm": 0.0, + "learning_rate": 2.0983262781985925e-07, + "loss": 1.1704, + "step": 31900 + }, + { + "epoch": 0.9366668624111809, + "grad_norm": 0.0, + "learning_rate": 2.096388855253295e-07, + "loss": 1.0762, + "step": 31901 + }, + { + "epoch": 0.9366962240883199, + "grad_norm": 0.0, + "learning_rate": 2.0944523176754216e-07, + "loss": 1.2183, + "step": 31902 + }, + { + "epoch": 0.936725585765459, + "grad_norm": 0.0, + "learning_rate": 2.0925166654825023e-07, + "loss": 1.207, + "step": 31903 + }, + { + "epoch": 0.9367549474425979, + "grad_norm": 0.0, + "learning_rate": 2.0905818986920236e-07, + "loss": 1.2432, + "step": 31904 + }, + { + "epoch": 0.9367843091197369, + "grad_norm": 0.0, + "learning_rate": 2.0886480173214818e-07, + "loss": 1.2349, + "step": 31905 + }, + { + "epoch": 0.9368136707968759, + "grad_norm": 0.0, + "learning_rate": 2.0867150213883746e-07, + "loss": 1.2422, + "step": 31906 + }, + { + "epoch": 0.9368430324740149, + "grad_norm": 0.0, + "learning_rate": 2.084782910910188e-07, + "loss": 1.1274, + "step": 31907 + }, + { + "epoch": 0.9368723941511539, + "grad_norm": 0.0, + "learning_rate": 2.0828516859043746e-07, + "loss": 1.2041, + "step": 31908 + }, + { + "epoch": 0.9369017558282929, + "grad_norm": 0.0, + "learning_rate": 2.0809213463884092e-07, + "loss": 1.1812, + "step": 31909 + }, + { + "epoch": 0.9369311175054319, + "grad_norm": 0.0, + "learning_rate": 2.0789918923797557e-07, + "loss": 1.146, + "step": 31910 + }, + { + "epoch": 0.9369604791825709, + "grad_norm": 0.0, + "learning_rate": 2.0770633238958558e-07, + "loss": 1.1733, + "step": 31911 + }, + { + "epoch": 0.9369898408597099, + "grad_norm": 0.0, + "learning_rate": 2.0751356409541513e-07, + "loss": 1.1958, + "step": 31912 + }, + { + "epoch": 0.9370192025368489, + "grad_norm": 0.0, + "learning_rate": 2.073208843572072e-07, + "loss": 1.2246, + "step": 31913 + }, + { + "epoch": 0.9370485642139879, + "grad_norm": 0.0, + "learning_rate": 2.0712829317670492e-07, + "loss": 1.3184, + "step": 31914 + }, + { + "epoch": 0.9370779258911269, + "grad_norm": 0.0, + "learning_rate": 2.0693579055564906e-07, + "loss": 1.2183, + "step": 31915 + }, + { + "epoch": 0.9371072875682659, + "grad_norm": 0.0, + "learning_rate": 2.0674337649578157e-07, + "loss": 1.1592, + "step": 31916 + }, + { + "epoch": 0.9371366492454049, + "grad_norm": 0.0, + "learning_rate": 2.065510509988411e-07, + "loss": 1.2822, + "step": 31917 + }, + { + "epoch": 0.9371660109225439, + "grad_norm": 0.0, + "learning_rate": 2.0635881406656733e-07, + "loss": 1.1787, + "step": 31918 + }, + { + "epoch": 0.9371953725996829, + "grad_norm": 0.0, + "learning_rate": 2.0616666570069887e-07, + "loss": 1.0024, + "step": 31919 + }, + { + "epoch": 0.9372247342768218, + "grad_norm": 0.0, + "learning_rate": 2.0597460590297435e-07, + "loss": 1.2236, + "step": 31920 + }, + { + "epoch": 0.9372540959539609, + "grad_norm": 0.0, + "learning_rate": 2.0578263467512905e-07, + "loss": 1.2441, + "step": 31921 + }, + { + "epoch": 0.9372834576310999, + "grad_norm": 0.0, + "learning_rate": 2.0559075201890043e-07, + "loss": 1.0947, + "step": 31922 + }, + { + "epoch": 0.9373128193082388, + "grad_norm": 0.0, + "learning_rate": 2.053989579360227e-07, + "loss": 1.293, + "step": 31923 + }, + { + "epoch": 0.9373421809853779, + "grad_norm": 0.0, + "learning_rate": 2.0520725242823003e-07, + "loss": 1.1846, + "step": 31924 + }, + { + "epoch": 0.9373715426625169, + "grad_norm": 0.0, + "learning_rate": 2.0501563549725767e-07, + "loss": 1.1011, + "step": 31925 + }, + { + "epoch": 0.9374009043396558, + "grad_norm": 0.0, + "learning_rate": 2.0482410714483758e-07, + "loss": 1.1641, + "step": 31926 + }, + { + "epoch": 0.9374302660167949, + "grad_norm": 0.0, + "learning_rate": 2.046326673727006e-07, + "loss": 1.2178, + "step": 31927 + }, + { + "epoch": 0.9374596276939339, + "grad_norm": 0.0, + "learning_rate": 2.0444131618257978e-07, + "loss": 1.1924, + "step": 31928 + }, + { + "epoch": 0.9374889893710728, + "grad_norm": 0.0, + "learning_rate": 2.0425005357620374e-07, + "loss": 1.1938, + "step": 31929 + }, + { + "epoch": 0.9375183510482119, + "grad_norm": 0.0, + "learning_rate": 2.0405887955530445e-07, + "loss": 1.2017, + "step": 31930 + }, + { + "epoch": 0.9375477127253509, + "grad_norm": 0.0, + "learning_rate": 2.038677941216094e-07, + "loss": 1.1621, + "step": 31931 + }, + { + "epoch": 0.9375770744024898, + "grad_norm": 0.0, + "learning_rate": 2.036767972768472e-07, + "loss": 1.0864, + "step": 31932 + }, + { + "epoch": 0.9376064360796289, + "grad_norm": 0.0, + "learning_rate": 2.0348588902274312e-07, + "loss": 1.1772, + "step": 31933 + }, + { + "epoch": 0.9376357977567679, + "grad_norm": 0.0, + "learning_rate": 2.0329506936102583e-07, + "loss": 1.2524, + "step": 31934 + }, + { + "epoch": 0.9376651594339068, + "grad_norm": 0.0, + "learning_rate": 2.0310433829342058e-07, + "loss": 1.2627, + "step": 31935 + }, + { + "epoch": 0.9376945211110459, + "grad_norm": 0.0, + "learning_rate": 2.0291369582165043e-07, + "loss": 1.1816, + "step": 31936 + }, + { + "epoch": 0.9377238827881849, + "grad_norm": 0.0, + "learning_rate": 2.027231419474418e-07, + "loss": 1.1533, + "step": 31937 + }, + { + "epoch": 0.9377532444653238, + "grad_norm": 0.0, + "learning_rate": 2.0253267667251663e-07, + "loss": 1.1689, + "step": 31938 + }, + { + "epoch": 0.9377826061424629, + "grad_norm": 0.0, + "learning_rate": 2.02342299998598e-07, + "loss": 1.1777, + "step": 31939 + }, + { + "epoch": 0.9378119678196019, + "grad_norm": 0.0, + "learning_rate": 2.0215201192740675e-07, + "loss": 1.2056, + "step": 31940 + }, + { + "epoch": 0.9378413294967408, + "grad_norm": 0.0, + "learning_rate": 2.0196181246066482e-07, + "loss": 1.146, + "step": 31941 + }, + { + "epoch": 0.9378706911738799, + "grad_norm": 0.0, + "learning_rate": 2.0177170160009197e-07, + "loss": 1.2788, + "step": 31942 + }, + { + "epoch": 0.9379000528510189, + "grad_norm": 0.0, + "learning_rate": 2.0158167934740568e-07, + "loss": 1.1895, + "step": 31943 + }, + { + "epoch": 0.9379294145281578, + "grad_norm": 0.0, + "learning_rate": 2.0139174570432575e-07, + "loss": 1.1318, + "step": 31944 + }, + { + "epoch": 0.9379587762052969, + "grad_norm": 0.0, + "learning_rate": 2.012019006725696e-07, + "loss": 1.3076, + "step": 31945 + }, + { + "epoch": 0.9379881378824358, + "grad_norm": 0.0, + "learning_rate": 2.0101214425385484e-07, + "loss": 1.3018, + "step": 31946 + }, + { + "epoch": 0.9380174995595748, + "grad_norm": 0.0, + "learning_rate": 2.008224764498967e-07, + "loss": 1.3154, + "step": 31947 + }, + { + "epoch": 0.9380468612367139, + "grad_norm": 0.0, + "learning_rate": 2.0063289726241053e-07, + "loss": 1.0962, + "step": 31948 + }, + { + "epoch": 0.9380762229138528, + "grad_norm": 0.0, + "learning_rate": 2.0044340669311047e-07, + "loss": 1.2002, + "step": 31949 + }, + { + "epoch": 0.9381055845909918, + "grad_norm": 0.0, + "learning_rate": 2.002540047437107e-07, + "loss": 1.252, + "step": 31950 + }, + { + "epoch": 0.9381349462681309, + "grad_norm": 0.0, + "learning_rate": 2.0006469141592322e-07, + "loss": 1.2354, + "step": 31951 + }, + { + "epoch": 0.9381643079452698, + "grad_norm": 0.0, + "learning_rate": 1.9987546671146107e-07, + "loss": 1.2192, + "step": 31952 + }, + { + "epoch": 0.9381936696224088, + "grad_norm": 0.0, + "learning_rate": 1.9968633063203514e-07, + "loss": 1.2227, + "step": 31953 + }, + { + "epoch": 0.9382230312995479, + "grad_norm": 0.0, + "learning_rate": 1.9949728317935512e-07, + "loss": 1.2734, + "step": 31954 + }, + { + "epoch": 0.9382523929766868, + "grad_norm": 0.0, + "learning_rate": 1.993083243551308e-07, + "loss": 1.1313, + "step": 31955 + }, + { + "epoch": 0.9382817546538258, + "grad_norm": 0.0, + "learning_rate": 1.9911945416107192e-07, + "loss": 1.2529, + "step": 31956 + }, + { + "epoch": 0.9383111163309649, + "grad_norm": 0.0, + "learning_rate": 1.98930672598886e-07, + "loss": 1.2134, + "step": 31957 + }, + { + "epoch": 0.9383404780081038, + "grad_norm": 0.0, + "learning_rate": 1.9874197967027943e-07, + "loss": 1.2295, + "step": 31958 + }, + { + "epoch": 0.9383698396852428, + "grad_norm": 0.0, + "learning_rate": 1.9855337537695973e-07, + "loss": 1.2007, + "step": 31959 + }, + { + "epoch": 0.9383992013623819, + "grad_norm": 0.0, + "learning_rate": 1.9836485972063113e-07, + "loss": 1.1846, + "step": 31960 + }, + { + "epoch": 0.9384285630395208, + "grad_norm": 0.0, + "learning_rate": 1.9817643270300113e-07, + "loss": 1.103, + "step": 31961 + }, + { + "epoch": 0.9384579247166598, + "grad_norm": 0.0, + "learning_rate": 1.979880943257706e-07, + "loss": 1.1387, + "step": 31962 + }, + { + "epoch": 0.9384872863937989, + "grad_norm": 0.0, + "learning_rate": 1.9779984459064484e-07, + "loss": 1.252, + "step": 31963 + }, + { + "epoch": 0.9385166480709378, + "grad_norm": 0.0, + "learning_rate": 1.976116834993247e-07, + "loss": 1.1924, + "step": 31964 + }, + { + "epoch": 0.9385460097480768, + "grad_norm": 0.0, + "learning_rate": 1.9742361105351327e-07, + "loss": 1.0728, + "step": 31965 + }, + { + "epoch": 0.9385753714252159, + "grad_norm": 0.0, + "learning_rate": 1.972356272549103e-07, + "loss": 1.2163, + "step": 31966 + }, + { + "epoch": 0.9386047331023548, + "grad_norm": 0.0, + "learning_rate": 1.9704773210521556e-07, + "loss": 1.1895, + "step": 31967 + }, + { + "epoch": 0.9386340947794938, + "grad_norm": 0.0, + "learning_rate": 1.9685992560612986e-07, + "loss": 1.0991, + "step": 31968 + }, + { + "epoch": 0.9386634564566328, + "grad_norm": 0.0, + "learning_rate": 1.9667220775934859e-07, + "loss": 1.2881, + "step": 31969 + }, + { + "epoch": 0.9386928181337718, + "grad_norm": 0.0, + "learning_rate": 1.9648457856657254e-07, + "loss": 1.1787, + "step": 31970 + }, + { + "epoch": 0.9387221798109108, + "grad_norm": 0.0, + "learning_rate": 1.9629703802949706e-07, + "loss": 1.1724, + "step": 31971 + }, + { + "epoch": 0.9387515414880498, + "grad_norm": 0.0, + "learning_rate": 1.9610958614981856e-07, + "loss": 1.209, + "step": 31972 + }, + { + "epoch": 0.9387809031651888, + "grad_norm": 0.0, + "learning_rate": 1.9592222292923123e-07, + "loss": 1.2383, + "step": 31973 + }, + { + "epoch": 0.9388102648423278, + "grad_norm": 0.0, + "learning_rate": 1.957349483694304e-07, + "loss": 1.2852, + "step": 31974 + }, + { + "epoch": 0.9388396265194668, + "grad_norm": 0.0, + "learning_rate": 1.9554776247211026e-07, + "loss": 1.0493, + "step": 31975 + }, + { + "epoch": 0.9388689881966058, + "grad_norm": 0.0, + "learning_rate": 1.9536066523896057e-07, + "loss": 1.0879, + "step": 31976 + }, + { + "epoch": 0.9388983498737448, + "grad_norm": 0.0, + "learning_rate": 1.9517365667167775e-07, + "loss": 1.2119, + "step": 31977 + }, + { + "epoch": 0.9389277115508838, + "grad_norm": 0.0, + "learning_rate": 1.9498673677194823e-07, + "loss": 1.1357, + "step": 31978 + }, + { + "epoch": 0.9389570732280228, + "grad_norm": 0.0, + "learning_rate": 1.947999055414662e-07, + "loss": 1.1118, + "step": 31979 + }, + { + "epoch": 0.9389864349051618, + "grad_norm": 0.0, + "learning_rate": 1.9461316298191924e-07, + "loss": 1.1792, + "step": 31980 + }, + { + "epoch": 0.9390157965823008, + "grad_norm": 0.0, + "learning_rate": 1.9442650909499815e-07, + "loss": 1.1392, + "step": 31981 + }, + { + "epoch": 0.9390451582594398, + "grad_norm": 0.0, + "learning_rate": 1.9423994388238832e-07, + "loss": 1.2144, + "step": 31982 + }, + { + "epoch": 0.9390745199365788, + "grad_norm": 0.0, + "learning_rate": 1.9405346734577724e-07, + "loss": 1.2764, + "step": 31983 + }, + { + "epoch": 0.9391038816137178, + "grad_norm": 0.0, + "learning_rate": 1.9386707948685246e-07, + "loss": 1.1816, + "step": 31984 + }, + { + "epoch": 0.9391332432908568, + "grad_norm": 0.0, + "learning_rate": 1.936807803072993e-07, + "loss": 1.1567, + "step": 31985 + }, + { + "epoch": 0.9391626049679958, + "grad_norm": 0.0, + "learning_rate": 1.9349456980880198e-07, + "loss": 1.2695, + "step": 31986 + }, + { + "epoch": 0.9391919666451348, + "grad_norm": 0.0, + "learning_rate": 1.933084479930447e-07, + "loss": 1.1748, + "step": 31987 + }, + { + "epoch": 0.9392213283222738, + "grad_norm": 0.0, + "learning_rate": 1.9312241486171057e-07, + "loss": 1.1685, + "step": 31988 + }, + { + "epoch": 0.9392506899994127, + "grad_norm": 0.0, + "learning_rate": 1.9293647041648267e-07, + "loss": 1.2354, + "step": 31989 + }, + { + "epoch": 0.9392800516765518, + "grad_norm": 0.0, + "learning_rate": 1.9275061465904188e-07, + "loss": 1.1255, + "step": 31990 + }, + { + "epoch": 0.9393094133536908, + "grad_norm": 0.0, + "learning_rate": 1.9256484759106907e-07, + "loss": 1.3145, + "step": 31991 + }, + { + "epoch": 0.9393387750308297, + "grad_norm": 0.0, + "learning_rate": 1.9237916921424294e-07, + "loss": 1.2666, + "step": 31992 + }, + { + "epoch": 0.9393681367079687, + "grad_norm": 0.0, + "learning_rate": 1.921935795302443e-07, + "loss": 1.248, + "step": 31993 + }, + { + "epoch": 0.9393974983851078, + "grad_norm": 0.0, + "learning_rate": 1.9200807854075076e-07, + "loss": 1.3623, + "step": 31994 + }, + { + "epoch": 0.9394268600622467, + "grad_norm": 0.0, + "learning_rate": 1.9182266624744094e-07, + "loss": 1.2627, + "step": 31995 + }, + { + "epoch": 0.9394562217393857, + "grad_norm": 0.0, + "learning_rate": 1.9163734265198909e-07, + "loss": 1.1401, + "step": 31996 + }, + { + "epoch": 0.9394855834165248, + "grad_norm": 0.0, + "learning_rate": 1.9145210775607381e-07, + "loss": 1.1841, + "step": 31997 + }, + { + "epoch": 0.9395149450936637, + "grad_norm": 0.0, + "learning_rate": 1.9126696156136937e-07, + "loss": 1.2949, + "step": 31998 + }, + { + "epoch": 0.9395443067708027, + "grad_norm": 0.0, + "learning_rate": 1.9108190406954997e-07, + "loss": 1.2349, + "step": 31999 + }, + { + "epoch": 0.9395736684479418, + "grad_norm": 0.0, + "learning_rate": 1.908969352822876e-07, + "loss": 1.1489, + "step": 32000 + }, + { + "epoch": 0.9396030301250807, + "grad_norm": 0.0, + "learning_rate": 1.907120552012587e-07, + "loss": 1.2744, + "step": 32001 + }, + { + "epoch": 0.9396323918022197, + "grad_norm": 0.0, + "learning_rate": 1.905272638281308e-07, + "loss": 1.1382, + "step": 32002 + }, + { + "epoch": 0.9396617534793588, + "grad_norm": 0.0, + "learning_rate": 1.9034256116457818e-07, + "loss": 1.2686, + "step": 32003 + }, + { + "epoch": 0.9396911151564977, + "grad_norm": 0.0, + "learning_rate": 1.9015794721226944e-07, + "loss": 1.2236, + "step": 32004 + }, + { + "epoch": 0.9397204768336367, + "grad_norm": 0.0, + "learning_rate": 1.8997342197287438e-07, + "loss": 1.2725, + "step": 32005 + }, + { + "epoch": 0.9397498385107758, + "grad_norm": 0.0, + "learning_rate": 1.897889854480639e-07, + "loss": 1.2554, + "step": 32006 + }, + { + "epoch": 0.9397792001879147, + "grad_norm": 0.0, + "learning_rate": 1.8960463763950222e-07, + "loss": 1.2773, + "step": 32007 + }, + { + "epoch": 0.9398085618650537, + "grad_norm": 0.0, + "learning_rate": 1.894203785488591e-07, + "loss": 1.147, + "step": 32008 + }, + { + "epoch": 0.9398379235421928, + "grad_norm": 0.0, + "learning_rate": 1.8923620817779876e-07, + "loss": 1.3262, + "step": 32009 + }, + { + "epoch": 0.9398672852193317, + "grad_norm": 0.0, + "learning_rate": 1.8905212652798989e-07, + "loss": 1.1997, + "step": 32010 + }, + { + "epoch": 0.9398966468964707, + "grad_norm": 0.0, + "learning_rate": 1.8886813360109447e-07, + "loss": 1.1997, + "step": 32011 + }, + { + "epoch": 0.9399260085736098, + "grad_norm": 0.0, + "learning_rate": 1.8868422939877674e-07, + "loss": 1.1714, + "step": 32012 + }, + { + "epoch": 0.9399553702507487, + "grad_norm": 0.0, + "learning_rate": 1.885004139227009e-07, + "loss": 1.27, + "step": 32013 + }, + { + "epoch": 0.9399847319278877, + "grad_norm": 0.0, + "learning_rate": 1.883166871745279e-07, + "loss": 1.1846, + "step": 32014 + }, + { + "epoch": 0.9400140936050267, + "grad_norm": 0.0, + "learning_rate": 1.881330491559208e-07, + "loss": 1.2363, + "step": 32015 + }, + { + "epoch": 0.9400434552821657, + "grad_norm": 0.0, + "learning_rate": 1.879494998685383e-07, + "loss": 1.2314, + "step": 32016 + }, + { + "epoch": 0.9400728169593047, + "grad_norm": 0.0, + "learning_rate": 1.8776603931404236e-07, + "loss": 1.2275, + "step": 32017 + }, + { + "epoch": 0.9401021786364437, + "grad_norm": 0.0, + "learning_rate": 1.875826674940895e-07, + "loss": 1.249, + "step": 32018 + }, + { + "epoch": 0.9401315403135827, + "grad_norm": 0.0, + "learning_rate": 1.8739938441034166e-07, + "loss": 1.1143, + "step": 32019 + }, + { + "epoch": 0.9401609019907217, + "grad_norm": 0.0, + "learning_rate": 1.87216190064452e-07, + "loss": 1.3682, + "step": 32020 + }, + { + "epoch": 0.9401902636678607, + "grad_norm": 0.0, + "learning_rate": 1.8703308445808144e-07, + "loss": 1.125, + "step": 32021 + }, + { + "epoch": 0.9402196253449997, + "grad_norm": 0.0, + "learning_rate": 1.8685006759288305e-07, + "loss": 1.3027, + "step": 32022 + }, + { + "epoch": 0.9402489870221387, + "grad_norm": 0.0, + "learning_rate": 1.866671394705133e-07, + "loss": 1.2129, + "step": 32023 + }, + { + "epoch": 0.9402783486992777, + "grad_norm": 0.0, + "learning_rate": 1.864843000926253e-07, + "loss": 1.1768, + "step": 32024 + }, + { + "epoch": 0.9403077103764167, + "grad_norm": 0.0, + "learning_rate": 1.863015494608722e-07, + "loss": 1.3066, + "step": 32025 + }, + { + "epoch": 0.9403370720535557, + "grad_norm": 0.0, + "learning_rate": 1.8611888757690934e-07, + "loss": 1.3174, + "step": 32026 + }, + { + "epoch": 0.9403664337306947, + "grad_norm": 0.0, + "learning_rate": 1.8593631444238536e-07, + "loss": 1.3203, + "step": 32027 + }, + { + "epoch": 0.9403957954078337, + "grad_norm": 0.0, + "learning_rate": 1.8575383005895342e-07, + "loss": 1.1338, + "step": 32028 + }, + { + "epoch": 0.9404251570849727, + "grad_norm": 0.0, + "learning_rate": 1.855714344282622e-07, + "loss": 1.2705, + "step": 32029 + }, + { + "epoch": 0.9404545187621117, + "grad_norm": 0.0, + "learning_rate": 1.853891275519626e-07, + "loss": 1.1357, + "step": 32030 + }, + { + "epoch": 0.9404838804392507, + "grad_norm": 0.0, + "learning_rate": 1.8520690943170327e-07, + "loss": 1.1748, + "step": 32031 + }, + { + "epoch": 0.9405132421163896, + "grad_norm": 0.0, + "learning_rate": 1.8502478006913073e-07, + "loss": 1.2354, + "step": 32032 + }, + { + "epoch": 0.9405426037935287, + "grad_norm": 0.0, + "learning_rate": 1.848427394658936e-07, + "loss": 1.0767, + "step": 32033 + }, + { + "epoch": 0.9405719654706677, + "grad_norm": 0.0, + "learning_rate": 1.8466078762363503e-07, + "loss": 1.2388, + "step": 32034 + }, + { + "epoch": 0.9406013271478066, + "grad_norm": 0.0, + "learning_rate": 1.8447892454400485e-07, + "loss": 1.312, + "step": 32035 + }, + { + "epoch": 0.9406306888249457, + "grad_norm": 0.0, + "learning_rate": 1.842971502286439e-07, + "loss": 1.1929, + "step": 32036 + }, + { + "epoch": 0.9406600505020847, + "grad_norm": 0.0, + "learning_rate": 1.8411546467919982e-07, + "loss": 1.209, + "step": 32037 + }, + { + "epoch": 0.9406894121792236, + "grad_norm": 0.0, + "learning_rate": 1.8393386789731238e-07, + "loss": 1.3672, + "step": 32038 + }, + { + "epoch": 0.9407187738563627, + "grad_norm": 0.0, + "learning_rate": 1.837523598846247e-07, + "loss": 1.2988, + "step": 32039 + }, + { + "epoch": 0.9407481355335017, + "grad_norm": 0.0, + "learning_rate": 1.835709406427788e-07, + "loss": 1.2812, + "step": 32040 + }, + { + "epoch": 0.9407774972106406, + "grad_norm": 0.0, + "learning_rate": 1.833896101734145e-07, + "loss": 1.3359, + "step": 32041 + }, + { + "epoch": 0.9408068588877797, + "grad_norm": 0.0, + "learning_rate": 1.832083684781727e-07, + "loss": 1.1992, + "step": 32042 + }, + { + "epoch": 0.9408362205649187, + "grad_norm": 0.0, + "learning_rate": 1.8302721555869097e-07, + "loss": 1.0884, + "step": 32043 + }, + { + "epoch": 0.9408655822420576, + "grad_norm": 0.0, + "learning_rate": 1.82846151416608e-07, + "loss": 1.0786, + "step": 32044 + }, + { + "epoch": 0.9408949439191967, + "grad_norm": 0.0, + "learning_rate": 1.8266517605356137e-07, + "loss": 1.2432, + "step": 32045 + }, + { + "epoch": 0.9409243055963357, + "grad_norm": 0.0, + "learning_rate": 1.8248428947118868e-07, + "loss": 1.1689, + "step": 32046 + }, + { + "epoch": 0.9409536672734746, + "grad_norm": 0.0, + "learning_rate": 1.8230349167112527e-07, + "loss": 1.3276, + "step": 32047 + }, + { + "epoch": 0.9409830289506137, + "grad_norm": 0.0, + "learning_rate": 1.821227826550054e-07, + "loss": 1.3018, + "step": 32048 + }, + { + "epoch": 0.9410123906277527, + "grad_norm": 0.0, + "learning_rate": 1.8194216242446328e-07, + "loss": 1.2192, + "step": 32049 + }, + { + "epoch": 0.9410417523048916, + "grad_norm": 0.0, + "learning_rate": 1.817616309811332e-07, + "loss": 1.1885, + "step": 32050 + }, + { + "epoch": 0.9410711139820307, + "grad_norm": 0.0, + "learning_rate": 1.8158118832664606e-07, + "loss": 1.168, + "step": 32051 + }, + { + "epoch": 0.9411004756591697, + "grad_norm": 0.0, + "learning_rate": 1.81400834462635e-07, + "loss": 1.1704, + "step": 32052 + }, + { + "epoch": 0.9411298373363086, + "grad_norm": 0.0, + "learning_rate": 1.8122056939073207e-07, + "loss": 1.2002, + "step": 32053 + }, + { + "epoch": 0.9411591990134477, + "grad_norm": 0.0, + "learning_rate": 1.8104039311256484e-07, + "loss": 1.2656, + "step": 32054 + }, + { + "epoch": 0.9411885606905867, + "grad_norm": 0.0, + "learning_rate": 1.808603056297642e-07, + "loss": 1.127, + "step": 32055 + }, + { + "epoch": 0.9412179223677256, + "grad_norm": 0.0, + "learning_rate": 1.8068030694395887e-07, + "loss": 1.2427, + "step": 32056 + }, + { + "epoch": 0.9412472840448647, + "grad_norm": 0.0, + "learning_rate": 1.8050039705677648e-07, + "loss": 1.1558, + "step": 32057 + }, + { + "epoch": 0.9412766457220036, + "grad_norm": 0.0, + "learning_rate": 1.8032057596984342e-07, + "loss": 1.2139, + "step": 32058 + }, + { + "epoch": 0.9413060073991426, + "grad_norm": 0.0, + "learning_rate": 1.8014084368478623e-07, + "loss": 1.3018, + "step": 32059 + }, + { + "epoch": 0.9413353690762817, + "grad_norm": 0.0, + "learning_rate": 1.7996120020323027e-07, + "loss": 1.2539, + "step": 32060 + }, + { + "epoch": 0.9413647307534206, + "grad_norm": 0.0, + "learning_rate": 1.7978164552679978e-07, + "loss": 1.1084, + "step": 32061 + }, + { + "epoch": 0.9413940924305596, + "grad_norm": 0.0, + "learning_rate": 1.7960217965712012e-07, + "loss": 1.2432, + "step": 32062 + }, + { + "epoch": 0.9414234541076987, + "grad_norm": 0.0, + "learning_rate": 1.7942280259581224e-07, + "loss": 1.166, + "step": 32063 + }, + { + "epoch": 0.9414528157848376, + "grad_norm": 0.0, + "learning_rate": 1.7924351434449928e-07, + "loss": 1.1816, + "step": 32064 + }, + { + "epoch": 0.9414821774619766, + "grad_norm": 0.0, + "learning_rate": 1.7906431490480215e-07, + "loss": 1.2021, + "step": 32065 + }, + { + "epoch": 0.9415115391391157, + "grad_norm": 0.0, + "learning_rate": 1.7888520427834178e-07, + "loss": 1.1924, + "step": 32066 + }, + { + "epoch": 0.9415409008162546, + "grad_norm": 0.0, + "learning_rate": 1.7870618246673687e-07, + "loss": 1.2271, + "step": 32067 + }, + { + "epoch": 0.9415702624933936, + "grad_norm": 0.0, + "learning_rate": 1.7852724947160727e-07, + "loss": 1.3281, + "step": 32068 + }, + { + "epoch": 0.9415996241705327, + "grad_norm": 0.0, + "learning_rate": 1.7834840529457166e-07, + "loss": 0.9248, + "step": 32069 + }, + { + "epoch": 0.9416289858476716, + "grad_norm": 0.0, + "learning_rate": 1.781696499372465e-07, + "loss": 1.2646, + "step": 32070 + }, + { + "epoch": 0.9416583475248106, + "grad_norm": 0.0, + "learning_rate": 1.7799098340124942e-07, + "loss": 1.2246, + "step": 32071 + }, + { + "epoch": 0.9416877092019497, + "grad_norm": 0.0, + "learning_rate": 1.778124056881947e-07, + "loss": 1.1338, + "step": 32072 + }, + { + "epoch": 0.9417170708790886, + "grad_norm": 0.0, + "learning_rate": 1.7763391679969876e-07, + "loss": 1.1938, + "step": 32073 + }, + { + "epoch": 0.9417464325562276, + "grad_norm": 0.0, + "learning_rate": 1.7745551673737372e-07, + "loss": 0.9614, + "step": 32074 + }, + { + "epoch": 0.9417757942333667, + "grad_norm": 0.0, + "learning_rate": 1.772772055028349e-07, + "loss": 1.2783, + "step": 32075 + }, + { + "epoch": 0.9418051559105056, + "grad_norm": 0.0, + "learning_rate": 1.7709898309769213e-07, + "loss": 1.04, + "step": 32076 + }, + { + "epoch": 0.9418345175876446, + "grad_norm": 0.0, + "learning_rate": 1.7692084952356081e-07, + "loss": 1.0825, + "step": 32077 + }, + { + "epoch": 0.9418638792647837, + "grad_norm": 0.0, + "learning_rate": 1.7674280478204854e-07, + "loss": 1.0386, + "step": 32078 + }, + { + "epoch": 0.9418932409419226, + "grad_norm": 0.0, + "learning_rate": 1.7656484887476843e-07, + "loss": 1.2803, + "step": 32079 + }, + { + "epoch": 0.9419226026190616, + "grad_norm": 0.0, + "learning_rate": 1.7638698180332815e-07, + "loss": 1.1445, + "step": 32080 + }, + { + "epoch": 0.9419519642962007, + "grad_norm": 0.0, + "learning_rate": 1.7620920356933524e-07, + "loss": 1.1172, + "step": 32081 + }, + { + "epoch": 0.9419813259733396, + "grad_norm": 0.0, + "learning_rate": 1.7603151417439957e-07, + "loss": 1.1924, + "step": 32082 + }, + { + "epoch": 0.9420106876504786, + "grad_norm": 0.0, + "learning_rate": 1.7585391362012538e-07, + "loss": 1.2183, + "step": 32083 + }, + { + "epoch": 0.9420400493276176, + "grad_norm": 0.0, + "learning_rate": 1.756764019081214e-07, + "loss": 1.1958, + "step": 32084 + }, + { + "epoch": 0.9420694110047566, + "grad_norm": 0.0, + "learning_rate": 1.754989790399908e-07, + "loss": 0.9585, + "step": 32085 + }, + { + "epoch": 0.9420987726818956, + "grad_norm": 0.0, + "learning_rate": 1.7532164501733895e-07, + "loss": 1.1753, + "step": 32086 + }, + { + "epoch": 0.9421281343590346, + "grad_norm": 0.0, + "learning_rate": 1.7514439984177122e-07, + "loss": 1.2485, + "step": 32087 + }, + { + "epoch": 0.9421574960361736, + "grad_norm": 0.0, + "learning_rate": 1.7496724351488747e-07, + "loss": 1.3174, + "step": 32088 + }, + { + "epoch": 0.9421868577133126, + "grad_norm": 0.0, + "learning_rate": 1.7479017603829195e-07, + "loss": 1.188, + "step": 32089 + }, + { + "epoch": 0.9422162193904516, + "grad_norm": 0.0, + "learning_rate": 1.746131974135845e-07, + "loss": 1.252, + "step": 32090 + }, + { + "epoch": 0.9422455810675906, + "grad_norm": 0.0, + "learning_rate": 1.744363076423672e-07, + "loss": 1.1704, + "step": 32091 + }, + { + "epoch": 0.9422749427447296, + "grad_norm": 0.0, + "learning_rate": 1.7425950672623647e-07, + "loss": 1.2065, + "step": 32092 + }, + { + "epoch": 0.9423043044218686, + "grad_norm": 0.0, + "learning_rate": 1.7408279466679556e-07, + "loss": 1.2461, + "step": 32093 + }, + { + "epoch": 0.9423336660990076, + "grad_norm": 0.0, + "learning_rate": 1.7390617146563983e-07, + "loss": 1.106, + "step": 32094 + }, + { + "epoch": 0.9423630277761466, + "grad_norm": 0.0, + "learning_rate": 1.7372963712436684e-07, + "loss": 1.2461, + "step": 32095 + }, + { + "epoch": 0.9423923894532855, + "grad_norm": 0.0, + "learning_rate": 1.7355319164457428e-07, + "loss": 1.207, + "step": 32096 + }, + { + "epoch": 0.9424217511304246, + "grad_norm": 0.0, + "learning_rate": 1.7337683502785641e-07, + "loss": 1.1943, + "step": 32097 + }, + { + "epoch": 0.9424511128075636, + "grad_norm": 0.0, + "learning_rate": 1.7320056727580746e-07, + "loss": 1.5068, + "step": 32098 + }, + { + "epoch": 0.9424804744847025, + "grad_norm": 0.0, + "learning_rate": 1.730243883900229e-07, + "loss": 1.2241, + "step": 32099 + }, + { + "epoch": 0.9425098361618416, + "grad_norm": 0.0, + "learning_rate": 1.728482983720958e-07, + "loss": 1.2378, + "step": 32100 + }, + { + "epoch": 0.9425391978389805, + "grad_norm": 0.0, + "learning_rate": 1.7267229722361833e-07, + "loss": 1.1904, + "step": 32101 + }, + { + "epoch": 0.9425685595161195, + "grad_norm": 0.0, + "learning_rate": 1.7249638494618138e-07, + "loss": 1.1904, + "step": 32102 + }, + { + "epoch": 0.9425979211932586, + "grad_norm": 0.0, + "learning_rate": 1.72320561541377e-07, + "loss": 1.167, + "step": 32103 + }, + { + "epoch": 0.9426272828703975, + "grad_norm": 0.0, + "learning_rate": 1.7214482701079506e-07, + "loss": 1.2305, + "step": 32104 + }, + { + "epoch": 0.9426566445475365, + "grad_norm": 0.0, + "learning_rate": 1.7196918135602424e-07, + "loss": 1.2593, + "step": 32105 + }, + { + "epoch": 0.9426860062246756, + "grad_norm": 0.0, + "learning_rate": 1.7179362457865333e-07, + "loss": 1.2246, + "step": 32106 + }, + { + "epoch": 0.9427153679018145, + "grad_norm": 0.0, + "learning_rate": 1.7161815668026881e-07, + "loss": 1.1636, + "step": 32107 + }, + { + "epoch": 0.9427447295789535, + "grad_norm": 0.0, + "learning_rate": 1.7144277766245943e-07, + "loss": 1.2207, + "step": 32108 + }, + { + "epoch": 0.9427740912560926, + "grad_norm": 0.0, + "learning_rate": 1.7126748752680943e-07, + "loss": 1.1206, + "step": 32109 + }, + { + "epoch": 0.9428034529332315, + "grad_norm": 0.0, + "learning_rate": 1.7109228627490426e-07, + "loss": 1.2344, + "step": 32110 + }, + { + "epoch": 0.9428328146103705, + "grad_norm": 0.0, + "learning_rate": 1.709171739083304e-07, + "loss": 1.2598, + "step": 32111 + }, + { + "epoch": 0.9428621762875096, + "grad_norm": 0.0, + "learning_rate": 1.707421504286688e-07, + "loss": 1.1245, + "step": 32112 + }, + { + "epoch": 0.9428915379646485, + "grad_norm": 0.0, + "learning_rate": 1.705672158375038e-07, + "loss": 1.0542, + "step": 32113 + }, + { + "epoch": 0.9429208996417875, + "grad_norm": 0.0, + "learning_rate": 1.7039237013641519e-07, + "loss": 1.2212, + "step": 32114 + }, + { + "epoch": 0.9429502613189266, + "grad_norm": 0.0, + "learning_rate": 1.7021761332698727e-07, + "loss": 1.0996, + "step": 32115 + }, + { + "epoch": 0.9429796229960655, + "grad_norm": 0.0, + "learning_rate": 1.7004294541079769e-07, + "loss": 1.1309, + "step": 32116 + }, + { + "epoch": 0.9430089846732045, + "grad_norm": 0.0, + "learning_rate": 1.698683663894285e-07, + "loss": 1.2085, + "step": 32117 + }, + { + "epoch": 0.9430383463503436, + "grad_norm": 0.0, + "learning_rate": 1.6969387626445622e-07, + "loss": 1.3115, + "step": 32118 + }, + { + "epoch": 0.9430677080274825, + "grad_norm": 0.0, + "learning_rate": 1.695194750374607e-07, + "loss": 1.0864, + "step": 32119 + }, + { + "epoch": 0.9430970697046215, + "grad_norm": 0.0, + "learning_rate": 1.6934516271001845e-07, + "loss": 1.1201, + "step": 32120 + }, + { + "epoch": 0.9431264313817606, + "grad_norm": 0.0, + "learning_rate": 1.6917093928370486e-07, + "loss": 1.1396, + "step": 32121 + }, + { + "epoch": 0.9431557930588995, + "grad_norm": 0.0, + "learning_rate": 1.689968047600965e-07, + "loss": 1.1123, + "step": 32122 + }, + { + "epoch": 0.9431851547360385, + "grad_norm": 0.0, + "learning_rate": 1.688227591407676e-07, + "loss": 1.0391, + "step": 32123 + }, + { + "epoch": 0.9432145164131776, + "grad_norm": 0.0, + "learning_rate": 1.6864880242729253e-07, + "loss": 1.3096, + "step": 32124 + }, + { + "epoch": 0.9432438780903165, + "grad_norm": 0.0, + "learning_rate": 1.6847493462124332e-07, + "loss": 1.252, + "step": 32125 + }, + { + "epoch": 0.9432732397674555, + "grad_norm": 0.0, + "learning_rate": 1.6830115572419424e-07, + "loss": 1.1406, + "step": 32126 + }, + { + "epoch": 0.9433026014445945, + "grad_norm": 0.0, + "learning_rate": 1.681274657377141e-07, + "loss": 1.0898, + "step": 32127 + }, + { + "epoch": 0.9433319631217335, + "grad_norm": 0.0, + "learning_rate": 1.6795386466337715e-07, + "loss": 1.1968, + "step": 32128 + }, + { + "epoch": 0.9433613247988725, + "grad_norm": 0.0, + "learning_rate": 1.6778035250275105e-07, + "loss": 1.3301, + "step": 32129 + }, + { + "epoch": 0.9433906864760115, + "grad_norm": 0.0, + "learning_rate": 1.6760692925740562e-07, + "loss": 1.1777, + "step": 32130 + }, + { + "epoch": 0.9434200481531505, + "grad_norm": 0.0, + "learning_rate": 1.6743359492890742e-07, + "loss": 1.2456, + "step": 32131 + }, + { + "epoch": 0.9434494098302895, + "grad_norm": 0.0, + "learning_rate": 1.6726034951882631e-07, + "loss": 1.1948, + "step": 32132 + }, + { + "epoch": 0.9434787715074285, + "grad_norm": 0.0, + "learning_rate": 1.6708719302872766e-07, + "loss": 1.0386, + "step": 32133 + }, + { + "epoch": 0.9435081331845675, + "grad_norm": 0.0, + "learning_rate": 1.6691412546017806e-07, + "loss": 1.2539, + "step": 32134 + }, + { + "epoch": 0.9435374948617065, + "grad_norm": 0.0, + "learning_rate": 1.6674114681474286e-07, + "loss": 1.1523, + "step": 32135 + }, + { + "epoch": 0.9435668565388455, + "grad_norm": 0.0, + "learning_rate": 1.6656825709398529e-07, + "loss": 1.1641, + "step": 32136 + }, + { + "epoch": 0.9435962182159845, + "grad_norm": 0.0, + "learning_rate": 1.6639545629946963e-07, + "loss": 1.1724, + "step": 32137 + }, + { + "epoch": 0.9436255798931235, + "grad_norm": 0.0, + "learning_rate": 1.6622274443275688e-07, + "loss": 1.272, + "step": 32138 + }, + { + "epoch": 0.9436549415702625, + "grad_norm": 0.0, + "learning_rate": 1.6605012149541133e-07, + "loss": 1.2725, + "step": 32139 + }, + { + "epoch": 0.9436843032474015, + "grad_norm": 0.0, + "learning_rate": 1.6587758748899396e-07, + "loss": 1.1807, + "step": 32140 + }, + { + "epoch": 0.9437136649245405, + "grad_norm": 0.0, + "learning_rate": 1.6570514241506242e-07, + "loss": 1.127, + "step": 32141 + }, + { + "epoch": 0.9437430266016795, + "grad_norm": 0.0, + "learning_rate": 1.6553278627517877e-07, + "loss": 1.1475, + "step": 32142 + }, + { + "epoch": 0.9437723882788185, + "grad_norm": 0.0, + "learning_rate": 1.6536051907090068e-07, + "loss": 1.1885, + "step": 32143 + }, + { + "epoch": 0.9438017499559574, + "grad_norm": 0.0, + "learning_rate": 1.6518834080378577e-07, + "loss": 1.2246, + "step": 32144 + }, + { + "epoch": 0.9438311116330965, + "grad_norm": 0.0, + "learning_rate": 1.650162514753928e-07, + "loss": 1.145, + "step": 32145 + }, + { + "epoch": 0.9438604733102355, + "grad_norm": 0.0, + "learning_rate": 1.6484425108727608e-07, + "loss": 1.1738, + "step": 32146 + }, + { + "epoch": 0.9438898349873744, + "grad_norm": 0.0, + "learning_rate": 1.6467233964098995e-07, + "loss": 1.1943, + "step": 32147 + }, + { + "epoch": 0.9439191966645135, + "grad_norm": 0.0, + "learning_rate": 1.64500517138092e-07, + "loss": 1.2285, + "step": 32148 + }, + { + "epoch": 0.9439485583416525, + "grad_norm": 0.0, + "learning_rate": 1.6432878358013548e-07, + "loss": 1.2207, + "step": 32149 + }, + { + "epoch": 0.9439779200187914, + "grad_norm": 0.0, + "learning_rate": 1.6415713896867135e-07, + "loss": 1.2148, + "step": 32150 + }, + { + "epoch": 0.9440072816959305, + "grad_norm": 0.0, + "learning_rate": 1.6398558330525394e-07, + "loss": 1.2266, + "step": 32151 + }, + { + "epoch": 0.9440366433730695, + "grad_norm": 0.0, + "learning_rate": 1.638141165914331e-07, + "loss": 1.2725, + "step": 32152 + }, + { + "epoch": 0.9440660050502084, + "grad_norm": 0.0, + "learning_rate": 1.6364273882876203e-07, + "loss": 1.2432, + "step": 32153 + }, + { + "epoch": 0.9440953667273475, + "grad_norm": 0.0, + "learning_rate": 1.6347145001878728e-07, + "loss": 1.2065, + "step": 32154 + }, + { + "epoch": 0.9441247284044865, + "grad_norm": 0.0, + "learning_rate": 1.6330025016306096e-07, + "loss": 1.1621, + "step": 32155 + }, + { + "epoch": 0.9441540900816254, + "grad_norm": 0.0, + "learning_rate": 1.6312913926312735e-07, + "loss": 1.1709, + "step": 32156 + }, + { + "epoch": 0.9441834517587645, + "grad_norm": 0.0, + "learning_rate": 1.6295811732053857e-07, + "loss": 1.2578, + "step": 32157 + }, + { + "epoch": 0.9442128134359035, + "grad_norm": 0.0, + "learning_rate": 1.627871843368367e-07, + "loss": 1.0273, + "step": 32158 + }, + { + "epoch": 0.9442421751130424, + "grad_norm": 0.0, + "learning_rate": 1.6261634031357053e-07, + "loss": 1.1934, + "step": 32159 + }, + { + "epoch": 0.9442715367901815, + "grad_norm": 0.0, + "learning_rate": 1.6244558525228548e-07, + "loss": 1.2002, + "step": 32160 + }, + { + "epoch": 0.9443008984673205, + "grad_norm": 0.0, + "learning_rate": 1.6227491915452365e-07, + "loss": 1.2148, + "step": 32161 + }, + { + "epoch": 0.9443302601444594, + "grad_norm": 0.0, + "learning_rate": 1.6210434202182823e-07, + "loss": 1.1909, + "step": 32162 + }, + { + "epoch": 0.9443596218215985, + "grad_norm": 0.0, + "learning_rate": 1.6193385385574356e-07, + "loss": 1.2529, + "step": 32163 + }, + { + "epoch": 0.9443889834987375, + "grad_norm": 0.0, + "learning_rate": 1.6176345465781063e-07, + "loss": 1.2217, + "step": 32164 + }, + { + "epoch": 0.9444183451758764, + "grad_norm": 0.0, + "learning_rate": 1.615931444295693e-07, + "loss": 1.3008, + "step": 32165 + }, + { + "epoch": 0.9444477068530155, + "grad_norm": 0.0, + "learning_rate": 1.614229231725628e-07, + "loss": 1.2441, + "step": 32166 + }, + { + "epoch": 0.9444770685301545, + "grad_norm": 0.0, + "learning_rate": 1.6125279088832657e-07, + "loss": 1.2363, + "step": 32167 + }, + { + "epoch": 0.9445064302072934, + "grad_norm": 0.0, + "learning_rate": 1.610827475784027e-07, + "loss": 1.0361, + "step": 32168 + }, + { + "epoch": 0.9445357918844325, + "grad_norm": 0.0, + "learning_rate": 1.6091279324432662e-07, + "loss": 1.3125, + "step": 32169 + }, + { + "epoch": 0.9445651535615714, + "grad_norm": 0.0, + "learning_rate": 1.6074292788763603e-07, + "loss": 1.2383, + "step": 32170 + }, + { + "epoch": 0.9445945152387104, + "grad_norm": 0.0, + "learning_rate": 1.6057315150986742e-07, + "loss": 1.229, + "step": 32171 + }, + { + "epoch": 0.9446238769158495, + "grad_norm": 0.0, + "learning_rate": 1.6040346411255403e-07, + "loss": 1.2065, + "step": 32172 + }, + { + "epoch": 0.9446532385929884, + "grad_norm": 0.0, + "learning_rate": 1.6023386569723355e-07, + "loss": 1.2422, + "step": 32173 + }, + { + "epoch": 0.9446826002701274, + "grad_norm": 0.0, + "learning_rate": 1.6006435626543804e-07, + "loss": 1.1445, + "step": 32174 + }, + { + "epoch": 0.9447119619472665, + "grad_norm": 0.0, + "learning_rate": 1.5989493581870075e-07, + "loss": 1.1519, + "step": 32175 + }, + { + "epoch": 0.9447413236244054, + "grad_norm": 0.0, + "learning_rate": 1.5972560435855268e-07, + "loss": 1.2446, + "step": 32176 + }, + { + "epoch": 0.9447706853015444, + "grad_norm": 0.0, + "learning_rate": 1.59556361886527e-07, + "loss": 1.1631, + "step": 32177 + }, + { + "epoch": 0.9448000469786835, + "grad_norm": 0.0, + "learning_rate": 1.5938720840415368e-07, + "loss": 1.1416, + "step": 32178 + }, + { + "epoch": 0.9448294086558224, + "grad_norm": 0.0, + "learning_rate": 1.5921814391296142e-07, + "loss": 1.2754, + "step": 32179 + }, + { + "epoch": 0.9448587703329614, + "grad_norm": 0.0, + "learning_rate": 1.5904916841448015e-07, + "loss": 1.1851, + "step": 32180 + }, + { + "epoch": 0.9448881320101005, + "grad_norm": 0.0, + "learning_rate": 1.5888028191023753e-07, + "loss": 1.249, + "step": 32181 + }, + { + "epoch": 0.9449174936872394, + "grad_norm": 0.0, + "learning_rate": 1.587114844017601e-07, + "loss": 1.1826, + "step": 32182 + }, + { + "epoch": 0.9449468553643784, + "grad_norm": 0.0, + "learning_rate": 1.5854277589057555e-07, + "loss": 1.3066, + "step": 32183 + }, + { + "epoch": 0.9449762170415175, + "grad_norm": 0.0, + "learning_rate": 1.5837415637820929e-07, + "loss": 1.0762, + "step": 32184 + }, + { + "epoch": 0.9450055787186564, + "grad_norm": 0.0, + "learning_rate": 1.582056258661857e-07, + "loss": 1.1509, + "step": 32185 + }, + { + "epoch": 0.9450349403957954, + "grad_norm": 0.0, + "learning_rate": 1.5803718435603022e-07, + "loss": 1.1592, + "step": 32186 + }, + { + "epoch": 0.9450643020729345, + "grad_norm": 0.0, + "learning_rate": 1.5786883184926384e-07, + "loss": 1.2227, + "step": 32187 + }, + { + "epoch": 0.9450936637500734, + "grad_norm": 0.0, + "learning_rate": 1.5770056834741087e-07, + "loss": 1.2158, + "step": 32188 + }, + { + "epoch": 0.9451230254272124, + "grad_norm": 0.0, + "learning_rate": 1.5753239385199236e-07, + "loss": 1.1714, + "step": 32189 + }, + { + "epoch": 0.9451523871043515, + "grad_norm": 0.0, + "learning_rate": 1.573643083645282e-07, + "loss": 1.2061, + "step": 32190 + }, + { + "epoch": 0.9451817487814904, + "grad_norm": 0.0, + "learning_rate": 1.5719631188653937e-07, + "loss": 1.2607, + "step": 32191 + }, + { + "epoch": 0.9452111104586294, + "grad_norm": 0.0, + "learning_rate": 1.570284044195458e-07, + "loss": 1.2734, + "step": 32192 + }, + { + "epoch": 0.9452404721357685, + "grad_norm": 0.0, + "learning_rate": 1.5686058596506516e-07, + "loss": 1.3203, + "step": 32193 + }, + { + "epoch": 0.9452698338129074, + "grad_norm": 0.0, + "learning_rate": 1.566928565246151e-07, + "loss": 1.2607, + "step": 32194 + }, + { + "epoch": 0.9452991954900464, + "grad_norm": 0.0, + "learning_rate": 1.5652521609971216e-07, + "loss": 1.1079, + "step": 32195 + }, + { + "epoch": 0.9453285571671853, + "grad_norm": 0.0, + "learning_rate": 1.5635766469187187e-07, + "loss": 1.1523, + "step": 32196 + }, + { + "epoch": 0.9453579188443244, + "grad_norm": 0.0, + "learning_rate": 1.5619020230261183e-07, + "loss": 1.3213, + "step": 32197 + }, + { + "epoch": 0.9453872805214634, + "grad_norm": 0.0, + "learning_rate": 1.5602282893344424e-07, + "loss": 1.2256, + "step": 32198 + }, + { + "epoch": 0.9454166421986023, + "grad_norm": 0.0, + "learning_rate": 1.5585554458588226e-07, + "loss": 1.377, + "step": 32199 + }, + { + "epoch": 0.9454460038757414, + "grad_norm": 0.0, + "learning_rate": 1.5568834926144138e-07, + "loss": 1.1753, + "step": 32200 + }, + { + "epoch": 0.9454753655528804, + "grad_norm": 0.0, + "learning_rate": 1.555212429616304e-07, + "loss": 1.1123, + "step": 32201 + }, + { + "epoch": 0.9455047272300193, + "grad_norm": 0.0, + "learning_rate": 1.5535422568796365e-07, + "loss": 1.165, + "step": 32202 + }, + { + "epoch": 0.9455340889071584, + "grad_norm": 0.0, + "learning_rate": 1.5518729744194883e-07, + "loss": 1.29, + "step": 32203 + }, + { + "epoch": 0.9455634505842974, + "grad_norm": 0.0, + "learning_rate": 1.5502045822509693e-07, + "loss": 1.2842, + "step": 32204 + }, + { + "epoch": 0.9455928122614363, + "grad_norm": 0.0, + "learning_rate": 1.5485370803891565e-07, + "loss": 1.2021, + "step": 32205 + }, + { + "epoch": 0.9456221739385754, + "grad_norm": 0.0, + "learning_rate": 1.5468704688491488e-07, + "loss": 1.1138, + "step": 32206 + }, + { + "epoch": 0.9456515356157144, + "grad_norm": 0.0, + "learning_rate": 1.5452047476459897e-07, + "loss": 1.2969, + "step": 32207 + }, + { + "epoch": 0.9456808972928533, + "grad_norm": 0.0, + "learning_rate": 1.5435399167947673e-07, + "loss": 1.2559, + "step": 32208 + }, + { + "epoch": 0.9457102589699924, + "grad_norm": 0.0, + "learning_rate": 1.5418759763105363e-07, + "loss": 1.207, + "step": 32209 + }, + { + "epoch": 0.9457396206471314, + "grad_norm": 0.0, + "learning_rate": 1.5402129262083288e-07, + "loss": 1.2871, + "step": 32210 + }, + { + "epoch": 0.9457689823242703, + "grad_norm": 0.0, + "learning_rate": 1.5385507665031996e-07, + "loss": 1.1382, + "step": 32211 + }, + { + "epoch": 0.9457983440014094, + "grad_norm": 0.0, + "learning_rate": 1.5368894972101588e-07, + "loss": 1.1777, + "step": 32212 + }, + { + "epoch": 0.9458277056785483, + "grad_norm": 0.0, + "learning_rate": 1.5352291183442503e-07, + "loss": 1.1992, + "step": 32213 + }, + { + "epoch": 0.9458570673556873, + "grad_norm": 0.0, + "learning_rate": 1.5335696299204727e-07, + "loss": 1.2227, + "step": 32214 + }, + { + "epoch": 0.9458864290328264, + "grad_norm": 0.0, + "learning_rate": 1.5319110319538587e-07, + "loss": 1.1855, + "step": 32215 + }, + { + "epoch": 0.9459157907099653, + "grad_norm": 0.0, + "learning_rate": 1.5302533244593743e-07, + "loss": 1.145, + "step": 32216 + }, + { + "epoch": 0.9459451523871043, + "grad_norm": 0.0, + "learning_rate": 1.5285965074520403e-07, + "loss": 1.1812, + "step": 32217 + }, + { + "epoch": 0.9459745140642434, + "grad_norm": 0.0, + "learning_rate": 1.526940580946823e-07, + "loss": 1.3008, + "step": 32218 + }, + { + "epoch": 0.9460038757413823, + "grad_norm": 0.0, + "learning_rate": 1.52528554495871e-07, + "loss": 1.2158, + "step": 32219 + }, + { + "epoch": 0.9460332374185213, + "grad_norm": 0.0, + "learning_rate": 1.5236313995026453e-07, + "loss": 1.2471, + "step": 32220 + }, + { + "epoch": 0.9460625990956604, + "grad_norm": 0.0, + "learning_rate": 1.5219781445936054e-07, + "loss": 1.2021, + "step": 32221 + }, + { + "epoch": 0.9460919607727993, + "grad_norm": 0.0, + "learning_rate": 1.5203257802465454e-07, + "loss": 1.2773, + "step": 32222 + }, + { + "epoch": 0.9461213224499383, + "grad_norm": 0.0, + "learning_rate": 1.5186743064763866e-07, + "loss": 1.186, + "step": 32223 + }, + { + "epoch": 0.9461506841270774, + "grad_norm": 0.0, + "learning_rate": 1.5170237232980945e-07, + "loss": 1.2246, + "step": 32224 + }, + { + "epoch": 0.9461800458042163, + "grad_norm": 0.0, + "learning_rate": 1.5153740307265575e-07, + "loss": 1.1914, + "step": 32225 + }, + { + "epoch": 0.9462094074813553, + "grad_norm": 0.0, + "learning_rate": 1.5137252287767302e-07, + "loss": 1.0625, + "step": 32226 + }, + { + "epoch": 0.9462387691584944, + "grad_norm": 0.0, + "learning_rate": 1.5120773174635116e-07, + "loss": 1.1108, + "step": 32227 + }, + { + "epoch": 0.9462681308356333, + "grad_norm": 0.0, + "learning_rate": 1.5104302968018015e-07, + "loss": 1.3604, + "step": 32228 + }, + { + "epoch": 0.9462974925127723, + "grad_norm": 0.0, + "learning_rate": 1.5087841668064874e-07, + "loss": 1.2764, + "step": 32229 + }, + { + "epoch": 0.9463268541899114, + "grad_norm": 0.0, + "learning_rate": 1.5071389274924576e-07, + "loss": 1.2256, + "step": 32230 + }, + { + "epoch": 0.9463562158670503, + "grad_norm": 0.0, + "learning_rate": 1.5054945788746e-07, + "loss": 1.2676, + "step": 32231 + }, + { + "epoch": 0.9463855775441893, + "grad_norm": 0.0, + "learning_rate": 1.5038511209677808e-07, + "loss": 1.3164, + "step": 32232 + }, + { + "epoch": 0.9464149392213284, + "grad_norm": 0.0, + "learning_rate": 1.5022085537868658e-07, + "loss": 1.1475, + "step": 32233 + }, + { + "epoch": 0.9464443008984673, + "grad_norm": 0.0, + "learning_rate": 1.5005668773466874e-07, + "loss": 1.1807, + "step": 32234 + }, + { + "epoch": 0.9464736625756063, + "grad_norm": 0.0, + "learning_rate": 1.4989260916621228e-07, + "loss": 1.1475, + "step": 32235 + }, + { + "epoch": 0.9465030242527454, + "grad_norm": 0.0, + "learning_rate": 1.4972861967479936e-07, + "loss": 1.1226, + "step": 32236 + }, + { + "epoch": 0.9465323859298843, + "grad_norm": 0.0, + "learning_rate": 1.495647192619143e-07, + "loss": 1.0166, + "step": 32237 + }, + { + "epoch": 0.9465617476070233, + "grad_norm": 0.0, + "learning_rate": 1.4940090792903705e-07, + "loss": 1.1831, + "step": 32238 + }, + { + "epoch": 0.9465911092841623, + "grad_norm": 0.0, + "learning_rate": 1.4923718567764977e-07, + "loss": 1.1367, + "step": 32239 + }, + { + "epoch": 0.9466204709613013, + "grad_norm": 0.0, + "learning_rate": 1.490735525092346e-07, + "loss": 1.1914, + "step": 32240 + }, + { + "epoch": 0.9466498326384403, + "grad_norm": 0.0, + "learning_rate": 1.4891000842526814e-07, + "loss": 1.0117, + "step": 32241 + }, + { + "epoch": 0.9466791943155793, + "grad_norm": 0.0, + "learning_rate": 1.4874655342723365e-07, + "loss": 1.1885, + "step": 32242 + }, + { + "epoch": 0.9467085559927183, + "grad_norm": 0.0, + "learning_rate": 1.485831875166055e-07, + "loss": 1.147, + "step": 32243 + }, + { + "epoch": 0.9467379176698573, + "grad_norm": 0.0, + "learning_rate": 1.484199106948636e-07, + "loss": 1.2402, + "step": 32244 + }, + { + "epoch": 0.9467672793469963, + "grad_norm": 0.0, + "learning_rate": 1.4825672296348237e-07, + "loss": 1.1611, + "step": 32245 + }, + { + "epoch": 0.9467966410241353, + "grad_norm": 0.0, + "learning_rate": 1.4809362432393948e-07, + "loss": 1.0723, + "step": 32246 + }, + { + "epoch": 0.9468260027012743, + "grad_norm": 0.0, + "learning_rate": 1.4793061477770931e-07, + "loss": 1.1982, + "step": 32247 + }, + { + "epoch": 0.9468553643784133, + "grad_norm": 0.0, + "learning_rate": 1.4776769432626513e-07, + "loss": 1.2583, + "step": 32248 + }, + { + "epoch": 0.9468847260555523, + "grad_norm": 0.0, + "learning_rate": 1.4760486297108135e-07, + "loss": 1.2998, + "step": 32249 + }, + { + "epoch": 0.9469140877326913, + "grad_norm": 0.0, + "learning_rate": 1.4744212071362895e-07, + "loss": 1.2134, + "step": 32250 + }, + { + "epoch": 0.9469434494098303, + "grad_norm": 0.0, + "learning_rate": 1.4727946755538236e-07, + "loss": 1.0991, + "step": 32251 + }, + { + "epoch": 0.9469728110869693, + "grad_norm": 0.0, + "learning_rate": 1.471169034978104e-07, + "loss": 1.3145, + "step": 32252 + }, + { + "epoch": 0.9470021727641083, + "grad_norm": 0.0, + "learning_rate": 1.4695442854238296e-07, + "loss": 1.2168, + "step": 32253 + }, + { + "epoch": 0.9470315344412473, + "grad_norm": 0.0, + "learning_rate": 1.4679204269057e-07, + "loss": 1.2017, + "step": 32254 + }, + { + "epoch": 0.9470608961183863, + "grad_norm": 0.0, + "learning_rate": 1.4662974594384038e-07, + "loss": 1.1455, + "step": 32255 + }, + { + "epoch": 0.9470902577955252, + "grad_norm": 0.0, + "learning_rate": 1.464675383036618e-07, + "loss": 1.1675, + "step": 32256 + }, + { + "epoch": 0.9471196194726643, + "grad_norm": 0.0, + "learning_rate": 1.463054197714997e-07, + "loss": 1.1187, + "step": 32257 + }, + { + "epoch": 0.9471489811498033, + "grad_norm": 0.0, + "learning_rate": 1.4614339034882296e-07, + "loss": 1.1577, + "step": 32258 + }, + { + "epoch": 0.9471783428269422, + "grad_norm": 0.0, + "learning_rate": 1.4598145003709375e-07, + "loss": 1.2065, + "step": 32259 + }, + { + "epoch": 0.9472077045040813, + "grad_norm": 0.0, + "learning_rate": 1.4581959883777862e-07, + "loss": 0.978, + "step": 32260 + }, + { + "epoch": 0.9472370661812203, + "grad_norm": 0.0, + "learning_rate": 1.4565783675234092e-07, + "loss": 1.2559, + "step": 32261 + }, + { + "epoch": 0.9472664278583592, + "grad_norm": 0.0, + "learning_rate": 1.4549616378224274e-07, + "loss": 1.2676, + "step": 32262 + }, + { + "epoch": 0.9472957895354983, + "grad_norm": 0.0, + "learning_rate": 1.4533457992894517e-07, + "loss": 1.2021, + "step": 32263 + }, + { + "epoch": 0.9473251512126373, + "grad_norm": 0.0, + "learning_rate": 1.451730851939126e-07, + "loss": 1.0298, + "step": 32264 + }, + { + "epoch": 0.9473545128897762, + "grad_norm": 0.0, + "learning_rate": 1.4501167957860273e-07, + "loss": 1.2305, + "step": 32265 + }, + { + "epoch": 0.9473838745669153, + "grad_norm": 0.0, + "learning_rate": 1.4485036308447663e-07, + "loss": 1.1299, + "step": 32266 + }, + { + "epoch": 0.9474132362440543, + "grad_norm": 0.0, + "learning_rate": 1.4468913571299204e-07, + "loss": 1.1909, + "step": 32267 + }, + { + "epoch": 0.9474425979211932, + "grad_norm": 0.0, + "learning_rate": 1.4452799746560885e-07, + "loss": 1.0776, + "step": 32268 + }, + { + "epoch": 0.9474719595983323, + "grad_norm": 0.0, + "learning_rate": 1.4436694834378262e-07, + "loss": 1.3213, + "step": 32269 + }, + { + "epoch": 0.9475013212754713, + "grad_norm": 0.0, + "learning_rate": 1.442059883489688e-07, + "loss": 1.2539, + "step": 32270 + }, + { + "epoch": 0.9475306829526102, + "grad_norm": 0.0, + "learning_rate": 1.4404511748262518e-07, + "loss": 1.2373, + "step": 32271 + }, + { + "epoch": 0.9475600446297493, + "grad_norm": 0.0, + "learning_rate": 1.4388433574620608e-07, + "loss": 1.0757, + "step": 32272 + }, + { + "epoch": 0.9475894063068883, + "grad_norm": 0.0, + "learning_rate": 1.4372364314116482e-07, + "loss": 1.1523, + "step": 32273 + }, + { + "epoch": 0.9476187679840272, + "grad_norm": 0.0, + "learning_rate": 1.4356303966895468e-07, + "loss": 1.1523, + "step": 32274 + }, + { + "epoch": 0.9476481296611663, + "grad_norm": 0.0, + "learning_rate": 1.4340252533102784e-07, + "loss": 1.335, + "step": 32275 + }, + { + "epoch": 0.9476774913383053, + "grad_norm": 0.0, + "learning_rate": 1.4324210012883755e-07, + "loss": 1.2148, + "step": 32276 + }, + { + "epoch": 0.9477068530154442, + "grad_norm": 0.0, + "learning_rate": 1.4308176406383267e-07, + "loss": 1.2471, + "step": 32277 + }, + { + "epoch": 0.9477362146925833, + "grad_norm": 0.0, + "learning_rate": 1.4292151713746427e-07, + "loss": 1.2354, + "step": 32278 + }, + { + "epoch": 0.9477655763697223, + "grad_norm": 0.0, + "learning_rate": 1.4276135935118007e-07, + "loss": 1.0762, + "step": 32279 + }, + { + "epoch": 0.9477949380468612, + "grad_norm": 0.0, + "learning_rate": 1.4260129070643002e-07, + "loss": 1.1274, + "step": 32280 + }, + { + "epoch": 0.9478242997240003, + "grad_norm": 0.0, + "learning_rate": 1.4244131120465964e-07, + "loss": 1.0776, + "step": 32281 + }, + { + "epoch": 0.9478536614011392, + "grad_norm": 0.0, + "learning_rate": 1.422814208473189e-07, + "loss": 1.3564, + "step": 32282 + }, + { + "epoch": 0.9478830230782782, + "grad_norm": 0.0, + "learning_rate": 1.4212161963585104e-07, + "loss": 1.2954, + "step": 32283 + }, + { + "epoch": 0.9479123847554173, + "grad_norm": 0.0, + "learning_rate": 1.419619075717016e-07, + "loss": 1.1904, + "step": 32284 + }, + { + "epoch": 0.9479417464325562, + "grad_norm": 0.0, + "learning_rate": 1.4180228465631608e-07, + "loss": 1.083, + "step": 32285 + }, + { + "epoch": 0.9479711081096952, + "grad_norm": 0.0, + "learning_rate": 1.4164275089113778e-07, + "loss": 1.3506, + "step": 32286 + }, + { + "epoch": 0.9480004697868343, + "grad_norm": 0.0, + "learning_rate": 1.4148330627760776e-07, + "loss": 1.2598, + "step": 32287 + }, + { + "epoch": 0.9480298314639732, + "grad_norm": 0.0, + "learning_rate": 1.4132395081716932e-07, + "loss": 1.1323, + "step": 32288 + }, + { + "epoch": 0.9480591931411122, + "grad_norm": 0.0, + "learning_rate": 1.4116468451126353e-07, + "loss": 1.125, + "step": 32289 + }, + { + "epoch": 0.9480885548182513, + "grad_norm": 0.0, + "learning_rate": 1.4100550736132922e-07, + "loss": 1.1172, + "step": 32290 + }, + { + "epoch": 0.9481179164953902, + "grad_norm": 0.0, + "learning_rate": 1.408464193688086e-07, + "loss": 1.2109, + "step": 32291 + }, + { + "epoch": 0.9481472781725292, + "grad_norm": 0.0, + "learning_rate": 1.4068742053513827e-07, + "loss": 1.207, + "step": 32292 + }, + { + "epoch": 0.9481766398496683, + "grad_norm": 0.0, + "learning_rate": 1.4052851086175602e-07, + "loss": 1.3936, + "step": 32293 + }, + { + "epoch": 0.9482060015268072, + "grad_norm": 0.0, + "learning_rate": 1.4036969035009952e-07, + "loss": 1.1963, + "step": 32294 + }, + { + "epoch": 0.9482353632039462, + "grad_norm": 0.0, + "learning_rate": 1.402109590016054e-07, + "loss": 1.2959, + "step": 32295 + }, + { + "epoch": 0.9482647248810852, + "grad_norm": 0.0, + "learning_rate": 1.4005231681770925e-07, + "loss": 1.0767, + "step": 32296 + }, + { + "epoch": 0.9482940865582242, + "grad_norm": 0.0, + "learning_rate": 1.3989376379984433e-07, + "loss": 1.3877, + "step": 32297 + }, + { + "epoch": 0.9483234482353632, + "grad_norm": 0.0, + "learning_rate": 1.397352999494461e-07, + "loss": 1.2158, + "step": 32298 + }, + { + "epoch": 0.9483528099125021, + "grad_norm": 0.0, + "learning_rate": 1.3957692526794574e-07, + "loss": 1.1953, + "step": 32299 + }, + { + "epoch": 0.9483821715896412, + "grad_norm": 0.0, + "learning_rate": 1.3941863975677762e-07, + "loss": 1.2783, + "step": 32300 + }, + { + "epoch": 0.9484115332667802, + "grad_norm": 0.0, + "learning_rate": 1.3926044341737166e-07, + "loss": 1.1035, + "step": 32301 + }, + { + "epoch": 0.9484408949439191, + "grad_norm": 0.0, + "learning_rate": 1.3910233625115898e-07, + "loss": 1.1631, + "step": 32302 + }, + { + "epoch": 0.9484702566210582, + "grad_norm": 0.0, + "learning_rate": 1.3894431825956845e-07, + "loss": 1.2725, + "step": 32303 + }, + { + "epoch": 0.9484996182981972, + "grad_norm": 0.0, + "learning_rate": 1.3878638944403e-07, + "loss": 1.3027, + "step": 32304 + }, + { + "epoch": 0.9485289799753361, + "grad_norm": 0.0, + "learning_rate": 1.3862854980597252e-07, + "loss": 1.1587, + "step": 32305 + }, + { + "epoch": 0.9485583416524752, + "grad_norm": 0.0, + "learning_rate": 1.3847079934682262e-07, + "loss": 1.1284, + "step": 32306 + }, + { + "epoch": 0.9485877033296142, + "grad_norm": 0.0, + "learning_rate": 1.3831313806800695e-07, + "loss": 1.123, + "step": 32307 + }, + { + "epoch": 0.9486170650067531, + "grad_norm": 0.0, + "learning_rate": 1.3815556597094992e-07, + "loss": 1.1763, + "step": 32308 + }, + { + "epoch": 0.9486464266838922, + "grad_norm": 0.0, + "learning_rate": 1.379980830570793e-07, + "loss": 1.3691, + "step": 32309 + }, + { + "epoch": 0.9486757883610312, + "grad_norm": 0.0, + "learning_rate": 1.3784068932781724e-07, + "loss": 1.1821, + "step": 32310 + }, + { + "epoch": 0.9487051500381701, + "grad_norm": 0.0, + "learning_rate": 1.376833847845871e-07, + "loss": 1.124, + "step": 32311 + }, + { + "epoch": 0.9487345117153092, + "grad_norm": 0.0, + "learning_rate": 1.3752616942881214e-07, + "loss": 1.106, + "step": 32312 + }, + { + "epoch": 0.9487638733924482, + "grad_norm": 0.0, + "learning_rate": 1.3736904326191348e-07, + "loss": 1.3096, + "step": 32313 + }, + { + "epoch": 0.9487932350695871, + "grad_norm": 0.0, + "learning_rate": 1.3721200628531216e-07, + "loss": 1.2451, + "step": 32314 + }, + { + "epoch": 0.9488225967467262, + "grad_norm": 0.0, + "learning_rate": 1.3705505850042932e-07, + "loss": 1.2207, + "step": 32315 + }, + { + "epoch": 0.9488519584238652, + "grad_norm": 0.0, + "learning_rate": 1.368981999086838e-07, + "loss": 1.2388, + "step": 32316 + }, + { + "epoch": 0.9488813201010041, + "grad_norm": 0.0, + "learning_rate": 1.3674143051149447e-07, + "loss": 1.2734, + "step": 32317 + }, + { + "epoch": 0.9489106817781432, + "grad_norm": 0.0, + "learning_rate": 1.3658475031027685e-07, + "loss": 1.0645, + "step": 32318 + }, + { + "epoch": 0.9489400434552822, + "grad_norm": 0.0, + "learning_rate": 1.3642815930644983e-07, + "loss": 1.0601, + "step": 32319 + }, + { + "epoch": 0.9489694051324211, + "grad_norm": 0.0, + "learning_rate": 1.362716575014289e-07, + "loss": 1.2363, + "step": 32320 + }, + { + "epoch": 0.9489987668095602, + "grad_norm": 0.0, + "learning_rate": 1.3611524489662965e-07, + "loss": 1.1626, + "step": 32321 + }, + { + "epoch": 0.9490281284866992, + "grad_norm": 0.0, + "learning_rate": 1.3595892149346645e-07, + "loss": 1.3027, + "step": 32322 + }, + { + "epoch": 0.9490574901638381, + "grad_norm": 0.0, + "learning_rate": 1.3580268729335267e-07, + "loss": 1.2324, + "step": 32323 + }, + { + "epoch": 0.9490868518409772, + "grad_norm": 0.0, + "learning_rate": 1.3564654229770268e-07, + "loss": 1.2627, + "step": 32324 + }, + { + "epoch": 0.9491162135181161, + "grad_norm": 0.0, + "learning_rate": 1.354904865079265e-07, + "loss": 1.2012, + "step": 32325 + }, + { + "epoch": 0.9491455751952551, + "grad_norm": 0.0, + "learning_rate": 1.3533451992543634e-07, + "loss": 1.1055, + "step": 32326 + }, + { + "epoch": 0.9491749368723942, + "grad_norm": 0.0, + "learning_rate": 1.3517864255164214e-07, + "loss": 1.3242, + "step": 32327 + }, + { + "epoch": 0.9492042985495331, + "grad_norm": 0.0, + "learning_rate": 1.3502285438795393e-07, + "loss": 1.231, + "step": 32328 + }, + { + "epoch": 0.9492336602266721, + "grad_norm": 0.0, + "learning_rate": 1.3486715543578055e-07, + "loss": 1.166, + "step": 32329 + }, + { + "epoch": 0.9492630219038112, + "grad_norm": 0.0, + "learning_rate": 1.347115456965298e-07, + "loss": 1.2852, + "step": 32330 + }, + { + "epoch": 0.9492923835809501, + "grad_norm": 0.0, + "learning_rate": 1.3455602517160937e-07, + "loss": 1.2124, + "step": 32331 + }, + { + "epoch": 0.9493217452580891, + "grad_norm": 0.0, + "learning_rate": 1.3440059386242488e-07, + "loss": 1.1797, + "step": 32332 + }, + { + "epoch": 0.9493511069352282, + "grad_norm": 0.0, + "learning_rate": 1.3424525177038296e-07, + "loss": 1.1953, + "step": 32333 + }, + { + "epoch": 0.9493804686123671, + "grad_norm": 0.0, + "learning_rate": 1.34089998896888e-07, + "loss": 1.2202, + "step": 32334 + }, + { + "epoch": 0.9494098302895061, + "grad_norm": 0.0, + "learning_rate": 1.339348352433445e-07, + "loss": 1.1699, + "step": 32335 + }, + { + "epoch": 0.9494391919666452, + "grad_norm": 0.0, + "learning_rate": 1.3377976081115462e-07, + "loss": 1.2617, + "step": 32336 + }, + { + "epoch": 0.9494685536437841, + "grad_norm": 0.0, + "learning_rate": 1.3362477560171949e-07, + "loss": 1.2598, + "step": 32337 + }, + { + "epoch": 0.9494979153209231, + "grad_norm": 0.0, + "learning_rate": 1.3346987961644464e-07, + "loss": 1.2944, + "step": 32338 + }, + { + "epoch": 0.9495272769980622, + "grad_norm": 0.0, + "learning_rate": 1.3331507285672673e-07, + "loss": 1.25, + "step": 32339 + }, + { + "epoch": 0.9495566386752011, + "grad_norm": 0.0, + "learning_rate": 1.3316035532396797e-07, + "loss": 1.1187, + "step": 32340 + }, + { + "epoch": 0.9495860003523401, + "grad_norm": 0.0, + "learning_rate": 1.3300572701956838e-07, + "loss": 1.2246, + "step": 32341 + }, + { + "epoch": 0.9496153620294792, + "grad_norm": 0.0, + "learning_rate": 1.3285118794492345e-07, + "loss": 1.2329, + "step": 32342 + }, + { + "epoch": 0.9496447237066181, + "grad_norm": 0.0, + "learning_rate": 1.3269673810143213e-07, + "loss": 1.2461, + "step": 32343 + }, + { + "epoch": 0.9496740853837571, + "grad_norm": 0.0, + "learning_rate": 1.3254237749049215e-07, + "loss": 1.1597, + "step": 32344 + }, + { + "epoch": 0.9497034470608962, + "grad_norm": 0.0, + "learning_rate": 1.3238810611349794e-07, + "loss": 1.1885, + "step": 32345 + }, + { + "epoch": 0.9497328087380351, + "grad_norm": 0.0, + "learning_rate": 1.322339239718462e-07, + "loss": 1.0288, + "step": 32346 + }, + { + "epoch": 0.9497621704151741, + "grad_norm": 0.0, + "learning_rate": 1.3207983106692913e-07, + "loss": 1.2979, + "step": 32347 + }, + { + "epoch": 0.9497915320923132, + "grad_norm": 0.0, + "learning_rate": 1.3192582740014114e-07, + "loss": 1.2109, + "step": 32348 + }, + { + "epoch": 0.9498208937694521, + "grad_norm": 0.0, + "learning_rate": 1.3177191297287673e-07, + "loss": 1.1904, + "step": 32349 + }, + { + "epoch": 0.9498502554465911, + "grad_norm": 0.0, + "learning_rate": 1.3161808778652584e-07, + "loss": 1.2773, + "step": 32350 + }, + { + "epoch": 0.9498796171237301, + "grad_norm": 0.0, + "learning_rate": 1.3146435184247964e-07, + "loss": 1.1836, + "step": 32351 + }, + { + "epoch": 0.9499089788008691, + "grad_norm": 0.0, + "learning_rate": 1.3131070514212696e-07, + "loss": 1.2061, + "step": 32352 + }, + { + "epoch": 0.9499383404780081, + "grad_norm": 0.0, + "learning_rate": 1.3115714768686115e-07, + "loss": 1.123, + "step": 32353 + }, + { + "epoch": 0.9499677021551471, + "grad_norm": 0.0, + "learning_rate": 1.310036794780678e-07, + "loss": 1.2773, + "step": 32354 + }, + { + "epoch": 0.9499970638322861, + "grad_norm": 0.0, + "learning_rate": 1.3085030051713576e-07, + "loss": 1.251, + "step": 32355 + }, + { + "epoch": 0.9500264255094251, + "grad_norm": 0.0, + "learning_rate": 1.306970108054517e-07, + "loss": 1.168, + "step": 32356 + }, + { + "epoch": 0.9500557871865641, + "grad_norm": 0.0, + "learning_rate": 1.3054381034440233e-07, + "loss": 1.3125, + "step": 32357 + }, + { + "epoch": 0.9500851488637031, + "grad_norm": 0.0, + "learning_rate": 1.3039069913537316e-07, + "loss": 1.2153, + "step": 32358 + }, + { + "epoch": 0.9501145105408421, + "grad_norm": 0.0, + "learning_rate": 1.3023767717974867e-07, + "loss": 1.1626, + "step": 32359 + }, + { + "epoch": 0.9501438722179811, + "grad_norm": 0.0, + "learning_rate": 1.300847444789133e-07, + "loss": 0.9746, + "step": 32360 + }, + { + "epoch": 0.9501732338951201, + "grad_norm": 0.0, + "learning_rate": 1.29931901034247e-07, + "loss": 1.2773, + "step": 32361 + }, + { + "epoch": 0.9502025955722591, + "grad_norm": 0.0, + "learning_rate": 1.2977914684713654e-07, + "loss": 1.2314, + "step": 32362 + }, + { + "epoch": 0.9502319572493981, + "grad_norm": 0.0, + "learning_rate": 1.2962648191895965e-07, + "loss": 1.2734, + "step": 32363 + }, + { + "epoch": 0.9502613189265371, + "grad_norm": 0.0, + "learning_rate": 1.2947390625109856e-07, + "loss": 1.1758, + "step": 32364 + }, + { + "epoch": 0.950290680603676, + "grad_norm": 0.0, + "learning_rate": 1.293214198449333e-07, + "loss": 1.0234, + "step": 32365 + }, + { + "epoch": 0.9503200422808151, + "grad_norm": 0.0, + "learning_rate": 1.2916902270184272e-07, + "loss": 1.1128, + "step": 32366 + }, + { + "epoch": 0.9503494039579541, + "grad_norm": 0.0, + "learning_rate": 1.2901671482320466e-07, + "loss": 1.1177, + "step": 32367 + }, + { + "epoch": 0.950378765635093, + "grad_norm": 0.0, + "learning_rate": 1.2886449621039465e-07, + "loss": 1.0322, + "step": 32368 + }, + { + "epoch": 0.9504081273122321, + "grad_norm": 0.0, + "learning_rate": 1.287123668647927e-07, + "loss": 1.1763, + "step": 32369 + }, + { + "epoch": 0.9504374889893711, + "grad_norm": 0.0, + "learning_rate": 1.285603267877722e-07, + "loss": 1.2598, + "step": 32370 + }, + { + "epoch": 0.95046685066651, + "grad_norm": 0.0, + "learning_rate": 1.2840837598070865e-07, + "loss": 1.2441, + "step": 32371 + }, + { + "epoch": 0.9504962123436491, + "grad_norm": 0.0, + "learning_rate": 1.2825651444497655e-07, + "loss": 1.1802, + "step": 32372 + }, + { + "epoch": 0.9505255740207881, + "grad_norm": 0.0, + "learning_rate": 1.2810474218194923e-07, + "loss": 1.2236, + "step": 32373 + }, + { + "epoch": 0.950554935697927, + "grad_norm": 0.0, + "learning_rate": 1.2795305919299893e-07, + "loss": 1.2637, + "step": 32374 + }, + { + "epoch": 0.9505842973750661, + "grad_norm": 0.0, + "learning_rate": 1.278014654794968e-07, + "loss": 1.2256, + "step": 32375 + }, + { + "epoch": 0.9506136590522051, + "grad_norm": 0.0, + "learning_rate": 1.2764996104281503e-07, + "loss": 1.2969, + "step": 32376 + }, + { + "epoch": 0.950643020729344, + "grad_norm": 0.0, + "learning_rate": 1.2749854588432253e-07, + "loss": 1.1353, + "step": 32377 + }, + { + "epoch": 0.9506723824064831, + "grad_norm": 0.0, + "learning_rate": 1.2734722000538934e-07, + "loss": 1.085, + "step": 32378 + }, + { + "epoch": 0.9507017440836221, + "grad_norm": 0.0, + "learning_rate": 1.2719598340738326e-07, + "loss": 1.1162, + "step": 32379 + }, + { + "epoch": 0.950731105760761, + "grad_norm": 0.0, + "learning_rate": 1.2704483609167318e-07, + "loss": 1.2627, + "step": 32380 + }, + { + "epoch": 0.9507604674379001, + "grad_norm": 0.0, + "learning_rate": 1.2689377805962467e-07, + "loss": 1.1538, + "step": 32381 + }, + { + "epoch": 0.9507898291150391, + "grad_norm": 0.0, + "learning_rate": 1.267428093126044e-07, + "loss": 1.2178, + "step": 32382 + }, + { + "epoch": 0.950819190792178, + "grad_norm": 0.0, + "learning_rate": 1.2659192985197687e-07, + "loss": 1.2354, + "step": 32383 + }, + { + "epoch": 0.9508485524693171, + "grad_norm": 0.0, + "learning_rate": 1.2644113967910765e-07, + "loss": 1.1313, + "step": 32384 + }, + { + "epoch": 0.9508779141464561, + "grad_norm": 0.0, + "learning_rate": 1.2629043879536007e-07, + "loss": 1.2231, + "step": 32385 + }, + { + "epoch": 0.950907275823595, + "grad_norm": 0.0, + "learning_rate": 1.261398272020964e-07, + "loss": 1.2075, + "step": 32386 + }, + { + "epoch": 0.9509366375007341, + "grad_norm": 0.0, + "learning_rate": 1.2598930490067996e-07, + "loss": 1.1812, + "step": 32387 + }, + { + "epoch": 0.9509659991778731, + "grad_norm": 0.0, + "learning_rate": 1.2583887189246968e-07, + "loss": 1.1401, + "step": 32388 + }, + { + "epoch": 0.950995360855012, + "grad_norm": 0.0, + "learning_rate": 1.256885281788278e-07, + "loss": 1.291, + "step": 32389 + }, + { + "epoch": 0.9510247225321511, + "grad_norm": 0.0, + "learning_rate": 1.2553827376111216e-07, + "loss": 1.2246, + "step": 32390 + }, + { + "epoch": 0.95105408420929, + "grad_norm": 0.0, + "learning_rate": 1.2538810864068495e-07, + "loss": 1.2212, + "step": 32391 + }, + { + "epoch": 0.951083445886429, + "grad_norm": 0.0, + "learning_rate": 1.252380328189007e-07, + "loss": 1.3291, + "step": 32392 + }, + { + "epoch": 0.9511128075635681, + "grad_norm": 0.0, + "learning_rate": 1.2508804629711935e-07, + "loss": 1.1533, + "step": 32393 + }, + { + "epoch": 0.951142169240707, + "grad_norm": 0.0, + "learning_rate": 1.249381490766932e-07, + "loss": 1.145, + "step": 32394 + }, + { + "epoch": 0.951171530917846, + "grad_norm": 0.0, + "learning_rate": 1.247883411589823e-07, + "loss": 1.1709, + "step": 32395 + }, + { + "epoch": 0.951200892594985, + "grad_norm": 0.0, + "learning_rate": 1.2463862254534e-07, + "loss": 1.1895, + "step": 32396 + }, + { + "epoch": 0.951230254272124, + "grad_norm": 0.0, + "learning_rate": 1.2448899323711848e-07, + "loss": 1.1128, + "step": 32397 + }, + { + "epoch": 0.951259615949263, + "grad_norm": 0.0, + "learning_rate": 1.2433945323567232e-07, + "loss": 1.2871, + "step": 32398 + }, + { + "epoch": 0.951288977626402, + "grad_norm": 0.0, + "learning_rate": 1.241900025423537e-07, + "loss": 1.0669, + "step": 32399 + }, + { + "epoch": 0.951318339303541, + "grad_norm": 0.0, + "learning_rate": 1.240406411585149e-07, + "loss": 1.2129, + "step": 32400 + }, + { + "epoch": 0.95134770098068, + "grad_norm": 0.0, + "learning_rate": 1.2389136908550482e-07, + "loss": 1.1636, + "step": 32401 + }, + { + "epoch": 0.951377062657819, + "grad_norm": 0.0, + "learning_rate": 1.237421863246757e-07, + "loss": 1.1465, + "step": 32402 + }, + { + "epoch": 0.951406424334958, + "grad_norm": 0.0, + "learning_rate": 1.235930928773732e-07, + "loss": 1.1987, + "step": 32403 + }, + { + "epoch": 0.951435786012097, + "grad_norm": 0.0, + "learning_rate": 1.2344408874494952e-07, + "loss": 1.1558, + "step": 32404 + }, + { + "epoch": 0.951465147689236, + "grad_norm": 0.0, + "learning_rate": 1.2329517392875022e-07, + "loss": 1.1987, + "step": 32405 + }, + { + "epoch": 0.951494509366375, + "grad_norm": 0.0, + "learning_rate": 1.2314634843012098e-07, + "loss": 1.2505, + "step": 32406 + }, + { + "epoch": 0.951523871043514, + "grad_norm": 0.0, + "learning_rate": 1.2299761225041062e-07, + "loss": 1.1396, + "step": 32407 + }, + { + "epoch": 0.951553232720653, + "grad_norm": 0.0, + "learning_rate": 1.2284896539096147e-07, + "loss": 1.1499, + "step": 32408 + }, + { + "epoch": 0.951582594397792, + "grad_norm": 0.0, + "learning_rate": 1.227004078531191e-07, + "loss": 1.146, + "step": 32409 + }, + { + "epoch": 0.951611956074931, + "grad_norm": 0.0, + "learning_rate": 1.2255193963822465e-07, + "loss": 1.1597, + "step": 32410 + }, + { + "epoch": 0.95164131775207, + "grad_norm": 0.0, + "learning_rate": 1.2240356074762482e-07, + "loss": 1.0981, + "step": 32411 + }, + { + "epoch": 0.951670679429209, + "grad_norm": 0.0, + "learning_rate": 1.2225527118265856e-07, + "loss": 1.1157, + "step": 32412 + }, + { + "epoch": 0.951700041106348, + "grad_norm": 0.0, + "learning_rate": 1.22107070944667e-07, + "loss": 1.1206, + "step": 32413 + }, + { + "epoch": 0.9517294027834869, + "grad_norm": 0.0, + "learning_rate": 1.219589600349913e-07, + "loss": 1.1284, + "step": 32414 + }, + { + "epoch": 0.951758764460626, + "grad_norm": 0.0, + "learning_rate": 1.2181093845497148e-07, + "loss": 1.123, + "step": 32415 + }, + { + "epoch": 0.951788126137765, + "grad_norm": 0.0, + "learning_rate": 1.216630062059454e-07, + "loss": 1.103, + "step": 32416 + }, + { + "epoch": 0.9518174878149039, + "grad_norm": 0.0, + "learning_rate": 1.2151516328924862e-07, + "loss": 1.1479, + "step": 32417 + }, + { + "epoch": 0.951846849492043, + "grad_norm": 0.0, + "learning_rate": 1.213674097062223e-07, + "loss": 1.1982, + "step": 32418 + }, + { + "epoch": 0.951876211169182, + "grad_norm": 0.0, + "learning_rate": 1.212197454581987e-07, + "loss": 1.2109, + "step": 32419 + }, + { + "epoch": 0.9519055728463209, + "grad_norm": 0.0, + "learning_rate": 1.2107217054651566e-07, + "loss": 1.1553, + "step": 32420 + }, + { + "epoch": 0.95193493452346, + "grad_norm": 0.0, + "learning_rate": 1.2092468497250653e-07, + "loss": 1.1094, + "step": 32421 + }, + { + "epoch": 0.951964296200599, + "grad_norm": 0.0, + "learning_rate": 1.2077728873750583e-07, + "loss": 1.2988, + "step": 32422 + }, + { + "epoch": 0.9519936578777379, + "grad_norm": 0.0, + "learning_rate": 1.206299818428469e-07, + "loss": 1.228, + "step": 32423 + }, + { + "epoch": 0.952023019554877, + "grad_norm": 0.0, + "learning_rate": 1.2048276428986095e-07, + "loss": 1.2598, + "step": 32424 + }, + { + "epoch": 0.952052381232016, + "grad_norm": 0.0, + "learning_rate": 1.2033563607987907e-07, + "loss": 1.2285, + "step": 32425 + }, + { + "epoch": 0.9520817429091549, + "grad_norm": 0.0, + "learning_rate": 1.2018859721423249e-07, + "loss": 1.2061, + "step": 32426 + }, + { + "epoch": 0.952111104586294, + "grad_norm": 0.0, + "learning_rate": 1.200416476942512e-07, + "loss": 1.2393, + "step": 32427 + }, + { + "epoch": 0.952140466263433, + "grad_norm": 0.0, + "learning_rate": 1.1989478752126193e-07, + "loss": 1.1069, + "step": 32428 + }, + { + "epoch": 0.9521698279405719, + "grad_norm": 0.0, + "learning_rate": 1.1974801669659587e-07, + "loss": 1.1772, + "step": 32429 + }, + { + "epoch": 0.952199189617711, + "grad_norm": 0.0, + "learning_rate": 1.1960133522157856e-07, + "loss": 1.2744, + "step": 32430 + }, + { + "epoch": 0.95222855129485, + "grad_norm": 0.0, + "learning_rate": 1.194547430975368e-07, + "loss": 1.1484, + "step": 32431 + }, + { + "epoch": 0.9522579129719889, + "grad_norm": 0.0, + "learning_rate": 1.1930824032579613e-07, + "loss": 1.2432, + "step": 32432 + }, + { + "epoch": 0.952287274649128, + "grad_norm": 0.0, + "learning_rate": 1.1916182690768218e-07, + "loss": 1.2734, + "step": 32433 + }, + { + "epoch": 0.952316636326267, + "grad_norm": 0.0, + "learning_rate": 1.1901550284451835e-07, + "loss": 1.2354, + "step": 32434 + }, + { + "epoch": 0.9523459980034059, + "grad_norm": 0.0, + "learning_rate": 1.1886926813762578e-07, + "loss": 1.2378, + "step": 32435 + }, + { + "epoch": 0.952375359680545, + "grad_norm": 0.0, + "learning_rate": 1.187231227883312e-07, + "loss": 1.2227, + "step": 32436 + }, + { + "epoch": 0.952404721357684, + "grad_norm": 0.0, + "learning_rate": 1.1857706679795356e-07, + "loss": 1.2358, + "step": 32437 + }, + { + "epoch": 0.9524340830348229, + "grad_norm": 0.0, + "learning_rate": 1.1843110016781401e-07, + "loss": 1.0894, + "step": 32438 + }, + { + "epoch": 0.952463444711962, + "grad_norm": 0.0, + "learning_rate": 1.182852228992315e-07, + "loss": 1.1191, + "step": 32439 + }, + { + "epoch": 0.9524928063891009, + "grad_norm": 0.0, + "learning_rate": 1.1813943499352831e-07, + "loss": 1.1982, + "step": 32440 + }, + { + "epoch": 0.9525221680662399, + "grad_norm": 0.0, + "learning_rate": 1.1799373645202117e-07, + "loss": 1.3096, + "step": 32441 + }, + { + "epoch": 0.952551529743379, + "grad_norm": 0.0, + "learning_rate": 1.1784812727602679e-07, + "loss": 1.0713, + "step": 32442 + }, + { + "epoch": 0.9525808914205179, + "grad_norm": 0.0, + "learning_rate": 1.1770260746686191e-07, + "loss": 1.25, + "step": 32443 + }, + { + "epoch": 0.9526102530976569, + "grad_norm": 0.0, + "learning_rate": 1.1755717702584434e-07, + "loss": 1.1641, + "step": 32444 + }, + { + "epoch": 0.952639614774796, + "grad_norm": 0.0, + "learning_rate": 1.1741183595428752e-07, + "loss": 1.1797, + "step": 32445 + }, + { + "epoch": 0.9526689764519349, + "grad_norm": 0.0, + "learning_rate": 1.172665842535059e-07, + "loss": 1.2515, + "step": 32446 + }, + { + "epoch": 0.9526983381290739, + "grad_norm": 0.0, + "learning_rate": 1.1712142192481512e-07, + "loss": 1.124, + "step": 32447 + }, + { + "epoch": 0.952727699806213, + "grad_norm": 0.0, + "learning_rate": 1.1697634896952526e-07, + "loss": 1.2549, + "step": 32448 + }, + { + "epoch": 0.9527570614833519, + "grad_norm": 0.0, + "learning_rate": 1.1683136538894968e-07, + "loss": 1.0703, + "step": 32449 + }, + { + "epoch": 0.9527864231604909, + "grad_norm": 0.0, + "learning_rate": 1.1668647118439847e-07, + "loss": 1.0732, + "step": 32450 + }, + { + "epoch": 0.95281578483763, + "grad_norm": 0.0, + "learning_rate": 1.1654166635718389e-07, + "loss": 1.2285, + "step": 32451 + }, + { + "epoch": 0.9528451465147689, + "grad_norm": 0.0, + "learning_rate": 1.163969509086138e-07, + "loss": 1.1714, + "step": 32452 + }, + { + "epoch": 0.9528745081919079, + "grad_norm": 0.0, + "learning_rate": 1.1625232483999716e-07, + "loss": 1.1831, + "step": 32453 + }, + { + "epoch": 0.952903869869047, + "grad_norm": 0.0, + "learning_rate": 1.1610778815264179e-07, + "loss": 1.1768, + "step": 32454 + }, + { + "epoch": 0.9529332315461859, + "grad_norm": 0.0, + "learning_rate": 1.1596334084785443e-07, + "loss": 1.0918, + "step": 32455 + }, + { + "epoch": 0.9529625932233249, + "grad_norm": 0.0, + "learning_rate": 1.1581898292694294e-07, + "loss": 1.2158, + "step": 32456 + }, + { + "epoch": 0.952991954900464, + "grad_norm": 0.0, + "learning_rate": 1.156747143912118e-07, + "loss": 1.2051, + "step": 32457 + }, + { + "epoch": 0.9530213165776029, + "grad_norm": 0.0, + "learning_rate": 1.1553053524196556e-07, + "loss": 1.2881, + "step": 32458 + }, + { + "epoch": 0.9530506782547419, + "grad_norm": 0.0, + "learning_rate": 1.1538644548050759e-07, + "loss": 1.1982, + "step": 32459 + }, + { + "epoch": 0.953080039931881, + "grad_norm": 0.0, + "learning_rate": 1.1524244510814242e-07, + "loss": 1.2314, + "step": 32460 + }, + { + "epoch": 0.9531094016090199, + "grad_norm": 0.0, + "learning_rate": 1.1509853412617012e-07, + "loss": 1.1421, + "step": 32461 + }, + { + "epoch": 0.9531387632861589, + "grad_norm": 0.0, + "learning_rate": 1.1495471253589407e-07, + "loss": 1.1021, + "step": 32462 + }, + { + "epoch": 0.953168124963298, + "grad_norm": 0.0, + "learning_rate": 1.1481098033861327e-07, + "loss": 1.251, + "step": 32463 + }, + { + "epoch": 0.9531974866404369, + "grad_norm": 0.0, + "learning_rate": 1.1466733753562997e-07, + "loss": 1.208, + "step": 32464 + }, + { + "epoch": 0.9532268483175759, + "grad_norm": 0.0, + "learning_rate": 1.1452378412824094e-07, + "loss": 1.1558, + "step": 32465 + }, + { + "epoch": 0.9532562099947149, + "grad_norm": 0.0, + "learning_rate": 1.1438032011774514e-07, + "loss": 1.1768, + "step": 32466 + }, + { + "epoch": 0.9532855716718539, + "grad_norm": 0.0, + "learning_rate": 1.142369455054393e-07, + "loss": 1.1392, + "step": 32467 + }, + { + "epoch": 0.9533149333489929, + "grad_norm": 0.0, + "learning_rate": 1.1409366029262126e-07, + "loss": 1.0771, + "step": 32468 + }, + { + "epoch": 0.9533442950261319, + "grad_norm": 0.0, + "learning_rate": 1.1395046448058556e-07, + "loss": 1.1772, + "step": 32469 + }, + { + "epoch": 0.9533736567032709, + "grad_norm": 0.0, + "learning_rate": 1.1380735807062781e-07, + "loss": 1.186, + "step": 32470 + }, + { + "epoch": 0.9534030183804099, + "grad_norm": 0.0, + "learning_rate": 1.1366434106404145e-07, + "loss": 1.2285, + "step": 32471 + }, + { + "epoch": 0.9534323800575489, + "grad_norm": 0.0, + "learning_rate": 1.1352141346212098e-07, + "loss": 1.104, + "step": 32472 + }, + { + "epoch": 0.9534617417346879, + "grad_norm": 0.0, + "learning_rate": 1.133785752661587e-07, + "loss": 1.2158, + "step": 32473 + }, + { + "epoch": 0.9534911034118269, + "grad_norm": 0.0, + "learning_rate": 1.132358264774458e-07, + "loss": 1.207, + "step": 32474 + }, + { + "epoch": 0.9535204650889659, + "grad_norm": 0.0, + "learning_rate": 1.1309316709727237e-07, + "loss": 1.2656, + "step": 32475 + }, + { + "epoch": 0.9535498267661049, + "grad_norm": 0.0, + "learning_rate": 1.1295059712693068e-07, + "loss": 1.168, + "step": 32476 + }, + { + "epoch": 0.9535791884432439, + "grad_norm": 0.0, + "learning_rate": 1.1280811656770862e-07, + "loss": 1.147, + "step": 32477 + }, + { + "epoch": 0.9536085501203829, + "grad_norm": 0.0, + "learning_rate": 1.1266572542089404e-07, + "loss": 1.2812, + "step": 32478 + }, + { + "epoch": 0.9536379117975219, + "grad_norm": 0.0, + "learning_rate": 1.1252342368777591e-07, + "loss": 1.1699, + "step": 32479 + }, + { + "epoch": 0.9536672734746608, + "grad_norm": 0.0, + "learning_rate": 1.1238121136964097e-07, + "loss": 1.1719, + "step": 32480 + }, + { + "epoch": 0.9536966351517999, + "grad_norm": 0.0, + "learning_rate": 1.1223908846777597e-07, + "loss": 1.2783, + "step": 32481 + }, + { + "epoch": 0.9537259968289389, + "grad_norm": 0.0, + "learning_rate": 1.1209705498346434e-07, + "loss": 1.1528, + "step": 32482 + }, + { + "epoch": 0.9537553585060778, + "grad_norm": 0.0, + "learning_rate": 1.119551109179906e-07, + "loss": 1.1523, + "step": 32483 + }, + { + "epoch": 0.9537847201832169, + "grad_norm": 0.0, + "learning_rate": 1.1181325627263928e-07, + "loss": 1.165, + "step": 32484 + }, + { + "epoch": 0.9538140818603559, + "grad_norm": 0.0, + "learning_rate": 1.116714910486938e-07, + "loss": 1.0977, + "step": 32485 + }, + { + "epoch": 0.9538434435374948, + "grad_norm": 0.0, + "learning_rate": 1.1152981524743534e-07, + "loss": 1.1299, + "step": 32486 + }, + { + "epoch": 0.9538728052146339, + "grad_norm": 0.0, + "learning_rate": 1.1138822887014511e-07, + "loss": 1.2725, + "step": 32487 + }, + { + "epoch": 0.9539021668917729, + "grad_norm": 0.0, + "learning_rate": 1.112467319181032e-07, + "loss": 1.2417, + "step": 32488 + }, + { + "epoch": 0.9539315285689118, + "grad_norm": 0.0, + "learning_rate": 1.111053243925897e-07, + "loss": 1.0767, + "step": 32489 + }, + { + "epoch": 0.9539608902460509, + "grad_norm": 0.0, + "learning_rate": 1.1096400629488468e-07, + "loss": 1.1943, + "step": 32490 + }, + { + "epoch": 0.9539902519231899, + "grad_norm": 0.0, + "learning_rate": 1.1082277762626381e-07, + "loss": 1.1714, + "step": 32491 + }, + { + "epoch": 0.9540196136003288, + "grad_norm": 0.0, + "learning_rate": 1.1068163838800494e-07, + "loss": 1.252, + "step": 32492 + }, + { + "epoch": 0.9540489752774679, + "grad_norm": 0.0, + "learning_rate": 1.1054058858138483e-07, + "loss": 1.1885, + "step": 32493 + }, + { + "epoch": 0.9540783369546069, + "grad_norm": 0.0, + "learning_rate": 1.1039962820767913e-07, + "loss": 1.1255, + "step": 32494 + }, + { + "epoch": 0.9541076986317458, + "grad_norm": 0.0, + "learning_rate": 1.1025875726816237e-07, + "loss": 1.3457, + "step": 32495 + }, + { + "epoch": 0.9541370603088849, + "grad_norm": 0.0, + "learning_rate": 1.1011797576410798e-07, + "loss": 1.2275, + "step": 32496 + }, + { + "epoch": 0.9541664219860239, + "grad_norm": 0.0, + "learning_rate": 1.099772836967905e-07, + "loss": 1.1724, + "step": 32497 + }, + { + "epoch": 0.9541957836631628, + "grad_norm": 0.0, + "learning_rate": 1.0983668106748113e-07, + "loss": 1.1357, + "step": 32498 + }, + { + "epoch": 0.9542251453403018, + "grad_norm": 0.0, + "learning_rate": 1.0969616787745108e-07, + "loss": 1.3164, + "step": 32499 + }, + { + "epoch": 0.9542545070174409, + "grad_norm": 0.0, + "learning_rate": 1.0955574412797154e-07, + "loss": 1.1553, + "step": 32500 + }, + { + "epoch": 0.9542838686945798, + "grad_norm": 0.0, + "learning_rate": 1.0941540982031262e-07, + "loss": 1.2695, + "step": 32501 + }, + { + "epoch": 0.9543132303717188, + "grad_norm": 0.0, + "learning_rate": 1.0927516495574219e-07, + "loss": 1.0742, + "step": 32502 + }, + { + "epoch": 0.9543425920488579, + "grad_norm": 0.0, + "learning_rate": 1.0913500953553147e-07, + "loss": 1.3765, + "step": 32503 + }, + { + "epoch": 0.9543719537259968, + "grad_norm": 0.0, + "learning_rate": 1.0899494356094387e-07, + "loss": 1.2153, + "step": 32504 + }, + { + "epoch": 0.9544013154031358, + "grad_norm": 0.0, + "learning_rate": 1.0885496703324949e-07, + "loss": 1.2061, + "step": 32505 + }, + { + "epoch": 0.9544306770802748, + "grad_norm": 0.0, + "learning_rate": 1.0871507995371288e-07, + "loss": 1.2529, + "step": 32506 + }, + { + "epoch": 0.9544600387574138, + "grad_norm": 0.0, + "learning_rate": 1.085752823235986e-07, + "loss": 1.1011, + "step": 32507 + }, + { + "epoch": 0.9544894004345528, + "grad_norm": 0.0, + "learning_rate": 1.0843557414417116e-07, + "loss": 1.146, + "step": 32508 + }, + { + "epoch": 0.9545187621116918, + "grad_norm": 0.0, + "learning_rate": 1.0829595541669402e-07, + "loss": 1.1963, + "step": 32509 + }, + { + "epoch": 0.9545481237888308, + "grad_norm": 0.0, + "learning_rate": 1.0815642614242949e-07, + "loss": 1.229, + "step": 32510 + }, + { + "epoch": 0.9545774854659698, + "grad_norm": 0.0, + "learning_rate": 1.080169863226399e-07, + "loss": 1.2671, + "step": 32511 + }, + { + "epoch": 0.9546068471431088, + "grad_norm": 0.0, + "learning_rate": 1.0787763595858647e-07, + "loss": 1.1729, + "step": 32512 + }, + { + "epoch": 0.9546362088202478, + "grad_norm": 0.0, + "learning_rate": 1.0773837505152928e-07, + "loss": 1.1299, + "step": 32513 + }, + { + "epoch": 0.9546655704973868, + "grad_norm": 0.0, + "learning_rate": 1.0759920360272736e-07, + "loss": 1.2271, + "step": 32514 + }, + { + "epoch": 0.9546949321745258, + "grad_norm": 0.0, + "learning_rate": 1.0746012161343854e-07, + "loss": 1.4443, + "step": 32515 + }, + { + "epoch": 0.9547242938516648, + "grad_norm": 0.0, + "learning_rate": 1.0732112908492187e-07, + "loss": 1.2412, + "step": 32516 + }, + { + "epoch": 0.9547536555288038, + "grad_norm": 0.0, + "learning_rate": 1.0718222601843408e-07, + "loss": 1.082, + "step": 32517 + }, + { + "epoch": 0.9547830172059428, + "grad_norm": 0.0, + "learning_rate": 1.0704341241523086e-07, + "loss": 1.1206, + "step": 32518 + }, + { + "epoch": 0.9548123788830818, + "grad_norm": 0.0, + "learning_rate": 1.0690468827656675e-07, + "loss": 1.2471, + "step": 32519 + }, + { + "epoch": 0.9548417405602208, + "grad_norm": 0.0, + "learning_rate": 1.0676605360369851e-07, + "loss": 1.1099, + "step": 32520 + }, + { + "epoch": 0.9548711022373598, + "grad_norm": 0.0, + "learning_rate": 1.0662750839787739e-07, + "loss": 1.3262, + "step": 32521 + }, + { + "epoch": 0.9549004639144988, + "grad_norm": 0.0, + "learning_rate": 1.0648905266035902e-07, + "loss": 1.1104, + "step": 32522 + }, + { + "epoch": 0.9549298255916377, + "grad_norm": 0.0, + "learning_rate": 1.0635068639239243e-07, + "loss": 1.1152, + "step": 32523 + }, + { + "epoch": 0.9549591872687768, + "grad_norm": 0.0, + "learning_rate": 1.0621240959523104e-07, + "loss": 1.0835, + "step": 32524 + }, + { + "epoch": 0.9549885489459158, + "grad_norm": 0.0, + "learning_rate": 1.0607422227012499e-07, + "loss": 1.2349, + "step": 32525 + }, + { + "epoch": 0.9550179106230547, + "grad_norm": 0.0, + "learning_rate": 1.0593612441832213e-07, + "loss": 1.1772, + "step": 32526 + }, + { + "epoch": 0.9550472723001938, + "grad_norm": 0.0, + "learning_rate": 1.057981160410737e-07, + "loss": 1.1172, + "step": 32527 + }, + { + "epoch": 0.9550766339773328, + "grad_norm": 0.0, + "learning_rate": 1.056601971396265e-07, + "loss": 1.2715, + "step": 32528 + }, + { + "epoch": 0.9551059956544717, + "grad_norm": 0.0, + "learning_rate": 1.055223677152295e-07, + "loss": 1.1533, + "step": 32529 + }, + { + "epoch": 0.9551353573316108, + "grad_norm": 0.0, + "learning_rate": 1.0538462776912727e-07, + "loss": 1.147, + "step": 32530 + }, + { + "epoch": 0.9551647190087498, + "grad_norm": 0.0, + "learning_rate": 1.052469773025655e-07, + "loss": 1.2324, + "step": 32531 + }, + { + "epoch": 0.9551940806858887, + "grad_norm": 0.0, + "learning_rate": 1.0510941631678873e-07, + "loss": 1.2227, + "step": 32532 + }, + { + "epoch": 0.9552234423630278, + "grad_norm": 0.0, + "learning_rate": 1.0497194481304263e-07, + "loss": 1.1572, + "step": 32533 + }, + { + "epoch": 0.9552528040401668, + "grad_norm": 0.0, + "learning_rate": 1.0483456279256842e-07, + "loss": 1.1943, + "step": 32534 + }, + { + "epoch": 0.9552821657173057, + "grad_norm": 0.0, + "learning_rate": 1.0469727025660959e-07, + "loss": 1.1943, + "step": 32535 + }, + { + "epoch": 0.9553115273944448, + "grad_norm": 0.0, + "learning_rate": 1.0456006720640844e-07, + "loss": 1.1265, + "step": 32536 + }, + { + "epoch": 0.9553408890715838, + "grad_norm": 0.0, + "learning_rate": 1.0442295364320398e-07, + "loss": 1.3154, + "step": 32537 + }, + { + "epoch": 0.9553702507487227, + "grad_norm": 0.0, + "learning_rate": 1.0428592956823747e-07, + "loss": 1.1543, + "step": 32538 + }, + { + "epoch": 0.9553996124258618, + "grad_norm": 0.0, + "learning_rate": 1.0414899498274788e-07, + "loss": 1.188, + "step": 32539 + }, + { + "epoch": 0.9554289741030008, + "grad_norm": 0.0, + "learning_rate": 1.0401214988797426e-07, + "loss": 1.229, + "step": 32540 + }, + { + "epoch": 0.9554583357801397, + "grad_norm": 0.0, + "learning_rate": 1.0387539428515115e-07, + "loss": 1.166, + "step": 32541 + }, + { + "epoch": 0.9554876974572788, + "grad_norm": 0.0, + "learning_rate": 1.0373872817551867e-07, + "loss": 1.208, + "step": 32542 + }, + { + "epoch": 0.9555170591344178, + "grad_norm": 0.0, + "learning_rate": 1.0360215156031028e-07, + "loss": 1.2695, + "step": 32543 + }, + { + "epoch": 0.9555464208115567, + "grad_norm": 0.0, + "learning_rate": 1.0346566444076279e-07, + "loss": 1.2422, + "step": 32544 + }, + { + "epoch": 0.9555757824886958, + "grad_norm": 0.0, + "learning_rate": 1.0332926681810961e-07, + "loss": 1.2539, + "step": 32545 + }, + { + "epoch": 0.9556051441658348, + "grad_norm": 0.0, + "learning_rate": 1.0319295869358426e-07, + "loss": 1.1899, + "step": 32546 + }, + { + "epoch": 0.9556345058429737, + "grad_norm": 0.0, + "learning_rate": 1.0305674006842015e-07, + "loss": 1.2256, + "step": 32547 + }, + { + "epoch": 0.9556638675201128, + "grad_norm": 0.0, + "learning_rate": 1.0292061094384854e-07, + "loss": 1.248, + "step": 32548 + }, + { + "epoch": 0.9556932291972517, + "grad_norm": 0.0, + "learning_rate": 1.0278457132110064e-07, + "loss": 1.2051, + "step": 32549 + }, + { + "epoch": 0.9557225908743907, + "grad_norm": 0.0, + "learning_rate": 1.026486212014055e-07, + "loss": 1.1934, + "step": 32550 + }, + { + "epoch": 0.9557519525515298, + "grad_norm": 0.0, + "learning_rate": 1.0251276058599435e-07, + "loss": 1.1509, + "step": 32551 + }, + { + "epoch": 0.9557813142286687, + "grad_norm": 0.0, + "learning_rate": 1.023769894760951e-07, + "loss": 1.1689, + "step": 32552 + }, + { + "epoch": 0.9558106759058077, + "grad_norm": 0.0, + "learning_rate": 1.0224130787293562e-07, + "loss": 1.2334, + "step": 32553 + }, + { + "epoch": 0.9558400375829468, + "grad_norm": 0.0, + "learning_rate": 1.0210571577774275e-07, + "loss": 1.1978, + "step": 32554 + }, + { + "epoch": 0.9558693992600857, + "grad_norm": 0.0, + "learning_rate": 1.0197021319174327e-07, + "loss": 1.2861, + "step": 32555 + }, + { + "epoch": 0.9558987609372247, + "grad_norm": 0.0, + "learning_rate": 1.0183480011616176e-07, + "loss": 1.0942, + "step": 32556 + }, + { + "epoch": 0.9559281226143638, + "grad_norm": 0.0, + "learning_rate": 1.0169947655222278e-07, + "loss": 1.165, + "step": 32557 + }, + { + "epoch": 0.9559574842915027, + "grad_norm": 0.0, + "learning_rate": 1.0156424250115093e-07, + "loss": 1.1787, + "step": 32558 + }, + { + "epoch": 0.9559868459686417, + "grad_norm": 0.0, + "learning_rate": 1.0142909796416745e-07, + "loss": 1.2275, + "step": 32559 + }, + { + "epoch": 0.9560162076457808, + "grad_norm": 0.0, + "learning_rate": 1.012940429424969e-07, + "loss": 1.2104, + "step": 32560 + }, + { + "epoch": 0.9560455693229197, + "grad_norm": 0.0, + "learning_rate": 1.0115907743735832e-07, + "loss": 1.2134, + "step": 32561 + }, + { + "epoch": 0.9560749310000587, + "grad_norm": 0.0, + "learning_rate": 1.0102420144997404e-07, + "loss": 1.1973, + "step": 32562 + }, + { + "epoch": 0.9561042926771978, + "grad_norm": 0.0, + "learning_rate": 1.0088941498156424e-07, + "loss": 1.1875, + "step": 32563 + }, + { + "epoch": 0.9561336543543367, + "grad_norm": 0.0, + "learning_rate": 1.0075471803334569e-07, + "loss": 1.229, + "step": 32564 + }, + { + "epoch": 0.9561630160314757, + "grad_norm": 0.0, + "learning_rate": 1.0062011060653742e-07, + "loss": 1.3535, + "step": 32565 + }, + { + "epoch": 0.9561923777086148, + "grad_norm": 0.0, + "learning_rate": 1.0048559270235625e-07, + "loss": 1.1914, + "step": 32566 + }, + { + "epoch": 0.9562217393857537, + "grad_norm": 0.0, + "learning_rate": 1.0035116432202008e-07, + "loss": 1.1992, + "step": 32567 + }, + { + "epoch": 0.9562511010628927, + "grad_norm": 0.0, + "learning_rate": 1.0021682546674238e-07, + "loss": 1.1182, + "step": 32568 + }, + { + "epoch": 0.9562804627400318, + "grad_norm": 0.0, + "learning_rate": 1.0008257613773998e-07, + "loss": 1.1543, + "step": 32569 + }, + { + "epoch": 0.9563098244171707, + "grad_norm": 0.0, + "learning_rate": 9.994841633622632e-08, + "loss": 1.2031, + "step": 32570 + }, + { + "epoch": 0.9563391860943097, + "grad_norm": 0.0, + "learning_rate": 9.981434606341489e-08, + "loss": 1.2583, + "step": 32571 + }, + { + "epoch": 0.9563685477714488, + "grad_norm": 0.0, + "learning_rate": 9.968036532051805e-08, + "loss": 1.1768, + "step": 32572 + }, + { + "epoch": 0.9563979094485877, + "grad_norm": 0.0, + "learning_rate": 9.954647410874596e-08, + "loss": 1.2119, + "step": 32573 + }, + { + "epoch": 0.9564272711257267, + "grad_norm": 0.0, + "learning_rate": 9.941267242931208e-08, + "loss": 1.2314, + "step": 32574 + }, + { + "epoch": 0.9564566328028657, + "grad_norm": 0.0, + "learning_rate": 9.927896028342432e-08, + "loss": 1.2021, + "step": 32575 + }, + { + "epoch": 0.9564859944800047, + "grad_norm": 0.0, + "learning_rate": 9.914533767229173e-08, + "loss": 1.2959, + "step": 32576 + }, + { + "epoch": 0.9565153561571437, + "grad_norm": 0.0, + "learning_rate": 9.901180459712446e-08, + "loss": 1.1602, + "step": 32577 + }, + { + "epoch": 0.9565447178342827, + "grad_norm": 0.0, + "learning_rate": 9.887836105912929e-08, + "loss": 1.2061, + "step": 32578 + }, + { + "epoch": 0.9565740795114217, + "grad_norm": 0.0, + "learning_rate": 9.874500705951195e-08, + "loss": 1.2827, + "step": 32579 + }, + { + "epoch": 0.9566034411885607, + "grad_norm": 0.0, + "learning_rate": 9.861174259948037e-08, + "loss": 1.1953, + "step": 32580 + }, + { + "epoch": 0.9566328028656997, + "grad_norm": 0.0, + "learning_rate": 9.847856768023801e-08, + "loss": 1.1855, + "step": 32581 + }, + { + "epoch": 0.9566621645428387, + "grad_norm": 0.0, + "learning_rate": 9.834548230298946e-08, + "loss": 1.2656, + "step": 32582 + }, + { + "epoch": 0.9566915262199777, + "grad_norm": 0.0, + "learning_rate": 9.821248646893933e-08, + "loss": 1.3018, + "step": 32583 + }, + { + "epoch": 0.9567208878971167, + "grad_norm": 0.0, + "learning_rate": 9.807958017928776e-08, + "loss": 1.2061, + "step": 32584 + }, + { + "epoch": 0.9567502495742557, + "grad_norm": 0.0, + "learning_rate": 9.794676343524045e-08, + "loss": 1.1611, + "step": 32585 + }, + { + "epoch": 0.9567796112513947, + "grad_norm": 0.0, + "learning_rate": 9.781403623799535e-08, + "loss": 1.1592, + "step": 32586 + }, + { + "epoch": 0.9568089729285337, + "grad_norm": 0.0, + "learning_rate": 9.76813985887548e-08, + "loss": 1.3535, + "step": 32587 + }, + { + "epoch": 0.9568383346056727, + "grad_norm": 0.0, + "learning_rate": 9.754885048871676e-08, + "loss": 1.2217, + "step": 32588 + }, + { + "epoch": 0.9568676962828117, + "grad_norm": 0.0, + "learning_rate": 9.741639193908137e-08, + "loss": 1.0459, + "step": 32589 + }, + { + "epoch": 0.9568970579599507, + "grad_norm": 0.0, + "learning_rate": 9.728402294104433e-08, + "loss": 1.1431, + "step": 32590 + }, + { + "epoch": 0.9569264196370897, + "grad_norm": 0.0, + "learning_rate": 9.715174349580469e-08, + "loss": 1.1426, + "step": 32591 + }, + { + "epoch": 0.9569557813142286, + "grad_norm": 0.0, + "learning_rate": 9.701955360455817e-08, + "loss": 1.1382, + "step": 32592 + }, + { + "epoch": 0.9569851429913677, + "grad_norm": 0.0, + "learning_rate": 9.688745326849935e-08, + "loss": 1.1436, + "step": 32593 + }, + { + "epoch": 0.9570145046685067, + "grad_norm": 0.0, + "learning_rate": 9.675544248882507e-08, + "loss": 1.29, + "step": 32594 + }, + { + "epoch": 0.9570438663456456, + "grad_norm": 0.0, + "learning_rate": 9.662352126672548e-08, + "loss": 1.3477, + "step": 32595 + }, + { + "epoch": 0.9570732280227847, + "grad_norm": 0.0, + "learning_rate": 9.649168960339739e-08, + "loss": 1.188, + "step": 32596 + }, + { + "epoch": 0.9571025896999237, + "grad_norm": 0.0, + "learning_rate": 9.635994750003098e-08, + "loss": 1.2998, + "step": 32597 + }, + { + "epoch": 0.9571319513770626, + "grad_norm": 0.0, + "learning_rate": 9.622829495781749e-08, + "loss": 1.3389, + "step": 32598 + }, + { + "epoch": 0.9571613130542016, + "grad_norm": 0.0, + "learning_rate": 9.609673197794711e-08, + "loss": 1.2686, + "step": 32599 + }, + { + "epoch": 0.9571906747313407, + "grad_norm": 0.0, + "learning_rate": 9.596525856161221e-08, + "loss": 1.2441, + "step": 32600 + }, + { + "epoch": 0.9572200364084796, + "grad_norm": 0.0, + "learning_rate": 9.58338747099985e-08, + "loss": 1.2124, + "step": 32601 + }, + { + "epoch": 0.9572493980856186, + "grad_norm": 0.0, + "learning_rate": 9.570258042429615e-08, + "loss": 1.0986, + "step": 32602 + }, + { + "epoch": 0.9572787597627577, + "grad_norm": 0.0, + "learning_rate": 9.557137570569197e-08, + "loss": 1.25, + "step": 32603 + }, + { + "epoch": 0.9573081214398966, + "grad_norm": 0.0, + "learning_rate": 9.54402605553717e-08, + "loss": 1.3428, + "step": 32604 + }, + { + "epoch": 0.9573374831170356, + "grad_norm": 0.0, + "learning_rate": 9.530923497452326e-08, + "loss": 1.1543, + "step": 32605 + }, + { + "epoch": 0.9573668447941747, + "grad_norm": 0.0, + "learning_rate": 9.517829896432795e-08, + "loss": 1.29, + "step": 32606 + }, + { + "epoch": 0.9573962064713136, + "grad_norm": 0.0, + "learning_rate": 9.504745252597369e-08, + "loss": 1.2441, + "step": 32607 + }, + { + "epoch": 0.9574255681484526, + "grad_norm": 0.0, + "learning_rate": 9.491669566064065e-08, + "loss": 1.1626, + "step": 32608 + }, + { + "epoch": 0.9574549298255917, + "grad_norm": 0.0, + "learning_rate": 9.478602836951345e-08, + "loss": 1.2305, + "step": 32609 + }, + { + "epoch": 0.9574842915027306, + "grad_norm": 0.0, + "learning_rate": 9.465545065377335e-08, + "loss": 1.2197, + "step": 32610 + }, + { + "epoch": 0.9575136531798696, + "grad_norm": 0.0, + "learning_rate": 9.452496251460052e-08, + "loss": 1.1709, + "step": 32611 + }, + { + "epoch": 0.9575430148570087, + "grad_norm": 0.0, + "learning_rate": 9.439456395317514e-08, + "loss": 1.1182, + "step": 32612 + }, + { + "epoch": 0.9575723765341476, + "grad_norm": 0.0, + "learning_rate": 9.426425497067626e-08, + "loss": 1.2441, + "step": 32613 + }, + { + "epoch": 0.9576017382112866, + "grad_norm": 0.0, + "learning_rate": 9.413403556828294e-08, + "loss": 1.1992, + "step": 32614 + }, + { + "epoch": 0.9576310998884257, + "grad_norm": 0.0, + "learning_rate": 9.4003905747172e-08, + "loss": 1.1567, + "step": 32615 + }, + { + "epoch": 0.9576604615655646, + "grad_norm": 0.0, + "learning_rate": 9.38738655085214e-08, + "loss": 1.2354, + "step": 32616 + }, + { + "epoch": 0.9576898232427036, + "grad_norm": 0.0, + "learning_rate": 9.374391485350576e-08, + "loss": 1.1694, + "step": 32617 + }, + { + "epoch": 0.9577191849198426, + "grad_norm": 0.0, + "learning_rate": 9.361405378330191e-08, + "loss": 1.2139, + "step": 32618 + }, + { + "epoch": 0.9577485465969816, + "grad_norm": 0.0, + "learning_rate": 9.348428229908225e-08, + "loss": 1.1343, + "step": 32619 + }, + { + "epoch": 0.9577779082741206, + "grad_norm": 0.0, + "learning_rate": 9.335460040202249e-08, + "loss": 1.1846, + "step": 32620 + }, + { + "epoch": 0.9578072699512596, + "grad_norm": 0.0, + "learning_rate": 9.322500809329393e-08, + "loss": 1.1812, + "step": 32621 + }, + { + "epoch": 0.9578366316283986, + "grad_norm": 0.0, + "learning_rate": 9.309550537406786e-08, + "loss": 1.2695, + "step": 32622 + }, + { + "epoch": 0.9578659933055376, + "grad_norm": 0.0, + "learning_rate": 9.296609224551667e-08, + "loss": 1.3281, + "step": 32623 + }, + { + "epoch": 0.9578953549826766, + "grad_norm": 0.0, + "learning_rate": 9.283676870881053e-08, + "loss": 1.2383, + "step": 32624 + }, + { + "epoch": 0.9579247166598156, + "grad_norm": 0.0, + "learning_rate": 9.270753476511851e-08, + "loss": 1.1924, + "step": 32625 + }, + { + "epoch": 0.9579540783369546, + "grad_norm": 0.0, + "learning_rate": 9.257839041560968e-08, + "loss": 1.3003, + "step": 32626 + }, + { + "epoch": 0.9579834400140936, + "grad_norm": 0.0, + "learning_rate": 9.244933566145198e-08, + "loss": 1.0938, + "step": 32627 + }, + { + "epoch": 0.9580128016912326, + "grad_norm": 0.0, + "learning_rate": 9.232037050381226e-08, + "loss": 1.1885, + "step": 32628 + }, + { + "epoch": 0.9580421633683716, + "grad_norm": 0.0, + "learning_rate": 9.219149494385738e-08, + "loss": 1.1196, + "step": 32629 + }, + { + "epoch": 0.9580715250455106, + "grad_norm": 0.0, + "learning_rate": 9.206270898275082e-08, + "loss": 1.2559, + "step": 32630 + }, + { + "epoch": 0.9581008867226496, + "grad_norm": 0.0, + "learning_rate": 9.193401262165946e-08, + "loss": 1.0752, + "step": 32631 + }, + { + "epoch": 0.9581302483997886, + "grad_norm": 0.0, + "learning_rate": 9.18054058617468e-08, + "loss": 1.2046, + "step": 32632 + }, + { + "epoch": 0.9581596100769276, + "grad_norm": 0.0, + "learning_rate": 9.167688870417413e-08, + "loss": 1.1162, + "step": 32633 + }, + { + "epoch": 0.9581889717540666, + "grad_norm": 0.0, + "learning_rate": 9.154846115010607e-08, + "loss": 1.1235, + "step": 32634 + }, + { + "epoch": 0.9582183334312055, + "grad_norm": 0.0, + "learning_rate": 9.142012320070171e-08, + "loss": 1.3252, + "step": 32635 + }, + { + "epoch": 0.9582476951083446, + "grad_norm": 0.0, + "learning_rate": 9.129187485712343e-08, + "loss": 1.1548, + "step": 32636 + }, + { + "epoch": 0.9582770567854836, + "grad_norm": 0.0, + "learning_rate": 9.116371612053144e-08, + "loss": 1.2471, + "step": 32637 + }, + { + "epoch": 0.9583064184626225, + "grad_norm": 0.0, + "learning_rate": 9.103564699208256e-08, + "loss": 1.0889, + "step": 32638 + }, + { + "epoch": 0.9583357801397616, + "grad_norm": 0.0, + "learning_rate": 9.090766747293589e-08, + "loss": 1.2471, + "step": 32639 + }, + { + "epoch": 0.9583651418169006, + "grad_norm": 0.0, + "learning_rate": 9.077977756424938e-08, + "loss": 1.1509, + "step": 32640 + }, + { + "epoch": 0.9583945034940395, + "grad_norm": 0.0, + "learning_rate": 9.065197726717877e-08, + "loss": 1.2075, + "step": 32641 + }, + { + "epoch": 0.9584238651711786, + "grad_norm": 0.0, + "learning_rate": 9.052426658287983e-08, + "loss": 1.1597, + "step": 32642 + }, + { + "epoch": 0.9584532268483176, + "grad_norm": 0.0, + "learning_rate": 9.039664551250826e-08, + "loss": 1.2178, + "step": 32643 + }, + { + "epoch": 0.9584825885254565, + "grad_norm": 0.0, + "learning_rate": 9.026911405721761e-08, + "loss": 1.0938, + "step": 32644 + }, + { + "epoch": 0.9585119502025956, + "grad_norm": 0.0, + "learning_rate": 9.01416722181625e-08, + "loss": 1.1992, + "step": 32645 + }, + { + "epoch": 0.9585413118797346, + "grad_norm": 0.0, + "learning_rate": 9.001431999649312e-08, + "loss": 1.3115, + "step": 32646 + }, + { + "epoch": 0.9585706735568735, + "grad_norm": 0.0, + "learning_rate": 8.98870573933619e-08, + "loss": 1.209, + "step": 32647 + }, + { + "epoch": 0.9586000352340126, + "grad_norm": 0.0, + "learning_rate": 8.975988440992012e-08, + "loss": 1.1543, + "step": 32648 + }, + { + "epoch": 0.9586293969111516, + "grad_norm": 0.0, + "learning_rate": 8.963280104731797e-08, + "loss": 1.1641, + "step": 32649 + }, + { + "epoch": 0.9586587585882905, + "grad_norm": 0.0, + "learning_rate": 8.950580730670455e-08, + "loss": 1.1299, + "step": 32650 + }, + { + "epoch": 0.9586881202654296, + "grad_norm": 0.0, + "learning_rate": 8.937890318922892e-08, + "loss": 1.1636, + "step": 32651 + }, + { + "epoch": 0.9587174819425686, + "grad_norm": 0.0, + "learning_rate": 8.925208869603686e-08, + "loss": 1.1279, + "step": 32652 + }, + { + "epoch": 0.9587468436197075, + "grad_norm": 0.0, + "learning_rate": 8.912536382827741e-08, + "loss": 1.2393, + "step": 32653 + }, + { + "epoch": 0.9587762052968466, + "grad_norm": 0.0, + "learning_rate": 8.899872858709525e-08, + "loss": 1.1719, + "step": 32654 + }, + { + "epoch": 0.9588055669739856, + "grad_norm": 0.0, + "learning_rate": 8.887218297363498e-08, + "loss": 1.1606, + "step": 32655 + }, + { + "epoch": 0.9588349286511245, + "grad_norm": 0.0, + "learning_rate": 8.874572698904238e-08, + "loss": 1.2192, + "step": 32656 + }, + { + "epoch": 0.9588642903282636, + "grad_norm": 0.0, + "learning_rate": 8.861936063445985e-08, + "loss": 1.0962, + "step": 32657 + }, + { + "epoch": 0.9588936520054026, + "grad_norm": 0.0, + "learning_rate": 8.849308391103206e-08, + "loss": 1.2041, + "step": 32658 + }, + { + "epoch": 0.9589230136825415, + "grad_norm": 0.0, + "learning_rate": 8.836689681989807e-08, + "loss": 1.1689, + "step": 32659 + }, + { + "epoch": 0.9589523753596806, + "grad_norm": 0.0, + "learning_rate": 8.824079936220143e-08, + "loss": 1.291, + "step": 32660 + }, + { + "epoch": 0.9589817370368195, + "grad_norm": 0.0, + "learning_rate": 8.811479153908232e-08, + "loss": 1.1943, + "step": 32661 + }, + { + "epoch": 0.9590110987139585, + "grad_norm": 0.0, + "learning_rate": 8.798887335167983e-08, + "loss": 1.168, + "step": 32662 + }, + { + "epoch": 0.9590404603910976, + "grad_norm": 0.0, + "learning_rate": 8.786304480113195e-08, + "loss": 1.1816, + "step": 32663 + }, + { + "epoch": 0.9590698220682365, + "grad_norm": 0.0, + "learning_rate": 8.773730588857665e-08, + "loss": 1.1353, + "step": 32664 + }, + { + "epoch": 0.9590991837453755, + "grad_norm": 0.0, + "learning_rate": 8.761165661515192e-08, + "loss": 1.1943, + "step": 32665 + }, + { + "epoch": 0.9591285454225146, + "grad_norm": 0.0, + "learning_rate": 8.74860969819924e-08, + "loss": 1.2695, + "step": 32666 + }, + { + "epoch": 0.9591579070996535, + "grad_norm": 0.0, + "learning_rate": 8.736062699023606e-08, + "loss": 1.1138, + "step": 32667 + }, + { + "epoch": 0.9591872687767925, + "grad_norm": 0.0, + "learning_rate": 8.723524664101535e-08, + "loss": 1.2695, + "step": 32668 + }, + { + "epoch": 0.9592166304539316, + "grad_norm": 0.0, + "learning_rate": 8.7109955935466e-08, + "loss": 1.2397, + "step": 32669 + }, + { + "epoch": 0.9592459921310705, + "grad_norm": 0.0, + "learning_rate": 8.698475487471936e-08, + "loss": 1.1343, + "step": 32670 + }, + { + "epoch": 0.9592753538082095, + "grad_norm": 0.0, + "learning_rate": 8.685964345990894e-08, + "loss": 1.1729, + "step": 32671 + }, + { + "epoch": 0.9593047154853486, + "grad_norm": 0.0, + "learning_rate": 8.673462169216607e-08, + "loss": 1.2734, + "step": 32672 + }, + { + "epoch": 0.9593340771624875, + "grad_norm": 0.0, + "learning_rate": 8.660968957261873e-08, + "loss": 1.2432, + "step": 32673 + }, + { + "epoch": 0.9593634388396265, + "grad_norm": 0.0, + "learning_rate": 8.648484710240046e-08, + "loss": 1.2334, + "step": 32674 + }, + { + "epoch": 0.9593928005167656, + "grad_norm": 0.0, + "learning_rate": 8.636009428263814e-08, + "loss": 1.1621, + "step": 32675 + }, + { + "epoch": 0.9594221621939045, + "grad_norm": 0.0, + "learning_rate": 8.623543111446087e-08, + "loss": 1.2271, + "step": 32676 + }, + { + "epoch": 0.9594515238710435, + "grad_norm": 0.0, + "learning_rate": 8.61108575989944e-08, + "loss": 1.2349, + "step": 32677 + }, + { + "epoch": 0.9594808855481826, + "grad_norm": 0.0, + "learning_rate": 8.598637373736785e-08, + "loss": 1.1309, + "step": 32678 + }, + { + "epoch": 0.9595102472253215, + "grad_norm": 0.0, + "learning_rate": 8.586197953070474e-08, + "loss": 1.1592, + "step": 32679 + }, + { + "epoch": 0.9595396089024605, + "grad_norm": 0.0, + "learning_rate": 8.573767498013085e-08, + "loss": 1.1919, + "step": 32680 + }, + { + "epoch": 0.9595689705795996, + "grad_norm": 0.0, + "learning_rate": 8.561346008677085e-08, + "loss": 1.186, + "step": 32681 + }, + { + "epoch": 0.9595983322567385, + "grad_norm": 0.0, + "learning_rate": 8.548933485174604e-08, + "loss": 1.2275, + "step": 32682 + }, + { + "epoch": 0.9596276939338775, + "grad_norm": 0.0, + "learning_rate": 8.536529927618108e-08, + "loss": 1.0483, + "step": 32683 + }, + { + "epoch": 0.9596570556110166, + "grad_norm": 0.0, + "learning_rate": 8.524135336119732e-08, + "loss": 1.2178, + "step": 32684 + }, + { + "epoch": 0.9596864172881555, + "grad_norm": 0.0, + "learning_rate": 8.511749710791495e-08, + "loss": 0.9756, + "step": 32685 + }, + { + "epoch": 0.9597157789652945, + "grad_norm": 0.0, + "learning_rate": 8.499373051745419e-08, + "loss": 1.2129, + "step": 32686 + }, + { + "epoch": 0.9597451406424335, + "grad_norm": 0.0, + "learning_rate": 8.487005359093525e-08, + "loss": 1.2969, + "step": 32687 + }, + { + "epoch": 0.9597745023195725, + "grad_norm": 0.0, + "learning_rate": 8.474646632947392e-08, + "loss": 1.3604, + "step": 32688 + }, + { + "epoch": 0.9598038639967115, + "grad_norm": 0.0, + "learning_rate": 8.462296873419151e-08, + "loss": 1.2061, + "step": 32689 + }, + { + "epoch": 0.9598332256738505, + "grad_norm": 0.0, + "learning_rate": 8.449956080620158e-08, + "loss": 1.2231, + "step": 32690 + }, + { + "epoch": 0.9598625873509895, + "grad_norm": 0.0, + "learning_rate": 8.437624254662213e-08, + "loss": 1.2026, + "step": 32691 + }, + { + "epoch": 0.9598919490281285, + "grad_norm": 0.0, + "learning_rate": 8.425301395656782e-08, + "loss": 1.2568, + "step": 32692 + }, + { + "epoch": 0.9599213107052675, + "grad_norm": 0.0, + "learning_rate": 8.41298750371522e-08, + "loss": 1.2402, + "step": 32693 + }, + { + "epoch": 0.9599506723824065, + "grad_norm": 0.0, + "learning_rate": 8.400682578948993e-08, + "loss": 1.2832, + "step": 32694 + }, + { + "epoch": 0.9599800340595455, + "grad_norm": 0.0, + "learning_rate": 8.388386621469457e-08, + "loss": 1.2012, + "step": 32695 + }, + { + "epoch": 0.9600093957366845, + "grad_norm": 0.0, + "learning_rate": 8.376099631387635e-08, + "loss": 1.2114, + "step": 32696 + }, + { + "epoch": 0.9600387574138235, + "grad_norm": 0.0, + "learning_rate": 8.363821608814659e-08, + "loss": 1.2012, + "step": 32697 + }, + { + "epoch": 0.9600681190909625, + "grad_norm": 0.0, + "learning_rate": 8.351552553861552e-08, + "loss": 1.3188, + "step": 32698 + }, + { + "epoch": 0.9600974807681014, + "grad_norm": 0.0, + "learning_rate": 8.339292466639448e-08, + "loss": 1.2593, + "step": 32699 + }, + { + "epoch": 0.9601268424452405, + "grad_norm": 0.0, + "learning_rate": 8.327041347259035e-08, + "loss": 1.2471, + "step": 32700 + }, + { + "epoch": 0.9601562041223795, + "grad_norm": 0.0, + "learning_rate": 8.314799195831114e-08, + "loss": 1.2246, + "step": 32701 + }, + { + "epoch": 0.9601855657995184, + "grad_norm": 0.0, + "learning_rate": 8.302566012466595e-08, + "loss": 1.2207, + "step": 32702 + }, + { + "epoch": 0.9602149274766575, + "grad_norm": 0.0, + "learning_rate": 8.290341797275835e-08, + "loss": 1.3027, + "step": 32703 + }, + { + "epoch": 0.9602442891537964, + "grad_norm": 0.0, + "learning_rate": 8.278126550369414e-08, + "loss": 1.1851, + "step": 32704 + }, + { + "epoch": 0.9602736508309354, + "grad_norm": 0.0, + "learning_rate": 8.265920271858019e-08, + "loss": 1.2148, + "step": 32705 + }, + { + "epoch": 0.9603030125080745, + "grad_norm": 0.0, + "learning_rate": 8.253722961851785e-08, + "loss": 1.1436, + "step": 32706 + }, + { + "epoch": 0.9603323741852134, + "grad_norm": 0.0, + "learning_rate": 8.241534620461178e-08, + "loss": 1.1943, + "step": 32707 + }, + { + "epoch": 0.9603617358623524, + "grad_norm": 0.0, + "learning_rate": 8.229355247796334e-08, + "loss": 1.2354, + "step": 32708 + }, + { + "epoch": 0.9603910975394915, + "grad_norm": 0.0, + "learning_rate": 8.217184843967496e-08, + "loss": 1.1221, + "step": 32709 + }, + { + "epoch": 0.9604204592166304, + "grad_norm": 0.0, + "learning_rate": 8.205023409084578e-08, + "loss": 1.2568, + "step": 32710 + }, + { + "epoch": 0.9604498208937694, + "grad_norm": 0.0, + "learning_rate": 8.192870943257714e-08, + "loss": 1.2051, + "step": 32711 + }, + { + "epoch": 0.9604791825709085, + "grad_norm": 0.0, + "learning_rate": 8.180727446596704e-08, + "loss": 1.2617, + "step": 32712 + }, + { + "epoch": 0.9605085442480474, + "grad_norm": 0.0, + "learning_rate": 8.168592919211349e-08, + "loss": 1.2622, + "step": 32713 + }, + { + "epoch": 0.9605379059251864, + "grad_norm": 0.0, + "learning_rate": 8.156467361211451e-08, + "loss": 1.1714, + "step": 32714 + }, + { + "epoch": 0.9605672676023255, + "grad_norm": 0.0, + "learning_rate": 8.144350772706588e-08, + "loss": 1.1675, + "step": 32715 + }, + { + "epoch": 0.9605966292794644, + "grad_norm": 0.0, + "learning_rate": 8.132243153806452e-08, + "loss": 1.0449, + "step": 32716 + }, + { + "epoch": 0.9606259909566034, + "grad_norm": 0.0, + "learning_rate": 8.120144504620398e-08, + "loss": 1.2676, + "step": 32717 + }, + { + "epoch": 0.9606553526337425, + "grad_norm": 0.0, + "learning_rate": 8.108054825257894e-08, + "loss": 1.1748, + "step": 32718 + }, + { + "epoch": 0.9606847143108814, + "grad_norm": 0.0, + "learning_rate": 8.095974115828297e-08, + "loss": 1.3022, + "step": 32719 + }, + { + "epoch": 0.9607140759880204, + "grad_norm": 0.0, + "learning_rate": 8.083902376440855e-08, + "loss": 1.2959, + "step": 32720 + }, + { + "epoch": 0.9607434376651595, + "grad_norm": 0.0, + "learning_rate": 8.071839607204812e-08, + "loss": 1.2437, + "step": 32721 + }, + { + "epoch": 0.9607727993422984, + "grad_norm": 0.0, + "learning_rate": 8.059785808228971e-08, + "loss": 1.3262, + "step": 32722 + }, + { + "epoch": 0.9608021610194374, + "grad_norm": 0.0, + "learning_rate": 8.047740979622687e-08, + "loss": 1.3008, + "step": 32723 + }, + { + "epoch": 0.9608315226965765, + "grad_norm": 0.0, + "learning_rate": 8.035705121494542e-08, + "loss": 1.2139, + "step": 32724 + }, + { + "epoch": 0.9608608843737154, + "grad_norm": 0.0, + "learning_rate": 8.023678233953669e-08, + "loss": 1.106, + "step": 32725 + }, + { + "epoch": 0.9608902460508544, + "grad_norm": 0.0, + "learning_rate": 8.01166031710876e-08, + "loss": 1.2373, + "step": 32726 + }, + { + "epoch": 0.9609196077279935, + "grad_norm": 0.0, + "learning_rate": 7.999651371068507e-08, + "loss": 1.2637, + "step": 32727 + }, + { + "epoch": 0.9609489694051324, + "grad_norm": 0.0, + "learning_rate": 7.987651395941265e-08, + "loss": 1.1323, + "step": 32728 + }, + { + "epoch": 0.9609783310822714, + "grad_norm": 0.0, + "learning_rate": 7.97566039183595e-08, + "loss": 1.2754, + "step": 32729 + }, + { + "epoch": 0.9610076927594104, + "grad_norm": 0.0, + "learning_rate": 7.963678358860693e-08, + "loss": 1.1377, + "step": 32730 + }, + { + "epoch": 0.9610370544365494, + "grad_norm": 0.0, + "learning_rate": 7.951705297123969e-08, + "loss": 1.2534, + "step": 32731 + }, + { + "epoch": 0.9610664161136884, + "grad_norm": 0.0, + "learning_rate": 7.93974120673402e-08, + "loss": 1.1177, + "step": 32732 + }, + { + "epoch": 0.9610957777908274, + "grad_norm": 0.0, + "learning_rate": 7.927786087799094e-08, + "loss": 1.2471, + "step": 32733 + }, + { + "epoch": 0.9611251394679664, + "grad_norm": 0.0, + "learning_rate": 7.915839940427217e-08, + "loss": 1.1875, + "step": 32734 + }, + { + "epoch": 0.9611545011451054, + "grad_norm": 0.0, + "learning_rate": 7.903902764726634e-08, + "loss": 1.2188, + "step": 32735 + }, + { + "epoch": 0.9611838628222444, + "grad_norm": 0.0, + "learning_rate": 7.891974560805038e-08, + "loss": 1.1235, + "step": 32736 + }, + { + "epoch": 0.9612132244993834, + "grad_norm": 0.0, + "learning_rate": 7.880055328770342e-08, + "loss": 1.3506, + "step": 32737 + }, + { + "epoch": 0.9612425861765224, + "grad_norm": 0.0, + "learning_rate": 7.868145068730459e-08, + "loss": 1.1538, + "step": 32738 + }, + { + "epoch": 0.9612719478536614, + "grad_norm": 0.0, + "learning_rate": 7.856243780792972e-08, + "loss": 1.2383, + "step": 32739 + }, + { + "epoch": 0.9613013095308004, + "grad_norm": 0.0, + "learning_rate": 7.84435146506557e-08, + "loss": 1.2188, + "step": 32740 + }, + { + "epoch": 0.9613306712079394, + "grad_norm": 0.0, + "learning_rate": 7.832468121655834e-08, + "loss": 1.2368, + "step": 32741 + }, + { + "epoch": 0.9613600328850784, + "grad_norm": 0.0, + "learning_rate": 7.820593750671124e-08, + "loss": 1.252, + "step": 32742 + }, + { + "epoch": 0.9613893945622174, + "grad_norm": 0.0, + "learning_rate": 7.80872835221902e-08, + "loss": 1.167, + "step": 32743 + }, + { + "epoch": 0.9614187562393564, + "grad_norm": 0.0, + "learning_rate": 7.796871926406657e-08, + "loss": 1.1865, + "step": 32744 + }, + { + "epoch": 0.9614481179164954, + "grad_norm": 0.0, + "learning_rate": 7.785024473341173e-08, + "loss": 1.2041, + "step": 32745 + }, + { + "epoch": 0.9614774795936344, + "grad_norm": 0.0, + "learning_rate": 7.773185993129928e-08, + "loss": 1.2378, + "step": 32746 + }, + { + "epoch": 0.9615068412707733, + "grad_norm": 0.0, + "learning_rate": 7.761356485879834e-08, + "loss": 1.103, + "step": 32747 + }, + { + "epoch": 0.9615362029479124, + "grad_norm": 0.0, + "learning_rate": 7.749535951697806e-08, + "loss": 1.3867, + "step": 32748 + }, + { + "epoch": 0.9615655646250514, + "grad_norm": 0.0, + "learning_rate": 7.737724390690981e-08, + "loss": 1.2705, + "step": 32749 + }, + { + "epoch": 0.9615949263021903, + "grad_norm": 0.0, + "learning_rate": 7.725921802966053e-08, + "loss": 1.3696, + "step": 32750 + }, + { + "epoch": 0.9616242879793294, + "grad_norm": 0.0, + "learning_rate": 7.714128188629711e-08, + "loss": 1.0923, + "step": 32751 + }, + { + "epoch": 0.9616536496564684, + "grad_norm": 0.0, + "learning_rate": 7.702343547788649e-08, + "loss": 1.2041, + "step": 32752 + }, + { + "epoch": 0.9616830113336073, + "grad_norm": 0.0, + "learning_rate": 7.690567880549338e-08, + "loss": 1.3379, + "step": 32753 + }, + { + "epoch": 0.9617123730107464, + "grad_norm": 0.0, + "learning_rate": 7.67880118701847e-08, + "loss": 1.2383, + "step": 32754 + }, + { + "epoch": 0.9617417346878854, + "grad_norm": 0.0, + "learning_rate": 7.667043467302182e-08, + "loss": 1.2007, + "step": 32755 + }, + { + "epoch": 0.9617710963650243, + "grad_norm": 0.0, + "learning_rate": 7.655294721507056e-08, + "loss": 1.1743, + "step": 32756 + }, + { + "epoch": 0.9618004580421634, + "grad_norm": 0.0, + "learning_rate": 7.64355494973923e-08, + "loss": 1.085, + "step": 32757 + }, + { + "epoch": 0.9618298197193024, + "grad_norm": 0.0, + "learning_rate": 7.63182415210495e-08, + "loss": 1.1973, + "step": 32758 + }, + { + "epoch": 0.9618591813964413, + "grad_norm": 0.0, + "learning_rate": 7.620102328710244e-08, + "loss": 1.2617, + "step": 32759 + }, + { + "epoch": 0.9618885430735804, + "grad_norm": 0.0, + "learning_rate": 7.608389479661027e-08, + "loss": 1.2178, + "step": 32760 + }, + { + "epoch": 0.9619179047507194, + "grad_norm": 0.0, + "learning_rate": 7.596685605063325e-08, + "loss": 1.1167, + "step": 32761 + }, + { + "epoch": 0.9619472664278583, + "grad_norm": 0.0, + "learning_rate": 7.584990705022944e-08, + "loss": 1.2256, + "step": 32762 + }, + { + "epoch": 0.9619766281049974, + "grad_norm": 0.0, + "learning_rate": 7.573304779645574e-08, + "loss": 1.2334, + "step": 32763 + }, + { + "epoch": 0.9620059897821364, + "grad_norm": 0.0, + "learning_rate": 7.561627829037021e-08, + "loss": 1.1089, + "step": 32764 + }, + { + "epoch": 0.9620353514592753, + "grad_norm": 0.0, + "learning_rate": 7.549959853302979e-08, + "loss": 1.1904, + "step": 32765 + }, + { + "epoch": 0.9620647131364144, + "grad_norm": 0.0, + "learning_rate": 7.538300852548586e-08, + "loss": 1.1411, + "step": 32766 + }, + { + "epoch": 0.9620940748135534, + "grad_norm": 0.0, + "learning_rate": 7.526650826879644e-08, + "loss": 1.1367, + "step": 32767 + }, + { + "epoch": 0.9621234364906923, + "grad_norm": 0.0, + "learning_rate": 7.515009776401405e-08, + "loss": 1.335, + "step": 32768 + }, + { + "epoch": 0.9621527981678314, + "grad_norm": 0.0, + "learning_rate": 7.503377701219117e-08, + "loss": 1.2139, + "step": 32769 + }, + { + "epoch": 0.9621821598449704, + "grad_norm": 0.0, + "learning_rate": 7.491754601437917e-08, + "loss": 1.2344, + "step": 32770 + }, + { + "epoch": 0.9622115215221093, + "grad_norm": 0.0, + "learning_rate": 7.480140477162944e-08, + "loss": 1.1709, + "step": 32771 + }, + { + "epoch": 0.9622408831992484, + "grad_norm": 0.0, + "learning_rate": 7.468535328499337e-08, + "loss": 1.1006, + "step": 32772 + }, + { + "epoch": 0.9622702448763873, + "grad_norm": 0.0, + "learning_rate": 7.45693915555179e-08, + "loss": 1.1348, + "step": 32773 + }, + { + "epoch": 0.9622996065535263, + "grad_norm": 0.0, + "learning_rate": 7.44535195842544e-08, + "loss": 1.293, + "step": 32774 + }, + { + "epoch": 0.9623289682306654, + "grad_norm": 0.0, + "learning_rate": 7.43377373722498e-08, + "loss": 1.1553, + "step": 32775 + }, + { + "epoch": 0.9623583299078043, + "grad_norm": 0.0, + "learning_rate": 7.422204492055218e-08, + "loss": 1.2539, + "step": 32776 + }, + { + "epoch": 0.9623876915849433, + "grad_norm": 0.0, + "learning_rate": 7.410644223020402e-08, + "loss": 1.127, + "step": 32777 + }, + { + "epoch": 0.9624170532620824, + "grad_norm": 0.0, + "learning_rate": 7.399092930225559e-08, + "loss": 1.312, + "step": 32778 + }, + { + "epoch": 0.9624464149392213, + "grad_norm": 0.0, + "learning_rate": 7.38755061377483e-08, + "loss": 1.2969, + "step": 32779 + }, + { + "epoch": 0.9624757766163603, + "grad_norm": 0.0, + "learning_rate": 7.376017273772685e-08, + "loss": 1.1025, + "step": 32780 + }, + { + "epoch": 0.9625051382934994, + "grad_norm": 0.0, + "learning_rate": 7.364492910323484e-08, + "loss": 1.2314, + "step": 32781 + }, + { + "epoch": 0.9625344999706383, + "grad_norm": 0.0, + "learning_rate": 7.352977523531257e-08, + "loss": 1.2002, + "step": 32782 + }, + { + "epoch": 0.9625638616477773, + "grad_norm": 0.0, + "learning_rate": 7.341471113500475e-08, + "loss": 1.2383, + "step": 32783 + }, + { + "epoch": 0.9625932233249164, + "grad_norm": 0.0, + "learning_rate": 7.329973680334834e-08, + "loss": 1.2646, + "step": 32784 + }, + { + "epoch": 0.9626225850020553, + "grad_norm": 0.0, + "learning_rate": 7.318485224138583e-08, + "loss": 1.2163, + "step": 32785 + }, + { + "epoch": 0.9626519466791943, + "grad_norm": 0.0, + "learning_rate": 7.307005745015416e-08, + "loss": 1.2793, + "step": 32786 + }, + { + "epoch": 0.9626813083563334, + "grad_norm": 0.0, + "learning_rate": 7.295535243069251e-08, + "loss": 1.1211, + "step": 32787 + }, + { + "epoch": 0.9627106700334723, + "grad_norm": 0.0, + "learning_rate": 7.284073718403784e-08, + "loss": 1.0757, + "step": 32788 + }, + { + "epoch": 0.9627400317106113, + "grad_norm": 0.0, + "learning_rate": 7.272621171122707e-08, + "loss": 1.1685, + "step": 32789 + }, + { + "epoch": 0.9627693933877504, + "grad_norm": 0.0, + "learning_rate": 7.261177601329606e-08, + "loss": 1.1675, + "step": 32790 + }, + { + "epoch": 0.9627987550648893, + "grad_norm": 0.0, + "learning_rate": 7.24974300912773e-08, + "loss": 1.1631, + "step": 32791 + }, + { + "epoch": 0.9628281167420283, + "grad_norm": 0.0, + "learning_rate": 7.238317394620886e-08, + "loss": 1.2212, + "step": 32792 + }, + { + "epoch": 0.9628574784191674, + "grad_norm": 0.0, + "learning_rate": 7.226900757912103e-08, + "loss": 1.2773, + "step": 32793 + }, + { + "epoch": 0.9628868400963063, + "grad_norm": 0.0, + "learning_rate": 7.215493099104742e-08, + "loss": 1.1724, + "step": 32794 + }, + { + "epoch": 0.9629162017734453, + "grad_norm": 0.0, + "learning_rate": 7.204094418301833e-08, + "loss": 1.21, + "step": 32795 + }, + { + "epoch": 0.9629455634505844, + "grad_norm": 0.0, + "learning_rate": 7.192704715606625e-08, + "loss": 1.1768, + "step": 32796 + }, + { + "epoch": 0.9629749251277233, + "grad_norm": 0.0, + "learning_rate": 7.181323991122035e-08, + "loss": 1.2236, + "step": 32797 + }, + { + "epoch": 0.9630042868048623, + "grad_norm": 0.0, + "learning_rate": 7.169952244951095e-08, + "loss": 1.2188, + "step": 32798 + }, + { + "epoch": 0.9630336484820012, + "grad_norm": 0.0, + "learning_rate": 7.158589477196499e-08, + "loss": 1.168, + "step": 32799 + }, + { + "epoch": 0.9630630101591403, + "grad_norm": 0.0, + "learning_rate": 7.147235687960941e-08, + "loss": 1.1172, + "step": 32800 + }, + { + "epoch": 0.9630923718362793, + "grad_norm": 0.0, + "learning_rate": 7.13589087734734e-08, + "loss": 1.2495, + "step": 32801 + }, + { + "epoch": 0.9631217335134182, + "grad_norm": 0.0, + "learning_rate": 7.12455504545806e-08, + "loss": 1.2422, + "step": 32802 + }, + { + "epoch": 0.9631510951905573, + "grad_norm": 0.0, + "learning_rate": 7.113228192395793e-08, + "loss": 1.291, + "step": 32803 + }, + { + "epoch": 0.9631804568676963, + "grad_norm": 0.0, + "learning_rate": 7.101910318262795e-08, + "loss": 1.2217, + "step": 32804 + }, + { + "epoch": 0.9632098185448352, + "grad_norm": 0.0, + "learning_rate": 7.090601423161536e-08, + "loss": 1.125, + "step": 32805 + }, + { + "epoch": 0.9632391802219743, + "grad_norm": 0.0, + "learning_rate": 7.079301507194158e-08, + "loss": 1.1592, + "step": 32806 + }, + { + "epoch": 0.9632685418991133, + "grad_norm": 0.0, + "learning_rate": 7.068010570463135e-08, + "loss": 1.186, + "step": 32807 + }, + { + "epoch": 0.9632979035762522, + "grad_norm": 0.0, + "learning_rate": 7.056728613070273e-08, + "loss": 1.1997, + "step": 32808 + }, + { + "epoch": 0.9633272652533913, + "grad_norm": 0.0, + "learning_rate": 7.045455635117825e-08, + "loss": 1.251, + "step": 32809 + }, + { + "epoch": 0.9633566269305303, + "grad_norm": 0.0, + "learning_rate": 7.034191636707488e-08, + "loss": 1.1338, + "step": 32810 + }, + { + "epoch": 0.9633859886076692, + "grad_norm": 0.0, + "learning_rate": 7.02293661794129e-08, + "loss": 1.2676, + "step": 32811 + }, + { + "epoch": 0.9634153502848083, + "grad_norm": 0.0, + "learning_rate": 7.011690578921038e-08, + "loss": 1.1665, + "step": 32812 + }, + { + "epoch": 0.9634447119619473, + "grad_norm": 0.0, + "learning_rate": 7.000453519748318e-08, + "loss": 1.1187, + "step": 32813 + }, + { + "epoch": 0.9634740736390862, + "grad_norm": 0.0, + "learning_rate": 6.989225440524827e-08, + "loss": 1.1787, + "step": 32814 + }, + { + "epoch": 0.9635034353162253, + "grad_norm": 0.0, + "learning_rate": 6.97800634135204e-08, + "loss": 1.1953, + "step": 32815 + }, + { + "epoch": 0.9635327969933642, + "grad_norm": 0.0, + "learning_rate": 6.96679622233154e-08, + "loss": 1.1787, + "step": 32816 + }, + { + "epoch": 0.9635621586705032, + "grad_norm": 0.0, + "learning_rate": 6.955595083564582e-08, + "loss": 1.1299, + "step": 32817 + }, + { + "epoch": 0.9635915203476423, + "grad_norm": 0.0, + "learning_rate": 6.944402925152526e-08, + "loss": 1.2334, + "step": 32818 + }, + { + "epoch": 0.9636208820247812, + "grad_norm": 0.0, + "learning_rate": 6.933219747196518e-08, + "loss": 1.3203, + "step": 32819 + }, + { + "epoch": 0.9636502437019202, + "grad_norm": 0.0, + "learning_rate": 6.922045549797807e-08, + "loss": 1.207, + "step": 32820 + }, + { + "epoch": 0.9636796053790593, + "grad_norm": 0.0, + "learning_rate": 6.910880333057312e-08, + "loss": 1.2051, + "step": 32821 + }, + { + "epoch": 0.9637089670561982, + "grad_norm": 0.0, + "learning_rate": 6.899724097075956e-08, + "loss": 1.1318, + "step": 32822 + }, + { + "epoch": 0.9637383287333372, + "grad_norm": 0.0, + "learning_rate": 6.888576841954875e-08, + "loss": 1.1807, + "step": 32823 + }, + { + "epoch": 0.9637676904104763, + "grad_norm": 0.0, + "learning_rate": 6.877438567794659e-08, + "loss": 1.2578, + "step": 32824 + }, + { + "epoch": 0.9637970520876152, + "grad_norm": 0.0, + "learning_rate": 6.866309274696225e-08, + "loss": 1.2559, + "step": 32825 + }, + { + "epoch": 0.9638264137647542, + "grad_norm": 0.0, + "learning_rate": 6.855188962760051e-08, + "loss": 1.0864, + "step": 32826 + }, + { + "epoch": 0.9638557754418933, + "grad_norm": 0.0, + "learning_rate": 6.84407763208672e-08, + "loss": 1.2119, + "step": 32827 + }, + { + "epoch": 0.9638851371190322, + "grad_norm": 0.0, + "learning_rate": 6.832975282776711e-08, + "loss": 1.1855, + "step": 32828 + }, + { + "epoch": 0.9639144987961712, + "grad_norm": 0.0, + "learning_rate": 6.821881914930495e-08, + "loss": 1.2861, + "step": 32829 + }, + { + "epoch": 0.9639438604733103, + "grad_norm": 0.0, + "learning_rate": 6.810797528648328e-08, + "loss": 1.0332, + "step": 32830 + }, + { + "epoch": 0.9639732221504492, + "grad_norm": 0.0, + "learning_rate": 6.799722124030461e-08, + "loss": 1.2524, + "step": 32831 + }, + { + "epoch": 0.9640025838275882, + "grad_norm": 0.0, + "learning_rate": 6.78865570117715e-08, + "loss": 1.228, + "step": 32832 + }, + { + "epoch": 0.9640319455047273, + "grad_norm": 0.0, + "learning_rate": 6.777598260188423e-08, + "loss": 1.3477, + "step": 32833 + }, + { + "epoch": 0.9640613071818662, + "grad_norm": 0.0, + "learning_rate": 6.766549801164202e-08, + "loss": 1.2334, + "step": 32834 + }, + { + "epoch": 0.9640906688590052, + "grad_norm": 0.0, + "learning_rate": 6.755510324204406e-08, + "loss": 1.1558, + "step": 32835 + }, + { + "epoch": 0.9641200305361443, + "grad_norm": 0.0, + "learning_rate": 6.744479829408845e-08, + "loss": 1.2466, + "step": 32836 + }, + { + "epoch": 0.9641493922132832, + "grad_norm": 0.0, + "learning_rate": 6.73345831687744e-08, + "loss": 1.3154, + "step": 32837 + }, + { + "epoch": 0.9641787538904222, + "grad_norm": 0.0, + "learning_rate": 6.722445786709664e-08, + "loss": 1.165, + "step": 32838 + }, + { + "epoch": 0.9642081155675613, + "grad_norm": 0.0, + "learning_rate": 6.711442239005328e-08, + "loss": 1.3213, + "step": 32839 + }, + { + "epoch": 0.9642374772447002, + "grad_norm": 0.0, + "learning_rate": 6.700447673863686e-08, + "loss": 1.1592, + "step": 32840 + }, + { + "epoch": 0.9642668389218392, + "grad_norm": 0.0, + "learning_rate": 6.689462091384324e-08, + "loss": 1.1299, + "step": 32841 + }, + { + "epoch": 0.9642962005989782, + "grad_norm": 0.0, + "learning_rate": 6.678485491666497e-08, + "loss": 1.1626, + "step": 32842 + }, + { + "epoch": 0.9643255622761172, + "grad_norm": 0.0, + "learning_rate": 6.66751787480957e-08, + "loss": 1.1963, + "step": 32843 + }, + { + "epoch": 0.9643549239532562, + "grad_norm": 0.0, + "learning_rate": 6.656559240912686e-08, + "loss": 1.2588, + "step": 32844 + }, + { + "epoch": 0.9643842856303952, + "grad_norm": 0.0, + "learning_rate": 6.645609590074876e-08, + "loss": 1.21, + "step": 32845 + }, + { + "epoch": 0.9644136473075342, + "grad_norm": 0.0, + "learning_rate": 6.634668922395283e-08, + "loss": 1.2568, + "step": 32846 + }, + { + "epoch": 0.9644430089846732, + "grad_norm": 0.0, + "learning_rate": 6.623737237972827e-08, + "loss": 1.2129, + "step": 32847 + }, + { + "epoch": 0.9644723706618122, + "grad_norm": 0.0, + "learning_rate": 6.61281453690621e-08, + "loss": 1.3037, + "step": 32848 + }, + { + "epoch": 0.9645017323389512, + "grad_norm": 0.0, + "learning_rate": 6.601900819294348e-08, + "loss": 1.1084, + "step": 32849 + }, + { + "epoch": 0.9645310940160902, + "grad_norm": 0.0, + "learning_rate": 6.590996085235946e-08, + "loss": 1.1743, + "step": 32850 + }, + { + "epoch": 0.9645604556932292, + "grad_norm": 0.0, + "learning_rate": 6.580100334829586e-08, + "loss": 1.1724, + "step": 32851 + }, + { + "epoch": 0.9645898173703682, + "grad_norm": 0.0, + "learning_rate": 6.569213568173749e-08, + "loss": 1.1987, + "step": 32852 + }, + { + "epoch": 0.9646191790475072, + "grad_norm": 0.0, + "learning_rate": 6.55833578536702e-08, + "loss": 0.9858, + "step": 32853 + }, + { + "epoch": 0.9646485407246462, + "grad_norm": 0.0, + "learning_rate": 6.547466986507545e-08, + "loss": 1.1045, + "step": 32854 + }, + { + "epoch": 0.9646779024017852, + "grad_norm": 0.0, + "learning_rate": 6.536607171693799e-08, + "loss": 1.1694, + "step": 32855 + }, + { + "epoch": 0.9647072640789242, + "grad_norm": 0.0, + "learning_rate": 6.525756341024036e-08, + "loss": 1.1851, + "step": 32856 + }, + { + "epoch": 0.9647366257560632, + "grad_norm": 0.0, + "learning_rate": 6.514914494596292e-08, + "loss": 1.1875, + "step": 32857 + }, + { + "epoch": 0.9647659874332022, + "grad_norm": 0.0, + "learning_rate": 6.504081632508597e-08, + "loss": 1.2085, + "step": 32858 + }, + { + "epoch": 0.9647953491103411, + "grad_norm": 0.0, + "learning_rate": 6.493257754858873e-08, + "loss": 1.1709, + "step": 32859 + }, + { + "epoch": 0.9648247107874802, + "grad_norm": 0.0, + "learning_rate": 6.482442861745041e-08, + "loss": 1.1533, + "step": 32860 + }, + { + "epoch": 0.9648540724646192, + "grad_norm": 0.0, + "learning_rate": 6.471636953264915e-08, + "loss": 1.165, + "step": 32861 + }, + { + "epoch": 0.9648834341417581, + "grad_norm": 0.0, + "learning_rate": 6.460840029516302e-08, + "loss": 1.2529, + "step": 32862 + }, + { + "epoch": 0.9649127958188972, + "grad_norm": 0.0, + "learning_rate": 6.450052090596681e-08, + "loss": 1.1572, + "step": 32863 + }, + { + "epoch": 0.9649421574960362, + "grad_norm": 0.0, + "learning_rate": 6.439273136603752e-08, + "loss": 1.2812, + "step": 32864 + }, + { + "epoch": 0.9649715191731751, + "grad_norm": 0.0, + "learning_rate": 6.428503167634881e-08, + "loss": 1.3301, + "step": 32865 + }, + { + "epoch": 0.9650008808503142, + "grad_norm": 0.0, + "learning_rate": 6.417742183787545e-08, + "loss": 1.1304, + "step": 32866 + }, + { + "epoch": 0.9650302425274532, + "grad_norm": 0.0, + "learning_rate": 6.406990185159112e-08, + "loss": 1.2637, + "step": 32867 + }, + { + "epoch": 0.9650596042045921, + "grad_norm": 0.0, + "learning_rate": 6.396247171846615e-08, + "loss": 1.1641, + "step": 32868 + }, + { + "epoch": 0.9650889658817312, + "grad_norm": 0.0, + "learning_rate": 6.385513143947309e-08, + "loss": 1.2578, + "step": 32869 + }, + { + "epoch": 0.9651183275588702, + "grad_norm": 0.0, + "learning_rate": 6.374788101558338e-08, + "loss": 1.1167, + "step": 32870 + }, + { + "epoch": 0.9651476892360091, + "grad_norm": 0.0, + "learning_rate": 6.364072044776626e-08, + "loss": 1.2803, + "step": 32871 + }, + { + "epoch": 0.9651770509131482, + "grad_norm": 0.0, + "learning_rate": 6.353364973699094e-08, + "loss": 1.144, + "step": 32872 + }, + { + "epoch": 0.9652064125902872, + "grad_norm": 0.0, + "learning_rate": 6.342666888422445e-08, + "loss": 1.2979, + "step": 32873 + }, + { + "epoch": 0.9652357742674261, + "grad_norm": 0.0, + "learning_rate": 6.331977789043708e-08, + "loss": 1.124, + "step": 32874 + }, + { + "epoch": 0.9652651359445652, + "grad_norm": 0.0, + "learning_rate": 6.321297675659254e-08, + "loss": 1.2104, + "step": 32875 + }, + { + "epoch": 0.9652944976217042, + "grad_norm": 0.0, + "learning_rate": 6.310626548365894e-08, + "loss": 1.1548, + "step": 32876 + }, + { + "epoch": 0.9653238592988431, + "grad_norm": 0.0, + "learning_rate": 6.299964407259995e-08, + "loss": 1.2295, + "step": 32877 + }, + { + "epoch": 0.9653532209759822, + "grad_norm": 0.0, + "learning_rate": 6.289311252437924e-08, + "loss": 1.3164, + "step": 32878 + }, + { + "epoch": 0.9653825826531212, + "grad_norm": 0.0, + "learning_rate": 6.27866708399616e-08, + "loss": 1.1943, + "step": 32879 + }, + { + "epoch": 0.9654119443302601, + "grad_norm": 0.0, + "learning_rate": 6.268031902030847e-08, + "loss": 1.3174, + "step": 32880 + }, + { + "epoch": 0.9654413060073992, + "grad_norm": 0.0, + "learning_rate": 6.257405706638353e-08, + "loss": 1.3486, + "step": 32881 + }, + { + "epoch": 0.9654706676845382, + "grad_norm": 0.0, + "learning_rate": 6.24678849791449e-08, + "loss": 1.1528, + "step": 32882 + }, + { + "epoch": 0.9655000293616771, + "grad_norm": 0.0, + "learning_rate": 6.236180275955517e-08, + "loss": 1.2583, + "step": 32883 + }, + { + "epoch": 0.9655293910388162, + "grad_norm": 0.0, + "learning_rate": 6.225581040857243e-08, + "loss": 1.1602, + "step": 32884 + }, + { + "epoch": 0.9655587527159551, + "grad_norm": 0.0, + "learning_rate": 6.214990792715592e-08, + "loss": 1.1006, + "step": 32885 + }, + { + "epoch": 0.9655881143930941, + "grad_norm": 0.0, + "learning_rate": 6.204409531626266e-08, + "loss": 1.2222, + "step": 32886 + }, + { + "epoch": 0.9656174760702332, + "grad_norm": 0.0, + "learning_rate": 6.193837257684964e-08, + "loss": 1.0854, + "step": 32887 + }, + { + "epoch": 0.9656468377473721, + "grad_norm": 0.0, + "learning_rate": 6.18327397098728e-08, + "loss": 1.1543, + "step": 32888 + }, + { + "epoch": 0.9656761994245111, + "grad_norm": 0.0, + "learning_rate": 6.172719671628802e-08, + "loss": 1.3838, + "step": 32889 + }, + { + "epoch": 0.9657055611016502, + "grad_norm": 0.0, + "learning_rate": 6.162174359705009e-08, + "loss": 1.1611, + "step": 32890 + }, + { + "epoch": 0.9657349227787891, + "grad_norm": 0.0, + "learning_rate": 6.151638035311158e-08, + "loss": 1.2695, + "step": 32891 + }, + { + "epoch": 0.9657642844559281, + "grad_norm": 0.0, + "learning_rate": 6.141110698542508e-08, + "loss": 1.2402, + "step": 32892 + }, + { + "epoch": 0.9657936461330672, + "grad_norm": 0.0, + "learning_rate": 6.130592349494313e-08, + "loss": 1.2285, + "step": 32893 + }, + { + "epoch": 0.9658230078102061, + "grad_norm": 0.0, + "learning_rate": 6.120082988261833e-08, + "loss": 1.0884, + "step": 32894 + }, + { + "epoch": 0.9658523694873451, + "grad_norm": 0.0, + "learning_rate": 6.10958261493988e-08, + "loss": 1.208, + "step": 32895 + }, + { + "epoch": 0.9658817311644842, + "grad_norm": 0.0, + "learning_rate": 6.099091229623489e-08, + "loss": 1.2207, + "step": 32896 + }, + { + "epoch": 0.9659110928416231, + "grad_norm": 0.0, + "learning_rate": 6.088608832407694e-08, + "loss": 1.2207, + "step": 32897 + }, + { + "epoch": 0.9659404545187621, + "grad_norm": 0.0, + "learning_rate": 6.078135423386977e-08, + "loss": 1.2148, + "step": 32898 + }, + { + "epoch": 0.9659698161959012, + "grad_norm": 0.0, + "learning_rate": 6.067671002656372e-08, + "loss": 1.2148, + "step": 32899 + }, + { + "epoch": 0.9659991778730401, + "grad_norm": 0.0, + "learning_rate": 6.05721557031036e-08, + "loss": 1.0151, + "step": 32900 + }, + { + "epoch": 0.9660285395501791, + "grad_norm": 0.0, + "learning_rate": 6.04676912644342e-08, + "loss": 1.1934, + "step": 32901 + }, + { + "epoch": 0.966057901227318, + "grad_norm": 0.0, + "learning_rate": 6.036331671150142e-08, + "loss": 1.1367, + "step": 32902 + }, + { + "epoch": 0.9660872629044571, + "grad_norm": 0.0, + "learning_rate": 6.025903204524897e-08, + "loss": 1.2451, + "step": 32903 + }, + { + "epoch": 0.9661166245815961, + "grad_norm": 0.0, + "learning_rate": 6.01548372666183e-08, + "loss": 1.2393, + "step": 32904 + }, + { + "epoch": 0.966145986258735, + "grad_norm": 0.0, + "learning_rate": 6.005073237655423e-08, + "loss": 1.1147, + "step": 32905 + }, + { + "epoch": 0.9661753479358741, + "grad_norm": 0.0, + "learning_rate": 5.994671737599711e-08, + "loss": 1.1206, + "step": 32906 + }, + { + "epoch": 0.9662047096130131, + "grad_norm": 0.0, + "learning_rate": 5.984279226588841e-08, + "loss": 1.2637, + "step": 32907 + }, + { + "epoch": 0.966234071290152, + "grad_norm": 0.0, + "learning_rate": 5.973895704716625e-08, + "loss": 1.2588, + "step": 32908 + }, + { + "epoch": 0.9662634329672911, + "grad_norm": 0.0, + "learning_rate": 5.963521172076992e-08, + "loss": 1.2168, + "step": 32909 + }, + { + "epoch": 0.9662927946444301, + "grad_norm": 0.0, + "learning_rate": 5.953155628763863e-08, + "loss": 1.2129, + "step": 32910 + }, + { + "epoch": 0.966322156321569, + "grad_norm": 0.0, + "learning_rate": 5.942799074870831e-08, + "loss": 1.3711, + "step": 32911 + }, + { + "epoch": 0.9663515179987081, + "grad_norm": 0.0, + "learning_rate": 5.9324515104917105e-08, + "loss": 1.126, + "step": 32912 + }, + { + "epoch": 0.9663808796758471, + "grad_norm": 0.0, + "learning_rate": 5.9221129357200924e-08, + "loss": 1.3018, + "step": 32913 + }, + { + "epoch": 0.966410241352986, + "grad_norm": 0.0, + "learning_rate": 5.911783350649347e-08, + "loss": 1.1509, + "step": 32914 + }, + { + "epoch": 0.9664396030301251, + "grad_norm": 0.0, + "learning_rate": 5.9014627553728446e-08, + "loss": 1.1973, + "step": 32915 + }, + { + "epoch": 0.9664689647072641, + "grad_norm": 0.0, + "learning_rate": 5.891151149984176e-08, + "loss": 1.1699, + "step": 32916 + }, + { + "epoch": 0.966498326384403, + "grad_norm": 0.0, + "learning_rate": 5.8808485345762666e-08, + "loss": 1.2627, + "step": 32917 + }, + { + "epoch": 0.9665276880615421, + "grad_norm": 0.0, + "learning_rate": 5.8705549092424875e-08, + "loss": 1.2251, + "step": 32918 + }, + { + "epoch": 0.9665570497386811, + "grad_norm": 0.0, + "learning_rate": 5.8602702740759856e-08, + "loss": 1.2207, + "step": 32919 + }, + { + "epoch": 0.96658641141582, + "grad_norm": 0.0, + "learning_rate": 5.8499946291694646e-08, + "loss": 1.3496, + "step": 32920 + }, + { + "epoch": 0.9666157730929591, + "grad_norm": 0.0, + "learning_rate": 5.839727974616183e-08, + "loss": 1.186, + "step": 32921 + }, + { + "epoch": 0.9666451347700981, + "grad_norm": 0.0, + "learning_rate": 5.829470310508845e-08, + "loss": 1.2593, + "step": 32922 + }, + { + "epoch": 0.966674496447237, + "grad_norm": 0.0, + "learning_rate": 5.8192216369401536e-08, + "loss": 1.0425, + "step": 32923 + }, + { + "epoch": 0.9667038581243761, + "grad_norm": 0.0, + "learning_rate": 5.8089819540029235e-08, + "loss": 1.2124, + "step": 32924 + }, + { + "epoch": 0.966733219801515, + "grad_norm": 0.0, + "learning_rate": 5.798751261789748e-08, + "loss": 1.3242, + "step": 32925 + }, + { + "epoch": 0.966762581478654, + "grad_norm": 0.0, + "learning_rate": 5.788529560392997e-08, + "loss": 1.1934, + "step": 32926 + }, + { + "epoch": 0.9667919431557931, + "grad_norm": 0.0, + "learning_rate": 5.7783168499051526e-08, + "loss": 1.0918, + "step": 32927 + }, + { + "epoch": 0.966821304832932, + "grad_norm": 0.0, + "learning_rate": 5.768113130418695e-08, + "loss": 1.146, + "step": 32928 + }, + { + "epoch": 0.966850666510071, + "grad_norm": 0.0, + "learning_rate": 5.757918402025664e-08, + "loss": 1.1323, + "step": 32929 + }, + { + "epoch": 0.9668800281872101, + "grad_norm": 0.0, + "learning_rate": 5.7477326648185396e-08, + "loss": 1.1768, + "step": 32930 + }, + { + "epoch": 0.966909389864349, + "grad_norm": 0.0, + "learning_rate": 5.7375559188893594e-08, + "loss": 1.3159, + "step": 32931 + }, + { + "epoch": 0.966938751541488, + "grad_norm": 0.0, + "learning_rate": 5.72738816433005e-08, + "loss": 1.249, + "step": 32932 + }, + { + "epoch": 0.9669681132186271, + "grad_norm": 0.0, + "learning_rate": 5.717229401232538e-08, + "loss": 1.1743, + "step": 32933 + }, + { + "epoch": 0.966997474895766, + "grad_norm": 0.0, + "learning_rate": 5.70707962968875e-08, + "loss": 1.2583, + "step": 32934 + }, + { + "epoch": 0.967026836572905, + "grad_norm": 0.0, + "learning_rate": 5.696938849790501e-08, + "loss": 1.1699, + "step": 32935 + }, + { + "epoch": 0.9670561982500441, + "grad_norm": 0.0, + "learning_rate": 5.686807061629496e-08, + "loss": 1.2432, + "step": 32936 + }, + { + "epoch": 0.967085559927183, + "grad_norm": 0.0, + "learning_rate": 5.676684265297439e-08, + "loss": 1.1611, + "step": 32937 + }, + { + "epoch": 0.967114921604322, + "grad_norm": 0.0, + "learning_rate": 5.6665704608855896e-08, + "loss": 1.1934, + "step": 32938 + }, + { + "epoch": 0.9671442832814611, + "grad_norm": 0.0, + "learning_rate": 5.656465648485765e-08, + "loss": 1.1396, + "step": 32939 + }, + { + "epoch": 0.9671736449586, + "grad_norm": 0.0, + "learning_rate": 5.6463698281891134e-08, + "loss": 1.1738, + "step": 32940 + }, + { + "epoch": 0.967203006635739, + "grad_norm": 0.0, + "learning_rate": 5.636283000087006e-08, + "loss": 1.1968, + "step": 32941 + }, + { + "epoch": 0.9672323683128781, + "grad_norm": 0.0, + "learning_rate": 5.6262051642705927e-08, + "loss": 1.0898, + "step": 32942 + }, + { + "epoch": 0.967261729990017, + "grad_norm": 0.0, + "learning_rate": 5.616136320831134e-08, + "loss": 1.166, + "step": 32943 + }, + { + "epoch": 0.967291091667156, + "grad_norm": 0.0, + "learning_rate": 5.606076469859556e-08, + "loss": 1.1025, + "step": 32944 + }, + { + "epoch": 0.9673204533442951, + "grad_norm": 0.0, + "learning_rate": 5.596025611447009e-08, + "loss": 1.1709, + "step": 32945 + }, + { + "epoch": 0.967349815021434, + "grad_norm": 0.0, + "learning_rate": 5.585983745684198e-08, + "loss": 1.0835, + "step": 32946 + }, + { + "epoch": 0.967379176698573, + "grad_norm": 0.0, + "learning_rate": 5.575950872661939e-08, + "loss": 1.2998, + "step": 32947 + }, + { + "epoch": 0.9674085383757121, + "grad_norm": 0.0, + "learning_rate": 5.56592699247116e-08, + "loss": 1.2969, + "step": 32948 + }, + { + "epoch": 0.967437900052851, + "grad_norm": 0.0, + "learning_rate": 5.555912105202344e-08, + "loss": 1.1807, + "step": 32949 + }, + { + "epoch": 0.96746726172999, + "grad_norm": 0.0, + "learning_rate": 5.545906210946084e-08, + "loss": 1.229, + "step": 32950 + }, + { + "epoch": 0.967496623407129, + "grad_norm": 0.0, + "learning_rate": 5.535909309792864e-08, + "loss": 1.1807, + "step": 32951 + }, + { + "epoch": 0.967525985084268, + "grad_norm": 0.0, + "learning_rate": 5.525921401833057e-08, + "loss": 1.1455, + "step": 32952 + }, + { + "epoch": 0.967555346761407, + "grad_norm": 0.0, + "learning_rate": 5.5159424871571446e-08, + "loss": 1.1621, + "step": 32953 + }, + { + "epoch": 0.967584708438546, + "grad_norm": 0.0, + "learning_rate": 5.5059725658551665e-08, + "loss": 1.2051, + "step": 32954 + }, + { + "epoch": 0.967614070115685, + "grad_norm": 0.0, + "learning_rate": 5.4960116380173845e-08, + "loss": 1.1553, + "step": 32955 + }, + { + "epoch": 0.967643431792824, + "grad_norm": 0.0, + "learning_rate": 5.486059703733948e-08, + "loss": 1.334, + "step": 32956 + }, + { + "epoch": 0.967672793469963, + "grad_norm": 0.0, + "learning_rate": 5.476116763094674e-08, + "loss": 1.0981, + "step": 32957 + }, + { + "epoch": 0.967702155147102, + "grad_norm": 0.0, + "learning_rate": 5.466182816189602e-08, + "loss": 1.167, + "step": 32958 + }, + { + "epoch": 0.967731516824241, + "grad_norm": 0.0, + "learning_rate": 5.456257863108549e-08, + "loss": 1.2197, + "step": 32959 + }, + { + "epoch": 0.96776087850138, + "grad_norm": 0.0, + "learning_rate": 5.4463419039413324e-08, + "loss": 1.0942, + "step": 32960 + }, + { + "epoch": 0.967790240178519, + "grad_norm": 0.0, + "learning_rate": 5.436434938777435e-08, + "loss": 1.2061, + "step": 32961 + }, + { + "epoch": 0.967819601855658, + "grad_norm": 0.0, + "learning_rate": 5.426536967706564e-08, + "loss": 1.1934, + "step": 32962 + }, + { + "epoch": 0.967848963532797, + "grad_norm": 0.0, + "learning_rate": 5.4166479908183135e-08, + "loss": 1.292, + "step": 32963 + }, + { + "epoch": 0.967878325209936, + "grad_norm": 0.0, + "learning_rate": 5.406768008202057e-08, + "loss": 1.249, + "step": 32964 + }, + { + "epoch": 0.967907686887075, + "grad_norm": 0.0, + "learning_rate": 5.3968970199470564e-08, + "loss": 1.1836, + "step": 32965 + }, + { + "epoch": 0.967937048564214, + "grad_norm": 0.0, + "learning_rate": 5.387035026142684e-08, + "loss": 1.2109, + "step": 32966 + }, + { + "epoch": 0.967966410241353, + "grad_norm": 0.0, + "learning_rate": 5.3771820268780914e-08, + "loss": 1.1045, + "step": 32967 + }, + { + "epoch": 0.967995771918492, + "grad_norm": 0.0, + "learning_rate": 5.367338022242319e-08, + "loss": 1.2905, + "step": 32968 + }, + { + "epoch": 0.968025133595631, + "grad_norm": 0.0, + "learning_rate": 5.357503012324405e-08, + "loss": 1.2046, + "step": 32969 + }, + { + "epoch": 0.96805449527277, + "grad_norm": 0.0, + "learning_rate": 5.3476769972133916e-08, + "loss": 1.0542, + "step": 32970 + }, + { + "epoch": 0.968083856949909, + "grad_norm": 0.0, + "learning_rate": 5.3378599769979835e-08, + "loss": 1.1914, + "step": 32971 + }, + { + "epoch": 0.968113218627048, + "grad_norm": 0.0, + "learning_rate": 5.32805195176711e-08, + "loss": 1.084, + "step": 32972 + }, + { + "epoch": 0.968142580304187, + "grad_norm": 0.0, + "learning_rate": 5.318252921609368e-08, + "loss": 1.146, + "step": 32973 + }, + { + "epoch": 0.968171941981326, + "grad_norm": 0.0, + "learning_rate": 5.3084628866134634e-08, + "loss": 1.2646, + "step": 32974 + }, + { + "epoch": 0.968201303658465, + "grad_norm": 0.0, + "learning_rate": 5.29868184686777e-08, + "loss": 1.3398, + "step": 32975 + }, + { + "epoch": 0.968230665335604, + "grad_norm": 0.0, + "learning_rate": 5.2889098024609956e-08, + "loss": 1.1099, + "step": 32976 + }, + { + "epoch": 0.9682600270127429, + "grad_norm": 0.0, + "learning_rate": 5.27914675348129e-08, + "loss": 1.1523, + "step": 32977 + }, + { + "epoch": 0.968289388689882, + "grad_norm": 0.0, + "learning_rate": 5.269392700016918e-08, + "loss": 1.186, + "step": 32978 + }, + { + "epoch": 0.968318750367021, + "grad_norm": 0.0, + "learning_rate": 5.259647642156362e-08, + "loss": 1.1289, + "step": 32979 + }, + { + "epoch": 0.9683481120441599, + "grad_norm": 0.0, + "learning_rate": 5.2499115799874434e-08, + "loss": 1.3145, + "step": 32980 + }, + { + "epoch": 0.968377473721299, + "grad_norm": 0.0, + "learning_rate": 5.240184513598312e-08, + "loss": 1.2383, + "step": 32981 + }, + { + "epoch": 0.968406835398438, + "grad_norm": 0.0, + "learning_rate": 5.230466443077009e-08, + "loss": 1.0654, + "step": 32982 + }, + { + "epoch": 0.9684361970755769, + "grad_norm": 0.0, + "learning_rate": 5.220757368511353e-08, + "loss": 1.291, + "step": 32983 + }, + { + "epoch": 0.968465558752716, + "grad_norm": 0.0, + "learning_rate": 5.211057289989163e-08, + "loss": 1.292, + "step": 32984 + }, + { + "epoch": 0.968494920429855, + "grad_norm": 0.0, + "learning_rate": 5.2013662075981466e-08, + "loss": 1.1284, + "step": 32985 + }, + { + "epoch": 0.9685242821069939, + "grad_norm": 0.0, + "learning_rate": 5.191684121426011e-08, + "loss": 1.2754, + "step": 32986 + }, + { + "epoch": 0.968553643784133, + "grad_norm": 0.0, + "learning_rate": 5.1820110315601305e-08, + "loss": 1.168, + "step": 32987 + }, + { + "epoch": 0.968583005461272, + "grad_norm": 0.0, + "learning_rate": 5.172346938088213e-08, + "loss": 1.0454, + "step": 32988 + }, + { + "epoch": 0.9686123671384109, + "grad_norm": 0.0, + "learning_rate": 5.1626918410974116e-08, + "loss": 1.1362, + "step": 32989 + }, + { + "epoch": 0.96864172881555, + "grad_norm": 0.0, + "learning_rate": 5.153045740675322e-08, + "loss": 1.0962, + "step": 32990 + }, + { + "epoch": 0.968671090492689, + "grad_norm": 0.0, + "learning_rate": 5.1434086369088754e-08, + "loss": 1.0098, + "step": 32991 + }, + { + "epoch": 0.9687004521698279, + "grad_norm": 0.0, + "learning_rate": 5.133780529885446e-08, + "loss": 1.1748, + "step": 32992 + }, + { + "epoch": 0.968729813846967, + "grad_norm": 0.0, + "learning_rate": 5.124161419691964e-08, + "loss": 1.2178, + "step": 32993 + }, + { + "epoch": 0.968759175524106, + "grad_norm": 0.0, + "learning_rate": 5.1145513064155824e-08, + "loss": 1.2461, + "step": 32994 + }, + { + "epoch": 0.9687885372012449, + "grad_norm": 0.0, + "learning_rate": 5.104950190143121e-08, + "loss": 1.2129, + "step": 32995 + }, + { + "epoch": 0.968817898878384, + "grad_norm": 0.0, + "learning_rate": 5.095358070961287e-08, + "loss": 1.1875, + "step": 32996 + }, + { + "epoch": 0.968847260555523, + "grad_norm": 0.0, + "learning_rate": 5.085774948957012e-08, + "loss": 1.1929, + "step": 32997 + }, + { + "epoch": 0.9688766222326619, + "grad_norm": 0.0, + "learning_rate": 5.0762008242167816e-08, + "loss": 1.2148, + "step": 32998 + }, + { + "epoch": 0.968905983909801, + "grad_norm": 0.0, + "learning_rate": 5.066635696827415e-08, + "loss": 1.146, + "step": 32999 + }, + { + "epoch": 0.96893534558694, + "grad_norm": 0.0, + "learning_rate": 5.057079566875178e-08, + "loss": 1.1094, + "step": 33000 + }, + { + "epoch": 0.9689647072640789, + "grad_norm": 0.0, + "learning_rate": 5.047532434446556e-08, + "loss": 1.106, + "step": 33001 + }, + { + "epoch": 0.9689940689412179, + "grad_norm": 0.0, + "learning_rate": 5.037994299627924e-08, + "loss": 1.1851, + "step": 33002 + }, + { + "epoch": 0.9690234306183569, + "grad_norm": 0.0, + "learning_rate": 5.028465162505547e-08, + "loss": 1.0552, + "step": 33003 + }, + { + "epoch": 0.9690527922954959, + "grad_norm": 0.0, + "learning_rate": 5.018945023165467e-08, + "loss": 1.0776, + "step": 33004 + }, + { + "epoch": 0.9690821539726349, + "grad_norm": 0.0, + "learning_rate": 5.0094338816940593e-08, + "loss": 1.2202, + "step": 33005 + }, + { + "epoch": 0.9691115156497739, + "grad_norm": 0.0, + "learning_rate": 4.999931738177144e-08, + "loss": 1.0928, + "step": 33006 + }, + { + "epoch": 0.9691408773269129, + "grad_norm": 0.0, + "learning_rate": 4.990438592700542e-08, + "loss": 1.3096, + "step": 33007 + }, + { + "epoch": 0.9691702390040519, + "grad_norm": 0.0, + "learning_rate": 4.980954445350294e-08, + "loss": 1.2568, + "step": 33008 + }, + { + "epoch": 0.9691996006811909, + "grad_norm": 0.0, + "learning_rate": 4.971479296212112e-08, + "loss": 1.2246, + "step": 33009 + }, + { + "epoch": 0.9692289623583299, + "grad_norm": 0.0, + "learning_rate": 4.9620131453717025e-08, + "loss": 1.2617, + "step": 33010 + }, + { + "epoch": 0.9692583240354689, + "grad_norm": 0.0, + "learning_rate": 4.952555992914554e-08, + "loss": 1.1553, + "step": 33011 + }, + { + "epoch": 0.9692876857126079, + "grad_norm": 0.0, + "learning_rate": 4.943107838926375e-08, + "loss": 1.1543, + "step": 33012 + }, + { + "epoch": 0.9693170473897469, + "grad_norm": 0.0, + "learning_rate": 4.933668683492432e-08, + "loss": 1.2236, + "step": 33013 + }, + { + "epoch": 0.9693464090668859, + "grad_norm": 0.0, + "learning_rate": 4.9242385266982104e-08, + "loss": 1.2061, + "step": 33014 + }, + { + "epoch": 0.9693757707440249, + "grad_norm": 0.0, + "learning_rate": 4.914817368628977e-08, + "loss": 1.2871, + "step": 33015 + }, + { + "epoch": 0.9694051324211639, + "grad_norm": 0.0, + "learning_rate": 4.9054052093698845e-08, + "loss": 1.1582, + "step": 33016 + }, + { + "epoch": 0.9694344940983028, + "grad_norm": 0.0, + "learning_rate": 4.896002049005977e-08, + "loss": 1.231, + "step": 33017 + }, + { + "epoch": 0.9694638557754419, + "grad_norm": 0.0, + "learning_rate": 4.886607887622408e-08, + "loss": 1.2158, + "step": 33018 + }, + { + "epoch": 0.9694932174525809, + "grad_norm": 0.0, + "learning_rate": 4.877222725304109e-08, + "loss": 1.1045, + "step": 33019 + }, + { + "epoch": 0.9695225791297198, + "grad_norm": 0.0, + "learning_rate": 4.8678465621359025e-08, + "loss": 1.2422, + "step": 33020 + }, + { + "epoch": 0.9695519408068589, + "grad_norm": 0.0, + "learning_rate": 4.85847939820272e-08, + "loss": 1.0938, + "step": 33021 + }, + { + "epoch": 0.9695813024839979, + "grad_norm": 0.0, + "learning_rate": 4.8491212335890494e-08, + "loss": 1.1338, + "step": 33022 + }, + { + "epoch": 0.9696106641611368, + "grad_norm": 0.0, + "learning_rate": 4.839772068379711e-08, + "loss": 1.3428, + "step": 33023 + }, + { + "epoch": 0.9696400258382759, + "grad_norm": 0.0, + "learning_rate": 4.830431902659194e-08, + "loss": 1.2266, + "step": 33024 + }, + { + "epoch": 0.9696693875154149, + "grad_norm": 0.0, + "learning_rate": 4.8211007365118746e-08, + "loss": 1.1777, + "step": 33025 + }, + { + "epoch": 0.9696987491925538, + "grad_norm": 0.0, + "learning_rate": 4.8117785700222406e-08, + "loss": 1.1431, + "step": 33026 + }, + { + "epoch": 0.9697281108696929, + "grad_norm": 0.0, + "learning_rate": 4.802465403274559e-08, + "loss": 1.207, + "step": 33027 + }, + { + "epoch": 0.9697574725468319, + "grad_norm": 0.0, + "learning_rate": 4.793161236353094e-08, + "loss": 1.2715, + "step": 33028 + }, + { + "epoch": 0.9697868342239708, + "grad_norm": 0.0, + "learning_rate": 4.7838660693418916e-08, + "loss": 1.3047, + "step": 33029 + }, + { + "epoch": 0.9698161959011099, + "grad_norm": 0.0, + "learning_rate": 4.774579902325105e-08, + "loss": 1.1558, + "step": 33030 + }, + { + "epoch": 0.9698455575782489, + "grad_norm": 0.0, + "learning_rate": 4.765302735386668e-08, + "loss": 1.1665, + "step": 33031 + }, + { + "epoch": 0.9698749192553878, + "grad_norm": 0.0, + "learning_rate": 4.756034568610512e-08, + "loss": 1.0889, + "step": 33032 + }, + { + "epoch": 0.9699042809325269, + "grad_norm": 0.0, + "learning_rate": 4.74677540208035e-08, + "loss": 1.2495, + "step": 33033 + }, + { + "epoch": 0.9699336426096659, + "grad_norm": 0.0, + "learning_rate": 4.7375252358800026e-08, + "loss": 1.2588, + "step": 33034 + }, + { + "epoch": 0.9699630042868048, + "grad_norm": 0.0, + "learning_rate": 4.728284070093181e-08, + "loss": 1.1182, + "step": 33035 + }, + { + "epoch": 0.9699923659639439, + "grad_norm": 0.0, + "learning_rate": 4.719051904803262e-08, + "loss": 1.2744, + "step": 33036 + }, + { + "epoch": 0.9700217276410829, + "grad_norm": 0.0, + "learning_rate": 4.709828740093958e-08, + "loss": 1.1958, + "step": 33037 + }, + { + "epoch": 0.9700510893182218, + "grad_norm": 0.0, + "learning_rate": 4.7006145760485345e-08, + "loss": 1.208, + "step": 33038 + }, + { + "epoch": 0.9700804509953609, + "grad_norm": 0.0, + "learning_rate": 4.6914094127503693e-08, + "loss": 1.1812, + "step": 33039 + }, + { + "epoch": 0.9701098126724998, + "grad_norm": 0.0, + "learning_rate": 4.682213250282619e-08, + "loss": 1.1499, + "step": 33040 + }, + { + "epoch": 0.9701391743496388, + "grad_norm": 0.0, + "learning_rate": 4.6730260887286606e-08, + "loss": 1.2041, + "step": 33041 + }, + { + "epoch": 0.9701685360267779, + "grad_norm": 0.0, + "learning_rate": 4.663847928171317e-08, + "loss": 1.2598, + "step": 33042 + }, + { + "epoch": 0.9701978977039168, + "grad_norm": 0.0, + "learning_rate": 4.6546787686937435e-08, + "loss": 1.1396, + "step": 33043 + }, + { + "epoch": 0.9702272593810558, + "grad_norm": 0.0, + "learning_rate": 4.645518610378763e-08, + "loss": 1.2314, + "step": 33044 + }, + { + "epoch": 0.9702566210581949, + "grad_norm": 0.0, + "learning_rate": 4.6363674533093095e-08, + "loss": 1.1914, + "step": 33045 + }, + { + "epoch": 0.9702859827353338, + "grad_norm": 0.0, + "learning_rate": 4.627225297568094e-08, + "loss": 1.1377, + "step": 33046 + }, + { + "epoch": 0.9703153444124728, + "grad_norm": 0.0, + "learning_rate": 4.618092143237829e-08, + "loss": 1.1611, + "step": 33047 + }, + { + "epoch": 0.9703447060896119, + "grad_norm": 0.0, + "learning_rate": 4.6089679904011144e-08, + "loss": 1.228, + "step": 33048 + }, + { + "epoch": 0.9703740677667508, + "grad_norm": 0.0, + "learning_rate": 4.5998528391403286e-08, + "loss": 1.2383, + "step": 33049 + }, + { + "epoch": 0.9704034294438898, + "grad_norm": 0.0, + "learning_rate": 4.5907466895380726e-08, + "loss": 1.1934, + "step": 33050 + }, + { + "epoch": 0.9704327911210289, + "grad_norm": 0.0, + "learning_rate": 4.581649541676614e-08, + "loss": 1.335, + "step": 33051 + }, + { + "epoch": 0.9704621527981678, + "grad_norm": 0.0, + "learning_rate": 4.5725613956382195e-08, + "loss": 1.3926, + "step": 33052 + }, + { + "epoch": 0.9704915144753068, + "grad_norm": 0.0, + "learning_rate": 4.563482251505047e-08, + "loss": 1.3271, + "step": 33053 + }, + { + "epoch": 0.9705208761524459, + "grad_norm": 0.0, + "learning_rate": 4.554412109359252e-08, + "loss": 1.2959, + "step": 33054 + }, + { + "epoch": 0.9705502378295848, + "grad_norm": 0.0, + "learning_rate": 4.54535096928288e-08, + "loss": 1.1748, + "step": 33055 + }, + { + "epoch": 0.9705795995067238, + "grad_norm": 0.0, + "learning_rate": 4.536298831357866e-08, + "loss": 1.1816, + "step": 33056 + }, + { + "epoch": 0.9706089611838629, + "grad_norm": 0.0, + "learning_rate": 4.527255695665922e-08, + "loss": 1.1738, + "step": 33057 + }, + { + "epoch": 0.9706383228610018, + "grad_norm": 0.0, + "learning_rate": 4.5182215622890936e-08, + "loss": 1.2871, + "step": 33058 + }, + { + "epoch": 0.9706676845381408, + "grad_norm": 0.0, + "learning_rate": 4.509196431308871e-08, + "loss": 1.2212, + "step": 33059 + }, + { + "epoch": 0.9706970462152799, + "grad_norm": 0.0, + "learning_rate": 4.5001803028068556e-08, + "loss": 1.145, + "step": 33060 + }, + { + "epoch": 0.9707264078924188, + "grad_norm": 0.0, + "learning_rate": 4.49117317686476e-08, + "loss": 1.1689, + "step": 33061 + }, + { + "epoch": 0.9707557695695578, + "grad_norm": 0.0, + "learning_rate": 4.482175053563964e-08, + "loss": 1.1216, + "step": 33062 + }, + { + "epoch": 0.9707851312466969, + "grad_norm": 0.0, + "learning_rate": 4.473185932985735e-08, + "loss": 1.2036, + "step": 33063 + }, + { + "epoch": 0.9708144929238358, + "grad_norm": 0.0, + "learning_rate": 4.464205815211453e-08, + "loss": 1.2471, + "step": 33064 + }, + { + "epoch": 0.9708438546009748, + "grad_norm": 0.0, + "learning_rate": 4.455234700322386e-08, + "loss": 1.1943, + "step": 33065 + }, + { + "epoch": 0.9708732162781138, + "grad_norm": 0.0, + "learning_rate": 4.4462725883995805e-08, + "loss": 1.1382, + "step": 33066 + }, + { + "epoch": 0.9709025779552528, + "grad_norm": 0.0, + "learning_rate": 4.4373194795239713e-08, + "loss": 1.2939, + "step": 33067 + }, + { + "epoch": 0.9709319396323918, + "grad_norm": 0.0, + "learning_rate": 4.4283753737768277e-08, + "loss": 1.2402, + "step": 33068 + }, + { + "epoch": 0.9709613013095308, + "grad_norm": 0.0, + "learning_rate": 4.4194402712387506e-08, + "loss": 1.1738, + "step": 33069 + }, + { + "epoch": 0.9709906629866698, + "grad_norm": 0.0, + "learning_rate": 4.410514171990676e-08, + "loss": 1.1904, + "step": 33070 + }, + { + "epoch": 0.9710200246638088, + "grad_norm": 0.0, + "learning_rate": 4.4015970761132066e-08, + "loss": 1.2163, + "step": 33071 + }, + { + "epoch": 0.9710493863409478, + "grad_norm": 0.0, + "learning_rate": 4.3926889836871657e-08, + "loss": 1.2383, + "step": 33072 + }, + { + "epoch": 0.9710787480180868, + "grad_norm": 0.0, + "learning_rate": 4.3837898947930446e-08, + "loss": 0.9932, + "step": 33073 + }, + { + "epoch": 0.9711081096952258, + "grad_norm": 0.0, + "learning_rate": 4.374899809511224e-08, + "loss": 1.1948, + "step": 33074 + }, + { + "epoch": 0.9711374713723648, + "grad_norm": 0.0, + "learning_rate": 4.366018727922194e-08, + "loss": 1.1616, + "step": 33075 + }, + { + "epoch": 0.9711668330495038, + "grad_norm": 0.0, + "learning_rate": 4.357146650106226e-08, + "loss": 1.1787, + "step": 33076 + }, + { + "epoch": 0.9711961947266428, + "grad_norm": 0.0, + "learning_rate": 4.348283576143586e-08, + "loss": 1.1665, + "step": 33077 + }, + { + "epoch": 0.9712255564037818, + "grad_norm": 0.0, + "learning_rate": 4.339429506114434e-08, + "loss": 1.2529, + "step": 33078 + }, + { + "epoch": 0.9712549180809208, + "grad_norm": 0.0, + "learning_rate": 4.330584440098817e-08, + "loss": 1.1729, + "step": 33079 + }, + { + "epoch": 0.9712842797580598, + "grad_norm": 0.0, + "learning_rate": 4.321748378176671e-08, + "loss": 1.2666, + "step": 33080 + }, + { + "epoch": 0.9713136414351988, + "grad_norm": 0.0, + "learning_rate": 4.312921320428043e-08, + "loss": 1.2168, + "step": 33081 + }, + { + "epoch": 0.9713430031123378, + "grad_norm": 0.0, + "learning_rate": 4.3041032669325354e-08, + "loss": 1.1748, + "step": 33082 + }, + { + "epoch": 0.9713723647894767, + "grad_norm": 0.0, + "learning_rate": 4.295294217770085e-08, + "loss": 1.1187, + "step": 33083 + }, + { + "epoch": 0.9714017264666158, + "grad_norm": 0.0, + "learning_rate": 4.286494173020295e-08, + "loss": 1.0811, + "step": 33084 + }, + { + "epoch": 0.9714310881437548, + "grad_norm": 0.0, + "learning_rate": 4.277703132762767e-08, + "loss": 1.2207, + "step": 33085 + }, + { + "epoch": 0.9714604498208937, + "grad_norm": 0.0, + "learning_rate": 4.268921097076884e-08, + "loss": 1.2715, + "step": 33086 + }, + { + "epoch": 0.9714898114980328, + "grad_norm": 0.0, + "learning_rate": 4.260148066042247e-08, + "loss": 1.1948, + "step": 33087 + }, + { + "epoch": 0.9715191731751718, + "grad_norm": 0.0, + "learning_rate": 4.251384039738127e-08, + "loss": 1.1509, + "step": 33088 + }, + { + "epoch": 0.9715485348523107, + "grad_norm": 0.0, + "learning_rate": 4.242629018243683e-08, + "loss": 1.293, + "step": 33089 + }, + { + "epoch": 0.9715778965294498, + "grad_norm": 0.0, + "learning_rate": 4.233883001638184e-08, + "loss": 1.1118, + "step": 33090 + }, + { + "epoch": 0.9716072582065888, + "grad_norm": 0.0, + "learning_rate": 4.225145990000679e-08, + "loss": 1.2036, + "step": 33091 + }, + { + "epoch": 0.9716366198837277, + "grad_norm": 0.0, + "learning_rate": 4.216417983410215e-08, + "loss": 1.3164, + "step": 33092 + }, + { + "epoch": 0.9716659815608668, + "grad_norm": 0.0, + "learning_rate": 4.2076989819457293e-08, + "loss": 1.2998, + "step": 33093 + }, + { + "epoch": 0.9716953432380058, + "grad_norm": 0.0, + "learning_rate": 4.198988985686048e-08, + "loss": 1.0747, + "step": 33094 + }, + { + "epoch": 0.9717247049151447, + "grad_norm": 0.0, + "learning_rate": 4.190287994709996e-08, + "loss": 1.2246, + "step": 33095 + }, + { + "epoch": 0.9717540665922838, + "grad_norm": 0.0, + "learning_rate": 4.1815960090960674e-08, + "loss": 1.2588, + "step": 33096 + }, + { + "epoch": 0.9717834282694228, + "grad_norm": 0.0, + "learning_rate": 4.1729130289230866e-08, + "loss": 1.0908, + "step": 33097 + }, + { + "epoch": 0.9718127899465617, + "grad_norm": 0.0, + "learning_rate": 4.164239054269437e-08, + "loss": 1.2852, + "step": 33098 + }, + { + "epoch": 0.9718421516237008, + "grad_norm": 0.0, + "learning_rate": 4.15557408521372e-08, + "loss": 1.2783, + "step": 33099 + }, + { + "epoch": 0.9718715133008398, + "grad_norm": 0.0, + "learning_rate": 4.146918121834098e-08, + "loss": 1.1562, + "step": 33100 + }, + { + "epoch": 0.9719008749779787, + "grad_norm": 0.0, + "learning_rate": 4.138271164208951e-08, + "loss": 1.1807, + "step": 33101 + }, + { + "epoch": 0.9719302366551177, + "grad_norm": 0.0, + "learning_rate": 4.12963321241644e-08, + "loss": 1.2114, + "step": 33102 + }, + { + "epoch": 0.9719595983322568, + "grad_norm": 0.0, + "learning_rate": 4.121004266534723e-08, + "loss": 1.0977, + "step": 33103 + }, + { + "epoch": 0.9719889600093957, + "grad_norm": 0.0, + "learning_rate": 4.1123843266418495e-08, + "loss": 1.1763, + "step": 33104 + }, + { + "epoch": 0.9720183216865347, + "grad_norm": 0.0, + "learning_rate": 4.1037733928156467e-08, + "loss": 1.4082, + "step": 33105 + }, + { + "epoch": 0.9720476833636738, + "grad_norm": 0.0, + "learning_rate": 4.0951714651341625e-08, + "loss": 1.21, + "step": 33106 + }, + { + "epoch": 0.9720770450408127, + "grad_norm": 0.0, + "learning_rate": 4.086578543675002e-08, + "loss": 1.2139, + "step": 33107 + }, + { + "epoch": 0.9721064067179517, + "grad_norm": 0.0, + "learning_rate": 4.077994628516102e-08, + "loss": 1.2285, + "step": 33108 + }, + { + "epoch": 0.9721357683950907, + "grad_norm": 0.0, + "learning_rate": 4.069419719734846e-08, + "loss": 1.1562, + "step": 33109 + }, + { + "epoch": 0.9721651300722297, + "grad_norm": 0.0, + "learning_rate": 4.060853817408839e-08, + "loss": 1.2305, + "step": 33110 + }, + { + "epoch": 0.9721944917493687, + "grad_norm": 0.0, + "learning_rate": 4.052296921615684e-08, + "loss": 1.2559, + "step": 33111 + }, + { + "epoch": 0.9722238534265077, + "grad_norm": 0.0, + "learning_rate": 4.043749032432764e-08, + "loss": 1.1743, + "step": 33112 + }, + { + "epoch": 0.9722532151036467, + "grad_norm": 0.0, + "learning_rate": 4.03521014993713e-08, + "loss": 1.2544, + "step": 33113 + }, + { + "epoch": 0.9722825767807857, + "grad_norm": 0.0, + "learning_rate": 4.026680274206274e-08, + "loss": 1.3115, + "step": 33114 + }, + { + "epoch": 0.9723119384579247, + "grad_norm": 0.0, + "learning_rate": 4.018159405317135e-08, + "loss": 1.1235, + "step": 33115 + }, + { + "epoch": 0.9723413001350637, + "grad_norm": 0.0, + "learning_rate": 4.0096475433468727e-08, + "loss": 1.1885, + "step": 33116 + }, + { + "epoch": 0.9723706618122027, + "grad_norm": 0.0, + "learning_rate": 4.001144688372538e-08, + "loss": 1.1221, + "step": 33117 + }, + { + "epoch": 0.9724000234893417, + "grad_norm": 0.0, + "learning_rate": 3.992650840470735e-08, + "loss": 1.1934, + "step": 33118 + }, + { + "epoch": 0.9724293851664807, + "grad_norm": 0.0, + "learning_rate": 3.9841659997186254e-08, + "loss": 1.1719, + "step": 33119 + }, + { + "epoch": 0.9724587468436197, + "grad_norm": 0.0, + "learning_rate": 3.975690166192814e-08, + "loss": 1.1836, + "step": 33120 + }, + { + "epoch": 0.9724881085207587, + "grad_norm": 0.0, + "learning_rate": 3.9672233399699056e-08, + "loss": 1.21, + "step": 33121 + }, + { + "epoch": 0.9725174701978977, + "grad_norm": 0.0, + "learning_rate": 3.9587655211263956e-08, + "loss": 1.1724, + "step": 33122 + }, + { + "epoch": 0.9725468318750367, + "grad_norm": 0.0, + "learning_rate": 3.950316709738888e-08, + "loss": 1.0879, + "step": 33123 + }, + { + "epoch": 0.9725761935521757, + "grad_norm": 0.0, + "learning_rate": 3.941876905883768e-08, + "loss": 1.1694, + "step": 33124 + }, + { + "epoch": 0.9726055552293147, + "grad_norm": 0.0, + "learning_rate": 3.9334461096373064e-08, + "loss": 1.2383, + "step": 33125 + }, + { + "epoch": 0.9726349169064537, + "grad_norm": 0.0, + "learning_rate": 3.9250243210758876e-08, + "loss": 1.1304, + "step": 33126 + }, + { + "epoch": 0.9726642785835927, + "grad_norm": 0.0, + "learning_rate": 3.91661154027545e-08, + "loss": 1.1328, + "step": 33127 + }, + { + "epoch": 0.9726936402607317, + "grad_norm": 0.0, + "learning_rate": 3.9082077673123776e-08, + "loss": 1.291, + "step": 33128 + }, + { + "epoch": 0.9727230019378706, + "grad_norm": 0.0, + "learning_rate": 3.899813002262387e-08, + "loss": 1.23, + "step": 33129 + }, + { + "epoch": 0.9727523636150097, + "grad_norm": 0.0, + "learning_rate": 3.891427245201418e-08, + "loss": 1.2676, + "step": 33130 + }, + { + "epoch": 0.9727817252921487, + "grad_norm": 0.0, + "learning_rate": 3.883050496205409e-08, + "loss": 1.3379, + "step": 33131 + }, + { + "epoch": 0.9728110869692876, + "grad_norm": 0.0, + "learning_rate": 3.8746827553501896e-08, + "loss": 1.2656, + "step": 33132 + }, + { + "epoch": 0.9728404486464267, + "grad_norm": 0.0, + "learning_rate": 3.866324022711254e-08, + "loss": 1.1187, + "step": 33133 + }, + { + "epoch": 0.9728698103235657, + "grad_norm": 0.0, + "learning_rate": 3.8579742983642086e-08, + "loss": 1.1147, + "step": 33134 + }, + { + "epoch": 0.9728991720007046, + "grad_norm": 0.0, + "learning_rate": 3.84963358238466e-08, + "loss": 1.1084, + "step": 33135 + }, + { + "epoch": 0.9729285336778437, + "grad_norm": 0.0, + "learning_rate": 3.841301874848102e-08, + "loss": 1.2461, + "step": 33136 + }, + { + "epoch": 0.9729578953549827, + "grad_norm": 0.0, + "learning_rate": 3.8329791758296984e-08, + "loss": 1.2295, + "step": 33137 + }, + { + "epoch": 0.9729872570321216, + "grad_norm": 0.0, + "learning_rate": 3.8246654854047214e-08, + "loss": 1.1387, + "step": 33138 + }, + { + "epoch": 0.9730166187092607, + "grad_norm": 0.0, + "learning_rate": 3.8163608036484446e-08, + "loss": 1.2803, + "step": 33139 + }, + { + "epoch": 0.9730459803863997, + "grad_norm": 0.0, + "learning_rate": 3.8080651306359184e-08, + "loss": 1.1841, + "step": 33140 + }, + { + "epoch": 0.9730753420635386, + "grad_norm": 0.0, + "learning_rate": 3.799778466442305e-08, + "loss": 1.2275, + "step": 33141 + }, + { + "epoch": 0.9731047037406777, + "grad_norm": 0.0, + "learning_rate": 3.7915008111423235e-08, + "loss": 1.168, + "step": 33142 + }, + { + "epoch": 0.9731340654178167, + "grad_norm": 0.0, + "learning_rate": 3.783232164810913e-08, + "loss": 1.1729, + "step": 33143 + }, + { + "epoch": 0.9731634270949556, + "grad_norm": 0.0, + "learning_rate": 3.774972527522902e-08, + "loss": 1.2891, + "step": 33144 + }, + { + "epoch": 0.9731927887720947, + "grad_norm": 0.0, + "learning_rate": 3.7667218993530094e-08, + "loss": 1.1904, + "step": 33145 + }, + { + "epoch": 0.9732221504492337, + "grad_norm": 0.0, + "learning_rate": 3.758480280375731e-08, + "loss": 1.2993, + "step": 33146 + }, + { + "epoch": 0.9732515121263726, + "grad_norm": 0.0, + "learning_rate": 3.7502476706656745e-08, + "loss": 1.1675, + "step": 33147 + }, + { + "epoch": 0.9732808738035117, + "grad_norm": 0.0, + "learning_rate": 3.7420240702972235e-08, + "loss": 1.1377, + "step": 33148 + }, + { + "epoch": 0.9733102354806507, + "grad_norm": 0.0, + "learning_rate": 3.733809479344874e-08, + "loss": 1.293, + "step": 33149 + }, + { + "epoch": 0.9733395971577896, + "grad_norm": 0.0, + "learning_rate": 3.7256038978827904e-08, + "loss": 1.1016, + "step": 33150 + }, + { + "epoch": 0.9733689588349287, + "grad_norm": 0.0, + "learning_rate": 3.717407325985245e-08, + "loss": 1.2183, + "step": 33151 + }, + { + "epoch": 0.9733983205120676, + "grad_norm": 0.0, + "learning_rate": 3.709219763726291e-08, + "loss": 1.1748, + "step": 33152 + }, + { + "epoch": 0.9734276821892066, + "grad_norm": 0.0, + "learning_rate": 3.7010412111800895e-08, + "loss": 1.0923, + "step": 33153 + }, + { + "epoch": 0.9734570438663457, + "grad_norm": 0.0, + "learning_rate": 3.692871668420472e-08, + "loss": 1.2354, + "step": 33154 + }, + { + "epoch": 0.9734864055434846, + "grad_norm": 0.0, + "learning_rate": 3.684711135521379e-08, + "loss": 1.2637, + "step": 33155 + }, + { + "epoch": 0.9735157672206236, + "grad_norm": 0.0, + "learning_rate": 3.6765596125566404e-08, + "loss": 1.2666, + "step": 33156 + }, + { + "epoch": 0.9735451288977627, + "grad_norm": 0.0, + "learning_rate": 3.6684170995998634e-08, + "loss": 1.1665, + "step": 33157 + }, + { + "epoch": 0.9735744905749016, + "grad_norm": 0.0, + "learning_rate": 3.660283596724767e-08, + "loss": 1.1958, + "step": 33158 + }, + { + "epoch": 0.9736038522520406, + "grad_norm": 0.0, + "learning_rate": 3.65215910400496e-08, + "loss": 1.2485, + "step": 33159 + }, + { + "epoch": 0.9736332139291797, + "grad_norm": 0.0, + "learning_rate": 3.644043621513826e-08, + "loss": 1.2119, + "step": 33160 + }, + { + "epoch": 0.9736625756063186, + "grad_norm": 0.0, + "learning_rate": 3.635937149324753e-08, + "loss": 1.1465, + "step": 33161 + }, + { + "epoch": 0.9736919372834576, + "grad_norm": 0.0, + "learning_rate": 3.627839687511015e-08, + "loss": 1.2017, + "step": 33162 + }, + { + "epoch": 0.9737212989605967, + "grad_norm": 0.0, + "learning_rate": 3.619751236145996e-08, + "loss": 1.2441, + "step": 33163 + }, + { + "epoch": 0.9737506606377356, + "grad_norm": 0.0, + "learning_rate": 3.611671795302751e-08, + "loss": 1.1943, + "step": 33164 + }, + { + "epoch": 0.9737800223148746, + "grad_norm": 0.0, + "learning_rate": 3.6036013650542215e-08, + "loss": 1.0278, + "step": 33165 + }, + { + "epoch": 0.9738093839920137, + "grad_norm": 0.0, + "learning_rate": 3.59553994547368e-08, + "loss": 1.1187, + "step": 33166 + }, + { + "epoch": 0.9738387456691526, + "grad_norm": 0.0, + "learning_rate": 3.587487536633627e-08, + "loss": 1.1338, + "step": 33167 + }, + { + "epoch": 0.9738681073462916, + "grad_norm": 0.0, + "learning_rate": 3.579444138607335e-08, + "loss": 1.2705, + "step": 33168 + }, + { + "epoch": 0.9738974690234307, + "grad_norm": 0.0, + "learning_rate": 3.571409751467192e-08, + "loss": 1.2769, + "step": 33169 + }, + { + "epoch": 0.9739268307005696, + "grad_norm": 0.0, + "learning_rate": 3.5633843752860274e-08, + "loss": 1.2939, + "step": 33170 + }, + { + "epoch": 0.9739561923777086, + "grad_norm": 0.0, + "learning_rate": 3.55536801013634e-08, + "loss": 1.1807, + "step": 33171 + }, + { + "epoch": 0.9739855540548477, + "grad_norm": 0.0, + "learning_rate": 3.547360656090626e-08, + "loss": 1.187, + "step": 33172 + }, + { + "epoch": 0.9740149157319866, + "grad_norm": 0.0, + "learning_rate": 3.539362313221384e-08, + "loss": 1.2402, + "step": 33173 + }, + { + "epoch": 0.9740442774091256, + "grad_norm": 0.0, + "learning_rate": 3.531372981600778e-08, + "loss": 1.3223, + "step": 33174 + }, + { + "epoch": 0.9740736390862647, + "grad_norm": 0.0, + "learning_rate": 3.5233926613013056e-08, + "loss": 1.2041, + "step": 33175 + }, + { + "epoch": 0.9741030007634036, + "grad_norm": 0.0, + "learning_rate": 3.51542135239491e-08, + "loss": 1.1484, + "step": 33176 + }, + { + "epoch": 0.9741323624405426, + "grad_norm": 0.0, + "learning_rate": 3.5074590549538654e-08, + "loss": 1.1992, + "step": 33177 + }, + { + "epoch": 0.9741617241176816, + "grad_norm": 0.0, + "learning_rate": 3.4995057690500046e-08, + "loss": 1.228, + "step": 33178 + }, + { + "epoch": 0.9741910857948206, + "grad_norm": 0.0, + "learning_rate": 3.491561494755269e-08, + "loss": 1.2725, + "step": 33179 + }, + { + "epoch": 0.9742204474719596, + "grad_norm": 0.0, + "learning_rate": 3.4836262321416015e-08, + "loss": 1.2222, + "step": 33180 + }, + { + "epoch": 0.9742498091490986, + "grad_norm": 0.0, + "learning_rate": 3.4756999812807226e-08, + "loss": 1.2705, + "step": 33181 + }, + { + "epoch": 0.9742791708262376, + "grad_norm": 0.0, + "learning_rate": 3.467782742244241e-08, + "loss": 1.2666, + "step": 33182 + }, + { + "epoch": 0.9743085325033766, + "grad_norm": 0.0, + "learning_rate": 3.4598745151038785e-08, + "loss": 1.3086, + "step": 33183 + }, + { + "epoch": 0.9743378941805156, + "grad_norm": 0.0, + "learning_rate": 3.451975299931132e-08, + "loss": 1.1484, + "step": 33184 + }, + { + "epoch": 0.9743672558576546, + "grad_norm": 0.0, + "learning_rate": 3.4440850967972784e-08, + "loss": 1.1284, + "step": 33185 + }, + { + "epoch": 0.9743966175347936, + "grad_norm": 0.0, + "learning_rate": 3.4362039057739274e-08, + "loss": 1.1519, + "step": 33186 + }, + { + "epoch": 0.9744259792119326, + "grad_norm": 0.0, + "learning_rate": 3.4283317269320215e-08, + "loss": 1.0068, + "step": 33187 + }, + { + "epoch": 0.9744553408890716, + "grad_norm": 0.0, + "learning_rate": 3.4204685603430596e-08, + "loss": 1.2021, + "step": 33188 + }, + { + "epoch": 0.9744847025662106, + "grad_norm": 0.0, + "learning_rate": 3.412614406077985e-08, + "loss": 1.1348, + "step": 33189 + }, + { + "epoch": 0.9745140642433496, + "grad_norm": 0.0, + "learning_rate": 3.404769264207963e-08, + "loss": 1.1431, + "step": 33190 + }, + { + "epoch": 0.9745434259204886, + "grad_norm": 0.0, + "learning_rate": 3.396933134803715e-08, + "loss": 0.9702, + "step": 33191 + }, + { + "epoch": 0.9745727875976276, + "grad_norm": 0.0, + "learning_rate": 3.389106017936295e-08, + "loss": 1.041, + "step": 33192 + }, + { + "epoch": 0.9746021492747666, + "grad_norm": 0.0, + "learning_rate": 3.3812879136764234e-08, + "loss": 1.292, + "step": 33193 + }, + { + "epoch": 0.9746315109519056, + "grad_norm": 0.0, + "learning_rate": 3.373478822094822e-08, + "loss": 1.2373, + "step": 33194 + }, + { + "epoch": 0.9746608726290446, + "grad_norm": 0.0, + "learning_rate": 3.3656787432621016e-08, + "loss": 1.1191, + "step": 33195 + }, + { + "epoch": 0.9746902343061836, + "grad_norm": 0.0, + "learning_rate": 3.3578876772487615e-08, + "loss": 1.2979, + "step": 33196 + }, + { + "epoch": 0.9747195959833226, + "grad_norm": 0.0, + "learning_rate": 3.350105624125299e-08, + "loss": 1.252, + "step": 33197 + }, + { + "epoch": 0.9747489576604615, + "grad_norm": 0.0, + "learning_rate": 3.3423325839619935e-08, + "loss": 1.229, + "step": 33198 + }, + { + "epoch": 0.9747783193376006, + "grad_norm": 0.0, + "learning_rate": 3.334568556829343e-08, + "loss": 1.3281, + "step": 33199 + }, + { + "epoch": 0.9748076810147396, + "grad_norm": 0.0, + "learning_rate": 3.3268135427972916e-08, + "loss": 1.2129, + "step": 33200 + }, + { + "epoch": 0.9748370426918785, + "grad_norm": 0.0, + "learning_rate": 3.3190675419362275e-08, + "loss": 1.2178, + "step": 33201 + }, + { + "epoch": 0.9748664043690175, + "grad_norm": 0.0, + "learning_rate": 3.3113305543160944e-08, + "loss": 1.3271, + "step": 33202 + }, + { + "epoch": 0.9748957660461566, + "grad_norm": 0.0, + "learning_rate": 3.3036025800068375e-08, + "loss": 1.1787, + "step": 33203 + }, + { + "epoch": 0.9749251277232955, + "grad_norm": 0.0, + "learning_rate": 3.295883619078399e-08, + "loss": 1.3613, + "step": 33204 + }, + { + "epoch": 0.9749544894004345, + "grad_norm": 0.0, + "learning_rate": 3.288173671600503e-08, + "loss": 1.2471, + "step": 33205 + }, + { + "epoch": 0.9749838510775736, + "grad_norm": 0.0, + "learning_rate": 3.2804727376428705e-08, + "loss": 1.2065, + "step": 33206 + }, + { + "epoch": 0.9750132127547125, + "grad_norm": 0.0, + "learning_rate": 3.272780817275223e-08, + "loss": 1.2012, + "step": 33207 + }, + { + "epoch": 0.9750425744318515, + "grad_norm": 0.0, + "learning_rate": 3.2650979105671723e-08, + "loss": 1.2432, + "step": 33208 + }, + { + "epoch": 0.9750719361089906, + "grad_norm": 0.0, + "learning_rate": 3.2574240175879954e-08, + "loss": 1.3291, + "step": 33209 + }, + { + "epoch": 0.9751012977861295, + "grad_norm": 0.0, + "learning_rate": 3.249759138407194e-08, + "loss": 1.1133, + "step": 33210 + }, + { + "epoch": 0.9751306594632685, + "grad_norm": 0.0, + "learning_rate": 3.242103273094155e-08, + "loss": 1.2642, + "step": 33211 + }, + { + "epoch": 0.9751600211404076, + "grad_norm": 0.0, + "learning_rate": 3.234456421718046e-08, + "loss": 1.2539, + "step": 33212 + }, + { + "epoch": 0.9751893828175465, + "grad_norm": 0.0, + "learning_rate": 3.226818584348035e-08, + "loss": 1.1816, + "step": 33213 + }, + { + "epoch": 0.9752187444946855, + "grad_norm": 0.0, + "learning_rate": 3.2191897610531765e-08, + "loss": 1.1895, + "step": 33214 + }, + { + "epoch": 0.9752481061718246, + "grad_norm": 0.0, + "learning_rate": 3.211569951902416e-08, + "loss": 1.2188, + "step": 33215 + }, + { + "epoch": 0.9752774678489635, + "grad_norm": 0.0, + "learning_rate": 3.2039591569646975e-08, + "loss": 1.1392, + "step": 33216 + }, + { + "epoch": 0.9753068295261025, + "grad_norm": 0.0, + "learning_rate": 3.196357376308967e-08, + "loss": 1.2339, + "step": 33217 + }, + { + "epoch": 0.9753361912032416, + "grad_norm": 0.0, + "learning_rate": 3.188764610003725e-08, + "loss": 1.2646, + "step": 33218 + }, + { + "epoch": 0.9753655528803805, + "grad_norm": 0.0, + "learning_rate": 3.181180858117916e-08, + "loss": 1.252, + "step": 33219 + }, + { + "epoch": 0.9753949145575195, + "grad_norm": 0.0, + "learning_rate": 3.1736061207198185e-08, + "loss": 1.1543, + "step": 33220 + }, + { + "epoch": 0.9754242762346585, + "grad_norm": 0.0, + "learning_rate": 3.166040397878156e-08, + "loss": 1.2061, + "step": 33221 + }, + { + "epoch": 0.9754536379117975, + "grad_norm": 0.0, + "learning_rate": 3.158483689661318e-08, + "loss": 1.2207, + "step": 33222 + }, + { + "epoch": 0.9754829995889365, + "grad_norm": 0.0, + "learning_rate": 3.150935996137583e-08, + "loss": 1.0645, + "step": 33223 + }, + { + "epoch": 0.9755123612660755, + "grad_norm": 0.0, + "learning_rate": 3.1433973173752295e-08, + "loss": 1.2451, + "step": 33224 + }, + { + "epoch": 0.9755417229432145, + "grad_norm": 0.0, + "learning_rate": 3.135867653442426e-08, + "loss": 1.2402, + "step": 33225 + }, + { + "epoch": 0.9755710846203535, + "grad_norm": 0.0, + "learning_rate": 3.1283470044073395e-08, + "loss": 1.2002, + "step": 33226 + }, + { + "epoch": 0.9756004462974925, + "grad_norm": 0.0, + "learning_rate": 3.120835370337916e-08, + "loss": 1.2285, + "step": 33227 + }, + { + "epoch": 0.9756298079746315, + "grad_norm": 0.0, + "learning_rate": 3.11333275130199e-08, + "loss": 1.2632, + "step": 33228 + }, + { + "epoch": 0.9756591696517705, + "grad_norm": 0.0, + "learning_rate": 3.1058391473675066e-08, + "loss": 1.1006, + "step": 33229 + }, + { + "epoch": 0.9756885313289095, + "grad_norm": 0.0, + "learning_rate": 3.098354558602301e-08, + "loss": 1.2236, + "step": 33230 + }, + { + "epoch": 0.9757178930060485, + "grad_norm": 0.0, + "learning_rate": 3.0908789850738754e-08, + "loss": 1.1719, + "step": 33231 + }, + { + "epoch": 0.9757472546831875, + "grad_norm": 0.0, + "learning_rate": 3.083412426850063e-08, + "loss": 1.1929, + "step": 33232 + }, + { + "epoch": 0.9757766163603265, + "grad_norm": 0.0, + "learning_rate": 3.0759548839982556e-08, + "loss": 1.1191, + "step": 33233 + }, + { + "epoch": 0.9758059780374655, + "grad_norm": 0.0, + "learning_rate": 3.068506356585843e-08, + "loss": 1.208, + "step": 33234 + }, + { + "epoch": 0.9758353397146045, + "grad_norm": 0.0, + "learning_rate": 3.061066844680216e-08, + "loss": 1.3149, + "step": 33235 + }, + { + "epoch": 0.9758647013917435, + "grad_norm": 0.0, + "learning_rate": 3.053636348348765e-08, + "loss": 1.1519, + "step": 33236 + }, + { + "epoch": 0.9758940630688825, + "grad_norm": 0.0, + "learning_rate": 3.046214867658659e-08, + "loss": 1.2236, + "step": 33237 + }, + { + "epoch": 0.9759234247460215, + "grad_norm": 0.0, + "learning_rate": 3.038802402676844e-08, + "loss": 1.1865, + "step": 33238 + }, + { + "epoch": 0.9759527864231605, + "grad_norm": 0.0, + "learning_rate": 3.0313989534704876e-08, + "loss": 1.2793, + "step": 33239 + }, + { + "epoch": 0.9759821481002995, + "grad_norm": 0.0, + "learning_rate": 3.024004520106427e-08, + "loss": 1.2705, + "step": 33240 + }, + { + "epoch": 0.9760115097774384, + "grad_norm": 0.0, + "learning_rate": 3.016619102651719e-08, + "loss": 1.0986, + "step": 33241 + }, + { + "epoch": 0.9760408714545775, + "grad_norm": 0.0, + "learning_rate": 3.009242701173087e-08, + "loss": 1.23, + "step": 33242 + }, + { + "epoch": 0.9760702331317165, + "grad_norm": 0.0, + "learning_rate": 3.001875315737146e-08, + "loss": 1.2031, + "step": 33243 + }, + { + "epoch": 0.9760995948088554, + "grad_norm": 0.0, + "learning_rate": 2.994516946410619e-08, + "loss": 1.2676, + "step": 33244 + }, + { + "epoch": 0.9761289564859945, + "grad_norm": 0.0, + "learning_rate": 2.98716759326001e-08, + "loss": 1.1592, + "step": 33245 + }, + { + "epoch": 0.9761583181631335, + "grad_norm": 0.0, + "learning_rate": 2.9798272563518192e-08, + "loss": 1.2217, + "step": 33246 + }, + { + "epoch": 0.9761876798402724, + "grad_norm": 0.0, + "learning_rate": 2.9724959357523286e-08, + "loss": 1.3164, + "step": 33247 + }, + { + "epoch": 0.9762170415174115, + "grad_norm": 0.0, + "learning_rate": 2.965173631527929e-08, + "loss": 1.1982, + "step": 33248 + }, + { + "epoch": 0.9762464031945505, + "grad_norm": 0.0, + "learning_rate": 2.9578603437447895e-08, + "loss": 1.1001, + "step": 33249 + }, + { + "epoch": 0.9762757648716894, + "grad_norm": 0.0, + "learning_rate": 2.9505560724690796e-08, + "loss": 1.2559, + "step": 33250 + }, + { + "epoch": 0.9763051265488285, + "grad_norm": 0.0, + "learning_rate": 2.9432608177668576e-08, + "loss": 1.3271, + "step": 33251 + }, + { + "epoch": 0.9763344882259675, + "grad_norm": 0.0, + "learning_rate": 2.9359745797039595e-08, + "loss": 1.1582, + "step": 33252 + }, + { + "epoch": 0.9763638499031064, + "grad_norm": 0.0, + "learning_rate": 2.928697358346555e-08, + "loss": 1.2529, + "step": 33253 + }, + { + "epoch": 0.9763932115802455, + "grad_norm": 0.0, + "learning_rate": 2.9214291537601468e-08, + "loss": 1.2314, + "step": 33254 + }, + { + "epoch": 0.9764225732573845, + "grad_norm": 0.0, + "learning_rate": 2.9141699660107937e-08, + "loss": 1.2666, + "step": 33255 + }, + { + "epoch": 0.9764519349345234, + "grad_norm": 0.0, + "learning_rate": 2.906919795163776e-08, + "loss": 1.1294, + "step": 33256 + }, + { + "epoch": 0.9764812966116625, + "grad_norm": 0.0, + "learning_rate": 2.899678641284931e-08, + "loss": 1.2026, + "step": 33257 + }, + { + "epoch": 0.9765106582888015, + "grad_norm": 0.0, + "learning_rate": 2.8924465044396498e-08, + "loss": 1.2852, + "step": 33258 + }, + { + "epoch": 0.9765400199659404, + "grad_norm": 0.0, + "learning_rate": 2.8852233846933253e-08, + "loss": 1.1904, + "step": 33259 + }, + { + "epoch": 0.9765693816430795, + "grad_norm": 0.0, + "learning_rate": 2.878009282111349e-08, + "loss": 1.1519, + "step": 33260 + }, + { + "epoch": 0.9765987433202185, + "grad_norm": 0.0, + "learning_rate": 2.8708041967587806e-08, + "loss": 1.21, + "step": 33261 + }, + { + "epoch": 0.9766281049973574, + "grad_norm": 0.0, + "learning_rate": 2.863608128701012e-08, + "loss": 1.272, + "step": 33262 + }, + { + "epoch": 0.9766574666744965, + "grad_norm": 0.0, + "learning_rate": 2.856421078002991e-08, + "loss": 1.1694, + "step": 33263 + }, + { + "epoch": 0.9766868283516354, + "grad_norm": 0.0, + "learning_rate": 2.8492430447296658e-08, + "loss": 1.2114, + "step": 33264 + }, + { + "epoch": 0.9767161900287744, + "grad_norm": 0.0, + "learning_rate": 2.8420740289459848e-08, + "loss": 1.2432, + "step": 33265 + }, + { + "epoch": 0.9767455517059135, + "grad_norm": 0.0, + "learning_rate": 2.8349140307168954e-08, + "loss": 1.082, + "step": 33266 + }, + { + "epoch": 0.9767749133830524, + "grad_norm": 0.0, + "learning_rate": 2.827763050107013e-08, + "loss": 1.2383, + "step": 33267 + }, + { + "epoch": 0.9768042750601914, + "grad_norm": 0.0, + "learning_rate": 2.8206210871810635e-08, + "loss": 1.2842, + "step": 33268 + }, + { + "epoch": 0.9768336367373305, + "grad_norm": 0.0, + "learning_rate": 2.813488142003662e-08, + "loss": 1.3213, + "step": 33269 + }, + { + "epoch": 0.9768629984144694, + "grad_norm": 0.0, + "learning_rate": 2.806364214639201e-08, + "loss": 1.1597, + "step": 33270 + }, + { + "epoch": 0.9768923600916084, + "grad_norm": 0.0, + "learning_rate": 2.7992493051521853e-08, + "loss": 1.3242, + "step": 33271 + }, + { + "epoch": 0.9769217217687475, + "grad_norm": 0.0, + "learning_rate": 2.7921434136070068e-08, + "loss": 1.1655, + "step": 33272 + }, + { + "epoch": 0.9769510834458864, + "grad_norm": 0.0, + "learning_rate": 2.7850465400677263e-08, + "loss": 1.1475, + "step": 33273 + }, + { + "epoch": 0.9769804451230254, + "grad_norm": 0.0, + "learning_rate": 2.777958684598736e-08, + "loss": 1.2119, + "step": 33274 + }, + { + "epoch": 0.9770098068001645, + "grad_norm": 0.0, + "learning_rate": 2.7708798472640962e-08, + "loss": 1.189, + "step": 33275 + }, + { + "epoch": 0.9770391684773034, + "grad_norm": 0.0, + "learning_rate": 2.7638100281277557e-08, + "loss": 1.082, + "step": 33276 + }, + { + "epoch": 0.9770685301544424, + "grad_norm": 0.0, + "learning_rate": 2.7567492272536632e-08, + "loss": 1.2793, + "step": 33277 + }, + { + "epoch": 0.9770978918315815, + "grad_norm": 0.0, + "learning_rate": 2.749697444705768e-08, + "loss": 1.188, + "step": 33278 + }, + { + "epoch": 0.9771272535087204, + "grad_norm": 0.0, + "learning_rate": 2.742654680547685e-08, + "loss": 1.1582, + "step": 33279 + }, + { + "epoch": 0.9771566151858594, + "grad_norm": 0.0, + "learning_rate": 2.7356209348431417e-08, + "loss": 1.1367, + "step": 33280 + }, + { + "epoch": 0.9771859768629985, + "grad_norm": 0.0, + "learning_rate": 2.7285962076558648e-08, + "loss": 1.1548, + "step": 33281 + }, + { + "epoch": 0.9772153385401374, + "grad_norm": 0.0, + "learning_rate": 2.721580499049248e-08, + "loss": 1.3096, + "step": 33282 + }, + { + "epoch": 0.9772447002172764, + "grad_norm": 0.0, + "learning_rate": 2.7145738090867957e-08, + "loss": 1.4033, + "step": 33283 + }, + { + "epoch": 0.9772740618944155, + "grad_norm": 0.0, + "learning_rate": 2.707576137831791e-08, + "loss": 1.2466, + "step": 33284 + }, + { + "epoch": 0.9773034235715544, + "grad_norm": 0.0, + "learning_rate": 2.700587485347739e-08, + "loss": 1.3223, + "step": 33285 + }, + { + "epoch": 0.9773327852486934, + "grad_norm": 0.0, + "learning_rate": 2.6936078516975884e-08, + "loss": 1.1177, + "step": 33286 + }, + { + "epoch": 0.9773621469258325, + "grad_norm": 0.0, + "learning_rate": 2.6866372369445116e-08, + "loss": 1.1494, + "step": 33287 + }, + { + "epoch": 0.9773915086029714, + "grad_norm": 0.0, + "learning_rate": 2.67967564115168e-08, + "loss": 1.168, + "step": 33288 + }, + { + "epoch": 0.9774208702801104, + "grad_norm": 0.0, + "learning_rate": 2.6727230643819323e-08, + "loss": 1.2939, + "step": 33289 + }, + { + "epoch": 0.9774502319572494, + "grad_norm": 0.0, + "learning_rate": 2.6657795066981073e-08, + "loss": 1.3213, + "step": 33290 + }, + { + "epoch": 0.9774795936343884, + "grad_norm": 0.0, + "learning_rate": 2.6588449681631544e-08, + "loss": 1.1582, + "step": 33291 + }, + { + "epoch": 0.9775089553115274, + "grad_norm": 0.0, + "learning_rate": 2.6519194488396904e-08, + "loss": 1.1816, + "step": 33292 + }, + { + "epoch": 0.9775383169886664, + "grad_norm": 0.0, + "learning_rate": 2.6450029487902205e-08, + "loss": 1.25, + "step": 33293 + }, + { + "epoch": 0.9775676786658054, + "grad_norm": 0.0, + "learning_rate": 2.6380954680774728e-08, + "loss": 1.208, + "step": 33294 + }, + { + "epoch": 0.9775970403429444, + "grad_norm": 0.0, + "learning_rate": 2.6311970067639526e-08, + "loss": 1.3149, + "step": 33295 + }, + { + "epoch": 0.9776264020200834, + "grad_norm": 0.0, + "learning_rate": 2.6243075649118322e-08, + "loss": 1.1216, + "step": 33296 + }, + { + "epoch": 0.9776557636972224, + "grad_norm": 0.0, + "learning_rate": 2.6174271425836173e-08, + "loss": 1.2158, + "step": 33297 + }, + { + "epoch": 0.9776851253743614, + "grad_norm": 0.0, + "learning_rate": 2.6105557398413696e-08, + "loss": 1.2666, + "step": 33298 + }, + { + "epoch": 0.9777144870515004, + "grad_norm": 0.0, + "learning_rate": 2.6036933567473722e-08, + "loss": 1.144, + "step": 33299 + }, + { + "epoch": 0.9777438487286394, + "grad_norm": 0.0, + "learning_rate": 2.5968399933635756e-08, + "loss": 1.3838, + "step": 33300 + }, + { + "epoch": 0.9777732104057784, + "grad_norm": 0.0, + "learning_rate": 2.5899956497519307e-08, + "loss": 1.1523, + "step": 33301 + }, + { + "epoch": 0.9778025720829174, + "grad_norm": 0.0, + "learning_rate": 2.5831603259744988e-08, + "loss": 1.2373, + "step": 33302 + }, + { + "epoch": 0.9778319337600564, + "grad_norm": 0.0, + "learning_rate": 2.5763340220928966e-08, + "loss": 1.2109, + "step": 33303 + }, + { + "epoch": 0.9778612954371954, + "grad_norm": 0.0, + "learning_rate": 2.5695167381690754e-08, + "loss": 1.2793, + "step": 33304 + }, + { + "epoch": 0.9778906571143343, + "grad_norm": 0.0, + "learning_rate": 2.5627084742644305e-08, + "loss": 1.1401, + "step": 33305 + }, + { + "epoch": 0.9779200187914734, + "grad_norm": 0.0, + "learning_rate": 2.5559092304406896e-08, + "loss": 1.1855, + "step": 33306 + }, + { + "epoch": 0.9779493804686124, + "grad_norm": 0.0, + "learning_rate": 2.5491190067593597e-08, + "loss": 1.1362, + "step": 33307 + }, + { + "epoch": 0.9779787421457513, + "grad_norm": 0.0, + "learning_rate": 2.5423378032817246e-08, + "loss": 1.0928, + "step": 33308 + }, + { + "epoch": 0.9780081038228904, + "grad_norm": 0.0, + "learning_rate": 2.5355656200692914e-08, + "loss": 1.2324, + "step": 33309 + }, + { + "epoch": 0.9780374655000293, + "grad_norm": 0.0, + "learning_rate": 2.5288024571831217e-08, + "loss": 1.231, + "step": 33310 + }, + { + "epoch": 0.9780668271771683, + "grad_norm": 0.0, + "learning_rate": 2.5220483146845e-08, + "loss": 1.2056, + "step": 33311 + }, + { + "epoch": 0.9780961888543074, + "grad_norm": 0.0, + "learning_rate": 2.5153031926343772e-08, + "loss": 1.2495, + "step": 33312 + }, + { + "epoch": 0.9781255505314463, + "grad_norm": 0.0, + "learning_rate": 2.5085670910938163e-08, + "loss": 1.249, + "step": 33313 + }, + { + "epoch": 0.9781549122085853, + "grad_norm": 0.0, + "learning_rate": 2.5018400101237683e-08, + "loss": 1.1558, + "step": 33314 + }, + { + "epoch": 0.9781842738857244, + "grad_norm": 0.0, + "learning_rate": 2.4951219497850732e-08, + "loss": 1.2197, + "step": 33315 + }, + { + "epoch": 0.9782136355628633, + "grad_norm": 0.0, + "learning_rate": 2.48841291013846e-08, + "loss": 1.1367, + "step": 33316 + }, + { + "epoch": 0.9782429972400023, + "grad_norm": 0.0, + "learning_rate": 2.4817128912446586e-08, + "loss": 1.0972, + "step": 33317 + }, + { + "epoch": 0.9782723589171414, + "grad_norm": 0.0, + "learning_rate": 2.4750218931640646e-08, + "loss": 1.2334, + "step": 33318 + }, + { + "epoch": 0.9783017205942803, + "grad_norm": 0.0, + "learning_rate": 2.4683399159574074e-08, + "loss": 1.2578, + "step": 33319 + }, + { + "epoch": 0.9783310822714193, + "grad_norm": 0.0, + "learning_rate": 2.4616669596849718e-08, + "loss": 1.2832, + "step": 33320 + }, + { + "epoch": 0.9783604439485584, + "grad_norm": 0.0, + "learning_rate": 2.4550030244071542e-08, + "loss": 1.2744, + "step": 33321 + }, + { + "epoch": 0.9783898056256973, + "grad_norm": 0.0, + "learning_rate": 2.44834811018424e-08, + "loss": 1.3091, + "step": 33322 + }, + { + "epoch": 0.9784191673028363, + "grad_norm": 0.0, + "learning_rate": 2.4417022170764026e-08, + "loss": 1.0913, + "step": 33323 + }, + { + "epoch": 0.9784485289799754, + "grad_norm": 0.0, + "learning_rate": 2.4350653451437058e-08, + "loss": 1.1978, + "step": 33324 + }, + { + "epoch": 0.9784778906571143, + "grad_norm": 0.0, + "learning_rate": 2.428437494446212e-08, + "loss": 1.2354, + "step": 33325 + }, + { + "epoch": 0.9785072523342533, + "grad_norm": 0.0, + "learning_rate": 2.4218186650437625e-08, + "loss": 1.1416, + "step": 33326 + }, + { + "epoch": 0.9785366140113924, + "grad_norm": 0.0, + "learning_rate": 2.4152088569964204e-08, + "loss": 1.2627, + "step": 33327 + }, + { + "epoch": 0.9785659756885313, + "grad_norm": 0.0, + "learning_rate": 2.4086080703636937e-08, + "loss": 1.1978, + "step": 33328 + }, + { + "epoch": 0.9785953373656703, + "grad_norm": 0.0, + "learning_rate": 2.4020163052055345e-08, + "loss": 1.2578, + "step": 33329 + }, + { + "epoch": 0.9786246990428094, + "grad_norm": 0.0, + "learning_rate": 2.3954335615813395e-08, + "loss": 1.1562, + "step": 33330 + }, + { + "epoch": 0.9786540607199483, + "grad_norm": 0.0, + "learning_rate": 2.388859839550839e-08, + "loss": 1.063, + "step": 33331 + }, + { + "epoch": 0.9786834223970873, + "grad_norm": 0.0, + "learning_rate": 2.3822951391732074e-08, + "loss": 1.251, + "step": 33332 + }, + { + "epoch": 0.9787127840742263, + "grad_norm": 0.0, + "learning_rate": 2.3757394605081753e-08, + "loss": 1.3086, + "step": 33333 + }, + { + "epoch": 0.9787421457513653, + "grad_norm": 0.0, + "learning_rate": 2.369192803614695e-08, + "loss": 1.3027, + "step": 33334 + }, + { + "epoch": 0.9787715074285043, + "grad_norm": 0.0, + "learning_rate": 2.3626551685521637e-08, + "loss": 1.2368, + "step": 33335 + }, + { + "epoch": 0.9788008691056433, + "grad_norm": 0.0, + "learning_rate": 2.3561265553795342e-08, + "loss": 1.1392, + "step": 33336 + }, + { + "epoch": 0.9788302307827823, + "grad_norm": 0.0, + "learning_rate": 2.3496069641560925e-08, + "loss": 1.1709, + "step": 33337 + }, + { + "epoch": 0.9788595924599213, + "grad_norm": 0.0, + "learning_rate": 2.3430963949405695e-08, + "loss": 1.165, + "step": 33338 + }, + { + "epoch": 0.9788889541370603, + "grad_norm": 0.0, + "learning_rate": 2.3365948477919175e-08, + "loss": 1.2998, + "step": 33339 + }, + { + "epoch": 0.9789183158141993, + "grad_norm": 0.0, + "learning_rate": 2.330102322768979e-08, + "loss": 1.2812, + "step": 33340 + }, + { + "epoch": 0.9789476774913383, + "grad_norm": 0.0, + "learning_rate": 2.3236188199304845e-08, + "loss": 1.2529, + "step": 33341 + }, + { + "epoch": 0.9789770391684773, + "grad_norm": 0.0, + "learning_rate": 2.3171443393349423e-08, + "loss": 1.1553, + "step": 33342 + }, + { + "epoch": 0.9790064008456163, + "grad_norm": 0.0, + "learning_rate": 2.310678881040973e-08, + "loss": 1.2129, + "step": 33343 + }, + { + "epoch": 0.9790357625227553, + "grad_norm": 0.0, + "learning_rate": 2.304222445107085e-08, + "loss": 1.2578, + "step": 33344 + }, + { + "epoch": 0.9790651241998943, + "grad_norm": 0.0, + "learning_rate": 2.2977750315915647e-08, + "loss": 1.208, + "step": 33345 + }, + { + "epoch": 0.9790944858770333, + "grad_norm": 0.0, + "learning_rate": 2.2913366405528103e-08, + "loss": 1.1963, + "step": 33346 + }, + { + "epoch": 0.9791238475541723, + "grad_norm": 0.0, + "learning_rate": 2.2849072720489972e-08, + "loss": 1.3535, + "step": 33347 + }, + { + "epoch": 0.9791532092313113, + "grad_norm": 0.0, + "learning_rate": 2.2784869261383015e-08, + "loss": 1.084, + "step": 33348 + }, + { + "epoch": 0.9791825709084503, + "grad_norm": 0.0, + "learning_rate": 2.272075602878787e-08, + "loss": 1.2686, + "step": 33349 + }, + { + "epoch": 0.9792119325855893, + "grad_norm": 0.0, + "learning_rate": 2.2656733023284082e-08, + "loss": 1.2578, + "step": 33350 + }, + { + "epoch": 0.9792412942627283, + "grad_norm": 0.0, + "learning_rate": 2.2592800245451184e-08, + "loss": 1.1353, + "step": 33351 + }, + { + "epoch": 0.9792706559398673, + "grad_norm": 0.0, + "learning_rate": 2.2528957695866493e-08, + "loss": 1.251, + "step": 33352 + }, + { + "epoch": 0.9793000176170062, + "grad_norm": 0.0, + "learning_rate": 2.2465205375107325e-08, + "loss": 1.2227, + "step": 33353 + }, + { + "epoch": 0.9793293792941453, + "grad_norm": 0.0, + "learning_rate": 2.2401543283750992e-08, + "loss": 1.2773, + "step": 33354 + }, + { + "epoch": 0.9793587409712843, + "grad_norm": 0.0, + "learning_rate": 2.2337971422372594e-08, + "loss": 1.1006, + "step": 33355 + }, + { + "epoch": 0.9793881026484232, + "grad_norm": 0.0, + "learning_rate": 2.2274489791547225e-08, + "loss": 1.2139, + "step": 33356 + }, + { + "epoch": 0.9794174643255623, + "grad_norm": 0.0, + "learning_rate": 2.2211098391848874e-08, + "loss": 1.1221, + "step": 33357 + }, + { + "epoch": 0.9794468260027013, + "grad_norm": 0.0, + "learning_rate": 2.214779722385152e-08, + "loss": 1.2197, + "step": 33358 + }, + { + "epoch": 0.9794761876798402, + "grad_norm": 0.0, + "learning_rate": 2.2084586288126932e-08, + "loss": 1.3311, + "step": 33359 + }, + { + "epoch": 0.9795055493569793, + "grad_norm": 0.0, + "learning_rate": 2.202146558524576e-08, + "loss": 1.2676, + "step": 33360 + }, + { + "epoch": 0.9795349110341183, + "grad_norm": 0.0, + "learning_rate": 2.1958435115780884e-08, + "loss": 1.1816, + "step": 33361 + }, + { + "epoch": 0.9795642727112572, + "grad_norm": 0.0, + "learning_rate": 2.189549488030074e-08, + "loss": 1.1318, + "step": 33362 + }, + { + "epoch": 0.9795936343883963, + "grad_norm": 0.0, + "learning_rate": 2.1832644879374866e-08, + "loss": 1.3604, + "step": 33363 + }, + { + "epoch": 0.9796229960655353, + "grad_norm": 0.0, + "learning_rate": 2.1769885113572807e-08, + "loss": 1.2041, + "step": 33364 + }, + { + "epoch": 0.9796523577426742, + "grad_norm": 0.0, + "learning_rate": 2.1707215583460785e-08, + "loss": 1.2197, + "step": 33365 + }, + { + "epoch": 0.9796817194198133, + "grad_norm": 0.0, + "learning_rate": 2.1644636289606113e-08, + "loss": 1.2148, + "step": 33366 + }, + { + "epoch": 0.9797110810969523, + "grad_norm": 0.0, + "learning_rate": 2.15821472325739e-08, + "loss": 1.2686, + "step": 33367 + }, + { + "epoch": 0.9797404427740912, + "grad_norm": 0.0, + "learning_rate": 2.1519748412929253e-08, + "loss": 1.1475, + "step": 33368 + }, + { + "epoch": 0.9797698044512303, + "grad_norm": 0.0, + "learning_rate": 2.145743983123838e-08, + "loss": 1.1914, + "step": 33369 + }, + { + "epoch": 0.9797991661283693, + "grad_norm": 0.0, + "learning_rate": 2.1395221488063056e-08, + "loss": 1.1689, + "step": 33370 + }, + { + "epoch": 0.9798285278055082, + "grad_norm": 0.0, + "learning_rate": 2.1333093383966162e-08, + "loss": 1.1914, + "step": 33371 + }, + { + "epoch": 0.9798578894826473, + "grad_norm": 0.0, + "learning_rate": 2.1271055519509477e-08, + "loss": 1.1685, + "step": 33372 + }, + { + "epoch": 0.9798872511597863, + "grad_norm": 0.0, + "learning_rate": 2.1209107895253655e-08, + "loss": 1.2051, + "step": 33373 + }, + { + "epoch": 0.9799166128369252, + "grad_norm": 0.0, + "learning_rate": 2.1147250511760476e-08, + "loss": 1.1992, + "step": 33374 + }, + { + "epoch": 0.9799459745140643, + "grad_norm": 0.0, + "learning_rate": 2.108548336958838e-08, + "loss": 1.2119, + "step": 33375 + }, + { + "epoch": 0.9799753361912033, + "grad_norm": 0.0, + "learning_rate": 2.1023806469295803e-08, + "loss": 1.1992, + "step": 33376 + }, + { + "epoch": 0.9800046978683422, + "grad_norm": 0.0, + "learning_rate": 2.0962219811440086e-08, + "loss": 1.3047, + "step": 33377 + }, + { + "epoch": 0.9800340595454813, + "grad_norm": 0.0, + "learning_rate": 2.090072339657856e-08, + "loss": 1.3154, + "step": 33378 + }, + { + "epoch": 0.9800634212226202, + "grad_norm": 0.0, + "learning_rate": 2.0839317225268552e-08, + "loss": 1.1543, + "step": 33379 + }, + { + "epoch": 0.9800927828997592, + "grad_norm": 0.0, + "learning_rate": 2.0778001298062956e-08, + "loss": 1.165, + "step": 33380 + }, + { + "epoch": 0.9801221445768983, + "grad_norm": 0.0, + "learning_rate": 2.0716775615519103e-08, + "loss": 1.23, + "step": 33381 + }, + { + "epoch": 0.9801515062540372, + "grad_norm": 0.0, + "learning_rate": 2.0655640178187665e-08, + "loss": 1.168, + "step": 33382 + }, + { + "epoch": 0.9801808679311762, + "grad_norm": 0.0, + "learning_rate": 2.0594594986623752e-08, + "loss": 1.1973, + "step": 33383 + }, + { + "epoch": 0.9802102296083153, + "grad_norm": 0.0, + "learning_rate": 2.0533640041379143e-08, + "loss": 1.2017, + "step": 33384 + }, + { + "epoch": 0.9802395912854542, + "grad_norm": 0.0, + "learning_rate": 2.0472775343004515e-08, + "loss": 1.1377, + "step": 33385 + }, + { + "epoch": 0.9802689529625932, + "grad_norm": 0.0, + "learning_rate": 2.041200089204942e-08, + "loss": 1.1719, + "step": 33386 + }, + { + "epoch": 0.9802983146397323, + "grad_norm": 0.0, + "learning_rate": 2.0351316689064538e-08, + "loss": 1.1646, + "step": 33387 + }, + { + "epoch": 0.9803276763168712, + "grad_norm": 0.0, + "learning_rate": 2.0290722734599422e-08, + "loss": 1.1436, + "step": 33388 + }, + { + "epoch": 0.9803570379940102, + "grad_norm": 0.0, + "learning_rate": 2.0230219029200303e-08, + "loss": 1.2227, + "step": 33389 + }, + { + "epoch": 0.9803863996711493, + "grad_norm": 0.0, + "learning_rate": 2.0169805573415635e-08, + "loss": 1.103, + "step": 33390 + }, + { + "epoch": 0.9804157613482882, + "grad_norm": 0.0, + "learning_rate": 2.0109482367791645e-08, + "loss": 1.2378, + "step": 33391 + }, + { + "epoch": 0.9804451230254272, + "grad_norm": 0.0, + "learning_rate": 2.004924941287345e-08, + "loss": 1.0718, + "step": 33392 + }, + { + "epoch": 0.9804744847025663, + "grad_norm": 0.0, + "learning_rate": 1.9989106709205065e-08, + "loss": 1.084, + "step": 33393 + }, + { + "epoch": 0.9805038463797052, + "grad_norm": 0.0, + "learning_rate": 1.9929054257331604e-08, + "loss": 1.2075, + "step": 33394 + }, + { + "epoch": 0.9805332080568442, + "grad_norm": 0.0, + "learning_rate": 1.9869092057794858e-08, + "loss": 1.2705, + "step": 33395 + }, + { + "epoch": 0.9805625697339833, + "grad_norm": 0.0, + "learning_rate": 1.980922011113884e-08, + "loss": 1.2588, + "step": 33396 + }, + { + "epoch": 0.9805919314111222, + "grad_norm": 0.0, + "learning_rate": 1.974943841790311e-08, + "loss": 1.208, + "step": 33397 + }, + { + "epoch": 0.9806212930882612, + "grad_norm": 0.0, + "learning_rate": 1.9689746978630575e-08, + "loss": 1.2021, + "step": 33398 + }, + { + "epoch": 0.9806506547654003, + "grad_norm": 0.0, + "learning_rate": 1.963014579385858e-08, + "loss": 1.2202, + "step": 33399 + }, + { + "epoch": 0.9806800164425392, + "grad_norm": 0.0, + "learning_rate": 1.9570634864127802e-08, + "loss": 1.2266, + "step": 33400 + }, + { + "epoch": 0.9807093781196782, + "grad_norm": 0.0, + "learning_rate": 1.9511214189974482e-08, + "loss": 1.0815, + "step": 33401 + }, + { + "epoch": 0.9807387397968172, + "grad_norm": 0.0, + "learning_rate": 1.9451883771938186e-08, + "loss": 1.2471, + "step": 33402 + }, + { + "epoch": 0.9807681014739562, + "grad_norm": 0.0, + "learning_rate": 1.939264361055515e-08, + "loss": 1.1685, + "step": 33403 + }, + { + "epoch": 0.9807974631510952, + "grad_norm": 0.0, + "learning_rate": 1.9333493706359396e-08, + "loss": 1.1768, + "step": 33404 + }, + { + "epoch": 0.9808268248282341, + "grad_norm": 0.0, + "learning_rate": 1.927443405988827e-08, + "loss": 1.1147, + "step": 33405 + }, + { + "epoch": 0.9808561865053732, + "grad_norm": 0.0, + "learning_rate": 1.921546467167357e-08, + "loss": 1.2983, + "step": 33406 + }, + { + "epoch": 0.9808855481825122, + "grad_norm": 0.0, + "learning_rate": 1.9156585542249308e-08, + "loss": 1.1699, + "step": 33407 + }, + { + "epoch": 0.9809149098596511, + "grad_norm": 0.0, + "learning_rate": 1.9097796672149503e-08, + "loss": 1.1719, + "step": 33408 + }, + { + "epoch": 0.9809442715367902, + "grad_norm": 0.0, + "learning_rate": 1.903909806190374e-08, + "loss": 1.1528, + "step": 33409 + }, + { + "epoch": 0.9809736332139292, + "grad_norm": 0.0, + "learning_rate": 1.8980489712042695e-08, + "loss": 1.3203, + "step": 33410 + }, + { + "epoch": 0.9810029948910681, + "grad_norm": 0.0, + "learning_rate": 1.8921971623098168e-08, + "loss": 1.1807, + "step": 33411 + }, + { + "epoch": 0.9810323565682072, + "grad_norm": 0.0, + "learning_rate": 1.8863543795597516e-08, + "loss": 1.1421, + "step": 33412 + }, + { + "epoch": 0.9810617182453462, + "grad_norm": 0.0, + "learning_rate": 1.8805206230071428e-08, + "loss": 1.1978, + "step": 33413 + }, + { + "epoch": 0.9810910799224851, + "grad_norm": 0.0, + "learning_rate": 1.8746958927045035e-08, + "loss": 1.2842, + "step": 33414 + }, + { + "epoch": 0.9811204415996242, + "grad_norm": 0.0, + "learning_rate": 1.8688801887046802e-08, + "loss": 1.1514, + "step": 33415 + }, + { + "epoch": 0.9811498032767632, + "grad_norm": 0.0, + "learning_rate": 1.863073511060187e-08, + "loss": 1.2949, + "step": 33416 + }, + { + "epoch": 0.9811791649539021, + "grad_norm": 0.0, + "learning_rate": 1.8572758598235375e-08, + "loss": 1.2251, + "step": 33417 + }, + { + "epoch": 0.9812085266310412, + "grad_norm": 0.0, + "learning_rate": 1.8514872350471336e-08, + "loss": 1.166, + "step": 33418 + }, + { + "epoch": 0.9812378883081802, + "grad_norm": 0.0, + "learning_rate": 1.845707636783378e-08, + "loss": 1.3086, + "step": 33419 + }, + { + "epoch": 0.9812672499853191, + "grad_norm": 0.0, + "learning_rate": 1.8399370650845628e-08, + "loss": 1.2119, + "step": 33420 + }, + { + "epoch": 0.9812966116624582, + "grad_norm": 0.0, + "learning_rate": 1.8341755200027566e-08, + "loss": 1.0752, + "step": 33421 + }, + { + "epoch": 0.9813259733395971, + "grad_norm": 0.0, + "learning_rate": 1.8284230015902515e-08, + "loss": 1.0576, + "step": 33422 + }, + { + "epoch": 0.9813553350167361, + "grad_norm": 0.0, + "learning_rate": 1.8226795098988948e-08, + "loss": 1.1841, + "step": 33423 + }, + { + "epoch": 0.9813846966938752, + "grad_norm": 0.0, + "learning_rate": 1.8169450449806447e-08, + "loss": 1.2271, + "step": 33424 + }, + { + "epoch": 0.9814140583710141, + "grad_norm": 0.0, + "learning_rate": 1.8112196068874598e-08, + "loss": 1.2246, + "step": 33425 + }, + { + "epoch": 0.9814434200481531, + "grad_norm": 0.0, + "learning_rate": 1.8055031956709657e-08, + "loss": 1.1875, + "step": 33426 + }, + { + "epoch": 0.9814727817252922, + "grad_norm": 0.0, + "learning_rate": 1.7997958113830094e-08, + "loss": 1.1875, + "step": 33427 + }, + { + "epoch": 0.9815021434024311, + "grad_norm": 0.0, + "learning_rate": 1.7940974540751055e-08, + "loss": 1.3281, + "step": 33428 + }, + { + "epoch": 0.9815315050795701, + "grad_norm": 0.0, + "learning_rate": 1.7884081237988792e-08, + "loss": 1.1143, + "step": 33429 + }, + { + "epoch": 0.9815608667567092, + "grad_norm": 0.0, + "learning_rate": 1.7827278206057342e-08, + "loss": 1.1787, + "step": 33430 + }, + { + "epoch": 0.9815902284338481, + "grad_norm": 0.0, + "learning_rate": 1.7770565445469623e-08, + "loss": 1.1577, + "step": 33431 + }, + { + "epoch": 0.9816195901109871, + "grad_norm": 0.0, + "learning_rate": 1.7713942956739672e-08, + "loss": 1.1538, + "step": 33432 + }, + { + "epoch": 0.9816489517881262, + "grad_norm": 0.0, + "learning_rate": 1.765741074037819e-08, + "loss": 1.082, + "step": 33433 + }, + { + "epoch": 0.9816783134652651, + "grad_norm": 0.0, + "learning_rate": 1.76009687968981e-08, + "loss": 1.1499, + "step": 33434 + }, + { + "epoch": 0.9817076751424041, + "grad_norm": 0.0, + "learning_rate": 1.7544617126808994e-08, + "loss": 1.3779, + "step": 33435 + }, + { + "epoch": 0.9817370368195432, + "grad_norm": 0.0, + "learning_rate": 1.7488355730620466e-08, + "loss": 1.0771, + "step": 33436 + }, + { + "epoch": 0.9817663984966821, + "grad_norm": 0.0, + "learning_rate": 1.7432184608840995e-08, + "loss": 1.1484, + "step": 33437 + }, + { + "epoch": 0.9817957601738211, + "grad_norm": 0.0, + "learning_rate": 1.737610376197907e-08, + "loss": 1.2002, + "step": 33438 + }, + { + "epoch": 0.9818251218509602, + "grad_norm": 0.0, + "learning_rate": 1.7320113190540943e-08, + "loss": 1.1699, + "step": 33439 + }, + { + "epoch": 0.9818544835280991, + "grad_norm": 0.0, + "learning_rate": 1.7264212895033993e-08, + "loss": 1.1953, + "step": 33440 + }, + { + "epoch": 0.9818838452052381, + "grad_norm": 0.0, + "learning_rate": 1.720840287596448e-08, + "loss": 1.2603, + "step": 33441 + }, + { + "epoch": 0.9819132068823772, + "grad_norm": 0.0, + "learning_rate": 1.7152683133834226e-08, + "loss": 1.1763, + "step": 33442 + }, + { + "epoch": 0.9819425685595161, + "grad_norm": 0.0, + "learning_rate": 1.7097053669149488e-08, + "loss": 1.0864, + "step": 33443 + }, + { + "epoch": 0.9819719302366551, + "grad_norm": 0.0, + "learning_rate": 1.70415144824132e-08, + "loss": 1.1353, + "step": 33444 + }, + { + "epoch": 0.9820012919137941, + "grad_norm": 0.0, + "learning_rate": 1.6986065574127185e-08, + "loss": 1.2271, + "step": 33445 + }, + { + "epoch": 0.9820306535909331, + "grad_norm": 0.0, + "learning_rate": 1.693070694479326e-08, + "loss": 1.3047, + "step": 33446 + }, + { + "epoch": 0.9820600152680721, + "grad_norm": 0.0, + "learning_rate": 1.687543859491103e-08, + "loss": 1.1626, + "step": 33447 + }, + { + "epoch": 0.9820893769452111, + "grad_norm": 0.0, + "learning_rate": 1.68202605249812e-08, + "loss": 1.2271, + "step": 33448 + }, + { + "epoch": 0.9821187386223501, + "grad_norm": 0.0, + "learning_rate": 1.6765172735502266e-08, + "loss": 1.1533, + "step": 33449 + }, + { + "epoch": 0.9821481002994891, + "grad_norm": 0.0, + "learning_rate": 1.6710175226972714e-08, + "loss": 1.2207, + "step": 33450 + }, + { + "epoch": 0.9821774619766281, + "grad_norm": 0.0, + "learning_rate": 1.6655267999888812e-08, + "loss": 1.3086, + "step": 33451 + }, + { + "epoch": 0.9822068236537671, + "grad_norm": 0.0, + "learning_rate": 1.6600451054749054e-08, + "loss": 1.1816, + "step": 33452 + }, + { + "epoch": 0.9822361853309061, + "grad_norm": 0.0, + "learning_rate": 1.65457243920486e-08, + "loss": 1.1558, + "step": 33453 + }, + { + "epoch": 0.9822655470080451, + "grad_norm": 0.0, + "learning_rate": 1.6491088012281498e-08, + "loss": 1.1777, + "step": 33454 + }, + { + "epoch": 0.9822949086851841, + "grad_norm": 0.0, + "learning_rate": 1.6436541915941793e-08, + "loss": 1.0186, + "step": 33455 + }, + { + "epoch": 0.9823242703623231, + "grad_norm": 0.0, + "learning_rate": 1.6382086103524653e-08, + "loss": 1.2085, + "step": 33456 + }, + { + "epoch": 0.9823536320394621, + "grad_norm": 0.0, + "learning_rate": 1.632772057551968e-08, + "loss": 1.1147, + "step": 33457 + }, + { + "epoch": 0.9823829937166011, + "grad_norm": 0.0, + "learning_rate": 1.627344533242092e-08, + "loss": 1.1646, + "step": 33458 + }, + { + "epoch": 0.98241235539374, + "grad_norm": 0.0, + "learning_rate": 1.6219260374717994e-08, + "loss": 1.2754, + "step": 33459 + }, + { + "epoch": 0.9824417170708791, + "grad_norm": 0.0, + "learning_rate": 1.6165165702900497e-08, + "loss": 1.1533, + "step": 33460 + }, + { + "epoch": 0.9824710787480181, + "grad_norm": 0.0, + "learning_rate": 1.6111161317459157e-08, + "loss": 1.1431, + "step": 33461 + }, + { + "epoch": 0.982500440425157, + "grad_norm": 0.0, + "learning_rate": 1.6057247218881356e-08, + "loss": 1.2314, + "step": 33462 + }, + { + "epoch": 0.9825298021022961, + "grad_norm": 0.0, + "learning_rate": 1.6003423407654485e-08, + "loss": 1.2075, + "step": 33463 + }, + { + "epoch": 0.9825591637794351, + "grad_norm": 0.0, + "learning_rate": 1.594968988426593e-08, + "loss": 1.2168, + "step": 33464 + }, + { + "epoch": 0.982588525456574, + "grad_norm": 0.0, + "learning_rate": 1.589604664920086e-08, + "loss": 1.0342, + "step": 33465 + }, + { + "epoch": 0.9826178871337131, + "grad_norm": 0.0, + "learning_rate": 1.5842493702945548e-08, + "loss": 1.2754, + "step": 33466 + }, + { + "epoch": 0.9826472488108521, + "grad_norm": 0.0, + "learning_rate": 1.578903104598295e-08, + "loss": 1.144, + "step": 33467 + }, + { + "epoch": 0.982676610487991, + "grad_norm": 0.0, + "learning_rate": 1.5735658678797116e-08, + "loss": 1.2939, + "step": 33468 + }, + { + "epoch": 0.9827059721651301, + "grad_norm": 0.0, + "learning_rate": 1.5682376601870995e-08, + "loss": 1.1055, + "step": 33469 + }, + { + "epoch": 0.9827353338422691, + "grad_norm": 0.0, + "learning_rate": 1.5629184815686428e-08, + "loss": 1.1602, + "step": 33470 + }, + { + "epoch": 0.982764695519408, + "grad_norm": 0.0, + "learning_rate": 1.5576083320724135e-08, + "loss": 1.3018, + "step": 33471 + }, + { + "epoch": 0.9827940571965471, + "grad_norm": 0.0, + "learning_rate": 1.5523072117463732e-08, + "loss": 1.2168, + "step": 33472 + }, + { + "epoch": 0.9828234188736861, + "grad_norm": 0.0, + "learning_rate": 1.5470151206385954e-08, + "loss": 1.1343, + "step": 33473 + }, + { + "epoch": 0.982852780550825, + "grad_norm": 0.0, + "learning_rate": 1.5417320587968186e-08, + "loss": 1.2852, + "step": 33474 + }, + { + "epoch": 0.9828821422279641, + "grad_norm": 0.0, + "learning_rate": 1.5364580262688946e-08, + "loss": 1.1602, + "step": 33475 + }, + { + "epoch": 0.9829115039051031, + "grad_norm": 0.0, + "learning_rate": 1.5311930231024507e-08, + "loss": 1.1226, + "step": 33476 + }, + { + "epoch": 0.982940865582242, + "grad_norm": 0.0, + "learning_rate": 1.5259370493451163e-08, + "loss": 1.1509, + "step": 33477 + }, + { + "epoch": 0.9829702272593811, + "grad_norm": 0.0, + "learning_rate": 1.52069010504452e-08, + "loss": 1.2607, + "step": 33478 + }, + { + "epoch": 0.9829995889365201, + "grad_norm": 0.0, + "learning_rate": 1.515452190248068e-08, + "loss": 1.0449, + "step": 33479 + }, + { + "epoch": 0.983028950613659, + "grad_norm": 0.0, + "learning_rate": 1.5102233050030557e-08, + "loss": 1.2754, + "step": 33480 + }, + { + "epoch": 0.9830583122907981, + "grad_norm": 0.0, + "learning_rate": 1.505003449356779e-08, + "loss": 1.1719, + "step": 33481 + }, + { + "epoch": 0.9830876739679371, + "grad_norm": 0.0, + "learning_rate": 1.499792623356533e-08, + "loss": 1.292, + "step": 33482 + }, + { + "epoch": 0.983117035645076, + "grad_norm": 0.0, + "learning_rate": 1.4945908270492802e-08, + "loss": 1.1333, + "step": 33483 + }, + { + "epoch": 0.9831463973222151, + "grad_norm": 0.0, + "learning_rate": 1.4893980604822056e-08, + "loss": 1.0483, + "step": 33484 + }, + { + "epoch": 0.983175758999354, + "grad_norm": 0.0, + "learning_rate": 1.484214323702271e-08, + "loss": 1.3447, + "step": 33485 + }, + { + "epoch": 0.983205120676493, + "grad_norm": 0.0, + "learning_rate": 1.4790396167563281e-08, + "loss": 1.3154, + "step": 33486 + }, + { + "epoch": 0.9832344823536321, + "grad_norm": 0.0, + "learning_rate": 1.4738739396911172e-08, + "loss": 1.2363, + "step": 33487 + }, + { + "epoch": 0.983263844030771, + "grad_norm": 0.0, + "learning_rate": 1.4687172925533788e-08, + "loss": 1.2588, + "step": 33488 + }, + { + "epoch": 0.98329320570791, + "grad_norm": 0.0, + "learning_rate": 1.4635696753897422e-08, + "loss": 1.1934, + "step": 33489 + }, + { + "epoch": 0.9833225673850491, + "grad_norm": 0.0, + "learning_rate": 1.4584310882468367e-08, + "loss": 1.104, + "step": 33490 + }, + { + "epoch": 0.983351929062188, + "grad_norm": 0.0, + "learning_rate": 1.4533015311709587e-08, + "loss": 1.1177, + "step": 33491 + }, + { + "epoch": 0.983381290739327, + "grad_norm": 0.0, + "learning_rate": 1.4481810042087374e-08, + "loss": 1.2744, + "step": 33492 + }, + { + "epoch": 0.9834106524164661, + "grad_norm": 0.0, + "learning_rate": 1.4430695074062472e-08, + "loss": 1.3384, + "step": 33493 + }, + { + "epoch": 0.983440014093605, + "grad_norm": 0.0, + "learning_rate": 1.4379670408098956e-08, + "loss": 1.147, + "step": 33494 + }, + { + "epoch": 0.983469375770744, + "grad_norm": 0.0, + "learning_rate": 1.4328736044656454e-08, + "loss": 1.0952, + "step": 33495 + }, + { + "epoch": 0.9834987374478831, + "grad_norm": 0.0, + "learning_rate": 1.4277891984197933e-08, + "loss": 1.2285, + "step": 33496 + }, + { + "epoch": 0.983528099125022, + "grad_norm": 0.0, + "learning_rate": 1.4227138227180804e-08, + "loss": 1.2739, + "step": 33497 + }, + { + "epoch": 0.983557460802161, + "grad_norm": 0.0, + "learning_rate": 1.4176474774065806e-08, + "loss": 1.1602, + "step": 33498 + }, + { + "epoch": 0.9835868224793001, + "grad_norm": 0.0, + "learning_rate": 1.4125901625309245e-08, + "loss": 1.2588, + "step": 33499 + }, + { + "epoch": 0.983616184156439, + "grad_norm": 0.0, + "learning_rate": 1.4075418781369642e-08, + "loss": 1.2754, + "step": 33500 + }, + { + "epoch": 0.983645545833578, + "grad_norm": 0.0, + "learning_rate": 1.402502624270441e-08, + "loss": 1.0967, + "step": 33501 + }, + { + "epoch": 0.9836749075107171, + "grad_norm": 0.0, + "learning_rate": 1.3974724009767626e-08, + "loss": 1.2471, + "step": 33502 + }, + { + "epoch": 0.983704269187856, + "grad_norm": 0.0, + "learning_rate": 1.3924512083015595e-08, + "loss": 1.2773, + "step": 33503 + }, + { + "epoch": 0.983733630864995, + "grad_norm": 0.0, + "learning_rate": 1.3874390462901288e-08, + "loss": 1.2021, + "step": 33504 + }, + { + "epoch": 0.983762992542134, + "grad_norm": 0.0, + "learning_rate": 1.3824359149877675e-08, + "loss": 1.25, + "step": 33505 + }, + { + "epoch": 0.983792354219273, + "grad_norm": 0.0, + "learning_rate": 1.3774418144398837e-08, + "loss": 1.208, + "step": 33506 + }, + { + "epoch": 0.983821715896412, + "grad_norm": 0.0, + "learning_rate": 1.3724567446915527e-08, + "loss": 1.209, + "step": 33507 + }, + { + "epoch": 0.983851077573551, + "grad_norm": 0.0, + "learning_rate": 1.3674807057877382e-08, + "loss": 1.1885, + "step": 33508 + }, + { + "epoch": 0.98388043925069, + "grad_norm": 0.0, + "learning_rate": 1.3625136977736264e-08, + "loss": 1.2417, + "step": 33509 + }, + { + "epoch": 0.983909800927829, + "grad_norm": 0.0, + "learning_rate": 1.3575557206940704e-08, + "loss": 1.3457, + "step": 33510 + }, + { + "epoch": 0.9839391626049679, + "grad_norm": 0.0, + "learning_rate": 1.352606774593812e-08, + "loss": 1.2114, + "step": 33511 + }, + { + "epoch": 0.983968524282107, + "grad_norm": 0.0, + "learning_rate": 1.3476668595178155e-08, + "loss": 1.1738, + "step": 33512 + }, + { + "epoch": 0.983997885959246, + "grad_norm": 0.0, + "learning_rate": 1.3427359755104897e-08, + "loss": 1.1362, + "step": 33513 + }, + { + "epoch": 0.9840272476363849, + "grad_norm": 0.0, + "learning_rate": 1.3378141226166874e-08, + "loss": 1.2378, + "step": 33514 + }, + { + "epoch": 0.984056609313524, + "grad_norm": 0.0, + "learning_rate": 1.3329013008807069e-08, + "loss": 1.2622, + "step": 33515 + }, + { + "epoch": 0.984085970990663, + "grad_norm": 0.0, + "learning_rate": 1.3279975103470677e-08, + "loss": 1.2314, + "step": 33516 + }, + { + "epoch": 0.9841153326678019, + "grad_norm": 0.0, + "learning_rate": 1.323102751060179e-08, + "loss": 1.188, + "step": 33517 + }, + { + "epoch": 0.984144694344941, + "grad_norm": 0.0, + "learning_rate": 1.3182170230642277e-08, + "loss": 1.2041, + "step": 33518 + }, + { + "epoch": 0.98417405602208, + "grad_norm": 0.0, + "learning_rate": 1.3133403264032895e-08, + "loss": 1.2637, + "step": 33519 + }, + { + "epoch": 0.9842034176992189, + "grad_norm": 0.0, + "learning_rate": 1.3084726611216625e-08, + "loss": 1.124, + "step": 33520 + }, + { + "epoch": 0.984232779376358, + "grad_norm": 0.0, + "learning_rate": 1.3036140272633113e-08, + "loss": 1.29, + "step": 33521 + }, + { + "epoch": 0.984262141053497, + "grad_norm": 0.0, + "learning_rate": 1.298764424872201e-08, + "loss": 1.2134, + "step": 33522 + }, + { + "epoch": 0.9842915027306359, + "grad_norm": 0.0, + "learning_rate": 1.293923853991963e-08, + "loss": 1.2324, + "step": 33523 + }, + { + "epoch": 0.984320864407775, + "grad_norm": 0.0, + "learning_rate": 1.2890923146666734e-08, + "loss": 1.1875, + "step": 33524 + }, + { + "epoch": 0.984350226084914, + "grad_norm": 0.0, + "learning_rate": 1.2842698069398529e-08, + "loss": 1.1802, + "step": 33525 + }, + { + "epoch": 0.9843795877620529, + "grad_norm": 0.0, + "learning_rate": 1.2794563308551333e-08, + "loss": 1.0107, + "step": 33526 + }, + { + "epoch": 0.984408949439192, + "grad_norm": 0.0, + "learning_rate": 1.2746518864561464e-08, + "loss": 1.1729, + "step": 33527 + }, + { + "epoch": 0.984438311116331, + "grad_norm": 0.0, + "learning_rate": 1.2698564737861907e-08, + "loss": 1.1738, + "step": 33528 + }, + { + "epoch": 0.9844676727934699, + "grad_norm": 0.0, + "learning_rate": 1.2650700928887871e-08, + "loss": 1.0859, + "step": 33529 + }, + { + "epoch": 0.984497034470609, + "grad_norm": 0.0, + "learning_rate": 1.2602927438070123e-08, + "loss": 1.1396, + "step": 33530 + }, + { + "epoch": 0.984526396147748, + "grad_norm": 0.0, + "learning_rate": 1.2555244265842759e-08, + "loss": 1.207, + "step": 33531 + }, + { + "epoch": 0.9845557578248869, + "grad_norm": 0.0, + "learning_rate": 1.2507651412635435e-08, + "loss": 1.2271, + "step": 33532 + }, + { + "epoch": 0.984585119502026, + "grad_norm": 0.0, + "learning_rate": 1.2460148878880029e-08, + "loss": 1.1934, + "step": 33533 + }, + { + "epoch": 0.984614481179165, + "grad_norm": 0.0, + "learning_rate": 1.2412736665005087e-08, + "loss": 1.3833, + "step": 33534 + }, + { + "epoch": 0.9846438428563039, + "grad_norm": 0.0, + "learning_rate": 1.2365414771439154e-08, + "loss": 1.2944, + "step": 33535 + }, + { + "epoch": 0.984673204533443, + "grad_norm": 0.0, + "learning_rate": 1.2318183198610779e-08, + "loss": 1.1865, + "step": 33536 + }, + { + "epoch": 0.9847025662105819, + "grad_norm": 0.0, + "learning_rate": 1.2271041946947393e-08, + "loss": 1.1904, + "step": 33537 + }, + { + "epoch": 0.9847319278877209, + "grad_norm": 0.0, + "learning_rate": 1.2223991016875324e-08, + "loss": 1.1816, + "step": 33538 + }, + { + "epoch": 0.98476128956486, + "grad_norm": 0.0, + "learning_rate": 1.217703040881868e-08, + "loss": 1.2627, + "step": 33539 + }, + { + "epoch": 0.9847906512419989, + "grad_norm": 0.0, + "learning_rate": 1.2130160123203783e-08, + "loss": 1.3232, + "step": 33540 + }, + { + "epoch": 0.9848200129191379, + "grad_norm": 0.0, + "learning_rate": 1.2083380160452518e-08, + "loss": 1.2759, + "step": 33541 + }, + { + "epoch": 0.984849374596277, + "grad_norm": 0.0, + "learning_rate": 1.2036690520990101e-08, + "loss": 1.0913, + "step": 33542 + }, + { + "epoch": 0.9848787362734159, + "grad_norm": 0.0, + "learning_rate": 1.1990091205238418e-08, + "loss": 1.2061, + "step": 33543 + }, + { + "epoch": 0.9849080979505549, + "grad_norm": 0.0, + "learning_rate": 1.1943582213617133e-08, + "loss": 1.1938, + "step": 33544 + }, + { + "epoch": 0.984937459627694, + "grad_norm": 0.0, + "learning_rate": 1.189716354654924e-08, + "loss": 1.228, + "step": 33545 + }, + { + "epoch": 0.9849668213048329, + "grad_norm": 0.0, + "learning_rate": 1.1850835204452182e-08, + "loss": 1.1338, + "step": 33546 + }, + { + "epoch": 0.9849961829819719, + "grad_norm": 0.0, + "learning_rate": 1.1804597187746736e-08, + "loss": 1.1934, + "step": 33547 + }, + { + "epoch": 0.985025544659111, + "grad_norm": 0.0, + "learning_rate": 1.1758449496850344e-08, + "loss": 1.2402, + "step": 33548 + }, + { + "epoch": 0.9850549063362499, + "grad_norm": 0.0, + "learning_rate": 1.171239213217934e-08, + "loss": 1.1128, + "step": 33549 + }, + { + "epoch": 0.9850842680133889, + "grad_norm": 0.0, + "learning_rate": 1.1666425094152278e-08, + "loss": 1.2373, + "step": 33550 + }, + { + "epoch": 0.985113629690528, + "grad_norm": 0.0, + "learning_rate": 1.1620548383183273e-08, + "loss": 1.2134, + "step": 33551 + }, + { + "epoch": 0.9851429913676669, + "grad_norm": 0.0, + "learning_rate": 1.1574761999687545e-08, + "loss": 1.2212, + "step": 33552 + }, + { + "epoch": 0.9851723530448059, + "grad_norm": 0.0, + "learning_rate": 1.152906594408032e-08, + "loss": 1.2422, + "step": 33553 + }, + { + "epoch": 0.985201714721945, + "grad_norm": 0.0, + "learning_rate": 1.148346021677238e-08, + "loss": 1.146, + "step": 33554 + }, + { + "epoch": 0.9852310763990839, + "grad_norm": 0.0, + "learning_rate": 1.1437944818178947e-08, + "loss": 1.2168, + "step": 33555 + }, + { + "epoch": 0.9852604380762229, + "grad_norm": 0.0, + "learning_rate": 1.1392519748709696e-08, + "loss": 1.0146, + "step": 33556 + }, + { + "epoch": 0.985289799753362, + "grad_norm": 0.0, + "learning_rate": 1.1347185008775407e-08, + "loss": 1.2656, + "step": 33557 + }, + { + "epoch": 0.9853191614305009, + "grad_norm": 0.0, + "learning_rate": 1.1301940598787975e-08, + "loss": 1.3018, + "step": 33558 + }, + { + "epoch": 0.9853485231076399, + "grad_norm": 0.0, + "learning_rate": 1.125678651915374e-08, + "loss": 1.1523, + "step": 33559 + }, + { + "epoch": 0.985377884784779, + "grad_norm": 0.0, + "learning_rate": 1.1211722770283484e-08, + "loss": 1.1895, + "step": 33560 + }, + { + "epoch": 0.9854072464619179, + "grad_norm": 0.0, + "learning_rate": 1.116674935258355e-08, + "loss": 1.1943, + "step": 33561 + }, + { + "epoch": 0.9854366081390569, + "grad_norm": 0.0, + "learning_rate": 1.112186626646139e-08, + "loss": 1.1074, + "step": 33562 + }, + { + "epoch": 0.9854659698161959, + "grad_norm": 0.0, + "learning_rate": 1.1077073512322234e-08, + "loss": 1.1875, + "step": 33563 + }, + { + "epoch": 0.9854953314933349, + "grad_norm": 0.0, + "learning_rate": 1.1032371090570204e-08, + "loss": 1.2988, + "step": 33564 + }, + { + "epoch": 0.9855246931704739, + "grad_norm": 0.0, + "learning_rate": 1.0987759001611642e-08, + "loss": 1.1401, + "step": 33565 + }, + { + "epoch": 0.9855540548476129, + "grad_norm": 0.0, + "learning_rate": 1.0943237245849558e-08, + "loss": 1.2852, + "step": 33566 + }, + { + "epoch": 0.9855834165247519, + "grad_norm": 0.0, + "learning_rate": 1.0898805823684743e-08, + "loss": 1.2324, + "step": 33567 + }, + { + "epoch": 0.9856127782018909, + "grad_norm": 0.0, + "learning_rate": 1.085446473552132e-08, + "loss": 1.2573, + "step": 33568 + }, + { + "epoch": 0.9856421398790299, + "grad_norm": 0.0, + "learning_rate": 1.0810213981757856e-08, + "loss": 1.1914, + "step": 33569 + }, + { + "epoch": 0.9856715015561689, + "grad_norm": 0.0, + "learning_rate": 1.0766053562797362e-08, + "loss": 1.3086, + "step": 33570 + }, + { + "epoch": 0.9857008632333079, + "grad_norm": 0.0, + "learning_rate": 1.07219834790373e-08, + "loss": 1.1123, + "step": 33571 + }, + { + "epoch": 0.9857302249104469, + "grad_norm": 0.0, + "learning_rate": 1.0678003730876241e-08, + "loss": 1.2344, + "step": 33572 + }, + { + "epoch": 0.9857595865875859, + "grad_norm": 0.0, + "learning_rate": 1.0634114318712752e-08, + "loss": 1.2231, + "step": 33573 + }, + { + "epoch": 0.9857889482647249, + "grad_norm": 0.0, + "learning_rate": 1.0590315242943183e-08, + "loss": 1.29, + "step": 33574 + }, + { + "epoch": 0.9858183099418639, + "grad_norm": 0.0, + "learning_rate": 1.0546606503962775e-08, + "loss": 1.2422, + "step": 33575 + }, + { + "epoch": 0.9858476716190029, + "grad_norm": 0.0, + "learning_rate": 1.0502988102168988e-08, + "loss": 1.1748, + "step": 33576 + }, + { + "epoch": 0.9858770332961418, + "grad_norm": 0.0, + "learning_rate": 1.0459460037954839e-08, + "loss": 1.2207, + "step": 33577 + }, + { + "epoch": 0.9859063949732809, + "grad_norm": 0.0, + "learning_rate": 1.0416022311713348e-08, + "loss": 1.2539, + "step": 33578 + }, + { + "epoch": 0.9859357566504199, + "grad_norm": 0.0, + "learning_rate": 1.0372674923838644e-08, + "loss": 1.0859, + "step": 33579 + }, + { + "epoch": 0.9859651183275588, + "grad_norm": 0.0, + "learning_rate": 1.0329417874722636e-08, + "loss": 1.23, + "step": 33580 + }, + { + "epoch": 0.9859944800046979, + "grad_norm": 0.0, + "learning_rate": 1.0286251164756123e-08, + "loss": 1.2441, + "step": 33581 + }, + { + "epoch": 0.9860238416818369, + "grad_norm": 0.0, + "learning_rate": 1.0243174794328792e-08, + "loss": 1.2212, + "step": 33582 + }, + { + "epoch": 0.9860532033589758, + "grad_norm": 0.0, + "learning_rate": 1.0200188763831443e-08, + "loss": 1.063, + "step": 33583 + }, + { + "epoch": 0.9860825650361149, + "grad_norm": 0.0, + "learning_rate": 1.0157293073651542e-08, + "loss": 1.2002, + "step": 33584 + }, + { + "epoch": 0.9861119267132539, + "grad_norm": 0.0, + "learning_rate": 1.0114487724177669e-08, + "loss": 1.2119, + "step": 33585 + }, + { + "epoch": 0.9861412883903928, + "grad_norm": 0.0, + "learning_rate": 1.007177271579729e-08, + "loss": 1.209, + "step": 33586 + }, + { + "epoch": 0.9861706500675319, + "grad_norm": 0.0, + "learning_rate": 1.0029148048896764e-08, + "loss": 1.25, + "step": 33587 + }, + { + "epoch": 0.9862000117446709, + "grad_norm": 0.0, + "learning_rate": 9.986613723860228e-09, + "loss": 1.2427, + "step": 33588 + }, + { + "epoch": 0.9862293734218098, + "grad_norm": 0.0, + "learning_rate": 9.94416974107404e-09, + "loss": 1.2168, + "step": 33589 + }, + { + "epoch": 0.9862587350989489, + "grad_norm": 0.0, + "learning_rate": 9.901816100921224e-09, + "loss": 1.1416, + "step": 33590 + }, + { + "epoch": 0.9862880967760879, + "grad_norm": 0.0, + "learning_rate": 9.859552803783701e-09, + "loss": 1.2705, + "step": 33591 + }, + { + "epoch": 0.9863174584532268, + "grad_norm": 0.0, + "learning_rate": 9.817379850045605e-09, + "loss": 1.2354, + "step": 33592 + }, + { + "epoch": 0.9863468201303659, + "grad_norm": 0.0, + "learning_rate": 9.775297240087744e-09, + "loss": 1.21, + "step": 33593 + }, + { + "epoch": 0.9863761818075049, + "grad_norm": 0.0, + "learning_rate": 9.733304974289815e-09, + "loss": 1.0928, + "step": 33594 + }, + { + "epoch": 0.9864055434846438, + "grad_norm": 0.0, + "learning_rate": 9.691403053032622e-09, + "loss": 1.2842, + "step": 33595 + }, + { + "epoch": 0.9864349051617829, + "grad_norm": 0.0, + "learning_rate": 9.649591476693643e-09, + "loss": 1.2188, + "step": 33596 + }, + { + "epoch": 0.9864642668389219, + "grad_norm": 0.0, + "learning_rate": 9.607870245652574e-09, + "loss": 1.0767, + "step": 33597 + }, + { + "epoch": 0.9864936285160608, + "grad_norm": 0.0, + "learning_rate": 9.566239360286888e-09, + "loss": 1.1177, + "step": 33598 + }, + { + "epoch": 0.9865229901931999, + "grad_norm": 0.0, + "learning_rate": 9.524698820971845e-09, + "loss": 1.3047, + "step": 33599 + }, + { + "epoch": 0.9865523518703389, + "grad_norm": 0.0, + "learning_rate": 9.483248628082697e-09, + "loss": 1.0796, + "step": 33600 + }, + { + "epoch": 0.9865817135474778, + "grad_norm": 0.0, + "learning_rate": 9.441888781995812e-09, + "loss": 1.1074, + "step": 33601 + }, + { + "epoch": 0.9866110752246169, + "grad_norm": 0.0, + "learning_rate": 9.400619283084223e-09, + "loss": 1.1519, + "step": 33602 + }, + { + "epoch": 0.9866404369017558, + "grad_norm": 0.0, + "learning_rate": 9.359440131722075e-09, + "loss": 1.0811, + "step": 33603 + }, + { + "epoch": 0.9866697985788948, + "grad_norm": 0.0, + "learning_rate": 9.318351328281294e-09, + "loss": 1.2891, + "step": 33604 + }, + { + "epoch": 0.9866991602560338, + "grad_norm": 0.0, + "learning_rate": 9.277352873132695e-09, + "loss": 1.168, + "step": 33605 + }, + { + "epoch": 0.9867285219331728, + "grad_norm": 0.0, + "learning_rate": 9.23644476664709e-09, + "loss": 1.3828, + "step": 33606 + }, + { + "epoch": 0.9867578836103118, + "grad_norm": 0.0, + "learning_rate": 9.195627009196406e-09, + "loss": 1.1426, + "step": 33607 + }, + { + "epoch": 0.9867872452874508, + "grad_norm": 0.0, + "learning_rate": 9.154899601147016e-09, + "loss": 1.2363, + "step": 33608 + }, + { + "epoch": 0.9868166069645898, + "grad_norm": 0.0, + "learning_rate": 9.114262542869734e-09, + "loss": 1.1411, + "step": 33609 + }, + { + "epoch": 0.9868459686417288, + "grad_norm": 0.0, + "learning_rate": 9.073715834729824e-09, + "loss": 1.2148, + "step": 33610 + }, + { + "epoch": 0.9868753303188678, + "grad_norm": 0.0, + "learning_rate": 9.033259477095879e-09, + "loss": 1.207, + "step": 33611 + }, + { + "epoch": 0.9869046919960068, + "grad_norm": 0.0, + "learning_rate": 8.992893470332053e-09, + "loss": 1.1743, + "step": 33612 + }, + { + "epoch": 0.9869340536731458, + "grad_norm": 0.0, + "learning_rate": 8.952617814804721e-09, + "loss": 1.2559, + "step": 33613 + }, + { + "epoch": 0.9869634153502848, + "grad_norm": 0.0, + "learning_rate": 8.912432510878033e-09, + "loss": 1.2217, + "step": 33614 + }, + { + "epoch": 0.9869927770274238, + "grad_norm": 0.0, + "learning_rate": 8.872337558915034e-09, + "loss": 1.2666, + "step": 33615 + }, + { + "epoch": 0.9870221387045628, + "grad_norm": 0.0, + "learning_rate": 8.832332959277656e-09, + "loss": 1.1787, + "step": 33616 + }, + { + "epoch": 0.9870515003817018, + "grad_norm": 0.0, + "learning_rate": 8.792418712327833e-09, + "loss": 1.1084, + "step": 33617 + }, + { + "epoch": 0.9870808620588408, + "grad_norm": 0.0, + "learning_rate": 8.752594818427495e-09, + "loss": 1.2158, + "step": 33618 + }, + { + "epoch": 0.9871102237359798, + "grad_norm": 0.0, + "learning_rate": 8.712861277936357e-09, + "loss": 1.1572, + "step": 33619 + }, + { + "epoch": 0.9871395854131187, + "grad_norm": 0.0, + "learning_rate": 8.67321809121302e-09, + "loss": 1.2217, + "step": 33620 + }, + { + "epoch": 0.9871689470902578, + "grad_norm": 0.0, + "learning_rate": 8.633665258617197e-09, + "loss": 1.2588, + "step": 33621 + }, + { + "epoch": 0.9871983087673968, + "grad_norm": 0.0, + "learning_rate": 8.594202780506378e-09, + "loss": 1.0938, + "step": 33622 + }, + { + "epoch": 0.9872276704445357, + "grad_norm": 0.0, + "learning_rate": 8.554830657235834e-09, + "loss": 1.1738, + "step": 33623 + }, + { + "epoch": 0.9872570321216748, + "grad_norm": 0.0, + "learning_rate": 8.51554888916417e-09, + "loss": 1.2817, + "step": 33624 + }, + { + "epoch": 0.9872863937988138, + "grad_norm": 0.0, + "learning_rate": 8.476357476644437e-09, + "loss": 1.1533, + "step": 33625 + }, + { + "epoch": 0.9873157554759527, + "grad_norm": 0.0, + "learning_rate": 8.437256420031903e-09, + "loss": 1.1196, + "step": 33626 + }, + { + "epoch": 0.9873451171530918, + "grad_norm": 0.0, + "learning_rate": 8.39824571968073e-09, + "loss": 1.1953, + "step": 33627 + }, + { + "epoch": 0.9873744788302308, + "grad_norm": 0.0, + "learning_rate": 8.359325375941751e-09, + "loss": 1.2432, + "step": 33628 + }, + { + "epoch": 0.9874038405073697, + "grad_norm": 0.0, + "learning_rate": 8.320495389169126e-09, + "loss": 1.2705, + "step": 33629 + }, + { + "epoch": 0.9874332021845088, + "grad_norm": 0.0, + "learning_rate": 8.281755759713683e-09, + "loss": 1.0449, + "step": 33630 + }, + { + "epoch": 0.9874625638616478, + "grad_norm": 0.0, + "learning_rate": 8.243106487925146e-09, + "loss": 1.0625, + "step": 33631 + }, + { + "epoch": 0.9874919255387867, + "grad_norm": 0.0, + "learning_rate": 8.204547574152122e-09, + "loss": 1.1738, + "step": 33632 + }, + { + "epoch": 0.9875212872159258, + "grad_norm": 0.0, + "learning_rate": 8.166079018745443e-09, + "loss": 1.2017, + "step": 33633 + }, + { + "epoch": 0.9875506488930648, + "grad_norm": 0.0, + "learning_rate": 8.127700822050389e-09, + "loss": 1.2451, + "step": 33634 + }, + { + "epoch": 0.9875800105702037, + "grad_norm": 0.0, + "learning_rate": 8.089412984416678e-09, + "loss": 1.2451, + "step": 33635 + }, + { + "epoch": 0.9876093722473428, + "grad_norm": 0.0, + "learning_rate": 8.051215506189592e-09, + "loss": 1.1812, + "step": 33636 + }, + { + "epoch": 0.9876387339244818, + "grad_norm": 0.0, + "learning_rate": 8.013108387713298e-09, + "loss": 1.1807, + "step": 33637 + }, + { + "epoch": 0.9876680956016207, + "grad_norm": 0.0, + "learning_rate": 7.975091629334187e-09, + "loss": 1.1377, + "step": 33638 + }, + { + "epoch": 0.9876974572787598, + "grad_norm": 0.0, + "learning_rate": 7.937165231395316e-09, + "loss": 1.2607, + "step": 33639 + }, + { + "epoch": 0.9877268189558988, + "grad_norm": 0.0, + "learning_rate": 7.899329194239747e-09, + "loss": 1.1602, + "step": 33640 + }, + { + "epoch": 0.9877561806330377, + "grad_norm": 0.0, + "learning_rate": 7.861583518208316e-09, + "loss": 1.3369, + "step": 33641 + }, + { + "epoch": 0.9877855423101768, + "grad_norm": 0.0, + "learning_rate": 7.823928203645193e-09, + "loss": 1.2119, + "step": 33642 + }, + { + "epoch": 0.9878149039873158, + "grad_norm": 0.0, + "learning_rate": 7.786363250888996e-09, + "loss": 1.1665, + "step": 33643 + }, + { + "epoch": 0.9878442656644547, + "grad_norm": 0.0, + "learning_rate": 7.748888660279452e-09, + "loss": 1.2441, + "step": 33644 + }, + { + "epoch": 0.9878736273415938, + "grad_norm": 0.0, + "learning_rate": 7.711504432155181e-09, + "loss": 1.207, + "step": 33645 + }, + { + "epoch": 0.9879029890187327, + "grad_norm": 0.0, + "learning_rate": 7.674210566855911e-09, + "loss": 1.2373, + "step": 33646 + }, + { + "epoch": 0.9879323506958717, + "grad_norm": 0.0, + "learning_rate": 7.637007064716928e-09, + "loss": 1.0405, + "step": 33647 + }, + { + "epoch": 0.9879617123730108, + "grad_norm": 0.0, + "learning_rate": 7.599893926076852e-09, + "loss": 1.0679, + "step": 33648 + }, + { + "epoch": 0.9879910740501497, + "grad_norm": 0.0, + "learning_rate": 7.562871151268747e-09, + "loss": 1.1499, + "step": 33649 + }, + { + "epoch": 0.9880204357272887, + "grad_norm": 0.0, + "learning_rate": 7.525938740629013e-09, + "loss": 1.2656, + "step": 33650 + }, + { + "epoch": 0.9880497974044278, + "grad_norm": 0.0, + "learning_rate": 7.489096694491827e-09, + "loss": 1.1079, + "step": 33651 + }, + { + "epoch": 0.9880791590815667, + "grad_norm": 0.0, + "learning_rate": 7.452345013189144e-09, + "loss": 1.3057, + "step": 33652 + }, + { + "epoch": 0.9881085207587057, + "grad_norm": 0.0, + "learning_rate": 7.415683697055143e-09, + "loss": 1.0171, + "step": 33653 + }, + { + "epoch": 0.9881378824358448, + "grad_norm": 0.0, + "learning_rate": 7.379112746418448e-09, + "loss": 1.1904, + "step": 33654 + }, + { + "epoch": 0.9881672441129837, + "grad_norm": 0.0, + "learning_rate": 7.342632161613239e-09, + "loss": 1.2798, + "step": 33655 + }, + { + "epoch": 0.9881966057901227, + "grad_norm": 0.0, + "learning_rate": 7.30624194296703e-09, + "loss": 1.3477, + "step": 33656 + }, + { + "epoch": 0.9882259674672618, + "grad_norm": 0.0, + "learning_rate": 7.2699420908095565e-09, + "loss": 1.1562, + "step": 33657 + }, + { + "epoch": 0.9882553291444007, + "grad_norm": 0.0, + "learning_rate": 7.233732605469446e-09, + "loss": 1.2061, + "step": 33658 + }, + { + "epoch": 0.9882846908215397, + "grad_norm": 0.0, + "learning_rate": 7.197613487274213e-09, + "loss": 1.2188, + "step": 33659 + }, + { + "epoch": 0.9883140524986788, + "grad_norm": 0.0, + "learning_rate": 7.161584736549154e-09, + "loss": 1.3555, + "step": 33660 + }, + { + "epoch": 0.9883434141758177, + "grad_norm": 0.0, + "learning_rate": 7.125646353621785e-09, + "loss": 1.292, + "step": 33661 + }, + { + "epoch": 0.9883727758529567, + "grad_norm": 0.0, + "learning_rate": 7.089798338816289e-09, + "loss": 1.3018, + "step": 33662 + }, + { + "epoch": 0.9884021375300958, + "grad_norm": 0.0, + "learning_rate": 7.0540406924568535e-09, + "loss": 1.1475, + "step": 33663 + }, + { + "epoch": 0.9884314992072347, + "grad_norm": 0.0, + "learning_rate": 7.018373414867663e-09, + "loss": 1.3633, + "step": 33664 + }, + { + "epoch": 0.9884608608843737, + "grad_norm": 0.0, + "learning_rate": 6.982796506369571e-09, + "loss": 1.1846, + "step": 33665 + }, + { + "epoch": 0.9884902225615128, + "grad_norm": 0.0, + "learning_rate": 6.947309967285653e-09, + "loss": 1.291, + "step": 33666 + }, + { + "epoch": 0.9885195842386517, + "grad_norm": 0.0, + "learning_rate": 6.911913797936764e-09, + "loss": 1.2705, + "step": 33667 + }, + { + "epoch": 0.9885489459157907, + "grad_norm": 0.0, + "learning_rate": 6.876607998642648e-09, + "loss": 1.3477, + "step": 33668 + }, + { + "epoch": 0.9885783075929298, + "grad_norm": 0.0, + "learning_rate": 6.841392569723049e-09, + "loss": 1.1279, + "step": 33669 + }, + { + "epoch": 0.9886076692700687, + "grad_norm": 0.0, + "learning_rate": 6.80626751149549e-09, + "loss": 1.2021, + "step": 33670 + }, + { + "epoch": 0.9886370309472077, + "grad_norm": 0.0, + "learning_rate": 6.771232824277496e-09, + "loss": 1.1562, + "step": 33671 + }, + { + "epoch": 0.9886663926243467, + "grad_norm": 0.0, + "learning_rate": 6.736288508387701e-09, + "loss": 1.1201, + "step": 33672 + }, + { + "epoch": 0.9886957543014857, + "grad_norm": 0.0, + "learning_rate": 6.701434564140296e-09, + "loss": 1.2153, + "step": 33673 + }, + { + "epoch": 0.9887251159786247, + "grad_norm": 0.0, + "learning_rate": 6.666670991850588e-09, + "loss": 1.375, + "step": 33674 + }, + { + "epoch": 0.9887544776557637, + "grad_norm": 0.0, + "learning_rate": 6.631997791834987e-09, + "loss": 1.3545, + "step": 33675 + }, + { + "epoch": 0.9887838393329027, + "grad_norm": 0.0, + "learning_rate": 6.597414964404358e-09, + "loss": 1.1499, + "step": 33676 + }, + { + "epoch": 0.9888132010100417, + "grad_norm": 0.0, + "learning_rate": 6.5629225098728935e-09, + "loss": 1.2373, + "step": 33677 + }, + { + "epoch": 0.9888425626871807, + "grad_norm": 0.0, + "learning_rate": 6.528520428552565e-09, + "loss": 1.2251, + "step": 33678 + }, + { + "epoch": 0.9888719243643197, + "grad_norm": 0.0, + "learning_rate": 6.494208720754236e-09, + "loss": 1.1641, + "step": 33679 + }, + { + "epoch": 0.9889012860414587, + "grad_norm": 0.0, + "learning_rate": 6.459987386787658e-09, + "loss": 1.1094, + "step": 33680 + }, + { + "epoch": 0.9889306477185977, + "grad_norm": 0.0, + "learning_rate": 6.425856426963695e-09, + "loss": 1.2373, + "step": 33681 + }, + { + "epoch": 0.9889600093957367, + "grad_norm": 0.0, + "learning_rate": 6.391815841589877e-09, + "loss": 1.2324, + "step": 33682 + }, + { + "epoch": 0.9889893710728757, + "grad_norm": 0.0, + "learning_rate": 6.357865630973737e-09, + "loss": 1.1309, + "step": 33683 + }, + { + "epoch": 0.9890187327500147, + "grad_norm": 0.0, + "learning_rate": 6.3240057954228055e-09, + "loss": 1.2227, + "step": 33684 + }, + { + "epoch": 0.9890480944271537, + "grad_norm": 0.0, + "learning_rate": 6.2902363352435045e-09, + "loss": 1.0479, + "step": 33685 + }, + { + "epoch": 0.9890774561042927, + "grad_norm": 0.0, + "learning_rate": 6.256557250741147e-09, + "loss": 1.1167, + "step": 33686 + }, + { + "epoch": 0.9891068177814317, + "grad_norm": 0.0, + "learning_rate": 6.222968542219932e-09, + "loss": 1.1948, + "step": 33687 + }, + { + "epoch": 0.9891361794585707, + "grad_norm": 0.0, + "learning_rate": 6.189470209984061e-09, + "loss": 1.1445, + "step": 33688 + }, + { + "epoch": 0.9891655411357096, + "grad_norm": 0.0, + "learning_rate": 6.156062254336626e-09, + "loss": 1.2295, + "step": 33689 + }, + { + "epoch": 0.9891949028128487, + "grad_norm": 0.0, + "learning_rate": 6.122744675578496e-09, + "loss": 1.2217, + "step": 33690 + }, + { + "epoch": 0.9892242644899877, + "grad_norm": 0.0, + "learning_rate": 6.089517474012762e-09, + "loss": 1.2114, + "step": 33691 + }, + { + "epoch": 0.9892536261671266, + "grad_norm": 0.0, + "learning_rate": 6.056380649938076e-09, + "loss": 1.2412, + "step": 33692 + }, + { + "epoch": 0.9892829878442657, + "grad_norm": 0.0, + "learning_rate": 6.023334203655306e-09, + "loss": 1.2642, + "step": 33693 + }, + { + "epoch": 0.9893123495214047, + "grad_norm": 0.0, + "learning_rate": 5.990378135464214e-09, + "loss": 1.165, + "step": 33694 + }, + { + "epoch": 0.9893417111985436, + "grad_norm": 0.0, + "learning_rate": 5.957512445660119e-09, + "loss": 1.2637, + "step": 33695 + }, + { + "epoch": 0.9893710728756827, + "grad_norm": 0.0, + "learning_rate": 5.924737134542779e-09, + "loss": 1.1445, + "step": 33696 + }, + { + "epoch": 0.9894004345528217, + "grad_norm": 0.0, + "learning_rate": 5.8920522024064066e-09, + "loss": 1.2559, + "step": 33697 + }, + { + "epoch": 0.9894297962299606, + "grad_norm": 0.0, + "learning_rate": 5.859457649548539e-09, + "loss": 1.293, + "step": 33698 + }, + { + "epoch": 0.9894591579070997, + "grad_norm": 0.0, + "learning_rate": 5.826953476263386e-09, + "loss": 1.2461, + "step": 33699 + }, + { + "epoch": 0.9894885195842387, + "grad_norm": 0.0, + "learning_rate": 5.794539682844047e-09, + "loss": 1.2861, + "step": 33700 + }, + { + "epoch": 0.9895178812613776, + "grad_norm": 0.0, + "learning_rate": 5.76221626958473e-09, + "loss": 1.1885, + "step": 33701 + }, + { + "epoch": 0.9895472429385167, + "grad_norm": 0.0, + "learning_rate": 5.7299832367763155e-09, + "loss": 1.1084, + "step": 33702 + }, + { + "epoch": 0.9895766046156557, + "grad_norm": 0.0, + "learning_rate": 5.6978405847119e-09, + "loss": 1.1675, + "step": 33703 + }, + { + "epoch": 0.9896059662927946, + "grad_norm": 0.0, + "learning_rate": 5.6657883136823635e-09, + "loss": 1.1406, + "step": 33704 + }, + { + "epoch": 0.9896353279699337, + "grad_norm": 0.0, + "learning_rate": 5.633826423975253e-09, + "loss": 1.1416, + "step": 33705 + }, + { + "epoch": 0.9896646896470727, + "grad_norm": 0.0, + "learning_rate": 5.601954915881447e-09, + "loss": 1.1064, + "step": 33706 + }, + { + "epoch": 0.9896940513242116, + "grad_norm": 0.0, + "learning_rate": 5.5701737896896035e-09, + "loss": 1.2148, + "step": 33707 + }, + { + "epoch": 0.9897234130013506, + "grad_norm": 0.0, + "learning_rate": 5.53848304568505e-09, + "loss": 1.1699, + "step": 33708 + }, + { + "epoch": 0.9897527746784897, + "grad_norm": 0.0, + "learning_rate": 5.506882684156445e-09, + "loss": 1.1226, + "step": 33709 + }, + { + "epoch": 0.9897821363556286, + "grad_norm": 0.0, + "learning_rate": 5.475372705388005e-09, + "loss": 1.291, + "step": 33710 + }, + { + "epoch": 0.9898114980327676, + "grad_norm": 0.0, + "learning_rate": 5.443953109666167e-09, + "loss": 1.1094, + "step": 33711 + }, + { + "epoch": 0.9898408597099067, + "grad_norm": 0.0, + "learning_rate": 5.412623897274039e-09, + "loss": 1.3916, + "step": 33712 + }, + { + "epoch": 0.9898702213870456, + "grad_norm": 0.0, + "learning_rate": 5.381385068494727e-09, + "loss": 1.2275, + "step": 33713 + }, + { + "epoch": 0.9898995830641846, + "grad_norm": 0.0, + "learning_rate": 5.350236623612448e-09, + "loss": 1.3604, + "step": 33714 + }, + { + "epoch": 0.9899289447413236, + "grad_norm": 0.0, + "learning_rate": 5.319178562906979e-09, + "loss": 1.2285, + "step": 33715 + }, + { + "epoch": 0.9899583064184626, + "grad_norm": 0.0, + "learning_rate": 5.2882108866592064e-09, + "loss": 1.2676, + "step": 33716 + }, + { + "epoch": 0.9899876680956016, + "grad_norm": 0.0, + "learning_rate": 5.257333595150016e-09, + "loss": 1.3857, + "step": 33717 + }, + { + "epoch": 0.9900170297727406, + "grad_norm": 0.0, + "learning_rate": 5.226546688659184e-09, + "loss": 1.2031, + "step": 33718 + }, + { + "epoch": 0.9900463914498796, + "grad_norm": 0.0, + "learning_rate": 5.195850167464267e-09, + "loss": 1.2334, + "step": 33719 + }, + { + "epoch": 0.9900757531270186, + "grad_norm": 0.0, + "learning_rate": 5.16524403184282e-09, + "loss": 1.3081, + "step": 33720 + }, + { + "epoch": 0.9901051148041576, + "grad_norm": 0.0, + "learning_rate": 5.134728282071288e-09, + "loss": 1.1816, + "step": 33721 + }, + { + "epoch": 0.9901344764812966, + "grad_norm": 0.0, + "learning_rate": 5.1043029184261185e-09, + "loss": 1.2021, + "step": 33722 + }, + { + "epoch": 0.9901638381584356, + "grad_norm": 0.0, + "learning_rate": 5.073967941183755e-09, + "loss": 1.1973, + "step": 33723 + }, + { + "epoch": 0.9901931998355746, + "grad_norm": 0.0, + "learning_rate": 5.043723350616203e-09, + "loss": 1.3008, + "step": 33724 + }, + { + "epoch": 0.9902225615127136, + "grad_norm": 0.0, + "learning_rate": 5.013569146997688e-09, + "loss": 1.1484, + "step": 33725 + }, + { + "epoch": 0.9902519231898526, + "grad_norm": 0.0, + "learning_rate": 4.983505330602434e-09, + "loss": 1.2104, + "step": 33726 + }, + { + "epoch": 0.9902812848669916, + "grad_norm": 0.0, + "learning_rate": 4.953531901700226e-09, + "loss": 1.0605, + "step": 33727 + }, + { + "epoch": 0.9903106465441306, + "grad_norm": 0.0, + "learning_rate": 4.923648860563068e-09, + "loss": 1.2676, + "step": 33728 + }, + { + "epoch": 0.9903400082212696, + "grad_norm": 0.0, + "learning_rate": 4.893856207460745e-09, + "loss": 1.0947, + "step": 33729 + }, + { + "epoch": 0.9903693698984086, + "grad_norm": 0.0, + "learning_rate": 4.864153942663041e-09, + "loss": 1.2305, + "step": 33730 + }, + { + "epoch": 0.9903987315755476, + "grad_norm": 0.0, + "learning_rate": 4.83454206643974e-09, + "loss": 1.1743, + "step": 33731 + }, + { + "epoch": 0.9904280932526865, + "grad_norm": 0.0, + "learning_rate": 4.805020579056185e-09, + "loss": 1.1807, + "step": 33732 + }, + { + "epoch": 0.9904574549298256, + "grad_norm": 0.0, + "learning_rate": 4.7755894807821615e-09, + "loss": 1.2158, + "step": 33733 + }, + { + "epoch": 0.9904868166069646, + "grad_norm": 0.0, + "learning_rate": 4.746248771880791e-09, + "loss": 1.2715, + "step": 33734 + }, + { + "epoch": 0.9905161782841035, + "grad_norm": 0.0, + "learning_rate": 4.716998452619637e-09, + "loss": 1.2505, + "step": 33735 + }, + { + "epoch": 0.9905455399612426, + "grad_norm": 0.0, + "learning_rate": 4.687838523262933e-09, + "loss": 1.2686, + "step": 33736 + }, + { + "epoch": 0.9905749016383816, + "grad_norm": 0.0, + "learning_rate": 4.6587689840726924e-09, + "loss": 1.1963, + "step": 33737 + }, + { + "epoch": 0.9906042633155205, + "grad_norm": 0.0, + "learning_rate": 4.629789835314258e-09, + "loss": 1.1646, + "step": 33738 + }, + { + "epoch": 0.9906336249926596, + "grad_norm": 0.0, + "learning_rate": 4.600901077247421e-09, + "loss": 1.2197, + "step": 33739 + }, + { + "epoch": 0.9906629866697986, + "grad_norm": 0.0, + "learning_rate": 4.572102710135307e-09, + "loss": 1.23, + "step": 33740 + }, + { + "epoch": 0.9906923483469375, + "grad_norm": 0.0, + "learning_rate": 4.543394734236595e-09, + "loss": 1.167, + "step": 33741 + }, + { + "epoch": 0.9907217100240766, + "grad_norm": 0.0, + "learning_rate": 4.514777149813298e-09, + "loss": 1.3418, + "step": 33742 + }, + { + "epoch": 0.9907510717012156, + "grad_norm": 0.0, + "learning_rate": 4.48624995712188e-09, + "loss": 1.1675, + "step": 33743 + }, + { + "epoch": 0.9907804333783545, + "grad_norm": 0.0, + "learning_rate": 4.45781315642102e-09, + "loss": 1.2144, + "step": 33744 + }, + { + "epoch": 0.9908097950554936, + "grad_norm": 0.0, + "learning_rate": 4.429466747968292e-09, + "loss": 1.2598, + "step": 33745 + }, + { + "epoch": 0.9908391567326326, + "grad_norm": 0.0, + "learning_rate": 4.401210732020156e-09, + "loss": 1.3438, + "step": 33746 + }, + { + "epoch": 0.9908685184097715, + "grad_norm": 0.0, + "learning_rate": 4.373045108830854e-09, + "loss": 1.1748, + "step": 33747 + }, + { + "epoch": 0.9908978800869106, + "grad_norm": 0.0, + "learning_rate": 4.3449698786568465e-09, + "loss": 1.0591, + "step": 33748 + }, + { + "epoch": 0.9909272417640496, + "grad_norm": 0.0, + "learning_rate": 4.316985041751265e-09, + "loss": 1.2261, + "step": 33749 + }, + { + "epoch": 0.9909566034411885, + "grad_norm": 0.0, + "learning_rate": 4.28909059836613e-09, + "loss": 1.1953, + "step": 33750 + }, + { + "epoch": 0.9909859651183276, + "grad_norm": 0.0, + "learning_rate": 4.261286548755683e-09, + "loss": 1.1641, + "step": 33751 + }, + { + "epoch": 0.9910153267954666, + "grad_norm": 0.0, + "learning_rate": 4.2335728931708345e-09, + "loss": 1.1592, + "step": 33752 + }, + { + "epoch": 0.9910446884726055, + "grad_norm": 0.0, + "learning_rate": 4.2059496318602735e-09, + "loss": 1.1387, + "step": 33753 + }, + { + "epoch": 0.9910740501497446, + "grad_norm": 0.0, + "learning_rate": 4.178416765076021e-09, + "loss": 1.2515, + "step": 33754 + }, + { + "epoch": 0.9911034118268836, + "grad_norm": 0.0, + "learning_rate": 4.150974293066768e-09, + "loss": 1.1455, + "step": 33755 + }, + { + "epoch": 0.9911327735040225, + "grad_norm": 0.0, + "learning_rate": 4.123622216078982e-09, + "loss": 1.2783, + "step": 33756 + }, + { + "epoch": 0.9911621351811616, + "grad_norm": 0.0, + "learning_rate": 4.096360534362465e-09, + "loss": 1.1182, + "step": 33757 + }, + { + "epoch": 0.9911914968583005, + "grad_norm": 0.0, + "learning_rate": 4.069189248162575e-09, + "loss": 1.2686, + "step": 33758 + }, + { + "epoch": 0.9912208585354395, + "grad_norm": 0.0, + "learning_rate": 4.042108357723562e-09, + "loss": 1.1719, + "step": 33759 + }, + { + "epoch": 0.9912502202125786, + "grad_norm": 0.0, + "learning_rate": 4.015117863293005e-09, + "loss": 1.2275, + "step": 33760 + }, + { + "epoch": 0.9912795818897175, + "grad_norm": 0.0, + "learning_rate": 3.988217765114044e-09, + "loss": 1.1831, + "step": 33761 + }, + { + "epoch": 0.9913089435668565, + "grad_norm": 0.0, + "learning_rate": 3.9614080634287066e-09, + "loss": 1.0322, + "step": 33762 + }, + { + "epoch": 0.9913383052439956, + "grad_norm": 0.0, + "learning_rate": 3.934688758481242e-09, + "loss": 1.1836, + "step": 33763 + }, + { + "epoch": 0.9913676669211345, + "grad_norm": 0.0, + "learning_rate": 3.908059850511458e-09, + "loss": 1.1689, + "step": 33764 + }, + { + "epoch": 0.9913970285982735, + "grad_norm": 0.0, + "learning_rate": 3.881521339761385e-09, + "loss": 1.2656, + "step": 33765 + }, + { + "epoch": 0.9914263902754126, + "grad_norm": 0.0, + "learning_rate": 3.855073226470829e-09, + "loss": 1.1787, + "step": 33766 + }, + { + "epoch": 0.9914557519525515, + "grad_norm": 0.0, + "learning_rate": 3.828715510879599e-09, + "loss": 1.082, + "step": 33767 + }, + { + "epoch": 0.9914851136296905, + "grad_norm": 0.0, + "learning_rate": 3.802448193224173e-09, + "loss": 1.1836, + "step": 33768 + }, + { + "epoch": 0.9915144753068296, + "grad_norm": 0.0, + "learning_rate": 3.776271273744358e-09, + "loss": 1.2192, + "step": 33769 + }, + { + "epoch": 0.9915438369839685, + "grad_norm": 0.0, + "learning_rate": 3.750184752675523e-09, + "loss": 1.1455, + "step": 33770 + }, + { + "epoch": 0.9915731986611075, + "grad_norm": 0.0, + "learning_rate": 3.7241886302530338e-09, + "loss": 1.2456, + "step": 33771 + }, + { + "epoch": 0.9916025603382466, + "grad_norm": 0.0, + "learning_rate": 3.6982829067133683e-09, + "loss": 1.3252, + "step": 33772 + }, + { + "epoch": 0.9916319220153855, + "grad_norm": 0.0, + "learning_rate": 3.672467582290784e-09, + "loss": 1.2031, + "step": 33773 + }, + { + "epoch": 0.9916612836925245, + "grad_norm": 0.0, + "learning_rate": 3.646742657217317e-09, + "loss": 1.1494, + "step": 33774 + }, + { + "epoch": 0.9916906453696636, + "grad_norm": 0.0, + "learning_rate": 3.6211081317272245e-09, + "loss": 1.1626, + "step": 33775 + }, + { + "epoch": 0.9917200070468025, + "grad_norm": 0.0, + "learning_rate": 3.595564006050323e-09, + "loss": 1.1797, + "step": 33776 + }, + { + "epoch": 0.9917493687239415, + "grad_norm": 0.0, + "learning_rate": 3.5701102804197584e-09, + "loss": 1.0708, + "step": 33777 + }, + { + "epoch": 0.9917787304010806, + "grad_norm": 0.0, + "learning_rate": 3.544746955065348e-09, + "loss": 1.1592, + "step": 33778 + }, + { + "epoch": 0.9918080920782195, + "grad_norm": 0.0, + "learning_rate": 3.519474030214687e-09, + "loss": 1.0972, + "step": 33779 + }, + { + "epoch": 0.9918374537553585, + "grad_norm": 0.0, + "learning_rate": 3.4942915060987013e-09, + "loss": 1.2227, + "step": 33780 + }, + { + "epoch": 0.9918668154324976, + "grad_norm": 0.0, + "learning_rate": 3.469199382942767e-09, + "loss": 1.2007, + "step": 33781 + }, + { + "epoch": 0.9918961771096365, + "grad_norm": 0.0, + "learning_rate": 3.44419766097559e-09, + "loss": 1.147, + "step": 33782 + }, + { + "epoch": 0.9919255387867755, + "grad_norm": 0.0, + "learning_rate": 3.4192863404236554e-09, + "loss": 1.2402, + "step": 33783 + }, + { + "epoch": 0.9919549004639145, + "grad_norm": 0.0, + "learning_rate": 3.394465421510118e-09, + "loss": 1.1226, + "step": 33784 + }, + { + "epoch": 0.9919842621410535, + "grad_norm": 0.0, + "learning_rate": 3.3697349044603535e-09, + "loss": 1.2383, + "step": 33785 + }, + { + "epoch": 0.9920136238181925, + "grad_norm": 0.0, + "learning_rate": 3.3450947894997366e-09, + "loss": 1.1172, + "step": 33786 + }, + { + "epoch": 0.9920429854953315, + "grad_norm": 0.0, + "learning_rate": 3.3205450768480916e-09, + "loss": 1.2324, + "step": 33787 + }, + { + "epoch": 0.9920723471724705, + "grad_norm": 0.0, + "learning_rate": 3.2960857667296843e-09, + "loss": 1.3887, + "step": 33788 + }, + { + "epoch": 0.9921017088496095, + "grad_norm": 0.0, + "learning_rate": 3.2717168593643378e-09, + "loss": 1.2563, + "step": 33789 + }, + { + "epoch": 0.9921310705267485, + "grad_norm": 0.0, + "learning_rate": 3.247438354974097e-09, + "loss": 1.2017, + "step": 33790 + }, + { + "epoch": 0.9921604322038875, + "grad_norm": 0.0, + "learning_rate": 3.2232502537765664e-09, + "loss": 1.2036, + "step": 33791 + }, + { + "epoch": 0.9921897938810265, + "grad_norm": 0.0, + "learning_rate": 3.199152555991569e-09, + "loss": 1.2686, + "step": 33792 + }, + { + "epoch": 0.9922191555581655, + "grad_norm": 0.0, + "learning_rate": 3.1751452618378197e-09, + "loss": 1.2158, + "step": 33793 + }, + { + "epoch": 0.9922485172353045, + "grad_norm": 0.0, + "learning_rate": 3.151228371529591e-09, + "loss": 1.1875, + "step": 33794 + }, + { + "epoch": 0.9922778789124435, + "grad_norm": 0.0, + "learning_rate": 3.1274018852867072e-09, + "loss": 1.2617, + "step": 33795 + }, + { + "epoch": 0.9923072405895825, + "grad_norm": 0.0, + "learning_rate": 3.103665803322331e-09, + "loss": 1.1733, + "step": 33796 + }, + { + "epoch": 0.9923366022667215, + "grad_norm": 0.0, + "learning_rate": 3.080020125851846e-09, + "loss": 1.061, + "step": 33797 + }, + { + "epoch": 0.9923659639438605, + "grad_norm": 0.0, + "learning_rate": 3.0564648530884144e-09, + "loss": 1.2197, + "step": 33798 + }, + { + "epoch": 0.9923953256209995, + "grad_norm": 0.0, + "learning_rate": 3.03299998524631e-09, + "loss": 1.0356, + "step": 33799 + }, + { + "epoch": 0.9924246872981385, + "grad_norm": 0.0, + "learning_rate": 3.0096255225364745e-09, + "loss": 1.1606, + "step": 33800 + }, + { + "epoch": 0.9924540489752774, + "grad_norm": 0.0, + "learning_rate": 2.9863414651720714e-09, + "loss": 1.1807, + "step": 33801 + }, + { + "epoch": 0.9924834106524165, + "grad_norm": 0.0, + "learning_rate": 2.963147813361822e-09, + "loss": 1.0771, + "step": 33802 + }, + { + "epoch": 0.9925127723295555, + "grad_norm": 0.0, + "learning_rate": 2.9400445673155586e-09, + "loss": 1.1006, + "step": 33803 + }, + { + "epoch": 0.9925421340066944, + "grad_norm": 0.0, + "learning_rate": 2.9170317272431136e-09, + "loss": 1.27, + "step": 33804 + }, + { + "epoch": 0.9925714956838335, + "grad_norm": 0.0, + "learning_rate": 2.8941092933520985e-09, + "loss": 1.3877, + "step": 33805 + }, + { + "epoch": 0.9926008573609725, + "grad_norm": 0.0, + "learning_rate": 2.8712772658501255e-09, + "loss": 1.0718, + "step": 33806 + }, + { + "epoch": 0.9926302190381114, + "grad_norm": 0.0, + "learning_rate": 2.848535644943695e-09, + "loss": 1.0601, + "step": 33807 + }, + { + "epoch": 0.9926595807152504, + "grad_norm": 0.0, + "learning_rate": 2.8258844308381993e-09, + "loss": 1.1689, + "step": 33808 + }, + { + "epoch": 0.9926889423923895, + "grad_norm": 0.0, + "learning_rate": 2.803323623737919e-09, + "loss": 1.3262, + "step": 33809 + }, + { + "epoch": 0.9927183040695284, + "grad_norm": 0.0, + "learning_rate": 2.7808532238482455e-09, + "loss": 1.104, + "step": 33810 + }, + { + "epoch": 0.9927476657466674, + "grad_norm": 0.0, + "learning_rate": 2.7584732313712392e-09, + "loss": 1.2012, + "step": 33811 + }, + { + "epoch": 0.9927770274238065, + "grad_norm": 0.0, + "learning_rate": 2.736183646510071e-09, + "loss": 1.2178, + "step": 33812 + }, + { + "epoch": 0.9928063891009454, + "grad_norm": 0.0, + "learning_rate": 2.7139844694656914e-09, + "loss": 1.2705, + "step": 33813 + }, + { + "epoch": 0.9928357507780844, + "grad_norm": 0.0, + "learning_rate": 2.6918757004390504e-09, + "loss": 1.1255, + "step": 33814 + }, + { + "epoch": 0.9928651124552235, + "grad_norm": 0.0, + "learning_rate": 2.6698573396299887e-09, + "loss": 1.1836, + "step": 33815 + }, + { + "epoch": 0.9928944741323624, + "grad_norm": 0.0, + "learning_rate": 2.6479293872372357e-09, + "loss": 1.2207, + "step": 33816 + }, + { + "epoch": 0.9929238358095014, + "grad_norm": 0.0, + "learning_rate": 2.626091843460632e-09, + "loss": 1.2207, + "step": 33817 + }, + { + "epoch": 0.9929531974866405, + "grad_norm": 0.0, + "learning_rate": 2.6043447084966867e-09, + "loss": 1.1411, + "step": 33818 + }, + { + "epoch": 0.9929825591637794, + "grad_norm": 0.0, + "learning_rate": 2.5826879825419095e-09, + "loss": 1.1587, + "step": 33819 + }, + { + "epoch": 0.9930119208409184, + "grad_norm": 0.0, + "learning_rate": 2.5611216657916993e-09, + "loss": 1.251, + "step": 33820 + }, + { + "epoch": 0.9930412825180575, + "grad_norm": 0.0, + "learning_rate": 2.539645758441456e-09, + "loss": 1.1689, + "step": 33821 + }, + { + "epoch": 0.9930706441951964, + "grad_norm": 0.0, + "learning_rate": 2.518260260686578e-09, + "loss": 1.2656, + "step": 33822 + }, + { + "epoch": 0.9931000058723354, + "grad_norm": 0.0, + "learning_rate": 2.4969651727191347e-09, + "loss": 1.2446, + "step": 33823 + }, + { + "epoch": 0.9931293675494745, + "grad_norm": 0.0, + "learning_rate": 2.475760494731194e-09, + "loss": 1.1787, + "step": 33824 + }, + { + "epoch": 0.9931587292266134, + "grad_norm": 0.0, + "learning_rate": 2.4546462269159356e-09, + "loss": 1.0234, + "step": 33825 + }, + { + "epoch": 0.9931880909037524, + "grad_norm": 0.0, + "learning_rate": 2.4336223694643167e-09, + "loss": 1.2231, + "step": 33826 + }, + { + "epoch": 0.9932174525808914, + "grad_norm": 0.0, + "learning_rate": 2.4126889225650763e-09, + "loss": 1.0713, + "step": 33827 + }, + { + "epoch": 0.9932468142580304, + "grad_norm": 0.0, + "learning_rate": 2.391845886409172e-09, + "loss": 1.231, + "step": 33828 + }, + { + "epoch": 0.9932761759351694, + "grad_norm": 0.0, + "learning_rate": 2.3710932611831218e-09, + "loss": 1.1489, + "step": 33829 + }, + { + "epoch": 0.9933055376123084, + "grad_norm": 0.0, + "learning_rate": 2.350431047076773e-09, + "loss": 1.1074, + "step": 33830 + }, + { + "epoch": 0.9933348992894474, + "grad_norm": 0.0, + "learning_rate": 2.329859244275534e-09, + "loss": 1.1875, + "step": 33831 + }, + { + "epoch": 0.9933642609665864, + "grad_norm": 0.0, + "learning_rate": 2.3093778529648112e-09, + "loss": 1.1143, + "step": 33832 + }, + { + "epoch": 0.9933936226437254, + "grad_norm": 0.0, + "learning_rate": 2.288986873332233e-09, + "loss": 1.2725, + "step": 33833 + }, + { + "epoch": 0.9934229843208644, + "grad_norm": 0.0, + "learning_rate": 2.268686305559875e-09, + "loss": 1.1626, + "step": 33834 + }, + { + "epoch": 0.9934523459980034, + "grad_norm": 0.0, + "learning_rate": 2.2484761498331454e-09, + "loss": 1.2085, + "step": 33835 + }, + { + "epoch": 0.9934817076751424, + "grad_norm": 0.0, + "learning_rate": 2.2283564063330098e-09, + "loss": 1.0542, + "step": 33836 + }, + { + "epoch": 0.9935110693522814, + "grad_norm": 0.0, + "learning_rate": 2.2083270752426554e-09, + "loss": 1.0615, + "step": 33837 + }, + { + "epoch": 0.9935404310294204, + "grad_norm": 0.0, + "learning_rate": 2.188388156743049e-09, + "loss": 1.207, + "step": 33838 + }, + { + "epoch": 0.9935697927065594, + "grad_norm": 0.0, + "learning_rate": 2.1685396510140456e-09, + "loss": 1.2246, + "step": 33839 + }, + { + "epoch": 0.9935991543836984, + "grad_norm": 0.0, + "learning_rate": 2.148781558235502e-09, + "loss": 1.1855, + "step": 33840 + }, + { + "epoch": 0.9936285160608374, + "grad_norm": 0.0, + "learning_rate": 2.129113878586164e-09, + "loss": 1.2686, + "step": 33841 + }, + { + "epoch": 0.9936578777379764, + "grad_norm": 0.0, + "learning_rate": 2.109536612243668e-09, + "loss": 1.1011, + "step": 33842 + }, + { + "epoch": 0.9936872394151154, + "grad_norm": 0.0, + "learning_rate": 2.0900497593856484e-09, + "loss": 1.21, + "step": 33843 + }, + { + "epoch": 0.9937166010922543, + "grad_norm": 0.0, + "learning_rate": 2.070653320187521e-09, + "loss": 1.1777, + "step": 33844 + }, + { + "epoch": 0.9937459627693934, + "grad_norm": 0.0, + "learning_rate": 2.0513472948247014e-09, + "loss": 1.2705, + "step": 33845 + }, + { + "epoch": 0.9937753244465324, + "grad_norm": 0.0, + "learning_rate": 2.0321316834714944e-09, + "loss": 1.1597, + "step": 33846 + }, + { + "epoch": 0.9938046861236713, + "grad_norm": 0.0, + "learning_rate": 2.013006486303315e-09, + "loss": 1.1699, + "step": 33847 + }, + { + "epoch": 0.9938340478008104, + "grad_norm": 0.0, + "learning_rate": 1.9939717034911377e-09, + "loss": 1.1123, + "step": 33848 + }, + { + "epoch": 0.9938634094779494, + "grad_norm": 0.0, + "learning_rate": 1.9750273352081573e-09, + "loss": 1.1079, + "step": 33849 + }, + { + "epoch": 0.9938927711550883, + "grad_norm": 0.0, + "learning_rate": 1.9561733816264584e-09, + "loss": 1.1343, + "step": 33850 + }, + { + "epoch": 0.9939221328322274, + "grad_norm": 0.0, + "learning_rate": 1.9374098429147946e-09, + "loss": 1.105, + "step": 33851 + }, + { + "epoch": 0.9939514945093664, + "grad_norm": 0.0, + "learning_rate": 1.9187367192441408e-09, + "loss": 1.209, + "step": 33852 + }, + { + "epoch": 0.9939808561865053, + "grad_norm": 0.0, + "learning_rate": 1.9001540107821404e-09, + "loss": 1.2705, + "step": 33853 + }, + { + "epoch": 0.9940102178636444, + "grad_norm": 0.0, + "learning_rate": 1.8816617176986575e-09, + "loss": 1.1758, + "step": 33854 + }, + { + "epoch": 0.9940395795407834, + "grad_norm": 0.0, + "learning_rate": 1.863259840159115e-09, + "loss": 1.3027, + "step": 33855 + }, + { + "epoch": 0.9940689412179223, + "grad_norm": 0.0, + "learning_rate": 1.8449483783311573e-09, + "loss": 1.0732, + "step": 33856 + }, + { + "epoch": 0.9940983028950614, + "grad_norm": 0.0, + "learning_rate": 1.8267273323790968e-09, + "loss": 1.377, + "step": 33857 + }, + { + "epoch": 0.9941276645722004, + "grad_norm": 0.0, + "learning_rate": 1.8085967024694672e-09, + "loss": 1.2129, + "step": 33858 + }, + { + "epoch": 0.9941570262493393, + "grad_norm": 0.0, + "learning_rate": 1.7905564887643613e-09, + "loss": 1.2393, + "step": 33859 + }, + { + "epoch": 0.9941863879264784, + "grad_norm": 0.0, + "learning_rate": 1.7726066914280914e-09, + "loss": 1.124, + "step": 33860 + }, + { + "epoch": 0.9942157496036174, + "grad_norm": 0.0, + "learning_rate": 1.7547473106227508e-09, + "loss": 1.2275, + "step": 33861 + }, + { + "epoch": 0.9942451112807563, + "grad_norm": 0.0, + "learning_rate": 1.7369783465104317e-09, + "loss": 1.1655, + "step": 33862 + }, + { + "epoch": 0.9942744729578954, + "grad_norm": 0.0, + "learning_rate": 1.7192997992510063e-09, + "loss": 1.3311, + "step": 33863 + }, + { + "epoch": 0.9943038346350344, + "grad_norm": 0.0, + "learning_rate": 1.7017116690043468e-09, + "loss": 1.1846, + "step": 33864 + }, + { + "epoch": 0.9943331963121733, + "grad_norm": 0.0, + "learning_rate": 1.6842139559292148e-09, + "loss": 1.1157, + "step": 33865 + }, + { + "epoch": 0.9943625579893124, + "grad_norm": 0.0, + "learning_rate": 1.6668066601843725e-09, + "loss": 1.2314, + "step": 33866 + }, + { + "epoch": 0.9943919196664514, + "grad_norm": 0.0, + "learning_rate": 1.6494897819285816e-09, + "loss": 1.2539, + "step": 33867 + }, + { + "epoch": 0.9944212813435903, + "grad_norm": 0.0, + "learning_rate": 1.632263321316163e-09, + "loss": 1.1855, + "step": 33868 + }, + { + "epoch": 0.9944506430207294, + "grad_norm": 0.0, + "learning_rate": 1.6151272785036587e-09, + "loss": 1.1924, + "step": 33869 + }, + { + "epoch": 0.9944800046978683, + "grad_norm": 0.0, + "learning_rate": 1.5980816536453891e-09, + "loss": 1.2012, + "step": 33870 + }, + { + "epoch": 0.9945093663750073, + "grad_norm": 0.0, + "learning_rate": 1.581126446897896e-09, + "loss": 1.2383, + "step": 33871 + }, + { + "epoch": 0.9945387280521464, + "grad_norm": 0.0, + "learning_rate": 1.5642616584121694e-09, + "loss": 1.3359, + "step": 33872 + }, + { + "epoch": 0.9945680897292853, + "grad_norm": 0.0, + "learning_rate": 1.5474872883414205e-09, + "loss": 1.1152, + "step": 33873 + }, + { + "epoch": 0.9945974514064243, + "grad_norm": 0.0, + "learning_rate": 1.5308033368377494e-09, + "loss": 1.1582, + "step": 33874 + }, + { + "epoch": 0.9946268130835634, + "grad_norm": 0.0, + "learning_rate": 1.5142098040510366e-09, + "loss": 1.3188, + "step": 33875 + }, + { + "epoch": 0.9946561747607023, + "grad_norm": 0.0, + "learning_rate": 1.4977066901322723e-09, + "loss": 1.2559, + "step": 33876 + }, + { + "epoch": 0.9946855364378413, + "grad_norm": 0.0, + "learning_rate": 1.4812939952302264e-09, + "loss": 1.1865, + "step": 33877 + }, + { + "epoch": 0.9947148981149804, + "grad_norm": 0.0, + "learning_rate": 1.4649717194936687e-09, + "loss": 1.1387, + "step": 33878 + }, + { + "epoch": 0.9947442597921193, + "grad_norm": 0.0, + "learning_rate": 1.4487398630702588e-09, + "loss": 1.1621, + "step": 33879 + }, + { + "epoch": 0.9947736214692583, + "grad_norm": 0.0, + "learning_rate": 1.4325984261065463e-09, + "loss": 1.0854, + "step": 33880 + }, + { + "epoch": 0.9948029831463974, + "grad_norm": 0.0, + "learning_rate": 1.4165474087490805e-09, + "loss": 1.2236, + "step": 33881 + }, + { + "epoch": 0.9948323448235363, + "grad_norm": 0.0, + "learning_rate": 1.4005868111410802e-09, + "loss": 1.1865, + "step": 33882 + }, + { + "epoch": 0.9948617065006753, + "grad_norm": 0.0, + "learning_rate": 1.384716633429095e-09, + "loss": 1.2075, + "step": 33883 + }, + { + "epoch": 0.9948910681778144, + "grad_norm": 0.0, + "learning_rate": 1.3689368757552335e-09, + "loss": 1.0288, + "step": 33884 + }, + { + "epoch": 0.9949204298549533, + "grad_norm": 0.0, + "learning_rate": 1.3532475382638245e-09, + "loss": 1.2656, + "step": 33885 + }, + { + "epoch": 0.9949497915320923, + "grad_norm": 0.0, + "learning_rate": 1.3376486210947558e-09, + "loss": 1.1294, + "step": 33886 + }, + { + "epoch": 0.9949791532092314, + "grad_norm": 0.0, + "learning_rate": 1.3221401243901367e-09, + "loss": 1.1372, + "step": 33887 + }, + { + "epoch": 0.9950085148863703, + "grad_norm": 0.0, + "learning_rate": 1.3067220482898546e-09, + "loss": 1.1689, + "step": 33888 + }, + { + "epoch": 0.9950378765635093, + "grad_norm": 0.0, + "learning_rate": 1.2913943929349083e-09, + "loss": 1.1978, + "step": 33889 + }, + { + "epoch": 0.9950672382406484, + "grad_norm": 0.0, + "learning_rate": 1.2761571584618548e-09, + "loss": 1.1602, + "step": 33890 + }, + { + "epoch": 0.9950965999177873, + "grad_norm": 0.0, + "learning_rate": 1.2610103450083621e-09, + "loss": 1.0576, + "step": 33891 + }, + { + "epoch": 0.9951259615949263, + "grad_norm": 0.0, + "learning_rate": 1.245953952713208e-09, + "loss": 1.2012, + "step": 33892 + }, + { + "epoch": 0.9951553232720654, + "grad_norm": 0.0, + "learning_rate": 1.2309879817107295e-09, + "loss": 1.1611, + "step": 33893 + }, + { + "epoch": 0.9951846849492043, + "grad_norm": 0.0, + "learning_rate": 1.2161124321385941e-09, + "loss": 1.1509, + "step": 33894 + }, + { + "epoch": 0.9952140466263433, + "grad_norm": 0.0, + "learning_rate": 1.2013273041289186e-09, + "loss": 1.2246, + "step": 33895 + }, + { + "epoch": 0.9952434083034823, + "grad_norm": 0.0, + "learning_rate": 1.1866325978160398e-09, + "loss": 1.3262, + "step": 33896 + }, + { + "epoch": 0.9952727699806213, + "grad_norm": 0.0, + "learning_rate": 1.1720283133342946e-09, + "loss": 1.3018, + "step": 33897 + }, + { + "epoch": 0.9953021316577603, + "grad_norm": 0.0, + "learning_rate": 1.1575144508135793e-09, + "loss": 1.1934, + "step": 33898 + }, + { + "epoch": 0.9953314933348993, + "grad_norm": 0.0, + "learning_rate": 1.1430910103871207e-09, + "loss": 1.1055, + "step": 33899 + }, + { + "epoch": 0.9953608550120383, + "grad_norm": 0.0, + "learning_rate": 1.1287579921837044e-09, + "loss": 1.2471, + "step": 33900 + }, + { + "epoch": 0.9953902166891773, + "grad_norm": 0.0, + "learning_rate": 1.1145153963332266e-09, + "loss": 1.332, + "step": 33901 + }, + { + "epoch": 0.9954195783663163, + "grad_norm": 0.0, + "learning_rate": 1.1003632229655837e-09, + "loss": 1.2021, + "step": 33902 + }, + { + "epoch": 0.9954489400434553, + "grad_norm": 0.0, + "learning_rate": 1.0863014722084508e-09, + "loss": 1.1133, + "step": 33903 + }, + { + "epoch": 0.9954783017205943, + "grad_norm": 0.0, + "learning_rate": 1.0723301441883938e-09, + "loss": 1.1108, + "step": 33904 + }, + { + "epoch": 0.9955076633977333, + "grad_norm": 0.0, + "learning_rate": 1.0584492390319777e-09, + "loss": 1.2261, + "step": 33905 + }, + { + "epoch": 0.9955370250748723, + "grad_norm": 0.0, + "learning_rate": 1.0446587568646582e-09, + "loss": 1.3037, + "step": 33906 + }, + { + "epoch": 0.9955663867520113, + "grad_norm": 0.0, + "learning_rate": 1.0309586978118903e-09, + "loss": 1.1543, + "step": 33907 + }, + { + "epoch": 0.9955957484291502, + "grad_norm": 0.0, + "learning_rate": 1.0173490619957982e-09, + "loss": 1.0469, + "step": 33908 + }, + { + "epoch": 0.9956251101062893, + "grad_norm": 0.0, + "learning_rate": 1.0038298495418375e-09, + "loss": 1.1758, + "step": 33909 + }, + { + "epoch": 0.9956544717834283, + "grad_norm": 0.0, + "learning_rate": 9.904010605699121e-10, + "loss": 1.1255, + "step": 33910 + }, + { + "epoch": 0.9956838334605672, + "grad_norm": 0.0, + "learning_rate": 9.77062695203257e-10, + "loss": 1.2002, + "step": 33911 + }, + { + "epoch": 0.9957131951377063, + "grad_norm": 0.0, + "learning_rate": 9.638147535617759e-10, + "loss": 1.2891, + "step": 33912 + }, + { + "epoch": 0.9957425568148452, + "grad_norm": 0.0, + "learning_rate": 9.506572357653732e-10, + "loss": 1.1978, + "step": 33913 + }, + { + "epoch": 0.9957719184919842, + "grad_norm": 0.0, + "learning_rate": 9.375901419328426e-10, + "loss": 1.2324, + "step": 33914 + }, + { + "epoch": 0.9958012801691233, + "grad_norm": 0.0, + "learning_rate": 9.246134721818678e-10, + "loss": 1.292, + "step": 33915 + }, + { + "epoch": 0.9958306418462622, + "grad_norm": 0.0, + "learning_rate": 9.117272266312427e-10, + "loss": 1.2051, + "step": 33916 + }, + { + "epoch": 0.9958600035234012, + "grad_norm": 0.0, + "learning_rate": 8.989314053964304e-10, + "loss": 1.1934, + "step": 33917 + }, + { + "epoch": 0.9958893652005403, + "grad_norm": 0.0, + "learning_rate": 8.862260085928942e-10, + "loss": 1.2207, + "step": 33918 + }, + { + "epoch": 0.9959187268776792, + "grad_norm": 0.0, + "learning_rate": 8.736110363372075e-10, + "loss": 1.2305, + "step": 33919 + }, + { + "epoch": 0.9959480885548182, + "grad_norm": 0.0, + "learning_rate": 8.610864887415027e-10, + "loss": 1.1895, + "step": 33920 + }, + { + "epoch": 0.9959774502319573, + "grad_norm": 0.0, + "learning_rate": 8.486523659190226e-10, + "loss": 1.1001, + "step": 33921 + }, + { + "epoch": 0.9960068119090962, + "grad_norm": 0.0, + "learning_rate": 8.363086679841204e-10, + "loss": 1.1958, + "step": 33922 + }, + { + "epoch": 0.9960361735862352, + "grad_norm": 0.0, + "learning_rate": 8.240553950467078e-10, + "loss": 1.3008, + "step": 33923 + }, + { + "epoch": 0.9960655352633743, + "grad_norm": 0.0, + "learning_rate": 8.118925472189176e-10, + "loss": 1.1621, + "step": 33924 + }, + { + "epoch": 0.9960948969405132, + "grad_norm": 0.0, + "learning_rate": 7.998201246095516e-10, + "loss": 1.1016, + "step": 33925 + }, + { + "epoch": 0.9961242586176522, + "grad_norm": 0.0, + "learning_rate": 7.878381273285219e-10, + "loss": 1.165, + "step": 33926 + }, + { + "epoch": 0.9961536202947913, + "grad_norm": 0.0, + "learning_rate": 7.759465554835199e-10, + "loss": 1.1401, + "step": 33927 + }, + { + "epoch": 0.9961829819719302, + "grad_norm": 0.0, + "learning_rate": 7.641454091822376e-10, + "loss": 1.1855, + "step": 33928 + }, + { + "epoch": 0.9962123436490692, + "grad_norm": 0.0, + "learning_rate": 7.524346885323664e-10, + "loss": 1.1602, + "step": 33929 + }, + { + "epoch": 0.9962417053262083, + "grad_norm": 0.0, + "learning_rate": 7.408143936382672e-10, + "loss": 1.1953, + "step": 33930 + }, + { + "epoch": 0.9962710670033472, + "grad_norm": 0.0, + "learning_rate": 7.292845246065216e-10, + "loss": 1.1143, + "step": 33931 + }, + { + "epoch": 0.9963004286804862, + "grad_norm": 0.0, + "learning_rate": 7.178450815403803e-10, + "loss": 1.2852, + "step": 33932 + }, + { + "epoch": 0.9963297903576253, + "grad_norm": 0.0, + "learning_rate": 7.064960645442043e-10, + "loss": 1.3379, + "step": 33933 + }, + { + "epoch": 0.9963591520347642, + "grad_norm": 0.0, + "learning_rate": 6.952374737190238e-10, + "loss": 1.1362, + "step": 33934 + }, + { + "epoch": 0.9963885137119032, + "grad_norm": 0.0, + "learning_rate": 6.840693091680895e-10, + "loss": 1.2134, + "step": 33935 + }, + { + "epoch": 0.9964178753890423, + "grad_norm": 0.0, + "learning_rate": 6.729915709924317e-10, + "loss": 1.3535, + "step": 33936 + }, + { + "epoch": 0.9964472370661812, + "grad_norm": 0.0, + "learning_rate": 6.620042592908604e-10, + "loss": 1.1387, + "step": 33937 + }, + { + "epoch": 0.9964765987433202, + "grad_norm": 0.0, + "learning_rate": 6.511073741644059e-10, + "loss": 1.2549, + "step": 33938 + }, + { + "epoch": 0.9965059604204592, + "grad_norm": 0.0, + "learning_rate": 6.403009157107675e-10, + "loss": 1.2773, + "step": 33939 + }, + { + "epoch": 0.9965353220975982, + "grad_norm": 0.0, + "learning_rate": 6.295848840276453e-10, + "loss": 1.1621, + "step": 33940 + }, + { + "epoch": 0.9965646837747372, + "grad_norm": 0.0, + "learning_rate": 6.189592792127386e-10, + "loss": 1.2988, + "step": 33941 + }, + { + "epoch": 0.9965940454518762, + "grad_norm": 0.0, + "learning_rate": 6.084241013604164e-10, + "loss": 1.1973, + "step": 33942 + }, + { + "epoch": 0.9966234071290152, + "grad_norm": 0.0, + "learning_rate": 5.979793505672682e-10, + "loss": 1.1919, + "step": 33943 + }, + { + "epoch": 0.9966527688061542, + "grad_norm": 0.0, + "learning_rate": 5.876250269276629e-10, + "loss": 1.1055, + "step": 33944 + }, + { + "epoch": 0.9966821304832932, + "grad_norm": 0.0, + "learning_rate": 5.773611305348593e-10, + "loss": 1.1553, + "step": 33945 + }, + { + "epoch": 0.9967114921604322, + "grad_norm": 0.0, + "learning_rate": 5.671876614821159e-10, + "loss": 1.1484, + "step": 33946 + }, + { + "epoch": 0.9967408538375712, + "grad_norm": 0.0, + "learning_rate": 5.571046198615815e-10, + "loss": 1.2173, + "step": 33947 + }, + { + "epoch": 0.9967702155147102, + "grad_norm": 0.0, + "learning_rate": 5.47112005763184e-10, + "loss": 1.251, + "step": 33948 + }, + { + "epoch": 0.9967995771918492, + "grad_norm": 0.0, + "learning_rate": 5.372098192779618e-10, + "loss": 1.1841, + "step": 33949 + }, + { + "epoch": 0.9968289388689882, + "grad_norm": 0.0, + "learning_rate": 5.273980604969531e-10, + "loss": 1.2529, + "step": 33950 + }, + { + "epoch": 0.9968583005461272, + "grad_norm": 0.0, + "learning_rate": 5.176767295067553e-10, + "loss": 1.062, + "step": 33951 + }, + { + "epoch": 0.9968876622232662, + "grad_norm": 0.0, + "learning_rate": 5.080458263961863e-10, + "loss": 1.1689, + "step": 33952 + }, + { + "epoch": 0.9969170239004052, + "grad_norm": 0.0, + "learning_rate": 4.985053512518434e-10, + "loss": 1.1255, + "step": 33953 + }, + { + "epoch": 0.9969463855775442, + "grad_norm": 0.0, + "learning_rate": 4.890553041614343e-10, + "loss": 1.2344, + "step": 33954 + }, + { + "epoch": 0.9969757472546832, + "grad_norm": 0.0, + "learning_rate": 4.796956852082258e-10, + "loss": 1.2056, + "step": 33955 + }, + { + "epoch": 0.9970051089318221, + "grad_norm": 0.0, + "learning_rate": 4.704264944788151e-10, + "loss": 1.1284, + "step": 33956 + }, + { + "epoch": 0.9970344706089612, + "grad_norm": 0.0, + "learning_rate": 4.61247732056469e-10, + "loss": 1.1973, + "step": 33957 + }, + { + "epoch": 0.9970638322861002, + "grad_norm": 0.0, + "learning_rate": 4.5215939802334406e-10, + "loss": 1.188, + "step": 33958 + }, + { + "epoch": 0.9970931939632391, + "grad_norm": 0.0, + "learning_rate": 4.43161492462707e-10, + "loss": 1.2188, + "step": 33959 + }, + { + "epoch": 0.9971225556403782, + "grad_norm": 0.0, + "learning_rate": 4.3425401545560407e-10, + "loss": 1.1387, + "step": 33960 + }, + { + "epoch": 0.9971519173175172, + "grad_norm": 0.0, + "learning_rate": 4.2543696708197137e-10, + "loss": 1.1968, + "step": 33961 + }, + { + "epoch": 0.9971812789946561, + "grad_norm": 0.0, + "learning_rate": 4.167103474217449e-10, + "loss": 1.2051, + "step": 33962 + }, + { + "epoch": 0.9972106406717952, + "grad_norm": 0.0, + "learning_rate": 4.080741565548607e-10, + "loss": 1.2607, + "step": 33963 + }, + { + "epoch": 0.9972400023489342, + "grad_norm": 0.0, + "learning_rate": 3.9952839455792423e-10, + "loss": 1.2725, + "step": 33964 + }, + { + "epoch": 0.9972693640260731, + "grad_norm": 0.0, + "learning_rate": 3.910730615097613e-10, + "loss": 1.1426, + "step": 33965 + }, + { + "epoch": 0.9972987257032122, + "grad_norm": 0.0, + "learning_rate": 3.827081574858671e-10, + "loss": 1.2383, + "step": 33966 + }, + { + "epoch": 0.9973280873803512, + "grad_norm": 0.0, + "learning_rate": 3.744336825617367e-10, + "loss": 1.2119, + "step": 33967 + }, + { + "epoch": 0.9973574490574901, + "grad_norm": 0.0, + "learning_rate": 3.662496368128654e-10, + "loss": 1.2588, + "step": 33968 + }, + { + "epoch": 0.9973868107346292, + "grad_norm": 0.0, + "learning_rate": 3.581560203136381e-10, + "loss": 1.1445, + "step": 33969 + }, + { + "epoch": 0.9974161724117682, + "grad_norm": 0.0, + "learning_rate": 3.5015283313510894e-10, + "loss": 1.1514, + "step": 33970 + }, + { + "epoch": 0.9974455340889071, + "grad_norm": 0.0, + "learning_rate": 3.4224007535277327e-10, + "loss": 1.3213, + "step": 33971 + }, + { + "epoch": 0.9974748957660462, + "grad_norm": 0.0, + "learning_rate": 3.344177470354648e-10, + "loss": 1.21, + "step": 33972 + }, + { + "epoch": 0.9975042574431852, + "grad_norm": 0.0, + "learning_rate": 3.2668584825423786e-10, + "loss": 1.1562, + "step": 33973 + }, + { + "epoch": 0.9975336191203241, + "grad_norm": 0.0, + "learning_rate": 3.1904437908125694e-10, + "loss": 1.1704, + "step": 33974 + }, + { + "epoch": 0.9975629807974632, + "grad_norm": 0.0, + "learning_rate": 3.114933395831354e-10, + "loss": 1.2529, + "step": 33975 + }, + { + "epoch": 0.9975923424746022, + "grad_norm": 0.0, + "learning_rate": 3.0403272982981736e-10, + "loss": 1.126, + "step": 33976 + }, + { + "epoch": 0.9976217041517411, + "grad_norm": 0.0, + "learning_rate": 2.966625498879161e-10, + "loss": 1.249, + "step": 33977 + }, + { + "epoch": 0.9976510658288802, + "grad_norm": 0.0, + "learning_rate": 2.8938279982404504e-10, + "loss": 1.1045, + "step": 33978 + }, + { + "epoch": 0.9976804275060192, + "grad_norm": 0.0, + "learning_rate": 2.8219347970370735e-10, + "loss": 1.1553, + "step": 33979 + }, + { + "epoch": 0.9977097891831581, + "grad_norm": 0.0, + "learning_rate": 2.750945895935164e-10, + "loss": 1.1821, + "step": 33980 + }, + { + "epoch": 0.9977391508602972, + "grad_norm": 0.0, + "learning_rate": 2.680861295556447e-10, + "loss": 1.291, + "step": 33981 + }, + { + "epoch": 0.9977685125374361, + "grad_norm": 0.0, + "learning_rate": 2.6116809965559545e-10, + "loss": 1.2363, + "step": 33982 + }, + { + "epoch": 0.9977978742145751, + "grad_norm": 0.0, + "learning_rate": 2.5434049995332057e-10, + "loss": 1.1733, + "step": 33983 + }, + { + "epoch": 0.9978272358917142, + "grad_norm": 0.0, + "learning_rate": 2.4760333051321304e-10, + "loss": 1.1797, + "step": 33984 + }, + { + "epoch": 0.9978565975688531, + "grad_norm": 0.0, + "learning_rate": 2.4095659139411477e-10, + "loss": 1.2832, + "step": 33985 + }, + { + "epoch": 0.9978859592459921, + "grad_norm": 0.0, + "learning_rate": 2.3440028265819814e-10, + "loss": 1.2598, + "step": 33986 + }, + { + "epoch": 0.9979153209231312, + "grad_norm": 0.0, + "learning_rate": 2.2793440436208458e-10, + "loss": 1.1675, + "step": 33987 + }, + { + "epoch": 0.9979446826002701, + "grad_norm": 0.0, + "learning_rate": 2.2155895656683635e-10, + "loss": 1.3188, + "step": 33988 + }, + { + "epoch": 0.9979740442774091, + "grad_norm": 0.0, + "learning_rate": 2.152739393290748e-10, + "loss": 1.2734, + "step": 33989 + }, + { + "epoch": 0.9980034059545482, + "grad_norm": 0.0, + "learning_rate": 2.0907935270542134e-10, + "loss": 1.2578, + "step": 33990 + }, + { + "epoch": 0.9980327676316871, + "grad_norm": 0.0, + "learning_rate": 2.029751967513871e-10, + "loss": 1.0776, + "step": 33991 + }, + { + "epoch": 0.9980621293088261, + "grad_norm": 0.0, + "learning_rate": 1.9696147152359345e-10, + "loss": 1.248, + "step": 33992 + }, + { + "epoch": 0.9980914909859652, + "grad_norm": 0.0, + "learning_rate": 1.9103817707533113e-10, + "loss": 1.2183, + "step": 33993 + }, + { + "epoch": 0.9981208526631041, + "grad_norm": 0.0, + "learning_rate": 1.8520531345989079e-10, + "loss": 1.2168, + "step": 33994 + }, + { + "epoch": 0.9981502143402431, + "grad_norm": 0.0, + "learning_rate": 1.7946288073167338e-10, + "loss": 1.1216, + "step": 33995 + }, + { + "epoch": 0.9981795760173822, + "grad_norm": 0.0, + "learning_rate": 1.7381087894063898e-10, + "loss": 1.0859, + "step": 33996 + }, + { + "epoch": 0.9982089376945211, + "grad_norm": 0.0, + "learning_rate": 1.6824930814007823e-10, + "loss": 1.2168, + "step": 33997 + }, + { + "epoch": 0.9982382993716601, + "grad_norm": 0.0, + "learning_rate": 1.6277816837773074e-10, + "loss": 1.2456, + "step": 33998 + }, + { + "epoch": 0.9982676610487992, + "grad_norm": 0.0, + "learning_rate": 1.573974597046668e-10, + "loss": 1.2158, + "step": 33999 + }, + { + "epoch": 0.9982970227259381, + "grad_norm": 0.0, + "learning_rate": 1.521071821697362e-10, + "loss": 1.2407, + "step": 34000 + }, + { + "epoch": 0.9983263844030771, + "grad_norm": 0.0, + "learning_rate": 1.4690733582067851e-10, + "loss": 1.2275, + "step": 34001 + }, + { + "epoch": 0.9983557460802162, + "grad_norm": 0.0, + "learning_rate": 1.417979207030129e-10, + "loss": 1.041, + "step": 34002 + }, + { + "epoch": 0.9983851077573551, + "grad_norm": 0.0, + "learning_rate": 1.3677893686447896e-10, + "loss": 1.1318, + "step": 34003 + }, + { + "epoch": 0.9984144694344941, + "grad_norm": 0.0, + "learning_rate": 1.3185038435059583e-10, + "loss": 1.1704, + "step": 34004 + }, + { + "epoch": 0.9984438311116332, + "grad_norm": 0.0, + "learning_rate": 1.270122632046622e-10, + "loss": 1.2988, + "step": 34005 + }, + { + "epoch": 0.9984731927887721, + "grad_norm": 0.0, + "learning_rate": 1.22264573471087e-10, + "loss": 1.2461, + "step": 34006 + }, + { + "epoch": 0.9985025544659111, + "grad_norm": 0.0, + "learning_rate": 1.1760731519427914e-10, + "loss": 1.2334, + "step": 34007 + }, + { + "epoch": 0.99853191614305, + "grad_norm": 0.0, + "learning_rate": 1.1304048841309646e-10, + "loss": 1.0972, + "step": 34008 + }, + { + "epoch": 0.9985612778201891, + "grad_norm": 0.0, + "learning_rate": 1.0856409317194783e-10, + "loss": 1.0952, + "step": 34009 + }, + { + "epoch": 0.9985906394973281, + "grad_norm": 0.0, + "learning_rate": 1.0417812950969109e-10, + "loss": 1.2993, + "step": 34010 + }, + { + "epoch": 0.998620001174467, + "grad_norm": 0.0, + "learning_rate": 9.988259746629425e-11, + "loss": 1.333, + "step": 34011 + }, + { + "epoch": 0.9986493628516061, + "grad_norm": 0.0, + "learning_rate": 9.567749708061513e-11, + "loss": 1.2568, + "step": 34012 + }, + { + "epoch": 0.9986787245287451, + "grad_norm": 0.0, + "learning_rate": 9.156282839040131e-11, + "loss": 1.127, + "step": 34013 + }, + { + "epoch": 0.998708086205884, + "grad_norm": 0.0, + "learning_rate": 8.75385914345106e-11, + "loss": 1.2002, + "step": 34014 + }, + { + "epoch": 0.9987374478830231, + "grad_norm": 0.0, + "learning_rate": 8.360478624735991e-11, + "loss": 1.1846, + "step": 34015 + }, + { + "epoch": 0.9987668095601621, + "grad_norm": 0.0, + "learning_rate": 7.976141286447636e-11, + "loss": 1.2959, + "step": 34016 + }, + { + "epoch": 0.998796171237301, + "grad_norm": 0.0, + "learning_rate": 7.600847132138711e-11, + "loss": 1.1377, + "step": 34017 + }, + { + "epoch": 0.9988255329144401, + "grad_norm": 0.0, + "learning_rate": 7.234596165250907e-11, + "loss": 1.1631, + "step": 34018 + }, + { + "epoch": 0.9988548945915791, + "grad_norm": 0.0, + "learning_rate": 6.877388389114892e-11, + "loss": 1.2471, + "step": 34019 + }, + { + "epoch": 0.998884256268718, + "grad_norm": 0.0, + "learning_rate": 6.529223806728269e-11, + "loss": 1.1689, + "step": 34020 + }, + { + "epoch": 0.9989136179458571, + "grad_norm": 0.0, + "learning_rate": 6.190102421532729e-11, + "loss": 1.1836, + "step": 34021 + }, + { + "epoch": 0.998942979622996, + "grad_norm": 0.0, + "learning_rate": 5.860024236414851e-11, + "loss": 1.144, + "step": 34022 + }, + { + "epoch": 0.998972341300135, + "grad_norm": 0.0, + "learning_rate": 5.5389892543722403e-11, + "loss": 1.1069, + "step": 34023 + }, + { + "epoch": 0.9990017029772741, + "grad_norm": 0.0, + "learning_rate": 5.2269974784024954e-11, + "loss": 1.2583, + "step": 34024 + }, + { + "epoch": 0.999031064654413, + "grad_norm": 0.0, + "learning_rate": 4.924048911170154e-11, + "loss": 1.0967, + "step": 34025 + }, + { + "epoch": 0.999060426331552, + "grad_norm": 0.0, + "learning_rate": 4.6301435555617947e-11, + "loss": 1.2383, + "step": 34026 + }, + { + "epoch": 0.9990897880086911, + "grad_norm": 0.0, + "learning_rate": 4.345281414241953e-11, + "loss": 1.2739, + "step": 34027 + }, + { + "epoch": 0.99911914968583, + "grad_norm": 0.0, + "learning_rate": 4.06946248965312e-11, + "loss": 1.1855, + "step": 34028 + }, + { + "epoch": 0.999148511362969, + "grad_norm": 0.0, + "learning_rate": 3.80268678445983e-11, + "loss": 1.1279, + "step": 34029 + }, + { + "epoch": 0.9991778730401081, + "grad_norm": 0.0, + "learning_rate": 3.5449543008825305e-11, + "loss": 1.1865, + "step": 34030 + }, + { + "epoch": 0.999207234717247, + "grad_norm": 0.0, + "learning_rate": 3.296265041474733e-11, + "loss": 1.1094, + "step": 34031 + }, + { + "epoch": 0.999236596394386, + "grad_norm": 0.0, + "learning_rate": 3.0566190082348404e-11, + "loss": 1.2812, + "step": 34032 + }, + { + "epoch": 0.9992659580715251, + "grad_norm": 0.0, + "learning_rate": 2.8260162034943196e-11, + "loss": 1.1802, + "step": 34033 + }, + { + "epoch": 0.999295319748664, + "grad_norm": 0.0, + "learning_rate": 2.604456629362595e-11, + "loss": 1.1787, + "step": 34034 + }, + { + "epoch": 0.999324681425803, + "grad_norm": 0.0, + "learning_rate": 2.3919402877270457e-11, + "loss": 1.2305, + "step": 34035 + }, + { + "epoch": 0.9993540431029421, + "grad_norm": 0.0, + "learning_rate": 2.1884671805860735e-11, + "loss": 1.1733, + "step": 34036 + }, + { + "epoch": 0.999383404780081, + "grad_norm": 0.0, + "learning_rate": 1.9940373097160347e-11, + "loss": 1.252, + "step": 34037 + }, + { + "epoch": 0.99941276645722, + "grad_norm": 0.0, + "learning_rate": 1.8086506768932866e-11, + "loss": 1.2432, + "step": 34038 + }, + { + "epoch": 0.9994421281343591, + "grad_norm": 0.0, + "learning_rate": 1.6323072838941855e-11, + "loss": 1.2139, + "step": 34039 + }, + { + "epoch": 0.999471489811498, + "grad_norm": 0.0, + "learning_rate": 1.465007132273044e-11, + "loss": 1.1963, + "step": 34040 + }, + { + "epoch": 0.999500851488637, + "grad_norm": 0.0, + "learning_rate": 1.3067502233621298e-11, + "loss": 1.2988, + "step": 34041 + }, + { + "epoch": 0.9995302131657761, + "grad_norm": 0.0, + "learning_rate": 1.1575365588267773e-11, + "loss": 1.2568, + "step": 34042 + }, + { + "epoch": 0.999559574842915, + "grad_norm": 0.0, + "learning_rate": 1.017366139999254e-11, + "loss": 1.1577, + "step": 34043 + }, + { + "epoch": 0.999588936520054, + "grad_norm": 0.0, + "learning_rate": 8.862389679897831e-12, + "loss": 1.1406, + "step": 34044 + }, + { + "epoch": 0.9996182981971931, + "grad_norm": 0.0, + "learning_rate": 7.6415504401961e-12, + "loss": 1.2031, + "step": 34045 + }, + { + "epoch": 0.999647659874332, + "grad_norm": 0.0, + "learning_rate": 6.511143693099797e-12, + "loss": 1.1973, + "step": 34046 + }, + { + "epoch": 0.999677021551471, + "grad_norm": 0.0, + "learning_rate": 5.471169448600933e-12, + "loss": 1.2363, + "step": 34047 + }, + { + "epoch": 0.99970638322861, + "grad_norm": 0.0, + "learning_rate": 4.521627714471066e-12, + "loss": 1.2031, + "step": 34048 + }, + { + "epoch": 0.999735744905749, + "grad_norm": 0.0, + "learning_rate": 3.662518500702206e-12, + "loss": 1.2095, + "step": 34049 + }, + { + "epoch": 0.999765106582888, + "grad_norm": 0.0, + "learning_rate": 2.893841816176135e-12, + "loss": 1.1206, + "step": 34050 + }, + { + "epoch": 0.999794468260027, + "grad_norm": 0.0, + "learning_rate": 2.215597665333746e-12, + "loss": 1.2334, + "step": 34051 + }, + { + "epoch": 0.999823829937166, + "grad_norm": 0.0, + "learning_rate": 1.6277860559466008e-12, + "loss": 1.1372, + "step": 34052 + }, + { + "epoch": 0.999853191614305, + "grad_norm": 0.0, + "learning_rate": 1.1304069924555905e-12, + "loss": 1.0996, + "step": 34053 + }, + { + "epoch": 0.999882553291444, + "grad_norm": 0.0, + "learning_rate": 7.234604804118306e-13, + "loss": 1.1255, + "step": 34054 + }, + { + "epoch": 0.999911914968583, + "grad_norm": 0.0, + "learning_rate": 4.0694652203576714e-13, + "loss": 1.2754, + "step": 34055 + }, + { + "epoch": 0.999941276645722, + "grad_norm": 0.0, + "learning_rate": 1.808651217682922e-13, + "loss": 1.1421, + "step": 34056 + }, + { + "epoch": 0.999970638322861, + "grad_norm": 0.0, + "learning_rate": 4.52162807196288e-14, + "loss": 1.248, + "step": 34057 + }, + { + "epoch": 1.0, + "grad_norm": 0.0, + "learning_rate": 0.0, + "loss": 1.2246, + "step": 34058 + }, + { + "epoch": 1.0, + "step": 34058, + "total_flos": 1.729487724073504e+19, + "train_loss": 0.14290133283663162, + "train_runtime": 15117.1461, + "train_samples_per_second": 144.188, + "train_steps_per_second": 2.253 + } + ], + "logging_steps": 1.0, + "max_steps": 34058, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 3000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.729487724073504e+19, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}