| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.08740303725554463, | |
| "eval_steps": 500, | |
| "global_step": 1000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 8.740303725554463e-05, | |
| "grad_norm": 7.065422058105469, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8318, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.00017480607451108925, | |
| "grad_norm": 12.618020057678223, | |
| "learning_rate": 0.0002, | |
| "loss": 2.6514, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0002622091117666339, | |
| "grad_norm": 5.291403770446777, | |
| "learning_rate": 0.0003, | |
| "loss": 1.1527, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.0003496121490221785, | |
| "grad_norm": 0.6172698736190796, | |
| "learning_rate": 0.0004, | |
| "loss": 0.9539, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.00043701518627772313, | |
| "grad_norm": 2.0148203372955322, | |
| "learning_rate": 0.0005, | |
| "loss": 1.4452, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0005244182235332678, | |
| "grad_norm": 8.47681999206543, | |
| "learning_rate": 0.0004999562784190276, | |
| "loss": 1.8725, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.0006118212607888124, | |
| "grad_norm": 1.3222665786743164, | |
| "learning_rate": 0.0004999125568380553, | |
| "loss": 1.4179, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.000699224298044357, | |
| "grad_norm": 2.153110980987549, | |
| "learning_rate": 0.0004998688352570829, | |
| "loss": 1.1031, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0007866273352999017, | |
| "grad_norm": 1.160366415977478, | |
| "learning_rate": 0.0004998251136761106, | |
| "loss": 0.9552, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.0008740303725554463, | |
| "grad_norm": 0.7029749155044556, | |
| "learning_rate": 0.0004997813920951382, | |
| "loss": 1.0771, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.000961433409810991, | |
| "grad_norm": 0.7599214315414429, | |
| "learning_rate": 0.0004997376705141658, | |
| "loss": 1.0371, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.0010488364470665355, | |
| "grad_norm": 1.3291207551956177, | |
| "learning_rate": 0.0004996939489331935, | |
| "loss": 0.7945, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.00113623948432208, | |
| "grad_norm": 0.6687347888946533, | |
| "learning_rate": 0.0004996502273522211, | |
| "loss": 0.9751, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.0012236425215776249, | |
| "grad_norm": 0.5787840485572815, | |
| "learning_rate": 0.0004996065057712488, | |
| "loss": 1.234, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.0013110455588331695, | |
| "grad_norm": 0.8155117034912109, | |
| "learning_rate": 0.0004995627841902764, | |
| "loss": 1.2566, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.001398448596088714, | |
| "grad_norm": 0.5109673142433167, | |
| "learning_rate": 0.0004995190626093039, | |
| "loss": 0.8717, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0014858516333442588, | |
| "grad_norm": 0.4625360667705536, | |
| "learning_rate": 0.0004994753410283315, | |
| "loss": 0.8922, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.0015732546705998034, | |
| "grad_norm": 0.714952826499939, | |
| "learning_rate": 0.0004994316194473592, | |
| "loss": 0.921, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.001660657707855348, | |
| "grad_norm": 0.48220372200012207, | |
| "learning_rate": 0.0004993878978663869, | |
| "loss": 1.0207, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.0017480607451108925, | |
| "grad_norm": 5.717684745788574, | |
| "learning_rate": 0.0004993441762854145, | |
| "loss": 1.3551, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0018354637823664373, | |
| "grad_norm": 0.5429579615592957, | |
| "learning_rate": 0.0004993004547044421, | |
| "loss": 0.8929, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.001922866819621982, | |
| "grad_norm": 6.894193172454834, | |
| "learning_rate": 0.0004992567331234697, | |
| "loss": 1.2508, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.0020102698568775267, | |
| "grad_norm": 0.4427785277366638, | |
| "learning_rate": 0.0004992130115424974, | |
| "loss": 0.9662, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.002097672894133071, | |
| "grad_norm": 0.5576323866844177, | |
| "learning_rate": 0.000499169289961525, | |
| "loss": 1.0545, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.002185075931388616, | |
| "grad_norm": 1.3581053018569946, | |
| "learning_rate": 0.0004991255683805527, | |
| "loss": 1.1777, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.00227247896864416, | |
| "grad_norm": 0.609951376914978, | |
| "learning_rate": 0.0004990818467995803, | |
| "loss": 1.5921, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.002359882005899705, | |
| "grad_norm": 1.3641082048416138, | |
| "learning_rate": 0.0004990381252186079, | |
| "loss": 0.8309, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.0024472850431552498, | |
| "grad_norm": 0.5976356267929077, | |
| "learning_rate": 0.0004989944036376356, | |
| "loss": 0.828, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.002534688080410794, | |
| "grad_norm": 0.6889556646347046, | |
| "learning_rate": 0.0004989506820566632, | |
| "loss": 1.4536, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.002622091117666339, | |
| "grad_norm": 0.5091891884803772, | |
| "learning_rate": 0.0004989069604756908, | |
| "loss": 1.054, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0027094941549218837, | |
| "grad_norm": 1.0312514305114746, | |
| "learning_rate": 0.0004988632388947185, | |
| "loss": 0.8454, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.002796897192177428, | |
| "grad_norm": 1.136455774307251, | |
| "learning_rate": 0.000498819517313746, | |
| "loss": 0.9365, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.002884300229432973, | |
| "grad_norm": 0.5671233534812927, | |
| "learning_rate": 0.0004987757957327737, | |
| "loss": 0.9139, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.0029717032666885176, | |
| "grad_norm": 0.38321638107299805, | |
| "learning_rate": 0.0004987320741518013, | |
| "loss": 0.9383, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.003059106303944062, | |
| "grad_norm": 0.49962496757507324, | |
| "learning_rate": 0.0004986883525708289, | |
| "loss": 1.1371, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.003146509341199607, | |
| "grad_norm": 0.4470585584640503, | |
| "learning_rate": 0.0004986446309898566, | |
| "loss": 1.2636, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.003233912378455151, | |
| "grad_norm": 0.4494791626930237, | |
| "learning_rate": 0.0004986009094088842, | |
| "loss": 0.8846, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.003321315415710696, | |
| "grad_norm": 1.8432437181472778, | |
| "learning_rate": 0.0004985571878279119, | |
| "loss": 1.0042, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.0034087184529662407, | |
| "grad_norm": 0.512199878692627, | |
| "learning_rate": 0.0004985134662469395, | |
| "loss": 0.9648, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.003496121490221785, | |
| "grad_norm": 0.7086130380630493, | |
| "learning_rate": 0.0004984697446659671, | |
| "loss": 0.8634, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.00358352452747733, | |
| "grad_norm": 0.34971296787261963, | |
| "learning_rate": 0.0004984260230849947, | |
| "loss": 1.1422, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.0036709275647328747, | |
| "grad_norm": 0.5125827193260193, | |
| "learning_rate": 0.0004983823015040224, | |
| "loss": 0.9885, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.003758330601988419, | |
| "grad_norm": 0.363505482673645, | |
| "learning_rate": 0.0004983385799230501, | |
| "loss": 0.9047, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.003845733639243964, | |
| "grad_norm": 0.36858850717544556, | |
| "learning_rate": 0.0004982948583420777, | |
| "loss": 0.8149, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.003933136676499509, | |
| "grad_norm": 0.3395627439022064, | |
| "learning_rate": 0.0004982511367611053, | |
| "loss": 0.6765, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.004020539713755053, | |
| "grad_norm": 0.8366663455963135, | |
| "learning_rate": 0.0004982074151801329, | |
| "loss": 1.4199, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.004107942751010597, | |
| "grad_norm": 0.4986715614795685, | |
| "learning_rate": 0.0004981636935991606, | |
| "loss": 1.0475, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.004195345788266142, | |
| "grad_norm": 0.39106953144073486, | |
| "learning_rate": 0.0004981199720181882, | |
| "loss": 0.8671, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.004282748825521687, | |
| "grad_norm": 1.129980206489563, | |
| "learning_rate": 0.0004980762504372159, | |
| "loss": 0.6251, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.004370151862777232, | |
| "grad_norm": 1.9613661766052246, | |
| "learning_rate": 0.0004980325288562434, | |
| "loss": 1.5782, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0044575549000327765, | |
| "grad_norm": 0.3839377164840698, | |
| "learning_rate": 0.000497988807275271, | |
| "loss": 0.8171, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.00454495793728832, | |
| "grad_norm": 1.2072890996932983, | |
| "learning_rate": 0.0004979450856942987, | |
| "loss": 1.3112, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.004632360974543865, | |
| "grad_norm": 0.4228273630142212, | |
| "learning_rate": 0.0004979013641133263, | |
| "loss": 0.8507, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.00471976401179941, | |
| "grad_norm": 0.3379599452018738, | |
| "learning_rate": 0.000497857642532354, | |
| "loss": 0.9112, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.004807167049054955, | |
| "grad_norm": 0.4163492023944855, | |
| "learning_rate": 0.0004978139209513816, | |
| "loss": 0.9839, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.0048945700863104995, | |
| "grad_norm": 1.4194269180297852, | |
| "learning_rate": 0.0004977701993704092, | |
| "loss": 1.194, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.004981973123566044, | |
| "grad_norm": 0.8857583999633789, | |
| "learning_rate": 0.0004977264777894369, | |
| "loss": 0.9047, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.005069376160821588, | |
| "grad_norm": 0.8493141531944275, | |
| "learning_rate": 0.0004976827562084645, | |
| "loss": 0.921, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.005156779198077133, | |
| "grad_norm": 0.6385464668273926, | |
| "learning_rate": 0.0004976390346274922, | |
| "loss": 0.9945, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.005244182235332678, | |
| "grad_norm": 0.6642935872077942, | |
| "learning_rate": 0.0004975953130465198, | |
| "loss": 0.8654, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.005331585272588223, | |
| "grad_norm": 0.5619232654571533, | |
| "learning_rate": 0.0004975515914655474, | |
| "loss": 0.9012, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.005418988309843767, | |
| "grad_norm": 0.37755316495895386, | |
| "learning_rate": 0.0004975078698845751, | |
| "loss": 0.7285, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.005506391347099311, | |
| "grad_norm": 1.3131452798843384, | |
| "learning_rate": 0.0004974641483036027, | |
| "loss": 1.5863, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.005593794384354856, | |
| "grad_norm": 0.48203301429748535, | |
| "learning_rate": 0.0004974204267226304, | |
| "loss": 0.932, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.005681197421610401, | |
| "grad_norm": 1.7584421634674072, | |
| "learning_rate": 0.000497376705141658, | |
| "loss": 1.3908, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.005768600458865946, | |
| "grad_norm": 0.5197044610977173, | |
| "learning_rate": 0.0004973329835606855, | |
| "loss": 0.8429, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.0058560034961214905, | |
| "grad_norm": 1.9259709119796753, | |
| "learning_rate": 0.0004972892619797131, | |
| "loss": 0.9317, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.005943406533377035, | |
| "grad_norm": 1.0053375959396362, | |
| "learning_rate": 0.0004972455403987408, | |
| "loss": 0.9276, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.006030809570632579, | |
| "grad_norm": 85.76437377929688, | |
| "learning_rate": 0.0004972018188177684, | |
| "loss": 5.3967, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.006118212607888124, | |
| "grad_norm": 1.9150564670562744, | |
| "learning_rate": 0.0004971580972367961, | |
| "loss": 1.2467, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.006205615645143669, | |
| "grad_norm": 1.286971092224121, | |
| "learning_rate": 0.0004971143756558237, | |
| "loss": 1.055, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.006293018682399214, | |
| "grad_norm": 3.5728204250335693, | |
| "learning_rate": 0.0004970706540748513, | |
| "loss": 0.9154, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.006380421719654758, | |
| "grad_norm": 3.2489278316497803, | |
| "learning_rate": 0.000497026932493879, | |
| "loss": 1.0816, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.006467824756910302, | |
| "grad_norm": 0.7258114218711853, | |
| "learning_rate": 0.0004969832109129066, | |
| "loss": 0.8656, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.006555227794165847, | |
| "grad_norm": 1.0952316522598267, | |
| "learning_rate": 0.0004969394893319343, | |
| "loss": 0.9195, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.006642630831421392, | |
| "grad_norm": 5.054478645324707, | |
| "learning_rate": 0.0004968957677509619, | |
| "loss": 1.2343, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.006730033868676937, | |
| "grad_norm": 2.0239686965942383, | |
| "learning_rate": 0.0004968520461699895, | |
| "loss": 1.6315, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.0068174369059324814, | |
| "grad_norm": 1.3708548545837402, | |
| "learning_rate": 0.0004968083245890172, | |
| "loss": 0.8507, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.006904839943188025, | |
| "grad_norm": 0.6372014284133911, | |
| "learning_rate": 0.0004967646030080448, | |
| "loss": 0.9235, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.00699224298044357, | |
| "grad_norm": 1.0243886709213257, | |
| "learning_rate": 0.0004967208814270724, | |
| "loss": 1.0295, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.007079646017699115, | |
| "grad_norm": 0.6127680540084839, | |
| "learning_rate": 0.0004966771598461001, | |
| "loss": 0.8469, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.00716704905495466, | |
| "grad_norm": 0.7449392080307007, | |
| "learning_rate": 0.0004966334382651277, | |
| "loss": 1.5825, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.0072544520922102045, | |
| "grad_norm": 0.6267126798629761, | |
| "learning_rate": 0.0004965897166841554, | |
| "loss": 1.0257, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.007341855129465749, | |
| "grad_norm": 5.416685104370117, | |
| "learning_rate": 0.0004965459951031829, | |
| "loss": 1.0654, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.007429258166721293, | |
| "grad_norm": 1.0485210418701172, | |
| "learning_rate": 0.0004965022735222105, | |
| "loss": 0.8979, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.007516661203976838, | |
| "grad_norm": 1.0192244052886963, | |
| "learning_rate": 0.0004964585519412381, | |
| "loss": 1.1117, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.007604064241232383, | |
| "grad_norm": 0.7042039632797241, | |
| "learning_rate": 0.0004964148303602658, | |
| "loss": 0.9955, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.007691467278487928, | |
| "grad_norm": 0.649395227432251, | |
| "learning_rate": 0.0004963711087792935, | |
| "loss": 0.7092, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.007778870315743472, | |
| "grad_norm": 0.8017964959144592, | |
| "learning_rate": 0.0004963273871983211, | |
| "loss": 0.8941, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.007866273352999017, | |
| "grad_norm": 0.4518626630306244, | |
| "learning_rate": 0.0004962836656173487, | |
| "loss": 0.9088, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.007953676390254561, | |
| "grad_norm": 0.4033469259738922, | |
| "learning_rate": 0.0004962399440363763, | |
| "loss": 0.9251, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.008041079427510107, | |
| "grad_norm": 0.8128958940505981, | |
| "learning_rate": 0.000496196222455404, | |
| "loss": 0.975, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.00812848246476565, | |
| "grad_norm": 3.1504242420196533, | |
| "learning_rate": 0.0004961525008744317, | |
| "loss": 1.5942, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.008215885502021195, | |
| "grad_norm": 3.9139645099639893, | |
| "learning_rate": 0.0004961087792934593, | |
| "loss": 1.071, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.00830328853927674, | |
| "grad_norm": 0.7689482569694519, | |
| "learning_rate": 0.0004960650577124869, | |
| "loss": 1.038, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.008390691576532284, | |
| "grad_norm": 0.5784656405448914, | |
| "learning_rate": 0.0004960213361315145, | |
| "loss": 1.0943, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.00847809461378783, | |
| "grad_norm": 0.5716943144798279, | |
| "learning_rate": 0.0004959776145505422, | |
| "loss": 0.8874, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.008565497651043374, | |
| "grad_norm": 0.5122077465057373, | |
| "learning_rate": 0.0004959338929695698, | |
| "loss": 0.951, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.008652900688298918, | |
| "grad_norm": 0.8700870871543884, | |
| "learning_rate": 0.0004958901713885975, | |
| "loss": 0.9632, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.008740303725554463, | |
| "grad_norm": 0.5623646974563599, | |
| "learning_rate": 0.000495846449807625, | |
| "loss": 1.0711, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.008827706762810007, | |
| "grad_norm": 0.589887261390686, | |
| "learning_rate": 0.0004958027282266526, | |
| "loss": 0.781, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.008915109800065553, | |
| "grad_norm": 1.63577401638031, | |
| "learning_rate": 0.0004957590066456803, | |
| "loss": 0.9118, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.009002512837321097, | |
| "grad_norm": 0.7755091786384583, | |
| "learning_rate": 0.0004957152850647079, | |
| "loss": 1.192, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.00908991587457664, | |
| "grad_norm": 0.5463851094245911, | |
| "learning_rate": 0.0004956715634837356, | |
| "loss": 0.894, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.009177318911832186, | |
| "grad_norm": 0.5253966450691223, | |
| "learning_rate": 0.0004956278419027632, | |
| "loss": 0.9432, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.00926472194908773, | |
| "grad_norm": 0.4377374053001404, | |
| "learning_rate": 0.0004955841203217908, | |
| "loss": 1.09, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.009352124986343276, | |
| "grad_norm": 0.5025166869163513, | |
| "learning_rate": 0.0004955403987408185, | |
| "loss": 0.9262, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.00943952802359882, | |
| "grad_norm": 0.45846027135849, | |
| "learning_rate": 0.0004954966771598461, | |
| "loss": 0.9428, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.009526931060854364, | |
| "grad_norm": 0.4219333529472351, | |
| "learning_rate": 0.0004954529555788738, | |
| "loss": 1.026, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.00961433409810991, | |
| "grad_norm": 0.5737212896347046, | |
| "learning_rate": 0.0004954092339979014, | |
| "loss": 1.1012, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.009701737135365453, | |
| "grad_norm": 0.887387752532959, | |
| "learning_rate": 0.000495365512416929, | |
| "loss": 1.3495, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.009789140172620999, | |
| "grad_norm": 0.5145196914672852, | |
| "learning_rate": 0.0004953217908359567, | |
| "loss": 1.0266, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.009876543209876543, | |
| "grad_norm": 1.5954936742782593, | |
| "learning_rate": 0.0004952780692549843, | |
| "loss": 1.254, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.009963946247132089, | |
| "grad_norm": 0.9585819840431213, | |
| "learning_rate": 0.0004952343476740119, | |
| "loss": 1.4545, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.010051349284387633, | |
| "grad_norm": 0.8477827310562134, | |
| "learning_rate": 0.0004951906260930396, | |
| "loss": 0.9454, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.010138752321643177, | |
| "grad_norm": 1.2712616920471191, | |
| "learning_rate": 0.0004951469045120672, | |
| "loss": 0.9497, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.010226155358898722, | |
| "grad_norm": 0.5731809139251709, | |
| "learning_rate": 0.0004951031829310947, | |
| "loss": 1.0611, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.010313558396154266, | |
| "grad_norm": 2.106234550476074, | |
| "learning_rate": 0.0004950594613501224, | |
| "loss": 1.0015, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.010400961433409812, | |
| "grad_norm": 0.7425693273544312, | |
| "learning_rate": 0.00049501573976915, | |
| "loss": 1.0588, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.010488364470665356, | |
| "grad_norm": 0.5987507700920105, | |
| "learning_rate": 0.0004949720181881777, | |
| "loss": 1.0016, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.0105757675079209, | |
| "grad_norm": 0.3802410364151001, | |
| "learning_rate": 0.0004949282966072053, | |
| "loss": 0.9133, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.010663170545176445, | |
| "grad_norm": 0.42108240723609924, | |
| "learning_rate": 0.0004948845750262329, | |
| "loss": 0.8675, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.01075057358243199, | |
| "grad_norm": 0.6281617879867554, | |
| "learning_rate": 0.0004948408534452606, | |
| "loss": 0.8294, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.010837976619687535, | |
| "grad_norm": 0.8346467614173889, | |
| "learning_rate": 0.0004947971318642882, | |
| "loss": 0.8333, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.010925379656943079, | |
| "grad_norm": 0.5090304613113403, | |
| "learning_rate": 0.0004947534102833158, | |
| "loss": 1.0423, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.011012782694198623, | |
| "grad_norm": 0.39572426676750183, | |
| "learning_rate": 0.0004947096887023435, | |
| "loss": 0.8565, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.011100185731454168, | |
| "grad_norm": 1.1466861963272095, | |
| "learning_rate": 0.0004946659671213711, | |
| "loss": 1.4358, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.011187588768709712, | |
| "grad_norm": 0.36562782526016235, | |
| "learning_rate": 0.0004946222455403988, | |
| "loss": 0.8373, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.011274991805965258, | |
| "grad_norm": 0.49587374925613403, | |
| "learning_rate": 0.0004945785239594264, | |
| "loss": 1.3961, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.011362394843220802, | |
| "grad_norm": 0.4852742850780487, | |
| "learning_rate": 0.000494534802378454, | |
| "loss": 1.0804, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.011449797880476346, | |
| "grad_norm": 0.4050949215888977, | |
| "learning_rate": 0.0004944910807974817, | |
| "loss": 1.0482, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.011537200917731891, | |
| "grad_norm": 0.35284534096717834, | |
| "learning_rate": 0.0004944473592165093, | |
| "loss": 0.9467, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.011624603954987435, | |
| "grad_norm": 1.6482305526733398, | |
| "learning_rate": 0.000494403637635537, | |
| "loss": 1.0678, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.011712006992242981, | |
| "grad_norm": 1.103427767753601, | |
| "learning_rate": 0.0004943599160545645, | |
| "loss": 0.9495, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.011799410029498525, | |
| "grad_norm": 0.45183080434799194, | |
| "learning_rate": 0.0004943161944735921, | |
| "loss": 0.9117, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.01188681306675407, | |
| "grad_norm": 0.3565897047519684, | |
| "learning_rate": 0.0004942724728926198, | |
| "loss": 0.8209, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.011974216104009614, | |
| "grad_norm": 0.6118256449699402, | |
| "learning_rate": 0.0004942287513116474, | |
| "loss": 1.0973, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.012061619141265158, | |
| "grad_norm": 0.40304186940193176, | |
| "learning_rate": 0.0004941850297306751, | |
| "loss": 1.1167, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.012149022178520704, | |
| "grad_norm": 0.46548163890838623, | |
| "learning_rate": 0.0004941413081497027, | |
| "loss": 0.9813, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.012236425215776248, | |
| "grad_norm": 0.4140109121799469, | |
| "learning_rate": 0.0004940975865687303, | |
| "loss": 0.9859, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.012323828253031794, | |
| "grad_norm": 0.7219896912574768, | |
| "learning_rate": 0.0004940538649877579, | |
| "loss": 0.9464, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.012411231290287338, | |
| "grad_norm": 1.1531212329864502, | |
| "learning_rate": 0.0004940101434067856, | |
| "loss": 0.9439, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.012498634327542881, | |
| "grad_norm": 0.5690356492996216, | |
| "learning_rate": 0.0004939664218258133, | |
| "loss": 0.897, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.012586037364798427, | |
| "grad_norm": 4.290929317474365, | |
| "learning_rate": 0.0004939227002448409, | |
| "loss": 0.9462, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.012673440402053971, | |
| "grad_norm": 0.8283594250679016, | |
| "learning_rate": 0.0004938789786638685, | |
| "loss": 0.8452, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.012760843439309517, | |
| "grad_norm": 0.7647207975387573, | |
| "learning_rate": 0.0004938352570828961, | |
| "loss": 0.8869, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.01284824647656506, | |
| "grad_norm": 0.4244186580181122, | |
| "learning_rate": 0.0004937915355019238, | |
| "loss": 1.0727, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.012935649513820605, | |
| "grad_norm": 0.6509714722633362, | |
| "learning_rate": 0.0004937478139209514, | |
| "loss": 1.3135, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.01302305255107615, | |
| "grad_norm": 0.5276227593421936, | |
| "learning_rate": 0.0004937040923399791, | |
| "loss": 0.9124, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.013110455588331694, | |
| "grad_norm": 0.6556555032730103, | |
| "learning_rate": 0.0004936603707590067, | |
| "loss": 1.0882, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.01319785862558724, | |
| "grad_norm": 0.5422887802124023, | |
| "learning_rate": 0.0004936166491780342, | |
| "loss": 0.787, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.013285261662842784, | |
| "grad_norm": 0.4304672181606293, | |
| "learning_rate": 0.0004935729275970619, | |
| "loss": 0.9496, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.013372664700098328, | |
| "grad_norm": 1.1699761152267456, | |
| "learning_rate": 0.0004935292060160895, | |
| "loss": 2.1129, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.013460067737353873, | |
| "grad_norm": 2.376859664916992, | |
| "learning_rate": 0.0004934854844351172, | |
| "loss": 1.0353, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.013547470774609417, | |
| "grad_norm": 0.6845773458480835, | |
| "learning_rate": 0.0004934417628541448, | |
| "loss": 0.739, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.013634873811864963, | |
| "grad_norm": 1.45736563205719, | |
| "learning_rate": 0.0004933980412731724, | |
| "loss": 0.9946, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.013722276849120507, | |
| "grad_norm": 0.8025717735290527, | |
| "learning_rate": 0.0004933543196922001, | |
| "loss": 0.7987, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.01380967988637605, | |
| "grad_norm": 0.4995729625225067, | |
| "learning_rate": 0.0004933105981112277, | |
| "loss": 0.8258, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.013897082923631596, | |
| "grad_norm": 0.3529548645019531, | |
| "learning_rate": 0.0004932668765302554, | |
| "loss": 0.7891, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.01398448596088714, | |
| "grad_norm": 0.3970806300640106, | |
| "learning_rate": 0.000493223154949283, | |
| "loss": 0.8748, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.014071888998142686, | |
| "grad_norm": 0.46492478251457214, | |
| "learning_rate": 0.0004931794333683106, | |
| "loss": 0.83, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.01415929203539823, | |
| "grad_norm": 0.39829567074775696, | |
| "learning_rate": 0.0004931357117873383, | |
| "loss": 0.8678, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.014246695072653776, | |
| "grad_norm": 0.44665223360061646, | |
| "learning_rate": 0.0004930919902063659, | |
| "loss": 0.8311, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.01433409810990932, | |
| "grad_norm": 0.3569469451904297, | |
| "learning_rate": 0.0004930482686253935, | |
| "loss": 0.7291, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.014421501147164863, | |
| "grad_norm": 0.5544111132621765, | |
| "learning_rate": 0.0004930045470444212, | |
| "loss": 0.7815, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.014508904184420409, | |
| "grad_norm": 0.350799024105072, | |
| "learning_rate": 0.0004929608254634488, | |
| "loss": 0.7029, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.014596307221675953, | |
| "grad_norm": 0.8473671078681946, | |
| "learning_rate": 0.0004929171038824765, | |
| "loss": 0.929, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.014683710258931499, | |
| "grad_norm": 0.46682775020599365, | |
| "learning_rate": 0.000492873382301504, | |
| "loss": 0.9511, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.014771113296187043, | |
| "grad_norm": 0.40774253010749817, | |
| "learning_rate": 0.0004928296607205316, | |
| "loss": 0.9113, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.014858516333442586, | |
| "grad_norm": 0.38683247566223145, | |
| "learning_rate": 0.0004927859391395592, | |
| "loss": 0.8733, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.014945919370698132, | |
| "grad_norm": 0.3632119297981262, | |
| "learning_rate": 0.0004927422175585869, | |
| "loss": 0.802, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.015033322407953676, | |
| "grad_norm": 0.43275561928749084, | |
| "learning_rate": 0.0004926984959776145, | |
| "loss": 0.869, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.015120725445209222, | |
| "grad_norm": 0.34049132466316223, | |
| "learning_rate": 0.0004926547743966422, | |
| "loss": 0.9312, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.015208128482464766, | |
| "grad_norm": 0.3519800901412964, | |
| "learning_rate": 0.0004926110528156698, | |
| "loss": 0.9362, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.01529553151972031, | |
| "grad_norm": 0.47325399518013, | |
| "learning_rate": 0.0004925673312346974, | |
| "loss": 0.9907, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.015382934556975855, | |
| "grad_norm": 0.3297930359840393, | |
| "learning_rate": 0.0004925236096537251, | |
| "loss": 0.9065, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.0154703375942314, | |
| "grad_norm": 0.3259631097316742, | |
| "learning_rate": 0.0004924798880727527, | |
| "loss": 0.76, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.015557740631486945, | |
| "grad_norm": 0.3202175498008728, | |
| "learning_rate": 0.0004924361664917804, | |
| "loss": 0.8182, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.01564514366874249, | |
| "grad_norm": 1.7625497579574585, | |
| "learning_rate": 0.000492392444910808, | |
| "loss": 1.0324, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.015732546705998034, | |
| "grad_norm": 0.31030330061912537, | |
| "learning_rate": 0.0004923487233298356, | |
| "loss": 0.7945, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.015819949743253577, | |
| "grad_norm": 0.416181743144989, | |
| "learning_rate": 0.0004923050017488633, | |
| "loss": 0.829, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.015907352780509122, | |
| "grad_norm": 0.42921754717826843, | |
| "learning_rate": 0.0004922612801678909, | |
| "loss": 0.7401, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.015994755817764668, | |
| "grad_norm": 0.2919391989707947, | |
| "learning_rate": 0.0004922175585869186, | |
| "loss": 0.8488, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.016082158855020214, | |
| "grad_norm": 0.314208447933197, | |
| "learning_rate": 0.0004921738370059462, | |
| "loss": 0.7946, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.016169561892275756, | |
| "grad_norm": 0.503778338432312, | |
| "learning_rate": 0.0004921301154249737, | |
| "loss": 0.8052, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.0162569649295313, | |
| "grad_norm": 0.36193403601646423, | |
| "learning_rate": 0.0004920863938440014, | |
| "loss": 0.8649, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.016344367966786847, | |
| "grad_norm": 0.631439208984375, | |
| "learning_rate": 0.000492042672263029, | |
| "loss": 0.7121, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.01643177100404239, | |
| "grad_norm": 0.3578779399394989, | |
| "learning_rate": 0.0004919989506820567, | |
| "loss": 0.9566, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.016519174041297935, | |
| "grad_norm": 0.3394636809825897, | |
| "learning_rate": 0.0004919552291010843, | |
| "loss": 0.7892, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.01660657707855348, | |
| "grad_norm": 0.3014313876628876, | |
| "learning_rate": 0.0004919115075201119, | |
| "loss": 0.9773, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.016693980115809023, | |
| "grad_norm": 0.464288592338562, | |
| "learning_rate": 0.0004918677859391395, | |
| "loss": 0.8351, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.01678138315306457, | |
| "grad_norm": 0.3988270163536072, | |
| "learning_rate": 0.0004918240643581672, | |
| "loss": 0.9227, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.016868786190320114, | |
| "grad_norm": 0.3190634250640869, | |
| "learning_rate": 0.0004917803427771949, | |
| "loss": 1.0606, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.01695618922757566, | |
| "grad_norm": 0.6769363880157471, | |
| "learning_rate": 0.0004917366211962225, | |
| "loss": 1.0602, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.017043592264831202, | |
| "grad_norm": 0.3352043330669403, | |
| "learning_rate": 0.0004916928996152501, | |
| "loss": 0.9759, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.017130995302086748, | |
| "grad_norm": 0.32745465636253357, | |
| "learning_rate": 0.0004916491780342777, | |
| "loss": 0.7544, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.017218398339342293, | |
| "grad_norm": 0.6321395635604858, | |
| "learning_rate": 0.0004916054564533054, | |
| "loss": 0.6861, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.017305801376597835, | |
| "grad_norm": 0.32094526290893555, | |
| "learning_rate": 0.000491561734872333, | |
| "loss": 0.8258, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.01739320441385338, | |
| "grad_norm": 0.3911696970462799, | |
| "learning_rate": 0.0004915180132913607, | |
| "loss": 0.9963, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.017480607451108927, | |
| "grad_norm": 0.2953476905822754, | |
| "learning_rate": 0.0004914742917103883, | |
| "loss": 0.8456, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.017568010488364472, | |
| "grad_norm": 0.3092620372772217, | |
| "learning_rate": 0.0004914305701294158, | |
| "loss": 0.8644, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.017655413525620015, | |
| "grad_norm": 0.6630509495735168, | |
| "learning_rate": 0.0004913868485484435, | |
| "loss": 0.9363, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.01774281656287556, | |
| "grad_norm": 0.3516843616962433, | |
| "learning_rate": 0.0004913431269674711, | |
| "loss": 1.1422, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.017830219600131106, | |
| "grad_norm": 0.43253111839294434, | |
| "learning_rate": 0.0004912994053864988, | |
| "loss": 0.852, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.017917622637386648, | |
| "grad_norm": 0.324238657951355, | |
| "learning_rate": 0.0004912556838055264, | |
| "loss": 0.8587, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.018005025674642194, | |
| "grad_norm": 0.28279510140419006, | |
| "learning_rate": 0.000491211962224554, | |
| "loss": 1.0088, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.01809242871189774, | |
| "grad_norm": 1.4974584579467773, | |
| "learning_rate": 0.0004911682406435817, | |
| "loss": 1.0296, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.01817983174915328, | |
| "grad_norm": 0.3786958158016205, | |
| "learning_rate": 0.0004911245190626093, | |
| "loss": 1.0741, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.018267234786408827, | |
| "grad_norm": 0.294880747795105, | |
| "learning_rate": 0.0004910807974816369, | |
| "loss": 1.021, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.018354637823664373, | |
| "grad_norm": 0.36885932087898254, | |
| "learning_rate": 0.0004910370759006646, | |
| "loss": 0.9023, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.01844204086091992, | |
| "grad_norm": 0.37099695205688477, | |
| "learning_rate": 0.0004909933543196922, | |
| "loss": 0.961, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.01852944389817546, | |
| "grad_norm": 0.3451802432537079, | |
| "learning_rate": 0.0004909496327387199, | |
| "loss": 0.8744, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.018616846935431006, | |
| "grad_norm": 0.34541890025138855, | |
| "learning_rate": 0.0004909059111577475, | |
| "loss": 0.9766, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.018704249972686552, | |
| "grad_norm": 0.2827027440071106, | |
| "learning_rate": 0.0004908621895767751, | |
| "loss": 0.8569, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.018791653009942094, | |
| "grad_norm": 0.3254356384277344, | |
| "learning_rate": 0.0004908184679958028, | |
| "loss": 0.9091, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.01887905604719764, | |
| "grad_norm": 0.29408493638038635, | |
| "learning_rate": 0.0004907747464148304, | |
| "loss": 0.823, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.018966459084453186, | |
| "grad_norm": 0.3414423167705536, | |
| "learning_rate": 0.0004907310248338581, | |
| "loss": 0.8197, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.019053862121708728, | |
| "grad_norm": 0.33818957209587097, | |
| "learning_rate": 0.0004906873032528857, | |
| "loss": 0.8553, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.019141265158964273, | |
| "grad_norm": 0.28477659821510315, | |
| "learning_rate": 0.0004906435816719132, | |
| "loss": 0.9008, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.01922866819621982, | |
| "grad_norm": 0.30363160371780396, | |
| "learning_rate": 0.0004905998600909408, | |
| "loss": 0.8077, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.019316071233475365, | |
| "grad_norm": 0.5011153221130371, | |
| "learning_rate": 0.0004905561385099685, | |
| "loss": 0.8938, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.019403474270730907, | |
| "grad_norm": 0.33721473813056946, | |
| "learning_rate": 0.0004905124169289961, | |
| "loss": 0.7798, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.019490877307986453, | |
| "grad_norm": 0.3752390742301941, | |
| "learning_rate": 0.0004904686953480238, | |
| "loss": 0.9064, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.019578280345241998, | |
| "grad_norm": 0.32278257608413696, | |
| "learning_rate": 0.0004904249737670514, | |
| "loss": 1.0019, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.01966568338249754, | |
| "grad_norm": 0.5604023933410645, | |
| "learning_rate": 0.000490381252186079, | |
| "loss": 0.9579, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.019753086419753086, | |
| "grad_norm": 0.26056113839149475, | |
| "learning_rate": 0.0004903375306051067, | |
| "loss": 0.7596, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.01984048945700863, | |
| "grad_norm": 0.3333994448184967, | |
| "learning_rate": 0.0004902938090241343, | |
| "loss": 1.0804, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.019927892494264177, | |
| "grad_norm": 0.3021886944770813, | |
| "learning_rate": 0.000490250087443162, | |
| "loss": 0.959, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.02001529553151972, | |
| "grad_norm": 0.2865878641605377, | |
| "learning_rate": 0.0004902063658621896, | |
| "loss": 0.9816, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.020102698568775265, | |
| "grad_norm": 0.2981945276260376, | |
| "learning_rate": 0.0004901626442812172, | |
| "loss": 0.8672, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.02019010160603081, | |
| "grad_norm": 0.34836679697036743, | |
| "learning_rate": 0.0004901189227002449, | |
| "loss": 0.9012, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.020277504643286353, | |
| "grad_norm": 0.7560614347457886, | |
| "learning_rate": 0.0004900752011192725, | |
| "loss": 1.2521, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.0203649076805419, | |
| "grad_norm": 0.2899073362350464, | |
| "learning_rate": 0.0004900314795383002, | |
| "loss": 0.9376, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.020452310717797444, | |
| "grad_norm": 0.2944093644618988, | |
| "learning_rate": 0.0004899877579573278, | |
| "loss": 0.9158, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.020539713755052987, | |
| "grad_norm": 0.2837924361228943, | |
| "learning_rate": 0.0004899440363763553, | |
| "loss": 0.9397, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.020627116792308532, | |
| "grad_norm": 0.3069987893104553, | |
| "learning_rate": 0.000489900314795383, | |
| "loss": 0.9635, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.020714519829564078, | |
| "grad_norm": 0.29966363310813904, | |
| "learning_rate": 0.0004898565932144106, | |
| "loss": 0.9103, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.020801922866819623, | |
| "grad_norm": 0.3086193799972534, | |
| "learning_rate": 0.0004898128716334383, | |
| "loss": 0.9797, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.020889325904075166, | |
| "grad_norm": 0.28495675325393677, | |
| "learning_rate": 0.0004897691500524659, | |
| "loss": 0.8221, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.02097672894133071, | |
| "grad_norm": 0.27056995034217834, | |
| "learning_rate": 0.0004897254284714935, | |
| "loss": 0.9584, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.021064131978586257, | |
| "grad_norm": 0.2837945818901062, | |
| "learning_rate": 0.0004896817068905211, | |
| "loss": 1.0047, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.0211515350158418, | |
| "grad_norm": 0.4288729429244995, | |
| "learning_rate": 0.0004896379853095488, | |
| "loss": 1.3211, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.021238938053097345, | |
| "grad_norm": 1.1985094547271729, | |
| "learning_rate": 0.0004895942637285765, | |
| "loss": 1.4015, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.02132634109035289, | |
| "grad_norm": 0.3171183466911316, | |
| "learning_rate": 0.0004895505421476041, | |
| "loss": 0.7096, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.021413744127608433, | |
| "grad_norm": 3.1765527725219727, | |
| "learning_rate": 0.0004895068205666317, | |
| "loss": 1.5594, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.02150114716486398, | |
| "grad_norm": 0.35891321301460266, | |
| "learning_rate": 0.0004894630989856593, | |
| "loss": 1.0663, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.021588550202119524, | |
| "grad_norm": 0.7044485807418823, | |
| "learning_rate": 0.000489419377404687, | |
| "loss": 1.4146, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.02167595323937507, | |
| "grad_norm": 0.361392617225647, | |
| "learning_rate": 0.0004893756558237146, | |
| "loss": 0.7964, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.021763356276630612, | |
| "grad_norm": 0.31394776701927185, | |
| "learning_rate": 0.0004893319342427423, | |
| "loss": 0.8608, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.021850759313886157, | |
| "grad_norm": 0.2853809893131256, | |
| "learning_rate": 0.0004892882126617699, | |
| "loss": 0.8628, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.021938162351141703, | |
| "grad_norm": 0.3122541904449463, | |
| "learning_rate": 0.0004892444910807975, | |
| "loss": 0.7246, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.022025565388397245, | |
| "grad_norm": 12.120355606079102, | |
| "learning_rate": 0.0004892007694998252, | |
| "loss": 1.3082, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.02211296842565279, | |
| "grad_norm": 0.3758118450641632, | |
| "learning_rate": 0.0004891570479188527, | |
| "loss": 1.0478, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.022200371462908337, | |
| "grad_norm": 1.1910297870635986, | |
| "learning_rate": 0.0004891133263378804, | |
| "loss": 1.2477, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.022287774500163882, | |
| "grad_norm": 0.8632226586341858, | |
| "learning_rate": 0.000489069604756908, | |
| "loss": 1.0988, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.022375177537419425, | |
| "grad_norm": 0.381533145904541, | |
| "learning_rate": 0.0004890258831759356, | |
| "loss": 0.8892, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.02246258057467497, | |
| "grad_norm": 0.43683141469955444, | |
| "learning_rate": 0.0004889821615949633, | |
| "loss": 0.8526, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.022549983611930516, | |
| "grad_norm": 0.6212348341941833, | |
| "learning_rate": 0.0004889384400139909, | |
| "loss": 0.9791, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.022637386649186058, | |
| "grad_norm": 0.44247013330459595, | |
| "learning_rate": 0.0004888947184330185, | |
| "loss": 1.0408, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.022724789686441604, | |
| "grad_norm": 0.5239019989967346, | |
| "learning_rate": 0.0004888509968520462, | |
| "loss": 0.8948, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.02281219272369715, | |
| "grad_norm": 0.7413169145584106, | |
| "learning_rate": 0.0004888072752710738, | |
| "loss": 0.7135, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.02289959576095269, | |
| "grad_norm": 0.39856553077697754, | |
| "learning_rate": 0.0004887635536901015, | |
| "loss": 0.8587, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.022986998798208237, | |
| "grad_norm": 0.534248411655426, | |
| "learning_rate": 0.0004887198321091291, | |
| "loss": 0.9006, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.023074401835463783, | |
| "grad_norm": 0.4782329499721527, | |
| "learning_rate": 0.0004886761105281567, | |
| "loss": 0.9292, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.02316180487271933, | |
| "grad_norm": 2.2424156665802, | |
| "learning_rate": 0.0004886323889471843, | |
| "loss": 1.1921, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.02324920790997487, | |
| "grad_norm": 0.5274596810340881, | |
| "learning_rate": 0.000488588667366212, | |
| "loss": 0.9732, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.023336610947230416, | |
| "grad_norm": 1.5465450286865234, | |
| "learning_rate": 0.0004885449457852397, | |
| "loss": 0.9304, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.023424013984485962, | |
| "grad_norm": 0.5691818594932556, | |
| "learning_rate": 0.0004885012242042673, | |
| "loss": 0.9713, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.023511417021741504, | |
| "grad_norm": 0.7849003672599792, | |
| "learning_rate": 0.0004884575026232948, | |
| "loss": 0.957, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.02359882005899705, | |
| "grad_norm": 0.5940591096878052, | |
| "learning_rate": 0.0004884137810423224, | |
| "loss": 0.8786, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.023686223096252595, | |
| "grad_norm": 0.592288076877594, | |
| "learning_rate": 0.0004883700594613501, | |
| "loss": 0.8695, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.02377362613350814, | |
| "grad_norm": 0.3618888556957245, | |
| "learning_rate": 0.0004883263378803777, | |
| "loss": 0.9204, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.023861029170763683, | |
| "grad_norm": 0.5957768559455872, | |
| "learning_rate": 0.0004882826162994054, | |
| "loss": 0.9289, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.02394843220801923, | |
| "grad_norm": 2.2828385829925537, | |
| "learning_rate": 0.000488238894718433, | |
| "loss": 0.9809, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.024035835245274775, | |
| "grad_norm": 0.5379523634910583, | |
| "learning_rate": 0.00048819517313746066, | |
| "loss": 0.934, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.024123238282530317, | |
| "grad_norm": 1.698805809020996, | |
| "learning_rate": 0.00048815145155648826, | |
| "loss": 0.9954, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.024210641319785862, | |
| "grad_norm": 4.479689121246338, | |
| "learning_rate": 0.00048810772997551595, | |
| "loss": 1.3687, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.024298044357041408, | |
| "grad_norm": 2.58227276802063, | |
| "learning_rate": 0.00048806400839454355, | |
| "loss": 0.9305, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.02438544739429695, | |
| "grad_norm": 0.8035925030708313, | |
| "learning_rate": 0.0004880202868135712, | |
| "loss": 1.1649, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.024472850431552496, | |
| "grad_norm": 0.560945451259613, | |
| "learning_rate": 0.00048797656523259884, | |
| "loss": 0.7542, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.02456025346880804, | |
| "grad_norm": 1.6739729642868042, | |
| "learning_rate": 0.0004879328436516264, | |
| "loss": 1.5675, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.024647656506063587, | |
| "grad_norm": 1.0051480531692505, | |
| "learning_rate": 0.0004878891220706541, | |
| "loss": 0.9312, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.02473505954331913, | |
| "grad_norm": 0.43883591890335083, | |
| "learning_rate": 0.0004878454004896817, | |
| "loss": 0.9779, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.024822462580574675, | |
| "grad_norm": 0.668854832649231, | |
| "learning_rate": 0.00048780167890870936, | |
| "loss": 0.9906, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.02490986561783022, | |
| "grad_norm": 2.1563730239868164, | |
| "learning_rate": 0.00048775795732773695, | |
| "loss": 0.9536, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.024997268655085763, | |
| "grad_norm": 1.1613394021987915, | |
| "learning_rate": 0.0004877142357467646, | |
| "loss": 0.9793, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.02508467169234131, | |
| "grad_norm": 0.5452724695205688, | |
| "learning_rate": 0.00048767051416579224, | |
| "loss": 1.11, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.025172074729596854, | |
| "grad_norm": 1.7393804788589478, | |
| "learning_rate": 0.0004876267925848199, | |
| "loss": 1.249, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.025259477766852396, | |
| "grad_norm": 15.148497581481934, | |
| "learning_rate": 0.00048758307100384753, | |
| "loss": 1.4897, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.025346880804107942, | |
| "grad_norm": 0.8102678060531616, | |
| "learning_rate": 0.0004875393494228751, | |
| "loss": 1.0192, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.025434283841363488, | |
| "grad_norm": 3.7395308017730713, | |
| "learning_rate": 0.00048749562784190277, | |
| "loss": 1.05, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.025521686878619033, | |
| "grad_norm": 0.6473442316055298, | |
| "learning_rate": 0.0004874519062609304, | |
| "loss": 0.9341, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.025609089915874576, | |
| "grad_norm": 1.2162256240844727, | |
| "learning_rate": 0.000487408184679958, | |
| "loss": 0.9426, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.02569649295313012, | |
| "grad_norm": 0.7783584594726562, | |
| "learning_rate": 0.0004873644630989857, | |
| "loss": 0.9343, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.025783895990385667, | |
| "grad_norm": 0.7198899388313293, | |
| "learning_rate": 0.0004873207415180133, | |
| "loss": 0.89, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.02587129902764121, | |
| "grad_norm": 0.6314525604248047, | |
| "learning_rate": 0.00048727701993704094, | |
| "loss": 0.9523, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.025958702064896755, | |
| "grad_norm": 3.2664554119110107, | |
| "learning_rate": 0.00048723329835606853, | |
| "loss": 1.3729, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.0260461051021523, | |
| "grad_norm": 0.9869332909584045, | |
| "learning_rate": 0.0004871895767750962, | |
| "loss": 0.978, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.026133508139407846, | |
| "grad_norm": 0.9169254302978516, | |
| "learning_rate": 0.0004871458551941239, | |
| "loss": 0.7641, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.02622091117666339, | |
| "grad_norm": 2.386565685272217, | |
| "learning_rate": 0.00048710213361315147, | |
| "loss": 0.9728, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.026308314213918934, | |
| "grad_norm": 2.5879757404327393, | |
| "learning_rate": 0.0004870584120321791, | |
| "loss": 1.0264, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.02639571725117448, | |
| "grad_norm": 1.059586763381958, | |
| "learning_rate": 0.0004870146904512067, | |
| "loss": 0.9235, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.026483120288430022, | |
| "grad_norm": 1.9793821573257446, | |
| "learning_rate": 0.00048697096887023435, | |
| "loss": 1.5626, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.026570523325685567, | |
| "grad_norm": 1.2389543056488037, | |
| "learning_rate": 0.00048692724728926194, | |
| "loss": 0.9666, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.026657926362941113, | |
| "grad_norm": 1.1373975276947021, | |
| "learning_rate": 0.00048688352570828964, | |
| "loss": 0.993, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.026745329400196655, | |
| "grad_norm": 5.966507434844971, | |
| "learning_rate": 0.0004868398041273173, | |
| "loss": 1.0113, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.0268327324374522, | |
| "grad_norm": 1.2714189291000366, | |
| "learning_rate": 0.0004867960825463449, | |
| "loss": 0.9462, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.026920135474707747, | |
| "grad_norm": 1.397048830986023, | |
| "learning_rate": 0.0004867523609653725, | |
| "loss": 0.9511, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.027007538511963292, | |
| "grad_norm": 1.2888479232788086, | |
| "learning_rate": 0.0004867086393844001, | |
| "loss": 1.014, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.027094941549218834, | |
| "grad_norm": 3.5597853660583496, | |
| "learning_rate": 0.0004866649178034278, | |
| "loss": 1.2336, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.02718234458647438, | |
| "grad_norm": 1.4104827642440796, | |
| "learning_rate": 0.00048662119622245545, | |
| "loss": 1.0148, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.027269747623729926, | |
| "grad_norm": 1.064355492591858, | |
| "learning_rate": 0.00048657747464148305, | |
| "loss": 1.0645, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.027357150660985468, | |
| "grad_norm": 0.819186806678772, | |
| "learning_rate": 0.0004865337530605107, | |
| "loss": 0.8948, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.027444553698241014, | |
| "grad_norm": 3.036085605621338, | |
| "learning_rate": 0.0004864900314795383, | |
| "loss": 1.1567, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.02753195673549656, | |
| "grad_norm": 1.4990466833114624, | |
| "learning_rate": 0.0004864463098985659, | |
| "loss": 0.9445, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.0276193597727521, | |
| "grad_norm": 1.889307975769043, | |
| "learning_rate": 0.00048640258831759357, | |
| "loss": 1.1844, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.027706762810007647, | |
| "grad_norm": 2.072758913040161, | |
| "learning_rate": 0.0004863588667366212, | |
| "loss": 1.0734, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.027794165847263193, | |
| "grad_norm": 2.2393903732299805, | |
| "learning_rate": 0.00048631514515564886, | |
| "loss": 1.1427, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.02788156888451874, | |
| "grad_norm": 4.34975528717041, | |
| "learning_rate": 0.00048627142357467645, | |
| "loss": 1.2473, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.02796897192177428, | |
| "grad_norm": 2.8603451251983643, | |
| "learning_rate": 0.0004862277019937041, | |
| "loss": 1.1657, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.028056374959029826, | |
| "grad_norm": 3.665041923522949, | |
| "learning_rate": 0.0004861839804127317, | |
| "loss": 1.6031, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.028143777996285372, | |
| "grad_norm": 3.366703748703003, | |
| "learning_rate": 0.0004861402588317594, | |
| "loss": 1.0769, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.028231181033540914, | |
| "grad_norm": 1.470408320426941, | |
| "learning_rate": 0.00048609653725078703, | |
| "loss": 1.2034, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.02831858407079646, | |
| "grad_norm": 1.0659921169281006, | |
| "learning_rate": 0.0004860528156698146, | |
| "loss": 0.984, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.028405987108052005, | |
| "grad_norm": 4.098123550415039, | |
| "learning_rate": 0.00048600909408884227, | |
| "loss": 1.2241, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.02849339014530755, | |
| "grad_norm": 11.896109580993652, | |
| "learning_rate": 0.00048596537250786986, | |
| "loss": 2.0891, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.028580793182563093, | |
| "grad_norm": 3.2453126907348633, | |
| "learning_rate": 0.00048592165092689756, | |
| "loss": 1.1273, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.02866819621981864, | |
| "grad_norm": 2.6395857334136963, | |
| "learning_rate": 0.00048587792934592515, | |
| "loss": 1.6087, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.028755599257074185, | |
| "grad_norm": 2.1530113220214844, | |
| "learning_rate": 0.0004858342077649528, | |
| "loss": 1.2749, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.028843002294329727, | |
| "grad_norm": 4.572982311248779, | |
| "learning_rate": 0.00048579048618398044, | |
| "loss": 1.4111, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.028930405331585272, | |
| "grad_norm": 3.029306173324585, | |
| "learning_rate": 0.00048574676460300803, | |
| "loss": 1.2926, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.029017808368840818, | |
| "grad_norm": 1.7193225622177124, | |
| "learning_rate": 0.0004857030430220357, | |
| "loss": 1.1767, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.02910521140609636, | |
| "grad_norm": 10.779121398925781, | |
| "learning_rate": 0.0004856593214410633, | |
| "loss": 1.3369, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.029192614443351906, | |
| "grad_norm": 2.478919744491577, | |
| "learning_rate": 0.00048561559986009097, | |
| "loss": 1.093, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.02928001748060745, | |
| "grad_norm": 2.2353742122650146, | |
| "learning_rate": 0.00048557187827911856, | |
| "loss": 1.1168, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.029367420517862997, | |
| "grad_norm": 2.8225460052490234, | |
| "learning_rate": 0.0004855281566981462, | |
| "loss": 1.3248, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.02945482355511854, | |
| "grad_norm": 2.1292366981506348, | |
| "learning_rate": 0.00048548443511717385, | |
| "loss": 1.344, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.029542226592374085, | |
| "grad_norm": 7.299522399902344, | |
| "learning_rate": 0.0004854407135362015, | |
| "loss": 1.8145, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.02962962962962963, | |
| "grad_norm": 1.5046287775039673, | |
| "learning_rate": 0.00048539699195522914, | |
| "loss": 1.388, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.029717032666885173, | |
| "grad_norm": 3.0877699851989746, | |
| "learning_rate": 0.00048535327037425673, | |
| "loss": 1.3291, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.02980443570414072, | |
| "grad_norm": 3.4899399280548096, | |
| "learning_rate": 0.0004853095487932844, | |
| "loss": 2.0677, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.029891838741396264, | |
| "grad_norm": 11.234345436096191, | |
| "learning_rate": 0.000485265827212312, | |
| "loss": 1.625, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.029979241778651806, | |
| "grad_norm": 2.1975765228271484, | |
| "learning_rate": 0.0004852221056313396, | |
| "loss": 1.4517, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.030066644815907352, | |
| "grad_norm": 8.629820823669434, | |
| "learning_rate": 0.0004851783840503673, | |
| "loss": 1.5853, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.030154047853162898, | |
| "grad_norm": 2.3949103355407715, | |
| "learning_rate": 0.0004851346624693949, | |
| "loss": 1.2549, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.030241450890418443, | |
| "grad_norm": 159.31179809570312, | |
| "learning_rate": 0.00048509094088842255, | |
| "loss": 1.5771, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.030328853927673986, | |
| "grad_norm": 11.36462688446045, | |
| "learning_rate": 0.00048504721930745014, | |
| "loss": 1.9178, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.03041625696492953, | |
| "grad_norm": 7.807027816772461, | |
| "learning_rate": 0.0004850034977264778, | |
| "loss": 1.9789, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.030503660002185077, | |
| "grad_norm": 8.663688659667969, | |
| "learning_rate": 0.0004849597761455054, | |
| "loss": 2.0506, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.03059106303944062, | |
| "grad_norm": 2.205583095550537, | |
| "learning_rate": 0.00048491605456453307, | |
| "loss": 1.8671, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.030678466076696165, | |
| "grad_norm": 3.150911808013916, | |
| "learning_rate": 0.0004848723329835607, | |
| "loss": 1.333, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.03076586911395171, | |
| "grad_norm": 4.053075790405273, | |
| "learning_rate": 0.0004848286114025883, | |
| "loss": 1.5273, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.030853272151207256, | |
| "grad_norm": 2.823411703109741, | |
| "learning_rate": 0.00048478488982161595, | |
| "loss": 1.4247, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.0309406751884628, | |
| "grad_norm": 3.0909945964813232, | |
| "learning_rate": 0.0004847411682406436, | |
| "loss": 1.2206, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.031028078225718344, | |
| "grad_norm": 3.38694167137146, | |
| "learning_rate": 0.00048469744665967124, | |
| "loss": 1.3954, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.03111548126297389, | |
| "grad_norm": 1.5531120300292969, | |
| "learning_rate": 0.0004846537250786989, | |
| "loss": 1.4665, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.031202884300229432, | |
| "grad_norm": 2.2059831619262695, | |
| "learning_rate": 0.0004846100034977265, | |
| "loss": 1.6022, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.03129028733748498, | |
| "grad_norm": 5.113000869750977, | |
| "learning_rate": 0.0004845662819167541, | |
| "loss": 1.5966, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.03137769037474052, | |
| "grad_norm": 8.374882698059082, | |
| "learning_rate": 0.0004845225603357817, | |
| "loss": 1.7198, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.03146509341199607, | |
| "grad_norm": 6.680134296417236, | |
| "learning_rate": 0.00048447883875480936, | |
| "loss": 1.4896, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.031552496449251614, | |
| "grad_norm": 4.67073392868042, | |
| "learning_rate": 0.00048443511717383706, | |
| "loss": 1.8682, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.03163989948650715, | |
| "grad_norm": 4.780435562133789, | |
| "learning_rate": 0.00048439139559286465, | |
| "loss": 1.7389, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.0317273025237627, | |
| "grad_norm": 3.4517061710357666, | |
| "learning_rate": 0.0004843476740118923, | |
| "loss": 1.8797, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.031814705561018244, | |
| "grad_norm": 2.4916350841522217, | |
| "learning_rate": 0.0004843039524309199, | |
| "loss": 1.4436, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.03190210859827379, | |
| "grad_norm": 3.9899487495422363, | |
| "learning_rate": 0.00048426023084994753, | |
| "loss": 1.5546, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.031989511635529336, | |
| "grad_norm": 8.799160957336426, | |
| "learning_rate": 0.0004842165092689752, | |
| "loss": 1.6344, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.03207691467278488, | |
| "grad_norm": 2.636903762817383, | |
| "learning_rate": 0.0004841727876880028, | |
| "loss": 1.5937, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.03216431771004043, | |
| "grad_norm": 2.600330352783203, | |
| "learning_rate": 0.00048412906610703047, | |
| "loss": 1.5617, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.032251720747295966, | |
| "grad_norm": 2.9146833419799805, | |
| "learning_rate": 0.00048408534452605806, | |
| "loss": 2.2708, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.03233912378455151, | |
| "grad_norm": 1.6746532917022705, | |
| "learning_rate": 0.0004840416229450857, | |
| "loss": 1.3178, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.03242652682180706, | |
| "grad_norm": 2.1965625286102295, | |
| "learning_rate": 0.0004839979013641133, | |
| "loss": 1.2351, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.0325139298590626, | |
| "grad_norm": 4.235499858856201, | |
| "learning_rate": 0.000483954179783141, | |
| "loss": 1.8627, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.03260133289631815, | |
| "grad_norm": 1.5351746082305908, | |
| "learning_rate": 0.00048391045820216864, | |
| "loss": 1.2413, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.032688735933573694, | |
| "grad_norm": 1.5462607145309448, | |
| "learning_rate": 0.00048386673662119623, | |
| "loss": 1.3282, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.03277613897082924, | |
| "grad_norm": 2.4433155059814453, | |
| "learning_rate": 0.0004838230150402239, | |
| "loss": 1.3913, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.03286354200808478, | |
| "grad_norm": 2.431323528289795, | |
| "learning_rate": 0.00048377929345925146, | |
| "loss": 1.4269, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.032950945045340324, | |
| "grad_norm": 1.4146811962127686, | |
| "learning_rate": 0.0004837355718782791, | |
| "loss": 1.225, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.03303834808259587, | |
| "grad_norm": 1.0660099983215332, | |
| "learning_rate": 0.00048369185029730675, | |
| "loss": 1.2465, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.033125751119851415, | |
| "grad_norm": 16.820344924926758, | |
| "learning_rate": 0.0004836481287163344, | |
| "loss": 1.2228, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.03321315415710696, | |
| "grad_norm": 1.6520887613296509, | |
| "learning_rate": 0.00048360440713536204, | |
| "loss": 1.0955, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.03330055719436251, | |
| "grad_norm": 3.057648181915283, | |
| "learning_rate": 0.00048356068555438964, | |
| "loss": 1.3929, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.033387960231618045, | |
| "grad_norm": 5.74190092086792, | |
| "learning_rate": 0.0004835169639734173, | |
| "loss": 1.3873, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.03347536326887359, | |
| "grad_norm": 2.451111078262329, | |
| "learning_rate": 0.0004834732423924449, | |
| "loss": 1.2411, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.03356276630612914, | |
| "grad_norm": 7.096491813659668, | |
| "learning_rate": 0.00048342952081147257, | |
| "loss": 1.1512, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.03365016934338468, | |
| "grad_norm": 1.7510989904403687, | |
| "learning_rate": 0.0004833857992305002, | |
| "loss": 1.7508, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.03373757238064023, | |
| "grad_norm": 1.9392039775848389, | |
| "learning_rate": 0.0004833420776495278, | |
| "loss": 1.3254, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.033824975417895774, | |
| "grad_norm": 1.3087763786315918, | |
| "learning_rate": 0.00048329835606855545, | |
| "loss": 1.167, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.03391237845515132, | |
| "grad_norm": 1.0963687896728516, | |
| "learning_rate": 0.00048325463448758304, | |
| "loss": 1.193, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.03399978149240686, | |
| "grad_norm": 0.7981585264205933, | |
| "learning_rate": 0.00048321091290661074, | |
| "loss": 1.1383, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.034087184529662404, | |
| "grad_norm": 0.9217828512191772, | |
| "learning_rate": 0.00048316719132563833, | |
| "loss": 1.0119, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.03417458756691795, | |
| "grad_norm": 1.242906093597412, | |
| "learning_rate": 0.000483123469744666, | |
| "loss": 1.1663, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.034261990604173495, | |
| "grad_norm": 0.9021317362785339, | |
| "learning_rate": 0.0004830797481636936, | |
| "loss": 1.1384, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.03434939364142904, | |
| "grad_norm": 0.9118911623954773, | |
| "learning_rate": 0.0004830360265827212, | |
| "loss": 1.3087, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.034436796678684586, | |
| "grad_norm": 1.754934549331665, | |
| "learning_rate": 0.0004829923050017489, | |
| "loss": 1.3614, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.03452419971594013, | |
| "grad_norm": 0.8837860822677612, | |
| "learning_rate": 0.0004829485834207765, | |
| "loss": 1.1244, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.03461160275319567, | |
| "grad_norm": 2.6078360080718994, | |
| "learning_rate": 0.00048290486183980415, | |
| "loss": 1.0216, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.034699005790451216, | |
| "grad_norm": 5.406350135803223, | |
| "learning_rate": 0.00048286114025883174, | |
| "loss": 1.0928, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.03478640882770676, | |
| "grad_norm": 2.1140406131744385, | |
| "learning_rate": 0.0004828174186778594, | |
| "loss": 1.1857, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.03487381186496231, | |
| "grad_norm": 7.267689228057861, | |
| "learning_rate": 0.00048277369709688703, | |
| "loss": 1.7055, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.03496121490221785, | |
| "grad_norm": 1.1019072532653809, | |
| "learning_rate": 0.0004827299755159147, | |
| "loss": 2.0105, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.0350486179394734, | |
| "grad_norm": 7.888851165771484, | |
| "learning_rate": 0.0004826862539349423, | |
| "loss": 1.9483, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.035136020976728945, | |
| "grad_norm": 1.299735188484192, | |
| "learning_rate": 0.0004826425323539699, | |
| "loss": 1.2644, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.03522342401398448, | |
| "grad_norm": 1.5624737739562988, | |
| "learning_rate": 0.00048259881077299756, | |
| "loss": 1.0429, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.03531082705124003, | |
| "grad_norm": 1.350966453552246, | |
| "learning_rate": 0.0004825550891920252, | |
| "loss": 1.1749, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.035398230088495575, | |
| "grad_norm": 1.5936487913131714, | |
| "learning_rate": 0.0004825113676110528, | |
| "loss": 1.1733, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.03548563312575112, | |
| "grad_norm": 1.0757735967636108, | |
| "learning_rate": 0.0004824676460300805, | |
| "loss": 0.9944, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.035573036163006666, | |
| "grad_norm": 0.7153262495994568, | |
| "learning_rate": 0.0004824239244491081, | |
| "loss": 1.1921, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.03566043920026221, | |
| "grad_norm": 1.0734481811523438, | |
| "learning_rate": 0.00048238020286813573, | |
| "loss": 1.1752, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.03574784223751775, | |
| "grad_norm": 0.8831942081451416, | |
| "learning_rate": 0.0004823364812871633, | |
| "loss": 1.1402, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.035835245274773296, | |
| "grad_norm": 0.6179252862930298, | |
| "learning_rate": 0.00048229275970619096, | |
| "loss": 1.2101, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.03592264831202884, | |
| "grad_norm": 1.091264009475708, | |
| "learning_rate": 0.00048224903812521866, | |
| "loss": 1.1421, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.03601005134928439, | |
| "grad_norm": 0.8162115216255188, | |
| "learning_rate": 0.00048220531654424625, | |
| "loss": 1.2952, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.03609745438653993, | |
| "grad_norm": 1.0148085355758667, | |
| "learning_rate": 0.0004821615949632739, | |
| "loss": 0.9862, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.03618485742379548, | |
| "grad_norm": 0.9712663888931274, | |
| "learning_rate": 0.0004821178733823015, | |
| "loss": 1.1402, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.036272260461051024, | |
| "grad_norm": 0.9177207350730896, | |
| "learning_rate": 0.00048207415180132914, | |
| "loss": 1.2027, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.03635966349830656, | |
| "grad_norm": 3.5026392936706543, | |
| "learning_rate": 0.0004820304302203567, | |
| "loss": 1.4284, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.03644706653556211, | |
| "grad_norm": 1.7483121156692505, | |
| "learning_rate": 0.0004819867086393844, | |
| "loss": 1.2328, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.036534469572817654, | |
| "grad_norm": 1.423335075378418, | |
| "learning_rate": 0.00048194298705841207, | |
| "loss": 1.1085, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.0366218726100732, | |
| "grad_norm": 13.332382202148438, | |
| "learning_rate": 0.00048189926547743966, | |
| "loss": 1.2456, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.036709275647328746, | |
| "grad_norm": 1.2808276414871216, | |
| "learning_rate": 0.0004818555438964673, | |
| "loss": 1.1165, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.03679667868458429, | |
| "grad_norm": 1.293886661529541, | |
| "learning_rate": 0.0004818118223154949, | |
| "loss": 1.2171, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.03688408172183984, | |
| "grad_norm": 1.1845675706863403, | |
| "learning_rate": 0.0004817681007345226, | |
| "loss": 2.0462, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.036971484759095376, | |
| "grad_norm": 0.9728288054466248, | |
| "learning_rate": 0.00048172437915355024, | |
| "loss": 1.143, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.03705888779635092, | |
| "grad_norm": 0.816474437713623, | |
| "learning_rate": 0.00048168065757257783, | |
| "loss": 1.2092, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.03714629083360647, | |
| "grad_norm": 0.6224190592765808, | |
| "learning_rate": 0.0004816369359916055, | |
| "loss": 1.0575, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.03723369387086201, | |
| "grad_norm": 0.6718823313713074, | |
| "learning_rate": 0.00048159321441063307, | |
| "loss": 1.0947, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.03732109690811756, | |
| "grad_norm": 0.6595826148986816, | |
| "learning_rate": 0.0004815494928296607, | |
| "loss": 1.4427, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.037408499945373104, | |
| "grad_norm": 11.761706352233887, | |
| "learning_rate": 0.00048150577124868836, | |
| "loss": 1.0676, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.03749590298262865, | |
| "grad_norm": 0.8342620134353638, | |
| "learning_rate": 0.000481462049667716, | |
| "loss": 1.9127, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.03758330601988419, | |
| "grad_norm": 1.1234923601150513, | |
| "learning_rate": 0.00048141832808674365, | |
| "loss": 1.1633, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.037670709057139734, | |
| "grad_norm": 1.9076615571975708, | |
| "learning_rate": 0.00048137460650577124, | |
| "loss": 1.0639, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.03775811209439528, | |
| "grad_norm": 0.6750392913818359, | |
| "learning_rate": 0.0004813308849247989, | |
| "loss": 0.9955, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.037845515131650825, | |
| "grad_norm": 0.6759085655212402, | |
| "learning_rate": 0.0004812871633438265, | |
| "loss": 1.131, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.03793291816890637, | |
| "grad_norm": 1.4919787645339966, | |
| "learning_rate": 0.0004812434417628542, | |
| "loss": 1.6338, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.03802032120616192, | |
| "grad_norm": 0.8407806754112244, | |
| "learning_rate": 0.0004811997201818818, | |
| "loss": 1.6765, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.038107724243417455, | |
| "grad_norm": 0.5378815531730652, | |
| "learning_rate": 0.0004811559986009094, | |
| "loss": 1.1115, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.038195127280673, | |
| "grad_norm": 0.705746054649353, | |
| "learning_rate": 0.00048111227701993706, | |
| "loss": 0.8717, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.03828253031792855, | |
| "grad_norm": 0.6170596480369568, | |
| "learning_rate": 0.00048106855543896465, | |
| "loss": 1.113, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.03836993335518409, | |
| "grad_norm": 0.7694591283798218, | |
| "learning_rate": 0.00048102483385799235, | |
| "loss": 0.9803, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.03845733639243964, | |
| "grad_norm": 0.44214290380477905, | |
| "learning_rate": 0.00048098111227701994, | |
| "loss": 1.0997, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.038544739429695184, | |
| "grad_norm": 1.67384934425354, | |
| "learning_rate": 0.0004809373906960476, | |
| "loss": 1.473, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.03863214246695073, | |
| "grad_norm": 0.906971275806427, | |
| "learning_rate": 0.00048089366911507523, | |
| "loss": 1.4701, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.03871954550420627, | |
| "grad_norm": 1.0720627307891846, | |
| "learning_rate": 0.0004808499475341028, | |
| "loss": 1.2818, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.038806948541461814, | |
| "grad_norm": 0.9048315286636353, | |
| "learning_rate": 0.00048080622595313046, | |
| "loss": 1.0395, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.03889435157871736, | |
| "grad_norm": 0.6810390949249268, | |
| "learning_rate": 0.0004807625043721581, | |
| "loss": 0.9983, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.038981754615972905, | |
| "grad_norm": 2.8892154693603516, | |
| "learning_rate": 0.00048071878279118575, | |
| "loss": 1.4023, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.03906915765322845, | |
| "grad_norm": 2.2658865451812744, | |
| "learning_rate": 0.00048067506121021335, | |
| "loss": 1.2289, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.039156560690483996, | |
| "grad_norm": 0.6239084005355835, | |
| "learning_rate": 0.000480631339629241, | |
| "loss": 1.012, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.03924396372773954, | |
| "grad_norm": 1.147459864616394, | |
| "learning_rate": 0.00048058761804826864, | |
| "loss": 1.0538, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.03933136676499508, | |
| "grad_norm": 0.8646839261054993, | |
| "learning_rate": 0.0004805438964672963, | |
| "loss": 0.965, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.039418769802250626, | |
| "grad_norm": 0.9366894960403442, | |
| "learning_rate": 0.0004805001748863239, | |
| "loss": 0.8447, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.03950617283950617, | |
| "grad_norm": 0.6512202024459839, | |
| "learning_rate": 0.0004804564533053515, | |
| "loss": 1.0594, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.03959357587676172, | |
| "grad_norm": 0.5651702284812927, | |
| "learning_rate": 0.00048041273172437916, | |
| "loss": 1.1249, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.03968097891401726, | |
| "grad_norm": 1.0038714408874512, | |
| "learning_rate": 0.0004803690101434068, | |
| "loss": 1.1198, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.03976838195127281, | |
| "grad_norm": 1.0579853057861328, | |
| "learning_rate": 0.0004803252885624344, | |
| "loss": 1.0889, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.039855784988528355, | |
| "grad_norm": 0.4361538887023926, | |
| "learning_rate": 0.0004802815669814621, | |
| "loss": 0.876, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.03994318802578389, | |
| "grad_norm": 0.8685644865036011, | |
| "learning_rate": 0.0004802378454004897, | |
| "loss": 0.8344, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.04003059106303944, | |
| "grad_norm": 0.5350561141967773, | |
| "learning_rate": 0.00048019412381951733, | |
| "loss": 1.0352, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.040117994100294985, | |
| "grad_norm": 0.7722122669219971, | |
| "learning_rate": 0.0004801504022385449, | |
| "loss": 0.9144, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.04020539713755053, | |
| "grad_norm": 0.5645512938499451, | |
| "learning_rate": 0.00048010668065757257, | |
| "loss": 0.9014, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.040292800174806076, | |
| "grad_norm": 0.5366953015327454, | |
| "learning_rate": 0.00048006295907660027, | |
| "loss": 1.005, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.04038020321206162, | |
| "grad_norm": 0.5673419237136841, | |
| "learning_rate": 0.00048001923749562786, | |
| "loss": 0.9666, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.04046760624931716, | |
| "grad_norm": 0.5309872031211853, | |
| "learning_rate": 0.0004799755159146555, | |
| "loss": 1.017, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.040555009286572706, | |
| "grad_norm": 0.567584753036499, | |
| "learning_rate": 0.0004799317943336831, | |
| "loss": 0.9212, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.04064241232382825, | |
| "grad_norm": 0.5049634575843811, | |
| "learning_rate": 0.00047988807275271074, | |
| "loss": 1.0515, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.0407298153610838, | |
| "grad_norm": 0.5385315418243408, | |
| "learning_rate": 0.00047984435117173833, | |
| "loss": 1.1727, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.04081721839833934, | |
| "grad_norm": 0.4884001910686493, | |
| "learning_rate": 0.00047980062959076603, | |
| "loss": 1.1159, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.04090462143559489, | |
| "grad_norm": 0.7112920880317688, | |
| "learning_rate": 0.0004797569080097937, | |
| "loss": 1.235, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.040992024472850434, | |
| "grad_norm": 0.4838173985481262, | |
| "learning_rate": 0.00047971318642882127, | |
| "loss": 0.9681, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.04107942751010597, | |
| "grad_norm": 0.45457422733306885, | |
| "learning_rate": 0.0004796694648478489, | |
| "loss": 1.1104, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.04116683054736152, | |
| "grad_norm": 0.5703690648078918, | |
| "learning_rate": 0.0004796257432668765, | |
| "loss": 1.1248, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.041254233584617064, | |
| "grad_norm": 0.450735479593277, | |
| "learning_rate": 0.00047958202168590415, | |
| "loss": 0.8925, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.04134163662187261, | |
| "grad_norm": 0.5150513052940369, | |
| "learning_rate": 0.00047953830010493185, | |
| "loss": 1.3525, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.041429039659128156, | |
| "grad_norm": 0.3937002718448639, | |
| "learning_rate": 0.00047949457852395944, | |
| "loss": 0.9275, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.0415164426963837, | |
| "grad_norm": 0.3689919114112854, | |
| "learning_rate": 0.0004794508569429871, | |
| "loss": 1.0588, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.04160384573363925, | |
| "grad_norm": 0.34137895703315735, | |
| "learning_rate": 0.0004794071353620147, | |
| "loss": 1.0148, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.041691248770894786, | |
| "grad_norm": 0.33478084206581116, | |
| "learning_rate": 0.0004793634137810423, | |
| "loss": 1.1783, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.04177865180815033, | |
| "grad_norm": 0.36996185779571533, | |
| "learning_rate": 0.00047931969220006996, | |
| "loss": 0.9166, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.04186605484540588, | |
| "grad_norm": 0.40458017587661743, | |
| "learning_rate": 0.0004792759706190976, | |
| "loss": 1.039, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.04195345788266142, | |
| "grad_norm": 0.5270059704780579, | |
| "learning_rate": 0.00047923224903812525, | |
| "loss": 0.9331, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.04204086091991697, | |
| "grad_norm": 0.38086146116256714, | |
| "learning_rate": 0.00047918852745715285, | |
| "loss": 1.2488, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.042128263957172514, | |
| "grad_norm": 0.4206714332103729, | |
| "learning_rate": 0.0004791448058761805, | |
| "loss": 0.9509, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.04221566699442806, | |
| "grad_norm": 0.45416519045829773, | |
| "learning_rate": 0.0004791010842952081, | |
| "loss": 1.0384, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.0423030700316836, | |
| "grad_norm": 0.312229722738266, | |
| "learning_rate": 0.0004790573627142358, | |
| "loss": 1.0349, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.042390473068939144, | |
| "grad_norm": 0.4084686040878296, | |
| "learning_rate": 0.0004790136411332634, | |
| "loss": 0.9074, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.04247787610619469, | |
| "grad_norm": 12.558296203613281, | |
| "learning_rate": 0.000478969919552291, | |
| "loss": 1.4943, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.042565279143450235, | |
| "grad_norm": 0.5897109508514404, | |
| "learning_rate": 0.00047892619797131866, | |
| "loss": 1.0668, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.04265268218070578, | |
| "grad_norm": 0.6350471377372742, | |
| "learning_rate": 0.00047888247639034625, | |
| "loss": 0.9479, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.04274008521796133, | |
| "grad_norm": 0.4891508221626282, | |
| "learning_rate": 0.00047883875480937395, | |
| "loss": 1.1157, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.042827488255216865, | |
| "grad_norm": 0.3619961142539978, | |
| "learning_rate": 0.00047879503322840154, | |
| "loss": 0.9912, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.04291489129247241, | |
| "grad_norm": 0.3376581072807312, | |
| "learning_rate": 0.0004787513116474292, | |
| "loss": 0.8494, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.04300229432972796, | |
| "grad_norm": 0.6040793061256409, | |
| "learning_rate": 0.00047870759006645683, | |
| "loss": 1.3237, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.0430896973669835, | |
| "grad_norm": 2.6606392860412598, | |
| "learning_rate": 0.0004786638684854844, | |
| "loss": 1.7359, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.04317710040423905, | |
| "grad_norm": 0.5396057367324829, | |
| "learning_rate": 0.00047862014690451207, | |
| "loss": 1.552, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.043264503441494594, | |
| "grad_norm": 0.42991939187049866, | |
| "learning_rate": 0.0004785764253235397, | |
| "loss": 0.99, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.04335190647875014, | |
| "grad_norm": 0.40487632155418396, | |
| "learning_rate": 0.00047853270374256736, | |
| "loss": 1.0104, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.04343930951600568, | |
| "grad_norm": 0.9767838716506958, | |
| "learning_rate": 0.00047848898216159495, | |
| "loss": 1.0582, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.043526712553261224, | |
| "grad_norm": 0.3633114695549011, | |
| "learning_rate": 0.0004784452605806226, | |
| "loss": 0.92, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.04361411559051677, | |
| "grad_norm": 0.6365157961845398, | |
| "learning_rate": 0.00047840153899965024, | |
| "loss": 0.9564, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.043701518627772315, | |
| "grad_norm": 0.4060046076774597, | |
| "learning_rate": 0.00047835781741867783, | |
| "loss": 1.046, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.04378892166502786, | |
| "grad_norm": 0.3747900128364563, | |
| "learning_rate": 0.00047831409583770553, | |
| "loss": 1.0201, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.043876324702283406, | |
| "grad_norm": 0.3672393262386322, | |
| "learning_rate": 0.0004782703742567331, | |
| "loss": 1.0021, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.04396372773953895, | |
| "grad_norm": 0.3505338132381439, | |
| "learning_rate": 0.00047822665267576077, | |
| "loss": 1.0002, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.04405113077679449, | |
| "grad_norm": 5.722542762756348, | |
| "learning_rate": 0.0004781829310947884, | |
| "loss": 2.5431, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.044138533814050036, | |
| "grad_norm": 0.5349693298339844, | |
| "learning_rate": 0.000478139209513816, | |
| "loss": 1.151, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.04422593685130558, | |
| "grad_norm": 0.4468895494937897, | |
| "learning_rate": 0.0004780954879328437, | |
| "loss": 0.9958, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.04431333988856113, | |
| "grad_norm": 0.47205036878585815, | |
| "learning_rate": 0.0004780517663518713, | |
| "loss": 0.9401, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.04440074292581667, | |
| "grad_norm": 0.35336941480636597, | |
| "learning_rate": 0.00047800804477089894, | |
| "loss": 1.0982, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.04448814596307222, | |
| "grad_norm": 1.8884743452072144, | |
| "learning_rate": 0.00047796432318992653, | |
| "loss": 0.9199, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.044575549000327765, | |
| "grad_norm": 0.4091229736804962, | |
| "learning_rate": 0.0004779206016089542, | |
| "loss": 0.8953, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.0446629520375833, | |
| "grad_norm": 0.4730583131313324, | |
| "learning_rate": 0.0004778768800279818, | |
| "loss": 0.8085, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.04475035507483885, | |
| "grad_norm": 0.3801075220108032, | |
| "learning_rate": 0.00047783315844700946, | |
| "loss": 0.9914, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.044837758112094395, | |
| "grad_norm": 0.3660631477832794, | |
| "learning_rate": 0.0004777894368660371, | |
| "loss": 0.9804, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.04492516114934994, | |
| "grad_norm": 0.8466418981552124, | |
| "learning_rate": 0.0004777457152850647, | |
| "loss": 1.1207, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.045012564186605486, | |
| "grad_norm": 0.3560774624347687, | |
| "learning_rate": 0.00047770199370409234, | |
| "loss": 0.8773, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.04509996722386103, | |
| "grad_norm": 0.49633318185806274, | |
| "learning_rate": 0.00047765827212312, | |
| "loss": 1.0111, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.04518737026111657, | |
| "grad_norm": 0.6001185178756714, | |
| "learning_rate": 0.00047761455054214764, | |
| "loss": 1.2566, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.045274773298372116, | |
| "grad_norm": 0.7423095703125, | |
| "learning_rate": 0.0004775708289611753, | |
| "loss": 1.1431, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.04536217633562766, | |
| "grad_norm": 0.34218892455101013, | |
| "learning_rate": 0.00047752710738020287, | |
| "loss": 0.9254, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.04544957937288321, | |
| "grad_norm": 0.336230605840683, | |
| "learning_rate": 0.0004774833857992305, | |
| "loss": 1.0015, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.04553698241013875, | |
| "grad_norm": 0.39158111810684204, | |
| "learning_rate": 0.0004774396642182581, | |
| "loss": 0.8319, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.0456243854473943, | |
| "grad_norm": 0.4045357406139374, | |
| "learning_rate": 0.00047739594263728575, | |
| "loss": 0.8531, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.045711788484649844, | |
| "grad_norm": 0.5861966013908386, | |
| "learning_rate": 0.00047735222105631345, | |
| "loss": 0.9975, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.04579919152190538, | |
| "grad_norm": 0.33865249156951904, | |
| "learning_rate": 0.00047730849947534104, | |
| "loss": 0.94, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.04588659455916093, | |
| "grad_norm": 0.4759502112865448, | |
| "learning_rate": 0.0004772647778943687, | |
| "loss": 0.9581, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.045973997596416474, | |
| "grad_norm": 0.492929607629776, | |
| "learning_rate": 0.0004772210563133963, | |
| "loss": 1.3563, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.04606140063367202, | |
| "grad_norm": 0.31947705149650574, | |
| "learning_rate": 0.0004771773347324239, | |
| "loss": 0.8052, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.046148803670927566, | |
| "grad_norm": 0.3842394948005676, | |
| "learning_rate": 0.0004771336131514515, | |
| "loss": 0.9723, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.04623620670818311, | |
| "grad_norm": 0.338451623916626, | |
| "learning_rate": 0.0004770898915704792, | |
| "loss": 1.0315, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.04632360974543866, | |
| "grad_norm": 1.9640684127807617, | |
| "learning_rate": 0.00047704616998950686, | |
| "loss": 1.2013, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.046411012782694196, | |
| "grad_norm": 0.501758337020874, | |
| "learning_rate": 0.00047700244840853445, | |
| "loss": 1.0096, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.04649841581994974, | |
| "grad_norm": 0.5867491960525513, | |
| "learning_rate": 0.0004769587268275621, | |
| "loss": 0.9708, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.04658581885720529, | |
| "grad_norm": 2.1122539043426514, | |
| "learning_rate": 0.0004769150052465897, | |
| "loss": 0.8145, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.04667322189446083, | |
| "grad_norm": 0.7969621419906616, | |
| "learning_rate": 0.0004768712836656174, | |
| "loss": 0.829, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.04676062493171638, | |
| "grad_norm": 0.4205247461795807, | |
| "learning_rate": 0.00047682756208464503, | |
| "loss": 1.0063, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.046848027968971924, | |
| "grad_norm": 0.3231610059738159, | |
| "learning_rate": 0.0004767838405036726, | |
| "loss": 0.968, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.04693543100622747, | |
| "grad_norm": 1.369025707244873, | |
| "learning_rate": 0.00047674011892270027, | |
| "loss": 1.7445, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.04702283404348301, | |
| "grad_norm": 0.42706942558288574, | |
| "learning_rate": 0.00047669639734172786, | |
| "loss": 1.1781, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.047110237080738554, | |
| "grad_norm": 0.36257731914520264, | |
| "learning_rate": 0.0004766526757607555, | |
| "loss": 1.0557, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.0471976401179941, | |
| "grad_norm": 0.4783022105693817, | |
| "learning_rate": 0.00047660895417978315, | |
| "loss": 1.053, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.047285043155249645, | |
| "grad_norm": 0.3079909384250641, | |
| "learning_rate": 0.0004765652325988108, | |
| "loss": 1.1313, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.04737244619250519, | |
| "grad_norm": 0.4072510302066803, | |
| "learning_rate": 0.00047652151101783844, | |
| "loss": 0.8678, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.04745984922976074, | |
| "grad_norm": 0.36985546350479126, | |
| "learning_rate": 0.00047647778943686603, | |
| "loss": 0.9387, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.04754725226701628, | |
| "grad_norm": 0.4222630262374878, | |
| "learning_rate": 0.0004764340678558937, | |
| "loss": 0.9083, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.04763465530427182, | |
| "grad_norm": 0.39896291494369507, | |
| "learning_rate": 0.0004763903462749213, | |
| "loss": 0.9773, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.04772205834152737, | |
| "grad_norm": 0.3235687017440796, | |
| "learning_rate": 0.00047634662469394896, | |
| "loss": 0.9484, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.04780946137878291, | |
| "grad_norm": 0.3377327620983124, | |
| "learning_rate": 0.0004763029031129766, | |
| "loss": 0.9319, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.04789686441603846, | |
| "grad_norm": 0.37998026609420776, | |
| "learning_rate": 0.0004762591815320042, | |
| "loss": 1.3499, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.047984267453294004, | |
| "grad_norm": 0.37219107151031494, | |
| "learning_rate": 0.00047621545995103184, | |
| "loss": 1.1132, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.04807167049054955, | |
| "grad_norm": 0.3147220313549042, | |
| "learning_rate": 0.00047617173837005944, | |
| "loss": 0.9306, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.04815907352780509, | |
| "grad_norm": 0.3832624852657318, | |
| "learning_rate": 0.00047612801678908713, | |
| "loss": 0.8518, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.048246476565060634, | |
| "grad_norm": 0.3098907172679901, | |
| "learning_rate": 0.0004760842952081147, | |
| "loss": 0.8183, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.04833387960231618, | |
| "grad_norm": 0.3062676191329956, | |
| "learning_rate": 0.00047604057362714237, | |
| "loss": 0.9226, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.048421282639571725, | |
| "grad_norm": 0.3292568624019623, | |
| "learning_rate": 0.00047599685204617, | |
| "loss": 0.9204, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.04850868567682727, | |
| "grad_norm": 0.45942652225494385, | |
| "learning_rate": 0.0004759531304651976, | |
| "loss": 1.1571, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.048596088714082816, | |
| "grad_norm": 0.3519571125507355, | |
| "learning_rate": 0.00047590940888422525, | |
| "loss": 0.9566, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.04868349175133836, | |
| "grad_norm": 0.3418327569961548, | |
| "learning_rate": 0.0004758656873032529, | |
| "loss": 1.146, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.0487708947885939, | |
| "grad_norm": 0.3338674008846283, | |
| "learning_rate": 0.00047582196572228054, | |
| "loss": 1.0859, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.048858297825849446, | |
| "grad_norm": 1.2700949907302856, | |
| "learning_rate": 0.00047577824414130813, | |
| "loss": 1.3166, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.04894570086310499, | |
| "grad_norm": 0.706069827079773, | |
| "learning_rate": 0.0004757345225603358, | |
| "loss": 1.2259, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.04903310390036054, | |
| "grad_norm": 0.5171198844909668, | |
| "learning_rate": 0.0004756908009793634, | |
| "loss": 0.7985, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.04912050693761608, | |
| "grad_norm": 0.8621017932891846, | |
| "learning_rate": 0.00047564707939839107, | |
| "loss": 1.0042, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.04920790997487163, | |
| "grad_norm": 0.926487922668457, | |
| "learning_rate": 0.0004756033578174187, | |
| "loss": 0.9945, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.049295313012127175, | |
| "grad_norm": 0.9586560726165771, | |
| "learning_rate": 0.0004755596362364463, | |
| "loss": 1.5266, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.04938271604938271, | |
| "grad_norm": 0.507824182510376, | |
| "learning_rate": 0.00047551591465547395, | |
| "loss": 0.8737, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.04947011908663826, | |
| "grad_norm": 0.38291049003601074, | |
| "learning_rate": 0.0004754721930745016, | |
| "loss": 0.7636, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.049557522123893805, | |
| "grad_norm": 0.40479573607444763, | |
| "learning_rate": 0.0004754284714935292, | |
| "loss": 0.781, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.04964492516114935, | |
| "grad_norm": 0.6375040411949158, | |
| "learning_rate": 0.0004753847499125569, | |
| "loss": 1.1493, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.049732328198404896, | |
| "grad_norm": 0.3949948847293854, | |
| "learning_rate": 0.0004753410283315845, | |
| "loss": 0.9626, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.04981973123566044, | |
| "grad_norm": 0.3734526038169861, | |
| "learning_rate": 0.0004752973067506121, | |
| "loss": 0.9207, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.04990713427291599, | |
| "grad_norm": 0.5179705619812012, | |
| "learning_rate": 0.0004752535851696397, | |
| "loss": 1.3906, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.049994537310171526, | |
| "grad_norm": 0.4602389931678772, | |
| "learning_rate": 0.00047520986358866736, | |
| "loss": 1.0577, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.05008194034742707, | |
| "grad_norm": 0.30401960015296936, | |
| "learning_rate": 0.00047516614200769506, | |
| "loss": 1.13, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.05016934338468262, | |
| "grad_norm": 0.3481753170490265, | |
| "learning_rate": 0.00047512242042672265, | |
| "loss": 0.857, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.05025674642193816, | |
| "grad_norm": 0.4005964398384094, | |
| "learning_rate": 0.0004750786988457503, | |
| "loss": 0.9569, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.05034414945919371, | |
| "grad_norm": 0.43765851855278015, | |
| "learning_rate": 0.0004750349772647779, | |
| "loss": 1.2156, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.050431552496449254, | |
| "grad_norm": 0.3252186179161072, | |
| "learning_rate": 0.00047499125568380553, | |
| "loss": 1.0392, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.05051895553370479, | |
| "grad_norm": 0.3639061152935028, | |
| "learning_rate": 0.0004749475341028331, | |
| "loss": 0.914, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.05060635857096034, | |
| "grad_norm": 0.3080824911594391, | |
| "learning_rate": 0.0004749038125218608, | |
| "loss": 0.9735, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.050693761608215884, | |
| "grad_norm": 0.33566662669181824, | |
| "learning_rate": 0.00047486009094088846, | |
| "loss": 1.1619, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.05078116464547143, | |
| "grad_norm": 0.2990110218524933, | |
| "learning_rate": 0.00047481636935991605, | |
| "loss": 0.97, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.050868567682726976, | |
| "grad_norm": 0.3264564871788025, | |
| "learning_rate": 0.0004747726477789437, | |
| "loss": 0.824, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.05095597071998252, | |
| "grad_norm": 0.37740233540534973, | |
| "learning_rate": 0.0004747289261979713, | |
| "loss": 1.1715, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.05104337375723807, | |
| "grad_norm": 0.39894765615463257, | |
| "learning_rate": 0.00047468520461699894, | |
| "loss": 1.3263, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.051130776794493606, | |
| "grad_norm": 0.3279603123664856, | |
| "learning_rate": 0.00047464148303602663, | |
| "loss": 0.8633, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.05121817983174915, | |
| "grad_norm": 0.30895987153053284, | |
| "learning_rate": 0.0004745977614550542, | |
| "loss": 0.9019, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.0513055828690047, | |
| "grad_norm": 0.8510332703590393, | |
| "learning_rate": 0.00047455403987408187, | |
| "loss": 0.9492, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.05139298590626024, | |
| "grad_norm": 0.5336425304412842, | |
| "learning_rate": 0.00047451031829310946, | |
| "loss": 0.8209, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.05148038894351579, | |
| "grad_norm": 0.3380926847457886, | |
| "learning_rate": 0.0004744665967121371, | |
| "loss": 0.8024, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.051567791980771334, | |
| "grad_norm": 0.3537689447402954, | |
| "learning_rate": 0.00047442287513116475, | |
| "loss": 1.1219, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.05165519501802688, | |
| "grad_norm": 0.5417413711547852, | |
| "learning_rate": 0.0004743791535501924, | |
| "loss": 1.0341, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.05174259805528242, | |
| "grad_norm": 0.4394038915634155, | |
| "learning_rate": 0.00047433543196922004, | |
| "loss": 0.934, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.051830001092537964, | |
| "grad_norm": 0.738370954990387, | |
| "learning_rate": 0.00047429171038824763, | |
| "loss": 1.1953, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.05191740412979351, | |
| "grad_norm": 0.33024734258651733, | |
| "learning_rate": 0.0004742479888072753, | |
| "loss": 0.687, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.052004807167049055, | |
| "grad_norm": 0.3696803152561188, | |
| "learning_rate": 0.00047420426722630287, | |
| "loss": 1.0533, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.0520922102043046, | |
| "grad_norm": 0.31398460268974304, | |
| "learning_rate": 0.00047416054564533057, | |
| "loss": 1.0434, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.05217961324156015, | |
| "grad_norm": 0.3482360541820526, | |
| "learning_rate": 0.0004741168240643582, | |
| "loss": 1.2415, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.05226701627881569, | |
| "grad_norm": 0.32207486033439636, | |
| "learning_rate": 0.0004740731024833858, | |
| "loss": 1.1465, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.05235441931607123, | |
| "grad_norm": 0.2964969277381897, | |
| "learning_rate": 0.00047402938090241345, | |
| "loss": 0.8746, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.05244182235332678, | |
| "grad_norm": 0.26993119716644287, | |
| "learning_rate": 0.00047398565932144104, | |
| "loss": 0.9161, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.05252922539058232, | |
| "grad_norm": 0.31088942289352417, | |
| "learning_rate": 0.00047394193774046874, | |
| "loss": 0.938, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.05261662842783787, | |
| "grad_norm": 0.2921091318130493, | |
| "learning_rate": 0.00047389821615949633, | |
| "loss": 0.914, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.052704031465093414, | |
| "grad_norm": 0.4693572223186493, | |
| "learning_rate": 0.000473854494578524, | |
| "loss": 0.9083, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.05279143450234896, | |
| "grad_norm": 0.6201152801513672, | |
| "learning_rate": 0.0004738107729975516, | |
| "loss": 1.1098, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.0528788375396045, | |
| "grad_norm": 0.48871442675590515, | |
| "learning_rate": 0.0004737670514165792, | |
| "loss": 1.1571, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.052966240576860044, | |
| "grad_norm": 0.26332658529281616, | |
| "learning_rate": 0.00047372332983560686, | |
| "loss": 0.995, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.05305364361411559, | |
| "grad_norm": 0.7663961052894592, | |
| "learning_rate": 0.0004736796082546345, | |
| "loss": 1.0206, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.053141046651371135, | |
| "grad_norm": 0.3350706100463867, | |
| "learning_rate": 0.00047363588667366215, | |
| "loss": 1.0328, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.05322844968862668, | |
| "grad_norm": 0.30147233605384827, | |
| "learning_rate": 0.00047359216509268974, | |
| "loss": 0.8874, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.053315852725882226, | |
| "grad_norm": 0.4487704038619995, | |
| "learning_rate": 0.0004735484435117174, | |
| "loss": 0.8327, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.05340325576313777, | |
| "grad_norm": 0.474685400724411, | |
| "learning_rate": 0.00047350472193074503, | |
| "loss": 0.8405, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.05349065880039331, | |
| "grad_norm": 0.6512682437896729, | |
| "learning_rate": 0.0004734610003497726, | |
| "loss": 1.418, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.053578061837648856, | |
| "grad_norm": 0.3829117715358734, | |
| "learning_rate": 0.0004734172787688003, | |
| "loss": 0.9036, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.0536654648749044, | |
| "grad_norm": 0.3626525402069092, | |
| "learning_rate": 0.0004733735571878279, | |
| "loss": 0.9919, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.05375286791215995, | |
| "grad_norm": 0.6899876594543457, | |
| "learning_rate": 0.00047332983560685555, | |
| "loss": 0.8781, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.05384027094941549, | |
| "grad_norm": 0.33936572074890137, | |
| "learning_rate": 0.0004732861140258832, | |
| "loss": 0.7375, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.05392767398667104, | |
| "grad_norm": 0.45376959443092346, | |
| "learning_rate": 0.0004732423924449108, | |
| "loss": 0.868, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.054015077023926585, | |
| "grad_norm": 0.5580937266349792, | |
| "learning_rate": 0.0004731986708639385, | |
| "loss": 1.182, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.05410248006118212, | |
| "grad_norm": 0.3207378685474396, | |
| "learning_rate": 0.0004731549492829661, | |
| "loss": 0.9069, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.05418988309843767, | |
| "grad_norm": 0.3553832769393921, | |
| "learning_rate": 0.0004731112277019937, | |
| "loss": 1.4, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.054277286135693215, | |
| "grad_norm": 0.3708738386631012, | |
| "learning_rate": 0.0004730675061210213, | |
| "loss": 1.1475, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.05436468917294876, | |
| "grad_norm": 0.35041436553001404, | |
| "learning_rate": 0.00047302378454004896, | |
| "loss": 0.9505, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.054452092210204306, | |
| "grad_norm": 0.37304723262786865, | |
| "learning_rate": 0.0004729800629590766, | |
| "loss": 0.8858, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.05453949524745985, | |
| "grad_norm": 0.34602999687194824, | |
| "learning_rate": 0.00047293634137810425, | |
| "loss": 1.0687, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.0546268982847154, | |
| "grad_norm": 0.3194156587123871, | |
| "learning_rate": 0.0004728926197971319, | |
| "loss": 0.9222, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.054714301321970936, | |
| "grad_norm": 0.34864407777786255, | |
| "learning_rate": 0.0004728488982161595, | |
| "loss": 1.1291, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.05480170435922648, | |
| "grad_norm": 0.27222639322280884, | |
| "learning_rate": 0.00047280517663518713, | |
| "loss": 0.9762, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.05488910739648203, | |
| "grad_norm": 0.289035826921463, | |
| "learning_rate": 0.0004727614550542148, | |
| "loss": 0.84, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.05497651043373757, | |
| "grad_norm": 1.1678911447525024, | |
| "learning_rate": 0.0004727177334732424, | |
| "loss": 0.8835, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.05506391347099312, | |
| "grad_norm": 0.32149800658226013, | |
| "learning_rate": 0.00047267401189227007, | |
| "loss": 0.8814, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.055151316508248664, | |
| "grad_norm": 0.3312610387802124, | |
| "learning_rate": 0.00047263029031129766, | |
| "loss": 0.9001, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.0552387195455042, | |
| "grad_norm": 0.32734236121177673, | |
| "learning_rate": 0.0004725865687303253, | |
| "loss": 0.6587, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.05532612258275975, | |
| "grad_norm": 0.780978798866272, | |
| "learning_rate": 0.0004725428471493529, | |
| "loss": 1.1513, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.055413525620015294, | |
| "grad_norm": 0.3088547885417938, | |
| "learning_rate": 0.00047249912556838054, | |
| "loss": 0.8629, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.05550092865727084, | |
| "grad_norm": 0.34646108746528625, | |
| "learning_rate": 0.00047245540398740824, | |
| "loss": 0.8972, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.055588331694526386, | |
| "grad_norm": 0.47034963965415955, | |
| "learning_rate": 0.00047241168240643583, | |
| "loss": 1.414, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.05567573473178193, | |
| "grad_norm": 0.3200039565563202, | |
| "learning_rate": 0.0004723679608254635, | |
| "loss": 1.0516, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.05576313776903748, | |
| "grad_norm": 0.3332134187221527, | |
| "learning_rate": 0.00047232423924449107, | |
| "loss": 0.9086, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.055850540806293016, | |
| "grad_norm": 0.4804655611515045, | |
| "learning_rate": 0.0004722805176635187, | |
| "loss": 0.9719, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.05593794384354856, | |
| "grad_norm": 0.3591998219490051, | |
| "learning_rate": 0.0004722367960825463, | |
| "loss": 0.7201, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.05602534688080411, | |
| "grad_norm": 0.3319551944732666, | |
| "learning_rate": 0.000472193074501574, | |
| "loss": 1.1264, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.05611274991805965, | |
| "grad_norm": 0.3312825858592987, | |
| "learning_rate": 0.00047214935292060165, | |
| "loss": 1.0482, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.0562001529553152, | |
| "grad_norm": 0.3713119328022003, | |
| "learning_rate": 0.00047210563133962924, | |
| "loss": 1.1576, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.056287555992570744, | |
| "grad_norm": 0.35899418592453003, | |
| "learning_rate": 0.0004720619097586569, | |
| "loss": 0.7906, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.05637495902982629, | |
| "grad_norm": 0.31557363271713257, | |
| "learning_rate": 0.0004720181881776845, | |
| "loss": 0.9632, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.05646236206708183, | |
| "grad_norm": 0.40129950642585754, | |
| "learning_rate": 0.00047197446659671217, | |
| "loss": 1.3243, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.056549765104337374, | |
| "grad_norm": 0.3548416495323181, | |
| "learning_rate": 0.0004719307450157398, | |
| "loss": 1.0228, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.05663716814159292, | |
| "grad_norm": 0.5984897017478943, | |
| "learning_rate": 0.0004718870234347674, | |
| "loss": 0.9532, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.056724571178848465, | |
| "grad_norm": 0.2719477117061615, | |
| "learning_rate": 0.00047184330185379505, | |
| "loss": 0.9909, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.05681197421610401, | |
| "grad_norm": 0.2690770626068115, | |
| "learning_rate": 0.00047179958027282264, | |
| "loss": 0.9754, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.05689937725335956, | |
| "grad_norm": 0.3287508189678192, | |
| "learning_rate": 0.0004717558586918503, | |
| "loss": 0.823, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.0569867802906151, | |
| "grad_norm": 0.6442591547966003, | |
| "learning_rate": 0.00047171213711087793, | |
| "loss": 1.1211, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.05707418332787064, | |
| "grad_norm": 0.3647923469543457, | |
| "learning_rate": 0.0004716684155299056, | |
| "loss": 0.8892, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.05716158636512619, | |
| "grad_norm": 0.3035934269428253, | |
| "learning_rate": 0.0004716246939489332, | |
| "loss": 0.9781, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.05724898940238173, | |
| "grad_norm": 0.2986050546169281, | |
| "learning_rate": 0.0004715809723679608, | |
| "loss": 0.873, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.05733639243963728, | |
| "grad_norm": 0.3101188540458679, | |
| "learning_rate": 0.00047153725078698846, | |
| "loss": 1.1788, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.057423795476892824, | |
| "grad_norm": 1.2602791786193848, | |
| "learning_rate": 0.0004714935292060161, | |
| "loss": 1.376, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.05751119851414837, | |
| "grad_norm": 0.374224454164505, | |
| "learning_rate": 0.00047144980762504375, | |
| "loss": 0.9379, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.05759860155140391, | |
| "grad_norm": 0.35825932025909424, | |
| "learning_rate": 0.0004714060860440714, | |
| "loss": 0.9601, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.057686004588659454, | |
| "grad_norm": 0.37547796964645386, | |
| "learning_rate": 0.000471362364463099, | |
| "loss": 1.5432, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.057773407625915, | |
| "grad_norm": 0.30925118923187256, | |
| "learning_rate": 0.00047131864288212663, | |
| "loss": 0.9129, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.057860810663170545, | |
| "grad_norm": 0.43315598368644714, | |
| "learning_rate": 0.0004712749213011542, | |
| "loss": 0.7993, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.05794821370042609, | |
| "grad_norm": 1.0459505319595337, | |
| "learning_rate": 0.0004712311997201819, | |
| "loss": 1.4232, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.058035616737681636, | |
| "grad_norm": 0.4363897740840912, | |
| "learning_rate": 0.0004711874781392095, | |
| "loss": 1.3812, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.05812301977493718, | |
| "grad_norm": 0.2475530058145523, | |
| "learning_rate": 0.00047114375655823716, | |
| "loss": 0.8574, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.05821042281219272, | |
| "grad_norm": 0.352760910987854, | |
| "learning_rate": 0.0004711000349772648, | |
| "loss": 1.1236, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.058297825849448266, | |
| "grad_norm": 0.5032192468643188, | |
| "learning_rate": 0.0004710563133962924, | |
| "loss": 1.1754, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.05838522888670381, | |
| "grad_norm": 0.35939404368400574, | |
| "learning_rate": 0.0004710125918153201, | |
| "loss": 0.963, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.05847263192395936, | |
| "grad_norm": 0.4467969834804535, | |
| "learning_rate": 0.0004709688702343477, | |
| "loss": 2.0293, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.0585600349612149, | |
| "grad_norm": 0.3420664966106415, | |
| "learning_rate": 0.00047092514865337533, | |
| "loss": 1.0342, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.05864743799847045, | |
| "grad_norm": 0.3728554844856262, | |
| "learning_rate": 0.0004708814270724029, | |
| "loss": 0.9747, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.058734841035725995, | |
| "grad_norm": 1.2405109405517578, | |
| "learning_rate": 0.00047083770549143057, | |
| "loss": 1.6034, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.05882224407298153, | |
| "grad_norm": 0.3643404543399811, | |
| "learning_rate": 0.0004707939839104582, | |
| "loss": 0.7948, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.05890964711023708, | |
| "grad_norm": 0.31262850761413574, | |
| "learning_rate": 0.00047075026232948586, | |
| "loss": 0.8154, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.058997050147492625, | |
| "grad_norm": 0.49073535203933716, | |
| "learning_rate": 0.0004707065407485135, | |
| "loss": 0.9082, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.05908445318474817, | |
| "grad_norm": 0.39412635564804077, | |
| "learning_rate": 0.0004706628191675411, | |
| "loss": 1.0025, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.059171856222003716, | |
| "grad_norm": 0.40831953287124634, | |
| "learning_rate": 0.00047061909758656874, | |
| "loss": 1.0005, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.05925925925925926, | |
| "grad_norm": 0.5391172766685486, | |
| "learning_rate": 0.0004705753760055964, | |
| "loss": 0.9031, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.05934666229651481, | |
| "grad_norm": 0.31176143884658813, | |
| "learning_rate": 0.000470531654424624, | |
| "loss": 0.9589, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.059434065333770346, | |
| "grad_norm": 0.4320748448371887, | |
| "learning_rate": 0.00047048793284365167, | |
| "loss": 1.0996, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.05952146837102589, | |
| "grad_norm": 0.4102902412414551, | |
| "learning_rate": 0.00047044421126267926, | |
| "loss": 2.0338, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.05960887140828144, | |
| "grad_norm": 0.36022135615348816, | |
| "learning_rate": 0.0004704004896817069, | |
| "loss": 0.9675, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.05969627444553698, | |
| "grad_norm": 0.34680843353271484, | |
| "learning_rate": 0.0004703567681007345, | |
| "loss": 0.8765, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.05978367748279253, | |
| "grad_norm": 0.29740166664123535, | |
| "learning_rate": 0.00047031304651976214, | |
| "loss": 1.0053, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.059871080520048074, | |
| "grad_norm": 0.31341496109962463, | |
| "learning_rate": 0.00047026932493878984, | |
| "loss": 1.0295, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.05995848355730361, | |
| "grad_norm": 2.076716184616089, | |
| "learning_rate": 0.00047022560335781743, | |
| "loss": 1.5646, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.06004588659455916, | |
| "grad_norm": 0.2896002531051636, | |
| "learning_rate": 0.0004701818817768451, | |
| "loss": 0.9136, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.060133289631814704, | |
| "grad_norm": 0.37143734097480774, | |
| "learning_rate": 0.00047013816019587267, | |
| "loss": 0.8871, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.06022069266907025, | |
| "grad_norm": 0.49429547786712646, | |
| "learning_rate": 0.0004700944386149003, | |
| "loss": 1.1602, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.060308095706325796, | |
| "grad_norm": 0.3905726671218872, | |
| "learning_rate": 0.0004700507170339279, | |
| "loss": 1.1543, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.06039549874358134, | |
| "grad_norm": 0.3924982249736786, | |
| "learning_rate": 0.0004700069954529556, | |
| "loss": 0.8275, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.06048290178083689, | |
| "grad_norm": 0.27903103828430176, | |
| "learning_rate": 0.00046996327387198325, | |
| "loss": 0.8494, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.060570304818092426, | |
| "grad_norm": 0.382907897233963, | |
| "learning_rate": 0.00046991955229101084, | |
| "loss": 0.9531, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.06065770785534797, | |
| "grad_norm": 0.37153640389442444, | |
| "learning_rate": 0.0004698758307100385, | |
| "loss": 0.9131, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.06074511089260352, | |
| "grad_norm": 0.3007877767086029, | |
| "learning_rate": 0.0004698321091290661, | |
| "loss": 0.9513, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.06083251392985906, | |
| "grad_norm": 0.2546001672744751, | |
| "learning_rate": 0.0004697883875480938, | |
| "loss": 0.944, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.06091991696711461, | |
| "grad_norm": 0.27665847539901733, | |
| "learning_rate": 0.0004697446659671214, | |
| "loss": 0.7422, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.061007320004370154, | |
| "grad_norm": 0.28401628136634827, | |
| "learning_rate": 0.000469700944386149, | |
| "loss": 0.8458, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.0610947230416257, | |
| "grad_norm": 0.5097898840904236, | |
| "learning_rate": 0.00046965722280517666, | |
| "loss": 1.0018, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.06118212607888124, | |
| "grad_norm": 0.44888317584991455, | |
| "learning_rate": 0.00046961350122420425, | |
| "loss": 1.1203, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.061269529116136784, | |
| "grad_norm": 0.25764307379722595, | |
| "learning_rate": 0.0004695697796432319, | |
| "loss": 1.0156, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.06135693215339233, | |
| "grad_norm": 0.31590837240219116, | |
| "learning_rate": 0.00046952605806225954, | |
| "loss": 0.8823, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.061444335190647875, | |
| "grad_norm": 0.6337835192680359, | |
| "learning_rate": 0.0004694823364812872, | |
| "loss": 1.1565, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.06153173822790342, | |
| "grad_norm": 0.34477898478507996, | |
| "learning_rate": 0.00046943861490031483, | |
| "loss": 0.7563, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.061619141265158967, | |
| "grad_norm": 0.39787057042121887, | |
| "learning_rate": 0.0004693948933193424, | |
| "loss": 0.9804, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.06170654430241451, | |
| "grad_norm": 0.28919321298599243, | |
| "learning_rate": 0.00046935117173837007, | |
| "loss": 1.0019, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.06179394733967005, | |
| "grad_norm": 0.25737130641937256, | |
| "learning_rate": 0.00046930745015739766, | |
| "loss": 0.8751, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.0618813503769256, | |
| "grad_norm": 0.2699412703514099, | |
| "learning_rate": 0.00046926372857642536, | |
| "loss": 0.8999, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.06196875341418114, | |
| "grad_norm": 0.2957920730113983, | |
| "learning_rate": 0.000469220006995453, | |
| "loss": 0.9083, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.06205615645143669, | |
| "grad_norm": 0.2826875150203705, | |
| "learning_rate": 0.0004691762854144806, | |
| "loss": 0.946, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.062143559488692234, | |
| "grad_norm": 0.29016223549842834, | |
| "learning_rate": 0.00046913256383350824, | |
| "loss": 0.8126, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.06223096252594778, | |
| "grad_norm": 0.3504863679409027, | |
| "learning_rate": 0.00046908884225253583, | |
| "loss": 0.9127, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.06231836556320332, | |
| "grad_norm": 0.2627776861190796, | |
| "learning_rate": 0.00046904512067156353, | |
| "loss": 0.9476, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.062405768600458864, | |
| "grad_norm": 0.3002050220966339, | |
| "learning_rate": 0.0004690013990905911, | |
| "loss": 0.9444, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.06249317163771441, | |
| "grad_norm": 0.8539018630981445, | |
| "learning_rate": 0.00046895767750961876, | |
| "loss": 0.8977, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.06258057467496995, | |
| "grad_norm": 0.25260186195373535, | |
| "learning_rate": 0.0004689139559286464, | |
| "loss": 0.9615, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.0626679777122255, | |
| "grad_norm": 0.25615084171295166, | |
| "learning_rate": 0.000468870234347674, | |
| "loss": 0.8912, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.06275538074948105, | |
| "grad_norm": 0.3263600170612335, | |
| "learning_rate": 0.00046882651276670164, | |
| "loss": 0.843, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.06284278378673659, | |
| "grad_norm": 0.5694889426231384, | |
| "learning_rate": 0.0004687827911857293, | |
| "loss": 1.1624, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.06293018682399214, | |
| "grad_norm": 0.3248819410800934, | |
| "learning_rate": 0.00046873906960475693, | |
| "loss": 0.9452, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.06301758986124768, | |
| "grad_norm": 0.40857037901878357, | |
| "learning_rate": 0.0004686953480237845, | |
| "loss": 0.9117, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.06310499289850323, | |
| "grad_norm": 0.3211118280887604, | |
| "learning_rate": 0.00046865162644281217, | |
| "loss": 0.794, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.06319239593575877, | |
| "grad_norm": 0.32386934757232666, | |
| "learning_rate": 0.0004686079048618398, | |
| "loss": 1.2288, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.0632797989730143, | |
| "grad_norm": 0.3044579029083252, | |
| "learning_rate": 0.00046856418328086746, | |
| "loss": 0.9187, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.06336720201026985, | |
| "grad_norm": 0.6175875067710876, | |
| "learning_rate": 0.0004685204616998951, | |
| "loss": 0.8695, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.0634546050475254, | |
| "grad_norm": 0.7931004166603088, | |
| "learning_rate": 0.0004684767401189227, | |
| "loss": 1.3616, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.06354200808478094, | |
| "grad_norm": 0.337348997592926, | |
| "learning_rate": 0.00046843301853795034, | |
| "loss": 0.8654, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.06362941112203649, | |
| "grad_norm": 0.4152870178222656, | |
| "learning_rate": 0.000468389296956978, | |
| "loss": 1.2349, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.06371681415929203, | |
| "grad_norm": 0.3474035859107971, | |
| "learning_rate": 0.0004683455753760056, | |
| "loss": 0.9225, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.06380421719654758, | |
| "grad_norm": 0.35225990414619446, | |
| "learning_rate": 0.0004683018537950333, | |
| "loss": 0.9248, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.06389162023380313, | |
| "grad_norm": 0.24920597672462463, | |
| "learning_rate": 0.00046825813221406087, | |
| "loss": 0.8138, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.06397902327105867, | |
| "grad_norm": 0.3522126376628876, | |
| "learning_rate": 0.0004682144106330885, | |
| "loss": 0.9314, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.06406642630831422, | |
| "grad_norm": 0.4510492980480194, | |
| "learning_rate": 0.0004681706890521161, | |
| "loss": 0.8733, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.06415382934556976, | |
| "grad_norm": 0.2538619935512543, | |
| "learning_rate": 0.00046812696747114375, | |
| "loss": 0.8893, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.06424123238282531, | |
| "grad_norm": 0.39753592014312744, | |
| "learning_rate": 0.0004680832458901714, | |
| "loss": 1.0493, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.06432863542008085, | |
| "grad_norm": 0.40073463320732117, | |
| "learning_rate": 0.00046803952430919904, | |
| "loss": 0.8895, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.06441603845733639, | |
| "grad_norm": 0.31110239028930664, | |
| "learning_rate": 0.0004679958027282267, | |
| "loss": 0.8689, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.06450344149459193, | |
| "grad_norm": 0.29956865310668945, | |
| "learning_rate": 0.0004679520811472543, | |
| "loss": 0.8385, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.06459084453184748, | |
| "grad_norm": 0.3735499382019043, | |
| "learning_rate": 0.0004679083595662819, | |
| "loss": 0.8552, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.06467824756910302, | |
| "grad_norm": 0.4668900966644287, | |
| "learning_rate": 0.0004678646379853095, | |
| "loss": 1.4957, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.06476565060635857, | |
| "grad_norm": 0.363799512386322, | |
| "learning_rate": 0.0004678209164043372, | |
| "loss": 1.0365, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.06485305364361411, | |
| "grad_norm": 0.3261052668094635, | |
| "learning_rate": 0.00046777719482336486, | |
| "loss": 0.8972, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.06494045668086966, | |
| "grad_norm": 0.27814945578575134, | |
| "learning_rate": 0.00046773347324239245, | |
| "loss": 0.8051, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.0650278597181252, | |
| "grad_norm": 0.37245509028434753, | |
| "learning_rate": 0.0004676897516614201, | |
| "loss": 0.9421, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.06511526275538075, | |
| "grad_norm": 0.2978193163871765, | |
| "learning_rate": 0.0004676460300804477, | |
| "loss": 0.8464, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.0652026657926363, | |
| "grad_norm": 0.41827908158302307, | |
| "learning_rate": 0.00046760230849947533, | |
| "loss": 1.3154, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.06529006882989184, | |
| "grad_norm": 0.28153055906295776, | |
| "learning_rate": 0.000467558586918503, | |
| "loss": 0.812, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.06537747186714739, | |
| "grad_norm": 0.3568740487098694, | |
| "learning_rate": 0.0004675148653375306, | |
| "loss": 0.9333, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.06546487490440293, | |
| "grad_norm": 0.5805249810218811, | |
| "learning_rate": 0.00046747114375655826, | |
| "loss": 1.3821, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.06555227794165848, | |
| "grad_norm": 0.30053797364234924, | |
| "learning_rate": 0.00046742742217558585, | |
| "loss": 0.9358, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.06563968097891401, | |
| "grad_norm": 0.3179711699485779, | |
| "learning_rate": 0.0004673837005946135, | |
| "loss": 0.9094, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.06572708401616956, | |
| "grad_norm": 0.2717473804950714, | |
| "learning_rate": 0.00046733997901364114, | |
| "loss": 0.7255, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.0658144870534251, | |
| "grad_norm": 0.24072229862213135, | |
| "learning_rate": 0.0004672962574326688, | |
| "loss": 1.1008, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.06590189009068065, | |
| "grad_norm": 0.3099074363708496, | |
| "learning_rate": 0.00046725253585169643, | |
| "loss": 0.8751, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.0659892931279362, | |
| "grad_norm": 0.31873032450675964, | |
| "learning_rate": 0.000467208814270724, | |
| "loss": 0.8932, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.06607669616519174, | |
| "grad_norm": 0.31468328833580017, | |
| "learning_rate": 0.00046716509268975167, | |
| "loss": 0.8792, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.06616409920244729, | |
| "grad_norm": 0.35658881068229675, | |
| "learning_rate": 0.00046712137110877926, | |
| "loss": 0.8955, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.06625150223970283, | |
| "grad_norm": 0.3107976019382477, | |
| "learning_rate": 0.00046707764952780696, | |
| "loss": 0.9174, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.06633890527695838, | |
| "grad_norm": 0.2277815192937851, | |
| "learning_rate": 0.0004670339279468346, | |
| "loss": 0.7611, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.06642630831421392, | |
| "grad_norm": 0.25561246275901794, | |
| "learning_rate": 0.0004669902063658622, | |
| "loss": 0.8041, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.06651371135146947, | |
| "grad_norm": 0.2826947271823883, | |
| "learning_rate": 0.00046694648478488984, | |
| "loss": 0.7732, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.06660111438872501, | |
| "grad_norm": 0.2515583038330078, | |
| "learning_rate": 0.00046690276320391743, | |
| "loss": 1.0321, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.06668851742598056, | |
| "grad_norm": 0.26518338918685913, | |
| "learning_rate": 0.0004668590416229451, | |
| "loss": 1.1347, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.06677592046323609, | |
| "grad_norm": 0.2963607609272003, | |
| "learning_rate": 0.0004668153200419727, | |
| "loss": 0.9982, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.06686332350049164, | |
| "grad_norm": 0.2876517176628113, | |
| "learning_rate": 0.00046677159846100037, | |
| "loss": 0.6918, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.06695072653774718, | |
| "grad_norm": 0.3714672923088074, | |
| "learning_rate": 0.000466727876880028, | |
| "loss": 0.9023, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.06703812957500273, | |
| "grad_norm": 0.3568623960018158, | |
| "learning_rate": 0.0004666841552990556, | |
| "loss": 0.8378, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.06712553261225827, | |
| "grad_norm": 0.4770544469356537, | |
| "learning_rate": 0.00046664043371808325, | |
| "loss": 1.0266, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.06721293564951382, | |
| "grad_norm": 0.2760886549949646, | |
| "learning_rate": 0.0004665967121371109, | |
| "loss": 0.8276, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.06730033868676936, | |
| "grad_norm": 0.31360816955566406, | |
| "learning_rate": 0.00046655299055613854, | |
| "loss": 0.8646, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.06738774172402491, | |
| "grad_norm": 0.3075156509876251, | |
| "learning_rate": 0.00046650926897516613, | |
| "loss": 1.1144, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.06747514476128046, | |
| "grad_norm": 0.3104390501976013, | |
| "learning_rate": 0.0004664655473941938, | |
| "loss": 0.8923, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.067562547798536, | |
| "grad_norm": 0.3964294493198395, | |
| "learning_rate": 0.0004664218258132214, | |
| "loss": 1.0969, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.06764995083579155, | |
| "grad_norm": 0.3698040843009949, | |
| "learning_rate": 0.000466378104232249, | |
| "loss": 0.9078, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.0677373538730471, | |
| "grad_norm": 0.28510838747024536, | |
| "learning_rate": 0.0004663343826512767, | |
| "loss": 1.0075, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.06782475691030264, | |
| "grad_norm": 0.25500908493995667, | |
| "learning_rate": 0.0004662906610703043, | |
| "loss": 0.8457, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.06791215994755818, | |
| "grad_norm": 0.27927708625793457, | |
| "learning_rate": 0.00046624693948933195, | |
| "loss": 1.01, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.06799956298481372, | |
| "grad_norm": 0.2683468461036682, | |
| "learning_rate": 0.0004662032179083596, | |
| "loss": 1.0491, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.06808696602206926, | |
| "grad_norm": 0.31843262910842896, | |
| "learning_rate": 0.0004661594963273872, | |
| "loss": 0.9467, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.06817436905932481, | |
| "grad_norm": 0.27564141154289246, | |
| "learning_rate": 0.0004661157747464149, | |
| "loss": 0.9487, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.06826177209658035, | |
| "grad_norm": 0.2407764047384262, | |
| "learning_rate": 0.00046607205316544247, | |
| "loss": 0.8939, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.0683491751338359, | |
| "grad_norm": 0.3025217652320862, | |
| "learning_rate": 0.0004660283315844701, | |
| "loss": 0.9859, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.06843657817109144, | |
| "grad_norm": 0.2979051470756531, | |
| "learning_rate": 0.0004659846100034977, | |
| "loss": 0.9136, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.06852398120834699, | |
| "grad_norm": 0.28788650035858154, | |
| "learning_rate": 0.00046594088842252535, | |
| "loss": 0.9734, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.06861138424560254, | |
| "grad_norm": 0.2947753667831421, | |
| "learning_rate": 0.000465897166841553, | |
| "loss": 0.735, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.06869878728285808, | |
| "grad_norm": 0.3203105032444, | |
| "learning_rate": 0.00046585344526058064, | |
| "loss": 0.8992, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.06878619032011363, | |
| "grad_norm": 0.2638401985168457, | |
| "learning_rate": 0.0004658097236796083, | |
| "loss": 0.8669, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.06887359335736917, | |
| "grad_norm": 0.26712629199028015, | |
| "learning_rate": 0.0004657660020986359, | |
| "loss": 0.9765, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.06896099639462472, | |
| "grad_norm": 0.4055823087692261, | |
| "learning_rate": 0.0004657222805176635, | |
| "loss": 0.8117, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.06904839943188026, | |
| "grad_norm": 0.2518852651119232, | |
| "learning_rate": 0.00046567855893669117, | |
| "loss": 0.9517, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.0691358024691358, | |
| "grad_norm": 0.27589836716651917, | |
| "learning_rate": 0.00046563483735571876, | |
| "loss": 0.7855, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.06922320550639134, | |
| "grad_norm": 0.2739314138889313, | |
| "learning_rate": 0.00046559111577474646, | |
| "loss": 0.8862, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.06931060854364689, | |
| "grad_norm": 0.3271756172180176, | |
| "learning_rate": 0.00046554739419377405, | |
| "loss": 1.2893, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.06939801158090243, | |
| "grad_norm": 0.27038949728012085, | |
| "learning_rate": 0.0004655036726128017, | |
| "loss": 0.8059, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.06948541461815798, | |
| "grad_norm": 0.2605447471141815, | |
| "learning_rate": 0.0004654599510318293, | |
| "loss": 0.8816, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.06957281765541352, | |
| "grad_norm": 0.2714409828186035, | |
| "learning_rate": 0.00046541622945085693, | |
| "loss": 0.9307, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.06966022069266907, | |
| "grad_norm": 0.2455201894044876, | |
| "learning_rate": 0.00046537250786988463, | |
| "loss": 0.8321, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.06974762372992462, | |
| "grad_norm": 0.29036253690719604, | |
| "learning_rate": 0.0004653287862889122, | |
| "loss": 0.8605, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.06983502676718016, | |
| "grad_norm": 0.24069538712501526, | |
| "learning_rate": 0.00046528506470793987, | |
| "loss": 1.0819, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.0699224298044357, | |
| "grad_norm": 0.254304975271225, | |
| "learning_rate": 0.00046524134312696746, | |
| "loss": 0.7388, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.07000983284169125, | |
| "grad_norm": 0.27309149503707886, | |
| "learning_rate": 0.0004651976215459951, | |
| "loss": 0.7796, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.0700972358789468, | |
| "grad_norm": 0.26903948187828064, | |
| "learning_rate": 0.0004651538999650227, | |
| "loss": 1.0103, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.07018463891620234, | |
| "grad_norm": 0.2526533901691437, | |
| "learning_rate": 0.0004651101783840504, | |
| "loss": 0.8566, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.07027204195345789, | |
| "grad_norm": 0.2822379469871521, | |
| "learning_rate": 0.00046506645680307804, | |
| "loss": 0.9441, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.07035944499071342, | |
| "grad_norm": 0.27883851528167725, | |
| "learning_rate": 0.00046502273522210563, | |
| "loss": 0.9006, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.07044684802796897, | |
| "grad_norm": 0.23839306831359863, | |
| "learning_rate": 0.0004649790136411333, | |
| "loss": 0.8387, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.07053425106522451, | |
| "grad_norm": 0.2352200597524643, | |
| "learning_rate": 0.00046493529206016087, | |
| "loss": 0.8228, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.07062165410248006, | |
| "grad_norm": 0.31958913803100586, | |
| "learning_rate": 0.00046489157047918857, | |
| "loss": 1.0312, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.0707090571397356, | |
| "grad_norm": 0.286045640707016, | |
| "learning_rate": 0.0004648478488982162, | |
| "loss": 0.8427, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.07079646017699115, | |
| "grad_norm": 0.24101607501506805, | |
| "learning_rate": 0.0004648041273172438, | |
| "loss": 0.9986, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.0708838632142467, | |
| "grad_norm": 0.28324073553085327, | |
| "learning_rate": 0.00046476040573627145, | |
| "loss": 0.778, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.07097126625150224, | |
| "grad_norm": 0.30368572473526, | |
| "learning_rate": 0.00046471668415529904, | |
| "loss": 0.9543, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.07105866928875779, | |
| "grad_norm": 0.3159104585647583, | |
| "learning_rate": 0.0004646729625743267, | |
| "loss": 0.9481, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.07114607232601333, | |
| "grad_norm": 0.2856074869632721, | |
| "learning_rate": 0.00046462924099335433, | |
| "loss": 1.0117, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.07123347536326888, | |
| "grad_norm": 0.32605329155921936, | |
| "learning_rate": 0.00046458551941238197, | |
| "loss": 0.8451, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.07132087840052442, | |
| "grad_norm": 0.22008907794952393, | |
| "learning_rate": 0.0004645417978314096, | |
| "loss": 0.8965, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.07140828143777997, | |
| "grad_norm": 0.26317551732063293, | |
| "learning_rate": 0.0004644980762504372, | |
| "loss": 0.8644, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.0714956844750355, | |
| "grad_norm": 0.22049389779567719, | |
| "learning_rate": 0.00046445435466946485, | |
| "loss": 0.8144, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.07158308751229105, | |
| "grad_norm": 0.2786102890968323, | |
| "learning_rate": 0.00046441063308849244, | |
| "loss": 0.8841, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.07167049054954659, | |
| "grad_norm": 0.31796136498451233, | |
| "learning_rate": 0.00046436691150752014, | |
| "loss": 1.0665, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.07175789358680214, | |
| "grad_norm": 0.29958993196487427, | |
| "learning_rate": 0.0004643231899265478, | |
| "loss": 0.8789, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.07184529662405768, | |
| "grad_norm": 0.2706652283668518, | |
| "learning_rate": 0.0004642794683455754, | |
| "loss": 0.8721, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.07193269966131323, | |
| "grad_norm": 0.22537319362163544, | |
| "learning_rate": 0.000464235746764603, | |
| "loss": 0.9403, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.07202010269856877, | |
| "grad_norm": 0.34331005811691284, | |
| "learning_rate": 0.0004641920251836306, | |
| "loss": 1.1497, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.07210750573582432, | |
| "grad_norm": 0.25914907455444336, | |
| "learning_rate": 0.0004641483036026583, | |
| "loss": 1.1589, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.07219490877307987, | |
| "grad_norm": 0.2956130802631378, | |
| "learning_rate": 0.0004641045820216859, | |
| "loss": 0.8587, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.07228231181033541, | |
| "grad_norm": 0.30292391777038574, | |
| "learning_rate": 0.00046406086044071355, | |
| "loss": 0.9224, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.07236971484759096, | |
| "grad_norm": 0.3101223409175873, | |
| "learning_rate": 0.0004640171388597412, | |
| "loss": 0.9115, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.0724571178848465, | |
| "grad_norm": 0.2720979154109955, | |
| "learning_rate": 0.0004639734172787688, | |
| "loss": 0.8112, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.07254452092210205, | |
| "grad_norm": 0.2774461507797241, | |
| "learning_rate": 0.00046392969569779643, | |
| "loss": 0.9776, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.0726319239593576, | |
| "grad_norm": 0.25150200724601746, | |
| "learning_rate": 0.0004638859741168241, | |
| "loss": 1.0255, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.07271932699661313, | |
| "grad_norm": 0.2526938319206238, | |
| "learning_rate": 0.0004638422525358517, | |
| "loss": 0.7242, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.07280673003386867, | |
| "grad_norm": 0.29642441868782043, | |
| "learning_rate": 0.0004637985309548793, | |
| "loss": 1.0944, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.07289413307112422, | |
| "grad_norm": 0.250478595495224, | |
| "learning_rate": 0.00046375480937390696, | |
| "loss": 0.8324, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.07298153610837976, | |
| "grad_norm": 0.28843697905540466, | |
| "learning_rate": 0.0004637110877929346, | |
| "loss": 0.8646, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.07306893914563531, | |
| "grad_norm": 0.22244645655155182, | |
| "learning_rate": 0.00046366736621196225, | |
| "loss": 0.7966, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.07315634218289085, | |
| "grad_norm": 0.2418157458305359, | |
| "learning_rate": 0.0004636236446309899, | |
| "loss": 0.8101, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.0732437452201464, | |
| "grad_norm": 0.2781657874584198, | |
| "learning_rate": 0.0004635799230500175, | |
| "loss": 0.9902, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.07333114825740195, | |
| "grad_norm": 0.24249030649662018, | |
| "learning_rate": 0.00046353620146904513, | |
| "loss": 0.7445, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.07341855129465749, | |
| "grad_norm": 0.23980437219142914, | |
| "learning_rate": 0.0004634924798880728, | |
| "loss": 0.8168, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.07350595433191304, | |
| "grad_norm": 0.3362947106361389, | |
| "learning_rate": 0.00046344875830710037, | |
| "loss": 1.1176, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.07359335736916858, | |
| "grad_norm": 0.23380422592163086, | |
| "learning_rate": 0.00046340503672612807, | |
| "loss": 0.8311, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.07368076040642413, | |
| "grad_norm": 0.2908138632774353, | |
| "learning_rate": 0.00046336131514515566, | |
| "loss": 0.8315, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.07376816344367967, | |
| "grad_norm": 0.2556897699832916, | |
| "learning_rate": 0.0004633175935641833, | |
| "loss": 0.939, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.0738555664809352, | |
| "grad_norm": 0.3416728079319, | |
| "learning_rate": 0.0004632738719832109, | |
| "loss": 0.746, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.07394296951819075, | |
| "grad_norm": 0.2219434678554535, | |
| "learning_rate": 0.00046323015040223854, | |
| "loss": 1.0259, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.0740303725554463, | |
| "grad_norm": 0.3327368497848511, | |
| "learning_rate": 0.0004631864288212662, | |
| "loss": 1.4831, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.07411777559270184, | |
| "grad_norm": 0.28128185868263245, | |
| "learning_rate": 0.00046314270724029383, | |
| "loss": 0.9478, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.07420517862995739, | |
| "grad_norm": 0.29582032561302185, | |
| "learning_rate": 0.00046309898565932147, | |
| "loss": 0.9397, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.07429258166721293, | |
| "grad_norm": 0.26146262884140015, | |
| "learning_rate": 0.00046305526407834906, | |
| "loss": 0.6904, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.07437998470446848, | |
| "grad_norm": 0.3188638389110565, | |
| "learning_rate": 0.0004630115424973767, | |
| "loss": 0.7268, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.07446738774172403, | |
| "grad_norm": 0.2691085934638977, | |
| "learning_rate": 0.0004629678209164043, | |
| "loss": 0.7836, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.07455479077897957, | |
| "grad_norm": 0.2730037569999695, | |
| "learning_rate": 0.000462924099335432, | |
| "loss": 0.8207, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.07464219381623512, | |
| "grad_norm": 0.23849952220916748, | |
| "learning_rate": 0.00046288037775445964, | |
| "loss": 0.9859, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.07472959685349066, | |
| "grad_norm": 0.24940194189548492, | |
| "learning_rate": 0.00046283665617348723, | |
| "loss": 0.7821, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.07481699989074621, | |
| "grad_norm": 0.23495396971702576, | |
| "learning_rate": 0.0004627929345925149, | |
| "loss": 0.8847, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.07490440292800175, | |
| "grad_norm": 0.25201091170310974, | |
| "learning_rate": 0.00046274921301154247, | |
| "loss": 0.8386, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.0749918059652573, | |
| "grad_norm": 0.25054988265037537, | |
| "learning_rate": 0.0004627054914305701, | |
| "loss": 0.9939, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.07507920900251283, | |
| "grad_norm": 0.39931726455688477, | |
| "learning_rate": 0.0004626617698495978, | |
| "loss": 1.1039, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.07516661203976838, | |
| "grad_norm": 0.2789982855319977, | |
| "learning_rate": 0.0004626180482686254, | |
| "loss": 1.1707, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.07525401507702392, | |
| "grad_norm": 0.282528817653656, | |
| "learning_rate": 0.00046257432668765305, | |
| "loss": 0.8738, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.07534141811427947, | |
| "grad_norm": 0.2707865536212921, | |
| "learning_rate": 0.00046253060510668064, | |
| "loss": 0.832, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.07542882115153501, | |
| "grad_norm": 0.19732601940631866, | |
| "learning_rate": 0.0004624868835257083, | |
| "loss": 0.8948, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.07551622418879056, | |
| "grad_norm": 0.2605394721031189, | |
| "learning_rate": 0.00046244316194473593, | |
| "loss": 0.7346, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.0756036272260461, | |
| "grad_norm": 0.26202288269996643, | |
| "learning_rate": 0.0004623994403637636, | |
| "loss": 0.8521, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.07569103026330165, | |
| "grad_norm": 0.3473947048187256, | |
| "learning_rate": 0.0004623557187827912, | |
| "loss": 1.043, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.0757784333005572, | |
| "grad_norm": 0.7824636697769165, | |
| "learning_rate": 0.0004623119972018188, | |
| "loss": 1.2121, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.07586583633781274, | |
| "grad_norm": 0.26076897978782654, | |
| "learning_rate": 0.00046226827562084646, | |
| "loss": 0.8669, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.07595323937506829, | |
| "grad_norm": 0.3360956013202667, | |
| "learning_rate": 0.00046222455403987405, | |
| "loss": 0.8806, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.07604064241232383, | |
| "grad_norm": 0.27572354674339294, | |
| "learning_rate": 0.00046218083245890175, | |
| "loss": 0.8105, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.07612804544957938, | |
| "grad_norm": 0.22802734375, | |
| "learning_rate": 0.0004621371108779294, | |
| "loss": 0.6879, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.07621544848683491, | |
| "grad_norm": 0.31544265151023865, | |
| "learning_rate": 0.000462093389296957, | |
| "loss": 0.835, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.07630285152409046, | |
| "grad_norm": 0.3530902564525604, | |
| "learning_rate": 0.00046204966771598463, | |
| "loss": 0.7543, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.076390254561346, | |
| "grad_norm": 0.28108978271484375, | |
| "learning_rate": 0.0004620059461350122, | |
| "loss": 0.9433, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.07647765759860155, | |
| "grad_norm": 0.2918491065502167, | |
| "learning_rate": 0.00046196222455403987, | |
| "loss": 0.9016, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.0765650606358571, | |
| "grad_norm": 0.3130475580692291, | |
| "learning_rate": 0.0004619185029730675, | |
| "loss": 0.8612, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.07665246367311264, | |
| "grad_norm": 0.2697352468967438, | |
| "learning_rate": 0.00046187478139209516, | |
| "loss": 1.0324, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.07673986671036818, | |
| "grad_norm": 0.3534733057022095, | |
| "learning_rate": 0.0004618310598111228, | |
| "loss": 0.7769, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.07682726974762373, | |
| "grad_norm": 0.46239951252937317, | |
| "learning_rate": 0.0004617873382301504, | |
| "loss": 0.8155, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.07691467278487928, | |
| "grad_norm": 0.2869885265827179, | |
| "learning_rate": 0.00046174361664917804, | |
| "loss": 0.8088, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.07700207582213482, | |
| "grad_norm": 0.544746458530426, | |
| "learning_rate": 0.0004616998950682057, | |
| "loss": 1.0332, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.07708947885939037, | |
| "grad_norm": 0.28001531958580017, | |
| "learning_rate": 0.0004616561734872333, | |
| "loss": 0.8363, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.07717688189664591, | |
| "grad_norm": 0.244185671210289, | |
| "learning_rate": 0.0004616124519062609, | |
| "loss": 0.8611, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.07726428493390146, | |
| "grad_norm": 0.3561322093009949, | |
| "learning_rate": 0.00046156873032528856, | |
| "loss": 0.9298, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.077351687971157, | |
| "grad_norm": 0.2852579355239868, | |
| "learning_rate": 0.0004615250087443162, | |
| "loss": 0.9415, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.07743909100841254, | |
| "grad_norm": 0.3458700180053711, | |
| "learning_rate": 0.0004614812871633438, | |
| "loss": 0.7855, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.07752649404566808, | |
| "grad_norm": 0.33211758732795715, | |
| "learning_rate": 0.0004614375655823715, | |
| "loss": 0.7652, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.07761389708292363, | |
| "grad_norm": 0.2643268406391144, | |
| "learning_rate": 0.0004613938440013991, | |
| "loss": 0.813, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.07770130012017917, | |
| "grad_norm": 0.26717138290405273, | |
| "learning_rate": 0.00046135012242042673, | |
| "loss": 0.673, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.07778870315743472, | |
| "grad_norm": 0.2716834843158722, | |
| "learning_rate": 0.0004613064008394544, | |
| "loss": 1.0343, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.07787610619469026, | |
| "grad_norm": 0.4963998794555664, | |
| "learning_rate": 0.00046126267925848197, | |
| "loss": 1.3856, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.07796350923194581, | |
| "grad_norm": 0.3124493360519409, | |
| "learning_rate": 0.00046121895767750967, | |
| "loss": 1.0451, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.07805091226920136, | |
| "grad_norm": 0.5837683081626892, | |
| "learning_rate": 0.00046117523609653726, | |
| "loss": 1.0501, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.0781383153064569, | |
| "grad_norm": 0.31839168071746826, | |
| "learning_rate": 0.0004611315145155649, | |
| "loss": 0.9903, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.07822571834371245, | |
| "grad_norm": 0.5437602996826172, | |
| "learning_rate": 0.0004610877929345925, | |
| "loss": 1.0399, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.07831312138096799, | |
| "grad_norm": 0.3862234354019165, | |
| "learning_rate": 0.00046104407135362014, | |
| "loss": 1.0355, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.07840052441822354, | |
| "grad_norm": 0.7273140549659729, | |
| "learning_rate": 0.0004610003497726478, | |
| "loss": 0.9339, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.07848792745547908, | |
| "grad_norm": 0.31776732206344604, | |
| "learning_rate": 0.00046095662819167543, | |
| "loss": 1.405, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.07857533049273462, | |
| "grad_norm": 0.33975592255592346, | |
| "learning_rate": 0.0004609129066107031, | |
| "loss": 0.9493, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.07866273352999016, | |
| "grad_norm": 0.3096635937690735, | |
| "learning_rate": 0.00046086918502973067, | |
| "loss": 0.8949, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.07875013656724571, | |
| "grad_norm": 0.22939470410346985, | |
| "learning_rate": 0.0004608254634487583, | |
| "loss": 1.0486, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.07883753960450125, | |
| "grad_norm": 0.27594518661499023, | |
| "learning_rate": 0.0004607817418677859, | |
| "loss": 0.7005, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.0789249426417568, | |
| "grad_norm": 0.38164445757865906, | |
| "learning_rate": 0.0004607380202868136, | |
| "loss": 1.2305, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.07901234567901234, | |
| "grad_norm": 0.26803824305534363, | |
| "learning_rate": 0.00046069429870584125, | |
| "loss": 0.824, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.07909974871626789, | |
| "grad_norm": 0.3049018085002899, | |
| "learning_rate": 0.00046065057712486884, | |
| "loss": 0.8824, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.07918715175352344, | |
| "grad_norm": 0.30478763580322266, | |
| "learning_rate": 0.0004606068555438965, | |
| "loss": 0.9809, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.07927455479077898, | |
| "grad_norm": 0.276212602853775, | |
| "learning_rate": 0.0004605631339629241, | |
| "loss": 0.8166, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.07936195782803453, | |
| "grad_norm": 0.8416312336921692, | |
| "learning_rate": 0.0004605194123819517, | |
| "loss": 1.5118, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.07944936086529007, | |
| "grad_norm": 0.3249102532863617, | |
| "learning_rate": 0.0004604756908009794, | |
| "loss": 0.905, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.07953676390254562, | |
| "grad_norm": 0.3695957064628601, | |
| "learning_rate": 0.000460431969220007, | |
| "loss": 0.809, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.07962416693980116, | |
| "grad_norm": 0.2533642649650574, | |
| "learning_rate": 0.00046038824763903466, | |
| "loss": 0.8706, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.07971156997705671, | |
| "grad_norm": 1.895600438117981, | |
| "learning_rate": 0.00046034452605806225, | |
| "loss": 0.906, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.07979897301431224, | |
| "grad_norm": 0.3041301369667053, | |
| "learning_rate": 0.0004603008044770899, | |
| "loss": 0.8028, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.07988637605156779, | |
| "grad_norm": 0.39580902457237244, | |
| "learning_rate": 0.0004602570828961175, | |
| "loss": 0.8785, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.07997377908882333, | |
| "grad_norm": 0.3260571360588074, | |
| "learning_rate": 0.0004602133613151452, | |
| "loss": 0.908, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.08006118212607888, | |
| "grad_norm": 0.3628925681114197, | |
| "learning_rate": 0.0004601696397341728, | |
| "loss": 0.8364, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.08014858516333442, | |
| "grad_norm": 0.4076823890209198, | |
| "learning_rate": 0.0004601259181532004, | |
| "loss": 1.93, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.08023598820058997, | |
| "grad_norm": 0.6916859149932861, | |
| "learning_rate": 0.00046008219657222806, | |
| "loss": 1.1446, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.08032339123784552, | |
| "grad_norm": 1.301007866859436, | |
| "learning_rate": 0.00046003847499125565, | |
| "loss": 1.117, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.08041079427510106, | |
| "grad_norm": 2.9351885318756104, | |
| "learning_rate": 0.00045999475341028335, | |
| "loss": 1.8147, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.0804981973123566, | |
| "grad_norm": 3.5363566875457764, | |
| "learning_rate": 0.000459951031829311, | |
| "loss": 1.4487, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.08058560034961215, | |
| "grad_norm": 1.0070669651031494, | |
| "learning_rate": 0.0004599073102483386, | |
| "loss": 0.9901, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.0806730033868677, | |
| "grad_norm": 0.42096540331840515, | |
| "learning_rate": 0.00045986358866736623, | |
| "loss": 0.8757, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.08076040642412324, | |
| "grad_norm": 0.7990926504135132, | |
| "learning_rate": 0.0004598198670863938, | |
| "loss": 1.1409, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.08084780946137879, | |
| "grad_norm": 0.6880809664726257, | |
| "learning_rate": 0.00045977614550542147, | |
| "loss": 0.9678, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.08093521249863432, | |
| "grad_norm": 0.7126320004463196, | |
| "learning_rate": 0.0004597324239244491, | |
| "loss": 0.8932, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.08102261553588987, | |
| "grad_norm": 1.2712117433547974, | |
| "learning_rate": 0.00045968870234347676, | |
| "loss": 1.7774, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.08111001857314541, | |
| "grad_norm": 1.9836965799331665, | |
| "learning_rate": 0.0004596449807625044, | |
| "loss": 1.1419, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.08119742161040096, | |
| "grad_norm": 0.6894294023513794, | |
| "learning_rate": 0.000459601259181532, | |
| "loss": 0.9666, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.0812848246476565, | |
| "grad_norm": 2.2530252933502197, | |
| "learning_rate": 0.00045955753760055964, | |
| "loss": 1.5093, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.08137222768491205, | |
| "grad_norm": 14.37427043914795, | |
| "learning_rate": 0.0004595138160195873, | |
| "loss": 1.3134, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.0814596307221676, | |
| "grad_norm": 3.392730236053467, | |
| "learning_rate": 0.00045947009443861493, | |
| "loss": 1.0883, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.08154703375942314, | |
| "grad_norm": 1.097122073173523, | |
| "learning_rate": 0.0004594263728576425, | |
| "loss": 1.0587, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.08163443679667869, | |
| "grad_norm": 0.7270208597183228, | |
| "learning_rate": 0.00045938265127667017, | |
| "loss": 1.1386, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.08172183983393423, | |
| "grad_norm": 3.5602266788482666, | |
| "learning_rate": 0.0004593389296956978, | |
| "loss": 1.1204, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.08180924287118978, | |
| "grad_norm": 1.953038215637207, | |
| "learning_rate": 0.0004592952081147254, | |
| "loss": 1.2367, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.08189664590844532, | |
| "grad_norm": 1.90444016456604, | |
| "learning_rate": 0.0004592514865337531, | |
| "loss": 1.1981, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.08198404894570087, | |
| "grad_norm": 9.526935577392578, | |
| "learning_rate": 0.0004592077649527807, | |
| "loss": 1.4363, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.08207145198295641, | |
| "grad_norm": 5.361575603485107, | |
| "learning_rate": 0.00045916404337180834, | |
| "loss": 1.4758, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.08215885502021195, | |
| "grad_norm": 49.836151123046875, | |
| "learning_rate": 0.000459120321790836, | |
| "loss": 3.2272, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.08224625805746749, | |
| "grad_norm": 6.1282877922058105, | |
| "learning_rate": 0.0004590766002098636, | |
| "loss": 2.0861, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.08233366109472304, | |
| "grad_norm": 9.320550918579102, | |
| "learning_rate": 0.0004590328786288912, | |
| "loss": 2.0217, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.08242106413197858, | |
| "grad_norm": 3.1131937503814697, | |
| "learning_rate": 0.00045898915704791887, | |
| "loss": 1.4848, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.08250846716923413, | |
| "grad_norm": 51.67763137817383, | |
| "learning_rate": 0.0004589454354669465, | |
| "loss": 3.2458, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.08259587020648967, | |
| "grad_norm": 7.247336387634277, | |
| "learning_rate": 0.0004589017138859741, | |
| "loss": 2.6957, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.08268327324374522, | |
| "grad_norm": 3.2208497524261475, | |
| "learning_rate": 0.00045885799230500175, | |
| "loss": 1.9059, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.08277067628100077, | |
| "grad_norm": 78.9037094116211, | |
| "learning_rate": 0.0004588142707240294, | |
| "loss": 5.5682, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.08285807931825631, | |
| "grad_norm": 4.832467079162598, | |
| "learning_rate": 0.00045877054914305704, | |
| "loss": 1.6731, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.08294548235551186, | |
| "grad_norm": 7.1308674812316895, | |
| "learning_rate": 0.0004587268275620847, | |
| "loss": 2.2772, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.0830328853927674, | |
| "grad_norm": 4.155465126037598, | |
| "learning_rate": 0.00045868310598111227, | |
| "loss": 2.2794, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.08312028843002295, | |
| "grad_norm": 51.88750457763672, | |
| "learning_rate": 0.0004586393844001399, | |
| "loss": 4.0774, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.0832076914672785, | |
| "grad_norm": 2.969212532043457, | |
| "learning_rate": 0.00045859566281916756, | |
| "loss": 1.9225, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.08329509450453403, | |
| "grad_norm": 3.454350233078003, | |
| "learning_rate": 0.00045855194123819515, | |
| "loss": 1.6258, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.08338249754178957, | |
| "grad_norm": 46.18666458129883, | |
| "learning_rate": 0.00045850821965722285, | |
| "loss": 1.7273, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.08346990057904512, | |
| "grad_norm": 13.307456016540527, | |
| "learning_rate": 0.00045846449807625044, | |
| "loss": 2.1933, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.08355730361630066, | |
| "grad_norm": 8.283126831054688, | |
| "learning_rate": 0.0004584207764952781, | |
| "loss": 2.499, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.08364470665355621, | |
| "grad_norm": 6.291905403137207, | |
| "learning_rate": 0.0004583770549143057, | |
| "loss": 1.8399, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.08373210969081175, | |
| "grad_norm": 19.28121566772461, | |
| "learning_rate": 0.0004583333333333333, | |
| "loss": 2.6815, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.0838195127280673, | |
| "grad_norm": 9.661205291748047, | |
| "learning_rate": 0.000458289611752361, | |
| "loss": 2.3274, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.08390691576532285, | |
| "grad_norm": 15.012873649597168, | |
| "learning_rate": 0.0004582458901713886, | |
| "loss": 2.1736, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.08399431880257839, | |
| "grad_norm": 10.02956485748291, | |
| "learning_rate": 0.00045820216859041626, | |
| "loss": 2.4168, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.08408172183983394, | |
| "grad_norm": 2.234221935272217, | |
| "learning_rate": 0.00045815844700944385, | |
| "loss": 1.7808, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.08416912487708948, | |
| "grad_norm": 7.04872989654541, | |
| "learning_rate": 0.0004581147254284715, | |
| "loss": 2.1456, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.08425652791434503, | |
| "grad_norm": 3.498042106628418, | |
| "learning_rate": 0.0004580710038474991, | |
| "loss": 1.6212, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.08434393095160057, | |
| "grad_norm": 2.731658935546875, | |
| "learning_rate": 0.0004580272822665268, | |
| "loss": 1.6905, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.08443133398885612, | |
| "grad_norm": 4.867488384246826, | |
| "learning_rate": 0.00045798356068555443, | |
| "loss": 1.4945, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.08451873702611165, | |
| "grad_norm": 10.225361824035645, | |
| "learning_rate": 0.000457939839104582, | |
| "loss": 2.4163, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.0846061400633672, | |
| "grad_norm": 2.749767780303955, | |
| "learning_rate": 0.00045789611752360967, | |
| "loss": 1.49, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.08469354310062274, | |
| "grad_norm": 14.945262908935547, | |
| "learning_rate": 0.00045785239594263726, | |
| "loss": 2.4579, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.08478094613787829, | |
| "grad_norm": 4.0551228523254395, | |
| "learning_rate": 0.0004578086743616649, | |
| "loss": 1.6358, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.08486834917513383, | |
| "grad_norm": 2.8462789058685303, | |
| "learning_rate": 0.0004577649527806926, | |
| "loss": 1.6568, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.08495575221238938, | |
| "grad_norm": 3.82456111907959, | |
| "learning_rate": 0.0004577212311997202, | |
| "loss": 1.696, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.08504315524964493, | |
| "grad_norm": 2.9463558197021484, | |
| "learning_rate": 0.00045767750961874784, | |
| "loss": 1.8359, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.08513055828690047, | |
| "grad_norm": 2.811894416809082, | |
| "learning_rate": 0.00045763378803777543, | |
| "loss": 1.369, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.08521796132415602, | |
| "grad_norm": 2.092231512069702, | |
| "learning_rate": 0.0004575900664568031, | |
| "loss": 1.5433, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.08530536436141156, | |
| "grad_norm": 4.028072357177734, | |
| "learning_rate": 0.0004575463448758307, | |
| "loss": 2.4999, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.08539276739866711, | |
| "grad_norm": 10.593165397644043, | |
| "learning_rate": 0.00045750262329485836, | |
| "loss": 1.5753, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.08548017043592265, | |
| "grad_norm": 6.811407089233398, | |
| "learning_rate": 0.000457458901713886, | |
| "loss": 1.7268, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.0855675734731782, | |
| "grad_norm": 2.3520467281341553, | |
| "learning_rate": 0.0004574151801329136, | |
| "loss": 1.4044, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.08565497651043373, | |
| "grad_norm": 3.668078660964966, | |
| "learning_rate": 0.00045737145855194125, | |
| "loss": 1.718, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.08574237954768928, | |
| "grad_norm": 10.229111671447754, | |
| "learning_rate": 0.00045732773697096884, | |
| "loss": 1.7006, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.08582978258494482, | |
| "grad_norm": 5.428765773773193, | |
| "learning_rate": 0.00045728401538999654, | |
| "loss": 2.2021, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.08591718562220037, | |
| "grad_norm": 2.0686569213867188, | |
| "learning_rate": 0.0004572402938090242, | |
| "loss": 1.687, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.08600458865945591, | |
| "grad_norm": 2.371243715286255, | |
| "learning_rate": 0.00045719657222805177, | |
| "loss": 1.6734, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.08609199169671146, | |
| "grad_norm": 1.6429576873779297, | |
| "learning_rate": 0.0004571528506470794, | |
| "loss": 1.8382, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.086179394733967, | |
| "grad_norm": 2.408743381500244, | |
| "learning_rate": 0.000457109129066107, | |
| "loss": 1.45, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.08626679777122255, | |
| "grad_norm": 4.068368434906006, | |
| "learning_rate": 0.0004570654074851347, | |
| "loss": 1.7464, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.0863542008084781, | |
| "grad_norm": 1.9330801963806152, | |
| "learning_rate": 0.0004570216859041623, | |
| "loss": 1.6335, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.08644160384573364, | |
| "grad_norm": 4.200726509094238, | |
| "learning_rate": 0.00045697796432318994, | |
| "loss": 1.6781, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.08652900688298919, | |
| "grad_norm": 4.335032939910889, | |
| "learning_rate": 0.0004569342427422176, | |
| "loss": 1.7382, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.08661640992024473, | |
| "grad_norm": 2.2428669929504395, | |
| "learning_rate": 0.0004568905211612452, | |
| "loss": 1.4791, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.08670381295750028, | |
| "grad_norm": 2.2247121334075928, | |
| "learning_rate": 0.0004568467995802728, | |
| "loss": 1.8668, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.08679121599475582, | |
| "grad_norm": 2.013319492340088, | |
| "learning_rate": 0.00045680307799930047, | |
| "loss": 1.4925, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.08687861903201136, | |
| "grad_norm": 1.5773614645004272, | |
| "learning_rate": 0.0004567593564183281, | |
| "loss": 1.3334, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.0869660220692669, | |
| "grad_norm": 1.1663486957550049, | |
| "learning_rate": 0.0004567156348373557, | |
| "loss": 1.5022, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.08705342510652245, | |
| "grad_norm": 1.763238549232483, | |
| "learning_rate": 0.00045667191325638335, | |
| "loss": 1.5118, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.08714082814377799, | |
| "grad_norm": 1.4888843297958374, | |
| "learning_rate": 0.000456628191675411, | |
| "loss": 1.6713, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.08722823118103354, | |
| "grad_norm": 2.5363516807556152, | |
| "learning_rate": 0.0004565844700944386, | |
| "loss": 1.4999, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.08731563421828908, | |
| "grad_norm": 2.134773015975952, | |
| "learning_rate": 0.0004565407485134663, | |
| "loss": 1.5086, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.08740303725554463, | |
| "grad_norm": 15.75776481628418, | |
| "learning_rate": 0.0004564970269324939, | |
| "loss": 2.11, | |
| "step": 1000 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 11441, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.60783873359872e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |