| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 2144, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0009328358208955224, |
| "grad_norm": 3.504828120771814, |
| "learning_rate": 9.999994632276776e-06, |
| "loss": 0.3261, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0018656716417910447, |
| "grad_norm": 2.3615723232353814, |
| "learning_rate": 9.999978529118625e-06, |
| "loss": 0.2777, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.002798507462686567, |
| "grad_norm": 5.916454919666803, |
| "learning_rate": 9.999951690560122e-06, |
| "loss": 0.2836, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0037313432835820895, |
| "grad_norm": 1.8873931288294354, |
| "learning_rate": 9.999914116658896e-06, |
| "loss": 0.2506, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0046641791044776115, |
| "grad_norm": 2.2131303334001577, |
| "learning_rate": 9.999865807495616e-06, |
| "loss": 0.3082, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.005597014925373134, |
| "grad_norm": 1.9071229748203362, |
| "learning_rate": 9.999806763174009e-06, |
| "loss": 0.2879, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0065298507462686565, |
| "grad_norm": 1.705352427822773, |
| "learning_rate": 9.999736983820849e-06, |
| "loss": 0.292, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.007462686567164179, |
| "grad_norm": 1.713988508392442, |
| "learning_rate": 9.999656469585957e-06, |
| "loss": 0.27, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.008395522388059701, |
| "grad_norm": 1.7146113640225777, |
| "learning_rate": 9.999565220642204e-06, |
| "loss": 0.273, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.009328358208955223, |
| "grad_norm": 1.9717667837041324, |
| "learning_rate": 9.999463237185512e-06, |
| "loss": 0.2698, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.010261194029850746, |
| "grad_norm": 1.6944388445330036, |
| "learning_rate": 9.999350519434845e-06, |
| "loss": 0.2598, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.011194029850746268, |
| "grad_norm": 1.5056746742653715, |
| "learning_rate": 9.999227067632223e-06, |
| "loss": 0.2145, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.012126865671641791, |
| "grad_norm": 1.6049599181993224, |
| "learning_rate": 9.999092882042703e-06, |
| "loss": 0.2638, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.013059701492537313, |
| "grad_norm": 1.598791504158011, |
| "learning_rate": 9.998947962954395e-06, |
| "loss": 0.296, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.013992537313432836, |
| "grad_norm": 1.4737350024393174, |
| "learning_rate": 9.998792310678454e-06, |
| "loss": 0.2599, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.014925373134328358, |
| "grad_norm": 1.6209751246114812, |
| "learning_rate": 9.99862592554908e-06, |
| "loss": 0.2262, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.01585820895522388, |
| "grad_norm": 1.5381445660374602, |
| "learning_rate": 9.998448807923517e-06, |
| "loss": 0.291, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.016791044776119403, |
| "grad_norm": 1.5243144687560854, |
| "learning_rate": 9.998260958182048e-06, |
| "loss": 0.2357, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.017723880597014924, |
| "grad_norm": 1.6139229688930536, |
| "learning_rate": 9.99806237672801e-06, |
| "loss": 0.2643, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.018656716417910446, |
| "grad_norm": 1.6217246024693246, |
| "learning_rate": 9.997853063987768e-06, |
| "loss": 0.2965, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01958955223880597, |
| "grad_norm": 1.6080924641938708, |
| "learning_rate": 9.997633020410742e-06, |
| "loss": 0.2757, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.020522388059701493, |
| "grad_norm": 1.5781983576612648, |
| "learning_rate": 9.997402246469382e-06, |
| "loss": 0.2598, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.021455223880597014, |
| "grad_norm": 1.5222547775408568, |
| "learning_rate": 9.997160742659176e-06, |
| "loss": 0.2608, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.022388059701492536, |
| "grad_norm": 1.4640943343714499, |
| "learning_rate": 9.996908509498662e-06, |
| "loss": 0.2431, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.02332089552238806, |
| "grad_norm": 1.5871771632413834, |
| "learning_rate": 9.996645547529402e-06, |
| "loss": 0.3371, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.024253731343283583, |
| "grad_norm": 1.5463201473320933, |
| "learning_rate": 9.996371857316e-06, |
| "loss": 0.234, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.025186567164179104, |
| "grad_norm": 1.4718472647196803, |
| "learning_rate": 9.996087439446094e-06, |
| "loss": 0.2645, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.026119402985074626, |
| "grad_norm": 1.4683539374199128, |
| "learning_rate": 9.995792294530356e-06, |
| "loss": 0.2496, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.027052238805970148, |
| "grad_norm": 1.5295384301190804, |
| "learning_rate": 9.995486423202485e-06, |
| "loss": 0.2658, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.027985074626865673, |
| "grad_norm": 1.5622767261246613, |
| "learning_rate": 9.995169826119215e-06, |
| "loss": 0.2305, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.028917910447761194, |
| "grad_norm": 1.4856190072614097, |
| "learning_rate": 9.994842503960308e-06, |
| "loss": 0.2223, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.029850746268656716, |
| "grad_norm": 1.5394365920327815, |
| "learning_rate": 9.994504457428557e-06, |
| "loss": 0.2478, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.030783582089552237, |
| "grad_norm": 1.462578205444393, |
| "learning_rate": 9.994155687249775e-06, |
| "loss": 0.2205, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.03171641791044776, |
| "grad_norm": 1.52001491082532, |
| "learning_rate": 9.993796194172806e-06, |
| "loss": 0.2576, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.03264925373134328, |
| "grad_norm": 1.5000699624498415, |
| "learning_rate": 9.993425978969508e-06, |
| "loss": 0.2344, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.033582089552238806, |
| "grad_norm": 1.4030942155270072, |
| "learning_rate": 9.993045042434772e-06, |
| "loss": 0.2137, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.03451492537313433, |
| "grad_norm": 1.4902543532941115, |
| "learning_rate": 9.9926533853865e-06, |
| "loss": 0.2625, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.03544776119402985, |
| "grad_norm": 1.469358131239095, |
| "learning_rate": 9.992251008665613e-06, |
| "loss": 0.2191, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.036380597014925374, |
| "grad_norm": 1.5244188812937278, |
| "learning_rate": 9.991837913136053e-06, |
| "loss": 0.2598, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.03731343283582089, |
| "grad_norm": 1.6528109851134296, |
| "learning_rate": 9.99141409968477e-06, |
| "loss": 0.3381, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.03824626865671642, |
| "grad_norm": 1.6045526459467032, |
| "learning_rate": 9.990979569221733e-06, |
| "loss": 0.2637, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.03917910447761194, |
| "grad_norm": 1.4764228467945528, |
| "learning_rate": 9.990534322679915e-06, |
| "loss": 0.2359, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.04011194029850746, |
| "grad_norm": 1.471552235019458, |
| "learning_rate": 9.9900783610153e-06, |
| "loss": 0.2603, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.041044776119402986, |
| "grad_norm": 1.4939545950094606, |
| "learning_rate": 9.989611685206881e-06, |
| "loss": 0.2442, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.04197761194029851, |
| "grad_norm": 1.5330545678463738, |
| "learning_rate": 9.989134296256648e-06, |
| "loss": 0.2168, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.04291044776119403, |
| "grad_norm": 1.5722324426590966, |
| "learning_rate": 9.988646195189604e-06, |
| "loss": 0.2524, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.043843283582089554, |
| "grad_norm": 1.6673648587933672, |
| "learning_rate": 9.988147383053739e-06, |
| "loss": 0.279, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.04477611940298507, |
| "grad_norm": 1.6316110614165935, |
| "learning_rate": 9.987637860920053e-06, |
| "loss": 0.277, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.0457089552238806, |
| "grad_norm": 1.3017219161555913, |
| "learning_rate": 9.98711762988253e-06, |
| "loss": 0.2087, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.04664179104477612, |
| "grad_norm": 1.6585007240946634, |
| "learning_rate": 9.986586691058156e-06, |
| "loss": 0.2402, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.04757462686567164, |
| "grad_norm": 1.435129267578802, |
| "learning_rate": 9.986045045586904e-06, |
| "loss": 0.2258, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.048507462686567165, |
| "grad_norm": 1.3641626482919518, |
| "learning_rate": 9.985492694631733e-06, |
| "loss": 0.2103, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.049440298507462684, |
| "grad_norm": 1.5007160160488142, |
| "learning_rate": 9.984929639378593e-06, |
| "loss": 0.2694, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.05037313432835821, |
| "grad_norm": 1.509449093302392, |
| "learning_rate": 9.98435588103641e-06, |
| "loss": 0.2931, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.051305970149253734, |
| "grad_norm": 1.4629541603512073, |
| "learning_rate": 9.983771420837098e-06, |
| "loss": 0.2485, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.05223880597014925, |
| "grad_norm": 1.3311013739415256, |
| "learning_rate": 9.983176260035546e-06, |
| "loss": 0.2031, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.05317164179104478, |
| "grad_norm": 1.5115451257421706, |
| "learning_rate": 9.982570399909612e-06, |
| "loss": 0.2504, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.054104477611940295, |
| "grad_norm": 1.3579306876167139, |
| "learning_rate": 9.981953841760137e-06, |
| "loss": 0.2289, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.05503731343283582, |
| "grad_norm": 1.436056964181841, |
| "learning_rate": 9.981326586910921e-06, |
| "loss": 0.2212, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.055970149253731345, |
| "grad_norm": 1.4863669227109872, |
| "learning_rate": 9.980688636708744e-06, |
| "loss": 0.2077, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.05690298507462686, |
| "grad_norm": 1.6159740773767797, |
| "learning_rate": 9.980039992523335e-06, |
| "loss": 0.2353, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.05783582089552239, |
| "grad_norm": 1.458582213485752, |
| "learning_rate": 9.979380655747395e-06, |
| "loss": 0.2133, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.058768656716417914, |
| "grad_norm": 1.4937196703685367, |
| "learning_rate": 9.978710627796577e-06, |
| "loss": 0.2609, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.05970149253731343, |
| "grad_norm": 1.5665181980933172, |
| "learning_rate": 9.978029910109491e-06, |
| "loss": 0.3306, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.06063432835820896, |
| "grad_norm": 1.4545115623875786, |
| "learning_rate": 9.9773385041477e-06, |
| "loss": 0.2248, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.061567164179104475, |
| "grad_norm": 1.367323597428911, |
| "learning_rate": 9.976636411395712e-06, |
| "loss": 0.2631, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.0625, |
| "grad_norm": 1.4079528542577002, |
| "learning_rate": 9.975923633360985e-06, |
| "loss": 0.2141, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.06343283582089553, |
| "grad_norm": 1.4239669532338677, |
| "learning_rate": 9.975200171573917e-06, |
| "loss": 0.2628, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.06436567164179105, |
| "grad_norm": 1.456254873499018, |
| "learning_rate": 9.974466027587844e-06, |
| "loss": 0.2258, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.06529850746268656, |
| "grad_norm": 1.4502780060390226, |
| "learning_rate": 9.973721202979038e-06, |
| "loss": 0.2491, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.06623134328358209, |
| "grad_norm": 1.7170577208006956, |
| "learning_rate": 9.972965699346705e-06, |
| "loss": 0.238, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.06716417910447761, |
| "grad_norm": 1.4596891076306684, |
| "learning_rate": 9.972199518312979e-06, |
| "loss": 0.2127, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.06809701492537314, |
| "grad_norm": 1.4933040545922878, |
| "learning_rate": 9.971422661522919e-06, |
| "loss": 0.2424, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.06902985074626866, |
| "grad_norm": 1.5726209337479455, |
| "learning_rate": 9.970635130644507e-06, |
| "loss": 0.2149, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.06996268656716417, |
| "grad_norm": 1.4668363536094942, |
| "learning_rate": 9.96983692736864e-06, |
| "loss": 0.239, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.0708955223880597, |
| "grad_norm": 1.4357573184764414, |
| "learning_rate": 9.969028053409131e-06, |
| "loss": 0.2146, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.07182835820895522, |
| "grad_norm": 1.5436317261849903, |
| "learning_rate": 9.968208510502708e-06, |
| "loss": 0.2794, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.07276119402985075, |
| "grad_norm": 1.4616769566530838, |
| "learning_rate": 9.967378300408998e-06, |
| "loss": 0.1884, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.07369402985074627, |
| "grad_norm": 1.3693601049840698, |
| "learning_rate": 9.966537424910542e-06, |
| "loss": 0.2411, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.07462686567164178, |
| "grad_norm": 1.4594105908413364, |
| "learning_rate": 9.965685885812773e-06, |
| "loss": 0.2815, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.07555970149253731, |
| "grad_norm": 1.4610387019396303, |
| "learning_rate": 9.964823684944017e-06, |
| "loss": 0.2263, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.07649253731343283, |
| "grad_norm": 1.6171224207013994, |
| "learning_rate": 9.963950824155502e-06, |
| "loss": 0.2349, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.07742537313432836, |
| "grad_norm": 1.669653253270167, |
| "learning_rate": 9.963067305321334e-06, |
| "loss": 0.2529, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.07835820895522388, |
| "grad_norm": 1.4542612622001394, |
| "learning_rate": 9.96217313033851e-06, |
| "loss": 0.2857, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.07929104477611941, |
| "grad_norm": 1.5163247384336487, |
| "learning_rate": 9.961268301126902e-06, |
| "loss": 0.2822, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.08022388059701492, |
| "grad_norm": 1.373641468055401, |
| "learning_rate": 9.960352819629259e-06, |
| "loss": 0.2355, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.08115671641791045, |
| "grad_norm": 1.517536486659926, |
| "learning_rate": 9.959426687811202e-06, |
| "loss": 0.2159, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.08208955223880597, |
| "grad_norm": 1.4125395258698592, |
| "learning_rate": 9.958489907661217e-06, |
| "loss": 0.2388, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.0830223880597015, |
| "grad_norm": 1.3566639028343168, |
| "learning_rate": 9.957542481190656e-06, |
| "loss": 0.2173, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.08395522388059702, |
| "grad_norm": 1.49748689852031, |
| "learning_rate": 9.95658441043373e-06, |
| "loss": 0.2716, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.08488805970149253, |
| "grad_norm": 1.3945447151040264, |
| "learning_rate": 9.955615697447499e-06, |
| "loss": 0.2559, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.08582089552238806, |
| "grad_norm": 1.3652676888583273, |
| "learning_rate": 9.95463634431188e-06, |
| "loss": 0.2426, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.08675373134328358, |
| "grad_norm": 1.2906675900129567, |
| "learning_rate": 9.953646353129626e-06, |
| "loss": 0.2015, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.08768656716417911, |
| "grad_norm": 1.4046299397203328, |
| "learning_rate": 9.952645726026344e-06, |
| "loss": 0.2339, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.08861940298507463, |
| "grad_norm": 1.5899253571402825, |
| "learning_rate": 9.951634465150463e-06, |
| "loss": 0.2938, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.08955223880597014, |
| "grad_norm": 1.3529056182643173, |
| "learning_rate": 9.950612572673255e-06, |
| "loss": 0.2065, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.09048507462686567, |
| "grad_norm": 1.4152494711718966, |
| "learning_rate": 9.949580050788813e-06, |
| "loss": 0.2309, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.0914179104477612, |
| "grad_norm": 1.330761603292533, |
| "learning_rate": 9.948536901714052e-06, |
| "loss": 0.2039, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.09235074626865672, |
| "grad_norm": 1.4554400226887259, |
| "learning_rate": 9.947483127688708e-06, |
| "loss": 0.2206, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.09328358208955224, |
| "grad_norm": 1.5989141026459632, |
| "learning_rate": 9.946418730975326e-06, |
| "loss": 0.3154, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.09421641791044776, |
| "grad_norm": 1.4173678693730305, |
| "learning_rate": 9.945343713859265e-06, |
| "loss": 0.2588, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.09514925373134328, |
| "grad_norm": 1.2482382413232498, |
| "learning_rate": 9.944258078648679e-06, |
| "loss": 0.1844, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.0960820895522388, |
| "grad_norm": 1.5034039526413285, |
| "learning_rate": 9.943161827674524e-06, |
| "loss": 0.2794, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.09701492537313433, |
| "grad_norm": 1.4654084731129182, |
| "learning_rate": 9.942054963290549e-06, |
| "loss": 0.2893, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.09794776119402986, |
| "grad_norm": 1.3112663438975303, |
| "learning_rate": 9.940937487873291e-06, |
| "loss": 0.2416, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.09888059701492537, |
| "grad_norm": 1.4583196765720625, |
| "learning_rate": 9.939809403822069e-06, |
| "loss": 0.2563, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.09981343283582089, |
| "grad_norm": 1.5160302662676066, |
| "learning_rate": 9.938670713558983e-06, |
| "loss": 0.2758, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.10074626865671642, |
| "grad_norm": 1.3769691563103652, |
| "learning_rate": 9.9375214195289e-06, |
| "loss": 0.2187, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.10167910447761194, |
| "grad_norm": 1.3836367725537049, |
| "learning_rate": 9.936361524199457e-06, |
| "loss": 0.2439, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.10261194029850747, |
| "grad_norm": 1.3438591129480295, |
| "learning_rate": 9.935191030061052e-06, |
| "loss": 0.2104, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.10354477611940298, |
| "grad_norm": 1.3272511661454085, |
| "learning_rate": 9.934009939626841e-06, |
| "loss": 0.2139, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.1044776119402985, |
| "grad_norm": 1.3915937126780076, |
| "learning_rate": 9.932818255432733e-06, |
| "loss": 0.242, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.10541044776119403, |
| "grad_norm": 1.3760119807889453, |
| "learning_rate": 9.931615980037379e-06, |
| "loss": 0.2244, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.10634328358208955, |
| "grad_norm": 1.429382679968247, |
| "learning_rate": 9.930403116022169e-06, |
| "loss": 0.257, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.10727611940298508, |
| "grad_norm": 1.4820454678416672, |
| "learning_rate": 9.929179665991234e-06, |
| "loss": 0.2634, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.10820895522388059, |
| "grad_norm": 1.3717549449001434, |
| "learning_rate": 9.92794563257143e-06, |
| "loss": 0.2269, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.10914179104477612, |
| "grad_norm": 1.4152419184213607, |
| "learning_rate": 9.926701018412335e-06, |
| "loss": 0.2371, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.11007462686567164, |
| "grad_norm": 1.4937791162173693, |
| "learning_rate": 9.925445826186246e-06, |
| "loss": 0.2784, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.11100746268656717, |
| "grad_norm": 1.4668852652230355, |
| "learning_rate": 9.924180058588177e-06, |
| "loss": 0.251, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.11194029850746269, |
| "grad_norm": 1.4588624579256642, |
| "learning_rate": 9.92290371833584e-06, |
| "loss": 0.2427, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.11287313432835822, |
| "grad_norm": 1.4192268281373919, |
| "learning_rate": 9.921616808169655e-06, |
| "loss": 0.2535, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.11380597014925373, |
| "grad_norm": 1.3360201557699796, |
| "learning_rate": 9.920319330852729e-06, |
| "loss": 0.2069, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.11473880597014925, |
| "grad_norm": 1.442691611821781, |
| "learning_rate": 9.919011289170863e-06, |
| "loss": 0.262, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.11567164179104478, |
| "grad_norm": 1.5573463553214775, |
| "learning_rate": 9.91769268593254e-06, |
| "loss": 0.2973, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.1166044776119403, |
| "grad_norm": 1.338534413462635, |
| "learning_rate": 9.91636352396892e-06, |
| "loss": 0.2108, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.11753731343283583, |
| "grad_norm": 1.4018913055329987, |
| "learning_rate": 9.915023806133833e-06, |
| "loss": 0.2473, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.11847014925373134, |
| "grad_norm": 1.418439965059982, |
| "learning_rate": 9.913673535303768e-06, |
| "loss": 0.2702, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.11940298507462686, |
| "grad_norm": 1.453592069106935, |
| "learning_rate": 9.91231271437788e-06, |
| "loss": 0.2167, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.12033582089552239, |
| "grad_norm": 1.3895229333541956, |
| "learning_rate": 9.910941346277976e-06, |
| "loss": 0.2384, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.12126865671641791, |
| "grad_norm": 1.364367296904366, |
| "learning_rate": 9.909559433948501e-06, |
| "loss": 0.2114, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.12220149253731344, |
| "grad_norm": 1.3482844635327793, |
| "learning_rate": 9.908166980356548e-06, |
| "loss": 0.1961, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.12313432835820895, |
| "grad_norm": 1.3630742951053016, |
| "learning_rate": 9.906763988491834e-06, |
| "loss": 0.2126, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.12406716417910447, |
| "grad_norm": 1.3064487916152947, |
| "learning_rate": 9.905350461366713e-06, |
| "loss": 0.2072, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.125, |
| "grad_norm": 1.3972019735824797, |
| "learning_rate": 9.903926402016153e-06, |
| "loss": 0.2486, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.1259328358208955, |
| "grad_norm": 1.3602872226033067, |
| "learning_rate": 9.902491813497735e-06, |
| "loss": 0.2286, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.12686567164179105, |
| "grad_norm": 1.2596145835806758, |
| "learning_rate": 9.901046698891648e-06, |
| "loss": 0.2085, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.12779850746268656, |
| "grad_norm": 1.3636680187102763, |
| "learning_rate": 9.899591061300684e-06, |
| "loss": 0.2283, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.1287313432835821, |
| "grad_norm": 1.3820368798328602, |
| "learning_rate": 9.898124903850228e-06, |
| "loss": 0.2515, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.1296641791044776, |
| "grad_norm": 1.4127756119119061, |
| "learning_rate": 9.896648229688248e-06, |
| "loss": 0.1884, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.13059701492537312, |
| "grad_norm": 1.3516953099470277, |
| "learning_rate": 9.895161041985295e-06, |
| "loss": 0.2493, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.13152985074626866, |
| "grad_norm": 1.4736087776119398, |
| "learning_rate": 9.893663343934496e-06, |
| "loss": 0.2884, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.13246268656716417, |
| "grad_norm": 1.4800592264693608, |
| "learning_rate": 9.892155138751542e-06, |
| "loss": 0.2488, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.1333955223880597, |
| "grad_norm": 1.5725783650380547, |
| "learning_rate": 9.890636429674684e-06, |
| "loss": 0.2206, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.13432835820895522, |
| "grad_norm": 1.4414825402474298, |
| "learning_rate": 9.889107219964726e-06, |
| "loss": 0.2911, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.13526119402985073, |
| "grad_norm": 1.4198450748776796, |
| "learning_rate": 9.887567512905019e-06, |
| "loss": 0.2328, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.13619402985074627, |
| "grad_norm": 1.4477631005726908, |
| "learning_rate": 9.886017311801449e-06, |
| "loss": 0.2357, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.13712686567164178, |
| "grad_norm": 1.2948928799390829, |
| "learning_rate": 9.884456619982437e-06, |
| "loss": 0.2041, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.13805970149253732, |
| "grad_norm": 1.3172494063741267, |
| "learning_rate": 9.882885440798928e-06, |
| "loss": 0.2666, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.13899253731343283, |
| "grad_norm": 1.4117329418770321, |
| "learning_rate": 9.881303777624385e-06, |
| "loss": 0.1783, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.13992537313432835, |
| "grad_norm": 1.5119339771702494, |
| "learning_rate": 9.879711633854778e-06, |
| "loss": 0.2387, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.14085820895522388, |
| "grad_norm": 1.4705530827214168, |
| "learning_rate": 9.878109012908583e-06, |
| "loss": 0.2498, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.1417910447761194, |
| "grad_norm": 1.4229149575244904, |
| "learning_rate": 9.876495918226772e-06, |
| "loss": 0.2582, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.14272388059701493, |
| "grad_norm": 1.4502287985677522, |
| "learning_rate": 9.8748723532728e-06, |
| "loss": 0.226, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.14365671641791045, |
| "grad_norm": 1.3114945077716187, |
| "learning_rate": 9.873238321532609e-06, |
| "loss": 0.1934, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.14458955223880596, |
| "grad_norm": 1.3654237571951762, |
| "learning_rate": 9.871593826514607e-06, |
| "loss": 0.2284, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.1455223880597015, |
| "grad_norm": 1.3814896512674422, |
| "learning_rate": 9.869938871749676e-06, |
| "loss": 0.2393, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.146455223880597, |
| "grad_norm": 1.3384575997158925, |
| "learning_rate": 9.86827346079115e-06, |
| "loss": 0.2453, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.14738805970149255, |
| "grad_norm": 1.4269277163538632, |
| "learning_rate": 9.866597597214815e-06, |
| "loss": 0.2434, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.14832089552238806, |
| "grad_norm": 1.2466769585933348, |
| "learning_rate": 9.864911284618899e-06, |
| "loss": 0.1927, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.14925373134328357, |
| "grad_norm": 1.3782896759760206, |
| "learning_rate": 9.863214526624065e-06, |
| "loss": 0.2159, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.1501865671641791, |
| "grad_norm": 1.391702740422766, |
| "learning_rate": 9.861507326873407e-06, |
| "loss": 0.2506, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.15111940298507462, |
| "grad_norm": 1.3717322250887456, |
| "learning_rate": 9.859789689032434e-06, |
| "loss": 0.2532, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.15205223880597016, |
| "grad_norm": 1.286489768706081, |
| "learning_rate": 9.858061616789068e-06, |
| "loss": 0.1956, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.15298507462686567, |
| "grad_norm": 1.4077796091428876, |
| "learning_rate": 9.856323113853632e-06, |
| "loss": 0.2275, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.15391791044776118, |
| "grad_norm": 1.3326459434111209, |
| "learning_rate": 9.854574183958849e-06, |
| "loss": 0.2273, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.15485074626865672, |
| "grad_norm": 1.3422225620535737, |
| "learning_rate": 9.852814830859827e-06, |
| "loss": 0.2599, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.15578358208955223, |
| "grad_norm": 1.3195632256694576, |
| "learning_rate": 9.851045058334055e-06, |
| "loss": 0.1826, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.15671641791044777, |
| "grad_norm": 1.266822639136575, |
| "learning_rate": 9.849264870181393e-06, |
| "loss": 0.2173, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.15764925373134328, |
| "grad_norm": 1.3815334501848933, |
| "learning_rate": 9.847474270224062e-06, |
| "loss": 0.2077, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.15858208955223882, |
| "grad_norm": 1.400081571893218, |
| "learning_rate": 9.845673262306643e-06, |
| "loss": 0.2722, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.15951492537313433, |
| "grad_norm": 1.4215899008723631, |
| "learning_rate": 9.843861850296058e-06, |
| "loss": 0.2297, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.16044776119402984, |
| "grad_norm": 1.3378155864059595, |
| "learning_rate": 9.842040038081572e-06, |
| "loss": 0.2473, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.16138059701492538, |
| "grad_norm": 1.3874487537624232, |
| "learning_rate": 9.840207829574777e-06, |
| "loss": 0.2578, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.1623134328358209, |
| "grad_norm": 1.2370882678550728, |
| "learning_rate": 9.838365228709588e-06, |
| "loss": 0.1954, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.16324626865671643, |
| "grad_norm": 1.3358518993309685, |
| "learning_rate": 9.836512239442237e-06, |
| "loss": 0.2406, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.16417910447761194, |
| "grad_norm": 1.4150781522345088, |
| "learning_rate": 9.834648865751254e-06, |
| "loss": 0.2219, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.16511194029850745, |
| "grad_norm": 1.5355994840163563, |
| "learning_rate": 9.832775111637469e-06, |
| "loss": 0.2353, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.166044776119403, |
| "grad_norm": 1.58448880113716, |
| "learning_rate": 9.830890981124001e-06, |
| "loss": 0.2773, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.1669776119402985, |
| "grad_norm": 1.4064170913348892, |
| "learning_rate": 9.828996478256246e-06, |
| "loss": 0.2411, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.16791044776119404, |
| "grad_norm": 1.343334533340573, |
| "learning_rate": 9.827091607101871e-06, |
| "loss": 0.2452, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.16884328358208955, |
| "grad_norm": 1.3862492550886658, |
| "learning_rate": 9.825176371750802e-06, |
| "loss": 0.2269, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.16977611940298507, |
| "grad_norm": 1.4314862429416477, |
| "learning_rate": 9.823250776315223e-06, |
| "loss": 0.289, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.1707089552238806, |
| "grad_norm": 1.3897064722948196, |
| "learning_rate": 9.82131482492956e-06, |
| "loss": 0.2456, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.17164179104477612, |
| "grad_norm": 1.4425674269218023, |
| "learning_rate": 9.81936852175047e-06, |
| "loss": 0.2426, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.17257462686567165, |
| "grad_norm": 1.3179382745429815, |
| "learning_rate": 9.817411870956843e-06, |
| "loss": 0.2202, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.17350746268656717, |
| "grad_norm": 1.4657681026021574, |
| "learning_rate": 9.81544487674978e-06, |
| "loss": 0.278, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.17444029850746268, |
| "grad_norm": 1.39854851518916, |
| "learning_rate": 9.813467543352598e-06, |
| "loss": 0.2452, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.17537313432835822, |
| "grad_norm": 1.4897917428136873, |
| "learning_rate": 9.811479875010801e-06, |
| "loss": 0.2795, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.17630597014925373, |
| "grad_norm": 1.45635595333176, |
| "learning_rate": 9.809481875992097e-06, |
| "loss": 0.2349, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.17723880597014927, |
| "grad_norm": 1.4518420147728965, |
| "learning_rate": 9.807473550586368e-06, |
| "loss": 0.2513, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.17817164179104478, |
| "grad_norm": 1.4205330284183837, |
| "learning_rate": 9.805454903105663e-06, |
| "loss": 0.2035, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.1791044776119403, |
| "grad_norm": 1.4932540354115929, |
| "learning_rate": 9.803425937884202e-06, |
| "loss": 0.2407, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.18003731343283583, |
| "grad_norm": 1.4332138465693849, |
| "learning_rate": 9.801386659278354e-06, |
| "loss": 0.2444, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.18097014925373134, |
| "grad_norm": 1.432375521439532, |
| "learning_rate": 9.799337071666632e-06, |
| "loss": 0.2618, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.18190298507462688, |
| "grad_norm": 1.3471819631859887, |
| "learning_rate": 9.797277179449684e-06, |
| "loss": 0.2668, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.1828358208955224, |
| "grad_norm": 1.284197489128854, |
| "learning_rate": 9.79520698705028e-06, |
| "loss": 0.2015, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.1837686567164179, |
| "grad_norm": 1.3569140878727413, |
| "learning_rate": 9.793126498913313e-06, |
| "loss": 0.2431, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.18470149253731344, |
| "grad_norm": 1.38127819209215, |
| "learning_rate": 9.791035719505773e-06, |
| "loss": 0.2279, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.18563432835820895, |
| "grad_norm": 1.3106409372023844, |
| "learning_rate": 9.788934653316751e-06, |
| "loss": 0.2225, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.1865671641791045, |
| "grad_norm": 1.385041781717496, |
| "learning_rate": 9.786823304857424e-06, |
| "loss": 0.2577, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.1875, |
| "grad_norm": 1.3374693150632666, |
| "learning_rate": 9.784701678661045e-06, |
| "loss": 0.2055, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.1884328358208955, |
| "grad_norm": 1.2943022183043107, |
| "learning_rate": 9.782569779282936e-06, |
| "loss": 0.1775, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.18936567164179105, |
| "grad_norm": 1.4905925652544376, |
| "learning_rate": 9.780427611300474e-06, |
| "loss": 0.2704, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.19029850746268656, |
| "grad_norm": 1.3498812547956893, |
| "learning_rate": 9.778275179313084e-06, |
| "loss": 0.2123, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.1912313432835821, |
| "grad_norm": 1.4192006060768911, |
| "learning_rate": 9.776112487942234e-06, |
| "loss": 0.2311, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.1921641791044776, |
| "grad_norm": 1.318443539824902, |
| "learning_rate": 9.77393954183141e-06, |
| "loss": 0.212, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.19309701492537312, |
| "grad_norm": 1.4033355905689706, |
| "learning_rate": 9.771756345646126e-06, |
| "loss": 0.2445, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.19402985074626866, |
| "grad_norm": 1.4135479045174189, |
| "learning_rate": 9.769562904073896e-06, |
| "loss": 0.2259, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.19496268656716417, |
| "grad_norm": 1.5189371630436521, |
| "learning_rate": 9.767359221824236e-06, |
| "loss": 0.3221, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.1958955223880597, |
| "grad_norm": 1.501654221671077, |
| "learning_rate": 9.765145303628649e-06, |
| "loss": 0.2155, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.19682835820895522, |
| "grad_norm": 1.3422511623677083, |
| "learning_rate": 9.762921154240614e-06, |
| "loss": 0.213, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.19776119402985073, |
| "grad_norm": 1.417574830290805, |
| "learning_rate": 9.76068677843558e-06, |
| "loss": 0.2494, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.19869402985074627, |
| "grad_norm": 1.4303838899263415, |
| "learning_rate": 9.75844218101095e-06, |
| "loss": 0.229, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.19962686567164178, |
| "grad_norm": 1.3210532389813487, |
| "learning_rate": 9.756187366786077e-06, |
| "loss": 0.2287, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.20055970149253732, |
| "grad_norm": 1.4301375913667724, |
| "learning_rate": 9.753922340602245e-06, |
| "loss": 0.2262, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.20149253731343283, |
| "grad_norm": 1.464427337181014, |
| "learning_rate": 9.751647107322668e-06, |
| "loss": 0.2387, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.20242537313432835, |
| "grad_norm": 1.283762633828094, |
| "learning_rate": 9.749361671832478e-06, |
| "loss": 0.2051, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.20335820895522388, |
| "grad_norm": 1.4046271563330237, |
| "learning_rate": 9.747066039038707e-06, |
| "loss": 0.2809, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.2042910447761194, |
| "grad_norm": 1.3858875789586913, |
| "learning_rate": 9.744760213870286e-06, |
| "loss": 0.2559, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.20522388059701493, |
| "grad_norm": 1.4633410789476218, |
| "learning_rate": 9.742444201278022e-06, |
| "loss": 0.1991, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.20615671641791045, |
| "grad_norm": 1.35952836186008, |
| "learning_rate": 9.740118006234607e-06, |
| "loss": 0.2196, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.20708955223880596, |
| "grad_norm": 1.3170051893053112, |
| "learning_rate": 9.737781633734586e-06, |
| "loss": 0.1885, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.2080223880597015, |
| "grad_norm": 1.3364564968573158, |
| "learning_rate": 9.735435088794361e-06, |
| "loss": 0.2092, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.208955223880597, |
| "grad_norm": 1.310621521925476, |
| "learning_rate": 9.733078376452172e-06, |
| "loss": 0.1858, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.20988805970149255, |
| "grad_norm": 1.3305475178644435, |
| "learning_rate": 9.730711501768091e-06, |
| "loss": 0.2396, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.21082089552238806, |
| "grad_norm": 1.3635865870378607, |
| "learning_rate": 9.72833446982401e-06, |
| "loss": 0.2343, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.21175373134328357, |
| "grad_norm": 1.3892908120924656, |
| "learning_rate": 9.725947285723629e-06, |
| "loss": 0.2494, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.2126865671641791, |
| "grad_norm": 1.4923858063155087, |
| "learning_rate": 9.723549954592447e-06, |
| "loss": 0.2684, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.21361940298507462, |
| "grad_norm": 1.4794880593929247, |
| "learning_rate": 9.721142481577744e-06, |
| "loss": 0.2433, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.21455223880597016, |
| "grad_norm": 1.319052535367511, |
| "learning_rate": 9.718724871848581e-06, |
| "loss": 0.1957, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.21548507462686567, |
| "grad_norm": 1.4725370664727746, |
| "learning_rate": 9.716297130595784e-06, |
| "loss": 0.2691, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.21641791044776118, |
| "grad_norm": 1.450324974590314, |
| "learning_rate": 9.713859263031928e-06, |
| "loss": 0.2075, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.21735074626865672, |
| "grad_norm": 1.4029613700148216, |
| "learning_rate": 9.711411274391334e-06, |
| "loss": 0.2636, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.21828358208955223, |
| "grad_norm": 1.3218009630051155, |
| "learning_rate": 9.70895316993005e-06, |
| "loss": 0.1856, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.21921641791044777, |
| "grad_norm": 1.3772203562395018, |
| "learning_rate": 9.706484954925848e-06, |
| "loss": 0.2221, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.22014925373134328, |
| "grad_norm": 1.338495075466863, |
| "learning_rate": 9.704006634678205e-06, |
| "loss": 0.2422, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.22108208955223882, |
| "grad_norm": 1.3505260994538177, |
| "learning_rate": 9.701518214508295e-06, |
| "loss": 0.2219, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.22201492537313433, |
| "grad_norm": 1.3796800954684891, |
| "learning_rate": 9.69901969975898e-06, |
| "loss": 0.2599, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.22294776119402984, |
| "grad_norm": 1.3801269945094994, |
| "learning_rate": 9.696511095794794e-06, |
| "loss": 0.2658, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.22388059701492538, |
| "grad_norm": 1.373830630137388, |
| "learning_rate": 9.693992408001934e-06, |
| "loss": 0.2366, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.2248134328358209, |
| "grad_norm": 1.3099841410165376, |
| "learning_rate": 9.691463641788244e-06, |
| "loss": 0.2444, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.22574626865671643, |
| "grad_norm": 1.2221561716646219, |
| "learning_rate": 9.688924802583215e-06, |
| "loss": 0.1762, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.22667910447761194, |
| "grad_norm": 1.4100739023363644, |
| "learning_rate": 9.68637589583796e-06, |
| "loss": 0.27, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.22761194029850745, |
| "grad_norm": 1.3049181672879322, |
| "learning_rate": 9.683816927025212e-06, |
| "loss": 0.2358, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.228544776119403, |
| "grad_norm": 1.3168040345621081, |
| "learning_rate": 9.6812479016393e-06, |
| "loss": 0.2132, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.2294776119402985, |
| "grad_norm": 1.3491278430037894, |
| "learning_rate": 9.678668825196155e-06, |
| "loss": 0.225, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.23041044776119404, |
| "grad_norm": 1.3564580773647767, |
| "learning_rate": 9.676079703233283e-06, |
| "loss": 0.2174, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.23134328358208955, |
| "grad_norm": 1.3908729062228546, |
| "learning_rate": 9.673480541309761e-06, |
| "loss": 0.2413, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.23227611940298507, |
| "grad_norm": 1.269500269440636, |
| "learning_rate": 9.670871345006221e-06, |
| "loss": 0.1922, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.2332089552238806, |
| "grad_norm": 1.370700147884663, |
| "learning_rate": 9.66825211992484e-06, |
| "loss": 0.2288, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.23414179104477612, |
| "grad_norm": 1.267163085637987, |
| "learning_rate": 9.665622871689329e-06, |
| "loss": 0.2046, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.23507462686567165, |
| "grad_norm": 1.298510817211434, |
| "learning_rate": 9.662983605944918e-06, |
| "loss": 0.2151, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.23600746268656717, |
| "grad_norm": 1.311525030838766, |
| "learning_rate": 9.660334328358345e-06, |
| "loss": 0.2041, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.23694029850746268, |
| "grad_norm": 1.3768838650896877, |
| "learning_rate": 9.65767504461785e-06, |
| "loss": 0.2488, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.23787313432835822, |
| "grad_norm": 1.3453744719741791, |
| "learning_rate": 9.65500576043315e-06, |
| "loss": 0.2079, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.23880597014925373, |
| "grad_norm": 1.2776829624435933, |
| "learning_rate": 9.652326481535434e-06, |
| "loss": 0.1966, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.23973880597014927, |
| "grad_norm": 1.3767181262573056, |
| "learning_rate": 9.649637213677357e-06, |
| "loss": 0.2377, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.24067164179104478, |
| "grad_norm": 1.510423982807279, |
| "learning_rate": 9.646937962633014e-06, |
| "loss": 0.2917, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.2416044776119403, |
| "grad_norm": 1.387266789016929, |
| "learning_rate": 9.64422873419794e-06, |
| "loss": 0.2163, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.24253731343283583, |
| "grad_norm": 1.3166417891082647, |
| "learning_rate": 9.64150953418909e-06, |
| "loss": 0.2357, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.24347014925373134, |
| "grad_norm": 1.2797218591347883, |
| "learning_rate": 9.63878036844483e-06, |
| "loss": 0.2081, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.24440298507462688, |
| "grad_norm": 1.311554046142437, |
| "learning_rate": 9.636041242824921e-06, |
| "loss": 0.1977, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.2453358208955224, |
| "grad_norm": 1.3999553328296679, |
| "learning_rate": 9.63329216321051e-06, |
| "loss": 0.2363, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.2462686567164179, |
| "grad_norm": 1.3554101059225228, |
| "learning_rate": 9.630533135504118e-06, |
| "loss": 0.2508, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.24720149253731344, |
| "grad_norm": 1.3676131273735823, |
| "learning_rate": 9.627764165629623e-06, |
| "loss": 0.2192, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.24813432835820895, |
| "grad_norm": 1.3976149276187737, |
| "learning_rate": 9.624985259532251e-06, |
| "loss": 0.2292, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.2490671641791045, |
| "grad_norm": 1.5796116347031655, |
| "learning_rate": 9.622196423178562e-06, |
| "loss": 0.2759, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.5270801308560245, |
| "learning_rate": 9.619397662556434e-06, |
| "loss": 0.2455, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.25093283582089554, |
| "grad_norm": 1.414754787779726, |
| "learning_rate": 9.61658898367506e-06, |
| "loss": 0.1956, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.251865671641791, |
| "grad_norm": 1.313800531793292, |
| "learning_rate": 9.613770392564921e-06, |
| "loss": 0.2041, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.25279850746268656, |
| "grad_norm": 1.4419235484624284, |
| "learning_rate": 9.610941895277784e-06, |
| "loss": 0.2756, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.2537313432835821, |
| "grad_norm": 1.449298486490396, |
| "learning_rate": 9.608103497886687e-06, |
| "loss": 0.2678, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.25466417910447764, |
| "grad_norm": 1.413245202772997, |
| "learning_rate": 9.605255206485922e-06, |
| "loss": 0.2551, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.2555970149253731, |
| "grad_norm": 1.2714103244027373, |
| "learning_rate": 9.602397027191026e-06, |
| "loss": 0.1897, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.25652985074626866, |
| "grad_norm": 1.3662586560658077, |
| "learning_rate": 9.599528966138763e-06, |
| "loss": 0.2685, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.2574626865671642, |
| "grad_norm": 1.3062222445535137, |
| "learning_rate": 9.596651029487116e-06, |
| "loss": 0.2123, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.2583955223880597, |
| "grad_norm": 1.2910765824769777, |
| "learning_rate": 9.593763223415275e-06, |
| "loss": 0.2362, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.2593283582089552, |
| "grad_norm": 1.2822070852083836, |
| "learning_rate": 9.590865554123614e-06, |
| "loss": 0.2068, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.26026119402985076, |
| "grad_norm": 1.392777607498332, |
| "learning_rate": 9.587958027833691e-06, |
| "loss": 0.2229, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.26119402985074625, |
| "grad_norm": 1.4255656729973265, |
| "learning_rate": 9.585040650788222e-06, |
| "loss": 0.2477, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.2621268656716418, |
| "grad_norm": 1.4169296716396083, |
| "learning_rate": 9.582113429251076e-06, |
| "loss": 0.1878, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.2630597014925373, |
| "grad_norm": 1.3513572522860997, |
| "learning_rate": 9.579176369507262e-06, |
| "loss": 0.1998, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.26399253731343286, |
| "grad_norm": 1.3667340131720151, |
| "learning_rate": 9.576229477862905e-06, |
| "loss": 0.2251, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.26492537313432835, |
| "grad_norm": 1.2853412344374613, |
| "learning_rate": 9.573272760645248e-06, |
| "loss": 0.236, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.2658582089552239, |
| "grad_norm": 1.4691562800080749, |
| "learning_rate": 9.570306224202625e-06, |
| "loss": 0.3, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.2667910447761194, |
| "grad_norm": 1.3925927328135574, |
| "learning_rate": 9.567329874904456e-06, |
| "loss": 0.2249, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.2677238805970149, |
| "grad_norm": 1.439250276065806, |
| "learning_rate": 9.56434371914123e-06, |
| "loss": 0.2476, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.26865671641791045, |
| "grad_norm": 1.38564396436711, |
| "learning_rate": 9.561347763324484e-06, |
| "loss": 0.2584, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.269589552238806, |
| "grad_norm": 1.399725121507279, |
| "learning_rate": 9.55834201388681e-06, |
| "loss": 0.2033, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.27052238805970147, |
| "grad_norm": 1.4448814363169957, |
| "learning_rate": 9.555326477281816e-06, |
| "loss": 0.2303, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.271455223880597, |
| "grad_norm": 1.3032836699591415, |
| "learning_rate": 9.55230115998413e-06, |
| "loss": 0.1913, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.27238805970149255, |
| "grad_norm": 1.3592667166710317, |
| "learning_rate": 9.549266068489377e-06, |
| "loss": 0.1912, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.2733208955223881, |
| "grad_norm": 1.3387689709683435, |
| "learning_rate": 9.546221209314172e-06, |
| "loss": 0.2436, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.27425373134328357, |
| "grad_norm": 1.3735637776687901, |
| "learning_rate": 9.543166588996095e-06, |
| "loss": 0.2401, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.2751865671641791, |
| "grad_norm": 1.5670786625519744, |
| "learning_rate": 9.540102214093696e-06, |
| "loss": 0.3073, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.27611940298507465, |
| "grad_norm": 1.2913637891950454, |
| "learning_rate": 9.537028091186453e-06, |
| "loss": 0.205, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.27705223880597013, |
| "grad_norm": 1.419222985662341, |
| "learning_rate": 9.533944226874787e-06, |
| "loss": 0.2355, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.27798507462686567, |
| "grad_norm": 1.3369499615171254, |
| "learning_rate": 9.530850627780031e-06, |
| "loss": 0.2227, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.2789179104477612, |
| "grad_norm": 1.302876915331245, |
| "learning_rate": 9.527747300544417e-06, |
| "loss": 0.2379, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.2798507462686567, |
| "grad_norm": 1.3275069238503028, |
| "learning_rate": 9.524634251831064e-06, |
| "loss": 0.2041, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.28078358208955223, |
| "grad_norm": 1.3029244538742029, |
| "learning_rate": 9.521511488323968e-06, |
| "loss": 0.2101, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.28171641791044777, |
| "grad_norm": 1.4213044150670375, |
| "learning_rate": 9.518379016727979e-06, |
| "loss": 0.2401, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.2826492537313433, |
| "grad_norm": 1.3307001580231097, |
| "learning_rate": 9.515236843768796e-06, |
| "loss": 0.2359, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.2835820895522388, |
| "grad_norm": 1.3336151644523149, |
| "learning_rate": 9.512084976192944e-06, |
| "loss": 0.1733, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.28451492537313433, |
| "grad_norm": 1.2890758576612307, |
| "learning_rate": 9.508923420767761e-06, |
| "loss": 0.2354, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.28544776119402987, |
| "grad_norm": 1.449676011395037, |
| "learning_rate": 9.505752184281391e-06, |
| "loss": 0.2595, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.28638059701492535, |
| "grad_norm": 1.3083299671213158, |
| "learning_rate": 9.502571273542765e-06, |
| "loss": 0.1848, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.2873134328358209, |
| "grad_norm": 1.347052858071002, |
| "learning_rate": 9.499380695381577e-06, |
| "loss": 0.2129, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.28824626865671643, |
| "grad_norm": 1.3624392783860741, |
| "learning_rate": 9.496180456648287e-06, |
| "loss": 0.1911, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.2891791044776119, |
| "grad_norm": 1.384030158008443, |
| "learning_rate": 9.492970564214093e-06, |
| "loss": 0.2636, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.29011194029850745, |
| "grad_norm": 1.2657375947456138, |
| "learning_rate": 9.489751024970917e-06, |
| "loss": 0.2169, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.291044776119403, |
| "grad_norm": 1.336905624247156, |
| "learning_rate": 9.486521845831403e-06, |
| "loss": 0.2014, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.29197761194029853, |
| "grad_norm": 1.3592589856849937, |
| "learning_rate": 9.48328303372888e-06, |
| "loss": 0.2385, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.292910447761194, |
| "grad_norm": 1.4208885596358325, |
| "learning_rate": 9.480034595617374e-06, |
| "loss": 0.2375, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.29384328358208955, |
| "grad_norm": 1.439265171883172, |
| "learning_rate": 9.476776538471567e-06, |
| "loss": 0.2033, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.2947761194029851, |
| "grad_norm": 1.4281095450068615, |
| "learning_rate": 9.4735088692868e-06, |
| "loss": 0.2845, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.2957089552238806, |
| "grad_norm": 1.4346746831675432, |
| "learning_rate": 9.470231595079051e-06, |
| "loss": 0.2325, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.2966417910447761, |
| "grad_norm": 1.4014402224696572, |
| "learning_rate": 9.466944722884918e-06, |
| "loss": 0.1886, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.29757462686567165, |
| "grad_norm": 1.317389277924677, |
| "learning_rate": 9.463648259761613e-06, |
| "loss": 0.21, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.29850746268656714, |
| "grad_norm": 1.274308126049011, |
| "learning_rate": 9.460342212786933e-06, |
| "loss": 0.1835, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.2994402985074627, |
| "grad_norm": 1.3212156231617627, |
| "learning_rate": 9.45702658905926e-06, |
| "loss": 0.226, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.3003731343283582, |
| "grad_norm": 1.3164665027162752, |
| "learning_rate": 9.453701395697528e-06, |
| "loss": 0.2405, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.30130597014925375, |
| "grad_norm": 1.3515078744929137, |
| "learning_rate": 9.450366639841232e-06, |
| "loss": 0.2784, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.30223880597014924, |
| "grad_norm": 1.3701250089759658, |
| "learning_rate": 9.447022328650382e-06, |
| "loss": 0.2393, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.3031716417910448, |
| "grad_norm": 1.4467880233772275, |
| "learning_rate": 9.44366846930552e-06, |
| "loss": 0.2465, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.3041044776119403, |
| "grad_norm": 1.3771540876067927, |
| "learning_rate": 9.440305069007678e-06, |
| "loss": 0.2346, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.3050373134328358, |
| "grad_norm": 1.3565472388783033, |
| "learning_rate": 9.436932134978378e-06, |
| "loss": 0.2531, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.30597014925373134, |
| "grad_norm": 1.4340547477308785, |
| "learning_rate": 9.43354967445961e-06, |
| "loss": 0.252, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.3069029850746269, |
| "grad_norm": 1.332979343322144, |
| "learning_rate": 9.430157694713817e-06, |
| "loss": 0.2229, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.30783582089552236, |
| "grad_norm": 1.265965935369589, |
| "learning_rate": 9.426756203023886e-06, |
| "loss": 0.1874, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.3087686567164179, |
| "grad_norm": 1.3230512165885997, |
| "learning_rate": 9.42334520669312e-06, |
| "loss": 0.1967, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.30970149253731344, |
| "grad_norm": 1.3394167852727439, |
| "learning_rate": 9.419924713045234e-06, |
| "loss": 0.2153, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.310634328358209, |
| "grad_norm": 1.394569996114462, |
| "learning_rate": 9.416494729424334e-06, |
| "loss": 0.2437, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.31156716417910446, |
| "grad_norm": 1.3675625019234572, |
| "learning_rate": 9.413055263194902e-06, |
| "loss": 0.1944, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.3125, |
| "grad_norm": 1.4436671719611258, |
| "learning_rate": 9.409606321741776e-06, |
| "loss": 0.2441, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.31343283582089554, |
| "grad_norm": 1.3395252962767008, |
| "learning_rate": 9.406147912470142e-06, |
| "loss": 0.217, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.314365671641791, |
| "grad_norm": 1.2362809735457574, |
| "learning_rate": 9.402680042805517e-06, |
| "loss": 0.1843, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.31529850746268656, |
| "grad_norm": 1.4528930102963502, |
| "learning_rate": 9.399202720193723e-06, |
| "loss": 0.2532, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.3162313432835821, |
| "grad_norm": 1.4476764879536321, |
| "learning_rate": 9.395715952100882e-06, |
| "loss": 0.2267, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.31716417910447764, |
| "grad_norm": 1.3827760190955474, |
| "learning_rate": 9.392219746013399e-06, |
| "loss": 0.1961, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.3180970149253731, |
| "grad_norm": 1.3224792417888853, |
| "learning_rate": 9.38871410943794e-06, |
| "loss": 0.229, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.31902985074626866, |
| "grad_norm": 1.3670929236147178, |
| "learning_rate": 9.385199049901418e-06, |
| "loss": 0.2092, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.3199626865671642, |
| "grad_norm": 1.5700881422687811, |
| "learning_rate": 9.381674574950981e-06, |
| "loss": 0.2812, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.3208955223880597, |
| "grad_norm": 1.2819951775763399, |
| "learning_rate": 9.378140692153991e-06, |
| "loss": 0.2057, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.3218283582089552, |
| "grad_norm": 1.4025000620916979, |
| "learning_rate": 9.374597409098011e-06, |
| "loss": 0.2824, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.32276119402985076, |
| "grad_norm": 1.386974140069185, |
| "learning_rate": 9.371044733390786e-06, |
| "loss": 0.2108, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.32369402985074625, |
| "grad_norm": 1.2954081638008537, |
| "learning_rate": 9.367482672660226e-06, |
| "loss": 0.2173, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.3246268656716418, |
| "grad_norm": 1.3479486636153222, |
| "learning_rate": 9.363911234554394e-06, |
| "loss": 0.2013, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.3255597014925373, |
| "grad_norm": 1.3024944216710168, |
| "learning_rate": 9.360330426741488e-06, |
| "loss": 0.2264, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.32649253731343286, |
| "grad_norm": 1.336369716253106, |
| "learning_rate": 9.356740256909822e-06, |
| "loss": 0.1936, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.32742537313432835, |
| "grad_norm": 1.4184100938638582, |
| "learning_rate": 9.353140732767811e-06, |
| "loss": 0.2476, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.3283582089552239, |
| "grad_norm": 1.398208351719416, |
| "learning_rate": 9.349531862043952e-06, |
| "loss": 0.231, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.3292910447761194, |
| "grad_norm": 1.4263392740126681, |
| "learning_rate": 9.345913652486815e-06, |
| "loss": 0.2102, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.3302238805970149, |
| "grad_norm": 1.333836826585129, |
| "learning_rate": 9.342286111865023e-06, |
| "loss": 0.2261, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.33115671641791045, |
| "grad_norm": 1.2767788258210142, |
| "learning_rate": 9.338649247967221e-06, |
| "loss": 0.2374, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.332089552238806, |
| "grad_norm": 1.5017433864293555, |
| "learning_rate": 9.335003068602087e-06, |
| "loss": 0.2536, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.33302238805970147, |
| "grad_norm": 1.3395453592365296, |
| "learning_rate": 9.33134758159829e-06, |
| "loss": 0.2158, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.333955223880597, |
| "grad_norm": 1.1527112023803379, |
| "learning_rate": 9.32768279480449e-06, |
| "loss": 0.1606, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.33488805970149255, |
| "grad_norm": 1.4608914907807746, |
| "learning_rate": 9.32400871608931e-06, |
| "loss": 0.2198, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.3358208955223881, |
| "grad_norm": 1.4120297778863582, |
| "learning_rate": 9.320325353341325e-06, |
| "loss": 0.2271, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.33675373134328357, |
| "grad_norm": 1.3454385285721666, |
| "learning_rate": 9.316632714469044e-06, |
| "loss": 0.2207, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.3376865671641791, |
| "grad_norm": 1.4474892479226766, |
| "learning_rate": 9.312930807400893e-06, |
| "loss": 0.2536, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.33861940298507465, |
| "grad_norm": 1.3463722181697488, |
| "learning_rate": 9.309219640085196e-06, |
| "loss": 0.1933, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.33955223880597013, |
| "grad_norm": 1.28823123640731, |
| "learning_rate": 9.305499220490162e-06, |
| "loss": 0.2178, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.34048507462686567, |
| "grad_norm": 1.4294791094282566, |
| "learning_rate": 9.30176955660386e-06, |
| "loss": 0.2698, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.3414179104477612, |
| "grad_norm": 1.493909659198492, |
| "learning_rate": 9.298030656434215e-06, |
| "loss": 0.2423, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.3423507462686567, |
| "grad_norm": 1.503880145512656, |
| "learning_rate": 9.294282528008981e-06, |
| "loss": 0.2257, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.34328358208955223, |
| "grad_norm": 1.333114077695562, |
| "learning_rate": 9.290525179375722e-06, |
| "loss": 0.1707, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.34421641791044777, |
| "grad_norm": 1.2349069876312035, |
| "learning_rate": 9.286758618601801e-06, |
| "loss": 0.1934, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.3451492537313433, |
| "grad_norm": 1.4718018200541485, |
| "learning_rate": 9.28298285377436e-06, |
| "loss": 0.2482, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.3460820895522388, |
| "grad_norm": 1.5413975233614134, |
| "learning_rate": 9.279197893000305e-06, |
| "loss": 0.2086, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.34701492537313433, |
| "grad_norm": 1.4107305952629499, |
| "learning_rate": 9.275403744406282e-06, |
| "loss": 0.2305, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.34794776119402987, |
| "grad_norm": 1.2780210723834629, |
| "learning_rate": 9.271600416138669e-06, |
| "loss": 0.2093, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.34888059701492535, |
| "grad_norm": 1.3697674107174014, |
| "learning_rate": 9.26778791636355e-06, |
| "loss": 0.223, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.3498134328358209, |
| "grad_norm": 1.4273089435605406, |
| "learning_rate": 9.263966253266705e-06, |
| "loss": 0.2368, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.35074626865671643, |
| "grad_norm": 1.3805965144164551, |
| "learning_rate": 9.260135435053583e-06, |
| "loss": 0.2178, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.3516791044776119, |
| "grad_norm": 1.3842697410395384, |
| "learning_rate": 9.256295469949295e-06, |
| "loss": 0.2333, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.35261194029850745, |
| "grad_norm": 1.302286084633613, |
| "learning_rate": 9.252446366198586e-06, |
| "loss": 0.1893, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.353544776119403, |
| "grad_norm": 1.361784401424521, |
| "learning_rate": 9.248588132065828e-06, |
| "loss": 0.2469, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.35447761194029853, |
| "grad_norm": 1.356482260144131, |
| "learning_rate": 9.244720775834993e-06, |
| "loss": 0.2284, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.355410447761194, |
| "grad_norm": 1.3825317630910496, |
| "learning_rate": 9.240844305809641e-06, |
| "loss": 0.2309, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.35634328358208955, |
| "grad_norm": 1.3391371234122509, |
| "learning_rate": 9.2369587303129e-06, |
| "loss": 0.2131, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.3572761194029851, |
| "grad_norm": 1.4748961962979945, |
| "learning_rate": 9.233064057687444e-06, |
| "loss": 0.2088, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.3582089552238806, |
| "grad_norm": 1.4605973374859904, |
| "learning_rate": 9.229160296295488e-06, |
| "loss": 0.2232, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.3591417910447761, |
| "grad_norm": 1.3009340090036816, |
| "learning_rate": 9.225247454518752e-06, |
| "loss": 0.1865, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.36007462686567165, |
| "grad_norm": 1.337221511833832, |
| "learning_rate": 9.221325540758459e-06, |
| "loss": 0.1986, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.36100746268656714, |
| "grad_norm": 1.4010446178744635, |
| "learning_rate": 9.217394563435306e-06, |
| "loss": 0.2581, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.3619402985074627, |
| "grad_norm": 1.2816959963616372, |
| "learning_rate": 9.213454530989454e-06, |
| "loss": 0.1839, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.3628731343283582, |
| "grad_norm": 1.3669145076068119, |
| "learning_rate": 9.209505451880504e-06, |
| "loss": 0.1982, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.36380597014925375, |
| "grad_norm": 1.3905705737339125, |
| "learning_rate": 9.205547334587483e-06, |
| "loss": 0.2397, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.36473880597014924, |
| "grad_norm": 1.2603360041662903, |
| "learning_rate": 9.201580187608818e-06, |
| "loss": 0.2043, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.3656716417910448, |
| "grad_norm": 1.5586670110559833, |
| "learning_rate": 9.197604019462334e-06, |
| "loss": 0.2125, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.3666044776119403, |
| "grad_norm": 1.2690662617236015, |
| "learning_rate": 9.193618838685213e-06, |
| "loss": 0.1993, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.3675373134328358, |
| "grad_norm": 1.341769910680454, |
| "learning_rate": 9.189624653833997e-06, |
| "loss": 0.2118, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.36847014925373134, |
| "grad_norm": 1.3367513943714662, |
| "learning_rate": 9.185621473484558e-06, |
| "loss": 0.2473, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.3694029850746269, |
| "grad_norm": 1.3820435433945204, |
| "learning_rate": 9.18160930623208e-06, |
| "loss": 0.2119, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.37033582089552236, |
| "grad_norm": 1.2943072011184742, |
| "learning_rate": 9.177588160691044e-06, |
| "loss": 0.2035, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.3712686567164179, |
| "grad_norm": 1.3473164568630493, |
| "learning_rate": 9.173558045495212e-06, |
| "loss": 0.1908, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.37220149253731344, |
| "grad_norm": 1.4333605080144285, |
| "learning_rate": 9.169518969297598e-06, |
| "loss": 0.2524, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.373134328358209, |
| "grad_norm": 1.3166066985246667, |
| "learning_rate": 9.165470940770458e-06, |
| "loss": 0.2094, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.37406716417910446, |
| "grad_norm": 1.4239397473404334, |
| "learning_rate": 9.161413968605274e-06, |
| "loss": 0.2357, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.375, |
| "grad_norm": 1.3411122706114262, |
| "learning_rate": 9.157348061512728e-06, |
| "loss": 0.2254, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.37593283582089554, |
| "grad_norm": 1.2846258510307025, |
| "learning_rate": 9.15327322822268e-06, |
| "loss": 0.1828, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.376865671641791, |
| "grad_norm": 1.2409415483459634, |
| "learning_rate": 9.149189477484169e-06, |
| "loss": 0.2003, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.37779850746268656, |
| "grad_norm": 1.2227527914049607, |
| "learning_rate": 9.145096818065365e-06, |
| "loss": 0.2259, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.3787313432835821, |
| "grad_norm": 1.4138015334010903, |
| "learning_rate": 9.140995258753577e-06, |
| "loss": 0.2522, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.37966417910447764, |
| "grad_norm": 1.4737839013873402, |
| "learning_rate": 9.136884808355219e-06, |
| "loss": 0.2929, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.3805970149253731, |
| "grad_norm": 1.4308442254453198, |
| "learning_rate": 9.132765475695795e-06, |
| "loss": 0.2169, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.38152985074626866, |
| "grad_norm": 1.290712133189965, |
| "learning_rate": 9.128637269619878e-06, |
| "loss": 0.2192, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.3824626865671642, |
| "grad_norm": 1.3116978703979065, |
| "learning_rate": 9.124500198991098e-06, |
| "loss": 0.2167, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.3833955223880597, |
| "grad_norm": 1.2736502633453959, |
| "learning_rate": 9.12035427269211e-06, |
| "loss": 0.2052, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.3843283582089552, |
| "grad_norm": 1.3231801576512818, |
| "learning_rate": 9.116199499624596e-06, |
| "loss": 0.2396, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.38526119402985076, |
| "grad_norm": 1.2853453099268355, |
| "learning_rate": 9.112035888709219e-06, |
| "loss": 0.2299, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.38619402985074625, |
| "grad_norm": 1.2938537527377314, |
| "learning_rate": 9.107863448885624e-06, |
| "loss": 0.1803, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.3871268656716418, |
| "grad_norm": 1.4049132483353646, |
| "learning_rate": 9.103682189112413e-06, |
| "loss": 0.2356, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.3880597014925373, |
| "grad_norm": 1.2872197040412747, |
| "learning_rate": 9.099492118367123e-06, |
| "loss": 0.1891, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.38899253731343286, |
| "grad_norm": 1.3013097402971163, |
| "learning_rate": 9.095293245646212e-06, |
| "loss": 0.2198, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.38992537313432835, |
| "grad_norm": 1.3448050767428346, |
| "learning_rate": 9.091085579965034e-06, |
| "loss": 0.2813, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.3908582089552239, |
| "grad_norm": 1.3765739745948038, |
| "learning_rate": 9.08686913035782e-06, |
| "loss": 0.2607, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.3917910447761194, |
| "grad_norm": 1.2736711574783002, |
| "learning_rate": 9.082643905877665e-06, |
| "loss": 0.1773, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.3927238805970149, |
| "grad_norm": 1.2635354964139183, |
| "learning_rate": 9.078409915596506e-06, |
| "loss": 0.2189, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.39365671641791045, |
| "grad_norm": 1.3303045131479903, |
| "learning_rate": 9.074167168605096e-06, |
| "loss": 0.1938, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.394589552238806, |
| "grad_norm": 1.2925868456600165, |
| "learning_rate": 9.069915674012995e-06, |
| "loss": 0.1588, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.39552238805970147, |
| "grad_norm": 1.3442352091237408, |
| "learning_rate": 9.065655440948536e-06, |
| "loss": 0.2465, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.396455223880597, |
| "grad_norm": 1.3209591762725579, |
| "learning_rate": 9.061386478558822e-06, |
| "loss": 0.2311, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.39738805970149255, |
| "grad_norm": 1.3202035679156854, |
| "learning_rate": 9.057108796009697e-06, |
| "loss": 0.2542, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.3983208955223881, |
| "grad_norm": 1.2533498431687355, |
| "learning_rate": 9.052822402485727e-06, |
| "loss": 0.1865, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.39925373134328357, |
| "grad_norm": 1.2299852771240687, |
| "learning_rate": 9.048527307190182e-06, |
| "loss": 0.2163, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.4001865671641791, |
| "grad_norm": 1.2503929440804913, |
| "learning_rate": 9.044223519345016e-06, |
| "loss": 0.1874, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.40111940298507465, |
| "grad_norm": 1.3216934001751737, |
| "learning_rate": 9.039911048190843e-06, |
| "loss": 0.2123, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.40205223880597013, |
| "grad_norm": 1.3239969525003676, |
| "learning_rate": 9.035589902986928e-06, |
| "loss": 0.2043, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.40298507462686567, |
| "grad_norm": 1.412007085946633, |
| "learning_rate": 9.03126009301115e-06, |
| "loss": 0.2275, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.4039179104477612, |
| "grad_norm": 1.3728799593814967, |
| "learning_rate": 9.026921627560001e-06, |
| "loss": 0.2259, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.4048507462686567, |
| "grad_norm": 1.358199171602574, |
| "learning_rate": 9.022574515948554e-06, |
| "loss": 0.2502, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.40578358208955223, |
| "grad_norm": 1.2294059495823382, |
| "learning_rate": 9.018218767510445e-06, |
| "loss": 0.2121, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.40671641791044777, |
| "grad_norm": 1.4141686440372585, |
| "learning_rate": 9.013854391597856e-06, |
| "loss": 0.2779, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.4076492537313433, |
| "grad_norm": 1.2946191221322243, |
| "learning_rate": 9.009481397581489e-06, |
| "loss": 0.1833, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.4085820895522388, |
| "grad_norm": 1.351996529947262, |
| "learning_rate": 9.005099794850554e-06, |
| "loss": 0.2359, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.40951492537313433, |
| "grad_norm": 1.301343400669271, |
| "learning_rate": 9.000709592812743e-06, |
| "loss": 0.2247, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.41044776119402987, |
| "grad_norm": 1.352989620123602, |
| "learning_rate": 8.996310800894215e-06, |
| "loss": 0.2334, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.41138059701492535, |
| "grad_norm": 1.3175223253148902, |
| "learning_rate": 8.991903428539566e-06, |
| "loss": 0.244, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.4123134328358209, |
| "grad_norm": 1.2973696190684876, |
| "learning_rate": 8.987487485211817e-06, |
| "loss": 0.2017, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.41324626865671643, |
| "grad_norm": 1.3589491444232285, |
| "learning_rate": 8.983062980392394e-06, |
| "loss": 0.2234, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.4141791044776119, |
| "grad_norm": 1.3714705942529328, |
| "learning_rate": 8.978629923581104e-06, |
| "loss": 0.2104, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.41511194029850745, |
| "grad_norm": 1.2643963582167521, |
| "learning_rate": 8.974188324296115e-06, |
| "loss": 0.2061, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.416044776119403, |
| "grad_norm": 1.374417969556956, |
| "learning_rate": 8.969738192073939e-06, |
| "loss": 0.2379, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.41697761194029853, |
| "grad_norm": 1.2795266726469274, |
| "learning_rate": 8.965279536469406e-06, |
| "loss": 0.1792, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.417910447761194, |
| "grad_norm": 1.539527566481072, |
| "learning_rate": 8.960812367055646e-06, |
| "loss": 0.2795, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.41884328358208955, |
| "grad_norm": 1.3966125864951573, |
| "learning_rate": 8.956336693424076e-06, |
| "loss": 0.2622, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.4197761194029851, |
| "grad_norm": 1.3281178679930612, |
| "learning_rate": 8.951852525184361e-06, |
| "loss": 0.2047, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.4207089552238806, |
| "grad_norm": 1.343025406228925, |
| "learning_rate": 8.947359871964415e-06, |
| "loss": 0.2581, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.4216417910447761, |
| "grad_norm": 1.2642884902145128, |
| "learning_rate": 8.94285874341036e-06, |
| "loss": 0.2315, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.42257462686567165, |
| "grad_norm": 1.2777621737129141, |
| "learning_rate": 8.938349149186527e-06, |
| "loss": 0.2081, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.42350746268656714, |
| "grad_norm": 1.3748536858035563, |
| "learning_rate": 8.933831098975416e-06, |
| "loss": 0.2615, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.4244402985074627, |
| "grad_norm": 1.331415513725724, |
| "learning_rate": 8.929304602477681e-06, |
| "loss": 0.2468, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.4253731343283582, |
| "grad_norm": 1.2761378826109053, |
| "learning_rate": 8.924769669412117e-06, |
| "loss": 0.2239, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.42630597014925375, |
| "grad_norm": 1.284142601434654, |
| "learning_rate": 8.92022630951563e-06, |
| "loss": 0.2071, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.42723880597014924, |
| "grad_norm": 1.3209951417631995, |
| "learning_rate": 8.915674532543218e-06, |
| "loss": 0.2172, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.4281716417910448, |
| "grad_norm": 1.271616268742621, |
| "learning_rate": 8.911114348267954e-06, |
| "loss": 0.22, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.4291044776119403, |
| "grad_norm": 1.257564050424032, |
| "learning_rate": 8.906545766480961e-06, |
| "loss": 0.2041, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.4300373134328358, |
| "grad_norm": 1.2645376908104489, |
| "learning_rate": 8.90196879699139e-06, |
| "loss": 0.1872, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.43097014925373134, |
| "grad_norm": 1.4142300040480527, |
| "learning_rate": 8.897383449626407e-06, |
| "loss": 0.2538, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.4319029850746269, |
| "grad_norm": 1.3758724410443472, |
| "learning_rate": 8.892789734231158e-06, |
| "loss": 0.2524, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.43283582089552236, |
| "grad_norm": 1.3530802717750985, |
| "learning_rate": 8.888187660668762e-06, |
| "loss": 0.2343, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.4337686567164179, |
| "grad_norm": 1.410062910966012, |
| "learning_rate": 8.88357723882028e-06, |
| "loss": 0.2316, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.43470149253731344, |
| "grad_norm": 1.2537537090855193, |
| "learning_rate": 8.878958478584702e-06, |
| "loss": 0.2089, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.435634328358209, |
| "grad_norm": 1.2951276863764574, |
| "learning_rate": 8.87433138987892e-06, |
| "loss": 0.2362, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.43656716417910446, |
| "grad_norm": 1.2881126860233525, |
| "learning_rate": 8.869695982637703e-06, |
| "loss": 0.2003, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.4375, |
| "grad_norm": 1.277822481224725, |
| "learning_rate": 8.865052266813686e-06, |
| "loss": 0.2026, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.43843283582089554, |
| "grad_norm": 1.4012002589366537, |
| "learning_rate": 8.86040025237734e-06, |
| "loss": 0.2559, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.439365671641791, |
| "grad_norm": 1.4071481786290667, |
| "learning_rate": 8.855739949316957e-06, |
| "loss": 0.2396, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.44029850746268656, |
| "grad_norm": 1.2757247732597385, |
| "learning_rate": 8.851071367638625e-06, |
| "loss": 0.2242, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.4412313432835821, |
| "grad_norm": 1.394162739130695, |
| "learning_rate": 8.846394517366202e-06, |
| "loss": 0.2304, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.44216417910447764, |
| "grad_norm": 1.2545998211941618, |
| "learning_rate": 8.841709408541304e-06, |
| "loss": 0.2248, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.4430970149253731, |
| "grad_norm": 1.3407244194077155, |
| "learning_rate": 8.837016051223281e-06, |
| "loss": 0.2599, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.44402985074626866, |
| "grad_norm": 1.3149582884940112, |
| "learning_rate": 8.832314455489188e-06, |
| "loss": 0.2129, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.4449626865671642, |
| "grad_norm": 1.332806260986635, |
| "learning_rate": 8.827604631433771e-06, |
| "loss": 0.2353, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.4458955223880597, |
| "grad_norm": 1.219532308210816, |
| "learning_rate": 8.822886589169443e-06, |
| "loss": 0.2008, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.4468283582089552, |
| "grad_norm": 1.2571868893463725, |
| "learning_rate": 8.818160338826262e-06, |
| "loss": 0.1888, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.44776119402985076, |
| "grad_norm": 1.250447624595975, |
| "learning_rate": 8.81342589055191e-06, |
| "loss": 0.2034, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.44869402985074625, |
| "grad_norm": 1.432874296705188, |
| "learning_rate": 8.80868325451167e-06, |
| "loss": 0.2098, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.4496268656716418, |
| "grad_norm": 1.1940303384477422, |
| "learning_rate": 8.803932440888404e-06, |
| "loss": 0.1792, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.4505597014925373, |
| "grad_norm": 1.3281007803218612, |
| "learning_rate": 8.799173459882534e-06, |
| "loss": 0.2309, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.45149253731343286, |
| "grad_norm": 1.4531661248443652, |
| "learning_rate": 8.794406321712017e-06, |
| "loss": 0.2171, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.45242537313432835, |
| "grad_norm": 1.2485056199303581, |
| "learning_rate": 8.789631036612324e-06, |
| "loss": 0.1974, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.4533582089552239, |
| "grad_norm": 1.3147902796079114, |
| "learning_rate": 8.784847614836418e-06, |
| "loss": 0.2238, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.4542910447761194, |
| "grad_norm": 1.2516311910221467, |
| "learning_rate": 8.780056066654734e-06, |
| "loss": 0.188, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.4552238805970149, |
| "grad_norm": 1.418005163198149, |
| "learning_rate": 8.775256402355155e-06, |
| "loss": 0.1961, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.45615671641791045, |
| "grad_norm": 1.4460731180502704, |
| "learning_rate": 8.770448632242984e-06, |
| "loss": 0.2675, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.457089552238806, |
| "grad_norm": 1.418299887927644, |
| "learning_rate": 8.765632766640937e-06, |
| "loss": 0.2119, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.45802238805970147, |
| "grad_norm": 1.4354119801995215, |
| "learning_rate": 8.760808815889105e-06, |
| "loss": 0.2301, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.458955223880597, |
| "grad_norm": 1.2713842322726168, |
| "learning_rate": 8.755976790344945e-06, |
| "loss": 0.2129, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.45988805970149255, |
| "grad_norm": 1.3143191873476805, |
| "learning_rate": 8.751136700383243e-06, |
| "loss": 0.2182, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.4608208955223881, |
| "grad_norm": 1.348721547945487, |
| "learning_rate": 8.746288556396104e-06, |
| "loss": 0.2323, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.46175373134328357, |
| "grad_norm": 1.3836440530622345, |
| "learning_rate": 8.74143236879293e-06, |
| "loss": 0.2389, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.4626865671641791, |
| "grad_norm": 1.2376424574935958, |
| "learning_rate": 8.736568148000386e-06, |
| "loss": 0.19, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.46361940298507465, |
| "grad_norm": 1.2986566312073538, |
| "learning_rate": 8.731695904462389e-06, |
| "loss": 0.2358, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.46455223880597013, |
| "grad_norm": 1.2552562207036049, |
| "learning_rate": 8.726815648640084e-06, |
| "loss": 0.1946, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.46548507462686567, |
| "grad_norm": 1.2972481020683562, |
| "learning_rate": 8.721927391011812e-06, |
| "loss": 0.2122, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.4664179104477612, |
| "grad_norm": 1.383067739478344, |
| "learning_rate": 8.7170311420731e-06, |
| "loss": 0.2672, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.4664179104477612, |
| "eval_loss": 0.23051877319812775, |
| "eval_runtime": 3.439, |
| "eval_samples_per_second": 25.298, |
| "eval_steps_per_second": 6.397, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.4673507462686567, |
| "grad_norm": 1.2632786993125102, |
| "learning_rate": 8.712126912336631e-06, |
| "loss": 0.207, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.46828358208955223, |
| "grad_norm": 1.2435472593530392, |
| "learning_rate": 8.707214712332227e-06, |
| "loss": 0.1969, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.46921641791044777, |
| "grad_norm": 1.371389841592427, |
| "learning_rate": 8.702294552606815e-06, |
| "loss": 0.2569, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.4701492537313433, |
| "grad_norm": 1.1936395590557582, |
| "learning_rate": 8.697366443724424e-06, |
| "loss": 0.1828, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.4710820895522388, |
| "grad_norm": 1.1722012737846563, |
| "learning_rate": 8.692430396266138e-06, |
| "loss": 0.1936, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.47201492537313433, |
| "grad_norm": 1.403394574957527, |
| "learning_rate": 8.687486420830093e-06, |
| "loss": 0.2647, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.47294776119402987, |
| "grad_norm": 1.2870699600150621, |
| "learning_rate": 8.682534528031447e-06, |
| "loss": 0.212, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.47388059701492535, |
| "grad_norm": 1.3177979647832148, |
| "learning_rate": 8.677574728502355e-06, |
| "loss": 0.2261, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.4748134328358209, |
| "grad_norm": 1.3767619394332882, |
| "learning_rate": 8.67260703289195e-06, |
| "loss": 0.2443, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.47574626865671643, |
| "grad_norm": 1.274531083090423, |
| "learning_rate": 8.667631451866317e-06, |
| "loss": 0.1821, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.4766791044776119, |
| "grad_norm": 1.4471466002963596, |
| "learning_rate": 8.662647996108475e-06, |
| "loss": 0.2135, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.47761194029850745, |
| "grad_norm": 1.3413363690939575, |
| "learning_rate": 8.657656676318346e-06, |
| "loss": 0.2019, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.478544776119403, |
| "grad_norm": 1.3457177629223023, |
| "learning_rate": 8.65265750321274e-06, |
| "loss": 0.2698, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.47947761194029853, |
| "grad_norm": 1.2975394462316192, |
| "learning_rate": 8.64765048752533e-06, |
| "loss": 0.2537, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.480410447761194, |
| "grad_norm": 1.380115000712049, |
| "learning_rate": 8.642635640006623e-06, |
| "loss": 0.2151, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.48134328358208955, |
| "grad_norm": 1.2294150732600446, |
| "learning_rate": 8.637612971423945e-06, |
| "loss": 0.1742, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.4822761194029851, |
| "grad_norm": 1.3408677173066745, |
| "learning_rate": 8.632582492561414e-06, |
| "loss": 0.2137, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.4832089552238806, |
| "grad_norm": 1.2584110827285158, |
| "learning_rate": 8.627544214219918e-06, |
| "loss": 0.2281, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.4841417910447761, |
| "grad_norm": 1.4625077729613687, |
| "learning_rate": 8.622498147217091e-06, |
| "loss": 0.2527, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.48507462686567165, |
| "grad_norm": 1.333638345288828, |
| "learning_rate": 8.617444302387288e-06, |
| "loss": 0.1941, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.48600746268656714, |
| "grad_norm": 1.2537139376845916, |
| "learning_rate": 8.612382690581567e-06, |
| "loss": 0.2236, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.4869402985074627, |
| "grad_norm": 1.2840159146700987, |
| "learning_rate": 8.607313322667657e-06, |
| "loss": 0.2346, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.4878731343283582, |
| "grad_norm": 1.391601937565303, |
| "learning_rate": 8.602236209529948e-06, |
| "loss": 0.224, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.48880597014925375, |
| "grad_norm": 1.3247869152095644, |
| "learning_rate": 8.597151362069452e-06, |
| "loss": 0.2217, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.48973880597014924, |
| "grad_norm": 1.4021254940584056, |
| "learning_rate": 8.59205879120379e-06, |
| "loss": 0.201, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.4906716417910448, |
| "grad_norm": 1.3157615398452691, |
| "learning_rate": 8.58695850786717e-06, |
| "loss": 0.2187, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.4916044776119403, |
| "grad_norm": 1.3532240240654663, |
| "learning_rate": 8.581850523010353e-06, |
| "loss": 0.2518, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.4925373134328358, |
| "grad_norm": 1.3134374105006694, |
| "learning_rate": 8.576734847600639e-06, |
| "loss": 0.2012, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.49347014925373134, |
| "grad_norm": 1.3550733246495734, |
| "learning_rate": 8.571611492621839e-06, |
| "loss": 0.1959, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.4944029850746269, |
| "grad_norm": 1.3414829685575018, |
| "learning_rate": 8.566480469074256e-06, |
| "loss": 0.2169, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.49533582089552236, |
| "grad_norm": 1.390360326736497, |
| "learning_rate": 8.561341787974653e-06, |
| "loss": 0.2779, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.4962686567164179, |
| "grad_norm": 1.2680890882771152, |
| "learning_rate": 8.55619546035624e-06, |
| "loss": 0.2351, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.49720149253731344, |
| "grad_norm": 1.262419636891151, |
| "learning_rate": 8.55104149726864e-06, |
| "loss": 0.2307, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.498134328358209, |
| "grad_norm": 1.2957731983042295, |
| "learning_rate": 8.545879909777872e-06, |
| "loss": 0.2002, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.49906716417910446, |
| "grad_norm": 1.2538569059905391, |
| "learning_rate": 8.540710708966326e-06, |
| "loss": 0.214, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.2924302879204521, |
| "learning_rate": 8.535533905932739e-06, |
| "loss": 0.2009, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.5009328358208955, |
| "grad_norm": 1.2711717751935465, |
| "learning_rate": 8.530349511792165e-06, |
| "loss": 0.214, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.5018656716417911, |
| "grad_norm": 1.3427986027184053, |
| "learning_rate": 8.525157537675966e-06, |
| "loss": 0.2289, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.5027985074626866, |
| "grad_norm": 1.3073171392549023, |
| "learning_rate": 8.519957994731768e-06, |
| "loss": 0.2105, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.503731343283582, |
| "grad_norm": 1.2722660142973075, |
| "learning_rate": 8.514750894123463e-06, |
| "loss": 0.1889, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.5046641791044776, |
| "grad_norm": 1.2452516022238156, |
| "learning_rate": 8.509536247031152e-06, |
| "loss": 0.2055, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.5055970149253731, |
| "grad_norm": 1.2974718233932705, |
| "learning_rate": 8.504314064651154e-06, |
| "loss": 0.1962, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.5065298507462687, |
| "grad_norm": 1.389455409648838, |
| "learning_rate": 8.499084358195957e-06, |
| "loss": 0.2399, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.5074626865671642, |
| "grad_norm": 1.3616712771765143, |
| "learning_rate": 8.49384713889421e-06, |
| "loss": 0.2282, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.5083955223880597, |
| "grad_norm": 1.234256347358437, |
| "learning_rate": 8.488602417990687e-06, |
| "loss": 0.1911, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.5093283582089553, |
| "grad_norm": 1.5012470347965328, |
| "learning_rate": 8.483350206746277e-06, |
| "loss": 0.2228, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.5102611940298507, |
| "grad_norm": 1.3895212595531514, |
| "learning_rate": 8.478090516437947e-06, |
| "loss": 0.2267, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.5111940298507462, |
| "grad_norm": 1.3944450308287069, |
| "learning_rate": 8.472823358358716e-06, |
| "loss": 0.2124, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.5121268656716418, |
| "grad_norm": 1.241137196118042, |
| "learning_rate": 8.467548743817645e-06, |
| "loss": 0.2131, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.5130597014925373, |
| "grad_norm": 1.1941039288456845, |
| "learning_rate": 8.462266684139805e-06, |
| "loss": 0.1896, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.5139925373134329, |
| "grad_norm": 1.37141699422829, |
| "learning_rate": 8.456977190666247e-06, |
| "loss": 0.203, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.5149253731343284, |
| "grad_norm": 1.264274514723379, |
| "learning_rate": 8.451680274753986e-06, |
| "loss": 0.1992, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.5158582089552238, |
| "grad_norm": 1.2475581664048347, |
| "learning_rate": 8.446375947775976e-06, |
| "loss": 0.21, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.5167910447761194, |
| "grad_norm": 1.1750175255906279, |
| "learning_rate": 8.441064221121078e-06, |
| "loss": 0.1775, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.5177238805970149, |
| "grad_norm": 1.250151778052525, |
| "learning_rate": 8.435745106194043e-06, |
| "loss": 0.2067, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.5186567164179104, |
| "grad_norm": 1.3309654266729336, |
| "learning_rate": 8.430418614415488e-06, |
| "loss": 0.2342, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.519589552238806, |
| "grad_norm": 1.318453627016584, |
| "learning_rate": 8.425084757221864e-06, |
| "loss": 0.2055, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.5205223880597015, |
| "grad_norm": 1.418140023992704, |
| "learning_rate": 8.419743546065442e-06, |
| "loss": 0.2184, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.5214552238805971, |
| "grad_norm": 1.3959518735990981, |
| "learning_rate": 8.414394992414276e-06, |
| "loss": 0.2429, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.5223880597014925, |
| "grad_norm": 1.2598734786346006, |
| "learning_rate": 8.40903910775219e-06, |
| "loss": 0.1856, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.523320895522388, |
| "grad_norm": 1.3414907853633748, |
| "learning_rate": 8.403675903578745e-06, |
| "loss": 0.2222, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.5242537313432836, |
| "grad_norm": 1.3347668559036316, |
| "learning_rate": 8.398305391409221e-06, |
| "loss": 0.2392, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.5251865671641791, |
| "grad_norm": 1.3978351879329773, |
| "learning_rate": 8.392927582774586e-06, |
| "loss": 0.2579, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.5261194029850746, |
| "grad_norm": 1.2856902817689613, |
| "learning_rate": 8.387542489221477e-06, |
| "loss": 0.2355, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.5270522388059702, |
| "grad_norm": 1.3482815664102457, |
| "learning_rate": 8.38215012231217e-06, |
| "loss": 0.2072, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.5279850746268657, |
| "grad_norm": 1.2983598827313911, |
| "learning_rate": 8.376750493624556e-06, |
| "loss": 0.2077, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.5289179104477612, |
| "grad_norm": 1.3895329159962166, |
| "learning_rate": 8.371343614752124e-06, |
| "loss": 0.2021, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.5298507462686567, |
| "grad_norm": 1.3295405138308913, |
| "learning_rate": 8.36592949730392e-06, |
| "loss": 0.199, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.5307835820895522, |
| "grad_norm": 1.283862155323016, |
| "learning_rate": 8.360508152904544e-06, |
| "loss": 0.2216, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.5317164179104478, |
| "grad_norm": 1.24320649554638, |
| "learning_rate": 8.355079593194102e-06, |
| "loss": 0.1912, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.5326492537313433, |
| "grad_norm": 1.4117502387459255, |
| "learning_rate": 8.349643829828198e-06, |
| "loss": 0.2304, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.5335820895522388, |
| "grad_norm": 1.3931937360094073, |
| "learning_rate": 8.344200874477901e-06, |
| "loss": 0.2254, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.5345149253731343, |
| "grad_norm": 1.3523007674514218, |
| "learning_rate": 8.338750738829723e-06, |
| "loss": 0.2344, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.5354477611940298, |
| "grad_norm": 1.2450871950282771, |
| "learning_rate": 8.33329343458559e-06, |
| "loss": 0.2088, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.5363805970149254, |
| "grad_norm": 1.3441841407397146, |
| "learning_rate": 8.327828973462823e-06, |
| "loss": 0.2562, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.5373134328358209, |
| "grad_norm": 1.301540588732784, |
| "learning_rate": 8.32235736719411e-06, |
| "loss": 0.2059, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.5382462686567164, |
| "grad_norm": 1.2306114909644439, |
| "learning_rate": 8.316878627527474e-06, |
| "loss": 0.2082, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.539179104477612, |
| "grad_norm": 1.26349335480942, |
| "learning_rate": 8.311392766226261e-06, |
| "loss": 0.2258, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.5401119402985075, |
| "grad_norm": 1.2190459793055979, |
| "learning_rate": 8.305899795069102e-06, |
| "loss": 0.2055, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.5410447761194029, |
| "grad_norm": 1.234466722835294, |
| "learning_rate": 8.300399725849902e-06, |
| "loss": 0.2085, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.5419776119402985, |
| "grad_norm": 1.3094795394905954, |
| "learning_rate": 8.294892570377794e-06, |
| "loss": 0.2352, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.542910447761194, |
| "grad_norm": 1.1820382478545008, |
| "learning_rate": 8.289378340477138e-06, |
| "loss": 0.1761, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.5438432835820896, |
| "grad_norm": 1.3309794913007937, |
| "learning_rate": 8.283857047987475e-06, |
| "loss": 0.1963, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.5447761194029851, |
| "grad_norm": 1.3019079556605686, |
| "learning_rate": 8.278328704763516e-06, |
| "loss": 0.1934, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.5457089552238806, |
| "grad_norm": 1.2802340210839334, |
| "learning_rate": 8.272793322675103e-06, |
| "loss": 0.2048, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.5466417910447762, |
| "grad_norm": 1.2763571802306888, |
| "learning_rate": 8.2672509136072e-06, |
| "loss": 0.2, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.5475746268656716, |
| "grad_norm": 1.3104917380211667, |
| "learning_rate": 8.261701489459852e-06, |
| "loss": 0.2043, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.5485074626865671, |
| "grad_norm": 1.403312055285428, |
| "learning_rate": 8.256145062148168e-06, |
| "loss": 0.2278, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.5494402985074627, |
| "grad_norm": 1.4440532908602695, |
| "learning_rate": 8.250581643602293e-06, |
| "loss": 0.2663, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.5503731343283582, |
| "grad_norm": 1.2954811986308785, |
| "learning_rate": 8.245011245767385e-06, |
| "loss": 0.2108, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.5513059701492538, |
| "grad_norm": 1.428979335450204, |
| "learning_rate": 8.239433880603585e-06, |
| "loss": 0.2753, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.5522388059701493, |
| "grad_norm": 1.2834945741130208, |
| "learning_rate": 8.233849560085994e-06, |
| "loss": 0.2308, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.5531716417910447, |
| "grad_norm": 1.310861000017192, |
| "learning_rate": 8.228258296204647e-06, |
| "loss": 0.2199, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.5541044776119403, |
| "grad_norm": 1.414164947125919, |
| "learning_rate": 8.222660100964487e-06, |
| "loss": 0.225, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.5550373134328358, |
| "grad_norm": 1.4371239993652112, |
| "learning_rate": 8.217054986385336e-06, |
| "loss": 0.2486, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.5559701492537313, |
| "grad_norm": 1.2923123326954635, |
| "learning_rate": 8.211442964501879e-06, |
| "loss": 0.2069, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.5569029850746269, |
| "grad_norm": 1.2877468481074716, |
| "learning_rate": 8.205824047363627e-06, |
| "loss": 0.1978, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.5578358208955224, |
| "grad_norm": 1.3463440860422735, |
| "learning_rate": 8.200198247034897e-06, |
| "loss": 0.2268, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.558768656716418, |
| "grad_norm": 1.333488528093814, |
| "learning_rate": 8.194565575594784e-06, |
| "loss": 0.2128, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.5597014925373134, |
| "grad_norm": 1.3246372107891478, |
| "learning_rate": 8.188926045137139e-06, |
| "loss": 0.2312, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.5606343283582089, |
| "grad_norm": 1.2673792193477058, |
| "learning_rate": 8.183279667770534e-06, |
| "loss": 0.1904, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.5615671641791045, |
| "grad_norm": 1.4226097193723977, |
| "learning_rate": 8.177626455618245e-06, |
| "loss": 0.2411, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.5625, |
| "grad_norm": 1.3246126171335773, |
| "learning_rate": 8.171966420818227e-06, |
| "loss": 0.2621, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.5634328358208955, |
| "grad_norm": 1.3533798674718698, |
| "learning_rate": 8.166299575523081e-06, |
| "loss": 0.209, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.5643656716417911, |
| "grad_norm": 1.435325938380449, |
| "learning_rate": 8.160625931900022e-06, |
| "loss": 0.2188, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.5652985074626866, |
| "grad_norm": 1.4480142027219483, |
| "learning_rate": 8.154945502130877e-06, |
| "loss": 0.2267, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.566231343283582, |
| "grad_norm": 1.2876510191778578, |
| "learning_rate": 8.149258298412033e-06, |
| "loss": 0.1969, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.5671641791044776, |
| "grad_norm": 1.3790311508015853, |
| "learning_rate": 8.143564332954426e-06, |
| "loss": 0.2527, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.5680970149253731, |
| "grad_norm": 1.2745691146097093, |
| "learning_rate": 8.137863617983506e-06, |
| "loss": 0.1928, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.5690298507462687, |
| "grad_norm": 1.4184536736628595, |
| "learning_rate": 8.132156165739216e-06, |
| "loss": 0.2549, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.5699626865671642, |
| "grad_norm": 1.3236046692752557, |
| "learning_rate": 8.12644198847597e-06, |
| "loss": 0.1754, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.5708955223880597, |
| "grad_norm": 1.1976066020851874, |
| "learning_rate": 8.120721098462612e-06, |
| "loss": 0.1859, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.5718283582089553, |
| "grad_norm": 1.2801823115311637, |
| "learning_rate": 8.114993507982408e-06, |
| "loss": 0.1937, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.5727611940298507, |
| "grad_norm": 1.397910990799343, |
| "learning_rate": 8.109259229333005e-06, |
| "loss": 0.2362, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.5736940298507462, |
| "grad_norm": 1.2380118843638828, |
| "learning_rate": 8.103518274826408e-06, |
| "loss": 0.1957, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.5746268656716418, |
| "grad_norm": 1.2923134364968993, |
| "learning_rate": 8.097770656788961e-06, |
| "loss": 0.2036, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.5755597014925373, |
| "grad_norm": 1.3088551068005592, |
| "learning_rate": 8.092016387561316e-06, |
| "loss": 0.2183, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.5764925373134329, |
| "grad_norm": 1.3350688818343555, |
| "learning_rate": 8.086255479498398e-06, |
| "loss": 0.2612, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.5774253731343284, |
| "grad_norm": 1.3275422480188492, |
| "learning_rate": 8.080487944969395e-06, |
| "loss": 0.2446, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.5783582089552238, |
| "grad_norm": 1.3420496546097096, |
| "learning_rate": 8.074713796357717e-06, |
| "loss": 0.2252, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.5792910447761194, |
| "grad_norm": 1.2681186056512634, |
| "learning_rate": 8.068933046060976e-06, |
| "loss": 0.2278, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.5802238805970149, |
| "grad_norm": 1.4269201636687978, |
| "learning_rate": 8.063145706490961e-06, |
| "loss": 0.2738, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.5811567164179104, |
| "grad_norm": 1.3211328025917792, |
| "learning_rate": 8.057351790073601e-06, |
| "loss": 0.1956, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.582089552238806, |
| "grad_norm": 1.2945232584986655, |
| "learning_rate": 8.051551309248961e-06, |
| "loss": 0.2237, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.5830223880597015, |
| "grad_norm": 1.421072978242554, |
| "learning_rate": 8.045744276471185e-06, |
| "loss": 0.2451, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.5839552238805971, |
| "grad_norm": 1.4533180953467522, |
| "learning_rate": 8.039930704208492e-06, |
| "loss": 0.27, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.5848880597014925, |
| "grad_norm": 1.3155482973906332, |
| "learning_rate": 8.034110604943144e-06, |
| "loss": 0.2066, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.585820895522388, |
| "grad_norm": 1.4332765411034296, |
| "learning_rate": 8.028283991171408e-06, |
| "loss": 0.219, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.5867537313432836, |
| "grad_norm": 1.335468080964928, |
| "learning_rate": 8.02245087540355e-06, |
| "loss": 0.1863, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.5876865671641791, |
| "grad_norm": 1.3115973526953748, |
| "learning_rate": 8.016611270163783e-06, |
| "loss": 0.2144, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.5886194029850746, |
| "grad_norm": 1.351986420193054, |
| "learning_rate": 8.010765187990268e-06, |
| "loss": 0.2183, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.5895522388059702, |
| "grad_norm": 1.4929872029838387, |
| "learning_rate": 8.004912641435064e-06, |
| "loss": 0.2862, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.5904850746268657, |
| "grad_norm": 1.2731526175510945, |
| "learning_rate": 7.999053643064108e-06, |
| "loss": 0.193, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.5914179104477612, |
| "grad_norm": 1.427846234487328, |
| "learning_rate": 7.993188205457195e-06, |
| "loss": 0.2389, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.5923507462686567, |
| "grad_norm": 1.2785953885496837, |
| "learning_rate": 7.987316341207942e-06, |
| "loss": 0.2026, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.5932835820895522, |
| "grad_norm": 1.3353601466548186, |
| "learning_rate": 7.981438062923767e-06, |
| "loss": 0.212, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.5942164179104478, |
| "grad_norm": 1.3874063096107416, |
| "learning_rate": 7.975553383225857e-06, |
| "loss": 0.2738, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.5951492537313433, |
| "grad_norm": 1.3925772765894244, |
| "learning_rate": 7.969662314749148e-06, |
| "loss": 0.2551, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.5960820895522388, |
| "grad_norm": 1.3499900031324361, |
| "learning_rate": 7.963764870142286e-06, |
| "loss": 0.2459, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.5970149253731343, |
| "grad_norm": 1.3034783407482293, |
| "learning_rate": 7.957861062067614e-06, |
| "loss": 0.2571, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.5979477611940298, |
| "grad_norm": 1.3012226651014325, |
| "learning_rate": 7.951950903201133e-06, |
| "loss": 0.1911, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.5988805970149254, |
| "grad_norm": 1.352696538850524, |
| "learning_rate": 7.946034406232481e-06, |
| "loss": 0.2157, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.5998134328358209, |
| "grad_norm": 1.2598648626979547, |
| "learning_rate": 7.940111583864909e-06, |
| "loss": 0.1902, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.6007462686567164, |
| "grad_norm": 1.2568549391070882, |
| "learning_rate": 7.934182448815244e-06, |
| "loss": 0.1963, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.601679104477612, |
| "grad_norm": 1.3016294510176711, |
| "learning_rate": 7.928247013813867e-06, |
| "loss": 0.2098, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.6026119402985075, |
| "grad_norm": 1.4082924172412312, |
| "learning_rate": 7.922305291604688e-06, |
| "loss": 0.2653, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.6035447761194029, |
| "grad_norm": 1.3726324485129864, |
| "learning_rate": 7.916357294945116e-06, |
| "loss": 0.2194, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.6044776119402985, |
| "grad_norm": 1.2451400940034223, |
| "learning_rate": 7.910403036606028e-06, |
| "loss": 0.1846, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.605410447761194, |
| "grad_norm": 1.2728449461164848, |
| "learning_rate": 7.90444252937175e-06, |
| "loss": 0.2083, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.6063432835820896, |
| "grad_norm": 1.3773934769808678, |
| "learning_rate": 7.898475786040025e-06, |
| "loss": 0.209, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.6072761194029851, |
| "grad_norm": 1.405684014418204, |
| "learning_rate": 7.892502819421979e-06, |
| "loss": 0.2408, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.6082089552238806, |
| "grad_norm": 1.3138210727359434, |
| "learning_rate": 7.88652364234211e-06, |
| "loss": 0.216, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.6091417910447762, |
| "grad_norm": 1.272247116045296, |
| "learning_rate": 7.880538267638243e-06, |
| "loss": 0.2261, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.6100746268656716, |
| "grad_norm": 1.2742313717737872, |
| "learning_rate": 7.874546708161512e-06, |
| "loss": 0.193, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.6110074626865671, |
| "grad_norm": 1.3353009311794783, |
| "learning_rate": 7.868548976776328e-06, |
| "loss": 0.2395, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.6119402985074627, |
| "grad_norm": 1.261662018670456, |
| "learning_rate": 7.86254508636036e-06, |
| "loss": 0.2475, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.6128731343283582, |
| "grad_norm": 1.2686834317416418, |
| "learning_rate": 7.856535049804495e-06, |
| "loss": 0.1734, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.6138059701492538, |
| "grad_norm": 1.2987561305634387, |
| "learning_rate": 7.850518880012815e-06, |
| "loss": 0.2272, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.6147388059701493, |
| "grad_norm": 1.2645798199797904, |
| "learning_rate": 7.844496589902577e-06, |
| "loss": 0.2086, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.6156716417910447, |
| "grad_norm": 1.2881560804345098, |
| "learning_rate": 7.838468192404176e-06, |
| "loss": 0.2402, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.6166044776119403, |
| "grad_norm": 1.3617560232793342, |
| "learning_rate": 7.83243370046112e-06, |
| "loss": 0.2563, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.6175373134328358, |
| "grad_norm": 1.242645518937026, |
| "learning_rate": 7.826393127029998e-06, |
| "loss": 0.1715, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.6184701492537313, |
| "grad_norm": 1.2222827975734027, |
| "learning_rate": 7.820346485080466e-06, |
| "loss": 0.2152, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.6194029850746269, |
| "grad_norm": 1.501581792730611, |
| "learning_rate": 7.814293787595197e-06, |
| "loss": 0.2325, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.6203358208955224, |
| "grad_norm": 1.3996008994629061, |
| "learning_rate": 7.80823504756988e-06, |
| "loss": 0.2174, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.621268656716418, |
| "grad_norm": 1.2085367755829877, |
| "learning_rate": 7.80217027801317e-06, |
| "loss": 0.1789, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.6222014925373134, |
| "grad_norm": 1.268280098224564, |
| "learning_rate": 7.796099491946665e-06, |
| "loss": 0.1904, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.6231343283582089, |
| "grad_norm": 1.2755297915497015, |
| "learning_rate": 7.790022702404887e-06, |
| "loss": 0.1982, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.6240671641791045, |
| "grad_norm": 1.3752880079713161, |
| "learning_rate": 7.783939922435244e-06, |
| "loss": 0.2617, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.625, |
| "grad_norm": 1.3760610529149837, |
| "learning_rate": 7.777851165098012e-06, |
| "loss": 0.2268, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.6259328358208955, |
| "grad_norm": 1.2876371621650369, |
| "learning_rate": 7.771756443466292e-06, |
| "loss": 0.2373, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.6268656716417911, |
| "grad_norm": 1.3413023124396006, |
| "learning_rate": 7.765655770625997e-06, |
| "loss": 0.2423, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.6277985074626866, |
| "grad_norm": 1.3067002081116716, |
| "learning_rate": 7.759549159675819e-06, |
| "loss": 0.1845, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.628731343283582, |
| "grad_norm": 1.4016448515552087, |
| "learning_rate": 7.753436623727193e-06, |
| "loss": 0.1792, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.6296641791044776, |
| "grad_norm": 1.27456610650169, |
| "learning_rate": 7.747318175904281e-06, |
| "loss": 0.2139, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.6305970149253731, |
| "grad_norm": 1.3121464973056274, |
| "learning_rate": 7.741193829343937e-06, |
| "loss": 0.2139, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.6315298507462687, |
| "grad_norm": 1.3607854375467525, |
| "learning_rate": 7.73506359719568e-06, |
| "loss": 0.2303, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.6324626865671642, |
| "grad_norm": 1.2158147491065376, |
| "learning_rate": 7.728927492621665e-06, |
| "loss": 0.1887, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.6333955223880597, |
| "grad_norm": 1.3402425409255352, |
| "learning_rate": 7.722785528796657e-06, |
| "loss": 0.2171, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.6343283582089553, |
| "grad_norm": 1.3178852845138038, |
| "learning_rate": 7.716637718908002e-06, |
| "loss": 0.205, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.6352611940298507, |
| "grad_norm": 1.2253743856627322, |
| "learning_rate": 7.710484076155595e-06, |
| "loss": 0.1834, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.6361940298507462, |
| "grad_norm": 1.2606137619057538, |
| "learning_rate": 7.704324613751856e-06, |
| "loss": 0.2106, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.6371268656716418, |
| "grad_norm": 1.2691977152639984, |
| "learning_rate": 7.698159344921704e-06, |
| "loss": 0.1913, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.6380597014925373, |
| "grad_norm": 1.4050053172099695, |
| "learning_rate": 7.691988282902519e-06, |
| "loss": 0.2649, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.6389925373134329, |
| "grad_norm": 1.2987797625819433, |
| "learning_rate": 7.685811440944121e-06, |
| "loss": 0.2174, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.6399253731343284, |
| "grad_norm": 1.5415194185556067, |
| "learning_rate": 7.679628832308743e-06, |
| "loss": 0.267, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.6408582089552238, |
| "grad_norm": 1.214096524298427, |
| "learning_rate": 7.673440470270998e-06, |
| "loss": 0.1743, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.6417910447761194, |
| "grad_norm": 1.4037990172322976, |
| "learning_rate": 7.667246368117852e-06, |
| "loss": 0.2573, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.6427238805970149, |
| "grad_norm": 1.2985067455372177, |
| "learning_rate": 7.661046539148596e-06, |
| "loss": 0.2067, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.6436567164179104, |
| "grad_norm": 1.3698413507795268, |
| "learning_rate": 7.654840996674813e-06, |
| "loss": 0.2305, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.644589552238806, |
| "grad_norm": 1.2335877423016568, |
| "learning_rate": 7.648629754020359e-06, |
| "loss": 0.1892, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.6455223880597015, |
| "grad_norm": 1.2599139522935205, |
| "learning_rate": 7.642412824521328e-06, |
| "loss": 0.186, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.6464552238805971, |
| "grad_norm": 1.4056428024323913, |
| "learning_rate": 7.636190221526022e-06, |
| "loss": 0.2824, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.6473880597014925, |
| "grad_norm": 1.3733426492933554, |
| "learning_rate": 7.629961958394923e-06, |
| "loss": 0.2392, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.648320895522388, |
| "grad_norm": 1.280382940927538, |
| "learning_rate": 7.623728048500669e-06, |
| "loss": 0.2425, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.6492537313432836, |
| "grad_norm": 1.3553528855964356, |
| "learning_rate": 7.617488505228023e-06, |
| "loss": 0.2442, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.6501865671641791, |
| "grad_norm": 1.2788028409563263, |
| "learning_rate": 7.611243341973839e-06, |
| "loss": 0.2052, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.6511194029850746, |
| "grad_norm": 1.1777433569265974, |
| "learning_rate": 7.6049925721470455e-06, |
| "loss": 0.1718, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.6520522388059702, |
| "grad_norm": 1.3055979038996244, |
| "learning_rate": 7.598736209168595e-06, |
| "loss": 0.2287, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.6529850746268657, |
| "grad_norm": 1.274827166623728, |
| "learning_rate": 7.592474266471464e-06, |
| "loss": 0.2234, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.6539179104477612, |
| "grad_norm": 1.260025562760807, |
| "learning_rate": 7.5862067575006e-06, |
| "loss": 0.1654, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.6548507462686567, |
| "grad_norm": 1.2899093555470775, |
| "learning_rate": 7.579933695712905e-06, |
| "loss": 0.2328, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.6557835820895522, |
| "grad_norm": 1.3355248187559228, |
| "learning_rate": 7.573655094577204e-06, |
| "loss": 0.2558, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.6567164179104478, |
| "grad_norm": 1.2496645745418022, |
| "learning_rate": 7.56737096757421e-06, |
| "loss": 0.1839, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.6576492537313433, |
| "grad_norm": 1.3061822327647394, |
| "learning_rate": 7.56108132819651e-06, |
| "loss": 0.2056, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.6585820895522388, |
| "grad_norm": 1.361324005957055, |
| "learning_rate": 7.5547861899485175e-06, |
| "loss": 0.2374, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.6595149253731343, |
| "grad_norm": 1.329252555227055, |
| "learning_rate": 7.5484855663464595e-06, |
| "loss": 0.237, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.6604477611940298, |
| "grad_norm": 1.2593760566247931, |
| "learning_rate": 7.542179470918336e-06, |
| "loss": 0.1985, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.6613805970149254, |
| "grad_norm": 1.3178489992256595, |
| "learning_rate": 7.535867917203897e-06, |
| "loss": 0.2046, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.6623134328358209, |
| "grad_norm": 1.3733355088588963, |
| "learning_rate": 7.529550918754609e-06, |
| "loss": 0.2579, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.6632462686567164, |
| "grad_norm": 1.3287402740753909, |
| "learning_rate": 7.523228489133639e-06, |
| "loss": 0.2385, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.664179104477612, |
| "grad_norm": 1.336829144755182, |
| "learning_rate": 7.5169006419157985e-06, |
| "loss": 0.2519, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.6651119402985075, |
| "grad_norm": 1.2822083018178134, |
| "learning_rate": 7.510567390687549e-06, |
| "loss": 0.2429, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.6660447761194029, |
| "grad_norm": 1.3227489712739566, |
| "learning_rate": 7.504228749046941e-06, |
| "loss": 0.2532, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.6669776119402985, |
| "grad_norm": 1.2158384388400283, |
| "learning_rate": 7.497884730603608e-06, |
| "loss": 0.1889, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.667910447761194, |
| "grad_norm": 1.2300611255968865, |
| "learning_rate": 7.491535348978719e-06, |
| "loss": 0.2031, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.6688432835820896, |
| "grad_norm": 1.3917027799740254, |
| "learning_rate": 7.485180617804968e-06, |
| "loss": 0.2087, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.6697761194029851, |
| "grad_norm": 1.2984111895743784, |
| "learning_rate": 7.478820550726528e-06, |
| "loss": 0.2404, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.6707089552238806, |
| "grad_norm": 1.3555569274235573, |
| "learning_rate": 7.472455161399031e-06, |
| "loss": 0.247, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.6716417910447762, |
| "grad_norm": 1.3816030387793872, |
| "learning_rate": 7.466084463489537e-06, |
| "loss": 0.2182, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.6725746268656716, |
| "grad_norm": 1.2663395556813302, |
| "learning_rate": 7.459708470676504e-06, |
| "loss": 0.2221, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.6735074626865671, |
| "grad_norm": 1.3067189909365724, |
| "learning_rate": 7.453327196649756e-06, |
| "loss": 0.1994, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.6744402985074627, |
| "grad_norm": 1.3354233334197128, |
| "learning_rate": 7.446940655110457e-06, |
| "loss": 0.1899, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.6753731343283582, |
| "grad_norm": 1.3794949675774215, |
| "learning_rate": 7.440548859771086e-06, |
| "loss": 0.2159, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.6763059701492538, |
| "grad_norm": 1.3100708012693134, |
| "learning_rate": 7.434151824355396e-06, |
| "loss": 0.2233, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.6772388059701493, |
| "grad_norm": 1.271288846350891, |
| "learning_rate": 7.4277495625983916e-06, |
| "loss": 0.2234, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.6781716417910447, |
| "grad_norm": 1.2975245411849095, |
| "learning_rate": 7.421342088246304e-06, |
| "loss": 0.2407, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.6791044776119403, |
| "grad_norm": 1.2435497512296882, |
| "learning_rate": 7.414929415056551e-06, |
| "loss": 0.1997, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.6800373134328358, |
| "grad_norm": 1.3107202575780283, |
| "learning_rate": 7.408511556797714e-06, |
| "loss": 0.2396, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.6809701492537313, |
| "grad_norm": 1.3303686258359244, |
| "learning_rate": 7.402088527249508e-06, |
| "loss": 0.2735, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.6819029850746269, |
| "grad_norm": 1.2785847204338623, |
| "learning_rate": 7.395660340202752e-06, |
| "loss": 0.1935, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.6828358208955224, |
| "grad_norm": 1.2944858838779594, |
| "learning_rate": 7.389227009459335e-06, |
| "loss": 0.2036, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.683768656716418, |
| "grad_norm": 1.3189703714874468, |
| "learning_rate": 7.382788548832196e-06, |
| "loss": 0.2557, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.6847014925373134, |
| "grad_norm": 1.2868391513140633, |
| "learning_rate": 7.3763449721452815e-06, |
| "loss": 0.1901, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.6856343283582089, |
| "grad_norm": 1.357808247604318, |
| "learning_rate": 7.369896293233531e-06, |
| "loss": 0.2247, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.6865671641791045, |
| "grad_norm": 1.3124644418425873, |
| "learning_rate": 7.363442525942827e-06, |
| "loss": 0.1975, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.6875, |
| "grad_norm": 1.2731450065419383, |
| "learning_rate": 7.3569836841299905e-06, |
| "loss": 0.1819, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.6884328358208955, |
| "grad_norm": 1.3480325337789343, |
| "learning_rate": 7.350519781662726e-06, |
| "loss": 0.2502, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.6893656716417911, |
| "grad_norm": 1.4153540585327467, |
| "learning_rate": 7.3440508324196126e-06, |
| "loss": 0.2524, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.6902985074626866, |
| "grad_norm": 1.3182119358350495, |
| "learning_rate": 7.3375768502900626e-06, |
| "loss": 0.2217, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.691231343283582, |
| "grad_norm": 1.32600345807212, |
| "learning_rate": 7.331097849174292e-06, |
| "loss": 0.2132, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.6921641791044776, |
| "grad_norm": 1.2238434834138427, |
| "learning_rate": 7.3246138429832945e-06, |
| "loss": 0.2025, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.6930970149253731, |
| "grad_norm": 1.280187761697935, |
| "learning_rate": 7.3181248456388124e-06, |
| "loss": 0.1997, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.6940298507462687, |
| "grad_norm": 1.2115465450484972, |
| "learning_rate": 7.311630871073301e-06, |
| "loss": 0.1858, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.6949626865671642, |
| "grad_norm": 1.3194935216161052, |
| "learning_rate": 7.305131933229902e-06, |
| "loss": 0.2629, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.6958955223880597, |
| "grad_norm": 1.3474367087215364, |
| "learning_rate": 7.298628046062417e-06, |
| "loss": 0.2106, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.6968283582089553, |
| "grad_norm": 1.2772038481835335, |
| "learning_rate": 7.292119223535273e-06, |
| "loss": 0.1831, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.6977611940298507, |
| "grad_norm": 1.3913533127594813, |
| "learning_rate": 7.2856054796234944e-06, |
| "loss": 0.2819, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.6986940298507462, |
| "grad_norm": 1.2343619133538144, |
| "learning_rate": 7.279086828312666e-06, |
| "loss": 0.195, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.6996268656716418, |
| "grad_norm": 1.1736678408101116, |
| "learning_rate": 7.272563283598918e-06, |
| "loss": 0.1897, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.7005597014925373, |
| "grad_norm": 1.2037655185681564, |
| "learning_rate": 7.266034859488883e-06, |
| "loss": 0.1949, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.7014925373134329, |
| "grad_norm": 1.3734571083959768, |
| "learning_rate": 7.25950156999967e-06, |
| "loss": 0.2412, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.7024253731343284, |
| "grad_norm": 1.255827542301762, |
| "learning_rate": 7.252963429158835e-06, |
| "loss": 0.1766, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.7033582089552238, |
| "grad_norm": 1.3750180737088755, |
| "learning_rate": 7.246420451004352e-06, |
| "loss": 0.2157, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.7042910447761194, |
| "grad_norm": 1.3432725001900598, |
| "learning_rate": 7.239872649584574e-06, |
| "loss": 0.222, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.7052238805970149, |
| "grad_norm": 1.3717844129040624, |
| "learning_rate": 7.23332003895822e-06, |
| "loss": 0.2127, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.7061567164179104, |
| "grad_norm": 1.2185242804017098, |
| "learning_rate": 7.226762633194331e-06, |
| "loss": 0.172, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.707089552238806, |
| "grad_norm": 1.2951740775174005, |
| "learning_rate": 7.220200446372239e-06, |
| "loss": 0.237, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.7080223880597015, |
| "grad_norm": 1.4291494271201155, |
| "learning_rate": 7.2136334925815455e-06, |
| "loss": 0.303, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.7089552238805971, |
| "grad_norm": 1.2767443600841293, |
| "learning_rate": 7.207061785922089e-06, |
| "loss": 0.2645, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.7098880597014925, |
| "grad_norm": 1.2558172096850615, |
| "learning_rate": 7.20048534050391e-06, |
| "loss": 0.1923, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.710820895522388, |
| "grad_norm": 1.2682779571350995, |
| "learning_rate": 7.193904170447223e-06, |
| "loss": 0.1779, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.7117537313432836, |
| "grad_norm": 1.4248910177875456, |
| "learning_rate": 7.187318289882387e-06, |
| "loss": 0.2371, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.7126865671641791, |
| "grad_norm": 1.2194662674040737, |
| "learning_rate": 7.1807277129498774e-06, |
| "loss": 0.1942, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.7136194029850746, |
| "grad_norm": 1.24249804380326, |
| "learning_rate": 7.17413245380025e-06, |
| "loss": 0.2263, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.7145522388059702, |
| "grad_norm": 1.3870816837211544, |
| "learning_rate": 7.167532526594116e-06, |
| "loss": 0.2344, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.7154850746268657, |
| "grad_norm": 1.3983268621959495, |
| "learning_rate": 7.160927945502109e-06, |
| "loss": 0.2187, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.7164179104477612, |
| "grad_norm": 1.2771984166842354, |
| "learning_rate": 7.1543187247048525e-06, |
| "loss": 0.2029, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.7173507462686567, |
| "grad_norm": 1.3874043953579143, |
| "learning_rate": 7.147704878392935e-06, |
| "loss": 0.2774, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.7182835820895522, |
| "grad_norm": 1.1553166176292557, |
| "learning_rate": 7.141086420766875e-06, |
| "loss": 0.1844, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.7192164179104478, |
| "grad_norm": 1.4378856847783543, |
| "learning_rate": 7.134463366037091e-06, |
| "loss": 0.2848, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.7201492537313433, |
| "grad_norm": 1.2503624007594234, |
| "learning_rate": 7.1278357284238745e-06, |
| "loss": 0.1789, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.7210820895522388, |
| "grad_norm": 1.2534733748456004, |
| "learning_rate": 7.121203522157354e-06, |
| "loss": 0.1838, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.7220149253731343, |
| "grad_norm": 1.2915708752816295, |
| "learning_rate": 7.114566761477468e-06, |
| "loss": 0.249, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.7229477611940298, |
| "grad_norm": 1.279147688945239, |
| "learning_rate": 7.107925460633936e-06, |
| "loss": 0.2184, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.7238805970149254, |
| "grad_norm": 1.310867616394776, |
| "learning_rate": 7.101279633886222e-06, |
| "loss": 0.2431, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.7248134328358209, |
| "grad_norm": 1.3011380699356498, |
| "learning_rate": 7.094629295503513e-06, |
| "loss": 0.2408, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.7257462686567164, |
| "grad_norm": 1.3435867669486072, |
| "learning_rate": 7.087974459764675e-06, |
| "loss": 0.2097, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.726679104477612, |
| "grad_norm": 1.2481127962379166, |
| "learning_rate": 7.081315140958236e-06, |
| "loss": 0.1873, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.7276119402985075, |
| "grad_norm": 1.2347418383894004, |
| "learning_rate": 7.074651353382349e-06, |
| "loss": 0.1863, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.7285447761194029, |
| "grad_norm": 1.1862014967353693, |
| "learning_rate": 7.067983111344762e-06, |
| "loss": 0.1799, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.7294776119402985, |
| "grad_norm": 1.288195756530044, |
| "learning_rate": 7.061310429162782e-06, |
| "loss": 0.2364, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.730410447761194, |
| "grad_norm": 1.3419414029305805, |
| "learning_rate": 7.054633321163258e-06, |
| "loss": 0.2427, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.7313432835820896, |
| "grad_norm": 1.264073939413233, |
| "learning_rate": 7.047951801682533e-06, |
| "loss": 0.1923, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.7322761194029851, |
| "grad_norm": 1.2705408941546572, |
| "learning_rate": 7.041265885066428e-06, |
| "loss": 0.2326, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.7332089552238806, |
| "grad_norm": 1.373108720756827, |
| "learning_rate": 7.034575585670205e-06, |
| "loss": 0.2455, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.7341417910447762, |
| "grad_norm": 1.3545548371493887, |
| "learning_rate": 7.027880917858529e-06, |
| "loss": 0.261, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.7350746268656716, |
| "grad_norm": 1.366716670408608, |
| "learning_rate": 7.021181896005456e-06, |
| "loss": 0.2319, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.7360074626865671, |
| "grad_norm": 1.2549936147832534, |
| "learning_rate": 7.014478534494378e-06, |
| "loss": 0.2337, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.7369402985074627, |
| "grad_norm": 1.336996910856189, |
| "learning_rate": 7.007770847718014e-06, |
| "loss": 0.1859, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.7378731343283582, |
| "grad_norm": 1.2310220030547625, |
| "learning_rate": 7.001058850078366e-06, |
| "loss": 0.1885, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.7388059701492538, |
| "grad_norm": 1.2740115144273731, |
| "learning_rate": 6.994342555986692e-06, |
| "loss": 0.1969, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.7397388059701493, |
| "grad_norm": 1.1889466328158036, |
| "learning_rate": 6.987621979863475e-06, |
| "loss": 0.1618, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.7406716417910447, |
| "grad_norm": 1.2248802281858773, |
| "learning_rate": 6.9808971361383935e-06, |
| "loss": 0.2103, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.7416044776119403, |
| "grad_norm": 1.4300940894297904, |
| "learning_rate": 6.9741680392502845e-06, |
| "loss": 0.2702, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.7425373134328358, |
| "grad_norm": 1.2116830325525012, |
| "learning_rate": 6.967434703647123e-06, |
| "loss": 0.1764, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.7434701492537313, |
| "grad_norm": 1.3112261692065397, |
| "learning_rate": 6.960697143785979e-06, |
| "loss": 0.2494, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.7444029850746269, |
| "grad_norm": 1.4169735590782278, |
| "learning_rate": 6.953955374132996e-06, |
| "loss": 0.2954, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.7453358208955224, |
| "grad_norm": 1.2798391276899126, |
| "learning_rate": 6.947209409163357e-06, |
| "loss": 0.2087, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.746268656716418, |
| "grad_norm": 1.3846133582561395, |
| "learning_rate": 6.9404592633612486e-06, |
| "loss": 0.243, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.7472014925373134, |
| "grad_norm": 1.3262645304233895, |
| "learning_rate": 6.93370495121984e-06, |
| "loss": 0.2253, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.7481343283582089, |
| "grad_norm": 1.246757162976938, |
| "learning_rate": 6.926946487241239e-06, |
| "loss": 0.2214, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.7490671641791045, |
| "grad_norm": 1.350005845462039, |
| "learning_rate": 6.920183885936473e-06, |
| "loss": 0.2375, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.2209331776937584, |
| "learning_rate": 6.913417161825449e-06, |
| "loss": 0.2199, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.7509328358208955, |
| "grad_norm": 1.2267624877565906, |
| "learning_rate": 6.90664632943693e-06, |
| "loss": 0.1936, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.7518656716417911, |
| "grad_norm": 1.172302982511542, |
| "learning_rate": 6.899871403308498e-06, |
| "loss": 0.211, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.7527985074626866, |
| "grad_norm": 1.2676625196680436, |
| "learning_rate": 6.893092397986523e-06, |
| "loss": 0.2283, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.753731343283582, |
| "grad_norm": 1.3374734891623064, |
| "learning_rate": 6.886309328026135e-06, |
| "loss": 0.225, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.7546641791044776, |
| "grad_norm": 1.3195162787198706, |
| "learning_rate": 6.879522207991191e-06, |
| "loss": 0.249, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.7555970149253731, |
| "grad_norm": 1.2275779555398447, |
| "learning_rate": 6.872731052454243e-06, |
| "loss": 0.2211, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.7565298507462687, |
| "grad_norm": 1.289029763845464, |
| "learning_rate": 6.865935875996509e-06, |
| "loss": 0.2281, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.7574626865671642, |
| "grad_norm": 1.3541277141371464, |
| "learning_rate": 6.85913669320784e-06, |
| "loss": 0.1995, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.7583955223880597, |
| "grad_norm": 1.321909779991229, |
| "learning_rate": 6.852333518686688e-06, |
| "loss": 0.2344, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.7593283582089553, |
| "grad_norm": 1.224871553289291, |
| "learning_rate": 6.845526367040076e-06, |
| "loss": 0.1985, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.7602611940298507, |
| "grad_norm": 1.3775854589181042, |
| "learning_rate": 6.838715252883567e-06, |
| "loss": 0.2605, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.7611940298507462, |
| "grad_norm": 1.4821817128027184, |
| "learning_rate": 6.831900190841232e-06, |
| "loss": 0.2936, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.7621268656716418, |
| "grad_norm": 1.2397684942862677, |
| "learning_rate": 6.825081195545615e-06, |
| "loss": 0.2292, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.7630597014925373, |
| "grad_norm": 1.345221673931461, |
| "learning_rate": 6.818258281637709e-06, |
| "loss": 0.2335, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.7639925373134329, |
| "grad_norm": 1.426682576710431, |
| "learning_rate": 6.811431463766922e-06, |
| "loss": 0.2553, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.7649253731343284, |
| "grad_norm": 1.2602342447821695, |
| "learning_rate": 6.804600756591037e-06, |
| "loss": 0.1955, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.7658582089552238, |
| "grad_norm": 1.3593906222216048, |
| "learning_rate": 6.797766174776197e-06, |
| "loss": 0.2566, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.7667910447761194, |
| "grad_norm": 1.1752129115708971, |
| "learning_rate": 6.790927732996855e-06, |
| "loss": 0.2007, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.7677238805970149, |
| "grad_norm": 1.2985625823041023, |
| "learning_rate": 6.78408544593576e-06, |
| "loss": 0.2312, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.7686567164179104, |
| "grad_norm": 1.3215582389199496, |
| "learning_rate": 6.777239328283909e-06, |
| "loss": 0.2264, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.769589552238806, |
| "grad_norm": 1.2861815312659521, |
| "learning_rate": 6.770389394740531e-06, |
| "loss": 0.224, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.7705223880597015, |
| "grad_norm": 1.2062615319767154, |
| "learning_rate": 6.763535660013044e-06, |
| "loss": 0.2001, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.7714552238805971, |
| "grad_norm": 1.192048831923032, |
| "learning_rate": 6.756678138817029e-06, |
| "loss": 0.1804, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.7723880597014925, |
| "grad_norm": 1.3169223961426715, |
| "learning_rate": 6.749816845876196e-06, |
| "loss": 0.2107, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.773320895522388, |
| "grad_norm": 1.2199842237106207, |
| "learning_rate": 6.742951795922355e-06, |
| "loss": 0.1968, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.7742537313432836, |
| "grad_norm": 1.3463678809767026, |
| "learning_rate": 6.736083003695378e-06, |
| "loss": 0.2309, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.7751865671641791, |
| "grad_norm": 1.38996426766889, |
| "learning_rate": 6.729210483943176e-06, |
| "loss": 0.2584, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.7761194029850746, |
| "grad_norm": 1.228356172288229, |
| "learning_rate": 6.722334251421665e-06, |
| "loss": 0.1962, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.7770522388059702, |
| "grad_norm": 1.3002701079919168, |
| "learning_rate": 6.715454320894728e-06, |
| "loss": 0.2167, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.7779850746268657, |
| "grad_norm": 1.2962121956490298, |
| "learning_rate": 6.708570707134192e-06, |
| "loss": 0.2013, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.7789179104477612, |
| "grad_norm": 1.29750596117597, |
| "learning_rate": 6.701683424919789e-06, |
| "loss": 0.2245, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.7798507462686567, |
| "grad_norm": 1.3104874159288715, |
| "learning_rate": 6.6947924890391295e-06, |
| "loss": 0.2265, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.7807835820895522, |
| "grad_norm": 1.33957381485937, |
| "learning_rate": 6.687897914287667e-06, |
| "loss": 0.2432, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.7817164179104478, |
| "grad_norm": 1.2654860183364611, |
| "learning_rate": 6.680999715468669e-06, |
| "loss": 0.2329, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.7826492537313433, |
| "grad_norm": 1.3809743084115988, |
| "learning_rate": 6.674097907393186e-06, |
| "loss": 0.2678, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.7835820895522388, |
| "grad_norm": 1.3419028339922356, |
| "learning_rate": 6.667192504880016e-06, |
| "loss": 0.2387, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.7845149253731343, |
| "grad_norm": 1.270127692627935, |
| "learning_rate": 6.660283522755674e-06, |
| "loss": 0.231, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.7854477611940298, |
| "grad_norm": 1.323400044376785, |
| "learning_rate": 6.653370975854362e-06, |
| "loss": 0.2551, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.7863805970149254, |
| "grad_norm": 1.306885444890462, |
| "learning_rate": 6.646454879017934e-06, |
| "loss": 0.2402, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.7873134328358209, |
| "grad_norm": 1.279516309492333, |
| "learning_rate": 6.639535247095868e-06, |
| "loss": 0.2151, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.7882462686567164, |
| "grad_norm": 1.2841870522741121, |
| "learning_rate": 6.632612094945234e-06, |
| "loss": 0.2191, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.789179104477612, |
| "grad_norm": 1.384854633009525, |
| "learning_rate": 6.625685437430656e-06, |
| "loss": 0.2904, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.7901119402985075, |
| "grad_norm": 1.325501174260801, |
| "learning_rate": 6.618755289424285e-06, |
| "loss": 0.2128, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.7910447761194029, |
| "grad_norm": 1.2696793987185822, |
| "learning_rate": 6.611821665805769e-06, |
| "loss": 0.2056, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.7919776119402985, |
| "grad_norm": 1.3494734406081708, |
| "learning_rate": 6.604884581462219e-06, |
| "loss": 0.2479, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.792910447761194, |
| "grad_norm": 1.3238649985819988, |
| "learning_rate": 6.597944051288169e-06, |
| "loss": 0.2196, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.7938432835820896, |
| "grad_norm": 1.2737431492446287, |
| "learning_rate": 6.5910000901855606e-06, |
| "loss": 0.185, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.7947761194029851, |
| "grad_norm": 1.261459403427656, |
| "learning_rate": 6.5840527130637e-06, |
| "loss": 0.1649, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.7957089552238806, |
| "grad_norm": 1.3334323935868955, |
| "learning_rate": 6.577101934839222e-06, |
| "loss": 0.2042, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.7966417910447762, |
| "grad_norm": 1.326118422926226, |
| "learning_rate": 6.570147770436071e-06, |
| "loss": 0.2118, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.7975746268656716, |
| "grad_norm": 1.2735300323417673, |
| "learning_rate": 6.56319023478546e-06, |
| "loss": 0.2433, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.7985074626865671, |
| "grad_norm": 1.1516244040283352, |
| "learning_rate": 6.556229342825835e-06, |
| "loss": 0.196, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.7994402985074627, |
| "grad_norm": 1.2798641246720104, |
| "learning_rate": 6.549265109502856e-06, |
| "loss": 0.1941, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.8003731343283582, |
| "grad_norm": 1.2451080478624204, |
| "learning_rate": 6.542297549769353e-06, |
| "loss": 0.2012, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.8013059701492538, |
| "grad_norm": 1.2256859001807432, |
| "learning_rate": 6.5353266785852976e-06, |
| "loss": 0.21, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.8022388059701493, |
| "grad_norm": 1.2375250508685598, |
| "learning_rate": 6.528352510917774e-06, |
| "loss": 0.202, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.8031716417910447, |
| "grad_norm": 1.267433173640717, |
| "learning_rate": 6.521375061740945e-06, |
| "loss": 0.2315, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.8041044776119403, |
| "grad_norm": 1.2414436142743404, |
| "learning_rate": 6.514394346036013e-06, |
| "loss": 0.2269, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.8050373134328358, |
| "grad_norm": 1.3305170506619883, |
| "learning_rate": 6.507410378791198e-06, |
| "loss": 0.2104, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.8059701492537313, |
| "grad_norm": 1.26434323921976, |
| "learning_rate": 6.500423175001705e-06, |
| "loss": 0.2242, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.8069029850746269, |
| "grad_norm": 1.2803166868672846, |
| "learning_rate": 6.493432749669682e-06, |
| "loss": 0.1832, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.8078358208955224, |
| "grad_norm": 1.3081597715295281, |
| "learning_rate": 6.486439117804195e-06, |
| "loss": 0.184, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.808768656716418, |
| "grad_norm": 1.2160280841149596, |
| "learning_rate": 6.479442294421199e-06, |
| "loss": 0.1772, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.8097014925373134, |
| "grad_norm": 1.187633961625421, |
| "learning_rate": 6.472442294543497e-06, |
| "loss": 0.1885, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.8106343283582089, |
| "grad_norm": 1.4011122461263708, |
| "learning_rate": 6.465439133200715e-06, |
| "loss": 0.2081, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.8115671641791045, |
| "grad_norm": 1.4590773110743023, |
| "learning_rate": 6.458432825429264e-06, |
| "loss": 0.2345, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.8125, |
| "grad_norm": 1.3977394231703384, |
| "learning_rate": 6.451423386272312e-06, |
| "loss": 0.2305, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.8134328358208955, |
| "grad_norm": 1.3240392180046858, |
| "learning_rate": 6.444410830779753e-06, |
| "loss": 0.2039, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.8143656716417911, |
| "grad_norm": 1.3010915438246045, |
| "learning_rate": 6.437395174008169e-06, |
| "loss": 0.204, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.8152985074626866, |
| "grad_norm": 1.1697900871038651, |
| "learning_rate": 6.4303764310208015e-06, |
| "loss": 0.1722, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.816231343283582, |
| "grad_norm": 1.2738694790905063, |
| "learning_rate": 6.4233546168875185e-06, |
| "loss": 0.242, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.8171641791044776, |
| "grad_norm": 1.298362565686738, |
| "learning_rate": 6.4163297466847795e-06, |
| "loss": 0.2254, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.8180970149253731, |
| "grad_norm": 1.3150297463061384, |
| "learning_rate": 6.409301835495611e-06, |
| "loss": 0.247, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.8190298507462687, |
| "grad_norm": 1.28947125272434, |
| "learning_rate": 6.402270898409565e-06, |
| "loss": 0.2055, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.8199626865671642, |
| "grad_norm": 1.1479792741170483, |
| "learning_rate": 6.395236950522691e-06, |
| "loss": 0.1885, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.8208955223880597, |
| "grad_norm": 1.2901505058336102, |
| "learning_rate": 6.388200006937503e-06, |
| "loss": 0.2275, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.8218283582089553, |
| "grad_norm": 1.3612667025324792, |
| "learning_rate": 6.381160082762949e-06, |
| "loss": 0.2483, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.8227611940298507, |
| "grad_norm": 1.3057407524788593, |
| "learning_rate": 6.374117193114373e-06, |
| "loss": 0.2208, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.8236940298507462, |
| "grad_norm": 1.2683413858234487, |
| "learning_rate": 6.3670713531134865e-06, |
| "loss": 0.2391, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.8246268656716418, |
| "grad_norm": 1.2679383991747715, |
| "learning_rate": 6.3600225778883395e-06, |
| "loss": 0.2304, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.8255597014925373, |
| "grad_norm": 1.291847617097924, |
| "learning_rate": 6.352970882573283e-06, |
| "loss": 0.2398, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.8264925373134329, |
| "grad_norm": 1.2834163636252958, |
| "learning_rate": 6.3459162823089325e-06, |
| "loss": 0.2115, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.8274253731343284, |
| "grad_norm": 1.2881959492626784, |
| "learning_rate": 6.338858792242147e-06, |
| "loss": 0.2399, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.8283582089552238, |
| "grad_norm": 1.220315589698992, |
| "learning_rate": 6.33179842752599e-06, |
| "loss": 0.1731, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.8292910447761194, |
| "grad_norm": 1.2527952925756376, |
| "learning_rate": 6.324735203319691e-06, |
| "loss": 0.2183, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.8302238805970149, |
| "grad_norm": 1.3134848726122574, |
| "learning_rate": 6.317669134788625e-06, |
| "loss": 0.2124, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.8311567164179104, |
| "grad_norm": 1.3821896713692334, |
| "learning_rate": 6.3106002371042716e-06, |
| "loss": 0.2542, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.832089552238806, |
| "grad_norm": 1.3281677908441818, |
| "learning_rate": 6.303528525444185e-06, |
| "loss": 0.206, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.8330223880597015, |
| "grad_norm": 1.2195697801878131, |
| "learning_rate": 6.296454014991962e-06, |
| "loss": 0.209, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.8339552238805971, |
| "grad_norm": 1.281174429543326, |
| "learning_rate": 6.289376720937208e-06, |
| "loss": 0.2063, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.8348880597014925, |
| "grad_norm": 1.2628179124634378, |
| "learning_rate": 6.282296658475508e-06, |
| "loss": 0.2221, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.835820895522388, |
| "grad_norm": 1.3001412968878898, |
| "learning_rate": 6.275213842808383e-06, |
| "loss": 0.2347, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.8367537313432836, |
| "grad_norm": 1.332802953329869, |
| "learning_rate": 6.268128289143274e-06, |
| "loss": 0.257, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.8376865671641791, |
| "grad_norm": 1.4211822881217815, |
| "learning_rate": 6.261040012693498e-06, |
| "loss": 0.2491, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.8386194029850746, |
| "grad_norm": 1.1835916153283808, |
| "learning_rate": 6.253949028678214e-06, |
| "loss": 0.1787, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.8395522388059702, |
| "grad_norm": 1.30040911243428, |
| "learning_rate": 6.246855352322403e-06, |
| "loss": 0.2218, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.8404850746268657, |
| "grad_norm": 1.2751227090200317, |
| "learning_rate": 6.2397589988568175e-06, |
| "loss": 0.1832, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.8414179104477612, |
| "grad_norm": 1.336123053932801, |
| "learning_rate": 6.232659983517964e-06, |
| "loss": 0.2538, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.8423507462686567, |
| "grad_norm": 1.2973986362379657, |
| "learning_rate": 6.22555832154806e-06, |
| "loss": 0.1842, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.8432835820895522, |
| "grad_norm": 1.293187865349667, |
| "learning_rate": 6.21845402819501e-06, |
| "loss": 0.2144, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.8442164179104478, |
| "grad_norm": 1.4216499384977432, |
| "learning_rate": 6.211347118712365e-06, |
| "loss": 0.2583, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.8451492537313433, |
| "grad_norm": 1.289953059628164, |
| "learning_rate": 6.204237608359296e-06, |
| "loss": 0.2042, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.8460820895522388, |
| "grad_norm": 1.2412676307218935, |
| "learning_rate": 6.197125512400555e-06, |
| "loss": 0.195, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.8470149253731343, |
| "grad_norm": 1.216660198794667, |
| "learning_rate": 6.190010846106446e-06, |
| "loss": 0.2033, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.8479477611940298, |
| "grad_norm": 1.346208844228071, |
| "learning_rate": 6.182893624752796e-06, |
| "loss": 0.2585, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.8488805970149254, |
| "grad_norm": 1.2971232768563918, |
| "learning_rate": 6.1757738636209115e-06, |
| "loss": 0.2304, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.8498134328358209, |
| "grad_norm": 1.2139712586213292, |
| "learning_rate": 6.168651577997558e-06, |
| "loss": 0.1774, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.8507462686567164, |
| "grad_norm": 1.312466911856846, |
| "learning_rate": 6.161526783174917e-06, |
| "loss": 0.2283, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.851679104477612, |
| "grad_norm": 1.304590817855252, |
| "learning_rate": 6.154399494450559e-06, |
| "loss": 0.2139, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.8526119402985075, |
| "grad_norm": 1.2634075009111383, |
| "learning_rate": 6.14726972712741e-06, |
| "loss": 0.2252, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.8535447761194029, |
| "grad_norm": 1.2470179967350044, |
| "learning_rate": 6.140137496513718e-06, |
| "loss": 0.1914, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.8544776119402985, |
| "grad_norm": 1.2646976296163095, |
| "learning_rate": 6.1330028179230185e-06, |
| "loss": 0.2066, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.855410447761194, |
| "grad_norm": 1.1803232608347158, |
| "learning_rate": 6.125865706674103e-06, |
| "loss": 0.1708, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.8563432835820896, |
| "grad_norm": 1.2689825513910395, |
| "learning_rate": 6.1187261780909835e-06, |
| "loss": 0.2114, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.8572761194029851, |
| "grad_norm": 1.2346460755363526, |
| "learning_rate": 6.111584247502871e-06, |
| "loss": 0.2333, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.8582089552238806, |
| "grad_norm": 1.260220733261907, |
| "learning_rate": 6.104439930244125e-06, |
| "loss": 0.18, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.8591417910447762, |
| "grad_norm": 1.285111466705012, |
| "learning_rate": 6.0972932416542326e-06, |
| "loss": 0.2324, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.8600746268656716, |
| "grad_norm": 1.2460248488612586, |
| "learning_rate": 6.090144197077774e-06, |
| "loss": 0.2397, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.8610074626865671, |
| "grad_norm": 1.3234609423589674, |
| "learning_rate": 6.082992811864385e-06, |
| "loss": 0.2126, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.8619402985074627, |
| "grad_norm": 1.2509638402578742, |
| "learning_rate": 6.075839101368728e-06, |
| "loss": 0.2047, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.8628731343283582, |
| "grad_norm": 1.360932430546119, |
| "learning_rate": 6.068683080950458e-06, |
| "loss": 0.2705, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.8638059701492538, |
| "grad_norm": 1.224176428108218, |
| "learning_rate": 6.061524765974191e-06, |
| "loss": 0.2087, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.8647388059701493, |
| "grad_norm": 1.2150273756251704, |
| "learning_rate": 6.054364171809467e-06, |
| "loss": 0.1813, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.8656716417910447, |
| "grad_norm": 1.3381501042482085, |
| "learning_rate": 6.047201313830724e-06, |
| "loss": 0.2353, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.8666044776119403, |
| "grad_norm": 1.4149342164499332, |
| "learning_rate": 6.040036207417252e-06, |
| "loss": 0.2405, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.8675373134328358, |
| "grad_norm": 1.2226530757677798, |
| "learning_rate": 6.032868867953181e-06, |
| "loss": 0.2066, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.8684701492537313, |
| "grad_norm": 1.3101998121678258, |
| "learning_rate": 6.025699310827423e-06, |
| "loss": 0.2337, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.8694029850746269, |
| "grad_norm": 1.257761679103726, |
| "learning_rate": 6.01852755143366e-06, |
| "loss": 0.1732, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.8703358208955224, |
| "grad_norm": 1.2844011334095315, |
| "learning_rate": 6.011353605170303e-06, |
| "loss": 0.2174, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.871268656716418, |
| "grad_norm": 1.2817045648391725, |
| "learning_rate": 6.004177487440448e-06, |
| "loss": 0.2065, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.8722014925373134, |
| "grad_norm": 1.253138318239424, |
| "learning_rate": 5.996999213651866e-06, |
| "loss": 0.2031, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.8731343283582089, |
| "grad_norm": 1.3346763040111453, |
| "learning_rate": 5.98981879921695e-06, |
| "loss": 0.2113, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.8740671641791045, |
| "grad_norm": 1.23116222222353, |
| "learning_rate": 5.982636259552691e-06, |
| "loss": 0.2099, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.875, |
| "grad_norm": 1.204120206628589, |
| "learning_rate": 5.975451610080643e-06, |
| "loss": 0.1834, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.8759328358208955, |
| "grad_norm": 1.2206632469272407, |
| "learning_rate": 5.968264866226888e-06, |
| "loss": 0.201, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.8768656716417911, |
| "grad_norm": 1.374354159020404, |
| "learning_rate": 5.961076043422011e-06, |
| "loss": 0.2338, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.8777985074626866, |
| "grad_norm": 1.3883837341962375, |
| "learning_rate": 5.953885157101054e-06, |
| "loss": 0.287, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.878731343283582, |
| "grad_norm": 1.3503256511984503, |
| "learning_rate": 5.9466922227034915e-06, |
| "loss": 0.224, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.8796641791044776, |
| "grad_norm": 1.4151723372695262, |
| "learning_rate": 5.939497255673197e-06, |
| "loss": 0.258, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.8805970149253731, |
| "grad_norm": 1.3110628245283942, |
| "learning_rate": 5.932300271458406e-06, |
| "loss": 0.2009, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.8815298507462687, |
| "grad_norm": 1.2922411186960756, |
| "learning_rate": 5.925101285511687e-06, |
| "loss": 0.1875, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.8824626865671642, |
| "grad_norm": 1.2244793038262138, |
| "learning_rate": 5.9179003132899075e-06, |
| "loss": 0.214, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.8833955223880597, |
| "grad_norm": 1.276674565595725, |
| "learning_rate": 5.910697370254195e-06, |
| "loss": 0.2237, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.8843283582089553, |
| "grad_norm": 1.2332094258137027, |
| "learning_rate": 5.90349247186991e-06, |
| "loss": 0.1976, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.8852611940298507, |
| "grad_norm": 1.1889954764792152, |
| "learning_rate": 5.8962856336066175e-06, |
| "loss": 0.1948, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.8861940298507462, |
| "grad_norm": 1.3664765584935996, |
| "learning_rate": 5.889076870938041e-06, |
| "loss": 0.2504, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.8871268656716418, |
| "grad_norm": 1.3032321848741621, |
| "learning_rate": 5.881866199342035e-06, |
| "loss": 0.2608, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.8880597014925373, |
| "grad_norm": 1.3377092395995163, |
| "learning_rate": 5.874653634300555e-06, |
| "loss": 0.2043, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.8889925373134329, |
| "grad_norm": 1.3320395505868632, |
| "learning_rate": 5.867439191299629e-06, |
| "loss": 0.2566, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.8899253731343284, |
| "grad_norm": 1.3630820848349416, |
| "learning_rate": 5.860222885829302e-06, |
| "loss": 0.2373, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.8908582089552238, |
| "grad_norm": 1.3345807317360974, |
| "learning_rate": 5.853004733383631e-06, |
| "loss": 0.2438, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.8917910447761194, |
| "grad_norm": 1.278615606825059, |
| "learning_rate": 5.845784749460632e-06, |
| "loss": 0.2104, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.8927238805970149, |
| "grad_norm": 1.2799506965670524, |
| "learning_rate": 5.838562949562257e-06, |
| "loss": 0.2288, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.8936567164179104, |
| "grad_norm": 1.2913790288284412, |
| "learning_rate": 5.831339349194352e-06, |
| "loss": 0.2114, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.894589552238806, |
| "grad_norm": 1.2194240810958283, |
| "learning_rate": 5.824113963866635e-06, |
| "loss": 0.1746, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.8955223880597015, |
| "grad_norm": 1.2703407850964368, |
| "learning_rate": 5.816886809092651e-06, |
| "loss": 0.2049, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.8964552238805971, |
| "grad_norm": 1.3099986058972541, |
| "learning_rate": 5.809657900389749e-06, |
| "loss": 0.197, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.8973880597014925, |
| "grad_norm": 1.376627956546677, |
| "learning_rate": 5.802427253279042e-06, |
| "loss": 0.1987, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.898320895522388, |
| "grad_norm": 1.4401252624901832, |
| "learning_rate": 5.795194883285371e-06, |
| "loss": 0.2391, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.8992537313432836, |
| "grad_norm": 1.2451440182815314, |
| "learning_rate": 5.787960805937283e-06, |
| "loss": 0.2011, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.9001865671641791, |
| "grad_norm": 1.2173276185663062, |
| "learning_rate": 5.780725036766988e-06, |
| "loss": 0.2017, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.9011194029850746, |
| "grad_norm": 1.2605780909868989, |
| "learning_rate": 5.773487591310329e-06, |
| "loss": 0.2029, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.9020522388059702, |
| "grad_norm": 1.1729539366805513, |
| "learning_rate": 5.7662484851067435e-06, |
| "loss": 0.1764, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.9029850746268657, |
| "grad_norm": 1.2941189951902425, |
| "learning_rate": 5.759007733699245e-06, |
| "loss": 0.2276, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.9039179104477612, |
| "grad_norm": 1.4279132641665215, |
| "learning_rate": 5.751765352634369e-06, |
| "loss": 0.2741, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.9048507462686567, |
| "grad_norm": 1.1819532323786706, |
| "learning_rate": 5.7445213574621565e-06, |
| "loss": 0.1939, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.9057835820895522, |
| "grad_norm": 1.2426120494149469, |
| "learning_rate": 5.73727576373611e-06, |
| "loss": 0.1947, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.9067164179104478, |
| "grad_norm": 1.337603891990572, |
| "learning_rate": 5.730028587013168e-06, |
| "loss": 0.2336, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.9076492537313433, |
| "grad_norm": 1.1832053184869655, |
| "learning_rate": 5.722779842853665e-06, |
| "loss": 0.2092, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.9085820895522388, |
| "grad_norm": 1.2401664664738221, |
| "learning_rate": 5.715529546821303e-06, |
| "loss": 0.1889, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.9095149253731343, |
| "grad_norm": 1.2853976947091192, |
| "learning_rate": 5.708277714483114e-06, |
| "loss": 0.244, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.9104477611940298, |
| "grad_norm": 1.0995739573370429, |
| "learning_rate": 5.701024361409431e-06, |
| "loss": 0.1633, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.9113805970149254, |
| "grad_norm": 1.3409416136518981, |
| "learning_rate": 5.693769503173847e-06, |
| "loss": 0.2369, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.9123134328358209, |
| "grad_norm": 1.2983356696907475, |
| "learning_rate": 5.6865131553531925e-06, |
| "loss": 0.2696, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.9132462686567164, |
| "grad_norm": 1.2323698861651475, |
| "learning_rate": 5.679255333527498e-06, |
| "loss": 0.1971, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.914179104477612, |
| "grad_norm": 1.314131614199844, |
| "learning_rate": 5.671996053279949e-06, |
| "loss": 0.2168, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.9151119402985075, |
| "grad_norm": 1.2957854875866097, |
| "learning_rate": 5.664735330196871e-06, |
| "loss": 0.2177, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.9160447761194029, |
| "grad_norm": 1.3538437754250945, |
| "learning_rate": 5.657473179867686e-06, |
| "loss": 0.2457, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.9169776119402985, |
| "grad_norm": 1.2207256127091906, |
| "learning_rate": 5.6502096178848786e-06, |
| "loss": 0.1802, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.917910447761194, |
| "grad_norm": 1.228820602876557, |
| "learning_rate": 5.642944659843962e-06, |
| "loss": 0.2141, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.9188432835820896, |
| "grad_norm": 1.2793579747049237, |
| "learning_rate": 5.635678321343453e-06, |
| "loss": 0.2037, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.9197761194029851, |
| "grad_norm": 1.3437393655992524, |
| "learning_rate": 5.628410617984828e-06, |
| "loss": 0.2008, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.9207089552238806, |
| "grad_norm": 1.2489129719017027, |
| "learning_rate": 5.6211415653724965e-06, |
| "loss": 0.2046, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.9216417910447762, |
| "grad_norm": 1.2894068857021934, |
| "learning_rate": 5.613871179113761e-06, |
| "loss": 0.2137, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.9225746268656716, |
| "grad_norm": 1.319224823579118, |
| "learning_rate": 5.606599474818793e-06, |
| "loss": 0.2477, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.9235074626865671, |
| "grad_norm": 1.2386689277056102, |
| "learning_rate": 5.5993264681005875e-06, |
| "loss": 0.2158, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.9244402985074627, |
| "grad_norm": 1.3325603148451477, |
| "learning_rate": 5.592052174574942e-06, |
| "loss": 0.2087, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.9253731343283582, |
| "grad_norm": 1.1456735516877268, |
| "learning_rate": 5.584776609860414e-06, |
| "loss": 0.1569, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.9263059701492538, |
| "grad_norm": 1.2619437102530076, |
| "learning_rate": 5.5774997895782875e-06, |
| "loss": 0.2053, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.9272388059701493, |
| "grad_norm": 1.2446625743320796, |
| "learning_rate": 5.570221729352549e-06, |
| "loss": 0.2323, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.9281716417910447, |
| "grad_norm": 1.3816218116328933, |
| "learning_rate": 5.562942444809842e-06, |
| "loss": 0.2549, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.9291044776119403, |
| "grad_norm": 1.3210090999184592, |
| "learning_rate": 5.555661951579443e-06, |
| "loss": 0.2193, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.9300373134328358, |
| "grad_norm": 1.3003885784496516, |
| "learning_rate": 5.5483802652932165e-06, |
| "loss": 0.2111, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.9309701492537313, |
| "grad_norm": 1.2050797563542268, |
| "learning_rate": 5.541097401585596e-06, |
| "loss": 0.1778, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.9319029850746269, |
| "grad_norm": 1.177432522858866, |
| "learning_rate": 5.53381337609354e-06, |
| "loss": 0.1784, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.9328358208955224, |
| "grad_norm": 1.1743424389077388, |
| "learning_rate": 5.5265282044565005e-06, |
| "loss": 0.1791, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.9328358208955224, |
| "eval_loss": 0.21907268464565277, |
| "eval_runtime": 3.4377, |
| "eval_samples_per_second": 25.308, |
| "eval_steps_per_second": 6.4, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.933768656716418, |
| "grad_norm": 1.1635684511765956, |
| "learning_rate": 5.519241902316392e-06, |
| "loss": 0.1816, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.9347014925373134, |
| "grad_norm": 1.3832271905864557, |
| "learning_rate": 5.511954485317558e-06, |
| "loss": 0.2517, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.9356343283582089, |
| "grad_norm": 1.2598044090680305, |
| "learning_rate": 5.504665969106731e-06, |
| "loss": 0.1921, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.9365671641791045, |
| "grad_norm": 1.2368589807753354, |
| "learning_rate": 5.497376369333005e-06, |
| "loss": 0.1973, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.9375, |
| "grad_norm": 1.3209236079778428, |
| "learning_rate": 5.490085701647805e-06, |
| "loss": 0.1921, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.9384328358208955, |
| "grad_norm": 1.253108089681956, |
| "learning_rate": 5.482793981704842e-06, |
| "loss": 0.1515, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.9393656716417911, |
| "grad_norm": 1.278153011620517, |
| "learning_rate": 5.475501225160092e-06, |
| "loss": 0.2183, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.9402985074626866, |
| "grad_norm": 1.3035057923325821, |
| "learning_rate": 5.468207447671755e-06, |
| "loss": 0.2398, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.941231343283582, |
| "grad_norm": 1.2568883295893882, |
| "learning_rate": 5.4609126649002206e-06, |
| "loss": 0.197, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.9421641791044776, |
| "grad_norm": 1.3164928013180202, |
| "learning_rate": 5.45361689250804e-06, |
| "loss": 0.2031, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.9430970149253731, |
| "grad_norm": 1.2609431407237786, |
| "learning_rate": 5.446320146159888e-06, |
| "loss": 0.2138, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.9440298507462687, |
| "grad_norm": 1.287828094306322, |
| "learning_rate": 5.43902244152253e-06, |
| "loss": 0.2007, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.9449626865671642, |
| "grad_norm": 1.2224301172959635, |
| "learning_rate": 5.431723794264789e-06, |
| "loss": 0.191, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.9458955223880597, |
| "grad_norm": 1.2296770029919053, |
| "learning_rate": 5.424424220057514e-06, |
| "loss": 0.2002, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.9468283582089553, |
| "grad_norm": 1.375993340109819, |
| "learning_rate": 5.417123734573541e-06, |
| "loss": 0.2306, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.9477611940298507, |
| "grad_norm": 1.335234251332967, |
| "learning_rate": 5.409822353487666e-06, |
| "loss": 0.2509, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.9486940298507462, |
| "grad_norm": 1.2393282472383353, |
| "learning_rate": 5.402520092476604e-06, |
| "loss": 0.2009, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.9496268656716418, |
| "grad_norm": 1.2356611423329757, |
| "learning_rate": 5.395216967218961e-06, |
| "loss": 0.2063, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.9505597014925373, |
| "grad_norm": 1.307932934613099, |
| "learning_rate": 5.387912993395203e-06, |
| "loss": 0.2324, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.9514925373134329, |
| "grad_norm": 1.3243498570571994, |
| "learning_rate": 5.38060818668761e-06, |
| "loss": 0.2388, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.9524253731343284, |
| "grad_norm": 1.1645371806053033, |
| "learning_rate": 5.373302562780256e-06, |
| "loss": 0.1886, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.9533582089552238, |
| "grad_norm": 1.297730287399217, |
| "learning_rate": 5.365996137358969e-06, |
| "loss": 0.2073, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.9542910447761194, |
| "grad_norm": 1.250457649249079, |
| "learning_rate": 5.358688926111293e-06, |
| "loss": 0.189, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.9552238805970149, |
| "grad_norm": 1.2705310178726523, |
| "learning_rate": 5.351380944726465e-06, |
| "loss": 0.2266, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.9561567164179104, |
| "grad_norm": 1.383325173135325, |
| "learning_rate": 5.344072208895376e-06, |
| "loss": 0.2612, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.957089552238806, |
| "grad_norm": 1.2801631073608817, |
| "learning_rate": 5.33676273431053e-06, |
| "loss": 0.2197, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.9580223880597015, |
| "grad_norm": 1.313531018381848, |
| "learning_rate": 5.329452536666025e-06, |
| "loss": 0.2202, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.9589552238805971, |
| "grad_norm": 1.3086085921045902, |
| "learning_rate": 5.322141631657507e-06, |
| "loss": 0.2456, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.9598880597014925, |
| "grad_norm": 1.257199461783946, |
| "learning_rate": 5.314830034982142e-06, |
| "loss": 0.2229, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.960820895522388, |
| "grad_norm": 1.2749870374862313, |
| "learning_rate": 5.30751776233858e-06, |
| "loss": 0.2209, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.9617537313432836, |
| "grad_norm": 1.2394504236431108, |
| "learning_rate": 5.300204829426923e-06, |
| "loss": 0.2157, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.9626865671641791, |
| "grad_norm": 1.2209777484060933, |
| "learning_rate": 5.292891251948694e-06, |
| "loss": 0.2167, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.9636194029850746, |
| "grad_norm": 1.2368359051230373, |
| "learning_rate": 5.2855770456067936e-06, |
| "loss": 0.1912, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.9645522388059702, |
| "grad_norm": 1.3016803387713154, |
| "learning_rate": 5.278262226105476e-06, |
| "loss": 0.201, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.9654850746268657, |
| "grad_norm": 1.2524173615322032, |
| "learning_rate": 5.270946809150315e-06, |
| "loss": 0.2159, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.9664179104477612, |
| "grad_norm": 1.2683630285568621, |
| "learning_rate": 5.263630810448161e-06, |
| "loss": 0.2101, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.9673507462686567, |
| "grad_norm": 1.3024423342612523, |
| "learning_rate": 5.256314245707118e-06, |
| "loss": 0.2346, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.9682835820895522, |
| "grad_norm": 1.2352735531904246, |
| "learning_rate": 5.2489971306365025e-06, |
| "loss": 0.1927, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.9692164179104478, |
| "grad_norm": 1.2910473126613706, |
| "learning_rate": 5.2416794809468145e-06, |
| "loss": 0.1807, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.9701492537313433, |
| "grad_norm": 1.2631389614599406, |
| "learning_rate": 5.234361312349701e-06, |
| "loss": 0.2146, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.9710820895522388, |
| "grad_norm": 1.2549022169376725, |
| "learning_rate": 5.227042640557924e-06, |
| "loss": 0.2111, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.9720149253731343, |
| "grad_norm": 1.2247144243410715, |
| "learning_rate": 5.219723481285326e-06, |
| "loss": 0.1699, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.9729477611940298, |
| "grad_norm": 1.257160905866937, |
| "learning_rate": 5.212403850246794e-06, |
| "loss": 0.2209, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.9738805970149254, |
| "grad_norm": 1.2629034927846967, |
| "learning_rate": 5.205083763158228e-06, |
| "loss": 0.2412, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.9748134328358209, |
| "grad_norm": 1.2990007211575794, |
| "learning_rate": 5.197763235736512e-06, |
| "loss": 0.1919, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.9757462686567164, |
| "grad_norm": 1.311144641480511, |
| "learning_rate": 5.190442283699472e-06, |
| "loss": 0.2012, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.976679104477612, |
| "grad_norm": 1.3260780293625873, |
| "learning_rate": 5.183120922765842e-06, |
| "loss": 0.2002, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.9776119402985075, |
| "grad_norm": 1.188604340910306, |
| "learning_rate": 5.175799168655241e-06, |
| "loss": 0.1973, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.9785447761194029, |
| "grad_norm": 1.315357343596737, |
| "learning_rate": 5.168477037088129e-06, |
| "loss": 0.252, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.9794776119402985, |
| "grad_norm": 1.2147122512747308, |
| "learning_rate": 5.161154543785773e-06, |
| "loss": 0.1833, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.980410447761194, |
| "grad_norm": 1.3545401003703426, |
| "learning_rate": 5.153831704470224e-06, |
| "loss": 0.222, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.9813432835820896, |
| "grad_norm": 1.34778657278548, |
| "learning_rate": 5.146508534864267e-06, |
| "loss": 0.1957, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.9822761194029851, |
| "grad_norm": 1.186757360343082, |
| "learning_rate": 5.1391850506914055e-06, |
| "loss": 0.2058, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.9832089552238806, |
| "grad_norm": 1.2544761756808898, |
| "learning_rate": 5.131861267675813e-06, |
| "loss": 0.203, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.9841417910447762, |
| "grad_norm": 1.310346335595727, |
| "learning_rate": 5.124537201542303e-06, |
| "loss": 0.2254, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.9850746268656716, |
| "grad_norm": 1.3016888883046014, |
| "learning_rate": 5.117212868016303e-06, |
| "loss": 0.1994, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.9860074626865671, |
| "grad_norm": 1.2638298711530747, |
| "learning_rate": 5.109888282823809e-06, |
| "loss": 0.2293, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.9869402985074627, |
| "grad_norm": 1.2883471738550145, |
| "learning_rate": 5.10256346169136e-06, |
| "loss": 0.2185, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.9878731343283582, |
| "grad_norm": 1.4541116567456558, |
| "learning_rate": 5.095238420346e-06, |
| "loss": 0.2787, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.9888059701492538, |
| "grad_norm": 1.2579334018092971, |
| "learning_rate": 5.087913174515247e-06, |
| "loss": 0.1723, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.9897388059701493, |
| "grad_norm": 1.2457324155617162, |
| "learning_rate": 5.080587739927061e-06, |
| "loss": 0.1943, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.9906716417910447, |
| "grad_norm": 1.1872127809722754, |
| "learning_rate": 5.073262132309801e-06, |
| "loss": 0.1711, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.9916044776119403, |
| "grad_norm": 1.1823669677918365, |
| "learning_rate": 5.0659363673922e-06, |
| "loss": 0.1954, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.9925373134328358, |
| "grad_norm": 1.338060737656068, |
| "learning_rate": 5.058610460903332e-06, |
| "loss": 0.2344, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.9934701492537313, |
| "grad_norm": 1.2439993576220725, |
| "learning_rate": 5.0512844285725715e-06, |
| "loss": 0.2049, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.9944029850746269, |
| "grad_norm": 1.2745912155788846, |
| "learning_rate": 5.043958286129562e-06, |
| "loss": 0.2073, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.9953358208955224, |
| "grad_norm": 1.2268753263397718, |
| "learning_rate": 5.036632049304189e-06, |
| "loss": 0.1862, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.996268656716418, |
| "grad_norm": 1.3112166103200893, |
| "learning_rate": 5.029305733826533e-06, |
| "loss": 0.2043, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.9972014925373134, |
| "grad_norm": 1.2980556932588276, |
| "learning_rate": 5.021979355426851e-06, |
| "loss": 0.1931, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.9981343283582089, |
| "grad_norm": 1.3165921241201037, |
| "learning_rate": 5.0146529298355305e-06, |
| "loss": 0.2276, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.9990671641791045, |
| "grad_norm": 1.1963395543514876, |
| "learning_rate": 5.007326472783061e-06, |
| "loss": 0.1893, |
| "step": 1071 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.236538164498631, |
| "learning_rate": 5e-06, |
| "loss": 0.1633, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.0009328358208955, |
| "grad_norm": 1.2479547546792191, |
| "learning_rate": 4.992673527216939e-06, |
| "loss": 0.1578, |
| "step": 1073 |
| }, |
| { |
| "epoch": 1.001865671641791, |
| "grad_norm": 1.3305876874607747, |
| "learning_rate": 4.985347070164471e-06, |
| "loss": 0.1739, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.0027985074626866, |
| "grad_norm": 1.237995595567211, |
| "learning_rate": 4.97802064457315e-06, |
| "loss": 0.1947, |
| "step": 1075 |
| }, |
| { |
| "epoch": 1.0037313432835822, |
| "grad_norm": 1.204780028041673, |
| "learning_rate": 4.970694266173467e-06, |
| "loss": 0.1933, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.0046641791044777, |
| "grad_norm": 1.276059263832681, |
| "learning_rate": 4.963367950695814e-06, |
| "loss": 0.1783, |
| "step": 1077 |
| }, |
| { |
| "epoch": 1.0055970149253732, |
| "grad_norm": 1.309820410447539, |
| "learning_rate": 4.956041713870439e-06, |
| "loss": 0.2121, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.0065298507462686, |
| "grad_norm": 1.4483451181535176, |
| "learning_rate": 4.948715571427432e-06, |
| "loss": 0.1996, |
| "step": 1079 |
| }, |
| { |
| "epoch": 1.007462686567164, |
| "grad_norm": 1.3271595457277217, |
| "learning_rate": 4.94138953909667e-06, |
| "loss": 0.1788, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.0083955223880596, |
| "grad_norm": 1.3870634078200816, |
| "learning_rate": 4.934063632607802e-06, |
| "loss": 0.1837, |
| "step": 1081 |
| }, |
| { |
| "epoch": 1.0093283582089552, |
| "grad_norm": 1.2307753356456927, |
| "learning_rate": 4.9267378676902014e-06, |
| "loss": 0.1558, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.0102611940298507, |
| "grad_norm": 1.38292309795395, |
| "learning_rate": 4.9194122600729396e-06, |
| "loss": 0.1834, |
| "step": 1083 |
| }, |
| { |
| "epoch": 1.0111940298507462, |
| "grad_norm": 1.2878361217427439, |
| "learning_rate": 4.9120868254847535e-06, |
| "loss": 0.1501, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.0121268656716418, |
| "grad_norm": 1.3923743816580727, |
| "learning_rate": 4.9047615796540014e-06, |
| "loss": 0.1943, |
| "step": 1085 |
| }, |
| { |
| "epoch": 1.0130597014925373, |
| "grad_norm": 1.3496982501402985, |
| "learning_rate": 4.897436538308641e-06, |
| "loss": 0.1889, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.0139925373134329, |
| "grad_norm": 1.2551367521987897, |
| "learning_rate": 4.890111717176193e-06, |
| "loss": 0.179, |
| "step": 1087 |
| }, |
| { |
| "epoch": 1.0149253731343284, |
| "grad_norm": 1.2732948826314543, |
| "learning_rate": 4.882787131983698e-06, |
| "loss": 0.1594, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.015858208955224, |
| "grad_norm": 1.2430616813440283, |
| "learning_rate": 4.875462798457698e-06, |
| "loss": 0.1569, |
| "step": 1089 |
| }, |
| { |
| "epoch": 1.0167910447761195, |
| "grad_norm": 1.2440828869258598, |
| "learning_rate": 4.8681387323241895e-06, |
| "loss": 0.1618, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.017723880597015, |
| "grad_norm": 1.24177612792635, |
| "learning_rate": 4.860814949308595e-06, |
| "loss": 0.1529, |
| "step": 1091 |
| }, |
| { |
| "epoch": 1.0186567164179103, |
| "grad_norm": 1.2695148146798123, |
| "learning_rate": 4.853491465135733e-06, |
| "loss": 0.1898, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.0195895522388059, |
| "grad_norm": 1.3003959474708364, |
| "learning_rate": 4.8461682955297795e-06, |
| "loss": 0.1832, |
| "step": 1093 |
| }, |
| { |
| "epoch": 1.0205223880597014, |
| "grad_norm": 1.2117092616641418, |
| "learning_rate": 4.83884545621423e-06, |
| "loss": 0.1651, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.021455223880597, |
| "grad_norm": 1.2739216395954265, |
| "learning_rate": 4.831522962911874e-06, |
| "loss": 0.1517, |
| "step": 1095 |
| }, |
| { |
| "epoch": 1.0223880597014925, |
| "grad_norm": 1.355545800661616, |
| "learning_rate": 4.82420083134476e-06, |
| "loss": 0.2124, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.023320895522388, |
| "grad_norm": 1.3120936898639997, |
| "learning_rate": 4.8168790772341595e-06, |
| "loss": 0.2069, |
| "step": 1097 |
| }, |
| { |
| "epoch": 1.0242537313432836, |
| "grad_norm": 1.4257530106077783, |
| "learning_rate": 4.80955771630053e-06, |
| "loss": 0.221, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.025186567164179, |
| "grad_norm": 1.2501902868432049, |
| "learning_rate": 4.8022367642634886e-06, |
| "loss": 0.1711, |
| "step": 1099 |
| }, |
| { |
| "epoch": 1.0261194029850746, |
| "grad_norm": 1.3281722597905856, |
| "learning_rate": 4.794916236841773e-06, |
| "loss": 0.1585, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.0270522388059702, |
| "grad_norm": 1.3188983055502095, |
| "learning_rate": 4.787596149753208e-06, |
| "loss": 0.1633, |
| "step": 1101 |
| }, |
| { |
| "epoch": 1.0279850746268657, |
| "grad_norm": 1.3378048102111348, |
| "learning_rate": 4.780276518714675e-06, |
| "loss": 0.1648, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.0289179104477613, |
| "grad_norm": 1.2067917710652079, |
| "learning_rate": 4.7729573594420765e-06, |
| "loss": 0.1482, |
| "step": 1103 |
| }, |
| { |
| "epoch": 1.0298507462686568, |
| "grad_norm": 1.1149191069835047, |
| "learning_rate": 4.765638687650299e-06, |
| "loss": 0.11, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.0307835820895523, |
| "grad_norm": 1.2203251235917674, |
| "learning_rate": 4.758320519053186e-06, |
| "loss": 0.1484, |
| "step": 1105 |
| }, |
| { |
| "epoch": 1.0317164179104477, |
| "grad_norm": 1.2189178116466146, |
| "learning_rate": 4.7510028693635e-06, |
| "loss": 0.1427, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.0326492537313432, |
| "grad_norm": 1.372735938568047, |
| "learning_rate": 4.743685754292885e-06, |
| "loss": 0.2167, |
| "step": 1107 |
| }, |
| { |
| "epoch": 1.0335820895522387, |
| "grad_norm": 1.3282242998736706, |
| "learning_rate": 4.736369189551841e-06, |
| "loss": 0.1985, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.0345149253731343, |
| "grad_norm": 1.4369705125880448, |
| "learning_rate": 4.729053190849686e-06, |
| "loss": 0.2087, |
| "step": 1109 |
| }, |
| { |
| "epoch": 1.0354477611940298, |
| "grad_norm": 1.1942738930700767, |
| "learning_rate": 4.721737773894525e-06, |
| "loss": 0.1347, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.0363805970149254, |
| "grad_norm": 1.1913762459997832, |
| "learning_rate": 4.714422954393208e-06, |
| "loss": 0.1777, |
| "step": 1111 |
| }, |
| { |
| "epoch": 1.037313432835821, |
| "grad_norm": 1.2267283573790213, |
| "learning_rate": 4.7071087480513075e-06, |
| "loss": 0.1501, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.0382462686567164, |
| "grad_norm": 1.3046537812115568, |
| "learning_rate": 4.699795170573078e-06, |
| "loss": 0.1829, |
| "step": 1113 |
| }, |
| { |
| "epoch": 1.039179104477612, |
| "grad_norm": 1.198889462535983, |
| "learning_rate": 4.692482237661421e-06, |
| "loss": 0.1508, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.0401119402985075, |
| "grad_norm": 1.259168735576952, |
| "learning_rate": 4.6851699650178595e-06, |
| "loss": 0.1642, |
| "step": 1115 |
| }, |
| { |
| "epoch": 1.041044776119403, |
| "grad_norm": 1.336907239087494, |
| "learning_rate": 4.677858368342495e-06, |
| "loss": 0.1912, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.0419776119402986, |
| "grad_norm": 1.2619832711180206, |
| "learning_rate": 4.670547463333976e-06, |
| "loss": 0.1455, |
| "step": 1117 |
| }, |
| { |
| "epoch": 1.0429104477611941, |
| "grad_norm": 1.220034852354567, |
| "learning_rate": 4.66323726568947e-06, |
| "loss": 0.1547, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.0438432835820897, |
| "grad_norm": 1.248237699247018, |
| "learning_rate": 4.655927791104627e-06, |
| "loss": 0.1445, |
| "step": 1119 |
| }, |
| { |
| "epoch": 1.044776119402985, |
| "grad_norm": 1.2788401711193869, |
| "learning_rate": 4.6486190552735375e-06, |
| "loss": 0.1766, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.0457089552238805, |
| "grad_norm": 1.370368413770226, |
| "learning_rate": 4.641311073888709e-06, |
| "loss": 0.194, |
| "step": 1121 |
| }, |
| { |
| "epoch": 1.046641791044776, |
| "grad_norm": 1.2631891199390823, |
| "learning_rate": 4.6340038626410335e-06, |
| "loss": 0.1752, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.0475746268656716, |
| "grad_norm": 1.1920491518946674, |
| "learning_rate": 4.626697437219746e-06, |
| "loss": 0.1248, |
| "step": 1123 |
| }, |
| { |
| "epoch": 1.0485074626865671, |
| "grad_norm": 1.2240491739822534, |
| "learning_rate": 4.619391813312391e-06, |
| "loss": 0.1625, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.0494402985074627, |
| "grad_norm": 1.319460412241081, |
| "learning_rate": 4.6120870066047976e-06, |
| "loss": 0.1971, |
| "step": 1125 |
| }, |
| { |
| "epoch": 1.0503731343283582, |
| "grad_norm": 1.264580192904606, |
| "learning_rate": 4.6047830327810396e-06, |
| "loss": 0.186, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.0513059701492538, |
| "grad_norm": 1.1887375529482427, |
| "learning_rate": 4.597479907523397e-06, |
| "loss": 0.142, |
| "step": 1127 |
| }, |
| { |
| "epoch": 1.0522388059701493, |
| "grad_norm": 1.3019193689654893, |
| "learning_rate": 4.590177646512335e-06, |
| "loss": 0.1574, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.0531716417910448, |
| "grad_norm": 1.3520770477490758, |
| "learning_rate": 4.5828762654264595e-06, |
| "loss": 0.2208, |
| "step": 1129 |
| }, |
| { |
| "epoch": 1.0541044776119404, |
| "grad_norm": 1.3539523658619874, |
| "learning_rate": 4.575575779942487e-06, |
| "loss": 0.2068, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.055037313432836, |
| "grad_norm": 1.2367294883717355, |
| "learning_rate": 4.568276205735211e-06, |
| "loss": 0.1532, |
| "step": 1131 |
| }, |
| { |
| "epoch": 1.0559701492537314, |
| "grad_norm": 1.2491884318330926, |
| "learning_rate": 4.560977558477471e-06, |
| "loss": 0.1519, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.0569029850746268, |
| "grad_norm": 1.19498379683984, |
| "learning_rate": 4.553679853840114e-06, |
| "loss": 0.1364, |
| "step": 1133 |
| }, |
| { |
| "epoch": 1.0578358208955223, |
| "grad_norm": 1.2111964940176165, |
| "learning_rate": 4.546383107491963e-06, |
| "loss": 0.1803, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.0587686567164178, |
| "grad_norm": 1.2421854687745804, |
| "learning_rate": 4.539087335099781e-06, |
| "loss": 0.1393, |
| "step": 1135 |
| }, |
| { |
| "epoch": 1.0597014925373134, |
| "grad_norm": 1.2448365427323471, |
| "learning_rate": 4.531792552328247e-06, |
| "loss": 0.1829, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.060634328358209, |
| "grad_norm": 1.3333692826121972, |
| "learning_rate": 4.52449877483991e-06, |
| "loss": 0.187, |
| "step": 1137 |
| }, |
| { |
| "epoch": 1.0615671641791045, |
| "grad_norm": 1.2354906525606948, |
| "learning_rate": 4.51720601829516e-06, |
| "loss": 0.1899, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.0625, |
| "grad_norm": 1.3201744820123211, |
| "learning_rate": 4.509914298352197e-06, |
| "loss": 0.2219, |
| "step": 1139 |
| }, |
| { |
| "epoch": 1.0634328358208955, |
| "grad_norm": 1.278633468940202, |
| "learning_rate": 4.502623630666997e-06, |
| "loss": 0.1682, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.064365671641791, |
| "grad_norm": 1.3164919074298664, |
| "learning_rate": 4.495334030893272e-06, |
| "loss": 0.1887, |
| "step": 1141 |
| }, |
| { |
| "epoch": 1.0652985074626866, |
| "grad_norm": 1.3431779551114107, |
| "learning_rate": 4.488045514682444e-06, |
| "loss": 0.2015, |
| "step": 1142 |
| }, |
| { |
| "epoch": 1.0662313432835822, |
| "grad_norm": 1.2656504973462495, |
| "learning_rate": 4.480758097683608e-06, |
| "loss": 0.1716, |
| "step": 1143 |
| }, |
| { |
| "epoch": 1.0671641791044777, |
| "grad_norm": 1.3709142303612718, |
| "learning_rate": 4.4734717955435e-06, |
| "loss": 0.1854, |
| "step": 1144 |
| }, |
| { |
| "epoch": 1.0680970149253732, |
| "grad_norm": 1.2494186095135305, |
| "learning_rate": 4.466186623906462e-06, |
| "loss": 0.1487, |
| "step": 1145 |
| }, |
| { |
| "epoch": 1.0690298507462686, |
| "grad_norm": 1.330911651469678, |
| "learning_rate": 4.458902598414407e-06, |
| "loss": 0.1765, |
| "step": 1146 |
| }, |
| { |
| "epoch": 1.069962686567164, |
| "grad_norm": 1.2014798451429747, |
| "learning_rate": 4.451619734706786e-06, |
| "loss": 0.135, |
| "step": 1147 |
| }, |
| { |
| "epoch": 1.0708955223880596, |
| "grad_norm": 1.335995551880163, |
| "learning_rate": 4.44433804842056e-06, |
| "loss": 0.1923, |
| "step": 1148 |
| }, |
| { |
| "epoch": 1.0718283582089552, |
| "grad_norm": 1.2401414131378654, |
| "learning_rate": 4.437057555190159e-06, |
| "loss": 0.1883, |
| "step": 1149 |
| }, |
| { |
| "epoch": 1.0727611940298507, |
| "grad_norm": 1.1528790815227568, |
| "learning_rate": 4.429778270647452e-06, |
| "loss": 0.1441, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.0736940298507462, |
| "grad_norm": 1.2902145418144173, |
| "learning_rate": 4.422500210421713e-06, |
| "loss": 0.1828, |
| "step": 1151 |
| }, |
| { |
| "epoch": 1.0746268656716418, |
| "grad_norm": 1.25051022835927, |
| "learning_rate": 4.415223390139588e-06, |
| "loss": 0.1621, |
| "step": 1152 |
| }, |
| { |
| "epoch": 1.0755597014925373, |
| "grad_norm": 1.2532151202153479, |
| "learning_rate": 4.40794782542506e-06, |
| "loss": 0.1615, |
| "step": 1153 |
| }, |
| { |
| "epoch": 1.0764925373134329, |
| "grad_norm": 1.483668421157478, |
| "learning_rate": 4.400673531899413e-06, |
| "loss": 0.2112, |
| "step": 1154 |
| }, |
| { |
| "epoch": 1.0774253731343284, |
| "grad_norm": 1.3062093686578884, |
| "learning_rate": 4.393400525181208e-06, |
| "loss": 0.164, |
| "step": 1155 |
| }, |
| { |
| "epoch": 1.078358208955224, |
| "grad_norm": 1.2299161462076307, |
| "learning_rate": 4.38612882088624e-06, |
| "loss": 0.1434, |
| "step": 1156 |
| }, |
| { |
| "epoch": 1.0792910447761195, |
| "grad_norm": 1.238818335888987, |
| "learning_rate": 4.378858434627504e-06, |
| "loss": 0.1559, |
| "step": 1157 |
| }, |
| { |
| "epoch": 1.080223880597015, |
| "grad_norm": 1.2915690997598344, |
| "learning_rate": 4.371589382015171e-06, |
| "loss": 0.1784, |
| "step": 1158 |
| }, |
| { |
| "epoch": 1.0811567164179103, |
| "grad_norm": 1.1839866337995881, |
| "learning_rate": 4.364321678656548e-06, |
| "loss": 0.1178, |
| "step": 1159 |
| }, |
| { |
| "epoch": 1.0820895522388059, |
| "grad_norm": 1.3599177349094425, |
| "learning_rate": 4.357055340156041e-06, |
| "loss": 0.192, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.0830223880597014, |
| "grad_norm": 1.223492563759451, |
| "learning_rate": 4.349790382115125e-06, |
| "loss": 0.1774, |
| "step": 1161 |
| }, |
| { |
| "epoch": 1.083955223880597, |
| "grad_norm": 1.3573643375419246, |
| "learning_rate": 4.342526820132316e-06, |
| "loss": 0.1902, |
| "step": 1162 |
| }, |
| { |
| "epoch": 1.0848880597014925, |
| "grad_norm": 1.2752241604652728, |
| "learning_rate": 4.335264669803131e-06, |
| "loss": 0.1893, |
| "step": 1163 |
| }, |
| { |
| "epoch": 1.085820895522388, |
| "grad_norm": 1.3819680352974013, |
| "learning_rate": 4.328003946720053e-06, |
| "loss": 0.2114, |
| "step": 1164 |
| }, |
| { |
| "epoch": 1.0867537313432836, |
| "grad_norm": 1.1909614809262572, |
| "learning_rate": 4.320744666472504e-06, |
| "loss": 0.1443, |
| "step": 1165 |
| }, |
| { |
| "epoch": 1.087686567164179, |
| "grad_norm": 1.2475137107433707, |
| "learning_rate": 4.313486844646808e-06, |
| "loss": 0.1764, |
| "step": 1166 |
| }, |
| { |
| "epoch": 1.0886194029850746, |
| "grad_norm": 1.2181237630025556, |
| "learning_rate": 4.3062304968261545e-06, |
| "loss": 0.1663, |
| "step": 1167 |
| }, |
| { |
| "epoch": 1.0895522388059702, |
| "grad_norm": 1.2425063726533858, |
| "learning_rate": 4.2989756385905715e-06, |
| "loss": 0.1384, |
| "step": 1168 |
| }, |
| { |
| "epoch": 1.0904850746268657, |
| "grad_norm": 1.3832152612220123, |
| "learning_rate": 4.291722285516887e-06, |
| "loss": 0.2491, |
| "step": 1169 |
| }, |
| { |
| "epoch": 1.0914179104477613, |
| "grad_norm": 1.3080535208233788, |
| "learning_rate": 4.284470453178698e-06, |
| "loss": 0.1993, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.0923507462686568, |
| "grad_norm": 1.3291999749207284, |
| "learning_rate": 4.277220157146335e-06, |
| "loss": 0.1574, |
| "step": 1171 |
| }, |
| { |
| "epoch": 1.0932835820895523, |
| "grad_norm": 1.3149288003758082, |
| "learning_rate": 4.269971412986833e-06, |
| "loss": 0.1649, |
| "step": 1172 |
| }, |
| { |
| "epoch": 1.0942164179104477, |
| "grad_norm": 1.3969699379485367, |
| "learning_rate": 4.262724236263892e-06, |
| "loss": 0.2198, |
| "step": 1173 |
| }, |
| { |
| "epoch": 1.0951492537313432, |
| "grad_norm": 1.2513590469723619, |
| "learning_rate": 4.255478642537846e-06, |
| "loss": 0.1558, |
| "step": 1174 |
| }, |
| { |
| "epoch": 1.0960820895522387, |
| "grad_norm": 1.2738684649625196, |
| "learning_rate": 4.248234647365632e-06, |
| "loss": 0.1623, |
| "step": 1175 |
| }, |
| { |
| "epoch": 1.0970149253731343, |
| "grad_norm": 1.3008995242875747, |
| "learning_rate": 4.240992266300757e-06, |
| "loss": 0.1873, |
| "step": 1176 |
| }, |
| { |
| "epoch": 1.0979477611940298, |
| "grad_norm": 1.3647443465246403, |
| "learning_rate": 4.233751514893257e-06, |
| "loss": 0.1809, |
| "step": 1177 |
| }, |
| { |
| "epoch": 1.0988805970149254, |
| "grad_norm": 1.2902770059265867, |
| "learning_rate": 4.226512408689674e-06, |
| "loss": 0.1627, |
| "step": 1178 |
| }, |
| { |
| "epoch": 1.099813432835821, |
| "grad_norm": 1.2552774636523052, |
| "learning_rate": 4.219274963233014e-06, |
| "loss": 0.1742, |
| "step": 1179 |
| }, |
| { |
| "epoch": 1.1007462686567164, |
| "grad_norm": 1.2994851316208804, |
| "learning_rate": 4.212039194062718e-06, |
| "loss": 0.1818, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.101679104477612, |
| "grad_norm": 1.2955747010389893, |
| "learning_rate": 4.20480511671463e-06, |
| "loss": 0.1775, |
| "step": 1181 |
| }, |
| { |
| "epoch": 1.1026119402985075, |
| "grad_norm": 1.3932391378184827, |
| "learning_rate": 4.19757274672096e-06, |
| "loss": 0.2347, |
| "step": 1182 |
| }, |
| { |
| "epoch": 1.103544776119403, |
| "grad_norm": 1.2655556196719828, |
| "learning_rate": 4.1903420996102515e-06, |
| "loss": 0.1898, |
| "step": 1183 |
| }, |
| { |
| "epoch": 1.1044776119402986, |
| "grad_norm": 1.3020632302296102, |
| "learning_rate": 4.183113190907349e-06, |
| "loss": 0.1584, |
| "step": 1184 |
| }, |
| { |
| "epoch": 1.1054104477611941, |
| "grad_norm": 1.3265349936085298, |
| "learning_rate": 4.175886036133366e-06, |
| "loss": 0.1927, |
| "step": 1185 |
| }, |
| { |
| "epoch": 1.1063432835820897, |
| "grad_norm": 1.31504432408727, |
| "learning_rate": 4.16866065080565e-06, |
| "loss": 0.1751, |
| "step": 1186 |
| }, |
| { |
| "epoch": 1.107276119402985, |
| "grad_norm": 1.200509069590439, |
| "learning_rate": 4.161437050437746e-06, |
| "loss": 0.1581, |
| "step": 1187 |
| }, |
| { |
| "epoch": 1.1082089552238805, |
| "grad_norm": 1.2910674844500896, |
| "learning_rate": 4.1542152505393694e-06, |
| "loss": 0.1575, |
| "step": 1188 |
| }, |
| { |
| "epoch": 1.109141791044776, |
| "grad_norm": 1.3637483518464766, |
| "learning_rate": 4.146995266616371e-06, |
| "loss": 0.2139, |
| "step": 1189 |
| }, |
| { |
| "epoch": 1.1100746268656716, |
| "grad_norm": 1.4447566279813335, |
| "learning_rate": 4.1397771141706995e-06, |
| "loss": 0.1985, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.1110074626865671, |
| "grad_norm": 1.328516864124165, |
| "learning_rate": 4.132560808700374e-06, |
| "loss": 0.1696, |
| "step": 1191 |
| }, |
| { |
| "epoch": 1.1119402985074627, |
| "grad_norm": 1.27769217641431, |
| "learning_rate": 4.125346365699446e-06, |
| "loss": 0.1906, |
| "step": 1192 |
| }, |
| { |
| "epoch": 1.1128731343283582, |
| "grad_norm": 1.1918835238011203, |
| "learning_rate": 4.118133800657968e-06, |
| "loss": 0.1583, |
| "step": 1193 |
| }, |
| { |
| "epoch": 1.1138059701492538, |
| "grad_norm": 1.3366450390085258, |
| "learning_rate": 4.110923129061961e-06, |
| "loss": 0.1839, |
| "step": 1194 |
| }, |
| { |
| "epoch": 1.1147388059701493, |
| "grad_norm": 1.326326008877487, |
| "learning_rate": 4.103714366393383e-06, |
| "loss": 0.1636, |
| "step": 1195 |
| }, |
| { |
| "epoch": 1.1156716417910448, |
| "grad_norm": 1.2393685258161005, |
| "learning_rate": 4.09650752813009e-06, |
| "loss": 0.1452, |
| "step": 1196 |
| }, |
| { |
| "epoch": 1.1166044776119404, |
| "grad_norm": 1.1971809037071681, |
| "learning_rate": 4.089302629745806e-06, |
| "loss": 0.145, |
| "step": 1197 |
| }, |
| { |
| "epoch": 1.117537313432836, |
| "grad_norm": 1.2609209237949275, |
| "learning_rate": 4.082099686710093e-06, |
| "loss": 0.1666, |
| "step": 1198 |
| }, |
| { |
| "epoch": 1.1184701492537314, |
| "grad_norm": 1.2398171855960034, |
| "learning_rate": 4.074898714488313e-06, |
| "loss": 0.1686, |
| "step": 1199 |
| }, |
| { |
| "epoch": 1.1194029850746268, |
| "grad_norm": 1.2571581832718375, |
| "learning_rate": 4.067699728541595e-06, |
| "loss": 0.152, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.1203358208955223, |
| "grad_norm": 1.1265454199988725, |
| "learning_rate": 4.060502744326805e-06, |
| "loss": 0.1379, |
| "step": 1201 |
| }, |
| { |
| "epoch": 1.1212686567164178, |
| "grad_norm": 1.2099178791599692, |
| "learning_rate": 4.053307777296511e-06, |
| "loss": 0.1537, |
| "step": 1202 |
| }, |
| { |
| "epoch": 1.1222014925373134, |
| "grad_norm": 1.364960057577564, |
| "learning_rate": 4.046114842898948e-06, |
| "loss": 0.1836, |
| "step": 1203 |
| }, |
| { |
| "epoch": 1.123134328358209, |
| "grad_norm": 1.3518523823356796, |
| "learning_rate": 4.03892395657799e-06, |
| "loss": 0.1727, |
| "step": 1204 |
| }, |
| { |
| "epoch": 1.1240671641791045, |
| "grad_norm": 1.3239439887244806, |
| "learning_rate": 4.031735133773113e-06, |
| "loss": 0.1539, |
| "step": 1205 |
| }, |
| { |
| "epoch": 1.125, |
| "grad_norm": 1.2751889630504203, |
| "learning_rate": 4.02454838991936e-06, |
| "loss": 0.1852, |
| "step": 1206 |
| }, |
| { |
| "epoch": 1.1259328358208955, |
| "grad_norm": 1.326689919361112, |
| "learning_rate": 4.0173637404473105e-06, |
| "loss": 0.1811, |
| "step": 1207 |
| }, |
| { |
| "epoch": 1.126865671641791, |
| "grad_norm": 1.2127272826068796, |
| "learning_rate": 4.010181200783052e-06, |
| "loss": 0.1454, |
| "step": 1208 |
| }, |
| { |
| "epoch": 1.1277985074626866, |
| "grad_norm": 1.322726203190604, |
| "learning_rate": 4.003000786348135e-06, |
| "loss": 0.1787, |
| "step": 1209 |
| }, |
| { |
| "epoch": 1.1287313432835822, |
| "grad_norm": 1.2457042711147817, |
| "learning_rate": 3.995822512559552e-06, |
| "loss": 0.164, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.1296641791044777, |
| "grad_norm": 1.1615734518718064, |
| "learning_rate": 3.988646394829699e-06, |
| "loss": 0.1384, |
| "step": 1211 |
| }, |
| { |
| "epoch": 1.1305970149253732, |
| "grad_norm": 1.309796738470515, |
| "learning_rate": 3.981472448566339e-06, |
| "loss": 0.1789, |
| "step": 1212 |
| }, |
| { |
| "epoch": 1.1315298507462686, |
| "grad_norm": 1.2902712317148286, |
| "learning_rate": 3.974300689172579e-06, |
| "loss": 0.1658, |
| "step": 1213 |
| }, |
| { |
| "epoch": 1.132462686567164, |
| "grad_norm": 1.3037701154014294, |
| "learning_rate": 3.967131132046822e-06, |
| "loss": 0.2038, |
| "step": 1214 |
| }, |
| { |
| "epoch": 1.1333955223880596, |
| "grad_norm": 1.3434142092920884, |
| "learning_rate": 3.9599637925827495e-06, |
| "loss": 0.1484, |
| "step": 1215 |
| }, |
| { |
| "epoch": 1.1343283582089552, |
| "grad_norm": 1.401664096227999, |
| "learning_rate": 3.952798686169279e-06, |
| "loss": 0.1981, |
| "step": 1216 |
| }, |
| { |
| "epoch": 1.1352611940298507, |
| "grad_norm": 1.3177773174229326, |
| "learning_rate": 3.945635828190534e-06, |
| "loss": 0.1758, |
| "step": 1217 |
| }, |
| { |
| "epoch": 1.1361940298507462, |
| "grad_norm": 1.2030919853710615, |
| "learning_rate": 3.938475234025812e-06, |
| "loss": 0.1589, |
| "step": 1218 |
| }, |
| { |
| "epoch": 1.1371268656716418, |
| "grad_norm": 1.3322750694007146, |
| "learning_rate": 3.931316919049544e-06, |
| "loss": 0.2114, |
| "step": 1219 |
| }, |
| { |
| "epoch": 1.1380597014925373, |
| "grad_norm": 1.3513292327679234, |
| "learning_rate": 3.924160898631274e-06, |
| "loss": 0.2007, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.1389925373134329, |
| "grad_norm": 1.2715856870031297, |
| "learning_rate": 3.917007188135618e-06, |
| "loss": 0.1502, |
| "step": 1221 |
| }, |
| { |
| "epoch": 1.1399253731343284, |
| "grad_norm": 1.360952524579768, |
| "learning_rate": 3.9098558029222275e-06, |
| "loss": 0.2022, |
| "step": 1222 |
| }, |
| { |
| "epoch": 1.140858208955224, |
| "grad_norm": 1.394234335235106, |
| "learning_rate": 3.902706758345768e-06, |
| "loss": 0.2066, |
| "step": 1223 |
| }, |
| { |
| "epoch": 1.1417910447761195, |
| "grad_norm": 1.3657367560532043, |
| "learning_rate": 3.8955600697558764e-06, |
| "loss": 0.2092, |
| "step": 1224 |
| }, |
| { |
| "epoch": 1.142723880597015, |
| "grad_norm": 1.2934667493963083, |
| "learning_rate": 3.88841575249713e-06, |
| "loss": 0.2095, |
| "step": 1225 |
| }, |
| { |
| "epoch": 1.1436567164179103, |
| "grad_norm": 1.2407088587717017, |
| "learning_rate": 3.8812738219090165e-06, |
| "loss": 0.1546, |
| "step": 1226 |
| }, |
| { |
| "epoch": 1.1445895522388059, |
| "grad_norm": 1.3462235378169791, |
| "learning_rate": 3.874134293325901e-06, |
| "loss": 0.1789, |
| "step": 1227 |
| }, |
| { |
| "epoch": 1.1455223880597014, |
| "grad_norm": 1.4067365696444236, |
| "learning_rate": 3.866997182076985e-06, |
| "loss": 0.1827, |
| "step": 1228 |
| }, |
| { |
| "epoch": 1.146455223880597, |
| "grad_norm": 1.377478579563355, |
| "learning_rate": 3.8598625034862834e-06, |
| "loss": 0.1988, |
| "step": 1229 |
| }, |
| { |
| "epoch": 1.1473880597014925, |
| "grad_norm": 1.279473628493321, |
| "learning_rate": 3.8527302728725906e-06, |
| "loss": 0.1888, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.148320895522388, |
| "grad_norm": 1.2102178107485024, |
| "learning_rate": 3.845600505549443e-06, |
| "loss": 0.1501, |
| "step": 1231 |
| }, |
| { |
| "epoch": 1.1492537313432836, |
| "grad_norm": 1.3131128698812524, |
| "learning_rate": 3.838473216825085e-06, |
| "loss": 0.1687, |
| "step": 1232 |
| }, |
| { |
| "epoch": 1.150186567164179, |
| "grad_norm": 1.2627359714544923, |
| "learning_rate": 3.8313484220024434e-06, |
| "loss": 0.1535, |
| "step": 1233 |
| }, |
| { |
| "epoch": 1.1511194029850746, |
| "grad_norm": 1.288099194043848, |
| "learning_rate": 3.82422613637909e-06, |
| "loss": 0.1996, |
| "step": 1234 |
| }, |
| { |
| "epoch": 1.1520522388059702, |
| "grad_norm": 1.4105348381096752, |
| "learning_rate": 3.817106375247205e-06, |
| "loss": 0.1734, |
| "step": 1235 |
| }, |
| { |
| "epoch": 1.1529850746268657, |
| "grad_norm": 1.2964203374983818, |
| "learning_rate": 3.809989153893554e-06, |
| "loss": 0.1919, |
| "step": 1236 |
| }, |
| { |
| "epoch": 1.1539179104477613, |
| "grad_norm": 1.2822926379983786, |
| "learning_rate": 3.802874487599447e-06, |
| "loss": 0.1644, |
| "step": 1237 |
| }, |
| { |
| "epoch": 1.1548507462686568, |
| "grad_norm": 1.2879062945860258, |
| "learning_rate": 3.795762391640705e-06, |
| "loss": 0.1916, |
| "step": 1238 |
| }, |
| { |
| "epoch": 1.1557835820895521, |
| "grad_norm": 1.42619892292563, |
| "learning_rate": 3.788652881287635e-06, |
| "loss": 0.1916, |
| "step": 1239 |
| }, |
| { |
| "epoch": 1.1567164179104479, |
| "grad_norm": 1.3160995155313582, |
| "learning_rate": 3.781545971804992e-06, |
| "loss": 0.1921, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.1576492537313432, |
| "grad_norm": 1.2513175064563837, |
| "learning_rate": 3.774441678451943e-06, |
| "loss": 0.158, |
| "step": 1241 |
| }, |
| { |
| "epoch": 1.1585820895522387, |
| "grad_norm": 1.2976814682761826, |
| "learning_rate": 3.767340016482039e-06, |
| "loss": 0.1858, |
| "step": 1242 |
| }, |
| { |
| "epoch": 1.1595149253731343, |
| "grad_norm": 1.2631809769370392, |
| "learning_rate": 3.7602410011431837e-06, |
| "loss": 0.1929, |
| "step": 1243 |
| }, |
| { |
| "epoch": 1.1604477611940298, |
| "grad_norm": 1.301688507353941, |
| "learning_rate": 3.753144647677599e-06, |
| "loss": 0.1759, |
| "step": 1244 |
| }, |
| { |
| "epoch": 1.1613805970149254, |
| "grad_norm": 1.3028654984103567, |
| "learning_rate": 3.7460509713217863e-06, |
| "loss": 0.1478, |
| "step": 1245 |
| }, |
| { |
| "epoch": 1.162313432835821, |
| "grad_norm": 1.4170654304734462, |
| "learning_rate": 3.7389599873065034e-06, |
| "loss": 0.2231, |
| "step": 1246 |
| }, |
| { |
| "epoch": 1.1632462686567164, |
| "grad_norm": 1.4136925560947238, |
| "learning_rate": 3.731871710856727e-06, |
| "loss": 0.23, |
| "step": 1247 |
| }, |
| { |
| "epoch": 1.164179104477612, |
| "grad_norm": 1.3030305034565517, |
| "learning_rate": 3.7247861571916183e-06, |
| "loss": 0.1958, |
| "step": 1248 |
| }, |
| { |
| "epoch": 1.1651119402985075, |
| "grad_norm": 1.2163348158439937, |
| "learning_rate": 3.717703341524494e-06, |
| "loss": 0.1557, |
| "step": 1249 |
| }, |
| { |
| "epoch": 1.166044776119403, |
| "grad_norm": 1.2282391935385022, |
| "learning_rate": 3.7106232790627926e-06, |
| "loss": 0.1717, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.1669776119402986, |
| "grad_norm": 1.2831488659200352, |
| "learning_rate": 3.7035459850080392e-06, |
| "loss": 0.2062, |
| "step": 1251 |
| }, |
| { |
| "epoch": 1.1679104477611941, |
| "grad_norm": 1.2737349888267746, |
| "learning_rate": 3.696471474555816e-06, |
| "loss": 0.1576, |
| "step": 1252 |
| }, |
| { |
| "epoch": 1.1688432835820897, |
| "grad_norm": 1.3585086927572092, |
| "learning_rate": 3.6893997628957314e-06, |
| "loss": 0.1652, |
| "step": 1253 |
| }, |
| { |
| "epoch": 1.169776119402985, |
| "grad_norm": 1.2336076065127748, |
| "learning_rate": 3.6823308652113783e-06, |
| "loss": 0.1434, |
| "step": 1254 |
| }, |
| { |
| "epoch": 1.1707089552238805, |
| "grad_norm": 1.275887082452847, |
| "learning_rate": 3.6752647966803114e-06, |
| "loss": 0.1681, |
| "step": 1255 |
| }, |
| { |
| "epoch": 1.171641791044776, |
| "grad_norm": 1.265690020819207, |
| "learning_rate": 3.6682015724740116e-06, |
| "loss": 0.1566, |
| "step": 1256 |
| }, |
| { |
| "epoch": 1.1725746268656716, |
| "grad_norm": 1.3186785189168748, |
| "learning_rate": 3.661141207757854e-06, |
| "loss": 0.1895, |
| "step": 1257 |
| }, |
| { |
| "epoch": 1.1735074626865671, |
| "grad_norm": 1.3013458028814893, |
| "learning_rate": 3.6540837176910688e-06, |
| "loss": 0.1633, |
| "step": 1258 |
| }, |
| { |
| "epoch": 1.1744402985074627, |
| "grad_norm": 1.2924717838350035, |
| "learning_rate": 3.6470291174267187e-06, |
| "loss": 0.1551, |
| "step": 1259 |
| }, |
| { |
| "epoch": 1.1753731343283582, |
| "grad_norm": 1.3345403188122533, |
| "learning_rate": 3.6399774221116613e-06, |
| "loss": 0.1741, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.1763059701492538, |
| "grad_norm": 1.3254228160809514, |
| "learning_rate": 3.6329286468865143e-06, |
| "loss": 0.1822, |
| "step": 1261 |
| }, |
| { |
| "epoch": 1.1772388059701493, |
| "grad_norm": 1.3240960212231794, |
| "learning_rate": 3.625882806885629e-06, |
| "loss": 0.1716, |
| "step": 1262 |
| }, |
| { |
| "epoch": 1.1781716417910448, |
| "grad_norm": 1.277988066208133, |
| "learning_rate": 3.6188399172370526e-06, |
| "loss": 0.1998, |
| "step": 1263 |
| }, |
| { |
| "epoch": 1.1791044776119404, |
| "grad_norm": 1.2745092796536548, |
| "learning_rate": 3.611799993062497e-06, |
| "loss": 0.176, |
| "step": 1264 |
| }, |
| { |
| "epoch": 1.180037313432836, |
| "grad_norm": 1.3100020490753832, |
| "learning_rate": 3.6047630494773093e-06, |
| "loss": 0.1968, |
| "step": 1265 |
| }, |
| { |
| "epoch": 1.1809701492537314, |
| "grad_norm": 1.2350577675277863, |
| "learning_rate": 3.597729101590436e-06, |
| "loss": 0.1448, |
| "step": 1266 |
| }, |
| { |
| "epoch": 1.1819029850746268, |
| "grad_norm": 1.3403726848574598, |
| "learning_rate": 3.590698164504391e-06, |
| "loss": 0.183, |
| "step": 1267 |
| }, |
| { |
| "epoch": 1.1828358208955223, |
| "grad_norm": 1.2471727326244055, |
| "learning_rate": 3.583670253315223e-06, |
| "loss": 0.1597, |
| "step": 1268 |
| }, |
| { |
| "epoch": 1.1837686567164178, |
| "grad_norm": 1.300483980907673, |
| "learning_rate": 3.576645383112485e-06, |
| "loss": 0.1732, |
| "step": 1269 |
| }, |
| { |
| "epoch": 1.1847014925373134, |
| "grad_norm": 1.3056221253356903, |
| "learning_rate": 3.5696235689792e-06, |
| "loss": 0.1694, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.185634328358209, |
| "grad_norm": 1.2615843794808572, |
| "learning_rate": 3.5626048259918324e-06, |
| "loss": 0.1458, |
| "step": 1271 |
| }, |
| { |
| "epoch": 1.1865671641791045, |
| "grad_norm": 1.298997730458422, |
| "learning_rate": 3.5555891692202475e-06, |
| "loss": 0.1776, |
| "step": 1272 |
| }, |
| { |
| "epoch": 1.1875, |
| "grad_norm": 1.3886945575760463, |
| "learning_rate": 3.5485766137276894e-06, |
| "loss": 0.2076, |
| "step": 1273 |
| }, |
| { |
| "epoch": 1.1884328358208955, |
| "grad_norm": 1.2982711284559796, |
| "learning_rate": 3.5415671745707383e-06, |
| "loss": 0.1725, |
| "step": 1274 |
| }, |
| { |
| "epoch": 1.189365671641791, |
| "grad_norm": 1.3367857992818377, |
| "learning_rate": 3.5345608667992863e-06, |
| "loss": 0.1864, |
| "step": 1275 |
| }, |
| { |
| "epoch": 1.1902985074626866, |
| "grad_norm": 1.1634085446327156, |
| "learning_rate": 3.5275577054565047e-06, |
| "loss": 0.1258, |
| "step": 1276 |
| }, |
| { |
| "epoch": 1.1912313432835822, |
| "grad_norm": 1.14606876472478, |
| "learning_rate": 3.520557705578802e-06, |
| "loss": 0.1327, |
| "step": 1277 |
| }, |
| { |
| "epoch": 1.1921641791044777, |
| "grad_norm": 1.3260981006324937, |
| "learning_rate": 3.5135608821958055e-06, |
| "loss": 0.1932, |
| "step": 1278 |
| }, |
| { |
| "epoch": 1.1930970149253732, |
| "grad_norm": 1.3807183926628526, |
| "learning_rate": 3.5065672503303204e-06, |
| "loss": 0.1613, |
| "step": 1279 |
| }, |
| { |
| "epoch": 1.1940298507462686, |
| "grad_norm": 1.2651107865599973, |
| "learning_rate": 3.4995768249982975e-06, |
| "loss": 0.1606, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.194962686567164, |
| "grad_norm": 1.3830161336719449, |
| "learning_rate": 3.492589621208804e-06, |
| "loss": 0.1948, |
| "step": 1281 |
| }, |
| { |
| "epoch": 1.1958955223880596, |
| "grad_norm": 1.2676978832814751, |
| "learning_rate": 3.4856056539639906e-06, |
| "loss": 0.16, |
| "step": 1282 |
| }, |
| { |
| "epoch": 1.1968283582089552, |
| "grad_norm": 1.1400567919289024, |
| "learning_rate": 3.4786249382590575e-06, |
| "loss": 0.1215, |
| "step": 1283 |
| }, |
| { |
| "epoch": 1.1977611940298507, |
| "grad_norm": 1.4562871039890137, |
| "learning_rate": 3.471647489082227e-06, |
| "loss": 0.1727, |
| "step": 1284 |
| }, |
| { |
| "epoch": 1.1986940298507462, |
| "grad_norm": 1.2985859104756226, |
| "learning_rate": 3.4646733214147037e-06, |
| "loss": 0.1647, |
| "step": 1285 |
| }, |
| { |
| "epoch": 1.1996268656716418, |
| "grad_norm": 1.297692863890328, |
| "learning_rate": 3.457702450230649e-06, |
| "loss": 0.1519, |
| "step": 1286 |
| }, |
| { |
| "epoch": 1.2005597014925373, |
| "grad_norm": 1.2724731186295677, |
| "learning_rate": 3.450734890497146e-06, |
| "loss": 0.1501, |
| "step": 1287 |
| }, |
| { |
| "epoch": 1.2014925373134329, |
| "grad_norm": 1.2681638095698602, |
| "learning_rate": 3.443770657174166e-06, |
| "loss": 0.1722, |
| "step": 1288 |
| }, |
| { |
| "epoch": 1.2024253731343284, |
| "grad_norm": 1.1648570392932733, |
| "learning_rate": 3.4368097652145416e-06, |
| "loss": 0.1546, |
| "step": 1289 |
| }, |
| { |
| "epoch": 1.203358208955224, |
| "grad_norm": 1.2635339184109227, |
| "learning_rate": 3.4298522295639298e-06, |
| "loss": 0.1797, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.2042910447761195, |
| "grad_norm": 1.1618912447363592, |
| "learning_rate": 3.4228980651607787e-06, |
| "loss": 0.125, |
| "step": 1291 |
| }, |
| { |
| "epoch": 1.205223880597015, |
| "grad_norm": 1.3480410563587142, |
| "learning_rate": 3.415947286936301e-06, |
| "loss": 0.1904, |
| "step": 1292 |
| }, |
| { |
| "epoch": 1.2061567164179103, |
| "grad_norm": 1.2629731956681998, |
| "learning_rate": 3.40899990981444e-06, |
| "loss": 0.1636, |
| "step": 1293 |
| }, |
| { |
| "epoch": 1.2070895522388059, |
| "grad_norm": 1.1682543725678185, |
| "learning_rate": 3.4020559487118337e-06, |
| "loss": 0.1463, |
| "step": 1294 |
| }, |
| { |
| "epoch": 1.2080223880597014, |
| "grad_norm": 1.345852417828716, |
| "learning_rate": 3.3951154185377843e-06, |
| "loss": 0.1711, |
| "step": 1295 |
| }, |
| { |
| "epoch": 1.208955223880597, |
| "grad_norm": 1.2318372470877859, |
| "learning_rate": 3.388178334194232e-06, |
| "loss": 0.1459, |
| "step": 1296 |
| }, |
| { |
| "epoch": 1.2098880597014925, |
| "grad_norm": 1.324131635189433, |
| "learning_rate": 3.381244710575717e-06, |
| "loss": 0.212, |
| "step": 1297 |
| }, |
| { |
| "epoch": 1.210820895522388, |
| "grad_norm": 1.2369374528389712, |
| "learning_rate": 3.3743145625693456e-06, |
| "loss": 0.1639, |
| "step": 1298 |
| }, |
| { |
| "epoch": 1.2117537313432836, |
| "grad_norm": 1.2577013709675007, |
| "learning_rate": 3.3673879050547664e-06, |
| "loss": 0.1598, |
| "step": 1299 |
| }, |
| { |
| "epoch": 1.212686567164179, |
| "grad_norm": 1.3798021554722362, |
| "learning_rate": 3.360464752904132e-06, |
| "loss": 0.2157, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.2136194029850746, |
| "grad_norm": 1.3073475578732112, |
| "learning_rate": 3.353545120982067e-06, |
| "loss": 0.2055, |
| "step": 1301 |
| }, |
| { |
| "epoch": 1.2145522388059702, |
| "grad_norm": 1.2530740286530024, |
| "learning_rate": 3.346629024145639e-06, |
| "loss": 0.1594, |
| "step": 1302 |
| }, |
| { |
| "epoch": 1.2154850746268657, |
| "grad_norm": 1.2552812505931332, |
| "learning_rate": 3.3397164772443274e-06, |
| "loss": 0.1845, |
| "step": 1303 |
| }, |
| { |
| "epoch": 1.2164179104477613, |
| "grad_norm": 1.2529584561220057, |
| "learning_rate": 3.3328074951199846e-06, |
| "loss": 0.1577, |
| "step": 1304 |
| }, |
| { |
| "epoch": 1.2173507462686568, |
| "grad_norm": 1.257916536805769, |
| "learning_rate": 3.325902092606814e-06, |
| "loss": 0.1402, |
| "step": 1305 |
| }, |
| { |
| "epoch": 1.2182835820895521, |
| "grad_norm": 1.2205205794235958, |
| "learning_rate": 3.319000284531332e-06, |
| "loss": 0.1657, |
| "step": 1306 |
| }, |
| { |
| "epoch": 1.2192164179104479, |
| "grad_norm": 1.3301229261087273, |
| "learning_rate": 3.3121020857123364e-06, |
| "loss": 0.1882, |
| "step": 1307 |
| }, |
| { |
| "epoch": 1.2201492537313432, |
| "grad_norm": 1.309878309023618, |
| "learning_rate": 3.3052075109608734e-06, |
| "loss": 0.2019, |
| "step": 1308 |
| }, |
| { |
| "epoch": 1.2210820895522387, |
| "grad_norm": 1.2342859184120942, |
| "learning_rate": 3.2983165750802127e-06, |
| "loss": 0.1611, |
| "step": 1309 |
| }, |
| { |
| "epoch": 1.2220149253731343, |
| "grad_norm": 1.3024638759100047, |
| "learning_rate": 3.29142929286581e-06, |
| "loss": 0.1734, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.2229477611940298, |
| "grad_norm": 1.2777502310594075, |
| "learning_rate": 3.2845456791052733e-06, |
| "loss": 0.1944, |
| "step": 1311 |
| }, |
| { |
| "epoch": 1.2238805970149254, |
| "grad_norm": 1.2324858537536083, |
| "learning_rate": 3.2776657485783357e-06, |
| "loss": 0.1481, |
| "step": 1312 |
| }, |
| { |
| "epoch": 1.224813432835821, |
| "grad_norm": 1.313727487629851, |
| "learning_rate": 3.2707895160568255e-06, |
| "loss": 0.1932, |
| "step": 1313 |
| }, |
| { |
| "epoch": 1.2257462686567164, |
| "grad_norm": 1.2805182543437854, |
| "learning_rate": 3.263916996304624e-06, |
| "loss": 0.1579, |
| "step": 1314 |
| }, |
| { |
| "epoch": 1.226679104477612, |
| "grad_norm": 1.25660958657868, |
| "learning_rate": 3.257048204077647e-06, |
| "loss": 0.1615, |
| "step": 1315 |
| }, |
| { |
| "epoch": 1.2276119402985075, |
| "grad_norm": 1.232309569771475, |
| "learning_rate": 3.2501831541238048e-06, |
| "loss": 0.1497, |
| "step": 1316 |
| }, |
| { |
| "epoch": 1.228544776119403, |
| "grad_norm": 1.23013841269123, |
| "learning_rate": 3.2433218611829713e-06, |
| "loss": 0.1777, |
| "step": 1317 |
| }, |
| { |
| "epoch": 1.2294776119402986, |
| "grad_norm": 1.3214107220165558, |
| "learning_rate": 3.236464339986956e-06, |
| "loss": 0.1767, |
| "step": 1318 |
| }, |
| { |
| "epoch": 1.2304104477611941, |
| "grad_norm": 1.2928726430457418, |
| "learning_rate": 3.22961060525947e-06, |
| "loss": 0.179, |
| "step": 1319 |
| }, |
| { |
| "epoch": 1.2313432835820897, |
| "grad_norm": 1.3137887003967543, |
| "learning_rate": 3.2227606717160944e-06, |
| "loss": 0.1893, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.232276119402985, |
| "grad_norm": 1.2507248269607059, |
| "learning_rate": 3.2159145540642433e-06, |
| "loss": 0.1543, |
| "step": 1321 |
| }, |
| { |
| "epoch": 1.2332089552238805, |
| "grad_norm": 1.2140463898107692, |
| "learning_rate": 3.2090722670031465e-06, |
| "loss": 0.1462, |
| "step": 1322 |
| }, |
| { |
| "epoch": 1.234141791044776, |
| "grad_norm": 1.2122563405449025, |
| "learning_rate": 3.2022338252238062e-06, |
| "loss": 0.143, |
| "step": 1323 |
| }, |
| { |
| "epoch": 1.2350746268656716, |
| "grad_norm": 1.2406985130643, |
| "learning_rate": 3.1953992434089643e-06, |
| "loss": 0.1575, |
| "step": 1324 |
| }, |
| { |
| "epoch": 1.2360074626865671, |
| "grad_norm": 1.216622584482215, |
| "learning_rate": 3.18856853623308e-06, |
| "loss": 0.1665, |
| "step": 1325 |
| }, |
| { |
| "epoch": 1.2369402985074627, |
| "grad_norm": 1.3313794936617174, |
| "learning_rate": 3.1817417183622915e-06, |
| "loss": 0.1924, |
| "step": 1326 |
| }, |
| { |
| "epoch": 1.2378731343283582, |
| "grad_norm": 1.2094255732356214, |
| "learning_rate": 3.1749188044543865e-06, |
| "loss": 0.186, |
| "step": 1327 |
| }, |
| { |
| "epoch": 1.2388059701492538, |
| "grad_norm": 1.200400622111638, |
| "learning_rate": 3.168099809158769e-06, |
| "loss": 0.1269, |
| "step": 1328 |
| }, |
| { |
| "epoch": 1.2397388059701493, |
| "grad_norm": 1.218016115664713, |
| "learning_rate": 3.1612847471164335e-06, |
| "loss": 0.1805, |
| "step": 1329 |
| }, |
| { |
| "epoch": 1.2406716417910448, |
| "grad_norm": 1.2447106314830831, |
| "learning_rate": 3.1544736329599248e-06, |
| "loss": 0.1586, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.2416044776119404, |
| "grad_norm": 1.2604336406422085, |
| "learning_rate": 3.1476664813133118e-06, |
| "loss": 0.1614, |
| "step": 1331 |
| }, |
| { |
| "epoch": 1.242537313432836, |
| "grad_norm": 1.3542640752949107, |
| "learning_rate": 3.140863306792161e-06, |
| "loss": 0.1868, |
| "step": 1332 |
| }, |
| { |
| "epoch": 1.2434701492537314, |
| "grad_norm": 1.2358962119870105, |
| "learning_rate": 3.1340641240034907e-06, |
| "loss": 0.1545, |
| "step": 1333 |
| }, |
| { |
| "epoch": 1.2444029850746268, |
| "grad_norm": 1.2383222636503617, |
| "learning_rate": 3.1272689475457592e-06, |
| "loss": 0.1815, |
| "step": 1334 |
| }, |
| { |
| "epoch": 1.2453358208955223, |
| "grad_norm": 1.2508922934834654, |
| "learning_rate": 3.1204777920088108e-06, |
| "loss": 0.1668, |
| "step": 1335 |
| }, |
| { |
| "epoch": 1.2462686567164178, |
| "grad_norm": 1.2445475954545528, |
| "learning_rate": 3.113690671973867e-06, |
| "loss": 0.1444, |
| "step": 1336 |
| }, |
| { |
| "epoch": 1.2472014925373134, |
| "grad_norm": 1.3335118104124264, |
| "learning_rate": 3.1069076020134785e-06, |
| "loss": 0.1607, |
| "step": 1337 |
| }, |
| { |
| "epoch": 1.248134328358209, |
| "grad_norm": 1.3301678211762822, |
| "learning_rate": 3.100128596691503e-06, |
| "loss": 0.2012, |
| "step": 1338 |
| }, |
| { |
| "epoch": 1.2490671641791045, |
| "grad_norm": 1.151237341115816, |
| "learning_rate": 3.093353670563071e-06, |
| "loss": 0.1308, |
| "step": 1339 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 1.2201549467316923, |
| "learning_rate": 3.0865828381745515e-06, |
| "loss": 0.1528, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.2509328358208955, |
| "grad_norm": 1.432176751420658, |
| "learning_rate": 3.0798161140635287e-06, |
| "loss": 0.1726, |
| "step": 1341 |
| }, |
| { |
| "epoch": 1.251865671641791, |
| "grad_norm": 1.2928222987662619, |
| "learning_rate": 3.0730535127587626e-06, |
| "loss": 0.1812, |
| "step": 1342 |
| }, |
| { |
| "epoch": 1.2527985074626866, |
| "grad_norm": 1.258937024396766, |
| "learning_rate": 3.0662950487801614e-06, |
| "loss": 0.1787, |
| "step": 1343 |
| }, |
| { |
| "epoch": 1.2537313432835822, |
| "grad_norm": 1.338580616178489, |
| "learning_rate": 3.059540736638751e-06, |
| "loss": 0.1484, |
| "step": 1344 |
| }, |
| { |
| "epoch": 1.2546641791044777, |
| "grad_norm": 1.3522120640921935, |
| "learning_rate": 3.052790590836644e-06, |
| "loss": 0.2266, |
| "step": 1345 |
| }, |
| { |
| "epoch": 1.2555970149253732, |
| "grad_norm": 1.297997790858102, |
| "learning_rate": 3.046044625867004e-06, |
| "loss": 0.2178, |
| "step": 1346 |
| }, |
| { |
| "epoch": 1.2565298507462686, |
| "grad_norm": 1.2977406734476347, |
| "learning_rate": 3.0393028562140237e-06, |
| "loss": 0.1922, |
| "step": 1347 |
| }, |
| { |
| "epoch": 1.2574626865671643, |
| "grad_norm": 1.176994177656682, |
| "learning_rate": 3.0325652963528797e-06, |
| "loss": 0.1904, |
| "step": 1348 |
| }, |
| { |
| "epoch": 1.2583955223880596, |
| "grad_norm": 1.3220076328359516, |
| "learning_rate": 3.0258319607497175e-06, |
| "loss": 0.1625, |
| "step": 1349 |
| }, |
| { |
| "epoch": 1.2593283582089552, |
| "grad_norm": 1.21578082266844, |
| "learning_rate": 3.0191028638616095e-06, |
| "loss": 0.1596, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.2602611940298507, |
| "grad_norm": 1.3360585999784078, |
| "learning_rate": 3.012378020136526e-06, |
| "loss": 0.1946, |
| "step": 1351 |
| }, |
| { |
| "epoch": 1.2611940298507462, |
| "grad_norm": 1.2339184197363335, |
| "learning_rate": 3.0056574440133104e-06, |
| "loss": 0.1624, |
| "step": 1352 |
| }, |
| { |
| "epoch": 1.2621268656716418, |
| "grad_norm": 1.2675456114784982, |
| "learning_rate": 2.9989411499216357e-06, |
| "loss": 0.1867, |
| "step": 1353 |
| }, |
| { |
| "epoch": 1.2630597014925373, |
| "grad_norm": 1.3160190054004581, |
| "learning_rate": 2.992229152281987e-06, |
| "loss": 0.2223, |
| "step": 1354 |
| }, |
| { |
| "epoch": 1.2639925373134329, |
| "grad_norm": 1.4460157844316839, |
| "learning_rate": 2.9855214655056243e-06, |
| "loss": 0.2286, |
| "step": 1355 |
| }, |
| { |
| "epoch": 1.2649253731343284, |
| "grad_norm": 1.333474546319058, |
| "learning_rate": 2.978818103994546e-06, |
| "loss": 0.1869, |
| "step": 1356 |
| }, |
| { |
| "epoch": 1.265858208955224, |
| "grad_norm": 1.282300154157047, |
| "learning_rate": 2.9721190821414713e-06, |
| "loss": 0.1843, |
| "step": 1357 |
| }, |
| { |
| "epoch": 1.2667910447761195, |
| "grad_norm": 1.2344193005337352, |
| "learning_rate": 2.9654244143297972e-06, |
| "loss": 0.1404, |
| "step": 1358 |
| }, |
| { |
| "epoch": 1.267723880597015, |
| "grad_norm": 1.362266068343807, |
| "learning_rate": 2.9587341149335726e-06, |
| "loss": 0.2044, |
| "step": 1359 |
| }, |
| { |
| "epoch": 1.2686567164179103, |
| "grad_norm": 1.2125996731121518, |
| "learning_rate": 2.9520481983174675e-06, |
| "loss": 0.1451, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.269589552238806, |
| "grad_norm": 1.3249164851880222, |
| "learning_rate": 2.945366678836745e-06, |
| "loss": 0.2002, |
| "step": 1361 |
| }, |
| { |
| "epoch": 1.2705223880597014, |
| "grad_norm": 1.20519470883145, |
| "learning_rate": 2.9386895708372205e-06, |
| "loss": 0.144, |
| "step": 1362 |
| }, |
| { |
| "epoch": 1.271455223880597, |
| "grad_norm": 1.2425903867255321, |
| "learning_rate": 2.932016888655241e-06, |
| "loss": 0.1759, |
| "step": 1363 |
| }, |
| { |
| "epoch": 1.2723880597014925, |
| "grad_norm": 1.3903366090601932, |
| "learning_rate": 2.9253486466176516e-06, |
| "loss": 0.2202, |
| "step": 1364 |
| }, |
| { |
| "epoch": 1.273320895522388, |
| "grad_norm": 1.2880579280720423, |
| "learning_rate": 2.9186848590417654e-06, |
| "loss": 0.1944, |
| "step": 1365 |
| }, |
| { |
| "epoch": 1.2742537313432836, |
| "grad_norm": 1.279645812179017, |
| "learning_rate": 2.912025540235327e-06, |
| "loss": 0.159, |
| "step": 1366 |
| }, |
| { |
| "epoch": 1.275186567164179, |
| "grad_norm": 1.304388046501594, |
| "learning_rate": 2.9053707044964886e-06, |
| "loss": 0.1818, |
| "step": 1367 |
| }, |
| { |
| "epoch": 1.2761194029850746, |
| "grad_norm": 1.3372290020118323, |
| "learning_rate": 2.8987203661137776e-06, |
| "loss": 0.179, |
| "step": 1368 |
| }, |
| { |
| "epoch": 1.2770522388059702, |
| "grad_norm": 1.3370082063445698, |
| "learning_rate": 2.8920745393660642e-06, |
| "loss": 0.2107, |
| "step": 1369 |
| }, |
| { |
| "epoch": 1.2779850746268657, |
| "grad_norm": 1.1694998297858563, |
| "learning_rate": 2.885433238522534e-06, |
| "loss": 0.1416, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.2789179104477613, |
| "grad_norm": 1.2882792517258348, |
| "learning_rate": 2.878796477842648e-06, |
| "loss": 0.1822, |
| "step": 1371 |
| }, |
| { |
| "epoch": 1.2798507462686568, |
| "grad_norm": 1.287059786494495, |
| "learning_rate": 2.8721642715761267e-06, |
| "loss": 0.1449, |
| "step": 1372 |
| }, |
| { |
| "epoch": 1.2807835820895521, |
| "grad_norm": 1.2673872593371118, |
| "learning_rate": 2.8655366339629093e-06, |
| "loss": 0.1703, |
| "step": 1373 |
| }, |
| { |
| "epoch": 1.2817164179104479, |
| "grad_norm": 1.3397105330121668, |
| "learning_rate": 2.858913579233127e-06, |
| "loss": 0.1595, |
| "step": 1374 |
| }, |
| { |
| "epoch": 1.2826492537313432, |
| "grad_norm": 1.3257315751315077, |
| "learning_rate": 2.852295121607066e-06, |
| "loss": 0.1816, |
| "step": 1375 |
| }, |
| { |
| "epoch": 1.2835820895522387, |
| "grad_norm": 1.3063699146894663, |
| "learning_rate": 2.8456812752951483e-06, |
| "loss": 0.1892, |
| "step": 1376 |
| }, |
| { |
| "epoch": 1.2845149253731343, |
| "grad_norm": 1.1803856021399208, |
| "learning_rate": 2.8390720544978933e-06, |
| "loss": 0.1451, |
| "step": 1377 |
| }, |
| { |
| "epoch": 1.2854477611940298, |
| "grad_norm": 1.2306047671883888, |
| "learning_rate": 2.8324674734058855e-06, |
| "loss": 0.1547, |
| "step": 1378 |
| }, |
| { |
| "epoch": 1.2863805970149254, |
| "grad_norm": 1.1824152613413623, |
| "learning_rate": 2.8258675461997513e-06, |
| "loss": 0.1466, |
| "step": 1379 |
| }, |
| { |
| "epoch": 1.287313432835821, |
| "grad_norm": 1.180770138321359, |
| "learning_rate": 2.8192722870501242e-06, |
| "loss": 0.1567, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.2882462686567164, |
| "grad_norm": 1.21308308779698, |
| "learning_rate": 2.812681710117614e-06, |
| "loss": 0.1476, |
| "step": 1381 |
| }, |
| { |
| "epoch": 1.289179104477612, |
| "grad_norm": 1.359483464677566, |
| "learning_rate": 2.8060958295527785e-06, |
| "loss": 0.2032, |
| "step": 1382 |
| }, |
| { |
| "epoch": 1.2901119402985075, |
| "grad_norm": 1.3840206529031518, |
| "learning_rate": 2.799514659496092e-06, |
| "loss": 0.154, |
| "step": 1383 |
| }, |
| { |
| "epoch": 1.291044776119403, |
| "grad_norm": 1.2453245620296973, |
| "learning_rate": 2.792938214077912e-06, |
| "loss": 0.1439, |
| "step": 1384 |
| }, |
| { |
| "epoch": 1.2919776119402986, |
| "grad_norm": 1.2518021387625964, |
| "learning_rate": 2.7863665074184553e-06, |
| "loss": 0.1748, |
| "step": 1385 |
| }, |
| { |
| "epoch": 1.292910447761194, |
| "grad_norm": 1.2698294847640776, |
| "learning_rate": 2.7797995536277624e-06, |
| "loss": 0.1641, |
| "step": 1386 |
| }, |
| { |
| "epoch": 1.2938432835820897, |
| "grad_norm": 1.2800728502720369, |
| "learning_rate": 2.773237366805672e-06, |
| "loss": 0.1558, |
| "step": 1387 |
| }, |
| { |
| "epoch": 1.294776119402985, |
| "grad_norm": 1.274571716079416, |
| "learning_rate": 2.766679961041781e-06, |
| "loss": 0.1857, |
| "step": 1388 |
| }, |
| { |
| "epoch": 1.2957089552238805, |
| "grad_norm": 1.320652195693462, |
| "learning_rate": 2.760127350415427e-06, |
| "loss": 0.1684, |
| "step": 1389 |
| }, |
| { |
| "epoch": 1.296641791044776, |
| "grad_norm": 1.3388822639892841, |
| "learning_rate": 2.753579548995652e-06, |
| "loss": 0.1651, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.2975746268656716, |
| "grad_norm": 1.1868289977432622, |
| "learning_rate": 2.7470365708411673e-06, |
| "loss": 0.1439, |
| "step": 1391 |
| }, |
| { |
| "epoch": 1.2985074626865671, |
| "grad_norm": 1.4092140368954786, |
| "learning_rate": 2.740498430000332e-06, |
| "loss": 0.1907, |
| "step": 1392 |
| }, |
| { |
| "epoch": 1.2994402985074627, |
| "grad_norm": 1.2461926006085717, |
| "learning_rate": 2.7339651405111176e-06, |
| "loss": 0.1608, |
| "step": 1393 |
| }, |
| { |
| "epoch": 1.3003731343283582, |
| "grad_norm": 1.2956733449383946, |
| "learning_rate": 2.727436716401083e-06, |
| "loss": 0.1857, |
| "step": 1394 |
| }, |
| { |
| "epoch": 1.3013059701492538, |
| "grad_norm": 1.30421727417671, |
| "learning_rate": 2.7209131716873347e-06, |
| "loss": 0.1943, |
| "step": 1395 |
| }, |
| { |
| "epoch": 1.3022388059701493, |
| "grad_norm": 1.2063263793325392, |
| "learning_rate": 2.714394520376509e-06, |
| "loss": 0.1539, |
| "step": 1396 |
| }, |
| { |
| "epoch": 1.3031716417910448, |
| "grad_norm": 1.280761928790951, |
| "learning_rate": 2.7078807764647277e-06, |
| "loss": 0.1676, |
| "step": 1397 |
| }, |
| { |
| "epoch": 1.3041044776119404, |
| "grad_norm": 1.3247312247362115, |
| "learning_rate": 2.701371953937583e-06, |
| "loss": 0.2055, |
| "step": 1398 |
| }, |
| { |
| "epoch": 1.3050373134328357, |
| "grad_norm": 1.257135213905167, |
| "learning_rate": 2.694868066770099e-06, |
| "loss": 0.1759, |
| "step": 1399 |
| }, |
| { |
| "epoch": 1.3059701492537314, |
| "grad_norm": 1.4301480995500815, |
| "learning_rate": 2.6883691289267e-06, |
| "loss": 0.1661, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.3069029850746268, |
| "grad_norm": 1.1818771406706536, |
| "learning_rate": 2.6818751543611892e-06, |
| "loss": 0.1491, |
| "step": 1401 |
| }, |
| { |
| "epoch": 1.3078358208955223, |
| "grad_norm": 1.240059855888409, |
| "learning_rate": 2.675386157016706e-06, |
| "loss": 0.16, |
| "step": 1402 |
| }, |
| { |
| "epoch": 1.3087686567164178, |
| "grad_norm": 1.2964425916034028, |
| "learning_rate": 2.6689021508257105e-06, |
| "loss": 0.205, |
| "step": 1403 |
| }, |
| { |
| "epoch": 1.3097014925373134, |
| "grad_norm": 1.2421368200692056, |
| "learning_rate": 2.6624231497099395e-06, |
| "loss": 0.1722, |
| "step": 1404 |
| }, |
| { |
| "epoch": 1.310634328358209, |
| "grad_norm": 1.3169176322250298, |
| "learning_rate": 2.6559491675803883e-06, |
| "loss": 0.1924, |
| "step": 1405 |
| }, |
| { |
| "epoch": 1.3115671641791045, |
| "grad_norm": 1.1951875951034385, |
| "learning_rate": 2.649480218337276e-06, |
| "loss": 0.1317, |
| "step": 1406 |
| }, |
| { |
| "epoch": 1.3125, |
| "grad_norm": 1.1963353766896352, |
| "learning_rate": 2.6430163158700116e-06, |
| "loss": 0.1393, |
| "step": 1407 |
| }, |
| { |
| "epoch": 1.3134328358208955, |
| "grad_norm": 1.351728946093592, |
| "learning_rate": 2.636557474057173e-06, |
| "loss": 0.183, |
| "step": 1408 |
| }, |
| { |
| "epoch": 1.314365671641791, |
| "grad_norm": 1.369279930285477, |
| "learning_rate": 2.6301037067664726e-06, |
| "loss": 0.2294, |
| "step": 1409 |
| }, |
| { |
| "epoch": 1.3152985074626866, |
| "grad_norm": 1.4547602099783485, |
| "learning_rate": 2.623655027854719e-06, |
| "loss": 0.1615, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.3162313432835822, |
| "grad_norm": 1.2297794308793724, |
| "learning_rate": 2.6172114511678047e-06, |
| "loss": 0.175, |
| "step": 1411 |
| }, |
| { |
| "epoch": 1.3171641791044777, |
| "grad_norm": 1.251638883776497, |
| "learning_rate": 2.6107729905406655e-06, |
| "loss": 0.1768, |
| "step": 1412 |
| }, |
| { |
| "epoch": 1.3180970149253732, |
| "grad_norm": 1.256896553193628, |
| "learning_rate": 2.6043396597972488e-06, |
| "loss": 0.1602, |
| "step": 1413 |
| }, |
| { |
| "epoch": 1.3190298507462686, |
| "grad_norm": 1.324806723241369, |
| "learning_rate": 2.597911472750494e-06, |
| "loss": 0.181, |
| "step": 1414 |
| }, |
| { |
| "epoch": 1.3199626865671643, |
| "grad_norm": 1.3490247296765157, |
| "learning_rate": 2.5914884432022873e-06, |
| "loss": 0.1896, |
| "step": 1415 |
| }, |
| { |
| "epoch": 1.3208955223880596, |
| "grad_norm": 1.2752042814670184, |
| "learning_rate": 2.585070584943452e-06, |
| "loss": 0.1584, |
| "step": 1416 |
| }, |
| { |
| "epoch": 1.3218283582089552, |
| "grad_norm": 1.2645878576273337, |
| "learning_rate": 2.5786579117536983e-06, |
| "loss": 0.1605, |
| "step": 1417 |
| }, |
| { |
| "epoch": 1.3227611940298507, |
| "grad_norm": 1.2539941415618645, |
| "learning_rate": 2.5722504374016093e-06, |
| "loss": 0.1606, |
| "step": 1418 |
| }, |
| { |
| "epoch": 1.3236940298507462, |
| "grad_norm": 1.2283847069486205, |
| "learning_rate": 2.5658481756446056e-06, |
| "loss": 0.1733, |
| "step": 1419 |
| }, |
| { |
| "epoch": 1.3246268656716418, |
| "grad_norm": 1.3228664331345847, |
| "learning_rate": 2.5594511402289145e-06, |
| "loss": 0.1941, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.3255597014925373, |
| "grad_norm": 1.2693422740719202, |
| "learning_rate": 2.553059344889543e-06, |
| "loss": 0.1592, |
| "step": 1421 |
| }, |
| { |
| "epoch": 1.3264925373134329, |
| "grad_norm": 1.268832561869623, |
| "learning_rate": 2.546672803350247e-06, |
| "loss": 0.1488, |
| "step": 1422 |
| }, |
| { |
| "epoch": 1.3274253731343284, |
| "grad_norm": 1.2715434470082407, |
| "learning_rate": 2.5402915293234985e-06, |
| "loss": 0.1787, |
| "step": 1423 |
| }, |
| { |
| "epoch": 1.328358208955224, |
| "grad_norm": 1.199882042843623, |
| "learning_rate": 2.533915536510464e-06, |
| "loss": 0.1383, |
| "step": 1424 |
| }, |
| { |
| "epoch": 1.3292910447761195, |
| "grad_norm": 1.2940512076143134, |
| "learning_rate": 2.527544838600969e-06, |
| "loss": 0.1668, |
| "step": 1425 |
| }, |
| { |
| "epoch": 1.330223880597015, |
| "grad_norm": 1.2859342036370367, |
| "learning_rate": 2.521179449273472e-06, |
| "loss": 0.1673, |
| "step": 1426 |
| }, |
| { |
| "epoch": 1.3311567164179103, |
| "grad_norm": 1.368465223818523, |
| "learning_rate": 2.5148193821950317e-06, |
| "loss": 0.1931, |
| "step": 1427 |
| }, |
| { |
| "epoch": 1.332089552238806, |
| "grad_norm": 1.2412914337957712, |
| "learning_rate": 2.5084646510212817e-06, |
| "loss": 0.1668, |
| "step": 1428 |
| }, |
| { |
| "epoch": 1.3330223880597014, |
| "grad_norm": 1.3694410540778188, |
| "learning_rate": 2.5021152693963957e-06, |
| "loss": 0.1668, |
| "step": 1429 |
| }, |
| { |
| "epoch": 1.333955223880597, |
| "grad_norm": 1.3108468106665578, |
| "learning_rate": 2.495771250953061e-06, |
| "loss": 0.1558, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.3348880597014925, |
| "grad_norm": 1.2678417230006749, |
| "learning_rate": 2.4894326093124534e-06, |
| "loss": 0.1824, |
| "step": 1431 |
| }, |
| { |
| "epoch": 1.335820895522388, |
| "grad_norm": 1.234900070038742, |
| "learning_rate": 2.4830993580842023e-06, |
| "loss": 0.1608, |
| "step": 1432 |
| }, |
| { |
| "epoch": 1.3367537313432836, |
| "grad_norm": 1.2791584100851827, |
| "learning_rate": 2.476771510866364e-06, |
| "loss": 0.1687, |
| "step": 1433 |
| }, |
| { |
| "epoch": 1.337686567164179, |
| "grad_norm": 1.3008277496389138, |
| "learning_rate": 2.4704490812453907e-06, |
| "loss": 0.1828, |
| "step": 1434 |
| }, |
| { |
| "epoch": 1.3386194029850746, |
| "grad_norm": 1.2603709807496224, |
| "learning_rate": 2.4641320827961063e-06, |
| "loss": 0.1811, |
| "step": 1435 |
| }, |
| { |
| "epoch": 1.3395522388059702, |
| "grad_norm": 1.3262286033891795, |
| "learning_rate": 2.457820529081666e-06, |
| "loss": 0.2049, |
| "step": 1436 |
| }, |
| { |
| "epoch": 1.3404850746268657, |
| "grad_norm": 1.2793186685327578, |
| "learning_rate": 2.4515144336535413e-06, |
| "loss": 0.176, |
| "step": 1437 |
| }, |
| { |
| "epoch": 1.3414179104477613, |
| "grad_norm": 1.3417857413110468, |
| "learning_rate": 2.445213810051482e-06, |
| "loss": 0.2049, |
| "step": 1438 |
| }, |
| { |
| "epoch": 1.3423507462686568, |
| "grad_norm": 1.2402106645566335, |
| "learning_rate": 2.43891867180349e-06, |
| "loss": 0.1617, |
| "step": 1439 |
| }, |
| { |
| "epoch": 1.3432835820895521, |
| "grad_norm": 1.270642739818944, |
| "learning_rate": 2.4326290324257896e-06, |
| "loss": 0.166, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.3442164179104479, |
| "grad_norm": 1.1747953276893874, |
| "learning_rate": 2.4263449054227983e-06, |
| "loss": 0.1229, |
| "step": 1441 |
| }, |
| { |
| "epoch": 1.3451492537313432, |
| "grad_norm": 1.312921174633494, |
| "learning_rate": 2.4200663042870977e-06, |
| "loss": 0.2017, |
| "step": 1442 |
| }, |
| { |
| "epoch": 1.3460820895522387, |
| "grad_norm": 1.2446358820836307, |
| "learning_rate": 2.413793242499402e-06, |
| "loss": 0.1706, |
| "step": 1443 |
| }, |
| { |
| "epoch": 1.3470149253731343, |
| "grad_norm": 1.327826951397786, |
| "learning_rate": 2.407525733528538e-06, |
| "loss": 0.192, |
| "step": 1444 |
| }, |
| { |
| "epoch": 1.3479477611940298, |
| "grad_norm": 1.3012234815142307, |
| "learning_rate": 2.4012637908314064e-06, |
| "loss": 0.1689, |
| "step": 1445 |
| }, |
| { |
| "epoch": 1.3488805970149254, |
| "grad_norm": 1.2513612057630938, |
| "learning_rate": 2.3950074278529566e-06, |
| "loss": 0.1736, |
| "step": 1446 |
| }, |
| { |
| "epoch": 1.349813432835821, |
| "grad_norm": 1.1442958163589816, |
| "learning_rate": 2.38875665802616e-06, |
| "loss": 0.1614, |
| "step": 1447 |
| }, |
| { |
| "epoch": 1.3507462686567164, |
| "grad_norm": 1.246027691312265, |
| "learning_rate": 2.382511494771979e-06, |
| "loss": 0.1756, |
| "step": 1448 |
| }, |
| { |
| "epoch": 1.351679104477612, |
| "grad_norm": 1.1708781886882182, |
| "learning_rate": 2.3762719514993327e-06, |
| "loss": 0.149, |
| "step": 1449 |
| }, |
| { |
| "epoch": 1.3526119402985075, |
| "grad_norm": 1.2270642508980645, |
| "learning_rate": 2.370038041605079e-06, |
| "loss": 0.1879, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.353544776119403, |
| "grad_norm": 1.2447935359458226, |
| "learning_rate": 2.36380977847398e-06, |
| "loss": 0.1375, |
| "step": 1451 |
| }, |
| { |
| "epoch": 1.3544776119402986, |
| "grad_norm": 1.2858647314786538, |
| "learning_rate": 2.357587175478672e-06, |
| "loss": 0.1907, |
| "step": 1452 |
| }, |
| { |
| "epoch": 1.355410447761194, |
| "grad_norm": 1.1806857980871994, |
| "learning_rate": 2.3513702459796406e-06, |
| "loss": 0.1344, |
| "step": 1453 |
| }, |
| { |
| "epoch": 1.3563432835820897, |
| "grad_norm": 1.1918140230068817, |
| "learning_rate": 2.3451590033251887e-06, |
| "loss": 0.1525, |
| "step": 1454 |
| }, |
| { |
| "epoch": 1.357276119402985, |
| "grad_norm": 1.2130814375230647, |
| "learning_rate": 2.338953460851408e-06, |
| "loss": 0.1469, |
| "step": 1455 |
| }, |
| { |
| "epoch": 1.3582089552238805, |
| "grad_norm": 1.3714970513970581, |
| "learning_rate": 2.3327536318821496e-06, |
| "loss": 0.1723, |
| "step": 1456 |
| }, |
| { |
| "epoch": 1.359141791044776, |
| "grad_norm": 1.1811319514596708, |
| "learning_rate": 2.3265595297290035e-06, |
| "loss": 0.1451, |
| "step": 1457 |
| }, |
| { |
| "epoch": 1.3600746268656716, |
| "grad_norm": 1.1782788935254018, |
| "learning_rate": 2.320371167691258e-06, |
| "loss": 0.136, |
| "step": 1458 |
| }, |
| { |
| "epoch": 1.3610074626865671, |
| "grad_norm": 1.2409821702879118, |
| "learning_rate": 2.31418855905588e-06, |
| "loss": 0.1773, |
| "step": 1459 |
| }, |
| { |
| "epoch": 1.3619402985074627, |
| "grad_norm": 1.2830342959648524, |
| "learning_rate": 2.3080117170974827e-06, |
| "loss": 0.1867, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.3628731343283582, |
| "grad_norm": 1.2132120797541985, |
| "learning_rate": 2.301840655078298e-06, |
| "loss": 0.1475, |
| "step": 1461 |
| }, |
| { |
| "epoch": 1.3638059701492538, |
| "grad_norm": 1.3156738879306071, |
| "learning_rate": 2.2956753862481444e-06, |
| "loss": 0.1851, |
| "step": 1462 |
| }, |
| { |
| "epoch": 1.3647388059701493, |
| "grad_norm": 1.3398257271092922, |
| "learning_rate": 2.289515923844406e-06, |
| "loss": 0.177, |
| "step": 1463 |
| }, |
| { |
| "epoch": 1.3656716417910448, |
| "grad_norm": 1.259885121964175, |
| "learning_rate": 2.2833622810919987e-06, |
| "loss": 0.183, |
| "step": 1464 |
| }, |
| { |
| "epoch": 1.3666044776119404, |
| "grad_norm": 1.135200212068689, |
| "learning_rate": 2.277214471203342e-06, |
| "loss": 0.1333, |
| "step": 1465 |
| }, |
| { |
| "epoch": 1.3675373134328357, |
| "grad_norm": 1.29786431462598, |
| "learning_rate": 2.2710725073783345e-06, |
| "loss": 0.1912, |
| "step": 1466 |
| }, |
| { |
| "epoch": 1.3684701492537314, |
| "grad_norm": 1.2025422661506906, |
| "learning_rate": 2.264936402804322e-06, |
| "loss": 0.1435, |
| "step": 1467 |
| }, |
| { |
| "epoch": 1.3694029850746268, |
| "grad_norm": 1.2695588694838684, |
| "learning_rate": 2.2588061706560643e-06, |
| "loss": 0.1621, |
| "step": 1468 |
| }, |
| { |
| "epoch": 1.3703358208955223, |
| "grad_norm": 1.189376190696932, |
| "learning_rate": 2.2526818240957217e-06, |
| "loss": 0.1478, |
| "step": 1469 |
| }, |
| { |
| "epoch": 1.3712686567164178, |
| "grad_norm": 1.285370928942321, |
| "learning_rate": 2.2465633762728093e-06, |
| "loss": 0.1552, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.3722014925373134, |
| "grad_norm": 1.254381199655615, |
| "learning_rate": 2.240450840324183e-06, |
| "loss": 0.1704, |
| "step": 1471 |
| }, |
| { |
| "epoch": 1.373134328358209, |
| "grad_norm": 1.300268568192004, |
| "learning_rate": 2.234344229374003e-06, |
| "loss": 0.1495, |
| "step": 1472 |
| }, |
| { |
| "epoch": 1.3740671641791045, |
| "grad_norm": 1.2923264402806232, |
| "learning_rate": 2.2282435565337084e-06, |
| "loss": 0.1695, |
| "step": 1473 |
| }, |
| { |
| "epoch": 1.375, |
| "grad_norm": 1.2156952202999838, |
| "learning_rate": 2.2221488349019903e-06, |
| "loss": 0.1648, |
| "step": 1474 |
| }, |
| { |
| "epoch": 1.3759328358208955, |
| "grad_norm": 1.2625527545126876, |
| "learning_rate": 2.216060077564757e-06, |
| "loss": 0.1832, |
| "step": 1475 |
| }, |
| { |
| "epoch": 1.376865671641791, |
| "grad_norm": 1.207405401122152, |
| "learning_rate": 2.2099772975951145e-06, |
| "loss": 0.146, |
| "step": 1476 |
| }, |
| { |
| "epoch": 1.3777985074626866, |
| "grad_norm": 1.2427419323477014, |
| "learning_rate": 2.203900508053336e-06, |
| "loss": 0.18, |
| "step": 1477 |
| }, |
| { |
| "epoch": 1.3787313432835822, |
| "grad_norm": 1.2046157819367844, |
| "learning_rate": 2.1978297219868307e-06, |
| "loss": 0.1665, |
| "step": 1478 |
| }, |
| { |
| "epoch": 1.3796641791044777, |
| "grad_norm": 1.2987998527053135, |
| "learning_rate": 2.191764952430119e-06, |
| "loss": 0.1981, |
| "step": 1479 |
| }, |
| { |
| "epoch": 1.3805970149253732, |
| "grad_norm": 1.4449869844658465, |
| "learning_rate": 2.1857062124048036e-06, |
| "loss": 0.1755, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.3815298507462686, |
| "grad_norm": 1.2788873598790043, |
| "learning_rate": 2.1796535149195362e-06, |
| "loss": 0.1709, |
| "step": 1481 |
| }, |
| { |
| "epoch": 1.3824626865671643, |
| "grad_norm": 1.3383933421237963, |
| "learning_rate": 2.1736068729700045e-06, |
| "loss": 0.2013, |
| "step": 1482 |
| }, |
| { |
| "epoch": 1.3833955223880596, |
| "grad_norm": 1.1532079090653113, |
| "learning_rate": 2.167566299538883e-06, |
| "loss": 0.1491, |
| "step": 1483 |
| }, |
| { |
| "epoch": 1.3843283582089552, |
| "grad_norm": 1.2545583313203545, |
| "learning_rate": 2.161531807595825e-06, |
| "loss": 0.1535, |
| "step": 1484 |
| }, |
| { |
| "epoch": 1.3852611940298507, |
| "grad_norm": 1.3383178306960863, |
| "learning_rate": 2.155503410097423e-06, |
| "loss": 0.1992, |
| "step": 1485 |
| }, |
| { |
| "epoch": 1.3861940298507462, |
| "grad_norm": 1.2521413725439199, |
| "learning_rate": 2.1494811199871857e-06, |
| "loss": 0.1563, |
| "step": 1486 |
| }, |
| { |
| "epoch": 1.3871268656716418, |
| "grad_norm": 1.3317947331297295, |
| "learning_rate": 2.1434649501955062e-06, |
| "loss": 0.2041, |
| "step": 1487 |
| }, |
| { |
| "epoch": 1.3880597014925373, |
| "grad_norm": 1.2507994216524636, |
| "learning_rate": 2.1374549136396417e-06, |
| "loss": 0.1589, |
| "step": 1488 |
| }, |
| { |
| "epoch": 1.3889925373134329, |
| "grad_norm": 1.392732959414099, |
| "learning_rate": 2.1314510232236723e-06, |
| "loss": 0.1922, |
| "step": 1489 |
| }, |
| { |
| "epoch": 1.3899253731343284, |
| "grad_norm": 1.33597802710792, |
| "learning_rate": 2.1254532918384892e-06, |
| "loss": 0.1754, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.390858208955224, |
| "grad_norm": 1.3917612946207263, |
| "learning_rate": 2.119461732361757e-06, |
| "loss": 0.2253, |
| "step": 1491 |
| }, |
| { |
| "epoch": 1.3917910447761195, |
| "grad_norm": 1.313878661367244, |
| "learning_rate": 2.113476357657889e-06, |
| "loss": 0.1885, |
| "step": 1492 |
| }, |
| { |
| "epoch": 1.392723880597015, |
| "grad_norm": 1.222705362849111, |
| "learning_rate": 2.1074971805780196e-06, |
| "loss": 0.1418, |
| "step": 1493 |
| }, |
| { |
| "epoch": 1.3936567164179103, |
| "grad_norm": 1.318990905960625, |
| "learning_rate": 2.1015242139599773e-06, |
| "loss": 0.1941, |
| "step": 1494 |
| }, |
| { |
| "epoch": 1.394589552238806, |
| "grad_norm": 1.216340100659452, |
| "learning_rate": 2.095557470628253e-06, |
| "loss": 0.1515, |
| "step": 1495 |
| }, |
| { |
| "epoch": 1.3955223880597014, |
| "grad_norm": 1.3156787670542847, |
| "learning_rate": 2.089596963393975e-06, |
| "loss": 0.1511, |
| "step": 1496 |
| }, |
| { |
| "epoch": 1.396455223880597, |
| "grad_norm": 1.2276298610177279, |
| "learning_rate": 2.0836427050548874e-06, |
| "loss": 0.1581, |
| "step": 1497 |
| }, |
| { |
| "epoch": 1.3973880597014925, |
| "grad_norm": 1.3040288892618115, |
| "learning_rate": 2.0776947083953136e-06, |
| "loss": 0.1696, |
| "step": 1498 |
| }, |
| { |
| "epoch": 1.398320895522388, |
| "grad_norm": 1.3052154898802157, |
| "learning_rate": 2.071752986186134e-06, |
| "loss": 0.1868, |
| "step": 1499 |
| }, |
| { |
| "epoch": 1.3992537313432836, |
| "grad_norm": 1.2960385420690526, |
| "learning_rate": 2.0658175511847565e-06, |
| "loss": 0.203, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.3992537313432836, |
| "eval_loss": 0.2172926515340805, |
| "eval_runtime": 3.4362, |
| "eval_samples_per_second": 25.318, |
| "eval_steps_per_second": 6.402, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.400186567164179, |
| "grad_norm": 1.191067930404344, |
| "learning_rate": 2.0598884161350923e-06, |
| "loss": 0.1297, |
| "step": 1501 |
| }, |
| { |
| "epoch": 1.4011194029850746, |
| "grad_norm": 1.2983856064443016, |
| "learning_rate": 2.05396559376752e-06, |
| "loss": 0.2005, |
| "step": 1502 |
| }, |
| { |
| "epoch": 1.4020522388059702, |
| "grad_norm": 1.273195108135393, |
| "learning_rate": 2.0480490967988693e-06, |
| "loss": 0.1653, |
| "step": 1503 |
| }, |
| { |
| "epoch": 1.4029850746268657, |
| "grad_norm": 1.2477168832906964, |
| "learning_rate": 2.042138937932388e-06, |
| "loss": 0.1725, |
| "step": 1504 |
| }, |
| { |
| "epoch": 1.4039179104477613, |
| "grad_norm": 1.2654990217842248, |
| "learning_rate": 2.036235129857715e-06, |
| "loss": 0.1967, |
| "step": 1505 |
| }, |
| { |
| "epoch": 1.4048507462686568, |
| "grad_norm": 1.300973276034434, |
| "learning_rate": 2.0303376852508527e-06, |
| "loss": 0.1654, |
| "step": 1506 |
| }, |
| { |
| "epoch": 1.4057835820895521, |
| "grad_norm": 1.2239235226334035, |
| "learning_rate": 2.0244466167741434e-06, |
| "loss": 0.1543, |
| "step": 1507 |
| }, |
| { |
| "epoch": 1.4067164179104479, |
| "grad_norm": 1.3621770901905592, |
| "learning_rate": 2.018561937076236e-06, |
| "loss": 0.2214, |
| "step": 1508 |
| }, |
| { |
| "epoch": 1.4076492537313432, |
| "grad_norm": 1.2314178688880044, |
| "learning_rate": 2.0126836587920605e-06, |
| "loss": 0.1505, |
| "step": 1509 |
| }, |
| { |
| "epoch": 1.4085820895522387, |
| "grad_norm": 1.2375838754524546, |
| "learning_rate": 2.0068117945428077e-06, |
| "loss": 0.1488, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.4095149253731343, |
| "grad_norm": 1.2772042963424348, |
| "learning_rate": 2.0009463569358937e-06, |
| "loss": 0.1578, |
| "step": 1511 |
| }, |
| { |
| "epoch": 1.4104477611940298, |
| "grad_norm": 1.2704695628333311, |
| "learning_rate": 1.995087358564938e-06, |
| "loss": 0.1484, |
| "step": 1512 |
| }, |
| { |
| "epoch": 1.4113805970149254, |
| "grad_norm": 1.2676512489978526, |
| "learning_rate": 1.989234812009732e-06, |
| "loss": 0.1716, |
| "step": 1513 |
| }, |
| { |
| "epoch": 1.412313432835821, |
| "grad_norm": 1.3893814832466052, |
| "learning_rate": 1.9833887298362185e-06, |
| "loss": 0.1924, |
| "step": 1514 |
| }, |
| { |
| "epoch": 1.4132462686567164, |
| "grad_norm": 1.1972367918528255, |
| "learning_rate": 1.9775491245964535e-06, |
| "loss": 0.1559, |
| "step": 1515 |
| }, |
| { |
| "epoch": 1.414179104477612, |
| "grad_norm": 1.2690046609761634, |
| "learning_rate": 1.971716008828593e-06, |
| "loss": 0.1703, |
| "step": 1516 |
| }, |
| { |
| "epoch": 1.4151119402985075, |
| "grad_norm": 1.4415095142128806, |
| "learning_rate": 1.9658893950568574e-06, |
| "loss": 0.1807, |
| "step": 1517 |
| }, |
| { |
| "epoch": 1.416044776119403, |
| "grad_norm": 1.231941180448534, |
| "learning_rate": 1.9600692957915076e-06, |
| "loss": 0.1511, |
| "step": 1518 |
| }, |
| { |
| "epoch": 1.4169776119402986, |
| "grad_norm": 1.2454516869261922, |
| "learning_rate": 1.9542557235288146e-06, |
| "loss": 0.1552, |
| "step": 1519 |
| }, |
| { |
| "epoch": 1.417910447761194, |
| "grad_norm": 1.3500668677203271, |
| "learning_rate": 1.9484486907510405e-06, |
| "loss": 0.2019, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.4188432835820897, |
| "grad_norm": 1.3246104342962923, |
| "learning_rate": 1.9426482099264e-06, |
| "loss": 0.1708, |
| "step": 1521 |
| }, |
| { |
| "epoch": 1.419776119402985, |
| "grad_norm": 1.1917074288234641, |
| "learning_rate": 1.936854293509043e-06, |
| "loss": 0.1211, |
| "step": 1522 |
| }, |
| { |
| "epoch": 1.4207089552238805, |
| "grad_norm": 1.216178650600443, |
| "learning_rate": 1.9310669539390266e-06, |
| "loss": 0.1496, |
| "step": 1523 |
| }, |
| { |
| "epoch": 1.421641791044776, |
| "grad_norm": 1.3395376084981512, |
| "learning_rate": 1.925286203642285e-06, |
| "loss": 0.1734, |
| "step": 1524 |
| }, |
| { |
| "epoch": 1.4225746268656716, |
| "grad_norm": 1.344682183658041, |
| "learning_rate": 1.919512055030606e-06, |
| "loss": 0.2162, |
| "step": 1525 |
| }, |
| { |
| "epoch": 1.4235074626865671, |
| "grad_norm": 1.2739248529598715, |
| "learning_rate": 1.913744520501602e-06, |
| "loss": 0.1712, |
| "step": 1526 |
| }, |
| { |
| "epoch": 1.4244402985074627, |
| "grad_norm": 1.2620710786687312, |
| "learning_rate": 1.9079836124386865e-06, |
| "loss": 0.1853, |
| "step": 1527 |
| }, |
| { |
| "epoch": 1.4253731343283582, |
| "grad_norm": 1.2358754004543349, |
| "learning_rate": 1.90222934321104e-06, |
| "loss": 0.1611, |
| "step": 1528 |
| }, |
| { |
| "epoch": 1.4263059701492538, |
| "grad_norm": 1.2593400350557469, |
| "learning_rate": 1.896481725173594e-06, |
| "loss": 0.1871, |
| "step": 1529 |
| }, |
| { |
| "epoch": 1.4272388059701493, |
| "grad_norm": 1.2844520802387187, |
| "learning_rate": 1.8907407706669972e-06, |
| "loss": 0.179, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.4281716417910448, |
| "grad_norm": 1.2746170483788477, |
| "learning_rate": 1.8850064920175927e-06, |
| "loss": 0.1772, |
| "step": 1531 |
| }, |
| { |
| "epoch": 1.4291044776119404, |
| "grad_norm": 1.2940878198602277, |
| "learning_rate": 1.8792789015373875e-06, |
| "loss": 0.1862, |
| "step": 1532 |
| }, |
| { |
| "epoch": 1.4300373134328357, |
| "grad_norm": 1.195426677196978, |
| "learning_rate": 1.873558011524032e-06, |
| "loss": 0.1494, |
| "step": 1533 |
| }, |
| { |
| "epoch": 1.4309701492537314, |
| "grad_norm": 1.2615153545374882, |
| "learning_rate": 1.8678438342607846e-06, |
| "loss": 0.1908, |
| "step": 1534 |
| }, |
| { |
| "epoch": 1.4319029850746268, |
| "grad_norm": 1.3150310494175041, |
| "learning_rate": 1.8621363820164978e-06, |
| "loss": 0.1738, |
| "step": 1535 |
| }, |
| { |
| "epoch": 1.4328358208955223, |
| "grad_norm": 1.1992716911898411, |
| "learning_rate": 1.856435667045577e-06, |
| "loss": 0.1807, |
| "step": 1536 |
| }, |
| { |
| "epoch": 1.4337686567164178, |
| "grad_norm": 1.2442012956833255, |
| "learning_rate": 1.850741701587968e-06, |
| "loss": 0.1563, |
| "step": 1537 |
| }, |
| { |
| "epoch": 1.4347014925373134, |
| "grad_norm": 1.2918039875800205, |
| "learning_rate": 1.8450544978691237e-06, |
| "loss": 0.1675, |
| "step": 1538 |
| }, |
| { |
| "epoch": 1.435634328358209, |
| "grad_norm": 1.1774630640190071, |
| "learning_rate": 1.8393740680999783e-06, |
| "loss": 0.1195, |
| "step": 1539 |
| }, |
| { |
| "epoch": 1.4365671641791045, |
| "grad_norm": 1.2423005244049108, |
| "learning_rate": 1.8337004244769225e-06, |
| "loss": 0.1659, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.4375, |
| "grad_norm": 1.236103986246663, |
| "learning_rate": 1.8280335791817733e-06, |
| "loss": 0.172, |
| "step": 1541 |
| }, |
| { |
| "epoch": 1.4384328358208955, |
| "grad_norm": 1.2663025654525684, |
| "learning_rate": 1.8223735443817546e-06, |
| "loss": 0.2009, |
| "step": 1542 |
| }, |
| { |
| "epoch": 1.439365671641791, |
| "grad_norm": 1.1945304888279726, |
| "learning_rate": 1.8167203322294673e-06, |
| "loss": 0.1493, |
| "step": 1543 |
| }, |
| { |
| "epoch": 1.4402985074626866, |
| "grad_norm": 1.3246209552257189, |
| "learning_rate": 1.811073954862862e-06, |
| "loss": 0.1647, |
| "step": 1544 |
| }, |
| { |
| "epoch": 1.4412313432835822, |
| "grad_norm": 1.2286615732904398, |
| "learning_rate": 1.8054344244052153e-06, |
| "loss": 0.1477, |
| "step": 1545 |
| }, |
| { |
| "epoch": 1.4421641791044777, |
| "grad_norm": 1.3606390787574698, |
| "learning_rate": 1.7998017529651042e-06, |
| "loss": 0.181, |
| "step": 1546 |
| }, |
| { |
| "epoch": 1.4430970149253732, |
| "grad_norm": 1.3220909797733937, |
| "learning_rate": 1.7941759526363739e-06, |
| "loss": 0.2089, |
| "step": 1547 |
| }, |
| { |
| "epoch": 1.4440298507462686, |
| "grad_norm": 1.23294866264412, |
| "learning_rate": 1.7885570354981236e-06, |
| "loss": 0.16, |
| "step": 1548 |
| }, |
| { |
| "epoch": 1.4449626865671643, |
| "grad_norm": 1.1456697988885036, |
| "learning_rate": 1.7829450136146664e-06, |
| "loss": 0.127, |
| "step": 1549 |
| }, |
| { |
| "epoch": 1.4458955223880596, |
| "grad_norm": 1.242510469154095, |
| "learning_rate": 1.7773398990355162e-06, |
| "loss": 0.1422, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.4468283582089552, |
| "grad_norm": 1.289842263551184, |
| "learning_rate": 1.771741703795355e-06, |
| "loss": 0.2084, |
| "step": 1551 |
| }, |
| { |
| "epoch": 1.4477611940298507, |
| "grad_norm": 1.2230492782672477, |
| "learning_rate": 1.7661504399140066e-06, |
| "loss": 0.133, |
| "step": 1552 |
| }, |
| { |
| "epoch": 1.4486940298507462, |
| "grad_norm": 1.2448285718824177, |
| "learning_rate": 1.7605661193964169e-06, |
| "loss": 0.1822, |
| "step": 1553 |
| }, |
| { |
| "epoch": 1.4496268656716418, |
| "grad_norm": 1.3230576198153048, |
| "learning_rate": 1.754988754232616e-06, |
| "loss": 0.1895, |
| "step": 1554 |
| }, |
| { |
| "epoch": 1.4505597014925373, |
| "grad_norm": 1.2449432948513492, |
| "learning_rate": 1.749418356397708e-06, |
| "loss": 0.1658, |
| "step": 1555 |
| }, |
| { |
| "epoch": 1.4514925373134329, |
| "grad_norm": 1.2492631381392154, |
| "learning_rate": 1.7438549378518331e-06, |
| "loss": 0.189, |
| "step": 1556 |
| }, |
| { |
| "epoch": 1.4524253731343284, |
| "grad_norm": 1.323522251818834, |
| "learning_rate": 1.7382985105401485e-06, |
| "loss": 0.2106, |
| "step": 1557 |
| }, |
| { |
| "epoch": 1.453358208955224, |
| "grad_norm": 1.2370037278441117, |
| "learning_rate": 1.7327490863927998e-06, |
| "loss": 0.1698, |
| "step": 1558 |
| }, |
| { |
| "epoch": 1.4542910447761195, |
| "grad_norm": 1.265942036133397, |
| "learning_rate": 1.7272066773248974e-06, |
| "loss": 0.1688, |
| "step": 1559 |
| }, |
| { |
| "epoch": 1.455223880597015, |
| "grad_norm": 1.2009005216690825, |
| "learning_rate": 1.721671295236485e-06, |
| "loss": 0.1437, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.4561567164179103, |
| "grad_norm": 1.2681764317380788, |
| "learning_rate": 1.7161429520125244e-06, |
| "loss": 0.1499, |
| "step": 1561 |
| }, |
| { |
| "epoch": 1.457089552238806, |
| "grad_norm": 1.2226810045311758, |
| "learning_rate": 1.7106216595228636e-06, |
| "loss": 0.1614, |
| "step": 1562 |
| }, |
| { |
| "epoch": 1.4580223880597014, |
| "grad_norm": 1.2775985160587016, |
| "learning_rate": 1.705107429622207e-06, |
| "loss": 0.158, |
| "step": 1563 |
| }, |
| { |
| "epoch": 1.458955223880597, |
| "grad_norm": 1.3008246431123165, |
| "learning_rate": 1.6996002741500999e-06, |
| "loss": 0.1935, |
| "step": 1564 |
| }, |
| { |
| "epoch": 1.4598880597014925, |
| "grad_norm": 1.14452594932925, |
| "learning_rate": 1.694100204930898e-06, |
| "loss": 0.1491, |
| "step": 1565 |
| }, |
| { |
| "epoch": 1.460820895522388, |
| "grad_norm": 1.4381105859802414, |
| "learning_rate": 1.6886072337737418e-06, |
| "loss": 0.2209, |
| "step": 1566 |
| }, |
| { |
| "epoch": 1.4617537313432836, |
| "grad_norm": 1.2009555335447437, |
| "learning_rate": 1.6831213724725282e-06, |
| "loss": 0.1649, |
| "step": 1567 |
| }, |
| { |
| "epoch": 1.462686567164179, |
| "grad_norm": 1.1044201612968545, |
| "learning_rate": 1.677642632805892e-06, |
| "loss": 0.1149, |
| "step": 1568 |
| }, |
| { |
| "epoch": 1.4636194029850746, |
| "grad_norm": 1.201292892365621, |
| "learning_rate": 1.672171026537177e-06, |
| "loss": 0.1563, |
| "step": 1569 |
| }, |
| { |
| "epoch": 1.4645522388059702, |
| "grad_norm": 1.4098607205577598, |
| "learning_rate": 1.6667065654144105e-06, |
| "loss": 0.2303, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.4654850746268657, |
| "grad_norm": 1.2324389236814668, |
| "learning_rate": 1.661249261170278e-06, |
| "loss": 0.1742, |
| "step": 1571 |
| }, |
| { |
| "epoch": 1.4664179104477613, |
| "grad_norm": 1.2922406342420865, |
| "learning_rate": 1.6557991255221007e-06, |
| "loss": 0.2045, |
| "step": 1572 |
| }, |
| { |
| "epoch": 1.4673507462686568, |
| "grad_norm": 1.3009557247528194, |
| "learning_rate": 1.650356170171804e-06, |
| "loss": 0.181, |
| "step": 1573 |
| }, |
| { |
| "epoch": 1.4682835820895521, |
| "grad_norm": 1.4405772195986282, |
| "learning_rate": 1.6449204068058994e-06, |
| "loss": 0.1519, |
| "step": 1574 |
| }, |
| { |
| "epoch": 1.4692164179104479, |
| "grad_norm": 1.1432566344922312, |
| "learning_rate": 1.639491847095459e-06, |
| "loss": 0.1384, |
| "step": 1575 |
| }, |
| { |
| "epoch": 1.4701492537313432, |
| "grad_norm": 1.249487726677484, |
| "learning_rate": 1.6340705026960818e-06, |
| "loss": 0.159, |
| "step": 1576 |
| }, |
| { |
| "epoch": 1.4710820895522387, |
| "grad_norm": 1.2956733493021935, |
| "learning_rate": 1.6286563852478787e-06, |
| "loss": 0.1749, |
| "step": 1577 |
| }, |
| { |
| "epoch": 1.4720149253731343, |
| "grad_norm": 1.351965315446389, |
| "learning_rate": 1.623249506375445e-06, |
| "loss": 0.1927, |
| "step": 1578 |
| }, |
| { |
| "epoch": 1.4729477611940298, |
| "grad_norm": 1.2584895365550355, |
| "learning_rate": 1.6178498776878333e-06, |
| "loss": 0.1658, |
| "step": 1579 |
| }, |
| { |
| "epoch": 1.4738805970149254, |
| "grad_norm": 1.2828618472382831, |
| "learning_rate": 1.6124575107785245e-06, |
| "loss": 0.1892, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.474813432835821, |
| "grad_norm": 1.2323978562690985, |
| "learning_rate": 1.6070724172254148e-06, |
| "loss": 0.1595, |
| "step": 1581 |
| }, |
| { |
| "epoch": 1.4757462686567164, |
| "grad_norm": 1.2644417526045404, |
| "learning_rate": 1.6016946085907798e-06, |
| "loss": 0.1774, |
| "step": 1582 |
| }, |
| { |
| "epoch": 1.476679104477612, |
| "grad_norm": 1.2342045537498876, |
| "learning_rate": 1.5963240964212556e-06, |
| "loss": 0.1543, |
| "step": 1583 |
| }, |
| { |
| "epoch": 1.4776119402985075, |
| "grad_norm": 1.2125459502856037, |
| "learning_rate": 1.5909608922478108e-06, |
| "loss": 0.1572, |
| "step": 1584 |
| }, |
| { |
| "epoch": 1.478544776119403, |
| "grad_norm": 1.2609621232962978, |
| "learning_rate": 1.585605007585726e-06, |
| "loss": 0.1478, |
| "step": 1585 |
| }, |
| { |
| "epoch": 1.4794776119402986, |
| "grad_norm": 1.2867353665845063, |
| "learning_rate": 1.5802564539345599e-06, |
| "loss": 0.1917, |
| "step": 1586 |
| }, |
| { |
| "epoch": 1.480410447761194, |
| "grad_norm": 1.2669560096494146, |
| "learning_rate": 1.5749152427781367e-06, |
| "loss": 0.1758, |
| "step": 1587 |
| }, |
| { |
| "epoch": 1.4813432835820897, |
| "grad_norm": 1.2656220053772782, |
| "learning_rate": 1.5695813855845149e-06, |
| "loss": 0.1842, |
| "step": 1588 |
| }, |
| { |
| "epoch": 1.482276119402985, |
| "grad_norm": 1.3153542193728112, |
| "learning_rate": 1.5642548938059588e-06, |
| "loss": 0.1454, |
| "step": 1589 |
| }, |
| { |
| "epoch": 1.4832089552238805, |
| "grad_norm": 1.2452876594670799, |
| "learning_rate": 1.5589357788789244e-06, |
| "loss": 0.135, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.484141791044776, |
| "grad_norm": 1.2717468140148107, |
| "learning_rate": 1.5536240522240259e-06, |
| "loss": 0.1937, |
| "step": 1591 |
| }, |
| { |
| "epoch": 1.4850746268656716, |
| "grad_norm": 1.3275570249611397, |
| "learning_rate": 1.5483197252460158e-06, |
| "loss": 0.2052, |
| "step": 1592 |
| }, |
| { |
| "epoch": 1.4860074626865671, |
| "grad_norm": 1.2883023956548525, |
| "learning_rate": 1.543022809333755e-06, |
| "loss": 0.1888, |
| "step": 1593 |
| }, |
| { |
| "epoch": 1.4869402985074627, |
| "grad_norm": 1.285850183604376, |
| "learning_rate": 1.537733315860197e-06, |
| "loss": 0.1362, |
| "step": 1594 |
| }, |
| { |
| "epoch": 1.4878731343283582, |
| "grad_norm": 1.3564622211084076, |
| "learning_rate": 1.5324512561823562e-06, |
| "loss": 0.2011, |
| "step": 1595 |
| }, |
| { |
| "epoch": 1.4888059701492538, |
| "grad_norm": 1.1780864154364084, |
| "learning_rate": 1.527176641641286e-06, |
| "loss": 0.148, |
| "step": 1596 |
| }, |
| { |
| "epoch": 1.4897388059701493, |
| "grad_norm": 1.3062816174924374, |
| "learning_rate": 1.5219094835620546e-06, |
| "loss": 0.2116, |
| "step": 1597 |
| }, |
| { |
| "epoch": 1.4906716417910448, |
| "grad_norm": 1.2861264449635466, |
| "learning_rate": 1.5166497932537233e-06, |
| "loss": 0.1807, |
| "step": 1598 |
| }, |
| { |
| "epoch": 1.4916044776119404, |
| "grad_norm": 1.3040316994439474, |
| "learning_rate": 1.5113975820093129e-06, |
| "loss": 0.1908, |
| "step": 1599 |
| }, |
| { |
| "epoch": 1.4925373134328357, |
| "grad_norm": 1.2287037658204552, |
| "learning_rate": 1.5061528611057917e-06, |
| "loss": 0.1513, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.4934701492537314, |
| "grad_norm": 1.2792449236973227, |
| "learning_rate": 1.5009156418040443e-06, |
| "loss": 0.1961, |
| "step": 1601 |
| }, |
| { |
| "epoch": 1.4944029850746268, |
| "grad_norm": 1.3629466625435644, |
| "learning_rate": 1.4956859353488484e-06, |
| "loss": 0.1457, |
| "step": 1602 |
| }, |
| { |
| "epoch": 1.4953358208955223, |
| "grad_norm": 1.151598855059448, |
| "learning_rate": 1.4904637529688492e-06, |
| "loss": 0.1234, |
| "step": 1603 |
| }, |
| { |
| "epoch": 1.4962686567164178, |
| "grad_norm": 1.2603516755627053, |
| "learning_rate": 1.4852491058765388e-06, |
| "loss": 0.1969, |
| "step": 1604 |
| }, |
| { |
| "epoch": 1.4972014925373134, |
| "grad_norm": 1.3029607032037984, |
| "learning_rate": 1.4800420052682308e-06, |
| "loss": 0.1732, |
| "step": 1605 |
| }, |
| { |
| "epoch": 1.498134328358209, |
| "grad_norm": 1.2718701457765713, |
| "learning_rate": 1.4748424623240364e-06, |
| "loss": 0.1748, |
| "step": 1606 |
| }, |
| { |
| "epoch": 1.4990671641791045, |
| "grad_norm": 1.2583215619223413, |
| "learning_rate": 1.4696504882078361e-06, |
| "loss": 0.1488, |
| "step": 1607 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 1.3343211399094133, |
| "learning_rate": 1.4644660940672628e-06, |
| "loss": 0.154, |
| "step": 1608 |
| }, |
| { |
| "epoch": 1.5009328358208955, |
| "grad_norm": 1.370324950789676, |
| "learning_rate": 1.4592892910336738e-06, |
| "loss": 0.1641, |
| "step": 1609 |
| }, |
| { |
| "epoch": 1.501865671641791, |
| "grad_norm": 1.263752243517082, |
| "learning_rate": 1.4541200902221276e-06, |
| "loss": 0.1589, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.5027985074626866, |
| "grad_norm": 1.2805016526218522, |
| "learning_rate": 1.4489585027313613e-06, |
| "loss": 0.1869, |
| "step": 1611 |
| }, |
| { |
| "epoch": 1.5037313432835822, |
| "grad_norm": 1.289232569056423, |
| "learning_rate": 1.4438045396437606e-06, |
| "loss": 0.1483, |
| "step": 1612 |
| }, |
| { |
| "epoch": 1.5046641791044775, |
| "grad_norm": 1.1645787470541484, |
| "learning_rate": 1.4386582120253467e-06, |
| "loss": 0.1412, |
| "step": 1613 |
| }, |
| { |
| "epoch": 1.5055970149253732, |
| "grad_norm": 1.203746273397564, |
| "learning_rate": 1.433519530925745e-06, |
| "loss": 0.136, |
| "step": 1614 |
| }, |
| { |
| "epoch": 1.5065298507462686, |
| "grad_norm": 1.2037002849351364, |
| "learning_rate": 1.4283885073781628e-06, |
| "loss": 0.1473, |
| "step": 1615 |
| }, |
| { |
| "epoch": 1.5074626865671643, |
| "grad_norm": 1.2417425890659581, |
| "learning_rate": 1.4232651523993635e-06, |
| "loss": 0.1541, |
| "step": 1616 |
| }, |
| { |
| "epoch": 1.5083955223880596, |
| "grad_norm": 1.223691899770722, |
| "learning_rate": 1.4181494769896487e-06, |
| "loss": 0.1522, |
| "step": 1617 |
| }, |
| { |
| "epoch": 1.5093283582089554, |
| "grad_norm": 1.1957898706574794, |
| "learning_rate": 1.413041492132831e-06, |
| "loss": 0.1471, |
| "step": 1618 |
| }, |
| { |
| "epoch": 1.5102611940298507, |
| "grad_norm": 1.2163473521299886, |
| "learning_rate": 1.4079412087962113e-06, |
| "loss": 0.1753, |
| "step": 1619 |
| }, |
| { |
| "epoch": 1.5111940298507462, |
| "grad_norm": 1.2661586102986833, |
| "learning_rate": 1.4028486379305507e-06, |
| "loss": 0.1903, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.5121268656716418, |
| "grad_norm": 1.3129586395056405, |
| "learning_rate": 1.397763790470054e-06, |
| "loss": 0.1941, |
| "step": 1621 |
| }, |
| { |
| "epoch": 1.5130597014925373, |
| "grad_norm": 1.2377248133069354, |
| "learning_rate": 1.3926866773323434e-06, |
| "loss": 0.1758, |
| "step": 1622 |
| }, |
| { |
| "epoch": 1.5139925373134329, |
| "grad_norm": 1.2697894927159725, |
| "learning_rate": 1.3876173094184341e-06, |
| "loss": 0.1851, |
| "step": 1623 |
| }, |
| { |
| "epoch": 1.5149253731343284, |
| "grad_norm": 1.2664467073712171, |
| "learning_rate": 1.3825556976127119e-06, |
| "loss": 0.1798, |
| "step": 1624 |
| }, |
| { |
| "epoch": 1.515858208955224, |
| "grad_norm": 1.2561239218852494, |
| "learning_rate": 1.3775018527829103e-06, |
| "loss": 0.1708, |
| "step": 1625 |
| }, |
| { |
| "epoch": 1.5167910447761193, |
| "grad_norm": 1.2767075134930894, |
| "learning_rate": 1.3724557857800824e-06, |
| "loss": 0.1443, |
| "step": 1626 |
| }, |
| { |
| "epoch": 1.517723880597015, |
| "grad_norm": 1.202873843485902, |
| "learning_rate": 1.3674175074385866e-06, |
| "loss": 0.1548, |
| "step": 1627 |
| }, |
| { |
| "epoch": 1.5186567164179103, |
| "grad_norm": 1.2304513532013928, |
| "learning_rate": 1.362387028576056e-06, |
| "loss": 0.1731, |
| "step": 1628 |
| }, |
| { |
| "epoch": 1.519589552238806, |
| "grad_norm": 1.3149193632354779, |
| "learning_rate": 1.3573643599933794e-06, |
| "loss": 0.1781, |
| "step": 1629 |
| }, |
| { |
| "epoch": 1.5205223880597014, |
| "grad_norm": 1.2283146253340484, |
| "learning_rate": 1.3523495124746722e-06, |
| "loss": 0.1478, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.5214552238805972, |
| "grad_norm": 1.2370105468976618, |
| "learning_rate": 1.3473424967872606e-06, |
| "loss": 0.1539, |
| "step": 1631 |
| }, |
| { |
| "epoch": 1.5223880597014925, |
| "grad_norm": 1.1375086929232519, |
| "learning_rate": 1.3423433236816563e-06, |
| "loss": 0.129, |
| "step": 1632 |
| }, |
| { |
| "epoch": 1.523320895522388, |
| "grad_norm": 1.2856837840697544, |
| "learning_rate": 1.3373520038915271e-06, |
| "loss": 0.1773, |
| "step": 1633 |
| }, |
| { |
| "epoch": 1.5242537313432836, |
| "grad_norm": 1.2384146311184505, |
| "learning_rate": 1.332368548133684e-06, |
| "loss": 0.1603, |
| "step": 1634 |
| }, |
| { |
| "epoch": 1.525186567164179, |
| "grad_norm": 1.379242483405248, |
| "learning_rate": 1.3273929671080515e-06, |
| "loss": 0.1768, |
| "step": 1635 |
| }, |
| { |
| "epoch": 1.5261194029850746, |
| "grad_norm": 1.274654451134043, |
| "learning_rate": 1.322425271497646e-06, |
| "loss": 0.147, |
| "step": 1636 |
| }, |
| { |
| "epoch": 1.5270522388059702, |
| "grad_norm": 1.215985264316688, |
| "learning_rate": 1.3174654719685537e-06, |
| "loss": 0.136, |
| "step": 1637 |
| }, |
| { |
| "epoch": 1.5279850746268657, |
| "grad_norm": 1.2344772185357755, |
| "learning_rate": 1.3125135791699084e-06, |
| "loss": 0.1855, |
| "step": 1638 |
| }, |
| { |
| "epoch": 1.528917910447761, |
| "grad_norm": 1.255018239260394, |
| "learning_rate": 1.3075696037338636e-06, |
| "loss": 0.1624, |
| "step": 1639 |
| }, |
| { |
| "epoch": 1.5298507462686568, |
| "grad_norm": 1.3004318503788717, |
| "learning_rate": 1.302633556275577e-06, |
| "loss": 0.181, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.5307835820895521, |
| "grad_norm": 1.316168592475357, |
| "learning_rate": 1.2977054473931838e-06, |
| "loss": 0.1794, |
| "step": 1641 |
| }, |
| { |
| "epoch": 1.5317164179104479, |
| "grad_norm": 1.217753124143506, |
| "learning_rate": 1.292785287667775e-06, |
| "loss": 0.1454, |
| "step": 1642 |
| }, |
| { |
| "epoch": 1.5326492537313432, |
| "grad_norm": 1.2802301204083537, |
| "learning_rate": 1.2878730876633694e-06, |
| "loss": 0.1672, |
| "step": 1643 |
| }, |
| { |
| "epoch": 1.533582089552239, |
| "grad_norm": 1.4173466323508679, |
| "learning_rate": 1.2829688579269006e-06, |
| "loss": 0.223, |
| "step": 1644 |
| }, |
| { |
| "epoch": 1.5345149253731343, |
| "grad_norm": 1.2209656067143468, |
| "learning_rate": 1.27807260898819e-06, |
| "loss": 0.1507, |
| "step": 1645 |
| }, |
| { |
| "epoch": 1.5354477611940298, |
| "grad_norm": 1.4096385483613454, |
| "learning_rate": 1.2731843513599179e-06, |
| "loss": 0.2567, |
| "step": 1646 |
| }, |
| { |
| "epoch": 1.5363805970149254, |
| "grad_norm": 1.367550102899949, |
| "learning_rate": 1.2683040955376109e-06, |
| "loss": 0.2145, |
| "step": 1647 |
| }, |
| { |
| "epoch": 1.537313432835821, |
| "grad_norm": 1.4046543061081387, |
| "learning_rate": 1.2634318519996148e-06, |
| "loss": 0.208, |
| "step": 1648 |
| }, |
| { |
| "epoch": 1.5382462686567164, |
| "grad_norm": 1.216717515432744, |
| "learning_rate": 1.258567631207071e-06, |
| "loss": 0.156, |
| "step": 1649 |
| }, |
| { |
| "epoch": 1.539179104477612, |
| "grad_norm": 1.1279229884919437, |
| "learning_rate": 1.253711443603896e-06, |
| "loss": 0.1321, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.5401119402985075, |
| "grad_norm": 1.1956783677025602, |
| "learning_rate": 1.2488632996167594e-06, |
| "loss": 0.1412, |
| "step": 1651 |
| }, |
| { |
| "epoch": 1.5410447761194028, |
| "grad_norm": 1.191733998318685, |
| "learning_rate": 1.244023209655057e-06, |
| "loss": 0.1537, |
| "step": 1652 |
| }, |
| { |
| "epoch": 1.5419776119402986, |
| "grad_norm": 1.254861547050445, |
| "learning_rate": 1.239191184110895e-06, |
| "loss": 0.1684, |
| "step": 1653 |
| }, |
| { |
| "epoch": 1.542910447761194, |
| "grad_norm": 1.4186506209173417, |
| "learning_rate": 1.2343672333590639e-06, |
| "loss": 0.2478, |
| "step": 1654 |
| }, |
| { |
| "epoch": 1.5438432835820897, |
| "grad_norm": 1.2645629635280344, |
| "learning_rate": 1.2295513677570176e-06, |
| "loss": 0.1641, |
| "step": 1655 |
| }, |
| { |
| "epoch": 1.544776119402985, |
| "grad_norm": 1.1434600478478376, |
| "learning_rate": 1.2247435976448474e-06, |
| "loss": 0.1491, |
| "step": 1656 |
| }, |
| { |
| "epoch": 1.5457089552238807, |
| "grad_norm": 1.3072744229757376, |
| "learning_rate": 1.2199439333452667e-06, |
| "loss": 0.2054, |
| "step": 1657 |
| }, |
| { |
| "epoch": 1.546641791044776, |
| "grad_norm": 1.3410789883038186, |
| "learning_rate": 1.2151523851635839e-06, |
| "loss": 0.1912, |
| "step": 1658 |
| }, |
| { |
| "epoch": 1.5475746268656716, |
| "grad_norm": 1.2553138926818888, |
| "learning_rate": 1.2103689633876781e-06, |
| "loss": 0.1724, |
| "step": 1659 |
| }, |
| { |
| "epoch": 1.5485074626865671, |
| "grad_norm": 1.266417108898093, |
| "learning_rate": 1.2055936782879845e-06, |
| "loss": 0.1606, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.5494402985074627, |
| "grad_norm": 1.2492407069458422, |
| "learning_rate": 1.2008265401174673e-06, |
| "loss": 0.1741, |
| "step": 1661 |
| }, |
| { |
| "epoch": 1.5503731343283582, |
| "grad_norm": 1.2268918917008558, |
| "learning_rate": 1.1960675591115966e-06, |
| "loss": 0.1828, |
| "step": 1662 |
| }, |
| { |
| "epoch": 1.5513059701492538, |
| "grad_norm": 1.2622548581146535, |
| "learning_rate": 1.1913167454883306e-06, |
| "loss": 0.17, |
| "step": 1663 |
| }, |
| { |
| "epoch": 1.5522388059701493, |
| "grad_norm": 1.414252888193506, |
| "learning_rate": 1.186574109448091e-06, |
| "loss": 0.2519, |
| "step": 1664 |
| }, |
| { |
| "epoch": 1.5531716417910446, |
| "grad_norm": 1.165184752660399, |
| "learning_rate": 1.1818396611737381e-06, |
| "loss": 0.1337, |
| "step": 1665 |
| }, |
| { |
| "epoch": 1.5541044776119404, |
| "grad_norm": 1.2575282833611288, |
| "learning_rate": 1.1771134108305572e-06, |
| "loss": 0.1833, |
| "step": 1666 |
| }, |
| { |
| "epoch": 1.5550373134328357, |
| "grad_norm": 1.277913286175671, |
| "learning_rate": 1.1723953685662287e-06, |
| "loss": 0.1923, |
| "step": 1667 |
| }, |
| { |
| "epoch": 1.5559701492537314, |
| "grad_norm": 1.2216895135729264, |
| "learning_rate": 1.1676855445108114e-06, |
| "loss": 0.1484, |
| "step": 1668 |
| }, |
| { |
| "epoch": 1.5569029850746268, |
| "grad_norm": 1.22394919388589, |
| "learning_rate": 1.1629839487767198e-06, |
| "loss": 0.1856, |
| "step": 1669 |
| }, |
| { |
| "epoch": 1.5578358208955225, |
| "grad_norm": 1.1992828862006262, |
| "learning_rate": 1.1582905914586961e-06, |
| "loss": 0.1554, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.5587686567164178, |
| "grad_norm": 1.246475284992138, |
| "learning_rate": 1.1536054826338005e-06, |
| "loss": 0.1577, |
| "step": 1671 |
| }, |
| { |
| "epoch": 1.5597014925373134, |
| "grad_norm": 1.2456042827508538, |
| "learning_rate": 1.148928632361378e-06, |
| "loss": 0.16, |
| "step": 1672 |
| }, |
| { |
| "epoch": 1.560634328358209, |
| "grad_norm": 1.2153660310681467, |
| "learning_rate": 1.1442600506830443e-06, |
| "loss": 0.1692, |
| "step": 1673 |
| }, |
| { |
| "epoch": 1.5615671641791045, |
| "grad_norm": 1.3206114927759005, |
| "learning_rate": 1.1395997476226612e-06, |
| "loss": 0.1837, |
| "step": 1674 |
| }, |
| { |
| "epoch": 1.5625, |
| "grad_norm": 1.189916838030876, |
| "learning_rate": 1.134947733186315e-06, |
| "loss": 0.1443, |
| "step": 1675 |
| }, |
| { |
| "epoch": 1.5634328358208955, |
| "grad_norm": 1.2105885080130891, |
| "learning_rate": 1.1303040173622977e-06, |
| "loss": 0.1503, |
| "step": 1676 |
| }, |
| { |
| "epoch": 1.564365671641791, |
| "grad_norm": 1.2501407525049524, |
| "learning_rate": 1.1256686101210818e-06, |
| "loss": 0.1786, |
| "step": 1677 |
| }, |
| { |
| "epoch": 1.5652985074626866, |
| "grad_norm": 1.215174069621574, |
| "learning_rate": 1.1210415214152976e-06, |
| "loss": 0.1643, |
| "step": 1678 |
| }, |
| { |
| "epoch": 1.5662313432835822, |
| "grad_norm": 1.2836568443928325, |
| "learning_rate": 1.1164227611797202e-06, |
| "loss": 0.1698, |
| "step": 1679 |
| }, |
| { |
| "epoch": 1.5671641791044775, |
| "grad_norm": 1.2894993667961356, |
| "learning_rate": 1.1118123393312397e-06, |
| "loss": 0.195, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.5680970149253732, |
| "grad_norm": 1.2395608868763646, |
| "learning_rate": 1.1072102657688434e-06, |
| "loss": 0.1448, |
| "step": 1681 |
| }, |
| { |
| "epoch": 1.5690298507462686, |
| "grad_norm": 1.2571854764243646, |
| "learning_rate": 1.1026165503735959e-06, |
| "loss": 0.1682, |
| "step": 1682 |
| }, |
| { |
| "epoch": 1.5699626865671643, |
| "grad_norm": 1.241220089205576, |
| "learning_rate": 1.0980312030086104e-06, |
| "loss": 0.1608, |
| "step": 1683 |
| }, |
| { |
| "epoch": 1.5708955223880596, |
| "grad_norm": 1.2317979456686032, |
| "learning_rate": 1.0934542335190418e-06, |
| "loss": 0.1421, |
| "step": 1684 |
| }, |
| { |
| "epoch": 1.5718283582089554, |
| "grad_norm": 1.2762123047527187, |
| "learning_rate": 1.0888856517320478e-06, |
| "loss": 0.1711, |
| "step": 1685 |
| }, |
| { |
| "epoch": 1.5727611940298507, |
| "grad_norm": 1.274556971173205, |
| "learning_rate": 1.0843254674567832e-06, |
| "loss": 0.1498, |
| "step": 1686 |
| }, |
| { |
| "epoch": 1.5736940298507462, |
| "grad_norm": 1.360555726010378, |
| "learning_rate": 1.079773690484372e-06, |
| "loss": 0.1883, |
| "step": 1687 |
| }, |
| { |
| "epoch": 1.5746268656716418, |
| "grad_norm": 1.2144506087038018, |
| "learning_rate": 1.075230330587884e-06, |
| "loss": 0.1566, |
| "step": 1688 |
| }, |
| { |
| "epoch": 1.5755597014925373, |
| "grad_norm": 1.2797794908909506, |
| "learning_rate": 1.07069539752232e-06, |
| "loss": 0.1693, |
| "step": 1689 |
| }, |
| { |
| "epoch": 1.5764925373134329, |
| "grad_norm": 1.1695369348750864, |
| "learning_rate": 1.0661689010245868e-06, |
| "loss": 0.1335, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.5774253731343284, |
| "grad_norm": 1.278683912569651, |
| "learning_rate": 1.0616508508134737e-06, |
| "loss": 0.2141, |
| "step": 1691 |
| }, |
| { |
| "epoch": 1.578358208955224, |
| "grad_norm": 1.23092493269237, |
| "learning_rate": 1.0571412565896406e-06, |
| "loss": 0.1394, |
| "step": 1692 |
| }, |
| { |
| "epoch": 1.5792910447761193, |
| "grad_norm": 1.3214140489535262, |
| "learning_rate": 1.052640128035587e-06, |
| "loss": 0.1938, |
| "step": 1693 |
| }, |
| { |
| "epoch": 1.580223880597015, |
| "grad_norm": 1.1396889903545657, |
| "learning_rate": 1.048147474815639e-06, |
| "loss": 0.139, |
| "step": 1694 |
| }, |
| { |
| "epoch": 1.5811567164179103, |
| "grad_norm": 1.2733826197436557, |
| "learning_rate": 1.0436633065759243e-06, |
| "loss": 0.1387, |
| "step": 1695 |
| }, |
| { |
| "epoch": 1.582089552238806, |
| "grad_norm": 1.348534129553173, |
| "learning_rate": 1.0391876329443534e-06, |
| "loss": 0.2277, |
| "step": 1696 |
| }, |
| { |
| "epoch": 1.5830223880597014, |
| "grad_norm": 1.3244185013193703, |
| "learning_rate": 1.0347204635305963e-06, |
| "loss": 0.1666, |
| "step": 1697 |
| }, |
| { |
| "epoch": 1.5839552238805972, |
| "grad_norm": 1.1133707065001888, |
| "learning_rate": 1.030261807926063e-06, |
| "loss": 0.1185, |
| "step": 1698 |
| }, |
| { |
| "epoch": 1.5848880597014925, |
| "grad_norm": 1.296948122560028, |
| "learning_rate": 1.0258116757038862e-06, |
| "loss": 0.1699, |
| "step": 1699 |
| }, |
| { |
| "epoch": 1.585820895522388, |
| "grad_norm": 1.2146289607505514, |
| "learning_rate": 1.0213700764188978e-06, |
| "loss": 0.174, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.5867537313432836, |
| "grad_norm": 1.265118708956542, |
| "learning_rate": 1.0169370196076073e-06, |
| "loss": 0.1499, |
| "step": 1701 |
| }, |
| { |
| "epoch": 1.587686567164179, |
| "grad_norm": 1.1766160463133417, |
| "learning_rate": 1.0125125147881842e-06, |
| "loss": 0.134, |
| "step": 1702 |
| }, |
| { |
| "epoch": 1.5886194029850746, |
| "grad_norm": 1.2061441688887677, |
| "learning_rate": 1.0080965714604368e-06, |
| "loss": 0.1517, |
| "step": 1703 |
| }, |
| { |
| "epoch": 1.5895522388059702, |
| "grad_norm": 1.2419522468754731, |
| "learning_rate": 1.0036891991057863e-06, |
| "loss": 0.1785, |
| "step": 1704 |
| }, |
| { |
| "epoch": 1.5904850746268657, |
| "grad_norm": 1.233505174658301, |
| "learning_rate": 9.992904071872567e-07, |
| "loss": 0.1678, |
| "step": 1705 |
| }, |
| { |
| "epoch": 1.591417910447761, |
| "grad_norm": 1.2433709953213712, |
| "learning_rate": 9.949002051494467e-07, |
| "loss": 0.1655, |
| "step": 1706 |
| }, |
| { |
| "epoch": 1.5923507462686568, |
| "grad_norm": 1.3337135755983585, |
| "learning_rate": 9.90518602418512e-07, |
| "loss": 0.2046, |
| "step": 1707 |
| }, |
| { |
| "epoch": 1.5932835820895521, |
| "grad_norm": 1.1762132160436038, |
| "learning_rate": 9.861456084021448e-07, |
| "loss": 0.1386, |
| "step": 1708 |
| }, |
| { |
| "epoch": 1.5942164179104479, |
| "grad_norm": 1.139423349315348, |
| "learning_rate": 9.81781232489556e-07, |
| "loss": 0.1309, |
| "step": 1709 |
| }, |
| { |
| "epoch": 1.5951492537313432, |
| "grad_norm": 1.211881847706772, |
| "learning_rate": 9.774254840514474e-07, |
| "loss": 0.1451, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.596082089552239, |
| "grad_norm": 1.2929233087170993, |
| "learning_rate": 9.730783724400005e-07, |
| "loss": 0.1864, |
| "step": 1711 |
| }, |
| { |
| "epoch": 1.5970149253731343, |
| "grad_norm": 1.2599521105020628, |
| "learning_rate": 9.687399069888515e-07, |
| "loss": 0.1807, |
| "step": 1712 |
| }, |
| { |
| "epoch": 1.5979477611940298, |
| "grad_norm": 1.2254395494763746, |
| "learning_rate": 9.644100970130743e-07, |
| "loss": 0.1383, |
| "step": 1713 |
| }, |
| { |
| "epoch": 1.5988805970149254, |
| "grad_norm": 1.266780274181055, |
| "learning_rate": 9.600889518091572e-07, |
| "loss": 0.1634, |
| "step": 1714 |
| }, |
| { |
| "epoch": 1.599813432835821, |
| "grad_norm": 1.1862088701024427, |
| "learning_rate": 9.557764806549852e-07, |
| "loss": 0.1498, |
| "step": 1715 |
| }, |
| { |
| "epoch": 1.6007462686567164, |
| "grad_norm": 1.6649201506362419, |
| "learning_rate": 9.514726928098189e-07, |
| "loss": 0.1837, |
| "step": 1716 |
| }, |
| { |
| "epoch": 1.601679104477612, |
| "grad_norm": 1.234264729313629, |
| "learning_rate": 9.471775975142739e-07, |
| "loss": 0.1555, |
| "step": 1717 |
| }, |
| { |
| "epoch": 1.6026119402985075, |
| "grad_norm": 1.3098969463186163, |
| "learning_rate": 9.428912039903043e-07, |
| "loss": 0.1954, |
| "step": 1718 |
| }, |
| { |
| "epoch": 1.6035447761194028, |
| "grad_norm": 1.2922380419469448, |
| "learning_rate": 9.38613521441179e-07, |
| "loss": 0.1599, |
| "step": 1719 |
| }, |
| { |
| "epoch": 1.6044776119402986, |
| "grad_norm": 1.2490251437386872, |
| "learning_rate": 9.343445590514655e-07, |
| "loss": 0.1579, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.605410447761194, |
| "grad_norm": 1.2901488698253734, |
| "learning_rate": 9.300843259870063e-07, |
| "loss": 0.1648, |
| "step": 1721 |
| }, |
| { |
| "epoch": 1.6063432835820897, |
| "grad_norm": 1.2596444353650946, |
| "learning_rate": 9.258328313949039e-07, |
| "loss": 0.1771, |
| "step": 1722 |
| }, |
| { |
| "epoch": 1.607276119402985, |
| "grad_norm": 1.3134693635948327, |
| "learning_rate": 9.215900844034953e-07, |
| "loss": 0.2183, |
| "step": 1723 |
| }, |
| { |
| "epoch": 1.6082089552238807, |
| "grad_norm": 1.2504068109657847, |
| "learning_rate": 9.173560941223359e-07, |
| "loss": 0.1531, |
| "step": 1724 |
| }, |
| { |
| "epoch": 1.609141791044776, |
| "grad_norm": 1.2966735825237254, |
| "learning_rate": 9.131308696421825e-07, |
| "loss": 0.1723, |
| "step": 1725 |
| }, |
| { |
| "epoch": 1.6100746268656716, |
| "grad_norm": 1.2438087325878, |
| "learning_rate": 9.089144200349687e-07, |
| "loss": 0.1724, |
| "step": 1726 |
| }, |
| { |
| "epoch": 1.6110074626865671, |
| "grad_norm": 1.2142565513804016, |
| "learning_rate": 9.047067543537891e-07, |
| "loss": 0.1526, |
| "step": 1727 |
| }, |
| { |
| "epoch": 1.6119402985074627, |
| "grad_norm": 1.2410024286686867, |
| "learning_rate": 9.005078816328772e-07, |
| "loss": 0.1394, |
| "step": 1728 |
| }, |
| { |
| "epoch": 1.6128731343283582, |
| "grad_norm": 1.232244694202904, |
| "learning_rate": 8.963178108875886e-07, |
| "loss": 0.1573, |
| "step": 1729 |
| }, |
| { |
| "epoch": 1.6138059701492538, |
| "grad_norm": 1.2366886540270394, |
| "learning_rate": 8.92136551114377e-07, |
| "loss": 0.1699, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.6147388059701493, |
| "grad_norm": 1.195896184801038, |
| "learning_rate": 8.879641112907822e-07, |
| "loss": 0.1436, |
| "step": 1731 |
| }, |
| { |
| "epoch": 1.6156716417910446, |
| "grad_norm": 1.2275267806631487, |
| "learning_rate": 8.838005003754046e-07, |
| "loss": 0.1609, |
| "step": 1732 |
| }, |
| { |
| "epoch": 1.6166044776119404, |
| "grad_norm": 1.2746610225972304, |
| "learning_rate": 8.796457273078884e-07, |
| "loss": 0.1945, |
| "step": 1733 |
| }, |
| { |
| "epoch": 1.6175373134328357, |
| "grad_norm": 1.289578111770472, |
| "learning_rate": 8.754998010089033e-07, |
| "loss": 0.1872, |
| "step": 1734 |
| }, |
| { |
| "epoch": 1.6184701492537314, |
| "grad_norm": 1.2391204387890444, |
| "learning_rate": 8.713627303801237e-07, |
| "loss": 0.1821, |
| "step": 1735 |
| }, |
| { |
| "epoch": 1.6194029850746268, |
| "grad_norm": 1.247763928063891, |
| "learning_rate": 8.672345243042068e-07, |
| "loss": 0.1621, |
| "step": 1736 |
| }, |
| { |
| "epoch": 1.6203358208955225, |
| "grad_norm": 1.3225057066087762, |
| "learning_rate": 8.631151916447833e-07, |
| "loss": 0.2302, |
| "step": 1737 |
| }, |
| { |
| "epoch": 1.6212686567164178, |
| "grad_norm": 1.2664538479356695, |
| "learning_rate": 8.590047412464247e-07, |
| "loss": 0.1691, |
| "step": 1738 |
| }, |
| { |
| "epoch": 1.6222014925373134, |
| "grad_norm": 1.2197793802096346, |
| "learning_rate": 8.549031819346365e-07, |
| "loss": 0.1491, |
| "step": 1739 |
| }, |
| { |
| "epoch": 1.623134328358209, |
| "grad_norm": 1.1659712448306752, |
| "learning_rate": 8.50810522515833e-07, |
| "loss": 0.1362, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.6240671641791045, |
| "grad_norm": 1.2826109436963649, |
| "learning_rate": 8.467267717773198e-07, |
| "loss": 0.197, |
| "step": 1741 |
| }, |
| { |
| "epoch": 1.625, |
| "grad_norm": 1.1964370900129955, |
| "learning_rate": 8.426519384872733e-07, |
| "loss": 0.147, |
| "step": 1742 |
| }, |
| { |
| "epoch": 1.6259328358208955, |
| "grad_norm": 1.2219216768473495, |
| "learning_rate": 8.385860313947269e-07, |
| "loss": 0.1452, |
| "step": 1743 |
| }, |
| { |
| "epoch": 1.626865671641791, |
| "grad_norm": 1.215418686931946, |
| "learning_rate": 8.345290592295429e-07, |
| "loss": 0.1388, |
| "step": 1744 |
| }, |
| { |
| "epoch": 1.6277985074626866, |
| "grad_norm": 1.2916191415428755, |
| "learning_rate": 8.304810307024041e-07, |
| "loss": 0.213, |
| "step": 1745 |
| }, |
| { |
| "epoch": 1.6287313432835822, |
| "grad_norm": 1.3474400193843503, |
| "learning_rate": 8.264419545047892e-07, |
| "loss": 0.2257, |
| "step": 1746 |
| }, |
| { |
| "epoch": 1.6296641791044775, |
| "grad_norm": 1.417866910801035, |
| "learning_rate": 8.224118393089553e-07, |
| "loss": 0.1822, |
| "step": 1747 |
| }, |
| { |
| "epoch": 1.6305970149253732, |
| "grad_norm": 1.2701405693061993, |
| "learning_rate": 8.183906937679214e-07, |
| "loss": 0.162, |
| "step": 1748 |
| }, |
| { |
| "epoch": 1.6315298507462686, |
| "grad_norm": 1.171354349701999, |
| "learning_rate": 8.143785265154436e-07, |
| "loss": 0.1493, |
| "step": 1749 |
| }, |
| { |
| "epoch": 1.6324626865671643, |
| "grad_norm": 1.245853361402978, |
| "learning_rate": 8.103753461660046e-07, |
| "loss": 0.137, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.6333955223880596, |
| "grad_norm": 1.2401681768660737, |
| "learning_rate": 8.063811613147888e-07, |
| "loss": 0.1545, |
| "step": 1751 |
| }, |
| { |
| "epoch": 1.6343283582089554, |
| "grad_norm": 1.3543115825906518, |
| "learning_rate": 8.02395980537668e-07, |
| "loss": 0.2262, |
| "step": 1752 |
| }, |
| { |
| "epoch": 1.6352611940298507, |
| "grad_norm": 1.2622941439543376, |
| "learning_rate": 7.984198123911819e-07, |
| "loss": 0.1854, |
| "step": 1753 |
| }, |
| { |
| "epoch": 1.6361940298507462, |
| "grad_norm": 1.2200368244742743, |
| "learning_rate": 7.944526654125184e-07, |
| "loss": 0.1715, |
| "step": 1754 |
| }, |
| { |
| "epoch": 1.6371268656716418, |
| "grad_norm": 1.306003439685902, |
| "learning_rate": 7.904945481194959e-07, |
| "loss": 0.1799, |
| "step": 1755 |
| }, |
| { |
| "epoch": 1.6380597014925373, |
| "grad_norm": 1.1903945099779447, |
| "learning_rate": 7.865454690105472e-07, |
| "loss": 0.1441, |
| "step": 1756 |
| }, |
| { |
| "epoch": 1.6389925373134329, |
| "grad_norm": 1.1424787110504189, |
| "learning_rate": 7.826054365646951e-07, |
| "loss": 0.1264, |
| "step": 1757 |
| }, |
| { |
| "epoch": 1.6399253731343284, |
| "grad_norm": 1.25777704993289, |
| "learning_rate": 7.786744592415429e-07, |
| "loss": 0.1925, |
| "step": 1758 |
| }, |
| { |
| "epoch": 1.640858208955224, |
| "grad_norm": 1.2658783455065246, |
| "learning_rate": 7.747525454812488e-07, |
| "loss": 0.1521, |
| "step": 1759 |
| }, |
| { |
| "epoch": 1.6417910447761193, |
| "grad_norm": 1.2307245795846298, |
| "learning_rate": 7.708397037045129e-07, |
| "loss": 0.1411, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.642723880597015, |
| "grad_norm": 1.2428854455488574, |
| "learning_rate": 7.669359423125555e-07, |
| "loss": 0.1727, |
| "step": 1761 |
| }, |
| { |
| "epoch": 1.6436567164179103, |
| "grad_norm": 1.257958809857627, |
| "learning_rate": 7.630412696871015e-07, |
| "loss": 0.1585, |
| "step": 1762 |
| }, |
| { |
| "epoch": 1.644589552238806, |
| "grad_norm": 1.2947143695235483, |
| "learning_rate": 7.591556941903605e-07, |
| "loss": 0.1912, |
| "step": 1763 |
| }, |
| { |
| "epoch": 1.6455223880597014, |
| "grad_norm": 1.259074792643218, |
| "learning_rate": 7.552792241650081e-07, |
| "loss": 0.182, |
| "step": 1764 |
| }, |
| { |
| "epoch": 1.6464552238805972, |
| "grad_norm": 1.214636998645944, |
| "learning_rate": 7.514118679341737e-07, |
| "loss": 0.1533, |
| "step": 1765 |
| }, |
| { |
| "epoch": 1.6473880597014925, |
| "grad_norm": 1.1789528360329278, |
| "learning_rate": 7.475536338014156e-07, |
| "loss": 0.1592, |
| "step": 1766 |
| }, |
| { |
| "epoch": 1.648320895522388, |
| "grad_norm": 1.2385211364305988, |
| "learning_rate": 7.437045300507068e-07, |
| "loss": 0.1878, |
| "step": 1767 |
| }, |
| { |
| "epoch": 1.6492537313432836, |
| "grad_norm": 1.2464420700757242, |
| "learning_rate": 7.398645649464175e-07, |
| "loss": 0.1724, |
| "step": 1768 |
| }, |
| { |
| "epoch": 1.650186567164179, |
| "grad_norm": 1.2615694365379468, |
| "learning_rate": 7.360337467332968e-07, |
| "loss": 0.1751, |
| "step": 1769 |
| }, |
| { |
| "epoch": 1.6511194029850746, |
| "grad_norm": 1.2024654335089155, |
| "learning_rate": 7.322120836364504e-07, |
| "loss": 0.1368, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.6520522388059702, |
| "grad_norm": 1.2616960564996835, |
| "learning_rate": 7.283995838613323e-07, |
| "loss": 0.1651, |
| "step": 1771 |
| }, |
| { |
| "epoch": 1.6529850746268657, |
| "grad_norm": 1.2711287773276967, |
| "learning_rate": 7.245962555937192e-07, |
| "loss": 0.1482, |
| "step": 1772 |
| }, |
| { |
| "epoch": 1.653917910447761, |
| "grad_norm": 1.1648789458296942, |
| "learning_rate": 7.208021069996962e-07, |
| "loss": 0.1429, |
| "step": 1773 |
| }, |
| { |
| "epoch": 1.6548507462686568, |
| "grad_norm": 1.331743388691754, |
| "learning_rate": 7.170171462256404e-07, |
| "loss": 0.1576, |
| "step": 1774 |
| }, |
| { |
| "epoch": 1.6557835820895521, |
| "grad_norm": 1.2220059997930557, |
| "learning_rate": 7.132413813982003e-07, |
| "loss": 0.1585, |
| "step": 1775 |
| }, |
| { |
| "epoch": 1.6567164179104479, |
| "grad_norm": 1.277416037893367, |
| "learning_rate": 7.094748206242797e-07, |
| "loss": 0.1613, |
| "step": 1776 |
| }, |
| { |
| "epoch": 1.6576492537313432, |
| "grad_norm": 1.2930735979309478, |
| "learning_rate": 7.057174719910198e-07, |
| "loss": 0.1347, |
| "step": 1777 |
| }, |
| { |
| "epoch": 1.658582089552239, |
| "grad_norm": 1.3009220963894073, |
| "learning_rate": 7.019693435657848e-07, |
| "loss": 0.1604, |
| "step": 1778 |
| }, |
| { |
| "epoch": 1.6595149253731343, |
| "grad_norm": 1.2534234521523646, |
| "learning_rate": 6.982304433961406e-07, |
| "loss": 0.1725, |
| "step": 1779 |
| }, |
| { |
| "epoch": 1.6604477611940298, |
| "grad_norm": 1.1749339898084066, |
| "learning_rate": 6.945007795098402e-07, |
| "loss": 0.1303, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.6613805970149254, |
| "grad_norm": 1.1501782104693064, |
| "learning_rate": 6.907803599148049e-07, |
| "loss": 0.1569, |
| "step": 1781 |
| }, |
| { |
| "epoch": 1.662313432835821, |
| "grad_norm": 1.3411460520884892, |
| "learning_rate": 6.870691925991085e-07, |
| "loss": 0.201, |
| "step": 1782 |
| }, |
| { |
| "epoch": 1.6632462686567164, |
| "grad_norm": 1.2992658970414581, |
| "learning_rate": 6.833672855309565e-07, |
| "loss": 0.1711, |
| "step": 1783 |
| }, |
| { |
| "epoch": 1.664179104477612, |
| "grad_norm": 1.3023872730004322, |
| "learning_rate": 6.796746466586757e-07, |
| "loss": 0.1797, |
| "step": 1784 |
| }, |
| { |
| "epoch": 1.6651119402985075, |
| "grad_norm": 1.3168878099360892, |
| "learning_rate": 6.759912839106908e-07, |
| "loss": 0.2034, |
| "step": 1785 |
| }, |
| { |
| "epoch": 1.6660447761194028, |
| "grad_norm": 1.2482529228619816, |
| "learning_rate": 6.723172051955102e-07, |
| "loss": 0.1287, |
| "step": 1786 |
| }, |
| { |
| "epoch": 1.6669776119402986, |
| "grad_norm": 1.1644284096985102, |
| "learning_rate": 6.686524184017102e-07, |
| "loss": 0.1452, |
| "step": 1787 |
| }, |
| { |
| "epoch": 1.667910447761194, |
| "grad_norm": 1.2217954755057625, |
| "learning_rate": 6.649969313979149e-07, |
| "loss": 0.1605, |
| "step": 1788 |
| }, |
| { |
| "epoch": 1.6688432835820897, |
| "grad_norm": 1.3257197552048248, |
| "learning_rate": 6.613507520327811e-07, |
| "loss": 0.1746, |
| "step": 1789 |
| }, |
| { |
| "epoch": 1.669776119402985, |
| "grad_norm": 1.267615190897561, |
| "learning_rate": 6.577138881349804e-07, |
| "loss": 0.1812, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.6707089552238807, |
| "grad_norm": 1.262028394959287, |
| "learning_rate": 6.540863475131853e-07, |
| "loss": 0.1584, |
| "step": 1791 |
| }, |
| { |
| "epoch": 1.671641791044776, |
| "grad_norm": 1.3174514462210885, |
| "learning_rate": 6.50468137956049e-07, |
| "loss": 0.1853, |
| "step": 1792 |
| }, |
| { |
| "epoch": 1.6725746268656716, |
| "grad_norm": 1.2558940023330483, |
| "learning_rate": 6.468592672321905e-07, |
| "loss": 0.1731, |
| "step": 1793 |
| }, |
| { |
| "epoch": 1.6735074626865671, |
| "grad_norm": 1.4215053238505322, |
| "learning_rate": 6.432597430901782e-07, |
| "loss": 0.13, |
| "step": 1794 |
| }, |
| { |
| "epoch": 1.6744402985074627, |
| "grad_norm": 1.3440130764969576, |
| "learning_rate": 6.396695732585123e-07, |
| "loss": 0.2222, |
| "step": 1795 |
| }, |
| { |
| "epoch": 1.6753731343283582, |
| "grad_norm": 1.2110671300633844, |
| "learning_rate": 6.360887654456066e-07, |
| "loss": 0.144, |
| "step": 1796 |
| }, |
| { |
| "epoch": 1.6763059701492538, |
| "grad_norm": 1.2272021955175507, |
| "learning_rate": 6.32517327339775e-07, |
| "loss": 0.1471, |
| "step": 1797 |
| }, |
| { |
| "epoch": 1.6772388059701493, |
| "grad_norm": 1.2403806067786642, |
| "learning_rate": 6.289552666092153e-07, |
| "loss": 0.2007, |
| "step": 1798 |
| }, |
| { |
| "epoch": 1.6781716417910446, |
| "grad_norm": 1.263065524151067, |
| "learning_rate": 6.254025909019889e-07, |
| "loss": 0.1721, |
| "step": 1799 |
| }, |
| { |
| "epoch": 1.6791044776119404, |
| "grad_norm": 1.2887727320735014, |
| "learning_rate": 6.218593078460084e-07, |
| "loss": 0.1627, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.6800373134328357, |
| "grad_norm": 1.2164111533573003, |
| "learning_rate": 6.183254250490195e-07, |
| "loss": 0.1498, |
| "step": 1801 |
| }, |
| { |
| "epoch": 1.6809701492537314, |
| "grad_norm": 1.318078297981106, |
| "learning_rate": 6.14800950098583e-07, |
| "loss": 0.2064, |
| "step": 1802 |
| }, |
| { |
| "epoch": 1.6819029850746268, |
| "grad_norm": 1.2598847937787256, |
| "learning_rate": 6.112858905620622e-07, |
| "loss": 0.1608, |
| "step": 1803 |
| }, |
| { |
| "epoch": 1.6828358208955225, |
| "grad_norm": 1.2677604291651299, |
| "learning_rate": 6.077802539866023e-07, |
| "loss": 0.1731, |
| "step": 1804 |
| }, |
| { |
| "epoch": 1.6837686567164178, |
| "grad_norm": 1.2687851660876244, |
| "learning_rate": 6.042840478991185e-07, |
| "loss": 0.1781, |
| "step": 1805 |
| }, |
| { |
| "epoch": 1.6847014925373134, |
| "grad_norm": 1.2394459442409977, |
| "learning_rate": 6.007972798062783e-07, |
| "loss": 0.1636, |
| "step": 1806 |
| }, |
| { |
| "epoch": 1.685634328358209, |
| "grad_norm": 1.2469576957445645, |
| "learning_rate": 5.973199571944843e-07, |
| "loss": 0.1775, |
| "step": 1807 |
| }, |
| { |
| "epoch": 1.6865671641791045, |
| "grad_norm": 1.2306891342872057, |
| "learning_rate": 5.938520875298587e-07, |
| "loss": 0.1813, |
| "step": 1808 |
| }, |
| { |
| "epoch": 1.6875, |
| "grad_norm": 1.177667304477984, |
| "learning_rate": 5.903936782582253e-07, |
| "loss": 0.1528, |
| "step": 1809 |
| }, |
| { |
| "epoch": 1.6884328358208955, |
| "grad_norm": 1.2786596288229926, |
| "learning_rate": 5.869447368050995e-07, |
| "loss": 0.1772, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.689365671641791, |
| "grad_norm": 1.2230138861852573, |
| "learning_rate": 5.835052705756661e-07, |
| "loss": 0.1283, |
| "step": 1811 |
| }, |
| { |
| "epoch": 1.6902985074626866, |
| "grad_norm": 1.193870531656724, |
| "learning_rate": 5.80075286954766e-07, |
| "loss": 0.1447, |
| "step": 1812 |
| }, |
| { |
| "epoch": 1.6912313432835822, |
| "grad_norm": 1.2821363975078859, |
| "learning_rate": 5.766547933068806e-07, |
| "loss": 0.1789, |
| "step": 1813 |
| }, |
| { |
| "epoch": 1.6921641791044775, |
| "grad_norm": 1.3215922151226887, |
| "learning_rate": 5.732437969761156e-07, |
| "loss": 0.1793, |
| "step": 1814 |
| }, |
| { |
| "epoch": 1.6930970149253732, |
| "grad_norm": 1.2320951636990367, |
| "learning_rate": 5.698423052861835e-07, |
| "loss": 0.1639, |
| "step": 1815 |
| }, |
| { |
| "epoch": 1.6940298507462686, |
| "grad_norm": 1.2797531268301714, |
| "learning_rate": 5.664503255403925e-07, |
| "loss": 0.1727, |
| "step": 1816 |
| }, |
| { |
| "epoch": 1.6949626865671643, |
| "grad_norm": 1.252803455484566, |
| "learning_rate": 5.630678650216236e-07, |
| "loss": 0.1457, |
| "step": 1817 |
| }, |
| { |
| "epoch": 1.6958955223880596, |
| "grad_norm": 1.2726809083765898, |
| "learning_rate": 5.596949309923233e-07, |
| "loss": 0.1768, |
| "step": 1818 |
| }, |
| { |
| "epoch": 1.6968283582089554, |
| "grad_norm": 1.2854257510532858, |
| "learning_rate": 5.56331530694481e-07, |
| "loss": 0.1713, |
| "step": 1819 |
| }, |
| { |
| "epoch": 1.6977611940298507, |
| "grad_norm": 1.155844470522867, |
| "learning_rate": 5.529776713496182e-07, |
| "loss": 0.1391, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.6986940298507462, |
| "grad_norm": 1.2116496183532584, |
| "learning_rate": 5.496333601587711e-07, |
| "loss": 0.1637, |
| "step": 1821 |
| }, |
| { |
| "epoch": 1.6996268656716418, |
| "grad_norm": 1.3656652490874133, |
| "learning_rate": 5.462986043024726e-07, |
| "loss": 0.2023, |
| "step": 1822 |
| }, |
| { |
| "epoch": 1.7005597014925373, |
| "grad_norm": 1.2681746668842202, |
| "learning_rate": 5.429734109407426e-07, |
| "loss": 0.1873, |
| "step": 1823 |
| }, |
| { |
| "epoch": 1.7014925373134329, |
| "grad_norm": 1.2277116009701676, |
| "learning_rate": 5.396577872130676e-07, |
| "loss": 0.164, |
| "step": 1824 |
| }, |
| { |
| "epoch": 1.7024253731343284, |
| "grad_norm": 1.2089056006264882, |
| "learning_rate": 5.363517402383878e-07, |
| "loss": 0.1709, |
| "step": 1825 |
| }, |
| { |
| "epoch": 1.703358208955224, |
| "grad_norm": 1.3297359993443203, |
| "learning_rate": 5.330552771150821e-07, |
| "loss": 0.1505, |
| "step": 1826 |
| }, |
| { |
| "epoch": 1.7042910447761193, |
| "grad_norm": 1.1615752359721518, |
| "learning_rate": 5.297684049209511e-07, |
| "loss": 0.148, |
| "step": 1827 |
| }, |
| { |
| "epoch": 1.705223880597015, |
| "grad_norm": 1.2464968950420923, |
| "learning_rate": 5.264911307132009e-07, |
| "loss": 0.1622, |
| "step": 1828 |
| }, |
| { |
| "epoch": 1.7061567164179103, |
| "grad_norm": 1.2751631812112247, |
| "learning_rate": 5.232234615284337e-07, |
| "loss": 0.1937, |
| "step": 1829 |
| }, |
| { |
| "epoch": 1.707089552238806, |
| "grad_norm": 1.2576047652349769, |
| "learning_rate": 5.19965404382628e-07, |
| "loss": 0.1591, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.7080223880597014, |
| "grad_norm": 1.311492145110782, |
| "learning_rate": 5.167169662711202e-07, |
| "loss": 0.2102, |
| "step": 1831 |
| }, |
| { |
| "epoch": 1.7089552238805972, |
| "grad_norm": 1.188329714322775, |
| "learning_rate": 5.134781541685996e-07, |
| "loss": 0.1432, |
| "step": 1832 |
| }, |
| { |
| "epoch": 1.7098880597014925, |
| "grad_norm": 1.3440732304941392, |
| "learning_rate": 5.102489750290834e-07, |
| "loss": 0.1802, |
| "step": 1833 |
| }, |
| { |
| "epoch": 1.710820895522388, |
| "grad_norm": 1.1861906294896165, |
| "learning_rate": 5.070294357859096e-07, |
| "loss": 0.1459, |
| "step": 1834 |
| }, |
| { |
| "epoch": 1.7117537313432836, |
| "grad_norm": 1.2223465113629186, |
| "learning_rate": 5.03819543351714e-07, |
| "loss": 0.1553, |
| "step": 1835 |
| }, |
| { |
| "epoch": 1.712686567164179, |
| "grad_norm": 1.2770412401865483, |
| "learning_rate": 5.006193046184238e-07, |
| "loss": 0.1642, |
| "step": 1836 |
| }, |
| { |
| "epoch": 1.7136194029850746, |
| "grad_norm": 1.3033610076093305, |
| "learning_rate": 4.974287264572363e-07, |
| "loss": 0.1764, |
| "step": 1837 |
| }, |
| { |
| "epoch": 1.7145522388059702, |
| "grad_norm": 1.2497589447053674, |
| "learning_rate": 4.942478157186087e-07, |
| "loss": 0.1516, |
| "step": 1838 |
| }, |
| { |
| "epoch": 1.7154850746268657, |
| "grad_norm": 1.2455585184268392, |
| "learning_rate": 4.910765792322397e-07, |
| "loss": 0.1584, |
| "step": 1839 |
| }, |
| { |
| "epoch": 1.716417910447761, |
| "grad_norm": 1.2460420142287991, |
| "learning_rate": 4.879150238070585e-07, |
| "loss": 0.1731, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.7173507462686568, |
| "grad_norm": 1.3555355069639567, |
| "learning_rate": 4.847631562312049e-07, |
| "loss": 0.2119, |
| "step": 1841 |
| }, |
| { |
| "epoch": 1.7182835820895521, |
| "grad_norm": 1.231681728249583, |
| "learning_rate": 4.816209832720214e-07, |
| "loss": 0.1495, |
| "step": 1842 |
| }, |
| { |
| "epoch": 1.7192164179104479, |
| "grad_norm": 1.2514926193302185, |
| "learning_rate": 4.78488511676034e-07, |
| "loss": 0.1538, |
| "step": 1843 |
| }, |
| { |
| "epoch": 1.7201492537313432, |
| "grad_norm": 1.198768294078837, |
| "learning_rate": 4.753657481689372e-07, |
| "loss": 0.1411, |
| "step": 1844 |
| }, |
| { |
| "epoch": 1.721082089552239, |
| "grad_norm": 1.242098430941121, |
| "learning_rate": 4.7225269945558483e-07, |
| "loss": 0.1454, |
| "step": 1845 |
| }, |
| { |
| "epoch": 1.7220149253731343, |
| "grad_norm": 1.2184578344056238, |
| "learning_rate": 4.691493722199697e-07, |
| "loss": 0.1602, |
| "step": 1846 |
| }, |
| { |
| "epoch": 1.7229477611940298, |
| "grad_norm": 1.2127849041273684, |
| "learning_rate": 4.6605577312521354e-07, |
| "loss": 0.1701, |
| "step": 1847 |
| }, |
| { |
| "epoch": 1.7238805970149254, |
| "grad_norm": 1.1627994701266557, |
| "learning_rate": 4.6297190881354816e-07, |
| "loss": 0.1255, |
| "step": 1848 |
| }, |
| { |
| "epoch": 1.724813432835821, |
| "grad_norm": 1.1910213399847986, |
| "learning_rate": 4.598977859063064e-07, |
| "loss": 0.1268, |
| "step": 1849 |
| }, |
| { |
| "epoch": 1.7257462686567164, |
| "grad_norm": 1.3187420615246166, |
| "learning_rate": 4.5683341100390464e-07, |
| "loss": 0.2062, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.726679104477612, |
| "grad_norm": 1.2321784422916793, |
| "learning_rate": 4.537787906858293e-07, |
| "loss": 0.1689, |
| "step": 1851 |
| }, |
| { |
| "epoch": 1.7276119402985075, |
| "grad_norm": 1.1635538050550778, |
| "learning_rate": 4.507339315106235e-07, |
| "loss": 0.1239, |
| "step": 1852 |
| }, |
| { |
| "epoch": 1.7285447761194028, |
| "grad_norm": 1.257652215638962, |
| "learning_rate": 4.476988400158716e-07, |
| "loss": 0.1611, |
| "step": 1853 |
| }, |
| { |
| "epoch": 1.7294776119402986, |
| "grad_norm": 1.245129913597949, |
| "learning_rate": 4.446735227181853e-07, |
| "loss": 0.1574, |
| "step": 1854 |
| }, |
| { |
| "epoch": 1.730410447761194, |
| "grad_norm": 1.1901118742002041, |
| "learning_rate": 4.4165798611319145e-07, |
| "loss": 0.1337, |
| "step": 1855 |
| }, |
| { |
| "epoch": 1.7313432835820897, |
| "grad_norm": 1.2406972967645573, |
| "learning_rate": 4.386522366755169e-07, |
| "loss": 0.1676, |
| "step": 1856 |
| }, |
| { |
| "epoch": 1.732276119402985, |
| "grad_norm": 1.2215382055435324, |
| "learning_rate": 4.3565628085877275e-07, |
| "loss": 0.1536, |
| "step": 1857 |
| }, |
| { |
| "epoch": 1.7332089552238807, |
| "grad_norm": 1.2560948722737746, |
| "learning_rate": 4.326701250955445e-07, |
| "loss": 0.1743, |
| "step": 1858 |
| }, |
| { |
| "epoch": 1.734141791044776, |
| "grad_norm": 1.2253387185011415, |
| "learning_rate": 4.296937757973757e-07, |
| "loss": 0.1426, |
| "step": 1859 |
| }, |
| { |
| "epoch": 1.7350746268656716, |
| "grad_norm": 1.2013816474289636, |
| "learning_rate": 4.267272393547539e-07, |
| "loss": 0.1829, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.7360074626865671, |
| "grad_norm": 1.2876379806867537, |
| "learning_rate": 4.2377052213709634e-07, |
| "loss": 0.1818, |
| "step": 1861 |
| }, |
| { |
| "epoch": 1.7369402985074627, |
| "grad_norm": 1.233354531426832, |
| "learning_rate": 4.208236304927404e-07, |
| "loss": 0.175, |
| "step": 1862 |
| }, |
| { |
| "epoch": 1.7378731343283582, |
| "grad_norm": 1.1418723680307974, |
| "learning_rate": 4.178865707489249e-07, |
| "loss": 0.1326, |
| "step": 1863 |
| }, |
| { |
| "epoch": 1.7388059701492538, |
| "grad_norm": 1.267791274500478, |
| "learning_rate": 4.149593492117793e-07, |
| "loss": 0.1702, |
| "step": 1864 |
| }, |
| { |
| "epoch": 1.7397388059701493, |
| "grad_norm": 1.2692321911248439, |
| "learning_rate": 4.120419721663099e-07, |
| "loss": 0.1496, |
| "step": 1865 |
| }, |
| { |
| "epoch": 1.7406716417910446, |
| "grad_norm": 1.2588803625643528, |
| "learning_rate": 4.091344458763863e-07, |
| "loss": 0.1832, |
| "step": 1866 |
| }, |
| { |
| "epoch": 1.7416044776119404, |
| "grad_norm": 1.1759139757933372, |
| "learning_rate": 4.062367765847258e-07, |
| "loss": 0.1531, |
| "step": 1867 |
| }, |
| { |
| "epoch": 1.7425373134328357, |
| "grad_norm": 1.2610912446849163, |
| "learning_rate": 4.03348970512884e-07, |
| "loss": 0.1975, |
| "step": 1868 |
| }, |
| { |
| "epoch": 1.7434701492537314, |
| "grad_norm": 1.3085651313701954, |
| "learning_rate": 4.0047103386123777e-07, |
| "loss": 0.1918, |
| "step": 1869 |
| }, |
| { |
| "epoch": 1.7444029850746268, |
| "grad_norm": 1.2771664493168298, |
| "learning_rate": 3.9760297280897533e-07, |
| "loss": 0.1661, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.7453358208955225, |
| "grad_norm": 1.2370508412489076, |
| "learning_rate": 3.9474479351407803e-07, |
| "loss": 0.1897, |
| "step": 1871 |
| }, |
| { |
| "epoch": 1.7462686567164178, |
| "grad_norm": 1.237915742041455, |
| "learning_rate": 3.918965021133131e-07, |
| "loss": 0.1662, |
| "step": 1872 |
| }, |
| { |
| "epoch": 1.7472014925373134, |
| "grad_norm": 1.1964733189715644, |
| "learning_rate": 3.8905810472221636e-07, |
| "loss": 0.1272, |
| "step": 1873 |
| }, |
| { |
| "epoch": 1.748134328358209, |
| "grad_norm": 1.2441657107867947, |
| "learning_rate": 3.8622960743508074e-07, |
| "loss": 0.1636, |
| "step": 1874 |
| }, |
| { |
| "epoch": 1.7490671641791045, |
| "grad_norm": 1.2898967656039175, |
| "learning_rate": 3.834110163249416e-07, |
| "loss": 0.1683, |
| "step": 1875 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 1.1520914532051287, |
| "learning_rate": 3.8060233744356634e-07, |
| "loss": 0.1205, |
| "step": 1876 |
| }, |
| { |
| "epoch": 1.7509328358208955, |
| "grad_norm": 1.2564727530163406, |
| "learning_rate": 3.7780357682143943e-07, |
| "loss": 0.1795, |
| "step": 1877 |
| }, |
| { |
| "epoch": 1.751865671641791, |
| "grad_norm": 1.2881573951266523, |
| "learning_rate": 3.75014740467749e-07, |
| "loss": 0.1693, |
| "step": 1878 |
| }, |
| { |
| "epoch": 1.7527985074626866, |
| "grad_norm": 1.2480429343512909, |
| "learning_rate": 3.72235834370378e-07, |
| "loss": 0.1742, |
| "step": 1879 |
| }, |
| { |
| "epoch": 1.7537313432835822, |
| "grad_norm": 1.2839924896207722, |
| "learning_rate": 3.6946686449588267e-07, |
| "loss": 0.2061, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.7546641791044775, |
| "grad_norm": 1.2061279903283946, |
| "learning_rate": 3.667078367894905e-07, |
| "loss": 0.1544, |
| "step": 1881 |
| }, |
| { |
| "epoch": 1.7555970149253732, |
| "grad_norm": 1.2372284741245994, |
| "learning_rate": 3.639587571750802e-07, |
| "loss": 0.1809, |
| "step": 1882 |
| }, |
| { |
| "epoch": 1.7565298507462686, |
| "grad_norm": 1.3036907322224398, |
| "learning_rate": 3.612196315551719e-07, |
| "loss": 0.1795, |
| "step": 1883 |
| }, |
| { |
| "epoch": 1.7574626865671643, |
| "grad_norm": 1.2616668191135232, |
| "learning_rate": 3.584904658109106e-07, |
| "loss": 0.1465, |
| "step": 1884 |
| }, |
| { |
| "epoch": 1.7583955223880596, |
| "grad_norm": 1.2097261595981943, |
| "learning_rate": 3.557712658020607e-07, |
| "loss": 0.1241, |
| "step": 1885 |
| }, |
| { |
| "epoch": 1.7593283582089554, |
| "grad_norm": 1.1598713960365468, |
| "learning_rate": 3.5306203736698686e-07, |
| "loss": 0.1155, |
| "step": 1886 |
| }, |
| { |
| "epoch": 1.7602611940298507, |
| "grad_norm": 1.1581715369378212, |
| "learning_rate": 3.503627863226455e-07, |
| "loss": 0.1302, |
| "step": 1887 |
| }, |
| { |
| "epoch": 1.7611940298507462, |
| "grad_norm": 1.3721765172969267, |
| "learning_rate": 3.4767351846456744e-07, |
| "loss": 0.1587, |
| "step": 1888 |
| }, |
| { |
| "epoch": 1.7621268656716418, |
| "grad_norm": 1.3747591450318046, |
| "learning_rate": 3.4499423956685207e-07, |
| "loss": 0.1997, |
| "step": 1889 |
| }, |
| { |
| "epoch": 1.7630597014925373, |
| "grad_norm": 1.3283176899587907, |
| "learning_rate": 3.423249553821506e-07, |
| "loss": 0.1867, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.7639925373134329, |
| "grad_norm": 1.2148518852704886, |
| "learning_rate": 3.3966567164165466e-07, |
| "loss": 0.1451, |
| "step": 1891 |
| }, |
| { |
| "epoch": 1.7649253731343284, |
| "grad_norm": 1.2376362058041783, |
| "learning_rate": 3.37016394055083e-07, |
| "loss": 0.1547, |
| "step": 1892 |
| }, |
| { |
| "epoch": 1.765858208955224, |
| "grad_norm": 1.2196305296400547, |
| "learning_rate": 3.343771283106728e-07, |
| "loss": 0.165, |
| "step": 1893 |
| }, |
| { |
| "epoch": 1.7667910447761193, |
| "grad_norm": 1.2217109193774467, |
| "learning_rate": 3.3174788007516166e-07, |
| "loss": 0.133, |
| "step": 1894 |
| }, |
| { |
| "epoch": 1.767723880597015, |
| "grad_norm": 1.2313212809615608, |
| "learning_rate": 3.2912865499378053e-07, |
| "loss": 0.1593, |
| "step": 1895 |
| }, |
| { |
| "epoch": 1.7686567164179103, |
| "grad_norm": 1.3465981871950774, |
| "learning_rate": 3.2651945869024035e-07, |
| "loss": 0.2268, |
| "step": 1896 |
| }, |
| { |
| "epoch": 1.769589552238806, |
| "grad_norm": 1.369769762821453, |
| "learning_rate": 3.239202967667182e-07, |
| "loss": 0.1979, |
| "step": 1897 |
| }, |
| { |
| "epoch": 1.7705223880597014, |
| "grad_norm": 1.2266535787952872, |
| "learning_rate": 3.2133117480384613e-07, |
| "loss": 0.1525, |
| "step": 1898 |
| }, |
| { |
| "epoch": 1.7714552238805972, |
| "grad_norm": 1.2029055250352059, |
| "learning_rate": 3.187520983607012e-07, |
| "loss": 0.1475, |
| "step": 1899 |
| }, |
| { |
| "epoch": 1.7723880597014925, |
| "grad_norm": 1.234808266249341, |
| "learning_rate": 3.1618307297479055e-07, |
| "loss": 0.1599, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.773320895522388, |
| "grad_norm": 1.2473184516361606, |
| "learning_rate": 3.1362410416204024e-07, |
| "loss": 0.1469, |
| "step": 1901 |
| }, |
| { |
| "epoch": 1.7742537313432836, |
| "grad_norm": 1.2725038653967082, |
| "learning_rate": 3.1107519741678526e-07, |
| "loss": 0.1736, |
| "step": 1902 |
| }, |
| { |
| "epoch": 1.775186567164179, |
| "grad_norm": 1.209842686248433, |
| "learning_rate": 3.0853635821175676e-07, |
| "loss": 0.1636, |
| "step": 1903 |
| }, |
| { |
| "epoch": 1.7761194029850746, |
| "grad_norm": 1.2386320385449814, |
| "learning_rate": 3.0600759199806815e-07, |
| "loss": 0.1648, |
| "step": 1904 |
| }, |
| { |
| "epoch": 1.7770522388059702, |
| "grad_norm": 1.2365815677283971, |
| "learning_rate": 3.0348890420520693e-07, |
| "loss": 0.1828, |
| "step": 1905 |
| }, |
| { |
| "epoch": 1.7779850746268657, |
| "grad_norm": 1.2932917260184533, |
| "learning_rate": 3.0098030024102107e-07, |
| "loss": 0.1664, |
| "step": 1906 |
| }, |
| { |
| "epoch": 1.778917910447761, |
| "grad_norm": 1.2884677497634525, |
| "learning_rate": 2.9848178549170604e-07, |
| "loss": 0.1549, |
| "step": 1907 |
| }, |
| { |
| "epoch": 1.7798507462686568, |
| "grad_norm": 1.1655230428805, |
| "learning_rate": 2.959933653217967e-07, |
| "loss": 0.1309, |
| "step": 1908 |
| }, |
| { |
| "epoch": 1.7807835820895521, |
| "grad_norm": 1.2142649182178054, |
| "learning_rate": 2.9351504507415305e-07, |
| "loss": 0.1602, |
| "step": 1909 |
| }, |
| { |
| "epoch": 1.7817164179104479, |
| "grad_norm": 1.2328295938780174, |
| "learning_rate": 2.9104683006995147e-07, |
| "loss": 0.1764, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.7826492537313432, |
| "grad_norm": 1.2276281692637927, |
| "learning_rate": 2.885887256086678e-07, |
| "loss": 0.157, |
| "step": 1911 |
| }, |
| { |
| "epoch": 1.783582089552239, |
| "grad_norm": 1.2275484894679003, |
| "learning_rate": 2.8614073696807297e-07, |
| "loss": 0.157, |
| "step": 1912 |
| }, |
| { |
| "epoch": 1.7845149253731343, |
| "grad_norm": 1.2993174273770114, |
| "learning_rate": 2.837028694042182e-07, |
| "loss": 0.1674, |
| "step": 1913 |
| }, |
| { |
| "epoch": 1.7854477611940298, |
| "grad_norm": 1.1840625313782969, |
| "learning_rate": 2.812751281514203e-07, |
| "loss": 0.1391, |
| "step": 1914 |
| }, |
| { |
| "epoch": 1.7863805970149254, |
| "grad_norm": 1.255504543240045, |
| "learning_rate": 2.7885751842225804e-07, |
| "loss": 0.157, |
| "step": 1915 |
| }, |
| { |
| "epoch": 1.787313432835821, |
| "grad_norm": 1.1684515816146364, |
| "learning_rate": 2.7645004540755527e-07, |
| "loss": 0.1556, |
| "step": 1916 |
| }, |
| { |
| "epoch": 1.7882462686567164, |
| "grad_norm": 1.3063847045432126, |
| "learning_rate": 2.74052714276371e-07, |
| "loss": 0.1968, |
| "step": 1917 |
| }, |
| { |
| "epoch": 1.789179104477612, |
| "grad_norm": 1.209985499231558, |
| "learning_rate": 2.716655301759902e-07, |
| "loss": 0.127, |
| "step": 1918 |
| }, |
| { |
| "epoch": 1.7901119402985075, |
| "grad_norm": 1.4224348466493586, |
| "learning_rate": 2.6928849823190995e-07, |
| "loss": 0.2183, |
| "step": 1919 |
| }, |
| { |
| "epoch": 1.7910447761194028, |
| "grad_norm": 1.2695172844316536, |
| "learning_rate": 2.669216235478295e-07, |
| "loss": 0.1668, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.7919776119402986, |
| "grad_norm": 1.2524223778087493, |
| "learning_rate": 2.6456491120564034e-07, |
| "loss": 0.1598, |
| "step": 1921 |
| }, |
| { |
| "epoch": 1.792910447761194, |
| "grad_norm": 1.2473601964151018, |
| "learning_rate": 2.622183662654143e-07, |
| "loss": 0.1733, |
| "step": 1922 |
| }, |
| { |
| "epoch": 1.7938432835820897, |
| "grad_norm": 1.222898030106351, |
| "learning_rate": 2.59881993765394e-07, |
| "loss": 0.1536, |
| "step": 1923 |
| }, |
| { |
| "epoch": 1.794776119402985, |
| "grad_norm": 1.3084287010523616, |
| "learning_rate": 2.575557987219784e-07, |
| "loss": 0.1912, |
| "step": 1924 |
| }, |
| { |
| "epoch": 1.7957089552238807, |
| "grad_norm": 1.3342414967971448, |
| "learning_rate": 2.5523978612971623e-07, |
| "loss": 0.1827, |
| "step": 1925 |
| }, |
| { |
| "epoch": 1.796641791044776, |
| "grad_norm": 1.2968683363611444, |
| "learning_rate": 2.529339609612941e-07, |
| "loss": 0.1587, |
| "step": 1926 |
| }, |
| { |
| "epoch": 1.7975746268656716, |
| "grad_norm": 1.2929590002031308, |
| "learning_rate": 2.506383281675229e-07, |
| "loss": 0.1542, |
| "step": 1927 |
| }, |
| { |
| "epoch": 1.7985074626865671, |
| "grad_norm": 1.2679707784255936, |
| "learning_rate": 2.4835289267733263e-07, |
| "loss": 0.166, |
| "step": 1928 |
| }, |
| { |
| "epoch": 1.7994402985074627, |
| "grad_norm": 1.260213173889151, |
| "learning_rate": 2.4607765939775706e-07, |
| "loss": 0.1841, |
| "step": 1929 |
| }, |
| { |
| "epoch": 1.8003731343283582, |
| "grad_norm": 1.20471851282945, |
| "learning_rate": 2.4381263321392514e-07, |
| "loss": 0.1426, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.8013059701492538, |
| "grad_norm": 1.2303747247753642, |
| "learning_rate": 2.415578189890505e-07, |
| "loss": 0.1621, |
| "step": 1931 |
| }, |
| { |
| "epoch": 1.8022388059701493, |
| "grad_norm": 1.2649296325203112, |
| "learning_rate": 2.3931322156442117e-07, |
| "loss": 0.1662, |
| "step": 1932 |
| }, |
| { |
| "epoch": 1.8031716417910446, |
| "grad_norm": 1.163644370815951, |
| "learning_rate": 2.3707884575938645e-07, |
| "loss": 0.1435, |
| "step": 1933 |
| }, |
| { |
| "epoch": 1.8041044776119404, |
| "grad_norm": 1.3676345128723275, |
| "learning_rate": 2.348546963713516e-07, |
| "loss": 0.244, |
| "step": 1934 |
| }, |
| { |
| "epoch": 1.8050373134328357, |
| "grad_norm": 1.365023911240722, |
| "learning_rate": 2.3264077817576446e-07, |
| "loss": 0.2213, |
| "step": 1935 |
| }, |
| { |
| "epoch": 1.8059701492537314, |
| "grad_norm": 1.2918235332781236, |
| "learning_rate": 2.3043709592610486e-07, |
| "loss": 0.1521, |
| "step": 1936 |
| }, |
| { |
| "epoch": 1.8069029850746268, |
| "grad_norm": 1.30356262574336, |
| "learning_rate": 2.2824365435387573e-07, |
| "loss": 0.1681, |
| "step": 1937 |
| }, |
| { |
| "epoch": 1.8078358208955225, |
| "grad_norm": 1.254062316141817, |
| "learning_rate": 2.2606045816859047e-07, |
| "loss": 0.1786, |
| "step": 1938 |
| }, |
| { |
| "epoch": 1.8087686567164178, |
| "grad_norm": 1.0726060509074782, |
| "learning_rate": 2.2388751205776826e-07, |
| "loss": 0.1126, |
| "step": 1939 |
| }, |
| { |
| "epoch": 1.8097014925373134, |
| "grad_norm": 1.1087652730174822, |
| "learning_rate": 2.2172482068691658e-07, |
| "loss": 0.1472, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.810634328358209, |
| "grad_norm": 1.2963637098299603, |
| "learning_rate": 2.1957238869952767e-07, |
| "loss": 0.2098, |
| "step": 1941 |
| }, |
| { |
| "epoch": 1.8115671641791045, |
| "grad_norm": 1.2253863654446437, |
| "learning_rate": 2.174302207170653e-07, |
| "loss": 0.1477, |
| "step": 1942 |
| }, |
| { |
| "epoch": 1.8125, |
| "grad_norm": 1.2721837097524327, |
| "learning_rate": 2.152983213389559e-07, |
| "loss": 0.1817, |
| "step": 1943 |
| }, |
| { |
| "epoch": 1.8134328358208955, |
| "grad_norm": 1.2653379590253169, |
| "learning_rate": 2.1317669514257678e-07, |
| "loss": 0.1601, |
| "step": 1944 |
| }, |
| { |
| "epoch": 1.814365671641791, |
| "grad_norm": 1.2482190972474847, |
| "learning_rate": 2.1106534668324963e-07, |
| "loss": 0.1583, |
| "step": 1945 |
| }, |
| { |
| "epoch": 1.8152985074626866, |
| "grad_norm": 1.219174084940544, |
| "learning_rate": 2.0896428049422768e-07, |
| "loss": 0.1655, |
| "step": 1946 |
| }, |
| { |
| "epoch": 1.8162313432835822, |
| "grad_norm": 1.2610747580499384, |
| "learning_rate": 2.0687350108668736e-07, |
| "loss": 0.1968, |
| "step": 1947 |
| }, |
| { |
| "epoch": 1.8171641791044775, |
| "grad_norm": 1.201167381487851, |
| "learning_rate": 2.0479301294971943e-07, |
| "loss": 0.1701, |
| "step": 1948 |
| }, |
| { |
| "epoch": 1.8180970149253732, |
| "grad_norm": 1.291343405885453, |
| "learning_rate": 2.0272282055031677e-07, |
| "loss": 0.1707, |
| "step": 1949 |
| }, |
| { |
| "epoch": 1.8190298507462686, |
| "grad_norm": 1.266129430543222, |
| "learning_rate": 2.006629283333694e-07, |
| "loss": 0.1795, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.8199626865671643, |
| "grad_norm": 1.195201708186078, |
| "learning_rate": 1.986133407216473e-07, |
| "loss": 0.137, |
| "step": 1951 |
| }, |
| { |
| "epoch": 1.8208955223880596, |
| "grad_norm": 1.286182662110936, |
| "learning_rate": 1.9657406211579966e-07, |
| "loss": 0.1586, |
| "step": 1952 |
| }, |
| { |
| "epoch": 1.8218283582089554, |
| "grad_norm": 1.2380020660793205, |
| "learning_rate": 1.9454509689433855e-07, |
| "loss": 0.1711, |
| "step": 1953 |
| }, |
| { |
| "epoch": 1.8227611940298507, |
| "grad_norm": 1.1637093894903545, |
| "learning_rate": 1.925264494136342e-07, |
| "loss": 0.1575, |
| "step": 1954 |
| }, |
| { |
| "epoch": 1.8236940298507462, |
| "grad_norm": 1.2883483299940546, |
| "learning_rate": 1.9051812400790294e-07, |
| "loss": 0.181, |
| "step": 1955 |
| }, |
| { |
| "epoch": 1.8246268656716418, |
| "grad_norm": 1.3248486280708933, |
| "learning_rate": 1.885201249891988e-07, |
| "loss": 0.1752, |
| "step": 1956 |
| }, |
| { |
| "epoch": 1.8255597014925373, |
| "grad_norm": 1.1699957820679514, |
| "learning_rate": 1.8653245664740415e-07, |
| "loss": 0.1336, |
| "step": 1957 |
| }, |
| { |
| "epoch": 1.8264925373134329, |
| "grad_norm": 1.217975885085646, |
| "learning_rate": 1.8455512325022073e-07, |
| "loss": 0.153, |
| "step": 1958 |
| }, |
| { |
| "epoch": 1.8274253731343284, |
| "grad_norm": 1.3227505872894074, |
| "learning_rate": 1.825881290431586e-07, |
| "loss": 0.2105, |
| "step": 1959 |
| }, |
| { |
| "epoch": 1.828358208955224, |
| "grad_norm": 1.3954681031280969, |
| "learning_rate": 1.806314782495311e-07, |
| "loss": 0.2267, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.8292910447761193, |
| "grad_norm": 1.1897675857272247, |
| "learning_rate": 1.7868517507044158e-07, |
| "loss": 0.1472, |
| "step": 1961 |
| }, |
| { |
| "epoch": 1.830223880597015, |
| "grad_norm": 1.251569228039328, |
| "learning_rate": 1.7674922368477675e-07, |
| "loss": 0.1474, |
| "step": 1962 |
| }, |
| { |
| "epoch": 1.8311567164179103, |
| "grad_norm": 1.2872655044907908, |
| "learning_rate": 1.7482362824919773e-07, |
| "loss": 0.1792, |
| "step": 1963 |
| }, |
| { |
| "epoch": 1.832089552238806, |
| "grad_norm": 1.293037384768504, |
| "learning_rate": 1.7290839289813065e-07, |
| "loss": 0.1752, |
| "step": 1964 |
| }, |
| { |
| "epoch": 1.8330223880597014, |
| "grad_norm": 1.2881640964829002, |
| "learning_rate": 1.71003521743755e-07, |
| "loss": 0.1867, |
| "step": 1965 |
| }, |
| { |
| "epoch": 1.8339552238805972, |
| "grad_norm": 1.3204633484140118, |
| "learning_rate": 1.6910901887599917e-07, |
| "loss": 0.1995, |
| "step": 1966 |
| }, |
| { |
| "epoch": 1.8348880597014925, |
| "grad_norm": 1.2679285220183314, |
| "learning_rate": 1.6722488836253104e-07, |
| "loss": 0.1661, |
| "step": 1967 |
| }, |
| { |
| "epoch": 1.835820895522388, |
| "grad_norm": 1.2423532391293808, |
| "learning_rate": 1.6535113424874683e-07, |
| "loss": 0.1732, |
| "step": 1968 |
| }, |
| { |
| "epoch": 1.8367537313432836, |
| "grad_norm": 1.3796806156601502, |
| "learning_rate": 1.6348776055776393e-07, |
| "loss": 0.1944, |
| "step": 1969 |
| }, |
| { |
| "epoch": 1.837686567164179, |
| "grad_norm": 1.2902379816793106, |
| "learning_rate": 1.6163477129041204e-07, |
| "loss": 0.175, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.8386194029850746, |
| "grad_norm": 1.2232784850810066, |
| "learning_rate": 1.5979217042522477e-07, |
| "loss": 0.129, |
| "step": 1971 |
| }, |
| { |
| "epoch": 1.8395522388059702, |
| "grad_norm": 1.2805197763508278, |
| "learning_rate": 1.5795996191842966e-07, |
| "loss": 0.1466, |
| "step": 1972 |
| }, |
| { |
| "epoch": 1.8404850746268657, |
| "grad_norm": 1.2795435182988995, |
| "learning_rate": 1.561381497039427e-07, |
| "loss": 0.1676, |
| "step": 1973 |
| }, |
| { |
| "epoch": 1.841417910447761, |
| "grad_norm": 1.1479715654839133, |
| "learning_rate": 1.5432673769335772e-07, |
| "loss": 0.1318, |
| "step": 1974 |
| }, |
| { |
| "epoch": 1.8423507462686568, |
| "grad_norm": 1.2255765389368125, |
| "learning_rate": 1.525257297759375e-07, |
| "loss": 0.1718, |
| "step": 1975 |
| }, |
| { |
| "epoch": 1.8432835820895521, |
| "grad_norm": 1.232347780038549, |
| "learning_rate": 1.5073512981860715e-07, |
| "loss": 0.1555, |
| "step": 1976 |
| }, |
| { |
| "epoch": 1.8442164179104479, |
| "grad_norm": 1.186929078251592, |
| "learning_rate": 1.4895494166594527e-07, |
| "loss": 0.1489, |
| "step": 1977 |
| }, |
| { |
| "epoch": 1.8451492537313432, |
| "grad_norm": 1.3533328231403003, |
| "learning_rate": 1.4718516914017433e-07, |
| "loss": 0.1932, |
| "step": 1978 |
| }, |
| { |
| "epoch": 1.846082089552239, |
| "grad_norm": 1.2033823427053365, |
| "learning_rate": 1.4542581604115258e-07, |
| "loss": 0.1388, |
| "step": 1979 |
| }, |
| { |
| "epoch": 1.8470149253731343, |
| "grad_norm": 1.2084325834974623, |
| "learning_rate": 1.4367688614637e-07, |
| "loss": 0.1705, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.8479477611940298, |
| "grad_norm": 1.2639200006890643, |
| "learning_rate": 1.4193838321093444e-07, |
| "loss": 0.1916, |
| "step": 1981 |
| }, |
| { |
| "epoch": 1.8488805970149254, |
| "grad_norm": 1.249684340310944, |
| "learning_rate": 1.4021031096756676e-07, |
| "loss": 0.1602, |
| "step": 1982 |
| }, |
| { |
| "epoch": 1.849813432835821, |
| "grad_norm": 1.2532245207528903, |
| "learning_rate": 1.3849267312659286e-07, |
| "loss": 0.1522, |
| "step": 1983 |
| }, |
| { |
| "epoch": 1.8507462686567164, |
| "grad_norm": 1.2884244986962023, |
| "learning_rate": 1.3678547337593494e-07, |
| "loss": 0.1602, |
| "step": 1984 |
| }, |
| { |
| "epoch": 1.851679104477612, |
| "grad_norm": 1.2943988387183982, |
| "learning_rate": 1.3508871538110257e-07, |
| "loss": 0.2043, |
| "step": 1985 |
| }, |
| { |
| "epoch": 1.8526119402985075, |
| "grad_norm": 1.1569657792604953, |
| "learning_rate": 1.3340240278518657e-07, |
| "loss": 0.1275, |
| "step": 1986 |
| }, |
| { |
| "epoch": 1.8535447761194028, |
| "grad_norm": 1.1921267259347368, |
| "learning_rate": 1.317265392088507e-07, |
| "loss": 0.137, |
| "step": 1987 |
| }, |
| { |
| "epoch": 1.8544776119402986, |
| "grad_norm": 1.301171410793594, |
| "learning_rate": 1.3006112825032447e-07, |
| "loss": 0.191, |
| "step": 1988 |
| }, |
| { |
| "epoch": 1.855410447761194, |
| "grad_norm": 1.2573536199515079, |
| "learning_rate": 1.284061734853931e-07, |
| "loss": 0.1457, |
| "step": 1989 |
| }, |
| { |
| "epoch": 1.8563432835820897, |
| "grad_norm": 1.1872268450628818, |
| "learning_rate": 1.2676167846739308e-07, |
| "loss": 0.1421, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.857276119402985, |
| "grad_norm": 1.2675468112267114, |
| "learning_rate": 1.2512764672720168e-07, |
| "loss": 0.1833, |
| "step": 1991 |
| }, |
| { |
| "epoch": 1.8582089552238807, |
| "grad_norm": 1.3549732318972694, |
| "learning_rate": 1.235040817732297e-07, |
| "loss": 0.1938, |
| "step": 1992 |
| }, |
| { |
| "epoch": 1.859141791044776, |
| "grad_norm": 1.2085891700977642, |
| "learning_rate": 1.2189098709141756e-07, |
| "loss": 0.1489, |
| "step": 1993 |
| }, |
| { |
| "epoch": 1.8600746268656716, |
| "grad_norm": 1.2385528532688561, |
| "learning_rate": 1.202883661452231e-07, |
| "loss": 0.1648, |
| "step": 1994 |
| }, |
| { |
| "epoch": 1.8610074626865671, |
| "grad_norm": 1.3342227258355062, |
| "learning_rate": 1.1869622237561662e-07, |
| "loss": 0.2074, |
| "step": 1995 |
| }, |
| { |
| "epoch": 1.8619402985074627, |
| "grad_norm": 1.2625600599310918, |
| "learning_rate": 1.1711455920107306e-07, |
| "loss": 0.1887, |
| "step": 1996 |
| }, |
| { |
| "epoch": 1.8628731343283582, |
| "grad_norm": 1.3176357088862165, |
| "learning_rate": 1.1554338001756482e-07, |
| "loss": 0.2104, |
| "step": 1997 |
| }, |
| { |
| "epoch": 1.8638059701492538, |
| "grad_norm": 1.3343195299943285, |
| "learning_rate": 1.1398268819855285e-07, |
| "loss": 0.2143, |
| "step": 1998 |
| }, |
| { |
| "epoch": 1.8647388059701493, |
| "grad_norm": 1.3588439215016899, |
| "learning_rate": 1.1243248709498278e-07, |
| "loss": 0.1616, |
| "step": 1999 |
| }, |
| { |
| "epoch": 1.8656716417910446, |
| "grad_norm": 1.2045136303338808, |
| "learning_rate": 1.1089278003527438e-07, |
| "loss": 0.1441, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.8656716417910446, |
| "eval_loss": 0.21490994095802307, |
| "eval_runtime": 3.411, |
| "eval_samples_per_second": 25.505, |
| "eval_steps_per_second": 6.45, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.8666044776119404, |
| "grad_norm": 1.2647703652398299, |
| "learning_rate": 1.0936357032531597e-07, |
| "loss": 0.1778, |
| "step": 2001 |
| }, |
| { |
| "epoch": 1.8675373134328357, |
| "grad_norm": 1.3578980897172048, |
| "learning_rate": 1.0784486124845783e-07, |
| "loss": 0.2182, |
| "step": 2002 |
| }, |
| { |
| "epoch": 1.8684701492537314, |
| "grad_norm": 1.2809410331706492, |
| "learning_rate": 1.0633665606550436e-07, |
| "loss": 0.2063, |
| "step": 2003 |
| }, |
| { |
| "epoch": 1.8694029850746268, |
| "grad_norm": 1.2287311481388954, |
| "learning_rate": 1.0483895801470579e-07, |
| "loss": 0.1484, |
| "step": 2004 |
| }, |
| { |
| "epoch": 1.8703358208955225, |
| "grad_norm": 1.1789462345833952, |
| "learning_rate": 1.0335177031175425e-07, |
| "loss": 0.1762, |
| "step": 2005 |
| }, |
| { |
| "epoch": 1.8712686567164178, |
| "grad_norm": 1.2786010454645145, |
| "learning_rate": 1.0187509614977387e-07, |
| "loss": 0.1437, |
| "step": 2006 |
| }, |
| { |
| "epoch": 1.8722014925373134, |
| "grad_norm": 1.2715109077455946, |
| "learning_rate": 1.0040893869931623e-07, |
| "loss": 0.1528, |
| "step": 2007 |
| }, |
| { |
| "epoch": 1.873134328358209, |
| "grad_norm": 1.269783047564372, |
| "learning_rate": 9.89533011083521e-08, |
| "loss": 0.1748, |
| "step": 2008 |
| }, |
| { |
| "epoch": 1.8740671641791045, |
| "grad_norm": 1.156925043114288, |
| "learning_rate": 9.75081865022659e-08, |
| "loss": 0.1273, |
| "step": 2009 |
| }, |
| { |
| "epoch": 1.875, |
| "grad_norm": 1.269385652751176, |
| "learning_rate": 9.607359798384785e-08, |
| "loss": 0.1548, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.8759328358208955, |
| "grad_norm": 1.228564799752932, |
| "learning_rate": 9.464953863328685e-08, |
| "loss": 0.1781, |
| "step": 2011 |
| }, |
| { |
| "epoch": 1.876865671641791, |
| "grad_norm": 1.2315874613017999, |
| "learning_rate": 9.323601150816597e-08, |
| "loss": 0.1504, |
| "step": 2012 |
| }, |
| { |
| "epoch": 1.8777985074626866, |
| "grad_norm": 1.2946326738241574, |
| "learning_rate": 9.18330196434536e-08, |
| "loss": 0.1665, |
| "step": 2013 |
| }, |
| { |
| "epoch": 1.8787313432835822, |
| "grad_norm": 1.2632594514798259, |
| "learning_rate": 9.044056605149898e-08, |
| "loss": 0.1639, |
| "step": 2014 |
| }, |
| { |
| "epoch": 1.8796641791044775, |
| "grad_norm": 1.2220859866764355, |
| "learning_rate": 8.905865372202449e-08, |
| "loss": 0.153, |
| "step": 2015 |
| }, |
| { |
| "epoch": 1.8805970149253732, |
| "grad_norm": 1.2154225699873356, |
| "learning_rate": 8.768728562211948e-08, |
| "loss": 0.1676, |
| "step": 2016 |
| }, |
| { |
| "epoch": 1.8815298507462686, |
| "grad_norm": 1.3222162104234343, |
| "learning_rate": 8.632646469623251e-08, |
| "loss": 0.1924, |
| "step": 2017 |
| }, |
| { |
| "epoch": 1.8824626865671643, |
| "grad_norm": 1.2498467042920565, |
| "learning_rate": 8.497619386616917e-08, |
| "loss": 0.1525, |
| "step": 2018 |
| }, |
| { |
| "epoch": 1.8833955223880596, |
| "grad_norm": 1.2593630693482811, |
| "learning_rate": 8.363647603108038e-08, |
| "loss": 0.1683, |
| "step": 2019 |
| }, |
| { |
| "epoch": 1.8843283582089554, |
| "grad_norm": 1.2586530048367546, |
| "learning_rate": 8.230731406746018e-08, |
| "loss": 0.1827, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.8852611940298507, |
| "grad_norm": 1.222002038407417, |
| "learning_rate": 8.098871082913795e-08, |
| "loss": 0.1328, |
| "step": 2021 |
| }, |
| { |
| "epoch": 1.8861940298507462, |
| "grad_norm": 1.3251949889002166, |
| "learning_rate": 7.968066914727346e-08, |
| "loss": 0.1915, |
| "step": 2022 |
| }, |
| { |
| "epoch": 1.8871268656716418, |
| "grad_norm": 1.2308479588962644, |
| "learning_rate": 7.838319183034738e-08, |
| "loss": 0.1398, |
| "step": 2023 |
| }, |
| { |
| "epoch": 1.8880597014925373, |
| "grad_norm": 1.3339175227843414, |
| "learning_rate": 7.709628166416128e-08, |
| "loss": 0.1998, |
| "step": 2024 |
| }, |
| { |
| "epoch": 1.8889925373134329, |
| "grad_norm": 1.1317450810318777, |
| "learning_rate": 7.581994141182436e-08, |
| "loss": 0.1335, |
| "step": 2025 |
| }, |
| { |
| "epoch": 1.8899253731343284, |
| "grad_norm": 1.209849710848305, |
| "learning_rate": 7.455417381375452e-08, |
| "loss": 0.1675, |
| "step": 2026 |
| }, |
| { |
| "epoch": 1.890858208955224, |
| "grad_norm": 1.3439662204687448, |
| "learning_rate": 7.329898158766668e-08, |
| "loss": 0.1983, |
| "step": 2027 |
| }, |
| { |
| "epoch": 1.8917910447761193, |
| "grad_norm": 1.2251396088707578, |
| "learning_rate": 7.20543674285712e-08, |
| "loss": 0.1488, |
| "step": 2028 |
| }, |
| { |
| "epoch": 1.892723880597015, |
| "grad_norm": 1.2755583940998025, |
| "learning_rate": 7.082033400876597e-08, |
| "loss": 0.1581, |
| "step": 2029 |
| }, |
| { |
| "epoch": 1.8936567164179103, |
| "grad_norm": 1.3121422656304376, |
| "learning_rate": 6.959688397783104e-08, |
| "loss": 0.1893, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.894589552238806, |
| "grad_norm": 1.2011563608659308, |
| "learning_rate": 6.838401996262289e-08, |
| "loss": 0.1181, |
| "step": 2031 |
| }, |
| { |
| "epoch": 1.8955223880597014, |
| "grad_norm": 1.2263127899449682, |
| "learning_rate": 6.718174456726789e-08, |
| "loss": 0.1782, |
| "step": 2032 |
| }, |
| { |
| "epoch": 1.8964552238805972, |
| "grad_norm": 1.2668253310217705, |
| "learning_rate": 6.599006037315891e-08, |
| "loss": 0.145, |
| "step": 2033 |
| }, |
| { |
| "epoch": 1.8973880597014925, |
| "grad_norm": 1.2872258234895284, |
| "learning_rate": 6.480896993894925e-08, |
| "loss": 0.1984, |
| "step": 2034 |
| }, |
| { |
| "epoch": 1.898320895522388, |
| "grad_norm": 1.2223492712874333, |
| "learning_rate": 6.363847580054483e-08, |
| "loss": 0.1534, |
| "step": 2035 |
| }, |
| { |
| "epoch": 1.8992537313432836, |
| "grad_norm": 1.303409477099441, |
| "learning_rate": 6.247858047110145e-08, |
| "loss": 0.1629, |
| "step": 2036 |
| }, |
| { |
| "epoch": 1.900186567164179, |
| "grad_norm": 1.3053581424951257, |
| "learning_rate": 6.13292864410181e-08, |
| "loss": 0.1755, |
| "step": 2037 |
| }, |
| { |
| "epoch": 1.9011194029850746, |
| "grad_norm": 1.2366118845923963, |
| "learning_rate": 6.019059617793088e-08, |
| "loss": 0.1631, |
| "step": 2038 |
| }, |
| { |
| "epoch": 1.9020522388059702, |
| "grad_norm": 1.2141993390108499, |
| "learning_rate": 5.906251212670966e-08, |
| "loss": 0.1533, |
| "step": 2039 |
| }, |
| { |
| "epoch": 1.9029850746268657, |
| "grad_norm": 1.2575829570687929, |
| "learning_rate": 5.794503670945195e-08, |
| "loss": 0.1674, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.903917910447761, |
| "grad_norm": 1.264400341181521, |
| "learning_rate": 5.683817232547739e-08, |
| "loss": 0.1751, |
| "step": 2041 |
| }, |
| { |
| "epoch": 1.9048507462686568, |
| "grad_norm": 1.3228707706783953, |
| "learning_rate": 5.5741921351322726e-08, |
| "loss": 0.2016, |
| "step": 2042 |
| }, |
| { |
| "epoch": 1.9057835820895521, |
| "grad_norm": 1.2226813604358504, |
| "learning_rate": 5.465628614073626e-08, |
| "loss": 0.1537, |
| "step": 2043 |
| }, |
| { |
| "epoch": 1.9067164179104479, |
| "grad_norm": 1.1625816999071248, |
| "learning_rate": 5.3581269024673975e-08, |
| "loss": 0.1548, |
| "step": 2044 |
| }, |
| { |
| "epoch": 1.9076492537313432, |
| "grad_norm": 1.262325483429432, |
| "learning_rate": 5.251687231129288e-08, |
| "loss": 0.1619, |
| "step": 2045 |
| }, |
| { |
| "epoch": 1.908582089552239, |
| "grad_norm": 1.2324344765240605, |
| "learning_rate": 5.1463098285948755e-08, |
| "loss": 0.1633, |
| "step": 2046 |
| }, |
| { |
| "epoch": 1.9095149253731343, |
| "grad_norm": 1.3424158979490182, |
| "learning_rate": 5.0419949211188426e-08, |
| "loss": 0.2337, |
| "step": 2047 |
| }, |
| { |
| "epoch": 1.9104477611940298, |
| "grad_norm": 1.2312577225133483, |
| "learning_rate": 4.9387427326745287e-08, |
| "loss": 0.172, |
| "step": 2048 |
| }, |
| { |
| "epoch": 1.9113805970149254, |
| "grad_norm": 1.24843958071994, |
| "learning_rate": 4.8365534849536546e-08, |
| "loss": 0.1413, |
| "step": 2049 |
| }, |
| { |
| "epoch": 1.912313432835821, |
| "grad_norm": 1.1973236844635842, |
| "learning_rate": 4.7354273973657106e-08, |
| "loss": 0.1492, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.9132462686567164, |
| "grad_norm": 1.222774081659431, |
| "learning_rate": 4.635364687037347e-08, |
| "loss": 0.1532, |
| "step": 2051 |
| }, |
| { |
| "epoch": 1.914179104477612, |
| "grad_norm": 1.2354400863685973, |
| "learning_rate": 4.536365568812206e-08, |
| "loss": 0.1796, |
| "step": 2052 |
| }, |
| { |
| "epoch": 1.9151119402985075, |
| "grad_norm": 1.2537534257119465, |
| "learning_rate": 4.438430255250148e-08, |
| "loss": 0.16, |
| "step": 2053 |
| }, |
| { |
| "epoch": 1.9160447761194028, |
| "grad_norm": 1.25376146885341, |
| "learning_rate": 4.3415589566271345e-08, |
| "loss": 0.156, |
| "step": 2054 |
| }, |
| { |
| "epoch": 1.9169776119402986, |
| "grad_norm": 1.1554648927829305, |
| "learning_rate": 4.245751880934401e-08, |
| "loss": 0.1269, |
| "step": 2055 |
| }, |
| { |
| "epoch": 1.917910447761194, |
| "grad_norm": 1.2777046286903468, |
| "learning_rate": 4.1510092338784005e-08, |
| "loss": 0.1644, |
| "step": 2056 |
| }, |
| { |
| "epoch": 1.9188432835820897, |
| "grad_norm": 1.3672542752617738, |
| "learning_rate": 4.057331218880023e-08, |
| "loss": 0.1966, |
| "step": 2057 |
| }, |
| { |
| "epoch": 1.919776119402985, |
| "grad_norm": 1.2151857535759114, |
| "learning_rate": 3.9647180370742664e-08, |
| "loss": 0.145, |
| "step": 2058 |
| }, |
| { |
| "epoch": 1.9207089552238807, |
| "grad_norm": 1.241760663584323, |
| "learning_rate": 3.8731698873099025e-08, |
| "loss": 0.1594, |
| "step": 2059 |
| }, |
| { |
| "epoch": 1.921641791044776, |
| "grad_norm": 1.2075163609084734, |
| "learning_rate": 3.782686966149085e-08, |
| "loss": 0.1637, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.9225746268656716, |
| "grad_norm": 1.2686508486559018, |
| "learning_rate": 3.6932694678666335e-08, |
| "loss": 0.1743, |
| "step": 2061 |
| }, |
| { |
| "epoch": 1.9235074626865671, |
| "grad_norm": 1.2796694294012751, |
| "learning_rate": 3.604917584449919e-08, |
| "loss": 0.155, |
| "step": 2062 |
| }, |
| { |
| "epoch": 1.9244402985074627, |
| "grad_norm": 1.1685408020475783, |
| "learning_rate": 3.5176315055983625e-08, |
| "loss": 0.1558, |
| "step": 2063 |
| }, |
| { |
| "epoch": 1.9253731343283582, |
| "grad_norm": 1.209641119343688, |
| "learning_rate": 3.431411418722941e-08, |
| "loss": 0.1348, |
| "step": 2064 |
| }, |
| { |
| "epoch": 1.9263059701492538, |
| "grad_norm": 1.308251030820361, |
| "learning_rate": 3.346257508945849e-08, |
| "loss": 0.1886, |
| "step": 2065 |
| }, |
| { |
| "epoch": 1.9272388059701493, |
| "grad_norm": 1.212712386031227, |
| "learning_rate": 3.26216995910017e-08, |
| "loss": 0.1652, |
| "step": 2066 |
| }, |
| { |
| "epoch": 1.9281716417910446, |
| "grad_norm": 1.247431350146223, |
| "learning_rate": 3.1791489497293715e-08, |
| "loss": 0.1616, |
| "step": 2067 |
| }, |
| { |
| "epoch": 1.9291044776119404, |
| "grad_norm": 1.389925951787403, |
| "learning_rate": 3.097194659086977e-08, |
| "loss": 0.2654, |
| "step": 2068 |
| }, |
| { |
| "epoch": 1.9300373134328357, |
| "grad_norm": 1.2490713519993188, |
| "learning_rate": 3.016307263136231e-08, |
| "loss": 0.1754, |
| "step": 2069 |
| }, |
| { |
| "epoch": 1.9309701492537314, |
| "grad_norm": 1.266116357622932, |
| "learning_rate": 2.9364869355494874e-08, |
| "loss": 0.1664, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.9319029850746268, |
| "grad_norm": 1.2389673660954839, |
| "learning_rate": 2.857733847708155e-08, |
| "loss": 0.162, |
| "step": 2071 |
| }, |
| { |
| "epoch": 1.9328358208955225, |
| "grad_norm": 1.266035053802742, |
| "learning_rate": 2.7800481687021987e-08, |
| "loss": 0.1844, |
| "step": 2072 |
| }, |
| { |
| "epoch": 1.9337686567164178, |
| "grad_norm": 1.2372099071432225, |
| "learning_rate": 2.7034300653295818e-08, |
| "loss": 0.1761, |
| "step": 2073 |
| }, |
| { |
| "epoch": 1.9347014925373134, |
| "grad_norm": 1.264218418799199, |
| "learning_rate": 2.6278797020963253e-08, |
| "loss": 0.1417, |
| "step": 2074 |
| }, |
| { |
| "epoch": 1.935634328358209, |
| "grad_norm": 1.2718568361161524, |
| "learning_rate": 2.5533972412157825e-08, |
| "loss": 0.2047, |
| "step": 2075 |
| }, |
| { |
| "epoch": 1.9365671641791045, |
| "grad_norm": 1.2231602912323578, |
| "learning_rate": 2.479982842608475e-08, |
| "loss": 0.1609, |
| "step": 2076 |
| }, |
| { |
| "epoch": 1.9375, |
| "grad_norm": 1.2898602762124696, |
| "learning_rate": 2.4076366639015914e-08, |
| "loss": 0.1717, |
| "step": 2077 |
| }, |
| { |
| "epoch": 1.9384328358208955, |
| "grad_norm": 1.2700166079913484, |
| "learning_rate": 2.3363588604288777e-08, |
| "loss": 0.1536, |
| "step": 2078 |
| }, |
| { |
| "epoch": 1.939365671641791, |
| "grad_norm": 1.3149483654485066, |
| "learning_rate": 2.2661495852301376e-08, |
| "loss": 0.184, |
| "step": 2079 |
| }, |
| { |
| "epoch": 1.9402985074626866, |
| "grad_norm": 1.2239797410288278, |
| "learning_rate": 2.1970089890509527e-08, |
| "loss": 0.1442, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.9412313432835822, |
| "grad_norm": 1.2532824903580881, |
| "learning_rate": 2.128937220342353e-08, |
| "loss": 0.1785, |
| "step": 2081 |
| }, |
| { |
| "epoch": 1.9421641791044775, |
| "grad_norm": 1.1849411459371284, |
| "learning_rate": 2.0619344252605922e-08, |
| "loss": 0.1478, |
| "step": 2082 |
| }, |
| { |
| "epoch": 1.9430970149253732, |
| "grad_norm": 1.2705288944933275, |
| "learning_rate": 1.9960007476665376e-08, |
| "loss": 0.1928, |
| "step": 2083 |
| }, |
| { |
| "epoch": 1.9440298507462686, |
| "grad_norm": 1.2633498041710418, |
| "learning_rate": 1.931136329125727e-08, |
| "loss": 0.1883, |
| "step": 2084 |
| }, |
| { |
| "epoch": 1.9449626865671643, |
| "grad_norm": 1.2150222998047435, |
| "learning_rate": 1.8673413089078108e-08, |
| "loss": 0.1446, |
| "step": 2085 |
| }, |
| { |
| "epoch": 1.9458955223880596, |
| "grad_norm": 1.2652170430429417, |
| "learning_rate": 1.8046158239864996e-08, |
| "loss": 0.1748, |
| "step": 2086 |
| }, |
| { |
| "epoch": 1.9468283582089554, |
| "grad_norm": 1.3157094882210538, |
| "learning_rate": 1.7429600090388966e-08, |
| "loss": 0.2167, |
| "step": 2087 |
| }, |
| { |
| "epoch": 1.9477611940298507, |
| "grad_norm": 1.258322651998683, |
| "learning_rate": 1.6823739964456078e-08, |
| "loss": 0.1558, |
| "step": 2088 |
| }, |
| { |
| "epoch": 1.9486940298507462, |
| "grad_norm": 1.2276650774757258, |
| "learning_rate": 1.622857916290188e-08, |
| "loss": 0.1878, |
| "step": 2089 |
| }, |
| { |
| "epoch": 1.9496268656716418, |
| "grad_norm": 1.2725861869530462, |
| "learning_rate": 1.5644118963590305e-08, |
| "loss": 0.1589, |
| "step": 2090 |
| }, |
| { |
| "epoch": 1.9505597014925373, |
| "grad_norm": 1.258881749534401, |
| "learning_rate": 1.5070360621408653e-08, |
| "loss": 0.1747, |
| "step": 2091 |
| }, |
| { |
| "epoch": 1.9514925373134329, |
| "grad_norm": 1.2733567362619342, |
| "learning_rate": 1.4507305368268166e-08, |
| "loss": 0.1696, |
| "step": 2092 |
| }, |
| { |
| "epoch": 1.9524253731343284, |
| "grad_norm": 1.1802931314348926, |
| "learning_rate": 1.395495441309791e-08, |
| "loss": 0.1562, |
| "step": 2093 |
| }, |
| { |
| "epoch": 1.953358208955224, |
| "grad_norm": 1.1632999193280913, |
| "learning_rate": 1.3413308941845338e-08, |
| "loss": 0.1242, |
| "step": 2094 |
| }, |
| { |
| "epoch": 1.9542910447761193, |
| "grad_norm": 1.23255407194608, |
| "learning_rate": 1.2882370117471843e-08, |
| "loss": 0.1384, |
| "step": 2095 |
| }, |
| { |
| "epoch": 1.955223880597015, |
| "grad_norm": 1.3452475601728535, |
| "learning_rate": 1.2362139079949431e-08, |
| "loss": 0.2157, |
| "step": 2096 |
| }, |
| { |
| "epoch": 1.9561567164179103, |
| "grad_norm": 1.2450531168181902, |
| "learning_rate": 1.185261694626183e-08, |
| "loss": 0.1842, |
| "step": 2097 |
| }, |
| { |
| "epoch": 1.957089552238806, |
| "grad_norm": 1.4275173568352066, |
| "learning_rate": 1.1353804810397828e-08, |
| "loss": 0.2392, |
| "step": 2098 |
| }, |
| { |
| "epoch": 1.9580223880597014, |
| "grad_norm": 1.3143714071167805, |
| "learning_rate": 1.086570374335183e-08, |
| "loss": 0.1991, |
| "step": 2099 |
| }, |
| { |
| "epoch": 1.9589552238805972, |
| "grad_norm": 1.1500674993387472, |
| "learning_rate": 1.038831479311997e-08, |
| "loss": 0.1344, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.9598880597014925, |
| "grad_norm": 1.2672492478782138, |
| "learning_rate": 9.92163898470011e-09, |
| "loss": 0.1771, |
| "step": 2101 |
| }, |
| { |
| "epoch": 1.960820895522388, |
| "grad_norm": 1.4494577443854482, |
| "learning_rate": 9.465677320085742e-09, |
| "loss": 0.2224, |
| "step": 2102 |
| }, |
| { |
| "epoch": 1.9617537313432836, |
| "grad_norm": 1.219893918861546, |
| "learning_rate": 9.020430778267642e-09, |
| "loss": 0.1558, |
| "step": 2103 |
| }, |
| { |
| "epoch": 1.962686567164179, |
| "grad_norm": 1.240858328811536, |
| "learning_rate": 8.585900315229434e-09, |
| "loss": 0.1657, |
| "step": 2104 |
| }, |
| { |
| "epoch": 1.9636194029850746, |
| "grad_norm": 1.297384654876598, |
| "learning_rate": 8.162086863948149e-09, |
| "loss": 0.1803, |
| "step": 2105 |
| }, |
| { |
| "epoch": 1.9645522388059702, |
| "grad_norm": 1.268935326338251, |
| "learning_rate": 7.748991334387557e-09, |
| "loss": 0.1682, |
| "step": 2106 |
| }, |
| { |
| "epoch": 1.9654850746268657, |
| "grad_norm": 1.285120399000862, |
| "learning_rate": 7.346614613501502e-09, |
| "loss": 0.1683, |
| "step": 2107 |
| }, |
| { |
| "epoch": 1.966417910447761, |
| "grad_norm": 1.176238300774971, |
| "learning_rate": 6.9549575652289036e-09, |
| "loss": 0.1421, |
| "step": 2108 |
| }, |
| { |
| "epoch": 1.9673507462686568, |
| "grad_norm": 1.174234396013993, |
| "learning_rate": 6.57402103049154e-09, |
| "loss": 0.1541, |
| "step": 2109 |
| }, |
| { |
| "epoch": 1.9682835820895521, |
| "grad_norm": 1.2516232436617056, |
| "learning_rate": 6.203805827195153e-09, |
| "loss": 0.1972, |
| "step": 2110 |
| }, |
| { |
| "epoch": 1.9692164179104479, |
| "grad_norm": 1.2194039452614363, |
| "learning_rate": 5.844312750224457e-09, |
| "loss": 0.1427, |
| "step": 2111 |
| }, |
| { |
| "epoch": 1.9701492537313432, |
| "grad_norm": 1.2231155094240758, |
| "learning_rate": 5.495542571443135e-09, |
| "loss": 0.1569, |
| "step": 2112 |
| }, |
| { |
| "epoch": 1.971082089552239, |
| "grad_norm": 1.2499932139754333, |
| "learning_rate": 5.157496039691623e-09, |
| "loss": 0.16, |
| "step": 2113 |
| }, |
| { |
| "epoch": 1.9720149253731343, |
| "grad_norm": 1.330027159799502, |
| "learning_rate": 4.830173880785993e-09, |
| "loss": 0.1973, |
| "step": 2114 |
| }, |
| { |
| "epoch": 1.9729477611940298, |
| "grad_norm": 1.2327867569444704, |
| "learning_rate": 4.51357679751685e-09, |
| "loss": 0.127, |
| "step": 2115 |
| }, |
| { |
| "epoch": 1.9738805970149254, |
| "grad_norm": 1.2352162496382828, |
| "learning_rate": 4.207705469645995e-09, |
| "loss": 0.1329, |
| "step": 2116 |
| }, |
| { |
| "epoch": 1.974813432835821, |
| "grad_norm": 1.2655767875127586, |
| "learning_rate": 3.9125605539064305e-09, |
| "loss": 0.1552, |
| "step": 2117 |
| }, |
| { |
| "epoch": 1.9757462686567164, |
| "grad_norm": 1.2532336431861906, |
| "learning_rate": 3.6281426840006907e-09, |
| "loss": 0.1485, |
| "step": 2118 |
| }, |
| { |
| "epoch": 1.976679104477612, |
| "grad_norm": 1.2832730129321155, |
| "learning_rate": 3.354452470599179e-09, |
| "loss": 0.1679, |
| "step": 2119 |
| }, |
| { |
| "epoch": 1.9776119402985075, |
| "grad_norm": 1.2499818530768971, |
| "learning_rate": 3.0914905013396113e-09, |
| "loss": 0.1743, |
| "step": 2120 |
| }, |
| { |
| "epoch": 1.9785447761194028, |
| "grad_norm": 1.3021117386738132, |
| "learning_rate": 2.8392573408242418e-09, |
| "loss": 0.2287, |
| "step": 2121 |
| }, |
| { |
| "epoch": 1.9794776119402986, |
| "grad_norm": 1.2416303010434144, |
| "learning_rate": 2.597753530620417e-09, |
| "loss": 0.1703, |
| "step": 2122 |
| }, |
| { |
| "epoch": 1.980410447761194, |
| "grad_norm": 1.2533231907204014, |
| "learning_rate": 2.3669795892589108e-09, |
| "loss": 0.1675, |
| "step": 2123 |
| }, |
| { |
| "epoch": 1.9813432835820897, |
| "grad_norm": 1.1854416515724393, |
| "learning_rate": 2.146936012231704e-09, |
| "loss": 0.1354, |
| "step": 2124 |
| }, |
| { |
| "epoch": 1.982276119402985, |
| "grad_norm": 1.20978357068862, |
| "learning_rate": 1.937623271991429e-09, |
| "loss": 0.1187, |
| "step": 2125 |
| }, |
| { |
| "epoch": 1.9832089552238807, |
| "grad_norm": 1.2998527302124716, |
| "learning_rate": 1.7390418179519253e-09, |
| "loss": 0.2289, |
| "step": 2126 |
| }, |
| { |
| "epoch": 1.984141791044776, |
| "grad_norm": 1.2918207881836592, |
| "learning_rate": 1.5511920764849087e-09, |
| "loss": 0.1355, |
| "step": 2127 |
| }, |
| { |
| "epoch": 1.9850746268656716, |
| "grad_norm": 1.3023076128240987, |
| "learning_rate": 1.3740744509205263e-09, |
| "loss": 0.1271, |
| "step": 2128 |
| }, |
| { |
| "epoch": 1.9860074626865671, |
| "grad_norm": 1.3462881653805205, |
| "learning_rate": 1.2076893215462459e-09, |
| "loss": 0.1962, |
| "step": 2129 |
| }, |
| { |
| "epoch": 1.9869402985074627, |
| "grad_norm": 1.356991102865659, |
| "learning_rate": 1.0520370456063023e-09, |
| "loss": 0.1986, |
| "step": 2130 |
| }, |
| { |
| "epoch": 1.9878731343283582, |
| "grad_norm": 1.275323689551574, |
| "learning_rate": 9.071179572989198e-10, |
| "loss": 0.1687, |
| "step": 2131 |
| }, |
| { |
| "epoch": 1.9888059701492538, |
| "grad_norm": 1.3226690040861337, |
| "learning_rate": 7.72932367779089e-10, |
| "loss": 0.1542, |
| "step": 2132 |
| }, |
| { |
| "epoch": 1.9897388059701493, |
| "grad_norm": 1.3005321836804855, |
| "learning_rate": 6.494805651557911e-10, |
| "loss": 0.1672, |
| "step": 2133 |
| }, |
| { |
| "epoch": 1.9906716417910446, |
| "grad_norm": 1.186978116813406, |
| "learning_rate": 5.367628144897774e-10, |
| "loss": 0.1429, |
| "step": 2134 |
| }, |
| { |
| "epoch": 1.9916044776119404, |
| "grad_norm": 1.369297450226836, |
| "learning_rate": 4.3477935779689953e-10, |
| "loss": 0.2124, |
| "step": 2135 |
| }, |
| { |
| "epoch": 1.9925373134328357, |
| "grad_norm": 1.2885014260387289, |
| "learning_rate": 3.4353041404477926e-10, |
| "loss": 0.2039, |
| "step": 2136 |
| }, |
| { |
| "epoch": 1.9934701492537314, |
| "grad_norm": 1.2446402200227755, |
| "learning_rate": 2.630161791528085e-10, |
| "loss": 0.1603, |
| "step": 2137 |
| }, |
| { |
| "epoch": 1.9944029850746268, |
| "grad_norm": 1.1952225706546216, |
| "learning_rate": 1.932368259921491e-10, |
| "loss": 0.1773, |
| "step": 2138 |
| }, |
| { |
| "epoch": 1.9953358208955225, |
| "grad_norm": 1.1708117724538196, |
| "learning_rate": 1.3419250438517771e-10, |
| "loss": 0.161, |
| "step": 2139 |
| }, |
| { |
| "epoch": 1.9962686567164178, |
| "grad_norm": 1.2760179119329083, |
| "learning_rate": 8.588334110604113e-11, |
| "loss": 0.1879, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.9972014925373134, |
| "grad_norm": 1.311338484988188, |
| "learning_rate": 4.830943987843562e-11, |
| "loss": 0.1501, |
| "step": 2141 |
| }, |
| { |
| "epoch": 1.998134328358209, |
| "grad_norm": 1.1724830744992465, |
| "learning_rate": 2.1470881376162157e-11, |
| "loss": 0.1318, |
| "step": 2142 |
| }, |
| { |
| "epoch": 1.9990671641791045, |
| "grad_norm": 1.2412413185361992, |
| "learning_rate": 5.367723225346844e-12, |
| "loss": 0.1641, |
| "step": 2143 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 1.156053260474211, |
| "learning_rate": 0.0, |
| "loss": 0.1385, |
| "step": 2144 |
| }, |
| { |
| "epoch": 2.0, |
| "step": 2144, |
| "total_flos": 30161139400704.0, |
| "train_loss": 0.19700502045800103, |
| "train_runtime": 1537.8264, |
| "train_samples_per_second": 11.146, |
| "train_steps_per_second": 1.394 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 2144, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 5000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 30161139400704.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |