| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.11766090128250382, |
| "eval_steps": 1000000, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00011766090128250382, |
| "grad_norm": 0.0, |
| "learning_rate": 0, |
| "loss": 1.5093, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.00023532180256500765, |
| "grad_norm": 0.0, |
| "learning_rate": 0, |
| "loss": 1.6654, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.00035298270384751147, |
| "grad_norm": 7.219926357269287, |
| "learning_rate": 0.0, |
| "loss": 1.5056, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0004706436051300153, |
| "grad_norm": 7.219926357269287, |
| "learning_rate": 0.0, |
| "loss": 1.6708, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0005883045064125191, |
| "grad_norm": 5.301504135131836, |
| "learning_rate": 1.5051499783199055e-07, |
| "loss": 1.5035, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0007059654076950229, |
| "grad_norm": 6.189344882965088, |
| "learning_rate": 2.385606273598312e-07, |
| "loss": 1.5641, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0008236263089775268, |
| "grad_norm": 7.7528533935546875, |
| "learning_rate": 3.010299956639811e-07, |
| "loss": 1.6783, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0009412872102600306, |
| "grad_norm": 5.569939613342285, |
| "learning_rate": 3.494850021680093e-07, |
| "loss": 1.4829, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0010589481115425344, |
| "grad_norm": 5.5853753089904785, |
| "learning_rate": 3.8907562519182173e-07, |
| "loss": 1.505, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0011766090128250382, |
| "grad_norm": 15.958133697509766, |
| "learning_rate": 4.2254902000712834e-07, |
| "loss": 1.5802, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.001294269914107542, |
| "grad_norm": 4.650132179260254, |
| "learning_rate": 4.5154499349597166e-07, |
| "loss": 1.4608, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0014119308153900459, |
| "grad_norm": 8.008149147033691, |
| "learning_rate": 4.771212547196623e-07, |
| "loss": 1.5083, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0015295917166725497, |
| "grad_norm": 9.279545783996582, |
| "learning_rate": 4.999999999999999e-07, |
| "loss": 1.4809, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.0016472526179550535, |
| "grad_norm": 3.9695677757263184, |
| "learning_rate": 5.206963425791124e-07, |
| "loss": 1.5041, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.0017649135192375574, |
| "grad_norm": 8.24997615814209, |
| "learning_rate": 5.395906230238123e-07, |
| "loss": 1.4886, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0018825744205200612, |
| "grad_norm": 8.355944633483887, |
| "learning_rate": 5.569716761534182e-07, |
| "loss": 1.4472, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.002000235321802565, |
| "grad_norm": 6.1090474128723145, |
| "learning_rate": 5.730640178391189e-07, |
| "loss": 1.5176, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.002117896223085069, |
| "grad_norm": 6.527396202087402, |
| "learning_rate": 5.880456295278405e-07, |
| "loss": 1.6383, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0022355571243675727, |
| "grad_norm": 5.9756693840026855, |
| "learning_rate": 6.020599913279622e-07, |
| "loss": 1.5845, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0023532180256500765, |
| "grad_norm": 4.799884796142578, |
| "learning_rate": 6.15224460689137e-07, |
| "loss": 1.436, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0024708789269325803, |
| "grad_norm": 7.455969333648682, |
| "learning_rate": 6.276362525516529e-07, |
| "loss": 1.6733, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.002588539828215084, |
| "grad_norm": 5.23336124420166, |
| "learning_rate": 6.393768004764143e-07, |
| "loss": 1.4561, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.002706200729497588, |
| "grad_norm": 6.086044788360596, |
| "learning_rate": 6.505149978319905e-07, |
| "loss": 1.5478, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.0028238616307800918, |
| "grad_norm": 7.104644298553467, |
| "learning_rate": 6.611096473669595e-07, |
| "loss": 1.5191, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.0029415225320625956, |
| "grad_norm": 5.2691521644592285, |
| "learning_rate": 6.712113404111031e-07, |
| "loss": 1.4956, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.0030591834333450994, |
| "grad_norm": 10.598758697509766, |
| "learning_rate": 6.808639180087963e-07, |
| "loss": 1.5545, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.0031768443346276033, |
| "grad_norm": 6.0853590965271, |
| "learning_rate": 6.901056208558029e-07, |
| "loss": 1.4893, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.003294505235910107, |
| "grad_norm": 4.166804313659668, |
| "learning_rate": 6.989700043360186e-07, |
| "loss": 1.4774, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.003412166137192611, |
| "grad_norm": 4.60942268371582, |
| "learning_rate": 7.074866739854088e-07, |
| "loss": 1.5448, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.0035298270384751147, |
| "grad_norm": 4.655628204345703, |
| "learning_rate": 7.156818820794935e-07, |
| "loss": 1.5081, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0036474879397576185, |
| "grad_norm": 7.2692461013793945, |
| "learning_rate": 7.235790156711094e-07, |
| "loss": 1.5995, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.0037651488410401224, |
| "grad_norm": 4.613867282867432, |
| "learning_rate": 7.311989989494779e-07, |
| "loss": 1.4209, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.003882809742322626, |
| "grad_norm": 5.530818939208984, |
| "learning_rate": 7.38560627359831e-07, |
| "loss": 1.5738, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.00400047064360513, |
| "grad_norm": 4.4906487464904785, |
| "learning_rate": 7.456808469171361e-07, |
| "loss": 1.5313, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.004118131544887634, |
| "grad_norm": 6.488246440887451, |
| "learning_rate": 7.525749891599529e-07, |
| "loss": 1.405, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.004235792446170138, |
| "grad_norm": 6.306236267089844, |
| "learning_rate": 7.592569699389436e-07, |
| "loss": 1.5628, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.0043534533474526415, |
| "grad_norm": 6.571730136871338, |
| "learning_rate": 7.657394585211274e-07, |
| "loss": 1.5219, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.004471114248735145, |
| "grad_norm": 4.256100654602051, |
| "learning_rate": 7.720340221751376e-07, |
| "loss": 1.4473, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.004588775150017649, |
| "grad_norm": 9.501107215881348, |
| "learning_rate": 7.781512503836435e-07, |
| "loss": 1.4724, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.004706436051300153, |
| "grad_norm": 5.931169033050537, |
| "learning_rate": 7.841008620334974e-07, |
| "loss": 1.4485, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.004824096952582657, |
| "grad_norm": 4.35921049118042, |
| "learning_rate": 7.89891798308405e-07, |
| "loss": 1.4223, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.004941757853865161, |
| "grad_norm": 5.662957191467285, |
| "learning_rate": 7.955323035132494e-07, |
| "loss": 1.4246, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.0050594187551476644, |
| "grad_norm": 7.007707118988037, |
| "learning_rate": 8.01029995663981e-07, |
| "loss": 1.5855, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.005177079656430168, |
| "grad_norm": 4.280853748321533, |
| "learning_rate": 8.063919283598676e-07, |
| "loss": 1.3726, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.005294740557712672, |
| "grad_norm": 3.7020082473754883, |
| "learning_rate": 8.116246451989502e-07, |
| "loss": 1.3764, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.005412401458995176, |
| "grad_norm": 3.8952407836914062, |
| "learning_rate": 8.16734227789793e-07, |
| "loss": 1.3208, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.00553006236027768, |
| "grad_norm": 4.62631368637085, |
| "learning_rate": 8.217263382430935e-07, |
| "loss": 1.4958, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.0056477232615601836, |
| "grad_norm": 5.86137056350708, |
| "learning_rate": 8.266062568876716e-07, |
| "loss": 1.4349, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.005765384162842687, |
| "grad_norm": 4.414599895477295, |
| "learning_rate": 8.313789158407869e-07, |
| "loss": 1.3405, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.005883045064125191, |
| "grad_norm": 4.100174427032471, |
| "learning_rate": 8.360489289678585e-07, |
| "loss": 1.4465, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.006000705965407695, |
| "grad_norm": 4.100174427032471, |
| "learning_rate": 8.360489289678585e-07, |
| "loss": 1.5331, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.006118366866690199, |
| "grad_norm": 4.92950439453125, |
| "learning_rate": 8.406206186877934e-07, |
| "loss": 1.4608, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.006236027767972703, |
| "grad_norm": 3.7287988662719727, |
| "learning_rate": 8.450980400142567e-07, |
| "loss": 1.3522, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.0063536886692552065, |
| "grad_norm": 4.831335067749023, |
| "learning_rate": 8.494850021680092e-07, |
| "loss": 1.2695, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.00647134957053771, |
| "grad_norm": 4.615564346313477, |
| "learning_rate": 8.53785088048968e-07, |
| "loss": 1.4338, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.006589010471820214, |
| "grad_norm": 3.356062412261963, |
| "learning_rate": 8.580016718173995e-07, |
| "loss": 1.2757, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.006706671373102718, |
| "grad_norm": 5.449446678161621, |
| "learning_rate": 8.621379348003944e-07, |
| "loss": 1.356, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.006824332274385222, |
| "grad_norm": 3.9715359210968018, |
| "learning_rate": 8.661968799114842e-07, |
| "loss": 1.3804, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.006941993175667726, |
| "grad_norm": 3.3318145275115967, |
| "learning_rate": 8.701813447471218e-07, |
| "loss": 1.3677, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.0070596540769502295, |
| "grad_norm": 3.5082614421844482, |
| "learning_rate": 8.740940135031001e-07, |
| "loss": 1.2629, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.007177314978232733, |
| "grad_norm": 6.502823352813721, |
| "learning_rate": 8.779374278362456e-07, |
| "loss": 1.4748, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.007294975879515237, |
| "grad_norm": 4.117458820343018, |
| "learning_rate": 8.817139967814684e-07, |
| "loss": 1.2848, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.007412636780797741, |
| "grad_norm": 3.3300046920776367, |
| "learning_rate": 8.854260058210719e-07, |
| "loss": 1.3502, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.007530297682080245, |
| "grad_norm": 3.441394567489624, |
| "learning_rate": 8.890756251918216e-07, |
| "loss": 1.4513, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.007647958583362749, |
| "grad_norm": 4.066061019897461, |
| "learning_rate": 8.926649175053833e-07, |
| "loss": 1.2698, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.007765619484645252, |
| "grad_norm": 3.6549081802368164, |
| "learning_rate": 8.961958447491268e-07, |
| "loss": 1.2616, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.007883280385927756, |
| "grad_norm": 3.476713180541992, |
| "learning_rate": 8.996702747267907e-07, |
| "loss": 1.2418, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.00800094128721026, |
| "grad_norm": 4.902803421020508, |
| "learning_rate": 9.030899869919433e-07, |
| "loss": 1.448, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.008118602188492764, |
| "grad_norm": 4.095132827758789, |
| "learning_rate": 9.064566783214276e-07, |
| "loss": 1.3068, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.008236263089775268, |
| "grad_norm": 7.270352840423584, |
| "learning_rate": 9.097719677709341e-07, |
| "loss": 1.3759, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.008353923991057772, |
| "grad_norm": 11.875506401062012, |
| "learning_rate": 9.13037401350413e-07, |
| "loss": 1.3746, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.008471584892340275, |
| "grad_norm": 4.7003302574157715, |
| "learning_rate": 9.162544563531181e-07, |
| "loss": 1.2541, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.00858924579362278, |
| "grad_norm": 4.896581649780273, |
| "learning_rate": 9.194245453686276e-07, |
| "loss": 1.3253, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.008706906694905283, |
| "grad_norm": 4.074301242828369, |
| "learning_rate": 9.225490200071283e-07, |
| "loss": 1.2807, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.008824567596187787, |
| "grad_norm": 3.7367427349090576, |
| "learning_rate": 9.256291743595375e-07, |
| "loss": 1.3511, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.00894222849747029, |
| "grad_norm": 3.3777713775634766, |
| "learning_rate": 9.28666248215634e-07, |
| "loss": 1.2507, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.009059889398752794, |
| "grad_norm": 4.367025852203369, |
| "learning_rate": 9.316614300602277e-07, |
| "loss": 1.2705, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.009177550300035298, |
| "grad_norm": 5.1601362228393555, |
| "learning_rate": 9.346158598654879e-07, |
| "loss": 1.237, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.009295211201317802, |
| "grad_norm": 6.360751152038574, |
| "learning_rate": 9.375306316958498e-07, |
| "loss": 1.2599, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.009412872102600306, |
| "grad_norm": 3.5460028648376465, |
| "learning_rate": 9.404067961403955e-07, |
| "loss": 1.1984, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.00953053300388281, |
| "grad_norm": 3.6358044147491455, |
| "learning_rate": 9.432453625862408e-07, |
| "loss": 1.3739, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.009648193905165314, |
| "grad_norm": 3.501857042312622, |
| "learning_rate": 9.4604730134524e-07, |
| "loss": 1.2913, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.009765854806447817, |
| "grad_norm": 4.328304767608643, |
| "learning_rate": 9.488135456452205e-07, |
| "loss": 1.2586, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.009883515707730321, |
| "grad_norm": 4.727443218231201, |
| "learning_rate": 9.515449934959715e-07, |
| "loss": 1.2806, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.010001176609012825, |
| "grad_norm": 2.271977186203003, |
| "learning_rate": 9.542425094393247e-07, |
| "loss": 1.2396, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.010118837510295329, |
| "grad_norm": 5.965804100036621, |
| "learning_rate": 9.569069261918583e-07, |
| "loss": 1.2298, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.010236498411577833, |
| "grad_norm": 3.108452081680298, |
| "learning_rate": 9.59539046188037e-07, |
| "loss": 1.3033, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.010354159312860337, |
| "grad_norm": 3.3207242488861084, |
| "learning_rate": 9.621396430309406e-07, |
| "loss": 1.2343, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.01047182021414284, |
| "grad_norm": 4.672191619873047, |
| "learning_rate": 9.647094628571462e-07, |
| "loss": 1.2168, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.010589481115425344, |
| "grad_norm": 5.991580963134766, |
| "learning_rate": 9.672492256217836e-07, |
| "loss": 1.2139, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.010707142016707848, |
| "grad_norm": 3.4057729244232178, |
| "learning_rate": 9.69759626309309e-07, |
| "loss": 1.3043, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.010824802917990352, |
| "grad_norm": 2.9600138664245605, |
| "learning_rate": 9.722413360750842e-07, |
| "loss": 1.2457, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.010942463819272856, |
| "grad_norm": 5.642834663391113, |
| "learning_rate": 9.74695003322456e-07, |
| "loss": 1.2287, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.01106012472055536, |
| "grad_norm": 3.348522186279297, |
| "learning_rate": 9.771212547196622e-07, |
| "loss": 1.2436, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.011177785621837863, |
| "grad_norm": 2.7108304500579834, |
| "learning_rate": 9.795206961605466e-07, |
| "loss": 1.2419, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.011295446523120367, |
| "grad_norm": 4.255400657653809, |
| "learning_rate": 9.818939136727774e-07, |
| "loss": 1.2603, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.011413107424402871, |
| "grad_norm": 3.6323165893554688, |
| "learning_rate": 9.842414742769674e-07, |
| "loss": 1.2927, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.011530768325685375, |
| "grad_norm": 8.131080627441406, |
| "learning_rate": 9.865639267998492e-07, |
| "loss": 1.1739, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.011648429226967879, |
| "grad_norm": 3.69378662109375, |
| "learning_rate": 9.888618026444236e-07, |
| "loss": 1.2147, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.011766090128250382, |
| "grad_norm": 3.5188376903533936, |
| "learning_rate": 9.91135616519784e-07, |
| "loss": 1.1089, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.011883751029532886, |
| "grad_norm": 3.0614805221557617, |
| "learning_rate": 9.933858671331222e-07, |
| "loss": 1.2198, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.01200141193081539, |
| "grad_norm": 3.6954472064971924, |
| "learning_rate": 9.956130378462473e-07, |
| "loss": 1.1984, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.012119072832097894, |
| "grad_norm": 2.885023593902588, |
| "learning_rate": 9.978175972987748e-07, |
| "loss": 1.2223, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.012236733733380398, |
| "grad_norm": 5.114850044250488, |
| "learning_rate": 9.999999999999997e-07, |
| "loss": 1.3275, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.012354394634662902, |
| "grad_norm": 2.7323529720306396, |
| "learning_rate": 1e-06, |
| "loss": 1.1721, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.012472055535945405, |
| "grad_norm": 3.372161626815796, |
| "learning_rate": 1e-06, |
| "loss": 1.1658, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.01258971643722791, |
| "grad_norm": 3.060661554336548, |
| "learning_rate": 1e-06, |
| "loss": 1.181, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.012707377338510413, |
| "grad_norm": 4.042975425720215, |
| "learning_rate": 1e-06, |
| "loss": 1.2399, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.012825038239792917, |
| "grad_norm": 2.5195248126983643, |
| "learning_rate": 1e-06, |
| "loss": 1.1678, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.01294269914107542, |
| "grad_norm": 2.3371739387512207, |
| "learning_rate": 1e-06, |
| "loss": 1.0537, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.013060360042357924, |
| "grad_norm": 3.8651785850524902, |
| "learning_rate": 1e-06, |
| "loss": 1.227, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.013178020943640428, |
| "grad_norm": 3.3304595947265625, |
| "learning_rate": 1e-06, |
| "loss": 1.1826, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.013295681844922932, |
| "grad_norm": 5.385232448577881, |
| "learning_rate": 1e-06, |
| "loss": 1.2031, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.013413342746205436, |
| "grad_norm": 3.4247732162475586, |
| "learning_rate": 1e-06, |
| "loss": 1.2171, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.01353100364748794, |
| "grad_norm": 4.058660507202148, |
| "learning_rate": 1e-06, |
| "loss": 1.2008, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.013648664548770444, |
| "grad_norm": 7.034421443939209, |
| "learning_rate": 1e-06, |
| "loss": 1.1456, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.013766325450052947, |
| "grad_norm": 6.617616653442383, |
| "learning_rate": 1e-06, |
| "loss": 1.26, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.013883986351335451, |
| "grad_norm": 1.7746769189834595, |
| "learning_rate": 1e-06, |
| "loss": 1.1115, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.014001647252617955, |
| "grad_norm": 5.424281120300293, |
| "learning_rate": 1e-06, |
| "loss": 1.231, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.014119308153900459, |
| "grad_norm": 4.011491775512695, |
| "learning_rate": 1e-06, |
| "loss": 1.1399, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.014236969055182963, |
| "grad_norm": 3.085541009902954, |
| "learning_rate": 1e-06, |
| "loss": 1.1943, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.014354629956465467, |
| "grad_norm": 5.818262577056885, |
| "learning_rate": 1e-06, |
| "loss": 1.3063, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.01447229085774797, |
| "grad_norm": 2.9598960876464844, |
| "learning_rate": 1e-06, |
| "loss": 1.143, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.014589951759030474, |
| "grad_norm": 2.9467811584472656, |
| "learning_rate": 1e-06, |
| "loss": 1.2684, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.014707612660312978, |
| "grad_norm": 2.971536159515381, |
| "learning_rate": 1e-06, |
| "loss": 1.2013, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.014825273561595482, |
| "grad_norm": 3.8481204509735107, |
| "learning_rate": 1e-06, |
| "loss": 1.1681, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.014942934462877986, |
| "grad_norm": 2.742084503173828, |
| "learning_rate": 1e-06, |
| "loss": 1.1682, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.01506059536416049, |
| "grad_norm": 9.748570442199707, |
| "learning_rate": 1e-06, |
| "loss": 1.1744, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.015178256265442993, |
| "grad_norm": 2.9641549587249756, |
| "learning_rate": 1e-06, |
| "loss": 1.1502, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.015295917166725497, |
| "grad_norm": 2.597259044647217, |
| "learning_rate": 1e-06, |
| "loss": 1.0995, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.015413578068008001, |
| "grad_norm": 4.812168121337891, |
| "learning_rate": 1e-06, |
| "loss": 1.1822, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.015531238969290505, |
| "grad_norm": 3.0024499893188477, |
| "learning_rate": 1e-06, |
| "loss": 1.2001, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.015648899870573007, |
| "grad_norm": 1.608038306236267, |
| "learning_rate": 1e-06, |
| "loss": 1.1332, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.015766560771855512, |
| "grad_norm": 1.6776491403579712, |
| "learning_rate": 1e-06, |
| "loss": 1.0944, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.015884221673138015, |
| "grad_norm": 2.8363330364227295, |
| "learning_rate": 1e-06, |
| "loss": 1.1976, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.01600188257442052, |
| "grad_norm": 3.921804904937744, |
| "learning_rate": 1e-06, |
| "loss": 1.1713, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.016119543475703022, |
| "grad_norm": 4.1015777587890625, |
| "learning_rate": 1e-06, |
| "loss": 1.1039, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.016237204376985528, |
| "grad_norm": 2.807033061981201, |
| "learning_rate": 1e-06, |
| "loss": 1.1538, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.01635486527826803, |
| "grad_norm": 3.5602121353149414, |
| "learning_rate": 1e-06, |
| "loss": 1.1008, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.016472526179550535, |
| "grad_norm": 2.0294995307922363, |
| "learning_rate": 1e-06, |
| "loss": 1.1416, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.016590187080833037, |
| "grad_norm": 2.332132339477539, |
| "learning_rate": 1e-06, |
| "loss": 1.0754, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.016707847982115543, |
| "grad_norm": 3.4389145374298096, |
| "learning_rate": 1e-06, |
| "loss": 1.1983, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.016825508883398045, |
| "grad_norm": 3.4869580268859863, |
| "learning_rate": 1e-06, |
| "loss": 1.0771, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.01694316978468055, |
| "grad_norm": 3.293520212173462, |
| "learning_rate": 1e-06, |
| "loss": 1.0733, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.017060830685963053, |
| "grad_norm": 3.1414260864257812, |
| "learning_rate": 1e-06, |
| "loss": 1.2403, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.01717849158724556, |
| "grad_norm": 2.8440492153167725, |
| "learning_rate": 1e-06, |
| "loss": 1.153, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.01729615248852806, |
| "grad_norm": 3.1873555183410645, |
| "learning_rate": 1e-06, |
| "loss": 1.192, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.017413813389810566, |
| "grad_norm": 4.0402069091796875, |
| "learning_rate": 1e-06, |
| "loss": 1.0346, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.017531474291093068, |
| "grad_norm": 2.9087209701538086, |
| "learning_rate": 1e-06, |
| "loss": 1.1442, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.017649135192375574, |
| "grad_norm": 2.827258825302124, |
| "learning_rate": 1e-06, |
| "loss": 1.0892, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.017766796093658076, |
| "grad_norm": 4.406788349151611, |
| "learning_rate": 1e-06, |
| "loss": 1.135, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.01788445699494058, |
| "grad_norm": 5.041503429412842, |
| "learning_rate": 1e-06, |
| "loss": 1.1424, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.018002117896223083, |
| "grad_norm": 2.4025824069976807, |
| "learning_rate": 1e-06, |
| "loss": 1.1524, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.01811977879750559, |
| "grad_norm": 4.14438009262085, |
| "learning_rate": 1e-06, |
| "loss": 1.1283, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.01823743969878809, |
| "grad_norm": 3.5507988929748535, |
| "learning_rate": 1e-06, |
| "loss": 1.0656, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.018355100600070597, |
| "grad_norm": 2.2941172122955322, |
| "learning_rate": 1e-06, |
| "loss": 1.0974, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.0184727615013531, |
| "grad_norm": 2.8862037658691406, |
| "learning_rate": 1e-06, |
| "loss": 1.1994, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.018590422402635604, |
| "grad_norm": 4.5398054122924805, |
| "learning_rate": 1e-06, |
| "loss": 1.0714, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.018708083303918106, |
| "grad_norm": 2.8482394218444824, |
| "learning_rate": 1e-06, |
| "loss": 1.1728, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.018825744205200612, |
| "grad_norm": 3.723759412765503, |
| "learning_rate": 1e-06, |
| "loss": 1.1599, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.018943405106483114, |
| "grad_norm": 2.3328919410705566, |
| "learning_rate": 1e-06, |
| "loss": 1.0474, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.01906106600776562, |
| "grad_norm": 3.5935370922088623, |
| "learning_rate": 1e-06, |
| "loss": 1.1742, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.01917872690904812, |
| "grad_norm": 4.418773651123047, |
| "learning_rate": 1e-06, |
| "loss": 1.1212, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.019296387810330627, |
| "grad_norm": 2.6029717922210693, |
| "learning_rate": 1e-06, |
| "loss": 1.0893, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.01941404871161313, |
| "grad_norm": 5.034754753112793, |
| "learning_rate": 1e-06, |
| "loss": 1.0841, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.019531709612895635, |
| "grad_norm": 3.910154104232788, |
| "learning_rate": 1e-06, |
| "loss": 1.0441, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.019649370514178137, |
| "grad_norm": 5.273410797119141, |
| "learning_rate": 1e-06, |
| "loss": 1.1746, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.019767031415460642, |
| "grad_norm": 12.92271614074707, |
| "learning_rate": 1e-06, |
| "loss": 1.082, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.019884692316743145, |
| "grad_norm": 4.1488356590271, |
| "learning_rate": 1e-06, |
| "loss": 1.1214, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.02000235321802565, |
| "grad_norm": 2.6337387561798096, |
| "learning_rate": 1e-06, |
| "loss": 1.1608, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.020120014119308152, |
| "grad_norm": 4.516927242279053, |
| "learning_rate": 1e-06, |
| "loss": 1.0716, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.020237675020590658, |
| "grad_norm": 3.008997678756714, |
| "learning_rate": 1e-06, |
| "loss": 1.0661, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.02035533592187316, |
| "grad_norm": 2.972275495529175, |
| "learning_rate": 1e-06, |
| "loss": 1.0881, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.020472996823155665, |
| "grad_norm": 3.5600616931915283, |
| "learning_rate": 1e-06, |
| "loss": 1.0664, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.020590657724438167, |
| "grad_norm": 4.367973327636719, |
| "learning_rate": 1e-06, |
| "loss": 1.0726, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.020708318625720673, |
| "grad_norm": 3.5119714736938477, |
| "learning_rate": 1e-06, |
| "loss": 1.1287, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.020825979527003175, |
| "grad_norm": 1.7865749597549438, |
| "learning_rate": 1e-06, |
| "loss": 1.0975, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.02094364042828568, |
| "grad_norm": 3.772221088409424, |
| "learning_rate": 1e-06, |
| "loss": 1.0591, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.021061301329568183, |
| "grad_norm": 3.0297372341156006, |
| "learning_rate": 1e-06, |
| "loss": 1.0511, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.02117896223085069, |
| "grad_norm": 3.609771728515625, |
| "learning_rate": 1e-06, |
| "loss": 1.0995, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.02129662313213319, |
| "grad_norm": 3.8480026721954346, |
| "learning_rate": 1e-06, |
| "loss": 1.0532, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.021414284033415696, |
| "grad_norm": 4.087072849273682, |
| "learning_rate": 1e-06, |
| "loss": 1.0413, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.021531944934698198, |
| "grad_norm": 1.8740599155426025, |
| "learning_rate": 1e-06, |
| "loss": 1.0721, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.021649605835980704, |
| "grad_norm": 4.0999436378479, |
| "learning_rate": 1e-06, |
| "loss": 1.1368, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.021767266737263206, |
| "grad_norm": 2.669431686401367, |
| "learning_rate": 1e-06, |
| "loss": 1.1078, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.02188492763854571, |
| "grad_norm": 2.4161550998687744, |
| "learning_rate": 1e-06, |
| "loss": 0.9679, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.022002588539828213, |
| "grad_norm": 3.029654026031494, |
| "learning_rate": 1e-06, |
| "loss": 1.0193, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.02212024944111072, |
| "grad_norm": 3.7600207328796387, |
| "learning_rate": 1e-06, |
| "loss": 1.1391, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.02223791034239322, |
| "grad_norm": 2.874558210372925, |
| "learning_rate": 1e-06, |
| "loss": 1.0098, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.022355571243675727, |
| "grad_norm": 6.042860507965088, |
| "learning_rate": 1e-06, |
| "loss": 0.9819, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.02247323214495823, |
| "grad_norm": 1.992511510848999, |
| "learning_rate": 1e-06, |
| "loss": 0.9834, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.022590893046240734, |
| "grad_norm": 6.9423065185546875, |
| "learning_rate": 1e-06, |
| "loss": 1.0761, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.022708553947523236, |
| "grad_norm": 1.9064056873321533, |
| "learning_rate": 1e-06, |
| "loss": 0.9993, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.022826214848805742, |
| "grad_norm": 2.7809596061706543, |
| "learning_rate": 1e-06, |
| "loss": 1.1212, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.022943875750088244, |
| "grad_norm": 3.407334804534912, |
| "learning_rate": 1e-06, |
| "loss": 1.0231, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.02306153665137075, |
| "grad_norm": 3.5821261405944824, |
| "learning_rate": 1e-06, |
| "loss": 0.9963, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.02317919755265325, |
| "grad_norm": 3.629689931869507, |
| "learning_rate": 1e-06, |
| "loss": 1.0986, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.023296858453935757, |
| "grad_norm": 9.692272186279297, |
| "learning_rate": 1e-06, |
| "loss": 1.0639, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.02341451935521826, |
| "grad_norm": 2.2132132053375244, |
| "learning_rate": 1e-06, |
| "loss": 0.967, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.023532180256500765, |
| "grad_norm": 3.079921007156372, |
| "learning_rate": 1e-06, |
| "loss": 1.004, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.023649841157783267, |
| "grad_norm": 3.2911813259124756, |
| "learning_rate": 1e-06, |
| "loss": 1.0534, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.023767502059065772, |
| "grad_norm": 4.560142517089844, |
| "learning_rate": 1e-06, |
| "loss": 0.9946, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.023885162960348275, |
| "grad_norm": 2.6420490741729736, |
| "learning_rate": 1e-06, |
| "loss": 1.0177, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.02400282386163078, |
| "grad_norm": 2.756321668624878, |
| "learning_rate": 1e-06, |
| "loss": 1.0444, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.024120484762913282, |
| "grad_norm": 2.4896962642669678, |
| "learning_rate": 1e-06, |
| "loss": 1.0205, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.024238145664195788, |
| "grad_norm": 10.722167015075684, |
| "learning_rate": 1e-06, |
| "loss": 1.0348, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.02435580656547829, |
| "grad_norm": 2.6661248207092285, |
| "learning_rate": 1e-06, |
| "loss": 1.0385, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.024473467466760795, |
| "grad_norm": 4.628503799438477, |
| "learning_rate": 1e-06, |
| "loss": 0.972, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.024591128368043298, |
| "grad_norm": 3.111830234527588, |
| "learning_rate": 1e-06, |
| "loss": 1.1072, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.024708789269325803, |
| "grad_norm": 4.0781402587890625, |
| "learning_rate": 1e-06, |
| "loss": 1.0554, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.024826450170608305, |
| "grad_norm": 5.678940773010254, |
| "learning_rate": 1e-06, |
| "loss": 1.1226, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.02494411107189081, |
| "grad_norm": 5.578497886657715, |
| "learning_rate": 1e-06, |
| "loss": 0.9829, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.025061771973173313, |
| "grad_norm": 2.8906311988830566, |
| "learning_rate": 1e-06, |
| "loss": 0.9563, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.02517943287445582, |
| "grad_norm": 3.579702138900757, |
| "learning_rate": 1e-06, |
| "loss": 1.0379, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.02529709377573832, |
| "grad_norm": 3.536996841430664, |
| "learning_rate": 1e-06, |
| "loss": 0.9938, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.025414754677020826, |
| "grad_norm": 2.8766660690307617, |
| "learning_rate": 1e-06, |
| "loss": 0.9176, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.025532415578303328, |
| "grad_norm": 6.301769256591797, |
| "learning_rate": 1e-06, |
| "loss": 1.1188, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.025650076479585834, |
| "grad_norm": 6.618703365325928, |
| "learning_rate": 1e-06, |
| "loss": 1.0338, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.025767737380868336, |
| "grad_norm": 3.9064433574676514, |
| "learning_rate": 1e-06, |
| "loss": 0.9414, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.02588539828215084, |
| "grad_norm": 2.456340789794922, |
| "learning_rate": 1e-06, |
| "loss": 1.0252, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.026003059183433343, |
| "grad_norm": 3.485226631164551, |
| "learning_rate": 1e-06, |
| "loss": 1.0534, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.02612072008471585, |
| "grad_norm": 4.325056552886963, |
| "learning_rate": 1e-06, |
| "loss": 1.0813, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.02623838098599835, |
| "grad_norm": 3.2998199462890625, |
| "learning_rate": 1e-06, |
| "loss": 1.0372, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.026356041887280857, |
| "grad_norm": 3.8687708377838135, |
| "learning_rate": 1e-06, |
| "loss": 0.9828, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.02647370278856336, |
| "grad_norm": 3.572066307067871, |
| "learning_rate": 1e-06, |
| "loss": 1.0074, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.026591363689845864, |
| "grad_norm": 2.7380471229553223, |
| "learning_rate": 1e-06, |
| "loss": 0.8806, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.026709024591128366, |
| "grad_norm": 4.645740032196045, |
| "learning_rate": 1e-06, |
| "loss": 1.0778, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.026826685492410872, |
| "grad_norm": 4.34233283996582, |
| "learning_rate": 1e-06, |
| "loss": 1.0747, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.026944346393693374, |
| "grad_norm": 3.2557373046875, |
| "learning_rate": 1e-06, |
| "loss": 0.9457, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.02706200729497588, |
| "grad_norm": 4.202934741973877, |
| "learning_rate": 1e-06, |
| "loss": 0.8844, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.02717966819625838, |
| "grad_norm": 2.7801413536071777, |
| "learning_rate": 1e-06, |
| "loss": 1.0413, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.027297329097540887, |
| "grad_norm": 3.5069901943206787, |
| "learning_rate": 1e-06, |
| "loss": 1.0001, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.02741498999882339, |
| "grad_norm": 3.3977630138397217, |
| "learning_rate": 1e-06, |
| "loss": 1.0629, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.027532650900105895, |
| "grad_norm": 2.3797481060028076, |
| "learning_rate": 1e-06, |
| "loss": 0.9498, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.027650311801388397, |
| "grad_norm": 2.1209325790405273, |
| "learning_rate": 1e-06, |
| "loss": 1.2058, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.027767972702670903, |
| "grad_norm": 3.580355644226074, |
| "learning_rate": 1e-06, |
| "loss": 1.0018, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.027885633603953405, |
| "grad_norm": 4.806403160095215, |
| "learning_rate": 1e-06, |
| "loss": 1.0458, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.02800329450523591, |
| "grad_norm": 2.399860382080078, |
| "learning_rate": 1e-06, |
| "loss": 0.9849, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.028120955406518412, |
| "grad_norm": 3.2041261196136475, |
| "learning_rate": 1e-06, |
| "loss": 1.0088, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.028238616307800918, |
| "grad_norm": 5.033557891845703, |
| "learning_rate": 1e-06, |
| "loss": 1.1207, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.02835627720908342, |
| "grad_norm": 2.3300740718841553, |
| "learning_rate": 1e-06, |
| "loss": 0.987, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.028473938110365925, |
| "grad_norm": 3.4651834964752197, |
| "learning_rate": 1e-06, |
| "loss": 0.9708, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.028591599011648428, |
| "grad_norm": 5.092766284942627, |
| "learning_rate": 1e-06, |
| "loss": 0.9562, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.028709259912930933, |
| "grad_norm": 3.308539390563965, |
| "learning_rate": 1e-06, |
| "loss": 1.0092, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.028826920814213435, |
| "grad_norm": 2.86190128326416, |
| "learning_rate": 1e-06, |
| "loss": 0.9801, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.02894458171549594, |
| "grad_norm": 2.9260149002075195, |
| "learning_rate": 1e-06, |
| "loss": 0.8868, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.029062242616778443, |
| "grad_norm": 5.265139102935791, |
| "learning_rate": 1e-06, |
| "loss": 0.9974, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.02917990351806095, |
| "grad_norm": 5.411443710327148, |
| "learning_rate": 1e-06, |
| "loss": 1.0284, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.02929756441934345, |
| "grad_norm": 4.155786037445068, |
| "learning_rate": 1e-06, |
| "loss": 1.0795, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.029415225320625956, |
| "grad_norm": 2.910003900527954, |
| "learning_rate": 1e-06, |
| "loss": 1.0173, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.029532886221908458, |
| "grad_norm": 2.214527130126953, |
| "learning_rate": 1e-06, |
| "loss": 0.98, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.029650547123190964, |
| "grad_norm": 6.6619086265563965, |
| "learning_rate": 1e-06, |
| "loss": 0.9513, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.029768208024473466, |
| "grad_norm": 2.5836620330810547, |
| "learning_rate": 1e-06, |
| "loss": 0.9591, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.02988586892575597, |
| "grad_norm": 3.5939230918884277, |
| "learning_rate": 1e-06, |
| "loss": 0.9877, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.030003529827038473, |
| "grad_norm": 3.4384496212005615, |
| "learning_rate": 1e-06, |
| "loss": 0.8965, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.03012119072832098, |
| "grad_norm": 5.588817119598389, |
| "learning_rate": 1e-06, |
| "loss": 0.9589, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.03023885162960348, |
| "grad_norm": 5.522799015045166, |
| "learning_rate": 1e-06, |
| "loss": 1.0202, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.030356512530885987, |
| "grad_norm": 3.071120500564575, |
| "learning_rate": 1e-06, |
| "loss": 0.9195, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.03047417343216849, |
| "grad_norm": 8.155257225036621, |
| "learning_rate": 1e-06, |
| "loss": 0.9719, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.030591834333450994, |
| "grad_norm": 2.8112828731536865, |
| "learning_rate": 1e-06, |
| "loss": 0.9657, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.030709495234733496, |
| "grad_norm": 6.376706600189209, |
| "learning_rate": 1e-06, |
| "loss": 0.9453, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.030827156136016002, |
| "grad_norm": 2.884045124053955, |
| "learning_rate": 1e-06, |
| "loss": 0.8969, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.030944817037298504, |
| "grad_norm": 2.4637763500213623, |
| "learning_rate": 1e-06, |
| "loss": 0.8479, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.03106247793858101, |
| "grad_norm": 4.903054237365723, |
| "learning_rate": 1e-06, |
| "loss": 0.9653, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.03118013883986351, |
| "grad_norm": 3.2967870235443115, |
| "learning_rate": 1e-06, |
| "loss": 0.8837, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.031297799741146014, |
| "grad_norm": 2.45072078704834, |
| "learning_rate": 1e-06, |
| "loss": 0.8762, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.03141546064242852, |
| "grad_norm": 20.303709030151367, |
| "learning_rate": 1e-06, |
| "loss": 0.9166, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.031533121543711025, |
| "grad_norm": 6.09748649597168, |
| "learning_rate": 1e-06, |
| "loss": 0.9721, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.03165078244499353, |
| "grad_norm": 2.512051820755005, |
| "learning_rate": 1e-06, |
| "loss": 0.9464, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.03176844334627603, |
| "grad_norm": 4.073144435882568, |
| "learning_rate": 1e-06, |
| "loss": 0.8717, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.031886104247558535, |
| "grad_norm": 4.345306873321533, |
| "learning_rate": 1e-06, |
| "loss": 0.8799, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.03200376514884104, |
| "grad_norm": 4.083920478820801, |
| "learning_rate": 1e-06, |
| "loss": 1.0133, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.032121426050123546, |
| "grad_norm": 2.9541187286376953, |
| "learning_rate": 1e-06, |
| "loss": 0.9744, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.032239086951406044, |
| "grad_norm": 3.141975164413452, |
| "learning_rate": 1e-06, |
| "loss": 0.9776, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.03235674785268855, |
| "grad_norm": 4.691943168640137, |
| "learning_rate": 1e-06, |
| "loss": 0.883, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.032474408753971055, |
| "grad_norm": 5.815479755401611, |
| "learning_rate": 1e-06, |
| "loss": 0.8965, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.03259206965525356, |
| "grad_norm": 4.220132350921631, |
| "learning_rate": 1e-06, |
| "loss": 0.9603, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.03270973055653606, |
| "grad_norm": 3.9638681411743164, |
| "learning_rate": 1e-06, |
| "loss": 0.9201, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.032827391457818565, |
| "grad_norm": 2.6332955360412598, |
| "learning_rate": 1e-06, |
| "loss": 0.8148, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.03294505235910107, |
| "grad_norm": 4.9401397705078125, |
| "learning_rate": 1e-06, |
| "loss": 0.9787, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.033062713260383576, |
| "grad_norm": 3.0980424880981445, |
| "learning_rate": 1e-06, |
| "loss": 0.8342, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.033180374161666075, |
| "grad_norm": 3.9119269847869873, |
| "learning_rate": 1e-06, |
| "loss": 0.9128, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.03329803506294858, |
| "grad_norm": 5.621601104736328, |
| "learning_rate": 1e-06, |
| "loss": 0.8966, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.033415695964231086, |
| "grad_norm": 3.8806064128875732, |
| "learning_rate": 1e-06, |
| "loss": 0.9079, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.03353335686551359, |
| "grad_norm": 4.603435039520264, |
| "learning_rate": 1e-06, |
| "loss": 0.8963, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.03365101776679609, |
| "grad_norm": 4.245962142944336, |
| "learning_rate": 1e-06, |
| "loss": 0.9149, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.033768678668078596, |
| "grad_norm": 4.452668190002441, |
| "learning_rate": 1e-06, |
| "loss": 0.961, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.0338863395693611, |
| "grad_norm": 3.492171287536621, |
| "learning_rate": 1e-06, |
| "loss": 0.8587, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.03400400047064361, |
| "grad_norm": 4.348937511444092, |
| "learning_rate": 1e-06, |
| "loss": 1.0209, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.034121661371926106, |
| "grad_norm": 3.5924530029296875, |
| "learning_rate": 1e-06, |
| "loss": 0.9734, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.03423932227320861, |
| "grad_norm": 4.881322860717773, |
| "learning_rate": 1e-06, |
| "loss": 0.8807, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.03435698317449112, |
| "grad_norm": 3.300433397293091, |
| "learning_rate": 1e-06, |
| "loss": 0.8877, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.03447464407577362, |
| "grad_norm": 5.074859619140625, |
| "learning_rate": 1e-06, |
| "loss": 0.9591, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.03459230497705612, |
| "grad_norm": 4.042046070098877, |
| "learning_rate": 1e-06, |
| "loss": 0.9333, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.034709965878338626, |
| "grad_norm": 5.939486980438232, |
| "learning_rate": 1e-06, |
| "loss": 0.9156, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.03482762677962113, |
| "grad_norm": 9.997541427612305, |
| "learning_rate": 1e-06, |
| "loss": 0.9161, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.03494528768090364, |
| "grad_norm": 3.6703906059265137, |
| "learning_rate": 1e-06, |
| "loss": 0.8585, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.035062948582186136, |
| "grad_norm": 3.217092990875244, |
| "learning_rate": 1e-06, |
| "loss": 0.9021, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.03518060948346864, |
| "grad_norm": 3.009840488433838, |
| "learning_rate": 1e-06, |
| "loss": 0.9015, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.03529827038475115, |
| "grad_norm": 3.5084877014160156, |
| "learning_rate": 1e-06, |
| "loss": 0.7897, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.03541593128603365, |
| "grad_norm": 2.707977056503296, |
| "learning_rate": 1e-06, |
| "loss": 0.9651, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.03553359218731615, |
| "grad_norm": 5.24489164352417, |
| "learning_rate": 1e-06, |
| "loss": 0.8964, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.03565125308859866, |
| "grad_norm": 2.6105916500091553, |
| "learning_rate": 1e-06, |
| "loss": 0.926, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.03576891398988116, |
| "grad_norm": 3.720118761062622, |
| "learning_rate": 1e-06, |
| "loss": 0.8478, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.03588657489116367, |
| "grad_norm": 4.300241470336914, |
| "learning_rate": 1e-06, |
| "loss": 0.8957, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.03600423579244617, |
| "grad_norm": 4.0967512130737305, |
| "learning_rate": 1e-06, |
| "loss": 0.8142, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.03612189669372867, |
| "grad_norm": 4.093652725219727, |
| "learning_rate": 1e-06, |
| "loss": 0.8621, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.03623955759501118, |
| "grad_norm": 3.4667654037475586, |
| "learning_rate": 1e-06, |
| "loss": 0.8997, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.03635721849629368, |
| "grad_norm": 3.124798536300659, |
| "learning_rate": 1e-06, |
| "loss": 0.8475, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.03647487939757618, |
| "grad_norm": 4.9428486824035645, |
| "learning_rate": 1e-06, |
| "loss": 1.0057, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.03659254029885869, |
| "grad_norm": 2.985236167907715, |
| "learning_rate": 1e-06, |
| "loss": 0.8408, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.03671020120014119, |
| "grad_norm": 2.861386775970459, |
| "learning_rate": 1e-06, |
| "loss": 0.8258, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.0368278621014237, |
| "grad_norm": 3.8676459789276123, |
| "learning_rate": 1e-06, |
| "loss": 0.9179, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.0369455230027062, |
| "grad_norm": 4.594061374664307, |
| "learning_rate": 1e-06, |
| "loss": 0.9291, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.0370631839039887, |
| "grad_norm": 3.5613086223602295, |
| "learning_rate": 1e-06, |
| "loss": 0.9853, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.03718084480527121, |
| "grad_norm": 3.170051097869873, |
| "learning_rate": 1e-06, |
| "loss": 0.8249, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.037298505706553714, |
| "grad_norm": 2.451202869415283, |
| "learning_rate": 1e-06, |
| "loss": 0.9602, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.03741616660783621, |
| "grad_norm": 14.758941650390625, |
| "learning_rate": 1e-06, |
| "loss": 0.7851, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.03753382750911872, |
| "grad_norm": 5.861412525177002, |
| "learning_rate": 1e-06, |
| "loss": 0.9554, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.037651488410401224, |
| "grad_norm": 11.434176445007324, |
| "learning_rate": 1e-06, |
| "loss": 0.8162, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.03776914931168373, |
| "grad_norm": 2.5073564052581787, |
| "learning_rate": 1e-06, |
| "loss": 0.925, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.03788681021296623, |
| "grad_norm": 4.871037006378174, |
| "learning_rate": 1e-06, |
| "loss": 0.9359, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.038004471114248733, |
| "grad_norm": 4.676340103149414, |
| "learning_rate": 1e-06, |
| "loss": 0.8491, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.03812213201553124, |
| "grad_norm": 3.828737735748291, |
| "learning_rate": 1e-06, |
| "loss": 0.8098, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.038239792916813745, |
| "grad_norm": 3.128114938735962, |
| "learning_rate": 1e-06, |
| "loss": 0.8507, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.03835745381809624, |
| "grad_norm": 3.4662718772888184, |
| "learning_rate": 1e-06, |
| "loss": 0.9821, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.03847511471937875, |
| "grad_norm": 8.503030776977539, |
| "learning_rate": 1e-06, |
| "loss": 0.8421, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.038592775620661254, |
| "grad_norm": 5.41649055480957, |
| "learning_rate": 1e-06, |
| "loss": 0.9189, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.03871043652194376, |
| "grad_norm": 3.358555793762207, |
| "learning_rate": 1e-06, |
| "loss": 0.831, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.03882809742322626, |
| "grad_norm": 5.377659320831299, |
| "learning_rate": 1e-06, |
| "loss": 0.8219, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.038945758324508764, |
| "grad_norm": 2.828697681427002, |
| "learning_rate": 1e-06, |
| "loss": 0.9375, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.03906341922579127, |
| "grad_norm": 4.016804218292236, |
| "learning_rate": 1e-06, |
| "loss": 0.9602, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.039181080127073775, |
| "grad_norm": 3.2497718334198, |
| "learning_rate": 1e-06, |
| "loss": 0.8651, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.039298741028356274, |
| "grad_norm": 2.9022653102874756, |
| "learning_rate": 1e-06, |
| "loss": 0.8472, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.03941640192963878, |
| "grad_norm": 3.2514617443084717, |
| "learning_rate": 1e-06, |
| "loss": 1.0098, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.039534062830921285, |
| "grad_norm": 5.1763997077941895, |
| "learning_rate": 1e-06, |
| "loss": 0.8482, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.03965172373220379, |
| "grad_norm": 3.454880952835083, |
| "learning_rate": 1e-06, |
| "loss": 0.9632, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.03976938463348629, |
| "grad_norm": 3.575077772140503, |
| "learning_rate": 1e-06, |
| "loss": 0.8978, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.039887045534768795, |
| "grad_norm": 4.193572998046875, |
| "learning_rate": 1e-06, |
| "loss": 0.9332, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.0400047064360513, |
| "grad_norm": 6.426424980163574, |
| "learning_rate": 1e-06, |
| "loss": 0.8009, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.040122367337333806, |
| "grad_norm": 3.2945382595062256, |
| "learning_rate": 1e-06, |
| "loss": 0.8918, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.040240028238616304, |
| "grad_norm": 4.680812358856201, |
| "learning_rate": 1e-06, |
| "loss": 0.8918, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.04035768913989881, |
| "grad_norm": 2.4894278049468994, |
| "learning_rate": 1e-06, |
| "loss": 0.884, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.040475350041181316, |
| "grad_norm": 3.5571224689483643, |
| "learning_rate": 1e-06, |
| "loss": 0.8372, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.04059301094246382, |
| "grad_norm": 5.702209949493408, |
| "learning_rate": 1e-06, |
| "loss": 0.8881, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.04071067184374632, |
| "grad_norm": 4.254310607910156, |
| "learning_rate": 1e-06, |
| "loss": 0.9545, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.040828332745028825, |
| "grad_norm": 5.766552925109863, |
| "learning_rate": 1e-06, |
| "loss": 0.8308, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.04094599364631133, |
| "grad_norm": 5.9502668380737305, |
| "learning_rate": 1e-06, |
| "loss": 0.863, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.041063654547593836, |
| "grad_norm": 3.360926389694214, |
| "learning_rate": 1e-06, |
| "loss": 0.8732, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.041181315448876335, |
| "grad_norm": 7.237725734710693, |
| "learning_rate": 1e-06, |
| "loss": 0.7984, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.04129897635015884, |
| "grad_norm": 2.8876423835754395, |
| "learning_rate": 1e-06, |
| "loss": 0.875, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.041416637251441346, |
| "grad_norm": 3.3526382446289062, |
| "learning_rate": 1e-06, |
| "loss": 0.8522, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.04153429815272385, |
| "grad_norm": 14.322229385375977, |
| "learning_rate": 1e-06, |
| "loss": 0.7397, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.04165195905400635, |
| "grad_norm": 4.687523365020752, |
| "learning_rate": 1e-06, |
| "loss": 0.7707, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.041769619955288856, |
| "grad_norm": 3.8648178577423096, |
| "learning_rate": 1e-06, |
| "loss": 0.812, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.04188728085657136, |
| "grad_norm": 4.16708517074585, |
| "learning_rate": 1e-06, |
| "loss": 0.8107, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.04200494175785387, |
| "grad_norm": 5.837557792663574, |
| "learning_rate": 1e-06, |
| "loss": 0.7445, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.042122602659136366, |
| "grad_norm": 3.4978277683258057, |
| "learning_rate": 1e-06, |
| "loss": 0.9116, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.04224026356041887, |
| "grad_norm": 2.914486885070801, |
| "learning_rate": 1e-06, |
| "loss": 0.9159, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.04235792446170138, |
| "grad_norm": 3.2980706691741943, |
| "learning_rate": 1e-06, |
| "loss": 0.8705, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.04247558536298388, |
| "grad_norm": 6.628724098205566, |
| "learning_rate": 1e-06, |
| "loss": 0.861, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.04259324626426638, |
| "grad_norm": 3.676485776901245, |
| "learning_rate": 1e-06, |
| "loss": 0.7846, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.042710907165548886, |
| "grad_norm": 5.055513381958008, |
| "learning_rate": 1e-06, |
| "loss": 0.7648, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.04282856806683139, |
| "grad_norm": 4.655031681060791, |
| "learning_rate": 1e-06, |
| "loss": 0.8165, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.0429462289681139, |
| "grad_norm": 6.790380954742432, |
| "learning_rate": 1e-06, |
| "loss": 0.8033, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.043063889869396396, |
| "grad_norm": 3.321683645248413, |
| "learning_rate": 1e-06, |
| "loss": 0.8015, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.0431815507706789, |
| "grad_norm": 6.742896556854248, |
| "learning_rate": 1e-06, |
| "loss": 0.8881, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.04329921167196141, |
| "grad_norm": 4.620999336242676, |
| "learning_rate": 1e-06, |
| "loss": 0.8187, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.04341687257324391, |
| "grad_norm": 3.710247755050659, |
| "learning_rate": 1e-06, |
| "loss": 1.1444, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.04353453347452641, |
| "grad_norm": 4.3619866371154785, |
| "learning_rate": 1e-06, |
| "loss": 0.8836, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.04365219437580892, |
| "grad_norm": 4.016956806182861, |
| "learning_rate": 1e-06, |
| "loss": 0.8193, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.04376985527709142, |
| "grad_norm": 3.5968549251556396, |
| "learning_rate": 1e-06, |
| "loss": 0.8534, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.04388751617837393, |
| "grad_norm": 5.6325836181640625, |
| "learning_rate": 1e-06, |
| "loss": 0.7794, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.04400517707965643, |
| "grad_norm": 5.343469142913818, |
| "learning_rate": 1e-06, |
| "loss": 0.876, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.04412283798093893, |
| "grad_norm": 3.2438323497772217, |
| "learning_rate": 1e-06, |
| "loss": 0.809, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.04424049888222144, |
| "grad_norm": 3.4838759899139404, |
| "learning_rate": 1e-06, |
| "loss": 0.8749, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.04435815978350394, |
| "grad_norm": 3.5921363830566406, |
| "learning_rate": 1e-06, |
| "loss": 0.8455, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.04447582068478644, |
| "grad_norm": 2.481532335281372, |
| "learning_rate": 1e-06, |
| "loss": 0.9034, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.04459348158606895, |
| "grad_norm": 2.756016731262207, |
| "learning_rate": 1e-06, |
| "loss": 0.8722, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.04471114248735145, |
| "grad_norm": 4.9279913902282715, |
| "learning_rate": 1e-06, |
| "loss": 0.7027, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.04482880338863396, |
| "grad_norm": 4.436341285705566, |
| "learning_rate": 1e-06, |
| "loss": 0.9594, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.04494646428991646, |
| "grad_norm": 6.906442165374756, |
| "learning_rate": 1e-06, |
| "loss": 0.8929, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.04506412519119896, |
| "grad_norm": 3.315844774246216, |
| "learning_rate": 1e-06, |
| "loss": 0.7651, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.04518178609248147, |
| "grad_norm": 5.387514591217041, |
| "learning_rate": 1e-06, |
| "loss": 0.826, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.045299446993763974, |
| "grad_norm": 4.287894248962402, |
| "learning_rate": 1e-06, |
| "loss": 0.8288, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.04541710789504647, |
| "grad_norm": 3.736056327819824, |
| "learning_rate": 1e-06, |
| "loss": 0.9377, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.04553476879632898, |
| "grad_norm": 3.92673659324646, |
| "learning_rate": 1e-06, |
| "loss": 0.8998, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.045652429697611484, |
| "grad_norm": 6.745547294616699, |
| "learning_rate": 1e-06, |
| "loss": 0.8782, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.04577009059889399, |
| "grad_norm": 3.430509567260742, |
| "learning_rate": 1e-06, |
| "loss": 0.8118, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.04588775150017649, |
| "grad_norm": 3.780449867248535, |
| "learning_rate": 1e-06, |
| "loss": 0.7847, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.046005412401458994, |
| "grad_norm": 5.1995320320129395, |
| "learning_rate": 1e-06, |
| "loss": 0.7804, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.0461230733027415, |
| "grad_norm": 4.746626377105713, |
| "learning_rate": 1e-06, |
| "loss": 0.8891, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.046240734204024005, |
| "grad_norm": 3.883009433746338, |
| "learning_rate": 1e-06, |
| "loss": 0.8285, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.0463583951053065, |
| "grad_norm": 3.148414134979248, |
| "learning_rate": 1e-06, |
| "loss": 0.8413, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.04647605600658901, |
| "grad_norm": 3.574962854385376, |
| "learning_rate": 1e-06, |
| "loss": 0.8589, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.046593716907871514, |
| "grad_norm": 3.7145674228668213, |
| "learning_rate": 1e-06, |
| "loss": 0.8848, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.04671137780915402, |
| "grad_norm": 2.2142579555511475, |
| "learning_rate": 1e-06, |
| "loss": 0.9702, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.04682903871043652, |
| "grad_norm": 3.7105839252471924, |
| "learning_rate": 1e-06, |
| "loss": 0.8114, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.046946699611719024, |
| "grad_norm": 3.403414249420166, |
| "learning_rate": 1e-06, |
| "loss": 0.9485, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.04706436051300153, |
| "grad_norm": 5.079216957092285, |
| "learning_rate": 1e-06, |
| "loss": 0.8575, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.047182021414284035, |
| "grad_norm": 5.360959529876709, |
| "learning_rate": 1e-06, |
| "loss": 0.9164, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.047299682315566534, |
| "grad_norm": 2.644434928894043, |
| "learning_rate": 1e-06, |
| "loss": 0.9584, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.04741734321684904, |
| "grad_norm": 3.6469929218292236, |
| "learning_rate": 1e-06, |
| "loss": 0.9308, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.047535004118131545, |
| "grad_norm": 4.576849460601807, |
| "learning_rate": 1e-06, |
| "loss": 0.9586, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.04765266501941405, |
| "grad_norm": 3.8551862239837646, |
| "learning_rate": 1e-06, |
| "loss": 0.859, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.04777032592069655, |
| "grad_norm": 6.5358452796936035, |
| "learning_rate": 1e-06, |
| "loss": 0.8241, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.047887986821979055, |
| "grad_norm": 2.817338466644287, |
| "learning_rate": 1e-06, |
| "loss": 0.8393, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.04800564772326156, |
| "grad_norm": 2.9196722507476807, |
| "learning_rate": 1e-06, |
| "loss": 0.8141, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.048123308624544066, |
| "grad_norm": 5.13906717300415, |
| "learning_rate": 1e-06, |
| "loss": 0.8182, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.048240969525826564, |
| "grad_norm": 3.631796360015869, |
| "learning_rate": 1e-06, |
| "loss": 0.8274, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.04835863042710907, |
| "grad_norm": 4.240082263946533, |
| "learning_rate": 1e-06, |
| "loss": 0.8955, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.048476291328391576, |
| "grad_norm": 3.6155200004577637, |
| "learning_rate": 1e-06, |
| "loss": 0.8182, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.04859395222967408, |
| "grad_norm": 5.879213333129883, |
| "learning_rate": 1e-06, |
| "loss": 0.8166, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.04871161313095658, |
| "grad_norm": 6.984928131103516, |
| "learning_rate": 1e-06, |
| "loss": 0.8011, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.048829274032239085, |
| "grad_norm": 8.660188674926758, |
| "learning_rate": 1e-06, |
| "loss": 0.807, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.04894693493352159, |
| "grad_norm": 5.219287395477295, |
| "learning_rate": 1e-06, |
| "loss": 0.7783, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.049064595834804096, |
| "grad_norm": 3.9182770252227783, |
| "learning_rate": 1e-06, |
| "loss": 0.7483, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.049182256736086595, |
| "grad_norm": 5.254255294799805, |
| "learning_rate": 1e-06, |
| "loss": 0.819, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.0492999176373691, |
| "grad_norm": 5.512103080749512, |
| "learning_rate": 1e-06, |
| "loss": 1.0628, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.049417578538651606, |
| "grad_norm": 5.233940601348877, |
| "learning_rate": 1e-06, |
| "loss": 0.8147, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.04953523943993411, |
| "grad_norm": 4.2004547119140625, |
| "learning_rate": 1e-06, |
| "loss": 0.8236, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.04965290034121661, |
| "grad_norm": 3.2189583778381348, |
| "learning_rate": 1e-06, |
| "loss": 0.8067, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.049770561242499116, |
| "grad_norm": 3.050485610961914, |
| "learning_rate": 1e-06, |
| "loss": 0.875, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.04988822214378162, |
| "grad_norm": 5.160003662109375, |
| "learning_rate": 1e-06, |
| "loss": 0.8419, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.05000588304506413, |
| "grad_norm": 7.041035175323486, |
| "learning_rate": 1e-06, |
| "loss": 0.8879, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.050123543946346626, |
| "grad_norm": 4.338962078094482, |
| "learning_rate": 1e-06, |
| "loss": 0.8643, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.05024120484762913, |
| "grad_norm": 5.213710308074951, |
| "learning_rate": 1e-06, |
| "loss": 0.7715, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.05035886574891164, |
| "grad_norm": 2.2304441928863525, |
| "learning_rate": 1e-06, |
| "loss": 0.8655, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.05047652665019414, |
| "grad_norm": 3.0590453147888184, |
| "learning_rate": 1e-06, |
| "loss": 0.9235, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.05059418755147664, |
| "grad_norm": 5.010176181793213, |
| "learning_rate": 1e-06, |
| "loss": 0.8031, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.050711848452759146, |
| "grad_norm": 2.6110637187957764, |
| "learning_rate": 1e-06, |
| "loss": 0.8778, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.05082950935404165, |
| "grad_norm": 3.213735818862915, |
| "learning_rate": 1e-06, |
| "loss": 0.7609, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.05094717025532416, |
| "grad_norm": 3.956066131591797, |
| "learning_rate": 1e-06, |
| "loss": 0.7591, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.051064831156606656, |
| "grad_norm": 21.855979919433594, |
| "learning_rate": 1e-06, |
| "loss": 0.8767, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.05118249205788916, |
| "grad_norm": 9.275443077087402, |
| "learning_rate": 1e-06, |
| "loss": 0.7895, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.05130015295917167, |
| "grad_norm": 3.2387189865112305, |
| "learning_rate": 1e-06, |
| "loss": 0.8595, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.05141781386045417, |
| "grad_norm": 2.4975271224975586, |
| "learning_rate": 1e-06, |
| "loss": 0.773, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.05153547476173667, |
| "grad_norm": 7.902890205383301, |
| "learning_rate": 1e-06, |
| "loss": 0.7657, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.05165313566301918, |
| "grad_norm": 3.4846811294555664, |
| "learning_rate": 1e-06, |
| "loss": 0.9463, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.05177079656430168, |
| "grad_norm": 12.962797164916992, |
| "learning_rate": 1e-06, |
| "loss": 0.7384, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.05188845746558419, |
| "grad_norm": 2.69588041305542, |
| "learning_rate": 1e-06, |
| "loss": 0.7672, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.05200611836686669, |
| "grad_norm": 2.473055839538574, |
| "learning_rate": 1e-06, |
| "loss": 0.8804, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.05212377926814919, |
| "grad_norm": 4.784314155578613, |
| "learning_rate": 1e-06, |
| "loss": 0.7457, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.0522414401694317, |
| "grad_norm": 3.915424346923828, |
| "learning_rate": 1e-06, |
| "loss": 0.8125, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.052359101070714203, |
| "grad_norm": 3.773595094680786, |
| "learning_rate": 1e-06, |
| "loss": 0.8424, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.0524767619719967, |
| "grad_norm": 4.634319305419922, |
| "learning_rate": 1e-06, |
| "loss": 0.7904, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.05259442287327921, |
| "grad_norm": 4.115906715393066, |
| "learning_rate": 1e-06, |
| "loss": 0.7609, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.05271208377456171, |
| "grad_norm": 8.289746284484863, |
| "learning_rate": 1e-06, |
| "loss": 0.8442, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.05282974467584422, |
| "grad_norm": 21.677457809448242, |
| "learning_rate": 1e-06, |
| "loss": 0.8504, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.05294740557712672, |
| "grad_norm": 4.526599884033203, |
| "learning_rate": 1e-06, |
| "loss": 0.8961, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.05306506647840922, |
| "grad_norm": 6.855340003967285, |
| "learning_rate": 1e-06, |
| "loss": 0.8506, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.05318272737969173, |
| "grad_norm": 4.277021884918213, |
| "learning_rate": 1e-06, |
| "loss": 0.7808, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.053300388280974234, |
| "grad_norm": 8.101177215576172, |
| "learning_rate": 1e-06, |
| "loss": 0.7786, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.05341804918225673, |
| "grad_norm": 2.057940721511841, |
| "learning_rate": 1e-06, |
| "loss": 0.8495, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.05353571008353924, |
| "grad_norm": 4.969939708709717, |
| "learning_rate": 1e-06, |
| "loss": 0.7586, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.053653370984821744, |
| "grad_norm": 2.957061290740967, |
| "learning_rate": 1e-06, |
| "loss": 0.8525, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.05377103188610425, |
| "grad_norm": 8.066761016845703, |
| "learning_rate": 1e-06, |
| "loss": 0.7433, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.05388869278738675, |
| "grad_norm": 4.397155284881592, |
| "learning_rate": 1e-06, |
| "loss": 0.7453, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.054006353688669254, |
| "grad_norm": 3.529214382171631, |
| "learning_rate": 1e-06, |
| "loss": 0.9033, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.05412401458995176, |
| "grad_norm": 5.184650421142578, |
| "learning_rate": 1e-06, |
| "loss": 0.9159, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.054241675491234265, |
| "grad_norm": 3.8979060649871826, |
| "learning_rate": 1e-06, |
| "loss": 0.9171, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.05435933639251676, |
| "grad_norm": 3.0818674564361572, |
| "learning_rate": 1e-06, |
| "loss": 0.8038, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.05447699729379927, |
| "grad_norm": 4.317335605621338, |
| "learning_rate": 1e-06, |
| "loss": 0.7565, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.054594658195081774, |
| "grad_norm": 3.5969228744506836, |
| "learning_rate": 1e-06, |
| "loss": 0.7188, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.05471231909636428, |
| "grad_norm": 4.5173115730285645, |
| "learning_rate": 1e-06, |
| "loss": 0.8674, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.05482997999764678, |
| "grad_norm": 3.263984441757202, |
| "learning_rate": 1e-06, |
| "loss": 0.8585, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.054947640898929284, |
| "grad_norm": 3.5642848014831543, |
| "learning_rate": 1e-06, |
| "loss": 0.8787, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.05506530180021179, |
| "grad_norm": 5.6151347160339355, |
| "learning_rate": 1e-06, |
| "loss": 0.7276, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.055182962701494295, |
| "grad_norm": 3.163313150405884, |
| "learning_rate": 1e-06, |
| "loss": 0.7931, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.055300623602776794, |
| "grad_norm": 2.878931999206543, |
| "learning_rate": 1e-06, |
| "loss": 0.7667, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.0554182845040593, |
| "grad_norm": 3.318117618560791, |
| "learning_rate": 1e-06, |
| "loss": 0.7391, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.055535945405341805, |
| "grad_norm": 3.127615213394165, |
| "learning_rate": 1e-06, |
| "loss": 0.9311, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.05565360630662431, |
| "grad_norm": 2.90480637550354, |
| "learning_rate": 1e-06, |
| "loss": 0.9328, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.05577126720790681, |
| "grad_norm": 3.7865421772003174, |
| "learning_rate": 1e-06, |
| "loss": 0.745, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.055888928109189315, |
| "grad_norm": 3.2640819549560547, |
| "learning_rate": 1e-06, |
| "loss": 0.9306, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.05600658901047182, |
| "grad_norm": 3.227083206176758, |
| "learning_rate": 1e-06, |
| "loss": 0.8744, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.056124249911754326, |
| "grad_norm": 3.939415454864502, |
| "learning_rate": 1e-06, |
| "loss": 0.8885, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.056241910813036824, |
| "grad_norm": 2.41015887260437, |
| "learning_rate": 1e-06, |
| "loss": 0.8947, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.05635957171431933, |
| "grad_norm": 4.985194206237793, |
| "learning_rate": 1e-06, |
| "loss": 0.8153, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.056477232615601836, |
| "grad_norm": 3.3147754669189453, |
| "learning_rate": 1e-06, |
| "loss": 0.8851, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.05659489351688434, |
| "grad_norm": 6.035532474517822, |
| "learning_rate": 1e-06, |
| "loss": 0.8393, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.05671255441816684, |
| "grad_norm": 5.8169403076171875, |
| "learning_rate": 1e-06, |
| "loss": 0.8692, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.056830215319449345, |
| "grad_norm": 3.139738082885742, |
| "learning_rate": 1e-06, |
| "loss": 0.8567, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.05694787622073185, |
| "grad_norm": 6.8116021156311035, |
| "learning_rate": 1e-06, |
| "loss": 0.8185, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.057065537122014356, |
| "grad_norm": 4.787223815917969, |
| "learning_rate": 1e-06, |
| "loss": 0.726, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.057183198023296855, |
| "grad_norm": 2.7163264751434326, |
| "learning_rate": 1e-06, |
| "loss": 0.8637, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.05730085892457936, |
| "grad_norm": 4.378730773925781, |
| "learning_rate": 1e-06, |
| "loss": 0.9133, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.057418519825861866, |
| "grad_norm": 4.203522682189941, |
| "learning_rate": 1e-06, |
| "loss": 0.8322, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.05753618072714437, |
| "grad_norm": 6.0501837730407715, |
| "learning_rate": 1e-06, |
| "loss": 0.9354, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.05765384162842687, |
| "grad_norm": 4.329912185668945, |
| "learning_rate": 1e-06, |
| "loss": 0.6985, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.057771502529709376, |
| "grad_norm": 16.60413360595703, |
| "learning_rate": 1e-06, |
| "loss": 0.7859, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.05788916343099188, |
| "grad_norm": 4.150776386260986, |
| "learning_rate": 1e-06, |
| "loss": 0.8087, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.05800682433227439, |
| "grad_norm": 4.654838562011719, |
| "learning_rate": 1e-06, |
| "loss": 0.8132, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.058124485233556886, |
| "grad_norm": 6.413145542144775, |
| "learning_rate": 1e-06, |
| "loss": 0.7564, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.05824214613483939, |
| "grad_norm": 4.651871204376221, |
| "learning_rate": 1e-06, |
| "loss": 0.8122, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.0583598070361219, |
| "grad_norm": 4.54334020614624, |
| "learning_rate": 1e-06, |
| "loss": 0.7293, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.0584774679374044, |
| "grad_norm": 2.9437007904052734, |
| "learning_rate": 1e-06, |
| "loss": 0.8403, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.0585951288386869, |
| "grad_norm": 2.51481294631958, |
| "learning_rate": 1e-06, |
| "loss": 0.7564, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.05871278973996941, |
| "grad_norm": 3.907083749771118, |
| "learning_rate": 1e-06, |
| "loss": 0.8362, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.05883045064125191, |
| "grad_norm": 2.8982291221618652, |
| "learning_rate": 1e-06, |
| "loss": 0.7787, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.05894811154253442, |
| "grad_norm": 4.202147960662842, |
| "learning_rate": 1e-06, |
| "loss": 0.7667, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.059065772443816916, |
| "grad_norm": 2.803162097930908, |
| "learning_rate": 1e-06, |
| "loss": 0.7728, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.05918343334509942, |
| "grad_norm": 2.6605453491210938, |
| "learning_rate": 1e-06, |
| "loss": 0.8435, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.05930109424638193, |
| "grad_norm": 3.9412362575531006, |
| "learning_rate": 1e-06, |
| "loss": 1.0537, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.05941875514766443, |
| "grad_norm": 4.297357082366943, |
| "learning_rate": 1e-06, |
| "loss": 0.7846, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.05953641604894693, |
| "grad_norm": 3.5739941596984863, |
| "learning_rate": 1e-06, |
| "loss": 0.9041, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.05965407695022944, |
| "grad_norm": 4.355228900909424, |
| "learning_rate": 1e-06, |
| "loss": 0.7947, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.05977173785151194, |
| "grad_norm": 3.83013916015625, |
| "learning_rate": 1e-06, |
| "loss": 0.8155, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.05988939875279445, |
| "grad_norm": 4.5131049156188965, |
| "learning_rate": 1e-06, |
| "loss": 0.8494, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.06000705965407695, |
| "grad_norm": 2.556640148162842, |
| "learning_rate": 1e-06, |
| "loss": 0.8655, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.06012472055535945, |
| "grad_norm": 4.710814952850342, |
| "learning_rate": 1e-06, |
| "loss": 0.8462, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.06024238145664196, |
| "grad_norm": 2.3375051021575928, |
| "learning_rate": 1e-06, |
| "loss": 0.8662, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.060360042357924464, |
| "grad_norm": 6.374229431152344, |
| "learning_rate": 1e-06, |
| "loss": 0.8735, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.06047770325920696, |
| "grad_norm": 4.315854072570801, |
| "learning_rate": 1e-06, |
| "loss": 0.7404, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.06059536416048947, |
| "grad_norm": 7.783134937286377, |
| "learning_rate": 1e-06, |
| "loss": 0.79, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.06071302506177197, |
| "grad_norm": 2.5863089561462402, |
| "learning_rate": 1e-06, |
| "loss": 0.9462, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.06083068596305448, |
| "grad_norm": 8.600770950317383, |
| "learning_rate": 1e-06, |
| "loss": 0.8503, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.06094834686433698, |
| "grad_norm": 6.381835460662842, |
| "learning_rate": 1e-06, |
| "loss": 0.8524, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.06106600776561948, |
| "grad_norm": 9.072025299072266, |
| "learning_rate": 1e-06, |
| "loss": 0.8341, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.06118366866690199, |
| "grad_norm": 4.649893283843994, |
| "learning_rate": 1e-06, |
| "loss": 0.8017, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.061301329568184494, |
| "grad_norm": 3.5497517585754395, |
| "learning_rate": 1e-06, |
| "loss": 0.822, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.06141899046946699, |
| "grad_norm": 6.476340293884277, |
| "learning_rate": 1e-06, |
| "loss": 0.7008, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.0615366513707495, |
| "grad_norm": 4.787139892578125, |
| "learning_rate": 1e-06, |
| "loss": 0.682, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.061654312272032004, |
| "grad_norm": 3.200488328933716, |
| "learning_rate": 1e-06, |
| "loss": 0.8403, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.06177197317331451, |
| "grad_norm": 4.321343898773193, |
| "learning_rate": 1e-06, |
| "loss": 0.713, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.06188963407459701, |
| "grad_norm": 3.959671974182129, |
| "learning_rate": 1e-06, |
| "loss": 0.7889, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.062007294975879514, |
| "grad_norm": 2.6865928173065186, |
| "learning_rate": 1e-06, |
| "loss": 0.7698, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.06212495587716202, |
| "grad_norm": 4.1267170906066895, |
| "learning_rate": 1e-06, |
| "loss": 0.9351, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.062242616778444525, |
| "grad_norm": 3.508446216583252, |
| "learning_rate": 1e-06, |
| "loss": 0.7834, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.06236027767972702, |
| "grad_norm": 11.811966896057129, |
| "learning_rate": 1e-06, |
| "loss": 0.6957, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.06247793858100953, |
| "grad_norm": 3.9337544441223145, |
| "learning_rate": 1e-06, |
| "loss": 0.9831, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.06259559948229203, |
| "grad_norm": 5.258102893829346, |
| "learning_rate": 1e-06, |
| "loss": 0.8131, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.06271326038357454, |
| "grad_norm": 3.2443442344665527, |
| "learning_rate": 1e-06, |
| "loss": 0.8168, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.06283092128485704, |
| "grad_norm": 7.886223316192627, |
| "learning_rate": 1e-06, |
| "loss": 0.9043, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.06294858218613955, |
| "grad_norm": 2.490288257598877, |
| "learning_rate": 1e-06, |
| "loss": 0.8503, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.06306624308742205, |
| "grad_norm": 9.365945816040039, |
| "learning_rate": 1e-06, |
| "loss": 0.8139, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.06318390398870455, |
| "grad_norm": 3.3306145668029785, |
| "learning_rate": 1e-06, |
| "loss": 0.8527, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.06330156488998706, |
| "grad_norm": 4.600078105926514, |
| "learning_rate": 1e-06, |
| "loss": 0.7503, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.06341922579126956, |
| "grad_norm": 2.8406596183776855, |
| "learning_rate": 1e-06, |
| "loss": 0.7695, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.06353688669255206, |
| "grad_norm": 10.398956298828125, |
| "learning_rate": 1e-06, |
| "loss": 0.7327, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.06365454759383457, |
| "grad_norm": 6.3080735206604, |
| "learning_rate": 1e-06, |
| "loss": 0.7773, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.06377220849511707, |
| "grad_norm": 4.205158710479736, |
| "learning_rate": 1e-06, |
| "loss": 0.8143, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.06388986939639958, |
| "grad_norm": 4.888593673706055, |
| "learning_rate": 1e-06, |
| "loss": 0.795, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.06400753029768208, |
| "grad_norm": 3.363938570022583, |
| "learning_rate": 1e-06, |
| "loss": 0.7612, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.06412519119896458, |
| "grad_norm": 3.169189691543579, |
| "learning_rate": 1e-06, |
| "loss": 0.8616, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.06424285210024709, |
| "grad_norm": 4.985038757324219, |
| "learning_rate": 1e-06, |
| "loss": 0.8326, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.06436051300152959, |
| "grad_norm": 4.971280574798584, |
| "learning_rate": 1e-06, |
| "loss": 0.806, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.06447817390281209, |
| "grad_norm": 5.460928440093994, |
| "learning_rate": 1e-06, |
| "loss": 0.7662, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.0645958348040946, |
| "grad_norm": 4.207946300506592, |
| "learning_rate": 1e-06, |
| "loss": 0.6787, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.0647134957053771, |
| "grad_norm": 2.679123640060425, |
| "learning_rate": 1e-06, |
| "loss": 0.7729, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.06483115660665961, |
| "grad_norm": 3.993603467941284, |
| "learning_rate": 1e-06, |
| "loss": 0.8018, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.06494881750794211, |
| "grad_norm": 4.020260810852051, |
| "learning_rate": 1e-06, |
| "loss": 0.7647, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.06506647840922461, |
| "grad_norm": 2.9653048515319824, |
| "learning_rate": 1e-06, |
| "loss": 0.7791, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.06518413931050712, |
| "grad_norm": 2.8370585441589355, |
| "learning_rate": 1e-06, |
| "loss": 0.773, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.06530180021178962, |
| "grad_norm": 5.039313316345215, |
| "learning_rate": 1e-06, |
| "loss": 0.8538, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.06541946111307212, |
| "grad_norm": 4.504653453826904, |
| "learning_rate": 1e-06, |
| "loss": 0.8726, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.06553712201435463, |
| "grad_norm": 3.786172866821289, |
| "learning_rate": 1e-06, |
| "loss": 0.7502, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.06565478291563713, |
| "grad_norm": 4.442392349243164, |
| "learning_rate": 1e-06, |
| "loss": 0.8309, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.06577244381691964, |
| "grad_norm": 4.390598773956299, |
| "learning_rate": 1e-06, |
| "loss": 0.8857, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.06589010471820214, |
| "grad_norm": 2.5926570892333984, |
| "learning_rate": 1e-06, |
| "loss": 0.7914, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.06600776561948464, |
| "grad_norm": 8.310322761535645, |
| "learning_rate": 1e-06, |
| "loss": 0.7625, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.06612542652076715, |
| "grad_norm": 12.237494468688965, |
| "learning_rate": 1e-06, |
| "loss": 0.7558, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.06624308742204965, |
| "grad_norm": 5.965966701507568, |
| "learning_rate": 1e-06, |
| "loss": 0.656, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.06636074832333215, |
| "grad_norm": 6.457314491271973, |
| "learning_rate": 1e-06, |
| "loss": 0.8754, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.06647840922461466, |
| "grad_norm": 2.9990360736846924, |
| "learning_rate": 1e-06, |
| "loss": 0.6746, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.06659607012589716, |
| "grad_norm": 6.220990180969238, |
| "learning_rate": 1e-06, |
| "loss": 0.7538, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.06671373102717967, |
| "grad_norm": 13.471657752990723, |
| "learning_rate": 1e-06, |
| "loss": 0.8042, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.06683139192846217, |
| "grad_norm": 6.996799945831299, |
| "learning_rate": 1e-06, |
| "loss": 0.8182, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.06694905282974467, |
| "grad_norm": 7.256328105926514, |
| "learning_rate": 1e-06, |
| "loss": 0.8296, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.06706671373102718, |
| "grad_norm": 6.098526477813721, |
| "learning_rate": 1e-06, |
| "loss": 0.7973, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.06718437463230968, |
| "grad_norm": 9.667437553405762, |
| "learning_rate": 1e-06, |
| "loss": 0.7141, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.06730203553359218, |
| "grad_norm": 4.498554229736328, |
| "learning_rate": 1e-06, |
| "loss": 0.9565, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.06741969643487469, |
| "grad_norm": 5.443784713745117, |
| "learning_rate": 1e-06, |
| "loss": 0.8814, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.06753735733615719, |
| "grad_norm": 4.860910415649414, |
| "learning_rate": 1e-06, |
| "loss": 0.6508, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.0676550182374397, |
| "grad_norm": 4.808187961578369, |
| "learning_rate": 1e-06, |
| "loss": 0.8269, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.0677726791387222, |
| "grad_norm": 4.875802993774414, |
| "learning_rate": 1e-06, |
| "loss": 0.7894, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.0678903400400047, |
| "grad_norm": 4.116751670837402, |
| "learning_rate": 1e-06, |
| "loss": 0.8942, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.06800800094128721, |
| "grad_norm": 3.51214599609375, |
| "learning_rate": 1e-06, |
| "loss": 0.6998, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.06812566184256971, |
| "grad_norm": 4.270370006561279, |
| "learning_rate": 1e-06, |
| "loss": 0.8708, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.06824332274385221, |
| "grad_norm": 3.15966796875, |
| "learning_rate": 1e-06, |
| "loss": 0.7595, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.06836098364513472, |
| "grad_norm": 4.910851955413818, |
| "learning_rate": 1e-06, |
| "loss": 0.6712, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.06847864454641722, |
| "grad_norm": 6.7051849365234375, |
| "learning_rate": 1e-06, |
| "loss": 0.8669, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.06859630544769973, |
| "grad_norm": 4.1636552810668945, |
| "learning_rate": 1e-06, |
| "loss": 0.8633, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.06871396634898223, |
| "grad_norm": 3.5476083755493164, |
| "learning_rate": 1e-06, |
| "loss": 0.8042, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.06883162725026473, |
| "grad_norm": 2.891711711883545, |
| "learning_rate": 1e-06, |
| "loss": 0.965, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.06894928815154724, |
| "grad_norm": 9.514786720275879, |
| "learning_rate": 1e-06, |
| "loss": 0.8521, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.06906694905282974, |
| "grad_norm": 4.414220809936523, |
| "learning_rate": 1e-06, |
| "loss": 0.8345, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.06918460995411224, |
| "grad_norm": 3.0958492755889893, |
| "learning_rate": 1e-06, |
| "loss": 0.7658, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.06930227085539475, |
| "grad_norm": 3.3460915088653564, |
| "learning_rate": 1e-06, |
| "loss": 0.8396, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.06941993175667725, |
| "grad_norm": 5.934871196746826, |
| "learning_rate": 1e-06, |
| "loss": 0.7246, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.06953759265795977, |
| "grad_norm": 5.69331169128418, |
| "learning_rate": 1e-06, |
| "loss": 0.7282, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.06965525355924226, |
| "grad_norm": 4.256507396697998, |
| "learning_rate": 1e-06, |
| "loss": 0.7915, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.06977291446052476, |
| "grad_norm": 5.126326084136963, |
| "learning_rate": 1e-06, |
| "loss": 0.7192, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.06989057536180728, |
| "grad_norm": 4.735490322113037, |
| "learning_rate": 1e-06, |
| "loss": 0.7529, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.07000823626308977, |
| "grad_norm": 3.5951437950134277, |
| "learning_rate": 1e-06, |
| "loss": 0.8213, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.07012589716437227, |
| "grad_norm": 6.8590168952941895, |
| "learning_rate": 1e-06, |
| "loss": 0.7228, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.07024355806565478, |
| "grad_norm": 5.810555934906006, |
| "learning_rate": 1e-06, |
| "loss": 0.747, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.07036121896693728, |
| "grad_norm": 4.012254238128662, |
| "learning_rate": 1e-06, |
| "loss": 0.7881, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.0704788798682198, |
| "grad_norm": 6.70487117767334, |
| "learning_rate": 1e-06, |
| "loss": 0.8001, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.0705965407695023, |
| "grad_norm": 2.8604013919830322, |
| "learning_rate": 1e-06, |
| "loss": 0.8036, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.0707142016707848, |
| "grad_norm": 3.871709108352661, |
| "learning_rate": 1e-06, |
| "loss": 0.6838, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.0708318625720673, |
| "grad_norm": 3.1215546131134033, |
| "learning_rate": 1e-06, |
| "loss": 0.802, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.0709495234733498, |
| "grad_norm": 2.95503306388855, |
| "learning_rate": 1e-06, |
| "loss": 0.7952, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.0710671843746323, |
| "grad_norm": 5.121756076812744, |
| "learning_rate": 1e-06, |
| "loss": 0.7197, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.07118484527591482, |
| "grad_norm": 3.5067391395568848, |
| "learning_rate": 1e-06, |
| "loss": 0.7953, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.07130250617719731, |
| "grad_norm": 2.0713229179382324, |
| "learning_rate": 1e-06, |
| "loss": 0.7853, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.07142016707847983, |
| "grad_norm": 3.8708510398864746, |
| "learning_rate": 1e-06, |
| "loss": 0.809, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.07153782797976233, |
| "grad_norm": 3.114241123199463, |
| "learning_rate": 1e-06, |
| "loss": 0.7183, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.07165548888104482, |
| "grad_norm": 2.5814106464385986, |
| "learning_rate": 1e-06, |
| "loss": 0.7672, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.07177314978232734, |
| "grad_norm": 5.265275001525879, |
| "learning_rate": 1e-06, |
| "loss": 0.7487, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.07189081068360983, |
| "grad_norm": 5.663961410522461, |
| "learning_rate": 1e-06, |
| "loss": 0.7835, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.07200847158489233, |
| "grad_norm": 11.934410095214844, |
| "learning_rate": 1e-06, |
| "loss": 0.8255, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.07212613248617485, |
| "grad_norm": 5.278149604797363, |
| "learning_rate": 1e-06, |
| "loss": 0.8952, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.07224379338745734, |
| "grad_norm": 5.30377721786499, |
| "learning_rate": 1e-06, |
| "loss": 0.8394, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.07236145428873986, |
| "grad_norm": 3.567551851272583, |
| "learning_rate": 1e-06, |
| "loss": 0.8797, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.07247911519002236, |
| "grad_norm": 5.296813011169434, |
| "learning_rate": 1e-06, |
| "loss": 0.8361, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.07259677609130485, |
| "grad_norm": 4.579360008239746, |
| "learning_rate": 1e-06, |
| "loss": 0.7945, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.07271443699258737, |
| "grad_norm": 4.319943904876709, |
| "learning_rate": 1e-06, |
| "loss": 0.7434, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.07283209789386987, |
| "grad_norm": 6.2155914306640625, |
| "learning_rate": 1e-06, |
| "loss": 0.7999, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.07294975879515236, |
| "grad_norm": 4.133585453033447, |
| "learning_rate": 1e-06, |
| "loss": 0.8252, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.07306741969643488, |
| "grad_norm": 12.52299976348877, |
| "learning_rate": 1e-06, |
| "loss": 0.6713, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.07318508059771738, |
| "grad_norm": 3.280254602432251, |
| "learning_rate": 1e-06, |
| "loss": 0.8483, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.07330274149899989, |
| "grad_norm": 4.4010138511657715, |
| "learning_rate": 1e-06, |
| "loss": 0.8613, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.07342040240028239, |
| "grad_norm": 3.92676043510437, |
| "learning_rate": 1e-06, |
| "loss": 0.8764, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.07353806330156488, |
| "grad_norm": 6.646303176879883, |
| "learning_rate": 1e-06, |
| "loss": 0.6661, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.0736557242028474, |
| "grad_norm": 4.366452217102051, |
| "learning_rate": 1e-06, |
| "loss": 0.8682, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.0737733851041299, |
| "grad_norm": 4.757740497589111, |
| "learning_rate": 1e-06, |
| "loss": 0.7562, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.0738910460054124, |
| "grad_norm": 4.1491241455078125, |
| "learning_rate": 1e-06, |
| "loss": 0.7249, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.07400870690669491, |
| "grad_norm": 3.2377617359161377, |
| "learning_rate": 1e-06, |
| "loss": 0.8469, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.0741263678079774, |
| "grad_norm": 3.7027909755706787, |
| "learning_rate": 1e-06, |
| "loss": 0.7918, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.07424402870925992, |
| "grad_norm": 4.722092628479004, |
| "learning_rate": 1e-06, |
| "loss": 0.8434, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.07436168961054242, |
| "grad_norm": 4.72484827041626, |
| "learning_rate": 1e-06, |
| "loss": 0.8254, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.07447935051182492, |
| "grad_norm": 4.173333644866943, |
| "learning_rate": 1e-06, |
| "loss": 0.7486, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.07459701141310743, |
| "grad_norm": 3.259687900543213, |
| "learning_rate": 1e-06, |
| "loss": 0.804, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.07471467231438993, |
| "grad_norm": 4.720432281494141, |
| "learning_rate": 1e-06, |
| "loss": 0.8884, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.07483233321567243, |
| "grad_norm": 2.8184030055999756, |
| "learning_rate": 1e-06, |
| "loss": 0.8045, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.07494999411695494, |
| "grad_norm": 2.802110433578491, |
| "learning_rate": 1e-06, |
| "loss": 0.8666, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.07506765501823744, |
| "grad_norm": 2.865649461746216, |
| "learning_rate": 1e-06, |
| "loss": 0.6902, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.07518531591951995, |
| "grad_norm": 3.1287262439727783, |
| "learning_rate": 1e-06, |
| "loss": 0.8662, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.07530297682080245, |
| "grad_norm": 5.535600662231445, |
| "learning_rate": 1e-06, |
| "loss": 0.7908, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.07542063772208495, |
| "grad_norm": 3.6541616916656494, |
| "learning_rate": 1e-06, |
| "loss": 0.843, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.07553829862336746, |
| "grad_norm": 3.577301502227783, |
| "learning_rate": 1e-06, |
| "loss": 0.7362, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.07565595952464996, |
| "grad_norm": 5.592081546783447, |
| "learning_rate": 1e-06, |
| "loss": 0.8034, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.07577362042593246, |
| "grad_norm": 3.633941173553467, |
| "learning_rate": 1e-06, |
| "loss": 0.8286, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.07589128132721497, |
| "grad_norm": 3.2601206302642822, |
| "learning_rate": 1e-06, |
| "loss": 0.7762, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.07600894222849747, |
| "grad_norm": 4.561715602874756, |
| "learning_rate": 1e-06, |
| "loss": 0.8529, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.07612660312977998, |
| "grad_norm": 9.033065795898438, |
| "learning_rate": 1e-06, |
| "loss": 0.8811, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.07624426403106248, |
| "grad_norm": 3.58722186088562, |
| "learning_rate": 1e-06, |
| "loss": 0.7805, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.07636192493234498, |
| "grad_norm": 2.476853370666504, |
| "learning_rate": 1e-06, |
| "loss": 0.7164, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.07647958583362749, |
| "grad_norm": 4.5690693855285645, |
| "learning_rate": 1e-06, |
| "loss": 0.6962, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.07659724673490999, |
| "grad_norm": 3.0442702770233154, |
| "learning_rate": 1e-06, |
| "loss": 0.7169, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.07671490763619249, |
| "grad_norm": 4.649383544921875, |
| "learning_rate": 1e-06, |
| "loss": 0.8122, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.076832568537475, |
| "grad_norm": 4.151635646820068, |
| "learning_rate": 1e-06, |
| "loss": 0.7678, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.0769502294387575, |
| "grad_norm": 5.340624809265137, |
| "learning_rate": 1e-06, |
| "loss": 0.7698, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.07706789034004001, |
| "grad_norm": 4.39862585067749, |
| "learning_rate": 1e-06, |
| "loss": 0.8883, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.07718555124132251, |
| "grad_norm": 7.796479225158691, |
| "learning_rate": 1e-06, |
| "loss": 0.7936, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.07730321214260501, |
| "grad_norm": 3.842503786087036, |
| "learning_rate": 1e-06, |
| "loss": 0.8487, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.07742087304388752, |
| "grad_norm": 12.752018928527832, |
| "learning_rate": 1e-06, |
| "loss": 0.8161, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.07753853394517002, |
| "grad_norm": 2.713529586791992, |
| "learning_rate": 1e-06, |
| "loss": 0.9158, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.07765619484645252, |
| "grad_norm": 5.894014358520508, |
| "learning_rate": 1e-06, |
| "loss": 0.927, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.07777385574773503, |
| "grad_norm": 10.467254638671875, |
| "learning_rate": 1e-06, |
| "loss": 0.8401, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.07789151664901753, |
| "grad_norm": 2.005702257156372, |
| "learning_rate": 1e-06, |
| "loss": 0.9121, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.07800917755030004, |
| "grad_norm": 2.868652582168579, |
| "learning_rate": 1e-06, |
| "loss": 0.7022, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.07812683845158254, |
| "grad_norm": 4.538110256195068, |
| "learning_rate": 1e-06, |
| "loss": 0.7856, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.07824449935286504, |
| "grad_norm": 3.814251184463501, |
| "learning_rate": 1e-06, |
| "loss": 0.7137, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.07836216025414755, |
| "grad_norm": 3.6797707080841064, |
| "learning_rate": 1e-06, |
| "loss": 0.6952, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.07847982115543005, |
| "grad_norm": 4.604442119598389, |
| "learning_rate": 1e-06, |
| "loss": 0.6523, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.07859748205671255, |
| "grad_norm": 6.55330753326416, |
| "learning_rate": 1e-06, |
| "loss": 0.6899, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.07871514295799506, |
| "grad_norm": 9.301312446594238, |
| "learning_rate": 1e-06, |
| "loss": 0.8195, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.07883280385927756, |
| "grad_norm": 4.445116996765137, |
| "learning_rate": 1e-06, |
| "loss": 0.7644, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.07895046476056007, |
| "grad_norm": 3.1115548610687256, |
| "learning_rate": 1e-06, |
| "loss": 0.788, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.07906812566184257, |
| "grad_norm": 3.480494499206543, |
| "learning_rate": 1e-06, |
| "loss": 0.8327, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.07918578656312507, |
| "grad_norm": 3.3543670177459717, |
| "learning_rate": 1e-06, |
| "loss": 0.9078, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.07930344746440758, |
| "grad_norm": 3.7252585887908936, |
| "learning_rate": 1e-06, |
| "loss": 0.6805, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.07942110836569008, |
| "grad_norm": 3.1235790252685547, |
| "learning_rate": 1e-06, |
| "loss": 0.7886, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.07953876926697258, |
| "grad_norm": 2.5716028213500977, |
| "learning_rate": 1e-06, |
| "loss": 0.7839, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.07965643016825509, |
| "grad_norm": 5.055776119232178, |
| "learning_rate": 1e-06, |
| "loss": 0.7922, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.07977409106953759, |
| "grad_norm": 6.259873867034912, |
| "learning_rate": 1e-06, |
| "loss": 0.6882, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.0798917519708201, |
| "grad_norm": 6.074028968811035, |
| "learning_rate": 1e-06, |
| "loss": 0.8171, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.0800094128721026, |
| "grad_norm": 6.025778293609619, |
| "learning_rate": 1e-06, |
| "loss": 0.7963, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.0801270737733851, |
| "grad_norm": 4.927762508392334, |
| "learning_rate": 1e-06, |
| "loss": 0.7133, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.08024473467466761, |
| "grad_norm": 9.872488021850586, |
| "learning_rate": 1e-06, |
| "loss": 0.8878, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.08036239557595011, |
| "grad_norm": 4.145512580871582, |
| "learning_rate": 1e-06, |
| "loss": 0.6282, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.08048005647723261, |
| "grad_norm": 4.067215442657471, |
| "learning_rate": 1e-06, |
| "loss": 0.8972, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.08059771737851512, |
| "grad_norm": 6.702483177185059, |
| "learning_rate": 1e-06, |
| "loss": 0.71, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.08071537827979762, |
| "grad_norm": 7.621339797973633, |
| "learning_rate": 1e-06, |
| "loss": 0.7837, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.08083303918108013, |
| "grad_norm": 2.6410186290740967, |
| "learning_rate": 1e-06, |
| "loss": 0.7883, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.08095070008236263, |
| "grad_norm": 5.255964756011963, |
| "learning_rate": 1e-06, |
| "loss": 0.8354, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.08106836098364513, |
| "grad_norm": 4.244592666625977, |
| "learning_rate": 1e-06, |
| "loss": 0.8109, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.08118602188492764, |
| "grad_norm": 2.6107852458953857, |
| "learning_rate": 1e-06, |
| "loss": 0.8591, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.08130368278621014, |
| "grad_norm": 4.650479316711426, |
| "learning_rate": 1e-06, |
| "loss": 0.6969, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.08142134368749264, |
| "grad_norm": 3.1627695560455322, |
| "learning_rate": 1e-06, |
| "loss": 0.8289, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.08153900458877515, |
| "grad_norm": 4.6385674476623535, |
| "learning_rate": 1e-06, |
| "loss": 0.7032, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.08165666549005765, |
| "grad_norm": 5.22135066986084, |
| "learning_rate": 1e-06, |
| "loss": 0.7547, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.08177432639134016, |
| "grad_norm": 4.280886173248291, |
| "learning_rate": 1e-06, |
| "loss": 0.8054, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.08189198729262266, |
| "grad_norm": 10.859444618225098, |
| "learning_rate": 1e-06, |
| "loss": 0.7395, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.08200964819390516, |
| "grad_norm": 3.9999287128448486, |
| "learning_rate": 1e-06, |
| "loss": 0.7837, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.08212730909518767, |
| "grad_norm": 7.206291198730469, |
| "learning_rate": 1e-06, |
| "loss": 0.764, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.08224496999647017, |
| "grad_norm": 5.005997180938721, |
| "learning_rate": 1e-06, |
| "loss": 0.923, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.08236263089775267, |
| "grad_norm": 4.134511470794678, |
| "learning_rate": 1e-06, |
| "loss": 0.815, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.08248029179903518, |
| "grad_norm": 4.48399543762207, |
| "learning_rate": 1e-06, |
| "loss": 0.8545, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.08259795270031768, |
| "grad_norm": 7.961074352264404, |
| "learning_rate": 1e-06, |
| "loss": 0.8463, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.0827156136016002, |
| "grad_norm": 3.7703304290771484, |
| "learning_rate": 1e-06, |
| "loss": 0.7455, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.08283327450288269, |
| "grad_norm": 4.462988376617432, |
| "learning_rate": 1e-06, |
| "loss": 0.8649, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.08295093540416519, |
| "grad_norm": 5.397121429443359, |
| "learning_rate": 1e-06, |
| "loss": 0.8086, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.0830685963054477, |
| "grad_norm": 3.6075279712677, |
| "learning_rate": 1e-06, |
| "loss": 0.7628, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.0831862572067302, |
| "grad_norm": 3.9430606365203857, |
| "learning_rate": 1e-06, |
| "loss": 0.8156, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.0833039181080127, |
| "grad_norm": 3.9803953170776367, |
| "learning_rate": 1e-06, |
| "loss": 0.7861, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.08342157900929521, |
| "grad_norm": 5.1148762702941895, |
| "learning_rate": 1e-06, |
| "loss": 0.8435, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.08353923991057771, |
| "grad_norm": 3.512322425842285, |
| "learning_rate": 1e-06, |
| "loss": 0.8721, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.08365690081186022, |
| "grad_norm": 3.4553308486938477, |
| "learning_rate": 1e-06, |
| "loss": 0.8139, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.08377456171314272, |
| "grad_norm": 5.630927085876465, |
| "learning_rate": 1e-06, |
| "loss": 0.8191, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.08389222261442522, |
| "grad_norm": 10.072120666503906, |
| "learning_rate": 1e-06, |
| "loss": 0.8147, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.08400988351570773, |
| "grad_norm": 3.564073324203491, |
| "learning_rate": 1e-06, |
| "loss": 0.8458, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.08412754441699023, |
| "grad_norm": 9.791050910949707, |
| "learning_rate": 1e-06, |
| "loss": 0.9003, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.08424520531827273, |
| "grad_norm": 11.41666316986084, |
| "learning_rate": 1e-06, |
| "loss": 0.8637, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.08436286621955524, |
| "grad_norm": 9.25336742401123, |
| "learning_rate": 1e-06, |
| "loss": 0.8096, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.08448052712083774, |
| "grad_norm": 3.4500648975372314, |
| "learning_rate": 1e-06, |
| "loss": 0.7901, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.08459818802212025, |
| "grad_norm": 3.696110725402832, |
| "learning_rate": 1e-06, |
| "loss": 0.6153, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.08471584892340275, |
| "grad_norm": 5.816553115844727, |
| "learning_rate": 1e-06, |
| "loss": 0.9023, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.08483350982468525, |
| "grad_norm": 5.944538116455078, |
| "learning_rate": 1e-06, |
| "loss": 0.7745, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.08495117072596776, |
| "grad_norm": 9.655933380126953, |
| "learning_rate": 1e-06, |
| "loss": 0.7756, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.08506883162725026, |
| "grad_norm": 4.010641574859619, |
| "learning_rate": 1e-06, |
| "loss": 0.7837, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.08518649252853276, |
| "grad_norm": 3.5823662281036377, |
| "learning_rate": 1e-06, |
| "loss": 0.7036, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.08530415342981527, |
| "grad_norm": 3.164689302444458, |
| "learning_rate": 1e-06, |
| "loss": 0.8682, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.08542181433109777, |
| "grad_norm": 3.7373437881469727, |
| "learning_rate": 1e-06, |
| "loss": 0.8604, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.08553947523238029, |
| "grad_norm": 3.8801088333129883, |
| "learning_rate": 1e-06, |
| "loss": 0.7022, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.08565713613366278, |
| "grad_norm": 3.8801088333129883, |
| "learning_rate": 1e-06, |
| "loss": 0.8586, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.08577479703494528, |
| "grad_norm": 4.299330234527588, |
| "learning_rate": 1e-06, |
| "loss": 0.7284, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.0858924579362278, |
| "grad_norm": 12.118149757385254, |
| "learning_rate": 1e-06, |
| "loss": 0.7516, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.0860101188375103, |
| "grad_norm": 4.249277114868164, |
| "learning_rate": 1e-06, |
| "loss": 0.804, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.08612777973879279, |
| "grad_norm": 3.9434337615966797, |
| "learning_rate": 1e-06, |
| "loss": 0.7754, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.0862454406400753, |
| "grad_norm": 9.427238464355469, |
| "learning_rate": 1e-06, |
| "loss": 0.7556, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.0863631015413578, |
| "grad_norm": 7.601968288421631, |
| "learning_rate": 1e-06, |
| "loss": 0.9026, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.08648076244264032, |
| "grad_norm": 4.908003807067871, |
| "learning_rate": 1e-06, |
| "loss": 0.7717, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.08659842334392281, |
| "grad_norm": 5.9103522300720215, |
| "learning_rate": 1e-06, |
| "loss": 0.8005, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.08671608424520531, |
| "grad_norm": 3.8755075931549072, |
| "learning_rate": 1e-06, |
| "loss": 0.7784, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.08683374514648783, |
| "grad_norm": 5.542903423309326, |
| "learning_rate": 1e-06, |
| "loss": 0.7552, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.08695140604777032, |
| "grad_norm": 3.3967931270599365, |
| "learning_rate": 1e-06, |
| "loss": 0.738, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.08706906694905282, |
| "grad_norm": 2.3593356609344482, |
| "learning_rate": 1e-06, |
| "loss": 0.7664, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.08718672785033534, |
| "grad_norm": 3.30541729927063, |
| "learning_rate": 1e-06, |
| "loss": 0.798, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.08730438875161783, |
| "grad_norm": 7.929763317108154, |
| "learning_rate": 1e-06, |
| "loss": 0.7142, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.08742204965290035, |
| "grad_norm": 4.586026191711426, |
| "learning_rate": 1e-06, |
| "loss": 0.8118, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.08753971055418285, |
| "grad_norm": 3.0885305404663086, |
| "learning_rate": 1e-06, |
| "loss": 0.831, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.08765737145546534, |
| "grad_norm": 6.417020797729492, |
| "learning_rate": 1e-06, |
| "loss": 0.7054, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.08777503235674786, |
| "grad_norm": 8.70758056640625, |
| "learning_rate": 1e-06, |
| "loss": 0.8487, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.08789269325803035, |
| "grad_norm": 5.649046421051025, |
| "learning_rate": 1e-06, |
| "loss": 0.8484, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.08801035415931285, |
| "grad_norm": 2.783040761947632, |
| "learning_rate": 1e-06, |
| "loss": 0.731, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.08812801506059537, |
| "grad_norm": 3.5551412105560303, |
| "learning_rate": 1e-06, |
| "loss": 0.7613, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.08824567596187786, |
| "grad_norm": 3.094209671020508, |
| "learning_rate": 1e-06, |
| "loss": 0.8581, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.08836333686316038, |
| "grad_norm": 6.274815082550049, |
| "learning_rate": 1e-06, |
| "loss": 0.8412, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.08848099776444288, |
| "grad_norm": 5.962408542633057, |
| "learning_rate": 1e-06, |
| "loss": 0.8323, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.08859865866572537, |
| "grad_norm": 2.927711009979248, |
| "learning_rate": 1e-06, |
| "loss": 0.8187, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.08871631956700789, |
| "grad_norm": 7.680797576904297, |
| "learning_rate": 1e-06, |
| "loss": 0.7775, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.08883398046829039, |
| "grad_norm": 45.736412048339844, |
| "learning_rate": 1e-06, |
| "loss": 0.5749, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.08895164136957288, |
| "grad_norm": 5.9662017822265625, |
| "learning_rate": 1e-06, |
| "loss": 0.7663, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.0890693022708554, |
| "grad_norm": 3.3588130474090576, |
| "learning_rate": 1e-06, |
| "loss": 0.7206, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.0891869631721379, |
| "grad_norm": 4.659145832061768, |
| "learning_rate": 1e-06, |
| "loss": 0.8381, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.08930462407342041, |
| "grad_norm": 5.114917755126953, |
| "learning_rate": 1e-06, |
| "loss": 0.6961, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.0894222849747029, |
| "grad_norm": 6.852463722229004, |
| "learning_rate": 1e-06, |
| "loss": 0.7245, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.0895399458759854, |
| "grad_norm": 3.8735191822052, |
| "learning_rate": 1e-06, |
| "loss": 0.8026, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.08965760677726792, |
| "grad_norm": 3.054180860519409, |
| "learning_rate": 1e-06, |
| "loss": 0.903, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.08977526767855042, |
| "grad_norm": 18.586585998535156, |
| "learning_rate": 1e-06, |
| "loss": 0.7696, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.08989292857983291, |
| "grad_norm": 2.874756097793579, |
| "learning_rate": 1e-06, |
| "loss": 0.7986, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.09001058948111543, |
| "grad_norm": 1.945826530456543, |
| "learning_rate": 1e-06, |
| "loss": 0.841, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.09012825038239793, |
| "grad_norm": 3.9630308151245117, |
| "learning_rate": 1e-06, |
| "loss": 0.7335, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.09024591128368044, |
| "grad_norm": 5.930178165435791, |
| "learning_rate": 1e-06, |
| "loss": 0.7273, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.09036357218496294, |
| "grad_norm": 4.515182971954346, |
| "learning_rate": 1e-06, |
| "loss": 0.842, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.09048123308624544, |
| "grad_norm": 4.042099952697754, |
| "learning_rate": 1e-06, |
| "loss": 0.6113, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.09059889398752795, |
| "grad_norm": 3.2117252349853516, |
| "learning_rate": 1e-06, |
| "loss": 0.8613, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.09071655488881045, |
| "grad_norm": 3.9129526615142822, |
| "learning_rate": 1e-06, |
| "loss": 0.6426, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.09083421579009295, |
| "grad_norm": 3.7040510177612305, |
| "learning_rate": 1e-06, |
| "loss": 0.7795, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.09095187669137546, |
| "grad_norm": 5.53035306930542, |
| "learning_rate": 1e-06, |
| "loss": 0.6404, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.09106953759265796, |
| "grad_norm": 3.193643569946289, |
| "learning_rate": 1e-06, |
| "loss": 0.7391, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.09118719849394047, |
| "grad_norm": 2.597047805786133, |
| "learning_rate": 1e-06, |
| "loss": 0.9017, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.09130485939522297, |
| "grad_norm": 3.4000656604766846, |
| "learning_rate": 1e-06, |
| "loss": 0.7492, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.09142252029650547, |
| "grad_norm": 2.4344582557678223, |
| "learning_rate": 1e-06, |
| "loss": 0.7923, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.09154018119778798, |
| "grad_norm": 3.083674192428589, |
| "learning_rate": 1e-06, |
| "loss": 0.9369, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.09165784209907048, |
| "grad_norm": 5.288464069366455, |
| "learning_rate": 1e-06, |
| "loss": 0.68, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.09177550300035298, |
| "grad_norm": 3.390939474105835, |
| "learning_rate": 1e-06, |
| "loss": 0.7158, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.09189316390163549, |
| "grad_norm": 3.3622586727142334, |
| "learning_rate": 1e-06, |
| "loss": 0.8059, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.09201082480291799, |
| "grad_norm": 3.347780227661133, |
| "learning_rate": 1e-06, |
| "loss": 0.7351, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.0921284857042005, |
| "grad_norm": 4.869822978973389, |
| "learning_rate": 1e-06, |
| "loss": 0.705, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.092246146605483, |
| "grad_norm": 8.056663513183594, |
| "learning_rate": 1e-06, |
| "loss": 0.7346, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.0923638075067655, |
| "grad_norm": 4.730690002441406, |
| "learning_rate": 1e-06, |
| "loss": 0.8079, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.09248146840804801, |
| "grad_norm": 3.544837713241577, |
| "learning_rate": 1e-06, |
| "loss": 0.7663, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.09259912930933051, |
| "grad_norm": 4.1507649421691895, |
| "learning_rate": 1e-06, |
| "loss": 0.7674, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.092716790210613, |
| "grad_norm": 7.242065906524658, |
| "learning_rate": 1e-06, |
| "loss": 0.8021, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.09283445111189552, |
| "grad_norm": 6.251343250274658, |
| "learning_rate": 1e-06, |
| "loss": 0.7952, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.09295211201317802, |
| "grad_norm": 4.684685230255127, |
| "learning_rate": 1e-06, |
| "loss": 0.8428, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.09306977291446053, |
| "grad_norm": 3.2917494773864746, |
| "learning_rate": 1e-06, |
| "loss": 0.8165, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.09318743381574303, |
| "grad_norm": 3.4737491607666016, |
| "learning_rate": 1e-06, |
| "loss": 0.7606, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.09330509471702553, |
| "grad_norm": 2.5431602001190186, |
| "learning_rate": 1e-06, |
| "loss": 0.8489, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.09342275561830804, |
| "grad_norm": 4.048852443695068, |
| "learning_rate": 1e-06, |
| "loss": 0.7458, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.09354041651959054, |
| "grad_norm": 7.465090751647949, |
| "learning_rate": 1e-06, |
| "loss": 0.8441, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.09365807742087304, |
| "grad_norm": 5.336065769195557, |
| "learning_rate": 1e-06, |
| "loss": 0.7008, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.09377573832215555, |
| "grad_norm": 3.435615301132202, |
| "learning_rate": 1e-06, |
| "loss": 0.795, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.09389339922343805, |
| "grad_norm": 7.03810977935791, |
| "learning_rate": 1e-06, |
| "loss": 0.8487, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.09401106012472056, |
| "grad_norm": 3.2493505477905273, |
| "learning_rate": 1e-06, |
| "loss": 0.7925, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.09412872102600306, |
| "grad_norm": 4.32244348526001, |
| "learning_rate": 1e-06, |
| "loss": 0.9033, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.09424638192728556, |
| "grad_norm": 6.156367301940918, |
| "learning_rate": 1e-06, |
| "loss": 0.8354, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.09436404282856807, |
| "grad_norm": 3.897897481918335, |
| "learning_rate": 1e-06, |
| "loss": 0.667, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.09448170372985057, |
| "grad_norm": 4.081802845001221, |
| "learning_rate": 1e-06, |
| "loss": 0.8003, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.09459936463113307, |
| "grad_norm": 2.2866339683532715, |
| "learning_rate": 1e-06, |
| "loss": 0.9098, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.09471702553241558, |
| "grad_norm": 10.533675193786621, |
| "learning_rate": 1e-06, |
| "loss": 0.6993, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.09483468643369808, |
| "grad_norm": 2.7982473373413086, |
| "learning_rate": 1e-06, |
| "loss": 0.7198, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.09495234733498059, |
| "grad_norm": 6.184414386749268, |
| "learning_rate": 1e-06, |
| "loss": 0.7414, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.09507000823626309, |
| "grad_norm": 7.496194839477539, |
| "learning_rate": 1e-06, |
| "loss": 0.6699, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.09518766913754559, |
| "grad_norm": 2.7355363368988037, |
| "learning_rate": 1e-06, |
| "loss": 0.7179, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.0953053300388281, |
| "grad_norm": 4.605178356170654, |
| "learning_rate": 1e-06, |
| "loss": 0.7431, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.0954229909401106, |
| "grad_norm": 3.33636212348938, |
| "learning_rate": 1e-06, |
| "loss": 0.6955, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.0955406518413931, |
| "grad_norm": 6.929253578186035, |
| "learning_rate": 1e-06, |
| "loss": 0.7663, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.09565831274267561, |
| "grad_norm": 4.302491664886475, |
| "learning_rate": 1e-06, |
| "loss": 0.9389, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.09577597364395811, |
| "grad_norm": 4.514020919799805, |
| "learning_rate": 1e-06, |
| "loss": 0.778, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.09589363454524062, |
| "grad_norm": 3.7888290882110596, |
| "learning_rate": 1e-06, |
| "loss": 0.7926, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.09601129544652312, |
| "grad_norm": 4.159764766693115, |
| "learning_rate": 1e-06, |
| "loss": 0.6922, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.09612895634780562, |
| "grad_norm": 2.528047561645508, |
| "learning_rate": 1e-06, |
| "loss": 0.8371, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.09624661724908813, |
| "grad_norm": 5.680185794830322, |
| "learning_rate": 1e-06, |
| "loss": 0.6564, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.09636427815037063, |
| "grad_norm": 5.111583709716797, |
| "learning_rate": 1e-06, |
| "loss": 0.827, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.09648193905165313, |
| "grad_norm": 3.2532787322998047, |
| "learning_rate": 1e-06, |
| "loss": 0.8875, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.09659959995293564, |
| "grad_norm": 6.838262557983398, |
| "learning_rate": 1e-06, |
| "loss": 0.7568, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.09671726085421814, |
| "grad_norm": 5.076169013977051, |
| "learning_rate": 1e-06, |
| "loss": 0.8047, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.09683492175550065, |
| "grad_norm": 3.4474267959594727, |
| "learning_rate": 1e-06, |
| "loss": 0.7991, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.09695258265678315, |
| "grad_norm": 2.7672460079193115, |
| "learning_rate": 1e-06, |
| "loss": 0.8094, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.09707024355806565, |
| "grad_norm": 3.9804646968841553, |
| "learning_rate": 1e-06, |
| "loss": 0.7259, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.09718790445934816, |
| "grad_norm": 3.030790090560913, |
| "learning_rate": 1e-06, |
| "loss": 0.7429, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.09730556536063066, |
| "grad_norm": 5.696221828460693, |
| "learning_rate": 1e-06, |
| "loss": 0.7522, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.09742322626191316, |
| "grad_norm": 2.9806602001190186, |
| "learning_rate": 1e-06, |
| "loss": 0.7438, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.09754088716319567, |
| "grad_norm": 6.773803234100342, |
| "learning_rate": 1e-06, |
| "loss": 0.7772, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.09765854806447817, |
| "grad_norm": 3.165433883666992, |
| "learning_rate": 1e-06, |
| "loss": 0.7221, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.09777620896576068, |
| "grad_norm": 3.9233906269073486, |
| "learning_rate": 1e-06, |
| "loss": 0.8598, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.09789386986704318, |
| "grad_norm": 2.9941489696502686, |
| "learning_rate": 1e-06, |
| "loss": 0.6664, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.09801153076832568, |
| "grad_norm": 3.3423893451690674, |
| "learning_rate": 1e-06, |
| "loss": 0.8662, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.09812919166960819, |
| "grad_norm": 7.260572910308838, |
| "learning_rate": 1e-06, |
| "loss": 0.6569, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.09824685257089069, |
| "grad_norm": 21.946184158325195, |
| "learning_rate": 1e-06, |
| "loss": 0.7579, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.09836451347217319, |
| "grad_norm": 4.030398845672607, |
| "learning_rate": 1e-06, |
| "loss": 0.8875, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.0984821743734557, |
| "grad_norm": 5.5473833084106445, |
| "learning_rate": 1e-06, |
| "loss": 0.6712, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.0985998352747382, |
| "grad_norm": 2.977285146713257, |
| "learning_rate": 1e-06, |
| "loss": 0.6802, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.09871749617602071, |
| "grad_norm": 4.305202960968018, |
| "learning_rate": 1e-06, |
| "loss": 0.8286, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.09883515707730321, |
| "grad_norm": 4.800453186035156, |
| "learning_rate": 1e-06, |
| "loss": 0.7722, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.09895281797858571, |
| "grad_norm": 4.110867023468018, |
| "learning_rate": 1e-06, |
| "loss": 0.736, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.09907047887986822, |
| "grad_norm": 4.379387855529785, |
| "learning_rate": 1e-06, |
| "loss": 0.7814, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.09918813978115072, |
| "grad_norm": 5.1933274269104, |
| "learning_rate": 1e-06, |
| "loss": 0.8989, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.09930580068243322, |
| "grad_norm": 4.651576995849609, |
| "learning_rate": 1e-06, |
| "loss": 0.8684, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.09942346158371573, |
| "grad_norm": 3.9711875915527344, |
| "learning_rate": 1e-06, |
| "loss": 0.8008, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.09954112248499823, |
| "grad_norm": 4.233101844787598, |
| "learning_rate": 1e-06, |
| "loss": 0.767, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.09965878338628074, |
| "grad_norm": 4.948994159698486, |
| "learning_rate": 1e-06, |
| "loss": 0.7025, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.09977644428756324, |
| "grad_norm": 4.045925140380859, |
| "learning_rate": 1e-06, |
| "loss": 0.7014, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.09989410518884574, |
| "grad_norm": 4.309968948364258, |
| "learning_rate": 1e-06, |
| "loss": 0.8363, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.10001176609012825, |
| "grad_norm": 4.463519096374512, |
| "learning_rate": 1e-06, |
| "loss": 0.7713, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.10012942699141075, |
| "grad_norm": 5.474331378936768, |
| "learning_rate": 1e-06, |
| "loss": 0.7899, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.10024708789269325, |
| "grad_norm": 8.145475387573242, |
| "learning_rate": 1e-06, |
| "loss": 0.7552, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.10036474879397576, |
| "grad_norm": 9.849764823913574, |
| "learning_rate": 1e-06, |
| "loss": 0.6796, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.10048240969525826, |
| "grad_norm": 3.580988883972168, |
| "learning_rate": 1e-06, |
| "loss": 0.818, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.10060007059654077, |
| "grad_norm": 2.9809317588806152, |
| "learning_rate": 1e-06, |
| "loss": 0.8343, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.10071773149782327, |
| "grad_norm": 4.253994464874268, |
| "learning_rate": 1e-06, |
| "loss": 0.7922, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.10083539239910577, |
| "grad_norm": 7.390686988830566, |
| "learning_rate": 1e-06, |
| "loss": 0.8706, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.10095305330038828, |
| "grad_norm": 6.373108386993408, |
| "learning_rate": 1e-06, |
| "loss": 0.6844, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.10107071420167078, |
| "grad_norm": 5.433058738708496, |
| "learning_rate": 1e-06, |
| "loss": 0.7881, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.10118837510295328, |
| "grad_norm": 5.169571399688721, |
| "learning_rate": 1e-06, |
| "loss": 0.7737, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.1013060360042358, |
| "grad_norm": 3.5316271781921387, |
| "learning_rate": 1e-06, |
| "loss": 0.7511, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.10142369690551829, |
| "grad_norm": 3.4881837368011475, |
| "learning_rate": 1e-06, |
| "loss": 0.6897, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.1015413578068008, |
| "grad_norm": 6.424959659576416, |
| "learning_rate": 1e-06, |
| "loss": 0.7233, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.1016590187080833, |
| "grad_norm": 5.144840240478516, |
| "learning_rate": 1e-06, |
| "loss": 0.6463, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.1017766796093658, |
| "grad_norm": 4.471700668334961, |
| "learning_rate": 1e-06, |
| "loss": 0.7613, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.10189434051064832, |
| "grad_norm": 3.5130629539489746, |
| "learning_rate": 1e-06, |
| "loss": 0.6285, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.10201200141193081, |
| "grad_norm": 6.073313236236572, |
| "learning_rate": 1e-06, |
| "loss": 0.7526, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.10212966231321331, |
| "grad_norm": 5.274708271026611, |
| "learning_rate": 1e-06, |
| "loss": 0.694, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.10224732321449582, |
| "grad_norm": 5.783721446990967, |
| "learning_rate": 1e-06, |
| "loss": 0.7246, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.10236498411577832, |
| "grad_norm": 2.880633592605591, |
| "learning_rate": 1e-06, |
| "loss": 0.7076, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.10248264501706084, |
| "grad_norm": 4.704663276672363, |
| "learning_rate": 1e-06, |
| "loss": 0.7082, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.10260030591834333, |
| "grad_norm": 3.2478537559509277, |
| "learning_rate": 1e-06, |
| "loss": 0.7734, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.10271796681962583, |
| "grad_norm": 18.305561065673828, |
| "learning_rate": 1e-06, |
| "loss": 0.6345, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.10283562772090835, |
| "grad_norm": 6.261775016784668, |
| "learning_rate": 1e-06, |
| "loss": 0.6628, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.10295328862219084, |
| "grad_norm": 6.447326183319092, |
| "learning_rate": 1e-06, |
| "loss": 0.8169, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.10307094952347334, |
| "grad_norm": 3.332118511199951, |
| "learning_rate": 1e-06, |
| "loss": 0.7389, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.10318861042475586, |
| "grad_norm": 7.243100166320801, |
| "learning_rate": 1e-06, |
| "loss": 0.8797, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.10330627132603835, |
| "grad_norm": 3.4968297481536865, |
| "learning_rate": 1e-06, |
| "loss": 0.6324, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.10342393222732087, |
| "grad_norm": 3.1249096393585205, |
| "learning_rate": 1e-06, |
| "loss": 0.7032, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.10354159312860337, |
| "grad_norm": 4.255460262298584, |
| "learning_rate": 1e-06, |
| "loss": 0.8448, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.10365925402988586, |
| "grad_norm": 3.519742488861084, |
| "learning_rate": 1e-06, |
| "loss": 0.7932, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.10377691493116838, |
| "grad_norm": 28.501571655273438, |
| "learning_rate": 1e-06, |
| "loss": 0.696, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.10389457583245088, |
| "grad_norm": 2.5168793201446533, |
| "learning_rate": 1e-06, |
| "loss": 0.881, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.10401223673373337, |
| "grad_norm": 8.99081039428711, |
| "learning_rate": 1e-06, |
| "loss": 0.7411, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.10412989763501589, |
| "grad_norm": 13.516379356384277, |
| "learning_rate": 1e-06, |
| "loss": 0.608, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.10424755853629838, |
| "grad_norm": 4.074734687805176, |
| "learning_rate": 1e-06, |
| "loss": 0.7659, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.1043652194375809, |
| "grad_norm": 2.260159492492676, |
| "learning_rate": 1e-06, |
| "loss": 0.8207, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.1044828803388634, |
| "grad_norm": 2.8651790618896484, |
| "learning_rate": 1e-06, |
| "loss": 0.7703, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.1046005412401459, |
| "grad_norm": 4.478638648986816, |
| "learning_rate": 1e-06, |
| "loss": 0.775, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.10471820214142841, |
| "grad_norm": 3.101065158843994, |
| "learning_rate": 1e-06, |
| "loss": 0.6703, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.1048358630427109, |
| "grad_norm": 3.55016827583313, |
| "learning_rate": 1e-06, |
| "loss": 0.7054, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.1049535239439934, |
| "grad_norm": 4.5212507247924805, |
| "learning_rate": 1e-06, |
| "loss": 0.7758, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.10507118484527592, |
| "grad_norm": 4.663824081420898, |
| "learning_rate": 1e-06, |
| "loss": 0.6694, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.10518884574655842, |
| "grad_norm": 4.49483585357666, |
| "learning_rate": 1e-06, |
| "loss": 0.7006, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.10530650664784093, |
| "grad_norm": 2.8132238388061523, |
| "learning_rate": 1e-06, |
| "loss": 0.8139, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.10542416754912343, |
| "grad_norm": 5.475927352905273, |
| "learning_rate": 1e-06, |
| "loss": 0.7546, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.10554182845040593, |
| "grad_norm": 2.9564592838287354, |
| "learning_rate": 1e-06, |
| "loss": 0.7466, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.10565948935168844, |
| "grad_norm": 3.7339460849761963, |
| "learning_rate": 1e-06, |
| "loss": 0.6938, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.10577715025297094, |
| "grad_norm": 4.033295631408691, |
| "learning_rate": 1e-06, |
| "loss": 0.7168, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.10589481115425343, |
| "grad_norm": 3.160860300064087, |
| "learning_rate": 1e-06, |
| "loss": 0.6834, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.10601247205553595, |
| "grad_norm": 7.493616104125977, |
| "learning_rate": 1e-06, |
| "loss": 0.7393, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.10613013295681845, |
| "grad_norm": 22.620590209960938, |
| "learning_rate": 1e-06, |
| "loss": 0.8435, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.10624779385810096, |
| "grad_norm": 3.6928799152374268, |
| "learning_rate": 1e-06, |
| "loss": 0.7645, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.10636545475938346, |
| "grad_norm": 3.0258662700653076, |
| "learning_rate": 1e-06, |
| "loss": 0.6949, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.10648311566066596, |
| "grad_norm": 7.960806846618652, |
| "learning_rate": 1e-06, |
| "loss": 0.6814, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.10660077656194847, |
| "grad_norm": 10.985957145690918, |
| "learning_rate": 1e-06, |
| "loss": 0.8068, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.10671843746323097, |
| "grad_norm": 5.588890075683594, |
| "learning_rate": 1e-06, |
| "loss": 0.712, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.10683609836451347, |
| "grad_norm": 2.6326663494110107, |
| "learning_rate": 1e-06, |
| "loss": 0.8733, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.10695375926579598, |
| "grad_norm": 6.32219123840332, |
| "learning_rate": 1e-06, |
| "loss": 0.7209, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.10707142016707848, |
| "grad_norm": 3.7147603034973145, |
| "learning_rate": 1e-06, |
| "loss": 0.6518, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.10718908106836099, |
| "grad_norm": 6.2384352684021, |
| "learning_rate": 1e-06, |
| "loss": 0.7457, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.10730674196964349, |
| "grad_norm": 6.7992987632751465, |
| "learning_rate": 1e-06, |
| "loss": 0.6877, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.10742440287092599, |
| "grad_norm": 4.401557922363281, |
| "learning_rate": 1e-06, |
| "loss": 0.7824, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.1075420637722085, |
| "grad_norm": 4.7830705642700195, |
| "learning_rate": 1e-06, |
| "loss": 0.7996, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.107659724673491, |
| "grad_norm": 5.023268699645996, |
| "learning_rate": 1e-06, |
| "loss": 0.8633, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.1077773855747735, |
| "grad_norm": 5.523383617401123, |
| "learning_rate": 1e-06, |
| "loss": 0.8391, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.10789504647605601, |
| "grad_norm": 3.5851926803588867, |
| "learning_rate": 1e-06, |
| "loss": 0.8981, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.10801270737733851, |
| "grad_norm": 6.372958183288574, |
| "learning_rate": 1e-06, |
| "loss": 0.7296, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.10813036827862102, |
| "grad_norm": 3.6867308616638184, |
| "learning_rate": 1e-06, |
| "loss": 0.8342, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.10824802917990352, |
| "grad_norm": 3.857619047164917, |
| "learning_rate": 1e-06, |
| "loss": 0.6729, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.10836569008118602, |
| "grad_norm": 10.918023109436035, |
| "learning_rate": 1e-06, |
| "loss": 0.7497, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.10848335098246853, |
| "grad_norm": 4.183814525604248, |
| "learning_rate": 1e-06, |
| "loss": 0.7404, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.10860101188375103, |
| "grad_norm": 6.011120796203613, |
| "learning_rate": 1e-06, |
| "loss": 0.7812, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.10871867278503353, |
| "grad_norm": 3.549236297607422, |
| "learning_rate": 1e-06, |
| "loss": 0.7345, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.10883633368631604, |
| "grad_norm": 3.6891543865203857, |
| "learning_rate": 1e-06, |
| "loss": 0.7796, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.10895399458759854, |
| "grad_norm": 7.792111396789551, |
| "learning_rate": 1e-06, |
| "loss": 0.7652, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.10907165548888105, |
| "grad_norm": 4.561732769012451, |
| "learning_rate": 1e-06, |
| "loss": 0.7025, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.10918931639016355, |
| "grad_norm": 5.012827396392822, |
| "learning_rate": 1e-06, |
| "loss": 0.7675, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.10930697729144605, |
| "grad_norm": 4.815629959106445, |
| "learning_rate": 1e-06, |
| "loss": 0.7956, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.10942463819272856, |
| "grad_norm": 7.950117588043213, |
| "learning_rate": 1e-06, |
| "loss": 0.7514, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.10954229909401106, |
| "grad_norm": 3.6295828819274902, |
| "learning_rate": 1e-06, |
| "loss": 0.7498, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.10965995999529356, |
| "grad_norm": 8.010412216186523, |
| "learning_rate": 1e-06, |
| "loss": 0.6758, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.10977762089657607, |
| "grad_norm": 3.3440520763397217, |
| "learning_rate": 1e-06, |
| "loss": 0.8549, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.10989528179785857, |
| "grad_norm": 3.4136955738067627, |
| "learning_rate": 1e-06, |
| "loss": 0.693, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.11001294269914108, |
| "grad_norm": 4.733597755432129, |
| "learning_rate": 1e-06, |
| "loss": 0.8341, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.11013060360042358, |
| "grad_norm": 13.338993072509766, |
| "learning_rate": 1e-06, |
| "loss": 0.7849, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.11024826450170608, |
| "grad_norm": 3.829407215118408, |
| "learning_rate": 1e-06, |
| "loss": 0.7122, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.11036592540298859, |
| "grad_norm": 5.739267826080322, |
| "learning_rate": 1e-06, |
| "loss": 0.7708, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.11048358630427109, |
| "grad_norm": 5.647058010101318, |
| "learning_rate": 1e-06, |
| "loss": 0.7333, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.11060124720555359, |
| "grad_norm": 3.6504688262939453, |
| "learning_rate": 1e-06, |
| "loss": 0.678, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.1107189081068361, |
| "grad_norm": 4.032068729400635, |
| "learning_rate": 1e-06, |
| "loss": 0.6942, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.1108365690081186, |
| "grad_norm": 3.7702181339263916, |
| "learning_rate": 1e-06, |
| "loss": 0.7686, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.11095422990940111, |
| "grad_norm": 10.096896171569824, |
| "learning_rate": 1e-06, |
| "loss": 0.8725, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.11107189081068361, |
| "grad_norm": 5.022398948669434, |
| "learning_rate": 1e-06, |
| "loss": 0.7321, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.11118955171196611, |
| "grad_norm": 3.7425436973571777, |
| "learning_rate": 1e-06, |
| "loss": 0.6971, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.11130721261324862, |
| "grad_norm": 17.83504295349121, |
| "learning_rate": 1e-06, |
| "loss": 0.7606, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.11142487351453112, |
| "grad_norm": 5.255575180053711, |
| "learning_rate": 1e-06, |
| "loss": 0.7614, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.11154253441581362, |
| "grad_norm": 6.864058494567871, |
| "learning_rate": 1e-06, |
| "loss": 0.7047, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.11166019531709613, |
| "grad_norm": 8.533795356750488, |
| "learning_rate": 1e-06, |
| "loss": 0.8295, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.11177785621837863, |
| "grad_norm": 8.808038711547852, |
| "learning_rate": 1e-06, |
| "loss": 0.81, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.11189551711966114, |
| "grad_norm": 3.7528939247131348, |
| "learning_rate": 1e-06, |
| "loss": 0.6968, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.11201317802094364, |
| "grad_norm": 12.15597915649414, |
| "learning_rate": 1e-06, |
| "loss": 0.6969, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.11213083892222614, |
| "grad_norm": 3.9877498149871826, |
| "learning_rate": 1e-06, |
| "loss": 0.7565, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.11224849982350865, |
| "grad_norm": 3.230027914047241, |
| "learning_rate": 1e-06, |
| "loss": 0.7655, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.11236616072479115, |
| "grad_norm": 5.134922504425049, |
| "learning_rate": 1e-06, |
| "loss": 0.8121, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.11248382162607365, |
| "grad_norm": 3.7903997898101807, |
| "learning_rate": 1e-06, |
| "loss": 0.7504, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.11260148252735616, |
| "grad_norm": 3.913166046142578, |
| "learning_rate": 1e-06, |
| "loss": 0.7001, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.11271914342863866, |
| "grad_norm": 5.1488165855407715, |
| "learning_rate": 1e-06, |
| "loss": 0.7735, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.11283680432992117, |
| "grad_norm": 3.0253422260284424, |
| "learning_rate": 1e-06, |
| "loss": 0.8549, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.11295446523120367, |
| "grad_norm": 15.515777587890625, |
| "learning_rate": 1e-06, |
| "loss": 0.7316, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.11307212613248617, |
| "grad_norm": 4.447922706604004, |
| "learning_rate": 1e-06, |
| "loss": 0.802, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.11318978703376868, |
| "grad_norm": 6.39625883102417, |
| "learning_rate": 1e-06, |
| "loss": 0.7921, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.11330744793505118, |
| "grad_norm": 10.484755516052246, |
| "learning_rate": 1e-06, |
| "loss": 0.7821, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.11342510883633368, |
| "grad_norm": 10.59896469116211, |
| "learning_rate": 1e-06, |
| "loss": 0.7996, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.11354276973761619, |
| "grad_norm": 6.321623802185059, |
| "learning_rate": 1e-06, |
| "loss": 0.8076, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.11366043063889869, |
| "grad_norm": 2.905377149581909, |
| "learning_rate": 1e-06, |
| "loss": 0.5892, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.1137780915401812, |
| "grad_norm": 4.772261619567871, |
| "learning_rate": 1e-06, |
| "loss": 0.7141, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.1138957524414637, |
| "grad_norm": 4.624825954437256, |
| "learning_rate": 1e-06, |
| "loss": 0.6739, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.1140134133427462, |
| "grad_norm": 9.244427680969238, |
| "learning_rate": 1e-06, |
| "loss": 0.7439, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.11413107424402871, |
| "grad_norm": 4.125616550445557, |
| "learning_rate": 1e-06, |
| "loss": 0.8356, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.11424873514531121, |
| "grad_norm": 2.889173746109009, |
| "learning_rate": 1e-06, |
| "loss": 0.751, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.11436639604659371, |
| "grad_norm": 4.325053691864014, |
| "learning_rate": 1e-06, |
| "loss": 0.8099, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.11448405694787622, |
| "grad_norm": 3.6954429149627686, |
| "learning_rate": 1e-06, |
| "loss": 0.9103, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.11460171784915872, |
| "grad_norm": 3.570265293121338, |
| "learning_rate": 1e-06, |
| "loss": 0.7352, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.11471937875044123, |
| "grad_norm": 3.713390350341797, |
| "learning_rate": 1e-06, |
| "loss": 0.6956, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.11483703965172373, |
| "grad_norm": 6.262350559234619, |
| "learning_rate": 1e-06, |
| "loss": 0.7834, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.11495470055300623, |
| "grad_norm": 13.737163543701172, |
| "learning_rate": 1e-06, |
| "loss": 0.6825, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.11507236145428874, |
| "grad_norm": 3.9696526527404785, |
| "learning_rate": 1e-06, |
| "loss": 0.8195, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.11519002235557124, |
| "grad_norm": 3.6462998390197754, |
| "learning_rate": 1e-06, |
| "loss": 0.8344, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.11530768325685374, |
| "grad_norm": 3.0372889041900635, |
| "learning_rate": 1e-06, |
| "loss": 0.8347, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.11542534415813625, |
| "grad_norm": 2.526247501373291, |
| "learning_rate": 1e-06, |
| "loss": 0.9124, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.11554300505941875, |
| "grad_norm": 3.632237195968628, |
| "learning_rate": 1e-06, |
| "loss": 0.6517, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.11566066596070126, |
| "grad_norm": 3.3434395790100098, |
| "learning_rate": 1e-06, |
| "loss": 0.7136, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.11577832686198376, |
| "grad_norm": 4.603308200836182, |
| "learning_rate": 1e-06, |
| "loss": 0.8149, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.11589598776326626, |
| "grad_norm": 4.951627731323242, |
| "learning_rate": 1e-06, |
| "loss": 0.7847, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.11601364866454877, |
| "grad_norm": 4.969621658325195, |
| "learning_rate": 1e-06, |
| "loss": 0.7254, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.11613130956583127, |
| "grad_norm": 3.2793381214141846, |
| "learning_rate": 1e-06, |
| "loss": 0.7147, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.11624897046711377, |
| "grad_norm": 2.3934788703918457, |
| "learning_rate": 1e-06, |
| "loss": 0.7056, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.11636663136839628, |
| "grad_norm": 3.8675055503845215, |
| "learning_rate": 1e-06, |
| "loss": 0.8171, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.11648429226967878, |
| "grad_norm": 5.718388080596924, |
| "learning_rate": 1e-06, |
| "loss": 0.813, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.1166019531709613, |
| "grad_norm": 4.406123161315918, |
| "learning_rate": 1e-06, |
| "loss": 0.8078, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.1167196140722438, |
| "grad_norm": 2.9738152027130127, |
| "learning_rate": 1e-06, |
| "loss": 0.7428, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.11683727497352629, |
| "grad_norm": 6.35261344909668, |
| "learning_rate": 1e-06, |
| "loss": 0.7331, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.1169549358748088, |
| "grad_norm": 6.00268030166626, |
| "learning_rate": 1e-06, |
| "loss": 0.766, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.1170725967760913, |
| "grad_norm": 12.03709888458252, |
| "learning_rate": 1e-06, |
| "loss": 0.7315, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.1171902576773738, |
| "grad_norm": 6.83236837387085, |
| "learning_rate": 1e-06, |
| "loss": 0.7091, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.11730791857865631, |
| "grad_norm": 6.966513633728027, |
| "learning_rate": 1e-06, |
| "loss": 0.8094, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.11742557947993881, |
| "grad_norm": 5.393833637237549, |
| "learning_rate": 1e-06, |
| "loss": 0.6795, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.11754324038122133, |
| "grad_norm": 4.947326183319092, |
| "learning_rate": 1e-06, |
| "loss": 0.8499, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.11766090128250382, |
| "grad_norm": 3.890005350112915, |
| "learning_rate": 1e-06, |
| "loss": 0.7317, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 10000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.6569126454516777e+18, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|