| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.631578947368421, | |
| "eval_steps": 369, | |
| "global_step": 1107, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0005705320211096848, | |
| "grad_norm": 83.0, | |
| "learning_rate": 0.0, | |
| "loss": 3.648493528366089, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0011410640422193695, | |
| "grad_norm": 84.5, | |
| "learning_rate": 1.3513513513513515e-06, | |
| "loss": 3.7405500411987305, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0017115960633290544, | |
| "grad_norm": 74.0, | |
| "learning_rate": 2.702702702702703e-06, | |
| "loss": 3.510922431945801, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.002282128084438739, | |
| "grad_norm": 74.0, | |
| "learning_rate": 4.0540540540540545e-06, | |
| "loss": 3.477842330932617, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.002852660105548424, | |
| "grad_norm": 48.5, | |
| "learning_rate": 5.405405405405406e-06, | |
| "loss": 3.2050325870513916, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0034231921266581087, | |
| "grad_norm": 35.0, | |
| "learning_rate": 6.7567567567567575e-06, | |
| "loss": 2.9610347747802734, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.003993724147767793, | |
| "grad_norm": 25.75, | |
| "learning_rate": 8.108108108108109e-06, | |
| "loss": 2.8089160919189453, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.004564256168877478, | |
| "grad_norm": 15.25, | |
| "learning_rate": 9.45945945945946e-06, | |
| "loss": 2.672607183456421, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.005134788189987163, | |
| "grad_norm": 10.125, | |
| "learning_rate": 1.0810810810810812e-05, | |
| "loss": 2.4392411708831787, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.005705320211096848, | |
| "grad_norm": 8.875, | |
| "learning_rate": 1.2162162162162164e-05, | |
| "loss": 2.4409432411193848, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.006275852232206533, | |
| "grad_norm": 7.40625, | |
| "learning_rate": 1.3513513513513515e-05, | |
| "loss": 2.3299427032470703, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.0068463842533162175, | |
| "grad_norm": 6.6875, | |
| "learning_rate": 1.4864864864864867e-05, | |
| "loss": 2.2852554321289062, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.007416916274425902, | |
| "grad_norm": 6.5625, | |
| "learning_rate": 1.6216216216216218e-05, | |
| "loss": 2.2712786197662354, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.007987448295535587, | |
| "grad_norm": 7.1875, | |
| "learning_rate": 1.756756756756757e-05, | |
| "loss": 2.2143714427948, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.008557980316645272, | |
| "grad_norm": 7.0, | |
| "learning_rate": 1.891891891891892e-05, | |
| "loss": 2.0812437534332275, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.009128512337754956, | |
| "grad_norm": 6.28125, | |
| "learning_rate": 2.0270270270270273e-05, | |
| "loss": 2.068169355392456, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.009699044358864642, | |
| "grad_norm": 5.75, | |
| "learning_rate": 2.1621621621621624e-05, | |
| "loss": 1.8387004137039185, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.010269576379974325, | |
| "grad_norm": 4.3125, | |
| "learning_rate": 2.2972972972972976e-05, | |
| "loss": 1.7710001468658447, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.01084010840108401, | |
| "grad_norm": 4.25, | |
| "learning_rate": 2.4324324324324327e-05, | |
| "loss": 1.7796661853790283, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.011410640422193696, | |
| "grad_norm": 3.5, | |
| "learning_rate": 2.5675675675675675e-05, | |
| "loss": 1.6957234144210815, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01198117244330338, | |
| "grad_norm": 3.21875, | |
| "learning_rate": 2.702702702702703e-05, | |
| "loss": 1.7516167163848877, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.012551704464413066, | |
| "grad_norm": 2.78125, | |
| "learning_rate": 2.8378378378378378e-05, | |
| "loss": 1.6087043285369873, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.01312223648552275, | |
| "grad_norm": 2.34375, | |
| "learning_rate": 2.9729729729729733e-05, | |
| "loss": 1.5943574905395508, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.013692768506632435, | |
| "grad_norm": 2.140625, | |
| "learning_rate": 3.108108108108108e-05, | |
| "loss": 1.599621295928955, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.014263300527742119, | |
| "grad_norm": 2.234375, | |
| "learning_rate": 3.2432432432432436e-05, | |
| "loss": 1.6016688346862793, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.014833832548851804, | |
| "grad_norm": 1.9609375, | |
| "learning_rate": 3.3783783783783784e-05, | |
| "loss": 1.5124552249908447, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.01540436456996149, | |
| "grad_norm": 1.9765625, | |
| "learning_rate": 3.513513513513514e-05, | |
| "loss": 1.5520291328430176, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.015974896591071173, | |
| "grad_norm": 1.90625, | |
| "learning_rate": 3.648648648648649e-05, | |
| "loss": 1.4819629192352295, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.01654542861218086, | |
| "grad_norm": 2.0, | |
| "learning_rate": 3.783783783783784e-05, | |
| "loss": 1.5304462909698486, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.017115960633290545, | |
| "grad_norm": 1.7734375, | |
| "learning_rate": 3.918918918918919e-05, | |
| "loss": 1.4461307525634766, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.017686492654400227, | |
| "grad_norm": 1.8125, | |
| "learning_rate": 4.0540540540540545e-05, | |
| "loss": 1.4548516273498535, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.018257024675509912, | |
| "grad_norm": 1.6875, | |
| "learning_rate": 4.189189189189189e-05, | |
| "loss": 1.435849905014038, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.018827556696619598, | |
| "grad_norm": 1.578125, | |
| "learning_rate": 4.324324324324325e-05, | |
| "loss": 1.4789021015167236, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.019398088717729283, | |
| "grad_norm": 1.578125, | |
| "learning_rate": 4.4594594594594596e-05, | |
| "loss": 1.3856297731399536, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.01996862073883897, | |
| "grad_norm": 1.65625, | |
| "learning_rate": 4.594594594594595e-05, | |
| "loss": 1.5028152465820312, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.02053915275994865, | |
| "grad_norm": 1.46875, | |
| "learning_rate": 4.72972972972973e-05, | |
| "loss": 1.4294812679290771, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.021109684781058336, | |
| "grad_norm": 1.65625, | |
| "learning_rate": 4.8648648648648654e-05, | |
| "loss": 1.3971917629241943, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.02168021680216802, | |
| "grad_norm": 1.515625, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3995487689971924, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.022250748823277707, | |
| "grad_norm": 1.703125, | |
| "learning_rate": 4.9972283813747225e-05, | |
| "loss": 1.4693856239318848, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.022821280844387393, | |
| "grad_norm": 1.5703125, | |
| "learning_rate": 4.994456762749446e-05, | |
| "loss": 1.4715073108673096, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.023391812865497075, | |
| "grad_norm": 1.703125, | |
| "learning_rate": 4.9916851441241684e-05, | |
| "loss": 1.490320086479187, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.02396234488660676, | |
| "grad_norm": 1.5, | |
| "learning_rate": 4.9889135254988913e-05, | |
| "loss": 1.3657546043395996, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.024532876907716446, | |
| "grad_norm": 1.7109375, | |
| "learning_rate": 4.986141906873614e-05, | |
| "loss": 1.4324053525924683, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.02510340892882613, | |
| "grad_norm": 1.7890625, | |
| "learning_rate": 4.983370288248337e-05, | |
| "loss": 1.3849389553070068, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.025673940949935817, | |
| "grad_norm": 1.453125, | |
| "learning_rate": 4.98059866962306e-05, | |
| "loss": 1.425079345703125, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.0262444729710455, | |
| "grad_norm": 1.484375, | |
| "learning_rate": 4.977827050997783e-05, | |
| "loss": 1.4127968549728394, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.026815004992155184, | |
| "grad_norm": 1.5390625, | |
| "learning_rate": 4.9750554323725054e-05, | |
| "loss": 1.429938793182373, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.02738553701326487, | |
| "grad_norm": 1.4140625, | |
| "learning_rate": 4.972283813747229e-05, | |
| "loss": 1.4178887605667114, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.027956069034374555, | |
| "grad_norm": 1.46875, | |
| "learning_rate": 4.969512195121951e-05, | |
| "loss": 1.4397588968276978, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.028526601055484237, | |
| "grad_norm": 1.453125, | |
| "learning_rate": 4.966740576496674e-05, | |
| "loss": 1.3697854280471802, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.029097133076593923, | |
| "grad_norm": 1.3984375, | |
| "learning_rate": 4.963968957871397e-05, | |
| "loss": 1.3517988920211792, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.02966766509770361, | |
| "grad_norm": 1.3984375, | |
| "learning_rate": 4.9611973392461195e-05, | |
| "loss": 1.4193122386932373, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.030238197118813294, | |
| "grad_norm": 1.34375, | |
| "learning_rate": 4.958425720620843e-05, | |
| "loss": 1.37640380859375, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.03080872913992298, | |
| "grad_norm": 1.40625, | |
| "learning_rate": 4.9556541019955654e-05, | |
| "loss": 1.336474895477295, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.031379261161032665, | |
| "grad_norm": 1.515625, | |
| "learning_rate": 4.952882483370288e-05, | |
| "loss": 1.4701391458511353, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.03194979318214235, | |
| "grad_norm": 1.4765625, | |
| "learning_rate": 4.950110864745011e-05, | |
| "loss": 1.3760974407196045, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.032520325203252036, | |
| "grad_norm": 1.4609375, | |
| "learning_rate": 4.947339246119734e-05, | |
| "loss": 1.3897124528884888, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.03309085722436172, | |
| "grad_norm": 1.578125, | |
| "learning_rate": 4.944567627494457e-05, | |
| "loss": 1.4239261150360107, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.0336613892454714, | |
| "grad_norm": 1.53125, | |
| "learning_rate": 4.94179600886918e-05, | |
| "loss": 1.3669216632843018, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.03423192126658109, | |
| "grad_norm": 1.3515625, | |
| "learning_rate": 4.9390243902439024e-05, | |
| "loss": 1.346958041191101, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.03480245328769077, | |
| "grad_norm": 1.5234375, | |
| "learning_rate": 4.936252771618626e-05, | |
| "loss": 1.4235575199127197, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.03537298530880045, | |
| "grad_norm": 1.3359375, | |
| "learning_rate": 4.933481152993348e-05, | |
| "loss": 1.3075377941131592, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.03594351732991014, | |
| "grad_norm": 1.3203125, | |
| "learning_rate": 4.930709534368071e-05, | |
| "loss": 1.3214820623397827, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.036514049351019824, | |
| "grad_norm": 1.3515625, | |
| "learning_rate": 4.927937915742794e-05, | |
| "loss": 1.39829421043396, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.03708458137212951, | |
| "grad_norm": 1.3671875, | |
| "learning_rate": 4.9251662971175164e-05, | |
| "loss": 1.3523836135864258, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.037655113393239195, | |
| "grad_norm": 1.3125, | |
| "learning_rate": 4.92239467849224e-05, | |
| "loss": 1.3268153667449951, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.03822564541434888, | |
| "grad_norm": 1.28125, | |
| "learning_rate": 4.919623059866962e-05, | |
| "loss": 1.3205022811889648, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.038796177435458566, | |
| "grad_norm": 1.2734375, | |
| "learning_rate": 4.916851441241685e-05, | |
| "loss": 1.2956037521362305, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.03936670945656825, | |
| "grad_norm": 1.375, | |
| "learning_rate": 4.914079822616408e-05, | |
| "loss": 1.3702654838562012, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.03993724147767794, | |
| "grad_norm": 1.296875, | |
| "learning_rate": 4.911308203991131e-05, | |
| "loss": 1.388296127319336, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.04050777349878762, | |
| "grad_norm": 1.5078125, | |
| "learning_rate": 4.908536585365854e-05, | |
| "loss": 1.4403045177459717, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.0410783055198973, | |
| "grad_norm": 1.25, | |
| "learning_rate": 4.905764966740577e-05, | |
| "loss": 1.3626902103424072, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.04164883754100699, | |
| "grad_norm": 1.34375, | |
| "learning_rate": 4.902993348115299e-05, | |
| "loss": 1.382088303565979, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.04221936956211667, | |
| "grad_norm": 1.2578125, | |
| "learning_rate": 4.900221729490023e-05, | |
| "loss": 1.3237360715866089, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.04278990158322636, | |
| "grad_norm": 1.296875, | |
| "learning_rate": 4.897450110864745e-05, | |
| "loss": 1.319187879562378, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.04336043360433604, | |
| "grad_norm": 1.3828125, | |
| "learning_rate": 4.894678492239468e-05, | |
| "loss": 1.3707743883132935, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.043930965625445725, | |
| "grad_norm": 1.2578125, | |
| "learning_rate": 4.891906873614191e-05, | |
| "loss": 1.3658738136291504, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.044501497646555414, | |
| "grad_norm": 1.265625, | |
| "learning_rate": 4.8891352549889134e-05, | |
| "loss": 1.3247051239013672, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.045072029667665096, | |
| "grad_norm": 1.421875, | |
| "learning_rate": 4.886363636363637e-05, | |
| "loss": 1.3614035844802856, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.045642561688774785, | |
| "grad_norm": 1.2734375, | |
| "learning_rate": 4.883592017738359e-05, | |
| "loss": 1.2589421272277832, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.04621309370988447, | |
| "grad_norm": 1.28125, | |
| "learning_rate": 4.880820399113082e-05, | |
| "loss": 1.3525424003601074, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.04678362573099415, | |
| "grad_norm": 1.2578125, | |
| "learning_rate": 4.878048780487805e-05, | |
| "loss": 1.2903777360916138, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.04735415775210384, | |
| "grad_norm": 1.328125, | |
| "learning_rate": 4.875277161862528e-05, | |
| "loss": 1.3538789749145508, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.04792468977321352, | |
| "grad_norm": 1.3046875, | |
| "learning_rate": 4.872505543237251e-05, | |
| "loss": 1.3419591188430786, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.04849522179432321, | |
| "grad_norm": 1.28125, | |
| "learning_rate": 4.869733924611974e-05, | |
| "loss": 1.3367938995361328, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.04906575381543289, | |
| "grad_norm": 1.3046875, | |
| "learning_rate": 4.866962305986696e-05, | |
| "loss": 1.2979538440704346, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.049636285836542574, | |
| "grad_norm": 1.25, | |
| "learning_rate": 4.864190687361419e-05, | |
| "loss": 1.348291039466858, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.05020681785765226, | |
| "grad_norm": 1.3125, | |
| "learning_rate": 4.861419068736142e-05, | |
| "loss": 1.3377124071121216, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.050777349878761945, | |
| "grad_norm": 1.328125, | |
| "learning_rate": 4.8586474501108644e-05, | |
| "loss": 1.3180426359176636, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.051347881899871634, | |
| "grad_norm": 1.2109375, | |
| "learning_rate": 4.855875831485588e-05, | |
| "loss": 1.3215968608856201, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.051918413920981316, | |
| "grad_norm": 1.2421875, | |
| "learning_rate": 4.85310421286031e-05, | |
| "loss": 1.3354041576385498, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.052488945942091, | |
| "grad_norm": 1.28125, | |
| "learning_rate": 4.850332594235034e-05, | |
| "loss": 1.3552148342132568, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.05305947796320069, | |
| "grad_norm": 1.2421875, | |
| "learning_rate": 4.847560975609756e-05, | |
| "loss": 1.2916048765182495, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.05363000998431037, | |
| "grad_norm": 1.3125, | |
| "learning_rate": 4.844789356984479e-05, | |
| "loss": 1.3131537437438965, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.05420054200542006, | |
| "grad_norm": 1.296875, | |
| "learning_rate": 4.842017738359202e-05, | |
| "loss": 1.2902660369873047, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.05477107402652974, | |
| "grad_norm": 1.28125, | |
| "learning_rate": 4.839246119733925e-05, | |
| "loss": 1.3799315690994263, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.05534160604763942, | |
| "grad_norm": 1.3515625, | |
| "learning_rate": 4.836474501108647e-05, | |
| "loss": 1.3607311248779297, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.05591213806874911, | |
| "grad_norm": 1.2421875, | |
| "learning_rate": 4.833702882483371e-05, | |
| "loss": 1.3038060665130615, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.05648267008985879, | |
| "grad_norm": 1.234375, | |
| "learning_rate": 4.830931263858093e-05, | |
| "loss": 1.318457841873169, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.057053202110968475, | |
| "grad_norm": 1.2890625, | |
| "learning_rate": 4.828159645232816e-05, | |
| "loss": 1.3159422874450684, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.057623734132078164, | |
| "grad_norm": 1.25, | |
| "learning_rate": 4.825388026607539e-05, | |
| "loss": 1.3275076150894165, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.058194266153187846, | |
| "grad_norm": 1.1796875, | |
| "learning_rate": 4.8226164079822614e-05, | |
| "loss": 1.2983460426330566, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.058764798174297535, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 4.819844789356985e-05, | |
| "loss": 1.3114261627197266, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.05933533019540722, | |
| "grad_norm": 1.1796875, | |
| "learning_rate": 4.817073170731707e-05, | |
| "loss": 1.266122817993164, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.0599058622165169, | |
| "grad_norm": 1.234375, | |
| "learning_rate": 4.81430155210643e-05, | |
| "loss": 1.3662368059158325, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.06047639423762659, | |
| "grad_norm": 1.296875, | |
| "learning_rate": 4.811529933481153e-05, | |
| "loss": 1.3158059120178223, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.06104692625873627, | |
| "grad_norm": 1.234375, | |
| "learning_rate": 4.808758314855876e-05, | |
| "loss": 1.3571752309799194, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.06161745827984596, | |
| "grad_norm": 1.34375, | |
| "learning_rate": 4.805986696230599e-05, | |
| "loss": 1.3249101638793945, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.06218799030095564, | |
| "grad_norm": 1.21875, | |
| "learning_rate": 4.803215077605322e-05, | |
| "loss": 1.3386337757110596, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.06275852232206533, | |
| "grad_norm": 1.2421875, | |
| "learning_rate": 4.800443458980044e-05, | |
| "loss": 1.2874070405960083, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.06332905434317501, | |
| "grad_norm": 1.28125, | |
| "learning_rate": 4.797671840354768e-05, | |
| "loss": 1.3232687711715698, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.0638995863642847, | |
| "grad_norm": 1.203125, | |
| "learning_rate": 4.79490022172949e-05, | |
| "loss": 1.3370904922485352, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.06447011838539438, | |
| "grad_norm": 1.3203125, | |
| "learning_rate": 4.792128603104213e-05, | |
| "loss": 1.3211901187896729, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.06504065040650407, | |
| "grad_norm": 1.2578125, | |
| "learning_rate": 4.789356984478936e-05, | |
| "loss": 1.3841608762741089, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.06561118242761375, | |
| "grad_norm": 1.2890625, | |
| "learning_rate": 4.786585365853658e-05, | |
| "loss": 1.4017915725708008, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.06618171444872344, | |
| "grad_norm": 1.4140625, | |
| "learning_rate": 4.783813747228382e-05, | |
| "loss": 1.4110525846481323, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.06675224646983312, | |
| "grad_norm": 1.2734375, | |
| "learning_rate": 4.781042128603104e-05, | |
| "loss": 1.2671241760253906, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.0673227784909428, | |
| "grad_norm": 1.21875, | |
| "learning_rate": 4.778270509977827e-05, | |
| "loss": 1.2970881462097168, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.06789331051205248, | |
| "grad_norm": 1.3515625, | |
| "learning_rate": 4.77549889135255e-05, | |
| "loss": 1.2626357078552246, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.06846384253316218, | |
| "grad_norm": 1.3203125, | |
| "learning_rate": 4.772727272727273e-05, | |
| "loss": 1.2779147624969482, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.06903437455427186, | |
| "grad_norm": 1.1796875, | |
| "learning_rate": 4.769955654101996e-05, | |
| "loss": 1.308679461479187, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.06960490657538154, | |
| "grad_norm": 1.234375, | |
| "learning_rate": 4.767184035476719e-05, | |
| "loss": 1.299755573272705, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.07017543859649122, | |
| "grad_norm": 1.2578125, | |
| "learning_rate": 4.764412416851441e-05, | |
| "loss": 1.3637490272521973, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.0707459706176009, | |
| "grad_norm": 1.21875, | |
| "learning_rate": 4.761640798226164e-05, | |
| "loss": 1.3058216571807861, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.0713165026387106, | |
| "grad_norm": 1.2734375, | |
| "learning_rate": 4.758869179600887e-05, | |
| "loss": 1.3146748542785645, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.07188703465982028, | |
| "grad_norm": 1.2109375, | |
| "learning_rate": 4.75609756097561e-05, | |
| "loss": 1.2844371795654297, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.07245756668092997, | |
| "grad_norm": 1.25, | |
| "learning_rate": 4.753325942350333e-05, | |
| "loss": 1.3195525407791138, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.07302809870203965, | |
| "grad_norm": 1.1953125, | |
| "learning_rate": 4.750554323725055e-05, | |
| "loss": 1.3399118185043335, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.07359863072314933, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 4.747782705099779e-05, | |
| "loss": 1.2919648885726929, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.07416916274425903, | |
| "grad_norm": 1.2421875, | |
| "learning_rate": 4.745011086474501e-05, | |
| "loss": 1.277235507965088, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.07473969476536871, | |
| "grad_norm": 1.25, | |
| "learning_rate": 4.742239467849224e-05, | |
| "loss": 1.3034231662750244, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.07531022678647839, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 4.739467849223947e-05, | |
| "loss": 1.2368437051773071, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.07588075880758807, | |
| "grad_norm": 1.2265625, | |
| "learning_rate": 4.73669623059867e-05, | |
| "loss": 1.3728649616241455, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.07645129082869775, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 4.733924611973393e-05, | |
| "loss": 1.2506084442138672, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.07702182284980745, | |
| "grad_norm": 1.2109375, | |
| "learning_rate": 4.731152993348116e-05, | |
| "loss": 1.2813055515289307, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.07759235487091713, | |
| "grad_norm": 1.203125, | |
| "learning_rate": 4.728381374722838e-05, | |
| "loss": 1.2894189357757568, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.07816288689202681, | |
| "grad_norm": 1.203125, | |
| "learning_rate": 4.725609756097561e-05, | |
| "loss": 1.3396642208099365, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.0787334189131365, | |
| "grad_norm": 1.359375, | |
| "learning_rate": 4.722838137472284e-05, | |
| "loss": 1.3043787479400635, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.07930395093424618, | |
| "grad_norm": 1.203125, | |
| "learning_rate": 4.720066518847007e-05, | |
| "loss": 1.308459997177124, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.07987448295535587, | |
| "grad_norm": 1.21875, | |
| "learning_rate": 4.71729490022173e-05, | |
| "loss": 1.3281002044677734, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.08044501497646556, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 4.714523281596452e-05, | |
| "loss": 1.3146984577178955, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.08101554699757524, | |
| "grad_norm": 1.203125, | |
| "learning_rate": 4.711751662971176e-05, | |
| "loss": 1.3078755140304565, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.08158607901868492, | |
| "grad_norm": 1.21875, | |
| "learning_rate": 4.708980044345898e-05, | |
| "loss": 1.3129773139953613, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.0821566110397946, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 4.706208425720621e-05, | |
| "loss": 1.2827129364013672, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.0827271430609043, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 4.703436807095344e-05, | |
| "loss": 1.3232603073120117, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.08329767508201398, | |
| "grad_norm": 1.1796875, | |
| "learning_rate": 4.700665188470067e-05, | |
| "loss": 1.220211386680603, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.08386820710312366, | |
| "grad_norm": 1.2421875, | |
| "learning_rate": 4.69789356984479e-05, | |
| "loss": 1.3406665325164795, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.08443873912423334, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 4.695121951219512e-05, | |
| "loss": 1.2698848247528076, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.08500927114534303, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 4.692350332594235e-05, | |
| "loss": 1.3016014099121094, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.08557980316645272, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 4.689578713968958e-05, | |
| "loss": 1.2674150466918945, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.0861503351875624, | |
| "grad_norm": 1.234375, | |
| "learning_rate": 4.686807095343681e-05, | |
| "loss": 1.316935420036316, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.08672086720867209, | |
| "grad_norm": 1.125, | |
| "learning_rate": 4.684035476718403e-05, | |
| "loss": 1.263155221939087, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.08729139922978177, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 4.681263858093127e-05, | |
| "loss": 1.30006742477417, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.08786193125089145, | |
| "grad_norm": 1.296875, | |
| "learning_rate": 4.678492239467849e-05, | |
| "loss": 1.3325148820877075, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.08843246327200115, | |
| "grad_norm": 1.3125, | |
| "learning_rate": 4.675720620842573e-05, | |
| "loss": 1.2306278944015503, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.08900299529311083, | |
| "grad_norm": 1.234375, | |
| "learning_rate": 4.672949002217295e-05, | |
| "loss": 1.3476486206054688, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.08957352731422051, | |
| "grad_norm": 1.1953125, | |
| "learning_rate": 4.670177383592018e-05, | |
| "loss": 1.2401833534240723, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.09014405933533019, | |
| "grad_norm": 1.296875, | |
| "learning_rate": 4.667405764966741e-05, | |
| "loss": 1.3140380382537842, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.09071459135643987, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 4.664634146341464e-05, | |
| "loss": 1.29231595993042, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.09128512337754957, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 4.661862527716186e-05, | |
| "loss": 1.2908031940460205, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.09185565539865925, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 4.659090909090909e-05, | |
| "loss": 1.259028434753418, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.09242618741976893, | |
| "grad_norm": 1.203125, | |
| "learning_rate": 4.656319290465632e-05, | |
| "loss": 1.2758322954177856, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.09299671944087862, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 4.653547671840355e-05, | |
| "loss": 1.2392590045928955, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.0935672514619883, | |
| "grad_norm": 1.2421875, | |
| "learning_rate": 4.650776053215078e-05, | |
| "loss": 1.3232059478759766, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.094137783483098, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 4.6480044345898e-05, | |
| "loss": 1.3052716255187988, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.09470831550420768, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 4.645232815964524e-05, | |
| "loss": 1.2643868923187256, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.09527884752531736, | |
| "grad_norm": 1.1796875, | |
| "learning_rate": 4.642461197339246e-05, | |
| "loss": 1.3158135414123535, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.09584937954642704, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 4.639689578713969e-05, | |
| "loss": 1.2975637912750244, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.09641991156753672, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 4.636917960088692e-05, | |
| "loss": 1.202270269393921, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.09699044358864642, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.634146341463415e-05, | |
| "loss": 1.1989184617996216, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.0975609756097561, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 4.631374722838138e-05, | |
| "loss": 1.325451374053955, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.09813150763086578, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 4.628603104212861e-05, | |
| "loss": 1.3150224685668945, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.09870203965197547, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 4.625831485587583e-05, | |
| "loss": 1.2864487171173096, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.09927257167308515, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 4.623059866962306e-05, | |
| "loss": 1.3033939599990845, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.09984310369419484, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 4.620288248337029e-05, | |
| "loss": 1.2654147148132324, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.10041363571530453, | |
| "grad_norm": 1.125, | |
| "learning_rate": 4.617516629711752e-05, | |
| "loss": 1.2905241250991821, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.10098416773641421, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 4.614745011086475e-05, | |
| "loss": 1.2881019115447998, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.10155469975752389, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 4.611973392461197e-05, | |
| "loss": 1.3300973176956177, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.10212523177863357, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 4.609201773835921e-05, | |
| "loss": 1.3166918754577637, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.10269576379974327, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 4.606430155210643e-05, | |
| "loss": 1.2149487733840942, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.10326629582085295, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 4.603658536585366e-05, | |
| "loss": 1.284995198249817, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.10383682784196263, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 4.600886917960089e-05, | |
| "loss": 1.3197823762893677, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.10440735986307231, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 4.598115299334812e-05, | |
| "loss": 1.2414249181747437, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.104977891884182, | |
| "grad_norm": 1.1953125, | |
| "learning_rate": 4.595343680709535e-05, | |
| "loss": 1.2936391830444336, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.10554842390529169, | |
| "grad_norm": 1.203125, | |
| "learning_rate": 4.592572062084257e-05, | |
| "loss": 1.2889211177825928, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.10611895592640137, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 4.58980044345898e-05, | |
| "loss": 1.2958948612213135, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.10668948794751106, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 4.587028824833703e-05, | |
| "loss": 1.3174210786819458, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.10726001996862074, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 4.584257206208426e-05, | |
| "loss": 1.3083107471466064, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.10783055198973042, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.581485587583149e-05, | |
| "loss": 1.2460663318634033, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.10840108401084012, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 4.578713968957872e-05, | |
| "loss": 1.262696623802185, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.1089716160319498, | |
| "grad_norm": 1.1796875, | |
| "learning_rate": 4.575942350332594e-05, | |
| "loss": 1.290346384048462, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.10954214805305948, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 4.573170731707318e-05, | |
| "loss": 1.2630096673965454, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.11011268007416916, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.57039911308204e-05, | |
| "loss": 1.2521231174468994, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.11068321209527884, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 4.567627494456763e-05, | |
| "loss": 1.2671630382537842, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.11125374411638853, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 4.564855875831486e-05, | |
| "loss": 1.3561689853668213, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.11182427613749822, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 4.562084257206209e-05, | |
| "loss": 1.2499645948410034, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.1123948081586079, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 4.559312638580932e-05, | |
| "loss": 1.2348875999450684, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.11296534017971759, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 4.556541019955654e-05, | |
| "loss": 1.322629690170288, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.11353587220082727, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 4.553769401330377e-05, | |
| "loss": 1.2846410274505615, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.11410640422193695, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 4.5509977827051e-05, | |
| "loss": 1.311292052268982, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.11467693624304665, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 4.548226164079823e-05, | |
| "loss": 1.2933259010314941, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.11524746826415633, | |
| "grad_norm": 1.21875, | |
| "learning_rate": 4.545454545454546e-05, | |
| "loss": 1.3615764379501343, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.11581800028526601, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 4.542682926829269e-05, | |
| "loss": 1.187692403793335, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.11638853230637569, | |
| "grad_norm": 1.1796875, | |
| "learning_rate": 4.539911308203991e-05, | |
| "loss": 1.2587438821792603, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.11695906432748537, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 4.537139689578715e-05, | |
| "loss": 1.2154557704925537, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.11752959634859507, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 4.534368070953437e-05, | |
| "loss": 1.2670985460281372, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.11810012836970475, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 4.53159645232816e-05, | |
| "loss": 1.292269229888916, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.11867066039081443, | |
| "grad_norm": 1.21875, | |
| "learning_rate": 4.528824833702883e-05, | |
| "loss": 1.2353066205978394, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.11924119241192412, | |
| "grad_norm": 1.125, | |
| "learning_rate": 4.526053215077605e-05, | |
| "loss": 1.2745922803878784, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.1198117244330338, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 4.523281596452328e-05, | |
| "loss": 1.2637782096862793, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.1203822564541435, | |
| "grad_norm": 1.125, | |
| "learning_rate": 4.520509977827051e-05, | |
| "loss": 1.2595422267913818, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.12095278847525318, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 4.517738359201774e-05, | |
| "loss": 1.2515778541564941, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.12152332049636286, | |
| "grad_norm": 1.1953125, | |
| "learning_rate": 4.514966740576497e-05, | |
| "loss": 1.2258851528167725, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.12209385251747254, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 4.51219512195122e-05, | |
| "loss": 1.2595672607421875, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.12266438453858222, | |
| "grad_norm": 1.125, | |
| "learning_rate": 4.509423503325942e-05, | |
| "loss": 1.2574856281280518, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.12323491655969192, | |
| "grad_norm": 1.21875, | |
| "learning_rate": 4.506651884700666e-05, | |
| "loss": 1.2860839366912842, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.1238054485808016, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 4.503880266075388e-05, | |
| "loss": 1.2748535871505737, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.12437598060191128, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 4.501108647450111e-05, | |
| "loss": 1.2630361318588257, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.12494651262302096, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 4.498337028824834e-05, | |
| "loss": 1.2100318670272827, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.12551704464413066, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 4.495565410199557e-05, | |
| "loss": 1.279637098312378, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.12608757666524034, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 4.49279379157428e-05, | |
| "loss": 1.241306185722351, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.12665810868635002, | |
| "grad_norm": 1.125, | |
| "learning_rate": 4.490022172949002e-05, | |
| "loss": 1.2467423677444458, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.1272286407074597, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 4.487250554323725e-05, | |
| "loss": 1.2398571968078613, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.1277991727285694, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 4.484478935698448e-05, | |
| "loss": 1.298073410987854, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.12836970474967907, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 4.481707317073171e-05, | |
| "loss": 1.3275305032730103, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.12894023677078875, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 4.478935698447894e-05, | |
| "loss": 1.2483649253845215, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.12951076879189843, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 4.476164079822617e-05, | |
| "loss": 1.322462797164917, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.13008130081300814, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 4.473392461197339e-05, | |
| "loss": 1.2100863456726074, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.13065183283411783, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 4.470620842572063e-05, | |
| "loss": 1.249301552772522, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.1312223648552275, | |
| "grad_norm": 1.125, | |
| "learning_rate": 4.467849223946785e-05, | |
| "loss": 1.2208349704742432, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.1317928968763372, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 4.465077605321508e-05, | |
| "loss": 1.2686306238174438, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.13236342889744687, | |
| "grad_norm": 1.1796875, | |
| "learning_rate": 4.462305986696231e-05, | |
| "loss": 1.2922316789627075, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.13293396091855655, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 4.459534368070954e-05, | |
| "loss": 1.2734718322753906, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.13350449293966624, | |
| "grad_norm": 1.125, | |
| "learning_rate": 4.456762749445677e-05, | |
| "loss": 1.2748900651931763, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.13407502496077592, | |
| "grad_norm": 1.125, | |
| "learning_rate": 4.453991130820399e-05, | |
| "loss": 1.2857415676116943, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.1346455569818856, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 4.451219512195122e-05, | |
| "loss": 1.2689714431762695, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.13521608900299528, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 4.448447893569845e-05, | |
| "loss": 1.248453140258789, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.13578662102410496, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 4.445676274944568e-05, | |
| "loss": 1.2693870067596436, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.13635715304521467, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 4.442904656319291e-05, | |
| "loss": 1.2767329216003418, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.13692768506632436, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 4.440133037694014e-05, | |
| "loss": 1.2598170042037964, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.13749821708743404, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 4.437361419068736e-05, | |
| "loss": 1.2850111722946167, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.13806874910854372, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 4.4345898004434597e-05, | |
| "loss": 1.2005095481872559, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.1386392811296534, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 4.431818181818182e-05, | |
| "loss": 1.2896265983581543, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.13920981315076308, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 4.429046563192905e-05, | |
| "loss": 1.3427916765213013, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.13978034517187277, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 4.426274944567628e-05, | |
| "loss": 1.2719500064849854, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.14035087719298245, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 4.42350332594235e-05, | |
| "loss": 1.2944797277450562, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.14092140921409213, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 4.420731707317074e-05, | |
| "loss": 1.3022198677062988, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.1414919412352018, | |
| "grad_norm": 1.2109375, | |
| "learning_rate": 4.417960088691796e-05, | |
| "loss": 1.286307454109192, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.14206247325631152, | |
| "grad_norm": 1.328125, | |
| "learning_rate": 4.415188470066519e-05, | |
| "loss": 1.3540141582489014, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.1426330052774212, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 4.412416851441242e-05, | |
| "loss": 1.2702994346618652, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.1432035372985309, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 4.409645232815965e-05, | |
| "loss": 1.2684781551361084, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.14377406931964057, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 4.406873614190688e-05, | |
| "loss": 1.1907923221588135, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.14434460134075025, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 4.404101995565411e-05, | |
| "loss": 1.2790608406066895, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.14491513336185993, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.401330376940133e-05, | |
| "loss": 1.2878901958465576, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.14548566538296961, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 4.3985587583148566e-05, | |
| "loss": 1.2305991649627686, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.1460561974040793, | |
| "grad_norm": 1.1796875, | |
| "learning_rate": 4.395787139689579e-05, | |
| "loss": 1.3150757551193237, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.14662672942518898, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 4.393015521064302e-05, | |
| "loss": 1.213336706161499, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.14719726144629866, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 4.390243902439025e-05, | |
| "loss": 1.2233829498291016, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.14776779346740837, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 4.387472283813747e-05, | |
| "loss": 1.1772549152374268, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.14833832548851805, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 4.3847006651884707e-05, | |
| "loss": 1.3097314834594727, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.14890885750962773, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 4.381929046563193e-05, | |
| "loss": 1.3049172163009644, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.14947938953073742, | |
| "grad_norm": 1.125, | |
| "learning_rate": 4.379157427937916e-05, | |
| "loss": 1.3094444274902344, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.1500499215518471, | |
| "grad_norm": 1.125, | |
| "learning_rate": 4.376385809312639e-05, | |
| "loss": 1.3298535346984863, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.15062045357295678, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 4.373614190687362e-05, | |
| "loss": 1.2394543886184692, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.15119098559406646, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 4.370842572062084e-05, | |
| "loss": 1.2180919647216797, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.15176151761517614, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 4.3680709534368077e-05, | |
| "loss": 1.2652344703674316, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.15233204963628583, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 4.36529933481153e-05, | |
| "loss": 1.2816247940063477, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.1529025816573955, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 4.3625277161862536e-05, | |
| "loss": 1.2074222564697266, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.15347311367850522, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 4.359756097560976e-05, | |
| "loss": 1.2124351263046265, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.1540436456996149, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 4.356984478935698e-05, | |
| "loss": 1.187751293182373, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.15461417772072458, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 4.354212860310422e-05, | |
| "loss": 1.1458532810211182, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.15518470974183426, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 4.351441241685144e-05, | |
| "loss": 1.229477882385254, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.15575524176294395, | |
| "grad_norm": 1.2265625, | |
| "learning_rate": 4.348669623059867e-05, | |
| "loss": 1.2863445281982422, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.15632577378405363, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 4.34589800443459e-05, | |
| "loss": 1.226841688156128, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.1568963058051633, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 4.343126385809313e-05, | |
| "loss": 1.2147347927093506, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.157466837826273, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 4.340354767184036e-05, | |
| "loss": 1.2533400058746338, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.15803736984738267, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 4.337583148558759e-05, | |
| "loss": 1.2199838161468506, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.15860790186849236, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 4.334811529933481e-05, | |
| "loss": 1.196079969406128, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.15917843388960207, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 4.3320399113082046e-05, | |
| "loss": 1.2512052059173584, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.15974896591071175, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 4.329268292682927e-05, | |
| "loss": 1.2729978561401367, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.16031949793182143, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 4.32649667405765e-05, | |
| "loss": 1.2414803504943848, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.1608900299529311, | |
| "grad_norm": 1.125, | |
| "learning_rate": 4.323725055432373e-05, | |
| "loss": 1.2329685688018799, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.1614605619740408, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 4.320953436807095e-05, | |
| "loss": 1.2458125352859497, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.16203109399515048, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 4.318181818181819e-05, | |
| "loss": 1.2762466669082642, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.16260162601626016, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 4.315410199556541e-05, | |
| "loss": 1.2883433103561401, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.16317215803736984, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 4.312638580931264e-05, | |
| "loss": 1.261974811553955, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.16374269005847952, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 4.309866962305987e-05, | |
| "loss": 1.2657639980316162, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.1643132220795892, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 4.30709534368071e-05, | |
| "loss": 1.295043706893921, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.16488375410069891, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 4.304323725055433e-05, | |
| "loss": 1.2336839437484741, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.1654542861218086, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 4.301552106430156e-05, | |
| "loss": 1.264127492904663, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.16602481814291828, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 4.298780487804878e-05, | |
| "loss": 1.2246544361114502, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.16659535016402796, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 4.2960088691796016e-05, | |
| "loss": 1.2040233612060547, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.16716588218513764, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 4.293237250554324e-05, | |
| "loss": 1.2784225940704346, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.16773641420624733, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 4.290465631929047e-05, | |
| "loss": 1.3152185678482056, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.168306946227357, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.28769401330377e-05, | |
| "loss": 1.2193617820739746, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.1688774782484667, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.284922394678492e-05, | |
| "loss": 1.2813901901245117, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.16944801026957637, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 4.2821507760532156e-05, | |
| "loss": 1.205044150352478, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.17001854229068605, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.279379157427938e-05, | |
| "loss": 1.2626889944076538, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.17058907431179576, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 4.276607538802661e-05, | |
| "loss": 1.2680320739746094, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.17115960633290545, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 4.273835920177384e-05, | |
| "loss": 1.2155548334121704, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.17173013835401513, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 4.271064301552107e-05, | |
| "loss": 1.2199232578277588, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.1723006703751248, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 4.26829268292683e-05, | |
| "loss": 1.2747461795806885, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.1728712023962345, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 4.2655210643015526e-05, | |
| "loss": 1.235656976699829, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.17344173441734417, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 4.262749445676275e-05, | |
| "loss": 1.3054769039154053, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.17401226643845386, | |
| "grad_norm": 1.125, | |
| "learning_rate": 4.2599778270509985e-05, | |
| "loss": 1.2325561046600342, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.17458279845956354, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 4.257206208425721e-05, | |
| "loss": 1.1963461637496948, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.17515333048067322, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 4.254434589800444e-05, | |
| "loss": 1.2029732465744019, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.1757238625017829, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 4.251662971175167e-05, | |
| "loss": 1.289282202720642, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.17629439452289258, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 4.248891352549889e-05, | |
| "loss": 1.2570784091949463, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.1768649265440023, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 4.2461197339246126e-05, | |
| "loss": 1.1787132024765015, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.17743545856511198, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 4.243348115299335e-05, | |
| "loss": 1.2079870700836182, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.17800599058622166, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 4.240576496674058e-05, | |
| "loss": 1.2776343822479248, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.17857652260733134, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 4.237804878048781e-05, | |
| "loss": 1.1856639385223389, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.17914705462844102, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 4.235033259423504e-05, | |
| "loss": 1.268944501876831, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.1797175866495507, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 4.2322616407982266e-05, | |
| "loss": 1.2755537033081055, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.18028811867066039, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 4.2294900221729496e-05, | |
| "loss": 1.274179458618164, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.18085865069177007, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.226718403547672e-05, | |
| "loss": 1.2530457973480225, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.18142918271287975, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.2239467849223955e-05, | |
| "loss": 1.1844085454940796, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.18199971473398943, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 4.221175166297118e-05, | |
| "loss": 1.3111554384231567, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.18257024675509914, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 4.21840354767184e-05, | |
| "loss": 1.2178188562393188, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.18314077877620882, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.2156319290465636e-05, | |
| "loss": 1.2369928359985352, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.1837113107973185, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 4.212860310421286e-05, | |
| "loss": 1.1851946115493774, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.1842818428184282, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 4.210088691796009e-05, | |
| "loss": 1.2697205543518066, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.18485237483953787, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 4.207317073170732e-05, | |
| "loss": 1.2498860359191895, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.18542290686064755, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 4.204545454545455e-05, | |
| "loss": 1.2507086992263794, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.18599343888175723, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 4.201773835920178e-05, | |
| "loss": 1.2160149812698364, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.18656397090286692, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 4.1990022172949006e-05, | |
| "loss": 1.238983392715454, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.1871345029239766, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 4.196230598669623e-05, | |
| "loss": 1.2306344509124756, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.18770503494508628, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.1934589800443465e-05, | |
| "loss": 1.27529776096344, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.188275566966196, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 4.190687361419069e-05, | |
| "loss": 1.2787272930145264, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.18884609898730567, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 4.187915742793792e-05, | |
| "loss": 1.2454849481582642, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.18941663100841535, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 4.185144124168515e-05, | |
| "loss": 1.2060352563858032, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.18998716302952504, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 4.182372505543237e-05, | |
| "loss": 1.2341554164886475, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.19055769505063472, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.1796008869179606e-05, | |
| "loss": 1.2774791717529297, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.1911282270717444, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 4.176829268292683e-05, | |
| "loss": 1.2547677755355835, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.19169875909285408, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.174057649667406e-05, | |
| "loss": 1.286057472229004, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.19226929111396376, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.171286031042129e-05, | |
| "loss": 1.2891746759414673, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.19283982313507345, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 4.168514412416852e-05, | |
| "loss": 1.2376006841659546, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.19341035515618313, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 4.1657427937915746e-05, | |
| "loss": 1.2672202587127686, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.19398088717729284, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.1629711751662976e-05, | |
| "loss": 1.2037293910980225, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.19455141919840252, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 4.16019955654102e-05, | |
| "loss": 1.218858003616333, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.1951219512195122, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 4.1574279379157435e-05, | |
| "loss": 1.2183986902236938, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.19569248324062188, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 4.154656319290466e-05, | |
| "loss": 1.2573124170303345, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.19626301526173157, | |
| "grad_norm": 1.125, | |
| "learning_rate": 4.151884700665189e-05, | |
| "loss": 1.21070396900177, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.19683354728284125, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 4.1491130820399116e-05, | |
| "loss": 1.286003589630127, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.19740407930395093, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.146341463414634e-05, | |
| "loss": 1.2600152492523193, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.1979746113250606, | |
| "grad_norm": 1.203125, | |
| "learning_rate": 4.1435698447893575e-05, | |
| "loss": 1.2338290214538574, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.1985451433461703, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 4.14079822616408e-05, | |
| "loss": 1.2722115516662598, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.19911567536727998, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 4.138026607538803e-05, | |
| "loss": 1.1988334655761719, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.19968620738838969, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 4.135254988913526e-05, | |
| "loss": 1.2339057922363281, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.20025673940949937, | |
| "grad_norm": 1.125, | |
| "learning_rate": 4.1324833702882486e-05, | |
| "loss": 1.2363622188568115, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.20082727143060905, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 4.1297117516629716e-05, | |
| "loss": 1.2658472061157227, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.20139780345171873, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 4.1269401330376945e-05, | |
| "loss": 1.2181835174560547, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.20196833547282841, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 4.124168514412417e-05, | |
| "loss": 1.2710312604904175, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.2025388674939381, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 4.12139689578714e-05, | |
| "loss": 1.176246166229248, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.20310939951504778, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 4.118625277161863e-05, | |
| "loss": 1.24937903881073, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.20367993153615746, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 4.1158536585365856e-05, | |
| "loss": 1.2401498556137085, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.20425046355726714, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 4.1130820399113086e-05, | |
| "loss": 1.2015979290008545, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.20482099557837682, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 4.110310421286031e-05, | |
| "loss": 1.2495380640029907, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.20539152759948653, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 4.1075388026607545e-05, | |
| "loss": 1.2646973133087158, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.20596205962059622, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 4.104767184035477e-05, | |
| "loss": 1.2007383108139038, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.2065325916417059, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 4.1019955654102e-05, | |
| "loss": 1.226219892501831, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.20710312366281558, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 4.0992239467849226e-05, | |
| "loss": 1.306444525718689, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.20767365568392526, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 4.0964523281596456e-05, | |
| "loss": 1.2141070365905762, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.20824418770503494, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 4.0936807095343685e-05, | |
| "loss": 1.2149772644042969, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.20881471972614463, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 4.0909090909090915e-05, | |
| "loss": 1.2671623229980469, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.2093852517472543, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 4.088137472283814e-05, | |
| "loss": 1.2434954643249512, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.209955783768364, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 4.085365853658537e-05, | |
| "loss": 1.2326661348342896, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.21052631578947367, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 4.0825942350332596e-05, | |
| "loss": 1.2969672679901123, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.21052631578947367, | |
| "eval_loss": 1.238897681236267, | |
| "eval_runtime": 80.0789, | |
| "eval_samples_per_second": 11.938, | |
| "eval_steps_per_second": 2.985, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.21109684781058338, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 4.0798226164079826e-05, | |
| "loss": 1.203234076499939, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.21166737983169306, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 4.0770509977827055e-05, | |
| "loss": 1.2333259582519531, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.21223791185280275, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 4.074279379157428e-05, | |
| "loss": 1.2060984373092651, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.21280844387391243, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 4.0715077605321514e-05, | |
| "loss": 1.1909129619598389, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.2133789758950221, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 4.068736141906874e-05, | |
| "loss": 1.2396963834762573, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.2139495079161318, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 4.0659645232815966e-05, | |
| "loss": 1.1830250024795532, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.21452003993724147, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 4.0631929046563196e-05, | |
| "loss": 1.207044005393982, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.21509057195835116, | |
| "grad_norm": 1.203125, | |
| "learning_rate": 4.0604212860310425e-05, | |
| "loss": 1.2795757055282593, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.21566110397946084, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 4.057649667405765e-05, | |
| "loss": 1.2492969036102295, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.21623163600057052, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 4.0548780487804884e-05, | |
| "loss": 1.3094936609268188, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.21680216802168023, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 4.052106430155211e-05, | |
| "loss": 1.2260823249816895, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.2173727000427899, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 4.0493348115299336e-05, | |
| "loss": 1.2405587434768677, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.2179432320638996, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 4.0465631929046566e-05, | |
| "loss": 1.1963216066360474, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.21851376408500928, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 4.043791574279379e-05, | |
| "loss": 1.2458081245422363, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.21908429610611896, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 4.0410199556541025e-05, | |
| "loss": 1.1974573135375977, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.21965482812722864, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 4.038248337028825e-05, | |
| "loss": 1.2237815856933594, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.22022536014833832, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 4.035476718403548e-05, | |
| "loss": 1.2369771003723145, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.220795892169448, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 4.0327050997782706e-05, | |
| "loss": 1.2545832395553589, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.2213664241905577, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 4.0299334811529936e-05, | |
| "loss": 1.2126426696777344, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.22193695621166737, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 4.0271618625277165e-05, | |
| "loss": 1.2321901321411133, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.22250748823277705, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 4.0243902439024395e-05, | |
| "loss": 1.2315490245819092, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.22307802025388676, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 4.021618625277162e-05, | |
| "loss": 1.1859689950942993, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.22364855227499644, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 4.018847006651885e-05, | |
| "loss": 1.2416760921478271, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.22421908429610612, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.0160753880266076e-05, | |
| "loss": 1.3080382347106934, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.2247896163172158, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 4.0133037694013306e-05, | |
| "loss": 1.2275526523590088, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.2253601483383255, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 4.0105321507760535e-05, | |
| "loss": 1.2734044790267944, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.22593068035943517, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 4.007760532150776e-05, | |
| "loss": 1.2480955123901367, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.22650121238054485, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 4.0049889135254994e-05, | |
| "loss": 1.2629410028457642, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.22707174440165453, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 4.002217294900222e-05, | |
| "loss": 1.190090537071228, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.22764227642276422, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 3.9994456762749446e-05, | |
| "loss": 1.2843146324157715, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.2282128084438739, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.9966740576496676e-05, | |
| "loss": 1.2836047410964966, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.2287833404649836, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.9939024390243905e-05, | |
| "loss": 1.1873021125793457, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.2293538724860933, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.9911308203991135e-05, | |
| "loss": 1.228004813194275, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.22992440450720297, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 3.9883592017738364e-05, | |
| "loss": 1.2318588495254517, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.23049493652831265, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 3.985587583148559e-05, | |
| "loss": 1.218421220779419, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.23106546854942234, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 3.9828159645232816e-05, | |
| "loss": 1.3068960905075073, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.23163600057053202, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.9800443458980046e-05, | |
| "loss": 1.2189011573791504, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.2322065325916417, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 3.9772727272727275e-05, | |
| "loss": 1.2019367218017578, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.23277706461275138, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 3.9745011086474505e-05, | |
| "loss": 1.2285387516021729, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.23334759663386107, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.971729490022173e-05, | |
| "loss": 1.1963067054748535, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.23391812865497075, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 3.9689578713968964e-05, | |
| "loss": 1.3005050420761108, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.23448866067608046, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.9661862527716186e-05, | |
| "loss": 1.2429478168487549, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.23505919269719014, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 3.9634146341463416e-05, | |
| "loss": 1.2445229291915894, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.23562972471829982, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 3.9606430155210645e-05, | |
| "loss": 1.2569499015808105, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.2362002567394095, | |
| "grad_norm": 1.0, | |
| "learning_rate": 3.9578713968957875e-05, | |
| "loss": 1.232776165008545, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.23677078876051919, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.9550997782705104e-05, | |
| "loss": 1.2104380130767822, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.23734132078162887, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 3.952328159645233e-05, | |
| "loss": 1.2908308506011963, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.23791185280273855, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 3.9495565410199557e-05, | |
| "loss": 1.1678047180175781, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.23848238482384823, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 3.9467849223946786e-05, | |
| "loss": 1.310725212097168, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.2390529168449579, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.9440133037694015e-05, | |
| "loss": 1.2618491649627686, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.2396234488660676, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.9412416851441245e-05, | |
| "loss": 1.1795238256454468, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.2401939808871773, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.9384700665188474e-05, | |
| "loss": 1.2187573909759521, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.240764512908287, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.93569844789357e-05, | |
| "loss": 1.2171461582183838, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.24133504492939667, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 3.932926829268293e-05, | |
| "loss": 1.2295634746551514, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.24190557695050635, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 3.9301552106430156e-05, | |
| "loss": 1.2483271360397339, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.24247610897161603, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.9273835920177385e-05, | |
| "loss": 1.1881691217422485, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.24304664099272572, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.9246119733924615e-05, | |
| "loss": 1.1997624635696411, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.2436171730138354, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 3.9218403547671844e-05, | |
| "loss": 1.2510207891464233, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.24418770503494508, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 3.9190687361419074e-05, | |
| "loss": 1.2188156843185425, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.24475823705605476, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.9162971175166297e-05, | |
| "loss": 1.228477954864502, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.24532876907716444, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 3.9135254988913526e-05, | |
| "loss": 1.3039709329605103, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.24589930109827415, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 3.9107538802660755e-05, | |
| "loss": 1.2193942070007324, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.24646983311938384, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 3.9079822616407985e-05, | |
| "loss": 1.2380352020263672, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.24704036514049352, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 3.905210643015521e-05, | |
| "loss": 1.1670141220092773, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.2476108971616032, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 3.9024390243902444e-05, | |
| "loss": 1.2406682968139648, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.24818142918271288, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 3.8996674057649667e-05, | |
| "loss": 1.200782060623169, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.24875196120382256, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 3.89689578713969e-05, | |
| "loss": 1.1442952156066895, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.24932249322493225, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.8941241685144125e-05, | |
| "loss": 1.15338134765625, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.24989302524604193, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 3.8913525498891355e-05, | |
| "loss": 1.1609077453613281, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.25046355726715164, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 3.8885809312638584e-05, | |
| "loss": 1.257835030555725, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.2510340892882613, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 3.8858093126385814e-05, | |
| "loss": 1.2244375944137573, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.251604621309371, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 3.8830376940133037e-05, | |
| "loss": 1.2138961553573608, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.2521751533304807, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.8802660753880266e-05, | |
| "loss": 1.240128755569458, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.25274568535159037, | |
| "grad_norm": 1.0, | |
| "learning_rate": 3.8774944567627496e-05, | |
| "loss": 1.2070982456207275, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.25331621737270005, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.8747228381374725e-05, | |
| "loss": 1.2733830213546753, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.25388674939380973, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 3.8719512195121954e-05, | |
| "loss": 1.1820507049560547, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.2544572814149194, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 3.869179600886918e-05, | |
| "loss": 1.196885108947754, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.2550278134360291, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.866407982261641e-05, | |
| "loss": 1.1905972957611084, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.2555983454571388, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 3.8636363636363636e-05, | |
| "loss": 1.2579684257507324, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.25616887747824846, | |
| "grad_norm": 1.0, | |
| "learning_rate": 3.8608647450110866e-05, | |
| "loss": 1.1727596521377563, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.25673940949935814, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.8580931263858095e-05, | |
| "loss": 1.1504234075546265, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.2573099415204678, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.8553215077605324e-05, | |
| "loss": 1.1405715942382812, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.2578804735415775, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 3.8525498891352554e-05, | |
| "loss": 1.220837116241455, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.2584510055626872, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.8497782705099777e-05, | |
| "loss": 1.1962711811065674, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.25902153758379687, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.8470066518847006e-05, | |
| "loss": 1.1877164840698242, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.25959206960490655, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 3.8442350332594236e-05, | |
| "loss": 1.2504132986068726, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.2601626016260163, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.8414634146341465e-05, | |
| "loss": 1.1902315616607666, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.26073313364712597, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.8386917960088694e-05, | |
| "loss": 1.2856203317642212, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.26130366566823565, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 3.8359201773835924e-05, | |
| "loss": 1.2528060674667358, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.26187419768934533, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 3.833148558758315e-05, | |
| "loss": 1.1831871271133423, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.262444729710455, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.830376940133038e-05, | |
| "loss": 1.1781988143920898, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.2630152617315647, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.8276053215077606e-05, | |
| "loss": 1.193709373474121, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.2635857937526744, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 3.8248337028824835e-05, | |
| "loss": 1.1997225284576416, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.26415632577378406, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 3.8220620842572064e-05, | |
| "loss": 1.159136176109314, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.26472685779489374, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.8192904656319294e-05, | |
| "loss": 1.242883324623108, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.2652973898160034, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 3.8165188470066523e-05, | |
| "loss": 1.2907770872116089, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.2658679218371131, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 3.8137472283813746e-05, | |
| "loss": 1.2596560716629028, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.2664384538582228, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 3.8109756097560976e-05, | |
| "loss": 1.2509888410568237, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.26700898587933247, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 3.8082039911308205e-05, | |
| "loss": 1.2029120922088623, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.26757951790044215, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.8054323725055435e-05, | |
| "loss": 1.210568904876709, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.26815004992155184, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 3.8026607538802664e-05, | |
| "loss": 1.1661216020584106, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.2687205819426615, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.7998891352549893e-05, | |
| "loss": 1.229252576828003, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.2692911139637712, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.7971175166297116e-05, | |
| "loss": 1.209242343902588, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.2698616459848809, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 3.794345898004435e-05, | |
| "loss": 1.2709503173828125, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.27043217800599056, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 3.7915742793791575e-05, | |
| "loss": 1.2316001653671265, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.27100271002710025, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.7888026607538805e-05, | |
| "loss": 1.2138065099716187, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.27157324204820993, | |
| "grad_norm": 1.0, | |
| "learning_rate": 3.7860310421286034e-05, | |
| "loss": 1.1936984062194824, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.27214377406931967, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.783259423503326e-05, | |
| "loss": 1.2338573932647705, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.27271430609042935, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 3.780487804878049e-05, | |
| "loss": 1.2421263456344604, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.27328483811153903, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.7777161862527716e-05, | |
| "loss": 1.2414464950561523, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.2738553701326487, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 3.7749445676274945e-05, | |
| "loss": 1.2261340618133545, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.2744259021537584, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.7721729490022175e-05, | |
| "loss": 1.208221435546875, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.2749964341748681, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.7694013303769404e-05, | |
| "loss": 1.2820276021957397, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.27556696619597776, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 3.7666297117516633e-05, | |
| "loss": 1.262161374092102, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.27613749821708744, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 3.763858093126386e-05, | |
| "loss": 1.2242916822433472, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.2767080302381971, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 3.7610864745011086e-05, | |
| "loss": 1.1797833442687988, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.2772785622593068, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.758314855875832e-05, | |
| "loss": 1.2725660800933838, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.2778490942804165, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.7555432372505545e-05, | |
| "loss": 1.195313572883606, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.27841962630152617, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 3.7527716186252774e-05, | |
| "loss": 1.1661468744277954, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.27899015832263585, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 1.2072978019714355, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.27956069034374553, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 3.7472283813747226e-05, | |
| "loss": 1.203414797782898, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.2801312223648552, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 3.7444567627494456e-05, | |
| "loss": 1.2426180839538574, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.2807017543859649, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.7416851441241685e-05, | |
| "loss": 1.232536792755127, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.2812722864070746, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.7389135254988915e-05, | |
| "loss": 1.266850471496582, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.28184281842818426, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 3.7361419068736144e-05, | |
| "loss": 1.2585172653198242, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.28241335044929394, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 3.7333702882483374e-05, | |
| "loss": 1.2028322219848633, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.2829838824704036, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.7305986696230596e-05, | |
| "loss": 1.2268320322036743, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.28355441449151336, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.727827050997783e-05, | |
| "loss": 1.2339527606964111, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.28412494651262304, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 3.7250554323725055e-05, | |
| "loss": 1.2072274684906006, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.2846954785337327, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 3.7222838137472285e-05, | |
| "loss": 1.235311508178711, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.2852660105548424, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 3.7195121951219514e-05, | |
| "loss": 1.2435599565505981, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.2858365425759521, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 3.7167405764966744e-05, | |
| "loss": 1.2234078645706177, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.2864070745970618, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 3.713968957871397e-05, | |
| "loss": 1.2654131650924683, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.28697760661817145, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 3.7111973392461196e-05, | |
| "loss": 1.226614236831665, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.28754813863928114, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 3.7084257206208425e-05, | |
| "loss": 1.2334555387496948, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.2881186706603908, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.7056541019955655e-05, | |
| "loss": 1.2169506549835205, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.2886892026815005, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 3.7028824833702884e-05, | |
| "loss": 1.2664920091629028, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.2892597347026102, | |
| "grad_norm": 1.2109375, | |
| "learning_rate": 3.7001108647450114e-05, | |
| "loss": 1.2238786220550537, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.28983026672371986, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.697339246119734e-05, | |
| "loss": 1.179901361465454, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.29040079874482955, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 3.6945676274944566e-05, | |
| "loss": 1.2527443170547485, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.29097133076593923, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.69179600886918e-05, | |
| "loss": 1.2478464841842651, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.2915418627870489, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 3.6890243902439025e-05, | |
| "loss": 1.2006577253341675, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.2921123948081586, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 3.6862527716186254e-05, | |
| "loss": 1.283043384552002, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.2926829268292683, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.6834811529933484e-05, | |
| "loss": 1.223816156387329, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.29325345885037796, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 3.6807095343680706e-05, | |
| "loss": 1.2357165813446045, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.29382399087148764, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 3.677937915742794e-05, | |
| "loss": 1.2494802474975586, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.2943945228925973, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 3.6751662971175165e-05, | |
| "loss": 1.2093576192855835, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.29496505491370706, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.6723946784922395e-05, | |
| "loss": 1.192871332168579, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.29553558693481674, | |
| "grad_norm": 0.93359375, | |
| "learning_rate": 3.6696230598669624e-05, | |
| "loss": 1.1430253982543945, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.2961061189559264, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.6668514412416854e-05, | |
| "loss": 1.2123762369155884, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.2966766509770361, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 3.664079822616408e-05, | |
| "loss": 1.2201260328292847, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.2972471829981458, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.661308203991131e-05, | |
| "loss": 1.1812068223953247, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.29781771501925547, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 3.6585365853658535e-05, | |
| "loss": 1.2447538375854492, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.29838824704036515, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 3.655764966740577e-05, | |
| "loss": 1.2636268138885498, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.29895877906147483, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.6529933481152994e-05, | |
| "loss": 1.2320729494094849, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.2995293110825845, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 3.6502217294900224e-05, | |
| "loss": 1.2655476331710815, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.3000998431036942, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 3.647450110864745e-05, | |
| "loss": 1.2109198570251465, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.3006703751248039, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 3.6446784922394676e-05, | |
| "loss": 1.2380175590515137, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.30124090714591356, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.641906873614191e-05, | |
| "loss": 1.2023993730545044, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.30181143916702324, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.6391352549889135e-05, | |
| "loss": 1.239518404006958, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.3023819711881329, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 3.6363636363636364e-05, | |
| "loss": 1.2405352592468262, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.3029525032092426, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 3.6335920177383594e-05, | |
| "loss": 1.269554853439331, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.3035230352303523, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 3.630820399113082e-05, | |
| "loss": 1.256522297859192, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.30409356725146197, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 3.628048780487805e-05, | |
| "loss": 1.2245392799377441, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.30466409927257165, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 3.625277161862528e-05, | |
| "loss": 1.2256156206130981, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.30523463129368134, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 3.6225055432372505e-05, | |
| "loss": 1.2551851272583008, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.305805163314791, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.619733924611974e-05, | |
| "loss": 1.1682400703430176, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.30637569533590076, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 3.6169623059866964e-05, | |
| "loss": 1.2278921604156494, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.30694622735701044, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.6141906873614186e-05, | |
| "loss": 1.2167140245437622, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.3075167593781201, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 3.611419068736142e-05, | |
| "loss": 1.2471628189086914, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.3080872913992298, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.6086474501108645e-05, | |
| "loss": 1.2300347089767456, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.3086578234203395, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.605875831485588e-05, | |
| "loss": 1.1582870483398438, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.30922835544144917, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.6031042128603104e-05, | |
| "loss": 1.2606914043426514, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.30979888746255885, | |
| "grad_norm": 1.0, | |
| "learning_rate": 3.6003325942350334e-05, | |
| "loss": 1.2054803371429443, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.31036941948366853, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 3.597560975609756e-05, | |
| "loss": 1.1797690391540527, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.3109399515047782, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.594789356984479e-05, | |
| "loss": 1.1780451536178589, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.3115104835258879, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.5920177383592015e-05, | |
| "loss": 1.2812529802322388, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.3120810155469976, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 3.589246119733925e-05, | |
| "loss": 1.3007402420043945, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.31265154756810726, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 3.5864745011086474e-05, | |
| "loss": 1.1987743377685547, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.31322207958921694, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 3.583702882483371e-05, | |
| "loss": 1.2217564582824707, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.3137926116103266, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 3.580931263858093e-05, | |
| "loss": 1.211827039718628, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.3143631436314363, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.5781596452328156e-05, | |
| "loss": 1.2164710760116577, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.314933675652546, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 3.575388026607539e-05, | |
| "loss": 1.2393014430999756, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.31550420767365567, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 3.5726164079822615e-05, | |
| "loss": 1.1759617328643799, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.31607473969476535, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.5698447893569844e-05, | |
| "loss": 1.2184211015701294, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.31664527171587503, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 3.5670731707317074e-05, | |
| "loss": 1.248216152191162, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.3172158037369847, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 3.56430155210643e-05, | |
| "loss": 1.2055684328079224, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.3177863357580944, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.561529933481153e-05, | |
| "loss": 1.19916832447052, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.31835686777920413, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 3.558758314855876e-05, | |
| "loss": 1.151750087738037, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.3189273998003138, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 3.5559866962305985e-05, | |
| "loss": 1.254964828491211, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.3194979318214235, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.553215077605322e-05, | |
| "loss": 1.251706600189209, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.3200684638425332, | |
| "grad_norm": 1.2265625, | |
| "learning_rate": 3.5504434589800444e-05, | |
| "loss": 1.1918596029281616, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.32063899586364286, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 3.547671840354767e-05, | |
| "loss": 1.2538777589797974, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.32120952788475254, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 3.54490022172949e-05, | |
| "loss": 1.227068543434143, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.3217800599058622, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.5421286031042125e-05, | |
| "loss": 1.1811244487762451, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.3223505919269719, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.539356984478936e-05, | |
| "loss": 1.162517786026001, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.3229211239480816, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 3.5365853658536584e-05, | |
| "loss": 1.1981290578842163, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.32349165596919127, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 3.5338137472283814e-05, | |
| "loss": 1.1930001974105835, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.32406218799030095, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.531042128603104e-05, | |
| "loss": 1.2397738695144653, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.32463272001141064, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 3.528270509977827e-05, | |
| "loss": 1.273198127746582, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.3252032520325203, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 3.52549889135255e-05, | |
| "loss": 1.1873741149902344, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.32577378405363, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 3.522727272727273e-05, | |
| "loss": 1.2132840156555176, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.3263443160747397, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 3.5199556541019954e-05, | |
| "loss": 1.1881725788116455, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.32691484809584936, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.517184035476719e-05, | |
| "loss": 1.2296414375305176, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.32748538011695905, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 3.514412416851441e-05, | |
| "loss": 1.2116769552230835, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.32805591213806873, | |
| "grad_norm": 1.0, | |
| "learning_rate": 3.511640798226164e-05, | |
| "loss": 1.194542646408081, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.3286264441591784, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.508869179600887e-05, | |
| "loss": 1.2189078330993652, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.3291969761802881, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 3.5060975609756095e-05, | |
| "loss": 1.1380560398101807, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.32976750820139783, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.503325942350333e-05, | |
| "loss": 1.1995842456817627, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.3303380402225075, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 3.5005543237250554e-05, | |
| "loss": 1.254304051399231, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.3309085722436172, | |
| "grad_norm": 1.0, | |
| "learning_rate": 3.497782705099778e-05, | |
| "loss": 1.201616883277893, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.3314791042647269, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 3.495011086474501e-05, | |
| "loss": 1.1772336959838867, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.33204963628583656, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.492239467849224e-05, | |
| "loss": 1.1937668323516846, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.33262016830694624, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 3.489467849223947e-05, | |
| "loss": 1.186886191368103, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.3331907003280559, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 3.48669623059867e-05, | |
| "loss": 1.2187786102294922, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.3337612323491656, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.4839246119733924e-05, | |
| "loss": 1.1842401027679443, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.3343317643702753, | |
| "grad_norm": 1.0, | |
| "learning_rate": 3.481152993348116e-05, | |
| "loss": 1.1953545808792114, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.33490229639138497, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 3.478381374722838e-05, | |
| "loss": 1.1909786462783813, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.33547282841249465, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 3.475609756097561e-05, | |
| "loss": 1.201062798500061, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.33604336043360433, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 3.472838137472284e-05, | |
| "loss": 1.2262158393859863, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.336613892454714, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 3.4700665188470064e-05, | |
| "loss": 1.255564570426941, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.3371844244758237, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 3.46729490022173e-05, | |
| "loss": 1.1916460990905762, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.3377549564969334, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.464523281596452e-05, | |
| "loss": 1.1728994846343994, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.33832548851804306, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.461751662971175e-05, | |
| "loss": 1.2145668268203735, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.33889602053915274, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 3.458980044345898e-05, | |
| "loss": 1.2174324989318848, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.3394665525602624, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 3.456208425720621e-05, | |
| "loss": 1.1968474388122559, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.3400370845813721, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 3.453436807095344e-05, | |
| "loss": 1.1793067455291748, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.3406076166024818, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.450665188470067e-05, | |
| "loss": 1.2109010219573975, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.3411781486235915, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.447893569844789e-05, | |
| "loss": 1.2412149906158447, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.3417486806447012, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 3.445121951219512e-05, | |
| "loss": 1.1886482238769531, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.3423192126658109, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.442350332594235e-05, | |
| "loss": 1.1711212396621704, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.3428897446869206, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 3.4395787139689575e-05, | |
| "loss": 1.1890015602111816, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.34346027670803025, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 3.436807095343681e-05, | |
| "loss": 1.1860285997390747, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.34403080872913994, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 3.4340354767184034e-05, | |
| "loss": 1.2001878023147583, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.3446013407502496, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.431263858093127e-05, | |
| "loss": 1.1815104484558105, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.3451718727713593, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.428492239467849e-05, | |
| "loss": 1.1652307510375977, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.345742404792469, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.425720620842572e-05, | |
| "loss": 1.1888481378555298, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.34631293681357866, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 3.422949002217295e-05, | |
| "loss": 1.2198981046676636, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.34688346883468835, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.420177383592018e-05, | |
| "loss": 1.2088303565979004, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.34745400085579803, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 3.4174057649667404e-05, | |
| "loss": 1.2638548612594604, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.3480245328769077, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.414634146341464e-05, | |
| "loss": 1.2314380407333374, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.3485950648980174, | |
| "grad_norm": 1.0, | |
| "learning_rate": 3.411862527716186e-05, | |
| "loss": 1.1847796440124512, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.3491655969191271, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.409090909090909e-05, | |
| "loss": 1.1967138051986694, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.34973612894023676, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.406319290465632e-05, | |
| "loss": 1.1948060989379883, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.35030666096134644, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 3.4035476718403544e-05, | |
| "loss": 1.248701810836792, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.3508771929824561, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 3.400776053215078e-05, | |
| "loss": 1.2076679468154907, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.3514477250035658, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.3980044345898e-05, | |
| "loss": 1.20987868309021, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.3520182570246755, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 3.395232815964523e-05, | |
| "loss": 1.1548939943313599, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.35258878904578517, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.392461197339246e-05, | |
| "loss": 1.2160520553588867, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.3531593210668949, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.389689578713969e-05, | |
| "loss": 1.2215287685394287, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.3537298530880046, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.386917960088692e-05, | |
| "loss": 1.2433137893676758, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.35430038510911427, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.384146341463415e-05, | |
| "loss": 1.2307751178741455, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.35487091713022395, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 3.381374722838137e-05, | |
| "loss": 1.1872355937957764, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.35544144915133363, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.378603104212861e-05, | |
| "loss": 1.200265645980835, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.3560119811724433, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.375831485587583e-05, | |
| "loss": 1.3020355701446533, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.356582513193553, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.373059866962306e-05, | |
| "loss": 1.1976819038391113, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.3571530452146627, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.370288248337029e-05, | |
| "loss": 1.1945629119873047, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.35772357723577236, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 3.3675166297117514e-05, | |
| "loss": 1.2189013957977295, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.35829410925688204, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 3.364745011086475e-05, | |
| "loss": 1.2139533758163452, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.3588646412779917, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 3.361973392461197e-05, | |
| "loss": 1.1832334995269775, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.3594351732991014, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 3.35920177383592e-05, | |
| "loss": 1.1789777278900146, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.3600057053202111, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 3.356430155210643e-05, | |
| "loss": 1.1401221752166748, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.36057623734132077, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.353658536585366e-05, | |
| "loss": 1.2332661151885986, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.36114676936243045, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 3.350886917960089e-05, | |
| "loss": 1.1867516040802002, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.36171730138354014, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 3.348115299334812e-05, | |
| "loss": 1.2486271858215332, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.3622878334046498, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 3.345343680709534e-05, | |
| "loss": 1.1644282341003418, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.3628583654257595, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 3.342572062084257e-05, | |
| "loss": 1.1926931142807007, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.3634288974468692, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 3.33980044345898e-05, | |
| "loss": 1.2337167263031006, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.36399942946797886, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 3.337028824833703e-05, | |
| "loss": 1.2726258039474487, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.3645699614890886, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 3.334257206208426e-05, | |
| "loss": 1.229848861694336, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.3651404935101983, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 3.3314855875831483e-05, | |
| "loss": 1.1424199342727661, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.36571102553130796, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.328713968957872e-05, | |
| "loss": 1.2158143520355225, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.36628155755241765, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.325942350332594e-05, | |
| "loss": 1.213433027267456, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.36685208957352733, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.323170731707317e-05, | |
| "loss": 1.1552369594573975, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.367422621594637, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 3.32039911308204e-05, | |
| "loss": 1.1470410823822021, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.3679931536157467, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.317627494456763e-05, | |
| "loss": 1.227137804031372, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.3685636856368564, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 3.314855875831486e-05, | |
| "loss": 1.1736478805541992, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.36913421765796606, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 3.312084257206209e-05, | |
| "loss": 1.2192144393920898, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.36970474967907574, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 3.309312638580931e-05, | |
| "loss": 1.1780518293380737, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.3702752817001854, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.306541019955654e-05, | |
| "loss": 1.2205878496170044, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.3708458137212951, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 3.303769401330377e-05, | |
| "loss": 1.2226086854934692, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.3714163457424048, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 3.3009977827051e-05, | |
| "loss": 1.1905219554901123, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.37198687776351447, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.298226164079823e-05, | |
| "loss": 1.1790423393249512, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.37255740978462415, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 3.295454545454545e-05, | |
| "loss": 1.1909444332122803, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.37312794180573383, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 3.292682926829269e-05, | |
| "loss": 1.2416154146194458, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.3736984738268435, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.289911308203991e-05, | |
| "loss": 1.2464513778686523, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.3742690058479532, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.287139689578714e-05, | |
| "loss": 1.239952802658081, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.3748395378690629, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 3.284368070953437e-05, | |
| "loss": 1.2005925178527832, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.37541006989017256, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 3.28159645232816e-05, | |
| "loss": 1.2646636962890625, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.3759806019112823, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 3.278824833702882e-05, | |
| "loss": 1.203331470489502, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.376551133932392, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 3.276053215077605e-05, | |
| "loss": 1.1849339008331299, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.37712166595350166, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 3.273281596452328e-05, | |
| "loss": 1.2010148763656616, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.37769219797461134, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 3.270509977827051e-05, | |
| "loss": 1.2384660243988037, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.378262729995721, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 3.267738359201774e-05, | |
| "loss": 1.2244110107421875, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.3788332620168307, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.2649667405764963e-05, | |
| "loss": 1.2706053256988525, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.3794037940379404, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 3.26219512195122e-05, | |
| "loss": 1.2451549768447876, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.37997432605905007, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.259423503325942e-05, | |
| "loss": 1.2653909921646118, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.38054485808015975, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 3.256651884700665e-05, | |
| "loss": 1.2227097749710083, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.38111539010126944, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 3.253880266075388e-05, | |
| "loss": 1.2289211750030518, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.3816859221223791, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 3.251108647450111e-05, | |
| "loss": 1.2068843841552734, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.3822564541434888, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 3.248337028824834e-05, | |
| "loss": 1.166361689567566, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.3828269861645985, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 3.245565410199557e-05, | |
| "loss": 1.220710277557373, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.38339751818570816, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 3.242793791574279e-05, | |
| "loss": 1.1663460731506348, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.38396805020681785, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 3.240022172949002e-05, | |
| "loss": 1.1803617477416992, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.38453858222792753, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 3.237250554323725e-05, | |
| "loss": 1.1342628002166748, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.3851091142490372, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 3.234478935698448e-05, | |
| "loss": 1.2325470447540283, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.3856796462701469, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 3.231707317073171e-05, | |
| "loss": 1.1941877603530884, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.3862501782912566, | |
| "grad_norm": 1.0, | |
| "learning_rate": 3.228935698447893e-05, | |
| "loss": 1.1775301694869995, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.38682071031236626, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.226164079822617e-05, | |
| "loss": 1.248462438583374, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.387391242333476, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.223392461197339e-05, | |
| "loss": 1.2440953254699707, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.3879617743545857, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 3.220620842572062e-05, | |
| "loss": 1.1706881523132324, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.38853230637569536, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 3.217849223946785e-05, | |
| "loss": 1.227694034576416, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.38910283839680504, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.215077605321508e-05, | |
| "loss": 1.2553303241729736, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.3896733704179147, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 3.212305986696231e-05, | |
| "loss": 1.1399942636489868, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.3902439024390244, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 3.209534368070954e-05, | |
| "loss": 1.2082273960113525, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.3908144344601341, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 3.206762749445676e-05, | |
| "loss": 1.2403631210327148, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.39138496648124377, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.203991130820399e-05, | |
| "loss": 1.1668493747711182, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.39195549850235345, | |
| "grad_norm": 1.0, | |
| "learning_rate": 3.201219512195122e-05, | |
| "loss": 1.1642647981643677, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.39252603052346313, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 3.198447893569845e-05, | |
| "loss": 1.169840693473816, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.3930965625445728, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 3.195676274944568e-05, | |
| "loss": 1.1918284893035889, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.3936670945656825, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.19290465631929e-05, | |
| "loss": 1.2486236095428467, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.3942376265867922, | |
| "grad_norm": 1.0, | |
| "learning_rate": 3.190133037694014e-05, | |
| "loss": 1.212164044380188, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.39480815860790186, | |
| "grad_norm": 1.0, | |
| "learning_rate": 3.187361419068736e-05, | |
| "loss": 1.2184773683547974, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.39537869062901154, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 3.184589800443459e-05, | |
| "loss": 1.2665815353393555, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.3959492226501212, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 3.181818181818182e-05, | |
| "loss": 1.1956299543380737, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.3965197546712309, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 3.179046563192905e-05, | |
| "loss": 1.1868462562561035, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.3970902866923406, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.176274944567628e-05, | |
| "loss": 1.2558304071426392, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.39766081871345027, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 3.17350332594235e-05, | |
| "loss": 1.2197167873382568, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.39823135073455995, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 3.170731707317073e-05, | |
| "loss": 1.2546510696411133, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.39880188275566963, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 3.167960088691796e-05, | |
| "loss": 1.2634811401367188, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.39937241477677937, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 3.165188470066519e-05, | |
| "loss": 1.1409438848495483, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.39994294679788905, | |
| "grad_norm": 1.0, | |
| "learning_rate": 3.162416851441242e-05, | |
| "loss": 1.167540431022644, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.40051347881899874, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 3.159645232815965e-05, | |
| "loss": 1.2233819961547852, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.4010840108401084, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 3.156873614190687e-05, | |
| "loss": 1.2183570861816406, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.4016545428612181, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.154101995565411e-05, | |
| "loss": 1.2039064168930054, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.4022250748823278, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.151330376940133e-05, | |
| "loss": 1.2583222389221191, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.40279560690343746, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 3.148558758314856e-05, | |
| "loss": 1.2133885622024536, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.40336613892454715, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 3.145787139689579e-05, | |
| "loss": 1.2497689723968506, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.40393667094565683, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.143015521064302e-05, | |
| "loss": 1.1765098571777344, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.4045072029667665, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 3.140243902439025e-05, | |
| "loss": 1.1668319702148438, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.4050777349878762, | |
| "grad_norm": 1.0, | |
| "learning_rate": 3.137472283813747e-05, | |
| "loss": 1.1545255184173584, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.4056482670089859, | |
| "grad_norm": 1.0, | |
| "learning_rate": 3.13470066518847e-05, | |
| "loss": 1.2044893503189087, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.40621879903009556, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 3.131929046563193e-05, | |
| "loss": 1.2121517658233643, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.40678933105120524, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 3.129157427937916e-05, | |
| "loss": 1.276052713394165, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.4073598630723149, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.126385809312638e-05, | |
| "loss": 1.1800833940505981, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.4079303950934246, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 3.123614190687362e-05, | |
| "loss": 1.1513339281082153, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.4085009271145343, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 3.120842572062084e-05, | |
| "loss": 1.2298616170883179, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.40907145913564397, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 3.118070953436808e-05, | |
| "loss": 1.1709084510803223, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.40964199115675365, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 3.11529933481153e-05, | |
| "loss": 1.1676058769226074, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.41021252317786333, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 3.112527716186253e-05, | |
| "loss": 1.2025721073150635, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.41078305519897307, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 3.109756097560976e-05, | |
| "loss": 1.2218658924102783, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.41135358722008275, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 3.106984478935698e-05, | |
| "loss": 1.1744896173477173, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.41192411924119243, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 3.104212860310421e-05, | |
| "loss": 1.1989339590072632, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.4124946512623021, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 3.101441241685144e-05, | |
| "loss": 1.2189137935638428, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.4130651832834118, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 3.098669623059867e-05, | |
| "loss": 1.2155076265335083, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.4136357153045215, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 3.09589800443459e-05, | |
| "loss": 1.1465799808502197, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.41420624732563116, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 3.093126385809313e-05, | |
| "loss": 1.2145007848739624, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.41477677934674084, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 3.090354767184035e-05, | |
| "loss": 1.2057294845581055, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.4153473113678505, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 3.087583148558759e-05, | |
| "loss": 1.2041752338409424, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.4159178433889602, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 3.084811529933481e-05, | |
| "loss": 1.1989641189575195, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.4164883754100699, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 3.082039911308204e-05, | |
| "loss": 1.188431739807129, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.41705890743117957, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 3.079268292682927e-05, | |
| "loss": 1.1488507986068726, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.41762943945228925, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 3.07649667405765e-05, | |
| "loss": 1.2174850702285767, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.41819997147339893, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 3.073725055432373e-05, | |
| "loss": 1.2141880989074707, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.4187705034945086, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.070953436807095e-05, | |
| "loss": 1.2875535488128662, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.4193410355156183, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 3.068181818181818e-05, | |
| "loss": 1.168579339981079, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.419911567536728, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 3.065410199556541e-05, | |
| "loss": 1.1168636083602905, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.42048209955783766, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 3.062638580931264e-05, | |
| "loss": 1.1600708961486816, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.42105263157894735, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 3.059866962305987e-05, | |
| "loss": 1.1832588911056519, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.42105263157894735, | |
| "eval_loss": 1.1941628456115723, | |
| "eval_runtime": 80.1253, | |
| "eval_samples_per_second": 11.931, | |
| "eval_steps_per_second": 2.983, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.421623163600057, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 3.05709534368071e-05, | |
| "loss": 1.193061351776123, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.42219369562116676, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.054323725055432e-05, | |
| "loss": 1.1793735027313232, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.42276422764227645, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 3.0515521064301554e-05, | |
| "loss": 1.1607141494750977, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.42333475966338613, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 3.048780487804878e-05, | |
| "loss": 1.1790132522583008, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.4239052916844958, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 3.0460088691796013e-05, | |
| "loss": 1.155259132385254, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.4244758237056055, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 3.043237250554324e-05, | |
| "loss": 1.1134623289108276, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.4250463557267152, | |
| "grad_norm": 0.93359375, | |
| "learning_rate": 3.0404656319290465e-05, | |
| "loss": 1.198337435722351, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.42561688774782486, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 3.0376940133037695e-05, | |
| "loss": 1.1744345426559448, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.42618741976893454, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 3.034922394678492e-05, | |
| "loss": 1.1646068096160889, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.4267579517900442, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 3.0321507760532154e-05, | |
| "loss": 1.1827648878097534, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.4273284838111539, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 3.029379157427938e-05, | |
| "loss": 1.1942888498306274, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.4278990158322636, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 3.026607538802661e-05, | |
| "loss": 1.1896655559539795, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.42846954785337327, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 3.0238359201773835e-05, | |
| "loss": 1.197471022605896, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.42904007987448295, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 3.021064301552107e-05, | |
| "loss": 1.1281297206878662, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.42961061189559263, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 3.0182926829268294e-05, | |
| "loss": 1.1960434913635254, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.4301811439167023, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 3.0155210643015524e-05, | |
| "loss": 1.1772822141647339, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.430751675937812, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 3.012749445676275e-05, | |
| "loss": 1.2077326774597168, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.4313222079589217, | |
| "grad_norm": 1.0, | |
| "learning_rate": 3.0099778270509983e-05, | |
| "loss": 1.216168999671936, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.43189273998003136, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 3.007206208425721e-05, | |
| "loss": 1.1528898477554321, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.43246327200114104, | |
| "grad_norm": 1.0, | |
| "learning_rate": 3.0044345898004435e-05, | |
| "loss": 1.1724753379821777, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.4330338040222507, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 3.0016629711751664e-05, | |
| "loss": 1.1700730323791504, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.43360433604336046, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 2.998891352549889e-05, | |
| "loss": 1.1328129768371582, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.43417486806447014, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 2.9961197339246123e-05, | |
| "loss": 1.191325306892395, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.4347454000855798, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 2.993348115299335e-05, | |
| "loss": 1.160369873046875, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.4353159321066895, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.990576496674058e-05, | |
| "loss": 1.196010947227478, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.4358864641277992, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.9878048780487805e-05, | |
| "loss": 1.1497125625610352, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.43645699614890887, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 2.9850332594235038e-05, | |
| "loss": 1.152623176574707, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.43702752817001855, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 2.9822616407982264e-05, | |
| "loss": 1.1713566780090332, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.43759806019112824, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 2.9794900221729493e-05, | |
| "loss": 1.263333797454834, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.4381685922122379, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.976718403547672e-05, | |
| "loss": 1.144421935081482, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.4387391242333476, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 2.9739467849223952e-05, | |
| "loss": 1.2290055751800537, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.4393096562544573, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 2.971175166297118e-05, | |
| "loss": 1.1050488948822021, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.43988018827556696, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 2.96840354767184e-05, | |
| "loss": 1.2218358516693115, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.44045072029667665, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 2.9656319290465634e-05, | |
| "loss": 1.1308021545410156, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.4410212523177863, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 2.962860310421286e-05, | |
| "loss": 1.2299238443374634, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.441591784338896, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 2.960088691796009e-05, | |
| "loss": 1.1389673948287964, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.4421623163600057, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 2.9573170731707316e-05, | |
| "loss": 1.2660845518112183, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.4427328483811154, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.954545454545455e-05, | |
| "loss": 1.099113941192627, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.44330338040222506, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 2.9517738359201774e-05, | |
| "loss": 1.2134381532669067, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.44387391242333474, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.9490022172949004e-05, | |
| "loss": 1.1754953861236572, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.4444444444444444, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 2.946230598669623e-05, | |
| "loss": 1.1886742115020752, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.4450149764655541, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 2.9434589800443463e-05, | |
| "loss": 1.192276954650879, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.44558550848666384, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 2.940687361419069e-05, | |
| "loss": 1.2006890773773193, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.4461560405077735, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 2.9379157427937915e-05, | |
| "loss": 1.1819924116134644, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.4467265725288832, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 2.9351441241685145e-05, | |
| "loss": 1.1743961572647095, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.4472971045499929, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 2.932372505543237e-05, | |
| "loss": 1.2021007537841797, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.44786763657110257, | |
| "grad_norm": 1.0, | |
| "learning_rate": 2.9296008869179603e-05, | |
| "loss": 1.2032489776611328, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.44843816859221225, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 2.926829268292683e-05, | |
| "loss": 1.1912821531295776, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.44900870061332193, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 2.924057649667406e-05, | |
| "loss": 1.184190034866333, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.4495792326344316, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 2.9212860310421285e-05, | |
| "loss": 1.272563099861145, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.4501497646555413, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 2.9185144124168518e-05, | |
| "loss": 1.2212070226669312, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.450720296676651, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 2.9157427937915744e-05, | |
| "loss": 1.1937004327774048, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.45129082869776066, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 2.9129711751662973e-05, | |
| "loss": 1.1712844371795654, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.45186136071887034, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 2.91019955654102e-05, | |
| "loss": 1.1701891422271729, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.45243189273998, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 2.9074279379157432e-05, | |
| "loss": 1.2575602531433105, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.4530024247610897, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 2.904656319290466e-05, | |
| "loss": 1.1968649625778198, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.4535729567821994, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 2.9018847006651885e-05, | |
| "loss": 1.205810546875, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.45414348880330907, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.8991130820399114e-05, | |
| "loss": 1.1697238683700562, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.45471402082441875, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 2.896341463414634e-05, | |
| "loss": 1.27318274974823, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.45528455284552843, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 2.8935698447893573e-05, | |
| "loss": 1.2104084491729736, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.4558550848666381, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 2.89079822616408e-05, | |
| "loss": 1.2579401731491089, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.4564256168877478, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 2.888026607538803e-05, | |
| "loss": 1.1750009059906006, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.45699614890885754, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 2.8852549889135255e-05, | |
| "loss": 1.1911466121673584, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.4575666809299672, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.8824833702882487e-05, | |
| "loss": 1.0935354232788086, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.4581372129510769, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 2.8797117516629713e-05, | |
| "loss": 1.1621028184890747, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.4587077449721866, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 2.8769401330376943e-05, | |
| "loss": 1.1952382326126099, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.45927827699329626, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 2.874168514412417e-05, | |
| "loss": 1.2074031829833984, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.45984880901440595, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 2.8713968957871395e-05, | |
| "loss": 1.191246509552002, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.46041934103551563, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 2.8686252771618628e-05, | |
| "loss": 1.2298707962036133, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.4609898730566253, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 2.8658536585365854e-05, | |
| "loss": 1.2514528036117554, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.461560405077735, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.8630820399113084e-05, | |
| "loss": 1.2710151672363281, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.4621309370988447, | |
| "grad_norm": 0.93359375, | |
| "learning_rate": 2.860310421286031e-05, | |
| "loss": 1.1337497234344482, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.46270146911995436, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.8575388026607542e-05, | |
| "loss": 1.1267883777618408, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.46327200114106404, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 2.854767184035477e-05, | |
| "loss": 1.1755304336547852, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.4638425331621737, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.8519955654101998e-05, | |
| "loss": 1.1366599798202515, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.4644130651832834, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 2.8492239467849224e-05, | |
| "loss": 1.2038339376449585, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.4649835972043931, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.8464523281596457e-05, | |
| "loss": 1.2154085636138916, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.46555412922550277, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 2.8436807095343683e-05, | |
| "loss": 1.1818276643753052, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.46612466124661245, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 2.8409090909090912e-05, | |
| "loss": 1.2436468601226807, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.46669519326772213, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 2.838137472283814e-05, | |
| "loss": 1.1363047361373901, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.4672657252888318, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 2.8353658536585365e-05, | |
| "loss": 1.1960558891296387, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.4678362573099415, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.8325942350332597e-05, | |
| "loss": 1.171709418296814, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.46840678933105123, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 2.8298226164079824e-05, | |
| "loss": 1.1537501811981201, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.4689773213521609, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 2.8270509977827053e-05, | |
| "loss": 1.1839423179626465, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.4695478533732706, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 2.824279379157428e-05, | |
| "loss": 1.1610156297683716, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.4701183853943803, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 2.8215077605321512e-05, | |
| "loss": 1.1708459854125977, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.47068891741548996, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 2.8187361419068735e-05, | |
| "loss": 1.251354455947876, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.47125944943659964, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 2.8159645232815967e-05, | |
| "loss": 1.2049927711486816, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.4718299814577093, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 2.8131929046563194e-05, | |
| "loss": 1.230988621711731, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.472400513478819, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.8104212860310426e-05, | |
| "loss": 1.1739616394042969, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.4729710454999287, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 2.807649667405765e-05, | |
| "loss": 1.1999741792678833, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.47354157752103837, | |
| "grad_norm": 1.0, | |
| "learning_rate": 2.8048780487804882e-05, | |
| "loss": 1.2062275409698486, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.47411210954214805, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 2.8021064301552108e-05, | |
| "loss": 1.1344287395477295, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.47468264156325773, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.7993348115299334e-05, | |
| "loss": 1.2056477069854736, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.4752531735843674, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 2.7965631929046564e-05, | |
| "loss": 1.1727713346481323, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.4758237056054771, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 2.793791574279379e-05, | |
| "loss": 1.2081948518753052, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.4763942376265868, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 2.7910199556541023e-05, | |
| "loss": 1.255791187286377, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.47696476964769646, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 2.788248337028825e-05, | |
| "loss": 1.1889286041259766, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.47753530166880614, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 2.7854767184035478e-05, | |
| "loss": 1.241337776184082, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.4781058336899158, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 2.7827050997782704e-05, | |
| "loss": 1.2144089937210083, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.4786763657110255, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 2.7799334811529937e-05, | |
| "loss": 1.1527715921401978, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.4792468977321352, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.7771618625277163e-05, | |
| "loss": 1.181959629058838, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.47981742975324493, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 2.7743902439024393e-05, | |
| "loss": 1.1999069452285767, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.4803879617743546, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 2.771618625277162e-05, | |
| "loss": 1.2098867893218994, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.4809584937954643, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 2.7688470066518845e-05, | |
| "loss": 1.1860891580581665, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.481529025816574, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 2.7660753880266078e-05, | |
| "loss": 1.1108654737472534, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.48209955783768366, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 2.7633037694013304e-05, | |
| "loss": 1.2157371044158936, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.48267008985879334, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.7605321507760533e-05, | |
| "loss": 1.2216970920562744, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.483240621879903, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 2.757760532150776e-05, | |
| "loss": 1.1434253454208374, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.4838111539010127, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 2.7549889135254992e-05, | |
| "loss": 1.1241540908813477, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.4843816859221224, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 2.7522172949002218e-05, | |
| "loss": 1.186653971672058, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.48495221794323207, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 2.7494456762749448e-05, | |
| "loss": 1.2525804042816162, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.48552274996434175, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 2.7466740576496674e-05, | |
| "loss": 1.1987820863723755, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.48609328198545143, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.7439024390243906e-05, | |
| "loss": 1.2217812538146973, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.4866638140065611, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 2.7411308203991133e-05, | |
| "loss": 1.201343297958374, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.4872343460276708, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 2.7383592017738362e-05, | |
| "loss": 1.1668754816055298, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.4878048780487805, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.7355875831485588e-05, | |
| "loss": 1.1264851093292236, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.48837541006989016, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 2.7328159645232814e-05, | |
| "loss": 1.202168345451355, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.48894594209099984, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 2.7300443458980047e-05, | |
| "loss": 1.2231934070587158, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.4895164741121095, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 2.7272727272727273e-05, | |
| "loss": 1.1511149406433105, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.4900870061332192, | |
| "grad_norm": 1.0, | |
| "learning_rate": 2.7245011086474503e-05, | |
| "loss": 1.1898903846740723, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.4906575381543289, | |
| "grad_norm": 1.0, | |
| "learning_rate": 2.721729490022173e-05, | |
| "loss": 1.1848946809768677, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.49122807017543857, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 2.718957871396896e-05, | |
| "loss": 1.1898174285888672, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.4917986021965483, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 2.7161862527716188e-05, | |
| "loss": 1.2187345027923584, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.492369134217658, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 2.7134146341463417e-05, | |
| "loss": 1.1753157377243042, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.49293966623876767, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 2.7106430155210643e-05, | |
| "loss": 1.2812843322753906, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.49351019825987735, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 2.7078713968957876e-05, | |
| "loss": 1.2476832866668701, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.49408073028098703, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 2.7050997782705102e-05, | |
| "loss": 1.1763570308685303, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.4946512623020967, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 2.7023281596452328e-05, | |
| "loss": 1.159504771232605, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.4952217943232064, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 2.6995565410199558e-05, | |
| "loss": 1.2344439029693604, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.4957923263443161, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 2.6967849223946784e-05, | |
| "loss": 1.2668113708496094, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.49636285836542576, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.6940133037694017e-05, | |
| "loss": 1.2388842105865479, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.49693339038653545, | |
| "grad_norm": 1.0, | |
| "learning_rate": 2.6912416851441243e-05, | |
| "loss": 1.197232723236084, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.4975039224076451, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 2.6884700665188472e-05, | |
| "loss": 1.1960959434509277, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.4980744544287548, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 2.6856984478935698e-05, | |
| "loss": 1.222888469696045, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.4986449864498645, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 2.682926829268293e-05, | |
| "loss": 1.239640474319458, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.4992155184709742, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 2.6801552106430157e-05, | |
| "loss": 1.1557681560516357, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.49978605049208386, | |
| "grad_norm": 1.0, | |
| "learning_rate": 2.6773835920177387e-05, | |
| "loss": 1.1697707176208496, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.5003565825131936, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 2.6746119733924613e-05, | |
| "loss": 1.2065680027008057, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.5009271145343033, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 2.6718403547671845e-05, | |
| "loss": 1.2194795608520508, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.501497646555413, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 2.669068736141907e-05, | |
| "loss": 1.1722071170806885, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.5020681785765226, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 2.6662971175166294e-05, | |
| "loss": 1.1860017776489258, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.5026387105976323, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 2.6635254988913527e-05, | |
| "loss": 1.173937439918518, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.503209242618742, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 2.6607538802660753e-05, | |
| "loss": 1.1348332166671753, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.5037797746398517, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 2.6579822616407986e-05, | |
| "loss": 1.205221176147461, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.5043503066609614, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 2.655210643015521e-05, | |
| "loss": 1.1510381698608398, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.504920838682071, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 2.652439024390244e-05, | |
| "loss": 1.194382905960083, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.5054913707031807, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 2.6496674057649668e-05, | |
| "loss": 1.2697436809539795, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.5060619027242904, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 2.64689578713969e-05, | |
| "loss": 1.1560388803482056, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.5066324347454001, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 2.6441241685144123e-05, | |
| "loss": 1.2498875856399536, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.5072029667665098, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 2.6413525498891356e-05, | |
| "loss": 1.1706441640853882, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.5077734987876195, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 2.6385809312638582e-05, | |
| "loss": 1.1960177421569824, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.5083440308087291, | |
| "grad_norm": 1.0, | |
| "learning_rate": 2.6358093126385815e-05, | |
| "loss": 1.1732114553451538, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.5089145628298388, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 2.6330376940133038e-05, | |
| "loss": 1.1812173128128052, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.5094850948509485, | |
| "grad_norm": 1.0, | |
| "learning_rate": 2.6302660753880264e-05, | |
| "loss": 1.243033528327942, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.5100556268720582, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 2.6274944567627497e-05, | |
| "loss": 1.1132174730300903, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.5106261588931679, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 2.6247228381374723e-05, | |
| "loss": 1.129286289215088, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.5111966909142776, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 2.6219512195121952e-05, | |
| "loss": 1.1969499588012695, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.5117672229353872, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 2.6191796008869178e-05, | |
| "loss": 1.1295521259307861, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.5123377549564969, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 2.616407982261641e-05, | |
| "loss": 1.1657040119171143, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.5129082869776066, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 2.6136363636363637e-05, | |
| "loss": 1.182844638824463, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.5134788189987163, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 2.6108647450110867e-05, | |
| "loss": 1.11708664894104, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.514049351019826, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 2.6080931263858093e-05, | |
| "loss": 1.1282655000686646, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.5146198830409356, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 2.6053215077605326e-05, | |
| "loss": 1.1830154657363892, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.5151904150620453, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 2.602549889135255e-05, | |
| "loss": 1.1873393058776855, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.515760947083155, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 2.5997782705099778e-05, | |
| "loss": 1.1280049085617065, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.5163314791042647, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.5970066518847007e-05, | |
| "loss": 1.1866214275360107, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.5169020111253744, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 2.5942350332594233e-05, | |
| "loss": 1.132464051246643, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.517472543146484, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 2.5914634146341466e-05, | |
| "loss": 1.2057054042816162, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.5180430751675937, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.5886917960088692e-05, | |
| "loss": 1.1725504398345947, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.5186136071887034, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 2.585920177383592e-05, | |
| "loss": 1.2105215787887573, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.5191841392098131, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 2.5831485587583148e-05, | |
| "loss": 1.126555323600769, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.5197546712309228, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 2.580376940133038e-05, | |
| "loss": 1.117220401763916, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.5203252032520326, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 2.5776053215077607e-05, | |
| "loss": 1.1578710079193115, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.5208957352731423, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 2.5748337028824836e-05, | |
| "loss": 1.1631922721862793, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.5214662672942519, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 2.5720620842572062e-05, | |
| "loss": 1.2013893127441406, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.5220367993153616, | |
| "grad_norm": 1.0, | |
| "learning_rate": 2.5692904656319295e-05, | |
| "loss": 1.159932017326355, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.5226073313364713, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 2.566518847006652e-05, | |
| "loss": 1.1213711500167847, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.523177863357581, | |
| "grad_norm": 1.0, | |
| "learning_rate": 2.5637472283813747e-05, | |
| "loss": 1.2035624980926514, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.5237483953786907, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 2.5609756097560977e-05, | |
| "loss": 1.100569725036621, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.5243189273998003, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 2.5582039911308203e-05, | |
| "loss": 1.1802055835723877, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.52488945942091, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 2.5554323725055436e-05, | |
| "loss": 1.2129563093185425, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.5254599914420197, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 2.552660753880266e-05, | |
| "loss": 1.2040753364562988, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.5260305234631294, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 2.549889135254989e-05, | |
| "loss": 1.1266067028045654, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.5266010554842391, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 2.5471175166297117e-05, | |
| "loss": 1.1967592239379883, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.5271715875053488, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 2.544345898004435e-05, | |
| "loss": 1.1658574342727661, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.5277421195264584, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.5415742793791576e-05, | |
| "loss": 1.1974247694015503, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.5283126515475681, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.5388026607538806e-05, | |
| "loss": 1.175785779953003, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.5288831835686778, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 2.5360310421286032e-05, | |
| "loss": 1.2295399904251099, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.5294537155897875, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 2.5332594235033258e-05, | |
| "loss": 1.1797332763671875, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.5300242476108972, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 2.530487804878049e-05, | |
| "loss": 1.1036921739578247, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.5305947796320069, | |
| "grad_norm": 1.0, | |
| "learning_rate": 2.5277161862527717e-05, | |
| "loss": 1.1661919355392456, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.5311653116531165, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 2.5249445676274946e-05, | |
| "loss": 1.220758318901062, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.5317358436742262, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 2.5221729490022172e-05, | |
| "loss": 1.2072967290878296, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.5323063756953359, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 2.5194013303769405e-05, | |
| "loss": 1.211767315864563, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.5328769077164456, | |
| "grad_norm": 1.0, | |
| "learning_rate": 2.516629711751663e-05, | |
| "loss": 1.196463942527771, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.5334474397375553, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.513858093126386e-05, | |
| "loss": 1.1342837810516357, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.5340179717586649, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 2.5110864745011087e-05, | |
| "loss": 1.155871868133545, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.5345885037797746, | |
| "grad_norm": 1.0, | |
| "learning_rate": 2.508314855875832e-05, | |
| "loss": 1.1863211393356323, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.5351590358008843, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.5055432372505546e-05, | |
| "loss": 1.1399109363555908, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.535729567821994, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.5027716186252775e-05, | |
| "loss": 1.148442268371582, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.5363000998431037, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.2298827171325684, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.5368706318642134, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 2.497228381374723e-05, | |
| "loss": 1.1379940509796143, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.537441163885323, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 2.4944567627494457e-05, | |
| "loss": 1.1394915580749512, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.5380116959064327, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 2.4916851441241686e-05, | |
| "loss": 1.180498480796814, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.5385822279275424, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 2.4889135254988916e-05, | |
| "loss": 1.2175443172454834, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.5391527599486521, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 2.4861419068736145e-05, | |
| "loss": 1.1404181718826294, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.5397232919697618, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 2.483370288248337e-05, | |
| "loss": 1.1929075717926025, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.5402938239908714, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.4805986696230597e-05, | |
| "loss": 1.1470379829406738, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.5408643560119811, | |
| "grad_norm": 1.0, | |
| "learning_rate": 2.4778270509977827e-05, | |
| "loss": 1.1692397594451904, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.5414348880330908, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 2.4750554323725056e-05, | |
| "loss": 1.2243307828903198, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.5420054200542005, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 2.4722838137472286e-05, | |
| "loss": 1.1853331327438354, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.5425759520753102, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 2.4695121951219512e-05, | |
| "loss": 1.2312514781951904, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.5431464840964199, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 2.466740576496674e-05, | |
| "loss": 1.1487960815429688, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.5437170161175297, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.463968957871397e-05, | |
| "loss": 1.1434435844421387, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.5442875481386393, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 2.46119733924612e-05, | |
| "loss": 1.2065646648406982, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.544858080159749, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.4584257206208426e-05, | |
| "loss": 1.1631767749786377, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.5454286121808587, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 2.4556541019955656e-05, | |
| "loss": 1.19287109375, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.5459991442019684, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 2.4528824833702885e-05, | |
| "loss": 1.183131456375122, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.5465696762230781, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 2.4501108647450115e-05, | |
| "loss": 1.1865886449813843, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.5471402082441877, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 2.447339246119734e-05, | |
| "loss": 1.1511285305023193, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.5477107402652974, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.4445676274944567e-05, | |
| "loss": 1.1591591835021973, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.5482812722864071, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 2.4417960088691796e-05, | |
| "loss": 1.1885075569152832, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.5488518043075168, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 2.4390243902439026e-05, | |
| "loss": 1.1785187721252441, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.5494223363286265, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.4362527716186255e-05, | |
| "loss": 1.1689701080322266, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.5499928683497362, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 2.433481152993348e-05, | |
| "loss": 1.1543480157852173, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.5505634003708458, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 2.430709534368071e-05, | |
| "loss": 1.196134328842163, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.5511339323919555, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 2.427937915742794e-05, | |
| "loss": 1.2235426902770996, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.5517044644130652, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.425166297117517e-05, | |
| "loss": 1.2253239154815674, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.5522749964341749, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 2.4223946784922396e-05, | |
| "loss": 1.1899304389953613, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.5528455284552846, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 2.4196230598669625e-05, | |
| "loss": 1.1620666980743408, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.5534160604763942, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 2.4168514412416855e-05, | |
| "loss": 1.1896693706512451, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.5539865924975039, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 2.414079822616408e-05, | |
| "loss": 1.1168513298034668, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.5545571245186136, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 2.4113082039911307e-05, | |
| "loss": 1.1533100605010986, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.5551276565397233, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 2.4085365853658536e-05, | |
| "loss": 1.11790132522583, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.555698188560833, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.4057649667405766e-05, | |
| "loss": 1.1832971572875977, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.5562687205819427, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 2.4029933481152995e-05, | |
| "loss": 1.136374592781067, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.5568392526030523, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 2.400221729490022e-05, | |
| "loss": 1.13529634475708, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.557409784624162, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 2.397450110864745e-05, | |
| "loss": 1.152282476425171, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.5579803166452717, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 2.394678492239468e-05, | |
| "loss": 1.1445283889770508, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.5585508486663814, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.391906873614191e-05, | |
| "loss": 1.1682907342910767, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.5591213806874911, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.3891352549889136e-05, | |
| "loss": 1.2181129455566406, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.5596919127086007, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 2.3863636363636365e-05, | |
| "loss": 1.1683390140533447, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.5602624447297104, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 2.3835920177383595e-05, | |
| "loss": 1.1526210308074951, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.5608329767508201, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 2.380820399113082e-05, | |
| "loss": 1.1839709281921387, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.5614035087719298, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.378048780487805e-05, | |
| "loss": 1.171961784362793, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.5619740407930395, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.3752771618625276e-05, | |
| "loss": 1.1404699087142944, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.5625445728141492, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 2.3725055432372506e-05, | |
| "loss": 1.1446641683578491, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.5631151048352588, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 2.3697339246119735e-05, | |
| "loss": 1.1063508987426758, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.5636856368563685, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 2.3669623059866965e-05, | |
| "loss": 1.1023223400115967, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.5642561688774782, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 2.364190687361419e-05, | |
| "loss": 1.157923698425293, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.5648267008985879, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 2.361419068736142e-05, | |
| "loss": 1.1578837633132935, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.5653972329196976, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 2.358647450110865e-05, | |
| "loss": 1.110813856124878, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.5659677649408072, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 2.355875831485588e-05, | |
| "loss": 1.1383073329925537, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.566538296961917, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 2.3531042128603105e-05, | |
| "loss": 1.1709469556808472, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.5671088289830267, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 2.3503325942350335e-05, | |
| "loss": 1.1664437055587769, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.5676793610041364, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 2.347560975609756e-05, | |
| "loss": 1.1766831874847412, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.5682498930252461, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 2.344789356984479e-05, | |
| "loss": 1.1888954639434814, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.5688204250463558, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 2.3420177383592016e-05, | |
| "loss": 1.1901835203170776, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.5693909570674655, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 2.3392461197339246e-05, | |
| "loss": 1.13261079788208, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.5699614890885751, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 2.3364745011086475e-05, | |
| "loss": 1.2113161087036133, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.5705320211096848, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 2.3337028824833705e-05, | |
| "loss": 1.1643033027648926, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5711025531307945, | |
| "grad_norm": 1.0, | |
| "learning_rate": 2.330931263858093e-05, | |
| "loss": 1.2085559368133545, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 0.5716730851519042, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 2.328159645232816e-05, | |
| "loss": 1.1837122440338135, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.5722436171730139, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 2.325388026607539e-05, | |
| "loss": 1.2685991525650024, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 0.5728141491941235, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 2.322616407982262e-05, | |
| "loss": 1.1660895347595215, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.5733846812152332, | |
| "grad_norm": 1.0, | |
| "learning_rate": 2.3198447893569845e-05, | |
| "loss": 1.1840052604675293, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.5739552132363429, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 2.3170731707317075e-05, | |
| "loss": 1.1665326356887817, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.5745257452574526, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 2.3143015521064304e-05, | |
| "loss": 1.1994144916534424, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 0.5750962772785623, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 2.311529933481153e-05, | |
| "loss": 1.1023156642913818, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.575666809299672, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 2.308758314855876e-05, | |
| "loss": 1.2176637649536133, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 0.5762373413207816, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 2.3059866962305986e-05, | |
| "loss": 1.2663724422454834, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.5768078733418913, | |
| "grad_norm": 1.0, | |
| "learning_rate": 2.3032150776053215e-05, | |
| "loss": 1.1681220531463623, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 0.577378405363001, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 2.3004434589800445e-05, | |
| "loss": 1.221947431564331, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.5779489373841107, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 2.2976718403547674e-05, | |
| "loss": 1.1309971809387207, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 0.5785194694052204, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 2.29490022172949e-05, | |
| "loss": 1.1859217882156372, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.57909000142633, | |
| "grad_norm": 1.0, | |
| "learning_rate": 2.292128603104213e-05, | |
| "loss": 1.1979272365570068, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.5796605334474397, | |
| "grad_norm": 1.0, | |
| "learning_rate": 2.289356984478936e-05, | |
| "loss": 1.1865754127502441, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.5802310654685494, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 2.286585365853659e-05, | |
| "loss": 1.1868486404418945, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 0.5808015974896591, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 2.2838137472283815e-05, | |
| "loss": 1.129669427871704, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.5813721295107688, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 2.2810421286031044e-05, | |
| "loss": 1.1734843254089355, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 0.5819426615318785, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 2.278270509977827e-05, | |
| "loss": 1.2343952655792236, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.5825131935529881, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.27549889135255e-05, | |
| "loss": 1.21380615234375, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 0.5830837255740978, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 2.272727272727273e-05, | |
| "loss": 1.1312305927276611, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.5836542575952075, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 2.2699556541019955e-05, | |
| "loss": 1.1510472297668457, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 0.5842247896163172, | |
| "grad_norm": 1.0, | |
| "learning_rate": 2.2671840354767185e-05, | |
| "loss": 1.1997393369674683, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.5847953216374269, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 2.2644124168514414e-05, | |
| "loss": 1.1844977140426636, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.5853658536585366, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 2.261640798226164e-05, | |
| "loss": 1.1642664670944214, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.5859363856796462, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 2.258869179600887e-05, | |
| "loss": 1.1929872035980225, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 0.5865069177007559, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 2.25609756097561e-05, | |
| "loss": 1.2264790534973145, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.5870774497218656, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.253325942350333e-05, | |
| "loss": 1.208320140838623, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 0.5876479817429753, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 2.2505543237250555e-05, | |
| "loss": 1.1017545461654663, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.588218513764085, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 2.2477827050997784e-05, | |
| "loss": 1.0866947174072266, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 0.5887890457851946, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 2.245011086474501e-05, | |
| "loss": 1.134414553642273, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.5893595778063043, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 2.242239467849224e-05, | |
| "loss": 1.1386680603027344, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 0.5899301098274141, | |
| "grad_norm": 0.93359375, | |
| "learning_rate": 2.239467849223947e-05, | |
| "loss": 1.098857045173645, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.5905006418485238, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 2.2366962305986695e-05, | |
| "loss": 1.1710071563720703, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.5910711738696335, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 2.2339246119733925e-05, | |
| "loss": 1.1196489334106445, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.5916417058907432, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 2.2311529933481154e-05, | |
| "loss": 1.132148265838623, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 0.5922122379118528, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 2.2283813747228384e-05, | |
| "loss": 1.1694618463516235, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.5927827699329625, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 2.225609756097561e-05, | |
| "loss": 1.141546607017517, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 0.5933533019540722, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 2.222838137472284e-05, | |
| "loss": 1.214141607284546, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.5939238339751819, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 2.220066518847007e-05, | |
| "loss": 1.142057180404663, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 0.5944943659962916, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 2.2172949002217298e-05, | |
| "loss": 1.1707711219787598, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.5950648980174013, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 2.2145232815964524e-05, | |
| "loss": 1.164795994758606, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 0.5956354300385109, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 2.211751662971175e-05, | |
| "loss": 1.1659691333770752, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.5962059620596206, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 2.208980044345898e-05, | |
| "loss": 1.1294951438903809, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.5967764940807303, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.206208425720621e-05, | |
| "loss": 1.1925092935562134, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.59734702610184, | |
| "grad_norm": 0.93359375, | |
| "learning_rate": 2.203436807095344e-05, | |
| "loss": 1.1600418090820312, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 0.5979175581229497, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 2.2006651884700665e-05, | |
| "loss": 1.157020092010498, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.5984880901440593, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 2.1978935698447894e-05, | |
| "loss": 1.1589795351028442, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 0.599058622165169, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 2.1951219512195124e-05, | |
| "loss": 1.1546876430511475, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.5996291541862787, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 2.1923503325942353e-05, | |
| "loss": 1.1549787521362305, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 0.6001996862073884, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 2.189578713968958e-05, | |
| "loss": 1.1518681049346924, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.6007702182284981, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.186807095343681e-05, | |
| "loss": 1.1609306335449219, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 0.6013407502496078, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 2.1840354767184038e-05, | |
| "loss": 1.1526927947998047, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.6019112822707174, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 2.1812638580931268e-05, | |
| "loss": 1.2030518054962158, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.6024818142918271, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 2.178492239467849e-05, | |
| "loss": 1.087314248085022, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.6030523463129368, | |
| "grad_norm": 0.93359375, | |
| "learning_rate": 2.175720620842572e-05, | |
| "loss": 1.120784044265747, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 0.6036228783340465, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 2.172949002217295e-05, | |
| "loss": 1.0867156982421875, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.6041934103551562, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.170177383592018e-05, | |
| "loss": 1.2083582878112793, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 0.6047639423762659, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 2.1674057649667405e-05, | |
| "loss": 1.1944574117660522, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.6053344743973755, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 2.1646341463414634e-05, | |
| "loss": 1.118787169456482, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 0.6059050064184852, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 2.1618625277161864e-05, | |
| "loss": 1.1591801643371582, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.6064755384395949, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 2.1590909090909093e-05, | |
| "loss": 1.1802964210510254, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 0.6070460704607046, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 2.156319290465632e-05, | |
| "loss": 1.1993342638015747, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.6076166024818143, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.153547671840355e-05, | |
| "loss": 1.2244541645050049, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.6081871345029239, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 2.150776053215078e-05, | |
| "loss": 1.1696969270706177, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.6087576665240336, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 2.1480044345898008e-05, | |
| "loss": 1.204698085784912, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 0.6093281985451433, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.1452328159645234e-05, | |
| "loss": 1.167772650718689, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.609898730566253, | |
| "grad_norm": 0.93359375, | |
| "learning_rate": 2.142461197339246e-05, | |
| "loss": 1.1064563989639282, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 0.6104692625873627, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 2.139689578713969e-05, | |
| "loss": 1.1095709800720215, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.6110397946084724, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 2.136917960088692e-05, | |
| "loss": 1.1526896953582764, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 0.611610326629582, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 2.134146341463415e-05, | |
| "loss": 1.1842620372772217, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.6121808586506917, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.1313747228381374e-05, | |
| "loss": 1.1854032278060913, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 0.6127513906718015, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 2.1286031042128604e-05, | |
| "loss": 1.1536649465560913, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.6133219226929112, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 2.1258314855875833e-05, | |
| "loss": 1.162165641784668, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.6138924547140209, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 2.1230598669623063e-05, | |
| "loss": 1.1589579582214355, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.6144629867351306, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 2.120288248337029e-05, | |
| "loss": 1.2380765676498413, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 0.6150335187562402, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 2.117516629711752e-05, | |
| "loss": 1.1789859533309937, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.6156040507773499, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 2.1147450110864748e-05, | |
| "loss": 1.1379293203353882, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 0.6161745827984596, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 2.1119733924611977e-05, | |
| "loss": 1.176946759223938, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.6167451148195693, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 2.10920177383592e-05, | |
| "loss": 1.232793927192688, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 0.617315646840679, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 2.106430155210643e-05, | |
| "loss": 1.1333751678466797, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.6178861788617886, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 2.103658536585366e-05, | |
| "loss": 1.1847493648529053, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 0.6184567108828983, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 2.100886917960089e-05, | |
| "loss": 1.1365629434585571, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.619027242904008, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 2.0981152993348114e-05, | |
| "loss": 1.1531561613082886, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.6195977749251177, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 2.0953436807095344e-05, | |
| "loss": 1.1419352293014526, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.6201683069462274, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 2.0925720620842573e-05, | |
| "loss": 1.2071990966796875, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 0.6207388389673371, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 2.0898004434589803e-05, | |
| "loss": 1.146884799003601, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.6213093709884467, | |
| "grad_norm": 1.0, | |
| "learning_rate": 2.087028824833703e-05, | |
| "loss": 1.1956453323364258, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 0.6218799030095564, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 2.084257206208426e-05, | |
| "loss": 1.182574987411499, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.6224504350306661, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 2.0814855875831488e-05, | |
| "loss": 1.1805145740509033, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 0.6230209670517758, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.0787139689578717e-05, | |
| "loss": 1.173978567123413, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.6235914990728855, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 2.0759423503325943e-05, | |
| "loss": 1.1732361316680908, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 0.6241620310939952, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 2.073170731707317e-05, | |
| "loss": 1.1978164911270142, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.6247325631151048, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.07039911308204e-05, | |
| "loss": 1.161289930343628, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.6253030951362145, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 2.067627494456763e-05, | |
| "loss": 1.1583458185195923, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.6258736271573242, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 2.0648558758314858e-05, | |
| "loss": 1.1835911273956299, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 0.6264441591784339, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 2.0620842572062084e-05, | |
| "loss": 1.1692794561386108, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.6270146911995436, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 2.0593126385809313e-05, | |
| "loss": 1.1748257875442505, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 0.6275852232206532, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 2.0565410199556543e-05, | |
| "loss": 1.172876238822937, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.6281557552417629, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 2.0537694013303772e-05, | |
| "loss": 1.1829420328140259, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 0.6287262872628726, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 2.0509977827051e-05, | |
| "loss": 1.163160800933838, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.6292968192839823, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.0482261640798228e-05, | |
| "loss": 1.144565463066101, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 0.629867351305092, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 2.0454545454545457e-05, | |
| "loss": 1.1199369430541992, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.6304378833262017, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 2.0426829268292683e-05, | |
| "loss": 1.1951239109039307, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.6310084153473113, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.0399113082039913e-05, | |
| "loss": 1.1440958976745605, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.631578947368421, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 2.037139689578714e-05, | |
| "loss": 1.1329402923583984, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 0.631578947368421, | |
| "eval_loss": 1.1687453985214233, | |
| "eval_runtime": 80.1565, | |
| "eval_samples_per_second": 11.927, | |
| "eval_steps_per_second": 2.982, | |
| "step": 1107 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1841, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 369, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9.75350724523733e+17, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |