{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.06842285323297982,
  "eval_steps": 500,
  "global_step": 650,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00010526592805073818,
      "grad_norm": 2.6939258575439453,
      "learning_rate": 0.0,
      "loss": 0.8515,
      "step": 1
    },
    {
      "epoch": 0.00021053185610147635,
      "grad_norm": 2.7966604232788086,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.8166,
      "step": 2
    },
    {
      "epoch": 0.00031579778415221455,
      "grad_norm": 2.257108211517334,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.8018,
      "step": 3
    },
    {
      "epoch": 0.0004210637122029527,
      "grad_norm": 1.1128956079483032,
      "learning_rate": 5e-05,
      "loss": 0.5497,
      "step": 4
    },
    {
      "epoch": 0.0005263296402536909,
      "grad_norm": 1.2874521017074585,
      "learning_rate": 4.999473462510531e-05,
      "loss": 0.6127,
      "step": 5
    },
    {
      "epoch": 0.0006315955683044291,
      "grad_norm": 0.9699161648750305,
      "learning_rate": 4.998946925021062e-05,
      "loss": 0.6407,
      "step": 6
    },
    {
      "epoch": 0.0007368614963551673,
      "grad_norm": 1.0630613565444946,
      "learning_rate": 4.998420387531593e-05,
      "loss": 0.4727,
      "step": 7
    },
    {
      "epoch": 0.0008421274244059054,
      "grad_norm": 0.882173478603363,
      "learning_rate": 4.997893850042124e-05,
      "loss": 0.5235,
      "step": 8
    },
    {
      "epoch": 0.0009473933524566436,
      "grad_norm": 0.676689088344574,
      "learning_rate": 4.997367312552654e-05,
      "loss": 0.4341,
      "step": 9
    },
    {
      "epoch": 0.0010526592805073817,
      "grad_norm": 0.7519457936286926,
      "learning_rate": 4.996840775063184e-05,
      "loss": 0.4338,
      "step": 10
    },
    {
      "epoch": 0.00115792520855812,
      "grad_norm": 0.7073312401771545,
      "learning_rate": 4.996314237573715e-05,
      "loss": 0.3924,
      "step": 11
    },
    {
      "epoch": 0.0012631911366088582,
      "grad_norm": 0.7425239086151123,
      "learning_rate": 4.995787700084246e-05,
      "loss": 0.4859,
      "step": 12
    },
    {
      "epoch": 0.0013684570646595963,
      "grad_norm": 0.6777500510215759,
      "learning_rate": 4.995261162594777e-05,
      "loss": 0.4235,
      "step": 13
    },
    {
      "epoch": 0.0014737229927103345,
      "grad_norm": 0.6901292204856873,
      "learning_rate": 4.994734625105308e-05,
      "loss": 0.4709,
      "step": 14
    },
    {
      "epoch": 0.0015789889207610726,
      "grad_norm": 0.8694287538528442,
      "learning_rate": 4.994208087615839e-05,
      "loss": 0.5086,
      "step": 15
    },
    {
      "epoch": 0.0016842548488118108,
      "grad_norm": 0.6798275113105774,
      "learning_rate": 4.99368155012637e-05,
      "loss": 0.4937,
      "step": 16
    },
    {
      "epoch": 0.001789520776862549,
      "grad_norm": 0.7667484879493713,
      "learning_rate": 4.9931550126369e-05,
      "loss": 0.4974,
      "step": 17
    },
    {
      "epoch": 0.0018947867049132871,
      "grad_norm": 0.6613733172416687,
      "learning_rate": 4.992628475147431e-05,
      "loss": 0.4181,
      "step": 18
    },
    {
      "epoch": 0.0020000526329640254,
      "grad_norm": 0.7069230079650879,
      "learning_rate": 4.992101937657962e-05,
      "loss": 0.4834,
      "step": 19
    },
    {
      "epoch": 0.0021053185610147634,
      "grad_norm": 0.5691242814064026,
      "learning_rate": 4.991575400168492e-05,
      "loss": 0.4405,
      "step": 20
    },
    {
      "epoch": 0.002210584489065502,
      "grad_norm": 0.701371431350708,
      "learning_rate": 4.991048862679023e-05,
      "loss": 0.3933,
      "step": 21
    },
    {
      "epoch": 0.00231585041711624,
      "grad_norm": 0.5670080780982971,
      "learning_rate": 4.990522325189554e-05,
      "loss": 0.5061,
      "step": 22
    },
    {
      "epoch": 0.002421116345166978,
      "grad_norm": 0.6001436114311218,
      "learning_rate": 4.9899957877000847e-05,
      "loss": 0.4879,
      "step": 23
    },
    {
      "epoch": 0.0025263822732177164,
      "grad_norm": 0.6185859441757202,
      "learning_rate": 4.9894692502106156e-05,
      "loss": 0.4006,
      "step": 24
    },
    {
      "epoch": 0.0026316482012684545,
      "grad_norm": 0.677813708782196,
      "learning_rate": 4.988942712721146e-05,
      "loss": 0.4347,
      "step": 25
    },
    {
      "epoch": 0.0027369141293191925,
      "grad_norm": 0.5357967019081116,
      "learning_rate": 4.988416175231677e-05,
      "loss": 0.4594,
      "step": 26
    },
    {
      "epoch": 0.002842180057369931,
      "grad_norm": 0.5995861291885376,
      "learning_rate": 4.9878896377422076e-05,
      "loss": 0.4947,
      "step": 27
    },
    {
      "epoch": 0.002947445985420669,
      "grad_norm": 0.5909422636032104,
      "learning_rate": 4.9873631002527385e-05,
      "loss": 0.5316,
      "step": 28
    },
    {
      "epoch": 0.003052711913471407,
      "grad_norm": 0.6816675662994385,
      "learning_rate": 4.9868365627632694e-05,
      "loss": 0.4705,
      "step": 29
    },
    {
      "epoch": 0.003157977841522145,
      "grad_norm": 0.5410743355751038,
      "learning_rate": 4.9863100252737996e-05,
      "loss": 0.4229,
      "step": 30
    },
    {
      "epoch": 0.0032632437695728836,
      "grad_norm": 0.7362250089645386,
      "learning_rate": 4.9857834877843305e-05,
      "loss": 0.4922,
      "step": 31
    },
    {
      "epoch": 0.0033685096976236216,
      "grad_norm": 0.7518715262413025,
      "learning_rate": 4.9852569502948614e-05,
      "loss": 0.3942,
      "step": 32
    },
    {
      "epoch": 0.0034737756256743597,
      "grad_norm": 0.6200836300849915,
      "learning_rate": 4.9847304128053916e-05,
      "loss": 0.3937,
      "step": 33
    },
    {
      "epoch": 0.003579041553725098,
      "grad_norm": 0.6816834807395935,
      "learning_rate": 4.9842038753159225e-05,
      "loss": 0.492,
      "step": 34
    },
    {
      "epoch": 0.003684307481775836,
      "grad_norm": 0.6341183185577393,
      "learning_rate": 4.9836773378264534e-05,
      "loss": 0.5873,
      "step": 35
    },
    {
      "epoch": 0.0037895734098265742,
      "grad_norm": 0.5888874530792236,
      "learning_rate": 4.9831508003369843e-05,
      "loss": 0.3784,
      "step": 36
    },
    {
      "epoch": 0.0038948393378773127,
      "grad_norm": 0.503926157951355,
      "learning_rate": 4.982624262847515e-05,
      "loss": 0.5116,
      "step": 37
    },
    {
      "epoch": 0.004000105265928051,
      "grad_norm": 0.6205700039863586,
      "learning_rate": 4.982097725358046e-05,
      "loss": 0.4237,
      "step": 38
    },
    {
      "epoch": 0.004105371193978789,
      "grad_norm": 0.47863858938217163,
      "learning_rate": 4.9815711878685764e-05,
      "loss": 0.5397,
      "step": 39
    },
    {
      "epoch": 0.004210637122029527,
      "grad_norm": 0.5036730766296387,
      "learning_rate": 4.981044650379107e-05,
      "loss": 0.46,
      "step": 40
    },
    {
      "epoch": 0.004315903050080265,
      "grad_norm": 0.4822523593902588,
      "learning_rate": 4.9805181128896375e-05,
      "loss": 0.4988,
      "step": 41
    },
    {
      "epoch": 0.004421168978131004,
      "grad_norm": 0.5173696875572205,
      "learning_rate": 4.9799915754001684e-05,
      "loss": 0.4003,
      "step": 42
    },
    {
      "epoch": 0.004526434906181742,
      "grad_norm": 0.6021311283111572,
      "learning_rate": 4.979465037910699e-05,
      "loss": 0.4306,
      "step": 43
    },
    {
      "epoch": 0.00463170083423248,
      "grad_norm": 0.5137932300567627,
      "learning_rate": 4.97893850042123e-05,
      "loss": 0.4453,
      "step": 44
    },
    {
      "epoch": 0.004736966762283218,
      "grad_norm": 0.5420482158660889,
      "learning_rate": 4.978411962931761e-05,
      "loss": 0.5377,
      "step": 45
    },
    {
      "epoch": 0.004842232690333956,
      "grad_norm": 0.5643067359924316,
      "learning_rate": 4.977885425442292e-05,
      "loss": 0.4519,
      "step": 46
    },
    {
      "epoch": 0.004947498618384694,
      "grad_norm": 0.5466287136077881,
      "learning_rate": 4.977358887952823e-05,
      "loss": 0.4221,
      "step": 47
    },
    {
      "epoch": 0.005052764546435433,
      "grad_norm": 0.5712279677391052,
      "learning_rate": 4.976832350463354e-05,
      "loss": 0.4987,
      "step": 48
    },
    {
      "epoch": 0.005158030474486171,
      "grad_norm": 0.4822379946708679,
      "learning_rate": 4.976305812973884e-05,
      "loss": 0.4848,
      "step": 49
    },
    {
      "epoch": 0.005263296402536909,
      "grad_norm": 0.5017122626304626,
      "learning_rate": 4.975779275484414e-05,
      "loss": 0.4196,
      "step": 50
    },
    {
      "epoch": 0.005368562330587647,
      "grad_norm": 0.4559021592140198,
      "learning_rate": 4.975252737994945e-05,
      "loss": 0.4412,
      "step": 51
    },
    {
      "epoch": 0.005473828258638385,
      "grad_norm": 0.5421490669250488,
      "learning_rate": 4.974726200505476e-05,
      "loss": 0.3746,
      "step": 52
    },
    {
      "epoch": 0.005579094186689123,
      "grad_norm": 0.46819037199020386,
      "learning_rate": 4.974199663016007e-05,
      "loss": 0.4521,
      "step": 53
    },
    {
      "epoch": 0.005684360114739862,
      "grad_norm": 0.45857539772987366,
      "learning_rate": 4.973673125526538e-05,
      "loss": 0.3941,
      "step": 54
    },
    {
      "epoch": 0.0057896260427906,
      "grad_norm": 0.5490565896034241,
      "learning_rate": 4.973146588037069e-05,
      "loss": 0.4551,
      "step": 55
    },
    {
      "epoch": 0.005894891970841338,
      "grad_norm": 0.5232876539230347,
      "learning_rate": 4.9726200505475997e-05,
      "loss": 0.4356,
      "step": 56
    },
    {
      "epoch": 0.006000157898892076,
      "grad_norm": 0.5434950590133667,
      "learning_rate": 4.97209351305813e-05,
      "loss": 0.436,
      "step": 57
    },
    {
      "epoch": 0.006105423826942814,
      "grad_norm": 0.44252631068229675,
      "learning_rate": 4.971566975568661e-05,
      "loss": 0.4263,
      "step": 58
    },
    {
      "epoch": 0.006210689754993552,
      "grad_norm": 0.49957412481307983,
      "learning_rate": 4.971040438079192e-05,
      "loss": 0.4422,
      "step": 59
    },
    {
      "epoch": 0.00631595568304429,
      "grad_norm": 0.46676474809646606,
      "learning_rate": 4.970513900589722e-05,
      "loss": 0.434,
      "step": 60
    },
    {
      "epoch": 0.006421221611095029,
      "grad_norm": 0.5107528567314148,
      "learning_rate": 4.969987363100253e-05,
      "loss": 0.5225,
      "step": 61
    },
    {
      "epoch": 0.006526487539145767,
      "grad_norm": 0.4967051148414612,
      "learning_rate": 4.969460825610784e-05,
      "loss": 0.4199,
      "step": 62
    },
    {
      "epoch": 0.006631753467196505,
      "grad_norm": 0.4968240559101105,
      "learning_rate": 4.9689342881213146e-05,
      "loss": 0.4157,
      "step": 63
    },
    {
      "epoch": 0.006737019395247243,
      "grad_norm": 0.5468823909759521,
      "learning_rate": 4.9684077506318455e-05,
      "loss": 0.4204,
      "step": 64
    },
    {
      "epoch": 0.006842285323297981,
      "grad_norm": 0.49830362200737,
      "learning_rate": 4.967881213142376e-05,
      "loss": 0.4381,
      "step": 65
    },
    {
      "epoch": 0.006947551251348719,
      "grad_norm": 0.6488986611366272,
      "learning_rate": 4.9673546756529066e-05,
      "loss": 0.5919,
      "step": 66
    },
    {
      "epoch": 0.007052817179399458,
      "grad_norm": 0.5668662190437317,
      "learning_rate": 4.9668281381634375e-05,
      "loss": 0.4089,
      "step": 67
    },
    {
      "epoch": 0.007158083107450196,
      "grad_norm": 0.5446314811706543,
      "learning_rate": 4.9663016006739684e-05,
      "loss": 0.4116,
      "step": 68
    },
    {
      "epoch": 0.007263349035500934,
      "grad_norm": 0.5011276006698608,
      "learning_rate": 4.9657750631844993e-05,
      "loss": 0.4808,
      "step": 69
    },
    {
      "epoch": 0.007368614963551672,
      "grad_norm": 0.7226698398590088,
      "learning_rate": 4.9652485256950296e-05,
      "loss": 0.4645,
      "step": 70
    },
    {
      "epoch": 0.00747388089160241,
      "grad_norm": 0.47501352429389954,
      "learning_rate": 4.9647219882055605e-05,
      "loss": 0.5207,
      "step": 71
    },
    {
      "epoch": 0.0075791468196531485,
      "grad_norm": 0.4856880307197571,
      "learning_rate": 4.9641954507160914e-05,
      "loss": 0.4878,
      "step": 72
    },
    {
      "epoch": 0.007684412747703887,
      "grad_norm": 0.5225908756256104,
      "learning_rate": 4.9636689132266216e-05,
      "loss": 0.514,
      "step": 73
    },
    {
      "epoch": 0.007789678675754625,
      "grad_norm": 0.526539146900177,
      "learning_rate": 4.9631423757371525e-05,
      "loss": 0.4572,
      "step": 74
    },
    {
      "epoch": 0.007894944603805363,
      "grad_norm": 0.49719616770744324,
      "learning_rate": 4.9626158382476834e-05,
      "loss": 0.4352,
      "step": 75
    },
    {
      "epoch": 0.008000210531856101,
      "grad_norm": 0.5542761087417603,
      "learning_rate": 4.962089300758214e-05,
      "loss": 0.5408,
      "step": 76
    },
    {
      "epoch": 0.00810547645990684,
      "grad_norm": 0.4830870032310486,
      "learning_rate": 4.961562763268745e-05,
      "loss": 0.3941,
      "step": 77
    },
    {
      "epoch": 0.008210742387957578,
      "grad_norm": 0.504296600818634,
      "learning_rate": 4.961036225779276e-05,
      "loss": 0.4609,
      "step": 78
    },
    {
      "epoch": 0.008316008316008316,
      "grad_norm": 0.5107358694076538,
      "learning_rate": 4.960509688289807e-05,
      "loss": 0.4313,
      "step": 79
    },
    {
      "epoch": 0.008421274244059054,
      "grad_norm": 0.5339490175247192,
      "learning_rate": 4.959983150800337e-05,
      "loss": 0.5046,
      "step": 80
    },
    {
      "epoch": 0.008526540172109793,
      "grad_norm": 0.4737516939640045,
      "learning_rate": 4.9594566133108675e-05,
      "loss": 0.5728,
      "step": 81
    },
    {
      "epoch": 0.00863180610016053,
      "grad_norm": 0.4952607750892639,
      "learning_rate": 4.9589300758213984e-05,
      "loss": 0.4464,
      "step": 82
    },
    {
      "epoch": 0.008737072028211269,
      "grad_norm": 0.5083893537521362,
      "learning_rate": 4.958403538331929e-05,
      "loss": 0.4429,
      "step": 83
    },
    {
      "epoch": 0.008842337956262008,
      "grad_norm": 0.5977057218551636,
      "learning_rate": 4.95787700084246e-05,
      "loss": 0.4367,
      "step": 84
    },
    {
      "epoch": 0.008947603884312745,
      "grad_norm": 0.6330780982971191,
      "learning_rate": 4.957350463352991e-05,
      "loss": 0.4736,
      "step": 85
    },
    {
      "epoch": 0.009052869812363484,
      "grad_norm": 0.4460638761520386,
      "learning_rate": 4.956823925863522e-05,
      "loss": 0.4933,
      "step": 86
    },
    {
      "epoch": 0.00915813574041422,
      "grad_norm": 0.508703351020813,
      "learning_rate": 4.956297388374053e-05,
      "loss": 0.5794,
      "step": 87
    },
    {
      "epoch": 0.00926340166846496,
      "grad_norm": 0.45487043261528015,
      "learning_rate": 4.955770850884584e-05,
      "loss": 0.5156,
      "step": 88
    },
    {
      "epoch": 0.009368667596515699,
      "grad_norm": 0.46359360218048096,
      "learning_rate": 4.955244313395114e-05,
      "loss": 0.4634,
      "step": 89
    },
    {
      "epoch": 0.009473933524566436,
      "grad_norm": 0.5234309434890747,
      "learning_rate": 4.954717775905645e-05,
      "loss": 0.383,
      "step": 90
    },
    {
      "epoch": 0.009579199452617175,
      "grad_norm": 0.5344865322113037,
      "learning_rate": 4.954191238416175e-05,
      "loss": 0.4619,
      "step": 91
    },
    {
      "epoch": 0.009684465380667912,
      "grad_norm": 0.6055357456207275,
      "learning_rate": 4.953664700926706e-05,
      "loss": 0.495,
      "step": 92
    },
    {
      "epoch": 0.00978973130871865,
      "grad_norm": 0.4749431014060974,
      "learning_rate": 4.953138163437237e-05,
      "loss": 0.5209,
      "step": 93
    },
    {
      "epoch": 0.009894997236769388,
      "grad_norm": 0.4775514602661133,
      "learning_rate": 4.952611625947768e-05,
      "loss": 0.4064,
      "step": 94
    },
    {
      "epoch": 0.010000263164820127,
      "grad_norm": 0.4580100178718567,
      "learning_rate": 4.952085088458299e-05,
      "loss": 0.4479,
      "step": 95
    },
    {
      "epoch": 0.010105529092870866,
      "grad_norm": 0.5589710474014282,
      "learning_rate": 4.9515585509688296e-05,
      "loss": 0.4072,
      "step": 96
    },
    {
      "epoch": 0.010210795020921603,
      "grad_norm": 0.45461875200271606,
      "learning_rate": 4.95103201347936e-05,
      "loss": 0.4933,
      "step": 97
    },
    {
      "epoch": 0.010316060948972342,
      "grad_norm": 0.4552902579307556,
      "learning_rate": 4.950505475989891e-05,
      "loss": 0.4038,
      "step": 98
    },
    {
      "epoch": 0.010421326877023079,
      "grad_norm": 0.5590063333511353,
      "learning_rate": 4.9499789385004216e-05,
      "loss": 0.4928,
      "step": 99
    },
    {
      "epoch": 0.010526592805073818,
      "grad_norm": 0.5689685344696045,
      "learning_rate": 4.949452401010952e-05,
      "loss": 0.3962,
      "step": 100
    },
    {
      "epoch": 0.010631858733124555,
      "grad_norm": 0.5378232598304749,
      "learning_rate": 4.948925863521483e-05,
      "loss": 0.4843,
      "step": 101
    },
    {
      "epoch": 0.010737124661175294,
      "grad_norm": 0.6677789688110352,
      "learning_rate": 4.948399326032014e-05,
      "loss": 0.5839,
      "step": 102
    },
    {
      "epoch": 0.010842390589226033,
      "grad_norm": 0.4315250515937805,
      "learning_rate": 4.9478727885425446e-05,
      "loss": 0.5467,
      "step": 103
    },
    {
      "epoch": 0.01094765651727677,
      "grad_norm": 0.6344457268714905,
      "learning_rate": 4.9473462510530755e-05,
      "loss": 0.5048,
      "step": 104
    },
    {
      "epoch": 0.011052922445327509,
      "grad_norm": 0.41527998447418213,
      "learning_rate": 4.946819713563606e-05,
      "loss": 0.5559,
      "step": 105
    },
    {
      "epoch": 0.011158188373378246,
      "grad_norm": 0.46887871623039246,
      "learning_rate": 4.9462931760741366e-05,
      "loss": 0.4165,
      "step": 106
    },
    {
      "epoch": 0.011263454301428985,
      "grad_norm": 0.5572345852851868,
      "learning_rate": 4.9457666385846675e-05,
      "loss": 0.4496,
      "step": 107
    },
    {
      "epoch": 0.011368720229479724,
      "grad_norm": 0.46033406257629395,
      "learning_rate": 4.9452401010951984e-05,
      "loss": 0.4699,
      "step": 108
    },
    {
      "epoch": 0.011473986157530461,
      "grad_norm": 0.5205333232879639,
      "learning_rate": 4.944713563605729e-05,
      "loss": 0.4291,
      "step": 109
    },
    {
      "epoch": 0.0115792520855812,
      "grad_norm": 0.5044732689857483,
      "learning_rate": 4.9441870261162595e-05,
      "loss": 0.4424,
      "step": 110
    },
    {
      "epoch": 0.011684518013631937,
      "grad_norm": 0.5410451889038086,
      "learning_rate": 4.9436604886267904e-05,
      "loss": 0.3803,
      "step": 111
    },
    {
      "epoch": 0.011789783941682676,
      "grad_norm": 0.5163026452064514,
      "learning_rate": 4.943133951137321e-05,
      "loss": 0.4276,
      "step": 112
    },
    {
      "epoch": 0.011895049869733413,
      "grad_norm": 0.4330487847328186,
      "learning_rate": 4.9426074136478516e-05,
      "loss": 0.4644,
      "step": 113
    },
    {
      "epoch": 0.012000315797784152,
      "grad_norm": 0.41046929359436035,
      "learning_rate": 4.9420808761583825e-05,
      "loss": 0.4161,
      "step": 114
    },
    {
      "epoch": 0.012105581725834891,
      "grad_norm": 0.4908786714076996,
      "learning_rate": 4.9415543386689134e-05,
      "loss": 0.4502,
      "step": 115
    },
    {
      "epoch": 0.012210847653885628,
      "grad_norm": 0.4866664707660675,
      "learning_rate": 4.941027801179444e-05,
      "loss": 0.3923,
      "step": 116
    },
    {
      "epoch": 0.012316113581936367,
      "grad_norm": 0.4272409677505493,
      "learning_rate": 4.940501263689975e-05,
      "loss": 0.4828,
      "step": 117
    },
    {
      "epoch": 0.012421379509987104,
      "grad_norm": 0.517900288105011,
      "learning_rate": 4.939974726200506e-05,
      "loss": 0.4661,
      "step": 118
    },
    {
      "epoch": 0.012526645438037843,
      "grad_norm": 0.5139513611793518,
      "learning_rate": 4.939448188711037e-05,
      "loss": 0.536,
      "step": 119
    },
    {
      "epoch": 0.01263191136608858,
      "grad_norm": 0.5204519629478455,
      "learning_rate": 4.938921651221567e-05,
      "loss": 0.4156,
      "step": 120
    },
    {
      "epoch": 0.01273717729413932,
      "grad_norm": 0.566659152507782,
      "learning_rate": 4.9383951137320974e-05,
      "loss": 0.4675,
      "step": 121
    },
    {
      "epoch": 0.012842443222190058,
      "grad_norm": 0.5262351632118225,
      "learning_rate": 4.937868576242628e-05,
      "loss": 0.5002,
      "step": 122
    },
    {
      "epoch": 0.012947709150240795,
      "grad_norm": 0.5888293981552124,
      "learning_rate": 4.937342038753159e-05,
      "loss": 0.4058,
      "step": 123
    },
    {
      "epoch": 0.013052975078291534,
      "grad_norm": 0.5911523103713989,
      "learning_rate": 4.93681550126369e-05,
      "loss": 0.392,
      "step": 124
    },
    {
      "epoch": 0.013158241006342272,
      "grad_norm": 0.48798367381095886,
      "learning_rate": 4.936288963774221e-05,
      "loss": 0.4442,
      "step": 125
    },
    {
      "epoch": 0.01326350693439301,
      "grad_norm": 0.5228798985481262,
      "learning_rate": 4.935762426284752e-05,
      "loss": 0.4673,
      "step": 126
    },
    {
      "epoch": 0.01336877286244375,
      "grad_norm": 0.4832141399383545,
      "learning_rate": 4.935235888795283e-05,
      "loss": 0.4259,
      "step": 127
    },
    {
      "epoch": 0.013474038790494487,
      "grad_norm": 0.6188245415687561,
      "learning_rate": 4.934709351305814e-05,
      "loss": 0.4982,
      "step": 128
    },
    {
      "epoch": 0.013579304718545225,
      "grad_norm": 0.4905821979045868,
      "learning_rate": 4.934182813816344e-05,
      "loss": 0.4539,
      "step": 129
    },
    {
      "epoch": 0.013684570646595963,
      "grad_norm": 0.6506298184394836,
      "learning_rate": 4.933656276326875e-05,
      "loss": 0.3982,
      "step": 130
    },
    {
      "epoch": 0.013789836574646702,
      "grad_norm": 0.570380687713623,
      "learning_rate": 4.933129738837405e-05,
      "loss": 0.3901,
      "step": 131
    },
    {
      "epoch": 0.013895102502697439,
      "grad_norm": 0.44687098264694214,
      "learning_rate": 4.932603201347936e-05,
      "loss": 0.4176,
      "step": 132
    },
    {
      "epoch": 0.014000368430748178,
      "grad_norm": 0.6272158622741699,
      "learning_rate": 4.932076663858467e-05,
      "loss": 0.4455,
      "step": 133
    },
    {
      "epoch": 0.014105634358798917,
      "grad_norm": 0.6358391046524048,
      "learning_rate": 4.931550126368998e-05,
      "loss": 0.384,
      "step": 134
    },
    {
      "epoch": 0.014210900286849654,
      "grad_norm": 0.6558123826980591,
      "learning_rate": 4.931023588879529e-05,
      "loss": 0.5024,
      "step": 135
    },
    {
      "epoch": 0.014316166214900393,
      "grad_norm": 0.4577985107898712,
      "learning_rate": 4.9304970513900596e-05,
      "loss": 0.3906,
      "step": 136
    },
    {
      "epoch": 0.01442143214295113,
      "grad_norm": 0.5580503344535828,
      "learning_rate": 4.92997051390059e-05,
      "loss": 0.4589,
      "step": 137
    },
    {
      "epoch": 0.014526698071001869,
      "grad_norm": 0.5660861134529114,
      "learning_rate": 4.929443976411121e-05,
      "loss": 0.3913,
      "step": 138
    },
    {
      "epoch": 0.014631963999052606,
      "grad_norm": 0.49188342690467834,
      "learning_rate": 4.9289174389216516e-05,
      "loss": 0.3951,
      "step": 139
    },
    {
      "epoch": 0.014737229927103345,
      "grad_norm": 0.6210848689079285,
      "learning_rate": 4.9283909014321825e-05,
      "loss": 0.4282,
      "step": 140
    },
    {
      "epoch": 0.014842495855154084,
      "grad_norm": 0.48430967330932617,
      "learning_rate": 4.927864363942713e-05,
      "loss": 0.4667,
      "step": 141
    },
    {
      "epoch": 0.01494776178320482,
      "grad_norm": 0.5269038677215576,
      "learning_rate": 4.9273378264532436e-05,
      "loss": 0.3845,
      "step": 142
    },
    {
      "epoch": 0.01505302771125556,
      "grad_norm": 0.5490912199020386,
      "learning_rate": 4.9268112889637745e-05,
      "loss": 0.4477,
      "step": 143
    },
    {
      "epoch": 0.015158293639306297,
      "grad_norm": 0.4111802279949188,
      "learning_rate": 4.9262847514743054e-05,
      "loss": 0.4351,
      "step": 144
    },
    {
      "epoch": 0.015263559567357036,
      "grad_norm": 0.48929688334465027,
      "learning_rate": 4.9257582139848357e-05,
      "loss": 0.4512,
      "step": 145
    },
    {
      "epoch": 0.015368825495407775,
      "grad_norm": 0.9201393723487854,
      "learning_rate": 4.9252316764953666e-05,
      "loss": 0.5254,
      "step": 146
    },
    {
      "epoch": 0.015474091423458512,
      "grad_norm": 0.5191910862922668,
      "learning_rate": 4.9247051390058975e-05,
      "loss": 0.5455,
      "step": 147
    },
    {
      "epoch": 0.01557935735150925,
      "grad_norm": 0.3562093675136566,
      "learning_rate": 4.9241786015164284e-05,
      "loss": 0.5303,
      "step": 148
    },
    {
      "epoch": 0.015684623279559988,
      "grad_norm": 0.7195460796356201,
      "learning_rate": 4.923652064026959e-05,
      "loss": 0.4389,
      "step": 149
    },
    {
      "epoch": 0.015789889207610725,
      "grad_norm": 0.448176771402359,
      "learning_rate": 4.9231255265374895e-05,
      "loss": 0.3987,
      "step": 150
    },
    {
      "epoch": 0.015895155135661466,
      "grad_norm": 0.48504385352134705,
      "learning_rate": 4.9225989890480204e-05,
      "loss": 0.4725,
      "step": 151
    },
    {
      "epoch": 0.016000421063712203,
      "grad_norm": 0.5456967353820801,
      "learning_rate": 4.922072451558551e-05,
      "loss": 0.5143,
      "step": 152
    },
    {
      "epoch": 0.01610568699176294,
      "grad_norm": 0.61397784948349,
      "learning_rate": 4.9215459140690815e-05,
      "loss": 0.4295,
      "step": 153
    },
    {
      "epoch": 0.01621095291981368,
      "grad_norm": 0.6359485387802124,
      "learning_rate": 4.9210193765796124e-05,
      "loss": 0.4498,
      "step": 154
    },
    {
      "epoch": 0.016316218847864418,
      "grad_norm": 0.5002400279045105,
      "learning_rate": 4.920492839090143e-05,
      "loss": 0.467,
      "step": 155
    },
    {
      "epoch": 0.016421484775915155,
      "grad_norm": 0.5669925212860107,
      "learning_rate": 4.919966301600674e-05,
      "loss": 0.5151,
      "step": 156
    },
    {
      "epoch": 0.016526750703965892,
      "grad_norm": 0.4109033942222595,
      "learning_rate": 4.919439764111205e-05,
      "loss": 0.4672,
      "step": 157
    },
    {
      "epoch": 0.016632016632016633,
      "grad_norm": 0.5119397044181824,
      "learning_rate": 4.918913226621736e-05,
      "loss": 0.4846,
      "step": 158
    },
    {
      "epoch": 0.01673728256006737,
      "grad_norm": 0.5187058448791504,
      "learning_rate": 4.918386689132267e-05,
      "loss": 0.4698,
      "step": 159
    },
    {
      "epoch": 0.016842548488118107,
      "grad_norm": 0.55632483959198,
      "learning_rate": 4.917860151642797e-05,
      "loss": 0.3904,
      "step": 160
    },
    {
      "epoch": 0.016947814416168848,
      "grad_norm": 0.5332942008972168,
      "learning_rate": 4.917333614153328e-05,
      "loss": 0.4253,
      "step": 161
    },
    {
      "epoch": 0.017053080344219585,
      "grad_norm": 0.5523495078086853,
      "learning_rate": 4.916807076663858e-05,
      "loss": 0.415,
      "step": 162
    },
    {
      "epoch": 0.017158346272270322,
      "grad_norm": 0.5162644386291504,
      "learning_rate": 4.916280539174389e-05,
      "loss": 0.514,
      "step": 163
    },
    {
      "epoch": 0.01726361220032106,
      "grad_norm": 0.414809912443161,
      "learning_rate": 4.91575400168492e-05,
      "loss": 0.4757,
      "step": 164
    },
    {
      "epoch": 0.0173688781283718,
      "grad_norm": 0.5634474754333496,
      "learning_rate": 4.915227464195451e-05,
      "loss": 0.3643,
      "step": 165
    },
    {
      "epoch": 0.017474144056422537,
      "grad_norm": 0.5438713431358337,
      "learning_rate": 4.914700926705982e-05,
      "loss": 0.4315,
      "step": 166
    },
    {
      "epoch": 0.017579409984473274,
      "grad_norm": 0.49885427951812744,
      "learning_rate": 4.914174389216513e-05,
      "loss": 0.4697,
      "step": 167
    },
    {
      "epoch": 0.017684675912524015,
      "grad_norm": 0.46923205256462097,
      "learning_rate": 4.913647851727044e-05,
      "loss": 0.4189,
      "step": 168
    },
    {
      "epoch": 0.017789941840574752,
      "grad_norm": 0.4423271119594574,
      "learning_rate": 4.913121314237574e-05,
      "loss": 0.4602,
      "step": 169
    },
    {
      "epoch": 0.01789520776862549,
      "grad_norm": 0.6115851402282715,
      "learning_rate": 4.912594776748105e-05,
      "loss": 0.4399,
      "step": 170
    },
    {
      "epoch": 0.018000473696676227,
      "grad_norm": 0.5554397106170654,
      "learning_rate": 4.912068239258635e-05,
      "loss": 0.4262,
      "step": 171
    },
    {
      "epoch": 0.018105739624726967,
      "grad_norm": 0.565323531627655,
      "learning_rate": 4.911541701769166e-05,
      "loss": 0.4424,
      "step": 172
    },
    {
      "epoch": 0.018211005552777704,
      "grad_norm": 0.44236519932746887,
      "learning_rate": 4.911015164279697e-05,
      "loss": 0.424,
      "step": 173
    },
    {
      "epoch": 0.01831627148082844,
      "grad_norm": 0.6567726731300354,
      "learning_rate": 4.910488626790228e-05,
      "loss": 0.4231,
      "step": 174
    },
    {
      "epoch": 0.018421537408879182,
      "grad_norm": 0.42518746852874756,
      "learning_rate": 4.9099620893007586e-05,
      "loss": 0.4878,
      "step": 175
    },
    {
      "epoch": 0.01852680333692992,
      "grad_norm": 0.5739135146141052,
      "learning_rate": 4.9094355518112895e-05,
      "loss": 0.4514,
      "step": 176
    },
    {
      "epoch": 0.018632069264980657,
      "grad_norm": 0.628442645072937,
      "learning_rate": 4.90890901432182e-05,
      "loss": 0.3625,
      "step": 177
    },
    {
      "epoch": 0.018737335193031397,
      "grad_norm": 0.445872962474823,
      "learning_rate": 4.9083824768323507e-05,
      "loss": 0.5256,
      "step": 178
    },
    {
      "epoch": 0.018842601121082134,
      "grad_norm": 0.5037261247634888,
      "learning_rate": 4.9078559393428816e-05,
      "loss": 0.4322,
      "step": 179
    },
    {
      "epoch": 0.01894786704913287,
      "grad_norm": 0.5586241483688354,
      "learning_rate": 4.9073294018534125e-05,
      "loss": 0.5682,
      "step": 180
    },
    {
      "epoch": 0.01905313297718361,
      "grad_norm": 0.5735304355621338,
      "learning_rate": 4.906802864363943e-05,
      "loss": 0.4486,
      "step": 181
    },
    {
      "epoch": 0.01915839890523435,
      "grad_norm": 0.6629624962806702,
      "learning_rate": 4.9062763268744736e-05,
      "loss": 0.4748,
      "step": 182
    },
    {
      "epoch": 0.019263664833285087,
      "grad_norm": 0.5536085963249207,
      "learning_rate": 4.9057497893850045e-05,
      "loss": 0.3779,
      "step": 183
    },
    {
      "epoch": 0.019368930761335824,
      "grad_norm": 0.37973251938819885,
      "learning_rate": 4.9052232518955354e-05,
      "loss": 0.4913,
      "step": 184
    },
    {
      "epoch": 0.019474196689386564,
      "grad_norm": 0.6046680212020874,
      "learning_rate": 4.9046967144060656e-05,
      "loss": 0.4644,
      "step": 185
    },
    {
      "epoch": 0.0195794626174373,
      "grad_norm": 0.5051435828208923,
      "learning_rate": 4.9041701769165965e-05,
      "loss": 0.5042,
      "step": 186
    },
    {
      "epoch": 0.01968472854548804,
      "grad_norm": 0.5261257290840149,
      "learning_rate": 4.9036436394271274e-05,
      "loss": 0.4679,
      "step": 187
    },
    {
      "epoch": 0.019789994473538776,
      "grad_norm": 0.5349376797676086,
      "learning_rate": 4.903117101937658e-05,
      "loss": 0.4206,
      "step": 188
    },
    {
      "epoch": 0.019895260401589517,
      "grad_norm": 0.5617197751998901,
      "learning_rate": 4.902590564448189e-05,
      "loss": 0.3974,
      "step": 189
    },
    {
      "epoch": 0.020000526329640254,
      "grad_norm": 0.549514889717102,
      "learning_rate": 4.90206402695872e-05,
      "loss": 0.5034,
      "step": 190
    },
    {
      "epoch": 0.02010579225769099,
      "grad_norm": 0.6475022435188293,
      "learning_rate": 4.9015374894692503e-05,
      "loss": 0.4651,
      "step": 191
    },
    {
      "epoch": 0.02021105818574173,
      "grad_norm": 0.6060453057289124,
      "learning_rate": 4.901010951979781e-05,
      "loss": 0.3981,
      "step": 192
    },
    {
      "epoch": 0.02031632411379247,
      "grad_norm": 0.6936651468276978,
      "learning_rate": 4.9004844144903115e-05,
      "loss": 0.3804,
      "step": 193
    },
    {
      "epoch": 0.020421590041843206,
      "grad_norm": 0.44638895988464355,
      "learning_rate": 4.8999578770008424e-05,
      "loss": 0.4596,
      "step": 194
    },
    {
      "epoch": 0.020526855969893943,
      "grad_norm": 0.5297572612762451,
      "learning_rate": 4.899431339511373e-05,
      "loss": 0.4385,
      "step": 195
    },
    {
      "epoch": 0.020632121897944684,
      "grad_norm": 0.5046480894088745,
      "learning_rate": 4.898904802021904e-05,
      "loss": 0.4557,
      "step": 196
    },
    {
      "epoch": 0.02073738782599542,
      "grad_norm": 0.5276935696601868,
      "learning_rate": 4.898378264532435e-05,
      "loss": 0.39,
      "step": 197
    },
    {
      "epoch": 0.020842653754046158,
      "grad_norm": 0.4923096001148224,
      "learning_rate": 4.897851727042966e-05,
      "loss": 0.4585,
      "step": 198
    },
    {
      "epoch": 0.0209479196820969,
      "grad_norm": 0.4554820954799652,
      "learning_rate": 4.897325189553497e-05,
      "loss": 0.5175,
      "step": 199
    },
    {
      "epoch": 0.021053185610147636,
      "grad_norm": 0.47559452056884766,
      "learning_rate": 4.896798652064027e-05,
      "loss": 0.5275,
      "step": 200
    },
    {
      "epoch": 0.021158451538198373,
      "grad_norm": 0.5070779323577881,
      "learning_rate": 4.896272114574558e-05,
      "loss": 0.4958,
      "step": 201
    },
    {
      "epoch": 0.02126371746624911,
      "grad_norm": 0.5040444135665894,
      "learning_rate": 4.895745577085088e-05,
      "loss": 0.4616,
      "step": 202
    },
    {
      "epoch": 0.02136898339429985,
      "grad_norm": 0.5290699601173401,
      "learning_rate": 4.895219039595619e-05,
      "loss": 0.5178,
      "step": 203
    },
    {
      "epoch": 0.021474249322350588,
      "grad_norm": 0.5007508993148804,
      "learning_rate": 4.89469250210615e-05,
      "loss": 0.4489,
      "step": 204
    },
    {
      "epoch": 0.021579515250401325,
      "grad_norm": 0.6373962759971619,
      "learning_rate": 4.894165964616681e-05,
      "loss": 0.4124,
      "step": 205
    },
    {
      "epoch": 0.021684781178452066,
      "grad_norm": 0.5132836699485779,
      "learning_rate": 4.893639427127212e-05,
      "loss": 0.4534,
      "step": 206
    },
    {
      "epoch": 0.021790047106502803,
      "grad_norm": 0.6253231167793274,
      "learning_rate": 4.893112889637743e-05,
      "loss": 0.3986,
      "step": 207
    },
    {
      "epoch": 0.02189531303455354,
      "grad_norm": 0.5937986373901367,
      "learning_rate": 4.8925863521482736e-05,
      "loss": 0.3956,
      "step": 208
    },
    {
      "epoch": 0.022000578962604277,
      "grad_norm": 0.4578053951263428,
      "learning_rate": 4.892059814658804e-05,
      "loss": 0.4068,
      "step": 209
    },
    {
      "epoch": 0.022105844890655018,
      "grad_norm": 0.5060281157493591,
      "learning_rate": 4.891533277169335e-05,
      "loss": 0.5179,
      "step": 210
    },
    {
      "epoch": 0.022211110818705755,
      "grad_norm": 0.561792254447937,
      "learning_rate": 4.8910067396798657e-05,
      "loss": 0.4547,
      "step": 211
    },
    {
      "epoch": 0.022316376746756492,
      "grad_norm": 0.38052886724472046,
      "learning_rate": 4.890480202190396e-05,
      "loss": 0.4493,
      "step": 212
    },
    {
      "epoch": 0.022421642674807233,
      "grad_norm": 0.5639155507087708,
      "learning_rate": 4.889953664700927e-05,
      "loss": 0.4239,
      "step": 213
    },
    {
      "epoch": 0.02252690860285797,
      "grad_norm": 0.5452573299407959,
      "learning_rate": 4.889427127211458e-05,
      "loss": 0.4393,
      "step": 214
    },
    {
      "epoch": 0.022632174530908707,
      "grad_norm": 0.4861447811126709,
      "learning_rate": 4.8889005897219886e-05,
      "loss": 0.4971,
      "step": 215
    },
    {
      "epoch": 0.022737440458959448,
      "grad_norm": 0.5619585514068604,
      "learning_rate": 4.8883740522325195e-05,
      "loss": 0.3992,
      "step": 216
    },
    {
      "epoch": 0.022842706387010185,
      "grad_norm": 0.5488256812095642,
      "learning_rate": 4.88784751474305e-05,
      "loss": 0.4155,
      "step": 217
    },
    {
      "epoch": 0.022947972315060922,
      "grad_norm": 0.517796516418457,
      "learning_rate": 4.8873209772535806e-05,
      "loss": 0.5018,
      "step": 218
    },
    {
      "epoch": 0.02305323824311166,
      "grad_norm": 0.6027892827987671,
      "learning_rate": 4.8867944397641115e-05,
      "loss": 0.4684,
      "step": 219
    },
    {
      "epoch": 0.0231585041711624,
      "grad_norm": 0.47196510434150696,
      "learning_rate": 4.8862679022746424e-05,
      "loss": 0.4423,
      "step": 220
    },
    {
      "epoch": 0.023263770099213137,
      "grad_norm": 0.41390231251716614,
      "learning_rate": 4.8857413647851726e-05,
      "loss": 0.4031,
      "step": 221
    },
    {
      "epoch": 0.023369036027263874,
      "grad_norm": 0.5514193773269653,
      "learning_rate": 4.8852148272957035e-05,
      "loss": 0.6308,
      "step": 222
    },
    {
      "epoch": 0.023474301955314615,
      "grad_norm": 0.4564357101917267,
      "learning_rate": 4.8846882898062344e-05,
      "loss": 0.5284,
      "step": 223
    },
    {
      "epoch": 0.023579567883365352,
      "grad_norm": 0.45888492465019226,
      "learning_rate": 4.8841617523167653e-05,
      "loss": 0.4536,
      "step": 224
    },
    {
      "epoch": 0.02368483381141609,
      "grad_norm": 0.4363495409488678,
      "learning_rate": 4.8836352148272956e-05,
      "loss": 0.4838,
      "step": 225
    },
    {
      "epoch": 0.023790099739466827,
      "grad_norm": 0.40970975160598755,
      "learning_rate": 4.8831086773378265e-05,
      "loss": 0.5299,
      "step": 226
    },
    {
      "epoch": 0.023895365667517567,
      "grad_norm": 0.5274611711502075,
      "learning_rate": 4.8825821398483574e-05,
      "loss": 0.3967,
      "step": 227
    },
    {
      "epoch": 0.024000631595568304,
      "grad_norm": 0.5038068890571594,
      "learning_rate": 4.882055602358888e-05,
      "loss": 0.5067,
      "step": 228
    },
    {
      "epoch": 0.02410589752361904,
      "grad_norm": 0.5031372904777527,
      "learning_rate": 4.881529064869419e-05,
      "loss": 0.3756,
      "step": 229
    },
    {
      "epoch": 0.024211163451669782,
      "grad_norm": 0.49740293622016907,
      "learning_rate": 4.88100252737995e-05,
      "loss": 0.4809,
      "step": 230
    },
    {
      "epoch": 0.02431642937972052,
      "grad_norm": 0.4950021207332611,
      "learning_rate": 4.88047598989048e-05,
      "loss": 0.4149,
      "step": 231
    },
    {
      "epoch": 0.024421695307771257,
      "grad_norm": 0.46618038415908813,
      "learning_rate": 4.879949452401011e-05,
      "loss": 0.4737,
      "step": 232
    },
    {
      "epoch": 0.024526961235821994,
      "grad_norm": 0.4663354158401489,
      "learning_rate": 4.8794229149115414e-05,
      "loss": 0.3884,
      "step": 233
    },
    {
      "epoch": 0.024632227163872734,
      "grad_norm": 0.6165478229522705,
      "learning_rate": 4.878896377422072e-05,
      "loss": 0.3875,
      "step": 234
    },
    {
      "epoch": 0.02473749309192347,
      "grad_norm": 0.4838646948337555,
      "learning_rate": 4.878369839932603e-05,
      "loss": 0.4679,
      "step": 235
    },
    {
      "epoch": 0.02484275901997421,
      "grad_norm": 0.49089592695236206,
      "learning_rate": 4.877843302443134e-05,
      "loss": 0.5484,
      "step": 236
    },
    {
      "epoch": 0.02494802494802495,
      "grad_norm": 0.4166033864021301,
      "learning_rate": 4.877316764953665e-05,
      "loss": 0.4594,
      "step": 237
    },
    {
      "epoch": 0.025053290876075687,
      "grad_norm": 0.6557610630989075,
      "learning_rate": 4.876790227464196e-05,
      "loss": 0.422,
      "step": 238
    },
    {
      "epoch": 0.025158556804126424,
      "grad_norm": 0.4997393786907196,
      "learning_rate": 4.876263689974727e-05,
      "loss": 0.4165,
      "step": 239
    },
    {
      "epoch": 0.02526382273217716,
      "grad_norm": 0.3650420606136322,
      "learning_rate": 4.875737152485258e-05,
      "loss": 0.4758,
      "step": 240
    },
    {
      "epoch": 0.0253690886602279,
      "grad_norm": 0.5316746830940247,
      "learning_rate": 4.875210614995788e-05,
      "loss": 0.4703,
      "step": 241
    },
    {
      "epoch": 0.02547435458827864,
      "grad_norm": 0.3838014602661133,
      "learning_rate": 4.874684077506318e-05,
      "loss": 0.6512,
      "step": 242
    },
    {
      "epoch": 0.025579620516329376,
      "grad_norm": 0.5243346095085144,
      "learning_rate": 4.874157540016849e-05,
      "loss": 0.4515,
      "step": 243
    },
    {
      "epoch": 0.025684886444380117,
      "grad_norm": 0.46801677346229553,
      "learning_rate": 4.87363100252738e-05,
      "loss": 0.4605,
      "step": 244
    },
    {
      "epoch": 0.025790152372430854,
      "grad_norm": 0.4614790081977844,
      "learning_rate": 4.873104465037911e-05,
      "loss": 0.4101,
      "step": 245
    },
    {
      "epoch": 0.02589541830048159,
      "grad_norm": 0.4433145821094513,
      "learning_rate": 4.872577927548442e-05,
      "loss": 0.4578,
      "step": 246
    },
    {
      "epoch": 0.026000684228532328,
      "grad_norm": 0.43368014693260193,
      "learning_rate": 4.872051390058973e-05,
      "loss": 0.4077,
      "step": 247
    },
    {
      "epoch": 0.02610595015658307,
      "grad_norm": 0.4347352385520935,
      "learning_rate": 4.8715248525695036e-05,
      "loss": 0.4451,
      "step": 248
    },
    {
      "epoch": 0.026211216084633806,
      "grad_norm": 0.5047518610954285,
      "learning_rate": 4.870998315080034e-05,
      "loss": 0.4308,
      "step": 249
    },
    {
      "epoch": 0.026316482012684543,
      "grad_norm": 0.6036553978919983,
      "learning_rate": 4.870471777590565e-05,
      "loss": 0.5001,
      "step": 250
    },
    {
      "epoch": 0.026421747940735284,
      "grad_norm": 0.5581931471824646,
      "learning_rate": 4.8699452401010956e-05,
      "loss": 0.3939,
      "step": 251
    },
    {
      "epoch": 0.02652701386878602,
      "grad_norm": 0.4085439145565033,
      "learning_rate": 4.869418702611626e-05,
      "loss": 0.5321,
      "step": 252
    },
    {
      "epoch": 0.026632279796836758,
      "grad_norm": 0.6976563334465027,
      "learning_rate": 4.868892165122157e-05,
      "loss": 0.4767,
      "step": 253
    },
    {
      "epoch": 0.0267375457248875,
      "grad_norm": 0.48653343319892883,
      "learning_rate": 4.8683656276326876e-05,
      "loss": 0.5387,
      "step": 254
    },
    {
      "epoch": 0.026842811652938236,
      "grad_norm": 0.5379003286361694,
      "learning_rate": 4.8678390901432185e-05,
      "loss": 0.4418,
      "step": 255
    },
    {
      "epoch": 0.026948077580988973,
      "grad_norm": 0.42478466033935547,
      "learning_rate": 4.8673125526537494e-05,
      "loss": 0.4751,
      "step": 256
    },
    {
      "epoch": 0.02705334350903971,
      "grad_norm": 0.4857715666294098,
      "learning_rate": 4.86678601516428e-05,
      "loss": 0.4608,
      "step": 257
    },
    {
      "epoch": 0.02715860943709045,
      "grad_norm": 0.46174147725105286,
      "learning_rate": 4.8662594776748106e-05,
      "loss": 0.4611,
      "step": 258
    },
    {
      "epoch": 0.027263875365141188,
      "grad_norm": 0.5316092371940613,
      "learning_rate": 4.8657329401853415e-05,
      "loss": 0.4463,
      "step": 259
    },
    {
      "epoch": 0.027369141293191925,
      "grad_norm": 0.5541107058525085,
      "learning_rate": 4.8652064026958724e-05,
      "loss": 0.4619,
      "step": 260
    },
    {
      "epoch": 0.027474407221242666,
      "grad_norm": 0.4637160003185272,
      "learning_rate": 4.864679865206403e-05,
      "loss": 0.425,
      "step": 261
    },
    {
      "epoch": 0.027579673149293403,
      "grad_norm": 0.4406774938106537,
      "learning_rate": 4.8641533277169335e-05,
      "loss": 0.5234,
      "step": 262
    },
    {
      "epoch": 0.02768493907734414,
      "grad_norm": 0.5540871620178223,
      "learning_rate": 4.8636267902274644e-05,
      "loss": 0.4565,
      "step": 263
    },
    {
      "epoch": 0.027790205005394877,
      "grad_norm": 0.5119719505310059,
      "learning_rate": 4.863100252737995e-05,
      "loss": 0.4224,
      "step": 264
    },
    {
      "epoch": 0.027895470933445618,
      "grad_norm": 0.6064046025276184,
      "learning_rate": 4.8625737152485255e-05,
      "loss": 0.453,
      "step": 265
    },
    {
      "epoch": 0.028000736861496355,
      "grad_norm": 0.5928232669830322,
      "learning_rate": 4.8620471777590564e-05,
      "loss": 0.4444,
      "step": 266
    },
    {
      "epoch": 0.028106002789547092,
      "grad_norm": 0.5610330700874329,
      "learning_rate": 4.861520640269587e-05,
      "loss": 0.4051,
      "step": 267
    },
    {
      "epoch": 0.028211268717597833,
      "grad_norm": 0.4866770803928375,
      "learning_rate": 4.860994102780118e-05,
      "loss": 0.4629,
      "step": 268
    },
    {
      "epoch": 0.02831653464564857,
      "grad_norm": 0.5181504487991333,
      "learning_rate": 4.860467565290649e-05,
      "loss": 0.4225,
      "step": 269
    },
    {
      "epoch": 0.028421800573699307,
      "grad_norm": 0.36064937710762024,
      "learning_rate": 4.85994102780118e-05,
      "loss": 0.4136,
      "step": 270
    },
    {
      "epoch": 0.028527066501750045,
      "grad_norm": 0.4846802353858948,
      "learning_rate": 4.85941449031171e-05,
      "loss": 0.4321,
      "step": 271
    },
    {
      "epoch": 0.028632332429800785,
      "grad_norm": 0.4463631510734558,
      "learning_rate": 4.858887952822241e-05,
      "loss": 0.5485,
      "step": 272
    },
    {
      "epoch": 0.028737598357851522,
      "grad_norm": 0.4516132175922394,
      "learning_rate": 4.8583614153327714e-05,
      "loss": 0.4853,
      "step": 273
    },
    {
      "epoch": 0.02884286428590226,
      "grad_norm": 0.40815305709838867,
      "learning_rate": 4.857834877843302e-05,
      "loss": 0.3355,
      "step": 274
    },
    {
      "epoch": 0.028948130213953,
      "grad_norm": 0.54203200340271,
      "learning_rate": 4.857308340353833e-05,
      "loss": 0.3969,
      "step": 275
    },
    {
      "epoch": 0.029053396142003737,
      "grad_norm": 0.5161415338516235,
      "learning_rate": 4.856781802864364e-05,
      "loss": 0.3776,
      "step": 276
    },
    {
      "epoch": 0.029158662070054474,
      "grad_norm": 0.4058281183242798,
      "learning_rate": 4.856255265374895e-05,
      "loss": 0.4268,
      "step": 277
    },
    {
      "epoch": 0.02926392799810521,
      "grad_norm": 0.43867388367652893,
      "learning_rate": 4.855728727885426e-05,
      "loss": 0.4458,
      "step": 278
    },
    {
      "epoch": 0.029369193926155952,
      "grad_norm": 0.441211998462677,
      "learning_rate": 4.855202190395957e-05,
      "loss": 0.4532,
      "step": 279
    },
    {
      "epoch": 0.02947445985420669,
      "grad_norm": 0.5454714894294739,
      "learning_rate": 4.854675652906488e-05,
      "loss": 0.4907,
      "step": 280
    },
    {
      "epoch": 0.029579725782257427,
      "grad_norm": 0.47156885266304016,
      "learning_rate": 4.854149115417018e-05,
      "loss": 0.4905,
      "step": 281
    },
    {
      "epoch": 0.029684991710308167,
      "grad_norm": 0.40513938665390015,
      "learning_rate": 4.853622577927549e-05,
      "loss": 0.4808,
      "step": 282
    },
    {
      "epoch": 0.029790257638358904,
      "grad_norm": 0.47520211338996887,
      "learning_rate": 4.853096040438079e-05,
      "loss": 0.4501,
      "step": 283
    },
    {
      "epoch": 0.02989552356640964,
      "grad_norm": 0.5248693823814392,
      "learning_rate": 4.85256950294861e-05,
      "loss": 0.4287,
      "step": 284
    },
    {
      "epoch": 0.03000078949446038,
      "grad_norm": 0.4880824089050293,
      "learning_rate": 4.852042965459141e-05,
      "loss": 0.3947,
      "step": 285
    },
    {
      "epoch": 0.03010605542251112,
      "grad_norm": 0.4884517788887024,
      "learning_rate": 4.851516427969672e-05,
      "loss": 0.4521,
      "step": 286
    },
    {
      "epoch": 0.030211321350561857,
      "grad_norm": 0.5394681096076965,
      "learning_rate": 4.8509898904802026e-05,
      "loss": 0.4033,
      "step": 287
    },
    {
      "epoch": 0.030316587278612594,
      "grad_norm": 0.46996134519577026,
      "learning_rate": 4.8504633529907335e-05,
      "loss": 0.4217,
      "step": 288
    },
    {
      "epoch": 0.030421853206663334,
      "grad_norm": 0.4631175398826599,
      "learning_rate": 4.849936815501264e-05,
      "loss": 0.4114,
      "step": 289
    },
    {
      "epoch": 0.03052711913471407,
      "grad_norm": 0.5271033644676208,
      "learning_rate": 4.849410278011795e-05,
      "loss": 0.4044,
      "step": 290
    },
    {
      "epoch": 0.03063238506276481,
      "grad_norm": 0.46999993920326233,
      "learning_rate": 4.8488837405223256e-05,
      "loss": 0.4408,
      "step": 291
    },
    {
      "epoch": 0.03073765099081555,
      "grad_norm": 0.3656292259693146,
      "learning_rate": 4.848357203032856e-05,
      "loss": 0.4169,
      "step": 292
    },
    {
      "epoch": 0.030842916918866287,
      "grad_norm": 0.5758498907089233,
      "learning_rate": 4.847830665543387e-05,
      "loss": 0.4718,
      "step": 293
    },
    {
      "epoch": 0.030948182846917024,
      "grad_norm": 0.43184739351272583,
      "learning_rate": 4.8473041280539176e-05,
      "loss": 0.4081,
      "step": 294
    },
    {
      "epoch": 0.03105344877496776,
      "grad_norm": 0.44835662841796875,
      "learning_rate": 4.8467775905644485e-05,
      "loss": 0.4249,
      "step": 295
    },
    {
      "epoch": 0.0311587147030185,
      "grad_norm": 0.4488978087902069,
      "learning_rate": 4.8462510530749794e-05,
      "loss": 0.5449,
      "step": 296
    },
    {
      "epoch": 0.031263980631069235,
      "grad_norm": 0.5275838971138,
      "learning_rate": 4.8457245155855096e-05,
      "loss": 0.4624,
      "step": 297
    },
    {
      "epoch": 0.031369246559119976,
      "grad_norm": 0.6487151980400085,
      "learning_rate": 4.8451979780960405e-05,
      "loss": 0.4815,
      "step": 298
    },
    {
      "epoch": 0.03147451248717072,
      "grad_norm": 0.5481114983558655,
      "learning_rate": 4.8446714406065714e-05,
      "loss": 0.3889,
      "step": 299
    },
    {
      "epoch": 0.03157977841522145,
      "grad_norm": 0.516204833984375,
      "learning_rate": 4.844144903117102e-05,
      "loss": 0.3923,
      "step": 300
    },
    {
      "epoch": 0.03168504434327219,
      "grad_norm": 0.5541898012161255,
      "learning_rate": 4.843618365627633e-05,
      "loss": 0.4513,
      "step": 301
    },
    {
      "epoch": 0.03179031027132293,
      "grad_norm": 0.5141636729240417,
      "learning_rate": 4.8430918281381635e-05,
      "loss": 0.4993,
      "step": 302
    },
    {
      "epoch": 0.031895576199373665,
      "grad_norm": 0.46877187490463257,
      "learning_rate": 4.8425652906486944e-05,
      "loss": 0.4815,
      "step": 303
    },
    {
      "epoch": 0.032000842127424406,
      "grad_norm": 0.5002549886703491,
      "learning_rate": 4.842038753159225e-05,
      "loss": 0.5064,
      "step": 304
    },
    {
      "epoch": 0.03210610805547515,
      "grad_norm": 0.45424237847328186,
      "learning_rate": 4.8415122156697555e-05,
      "loss": 0.4549,
      "step": 305
    },
    {
      "epoch": 0.03221137398352588,
      "grad_norm": 0.4908994138240814,
      "learning_rate": 4.8409856781802864e-05,
      "loss": 0.5029,
      "step": 306
    },
    {
      "epoch": 0.03231663991157662,
      "grad_norm": 0.6221848726272583,
      "learning_rate": 4.840459140690817e-05,
      "loss": 0.4033,
      "step": 307
    },
    {
      "epoch": 0.03242190583962736,
      "grad_norm": 0.5026724934577942,
      "learning_rate": 4.839932603201348e-05,
      "loss": 0.3765,
      "step": 308
    },
    {
      "epoch": 0.032527171767678095,
      "grad_norm": 0.4318561255931854,
      "learning_rate": 4.839406065711879e-05,
      "loss": 0.4174,
      "step": 309
    },
    {
      "epoch": 0.032632437695728836,
      "grad_norm": 0.5485970377922058,
      "learning_rate": 4.83887952822241e-05,
      "loss": 0.4528,
      "step": 310
    },
    {
      "epoch": 0.03273770362377958,
      "grad_norm": 0.49032801389694214,
      "learning_rate": 4.838352990732941e-05,
      "loss": 0.4687,
      "step": 311
    },
    {
      "epoch": 0.03284296955183031,
      "grad_norm": 0.4289769232273102,
      "learning_rate": 4.837826453243471e-05,
      "loss": 0.5144,
      "step": 312
    },
    {
      "epoch": 0.03294823547988105,
      "grad_norm": 0.500663697719574,
      "learning_rate": 4.8372999157540013e-05,
      "loss": 0.3923,
      "step": 313
    },
    {
      "epoch": 0.033053501407931785,
      "grad_norm": 0.5670647025108337,
      "learning_rate": 4.836773378264532e-05,
      "loss": 0.4049,
      "step": 314
    },
    {
      "epoch": 0.033158767335982525,
      "grad_norm": 0.4813581109046936,
      "learning_rate": 4.836246840775063e-05,
      "loss": 0.443,
      "step": 315
    },
    {
      "epoch": 0.033264033264033266,
      "grad_norm": 0.5485454797744751,
      "learning_rate": 4.835720303285594e-05,
      "loss": 0.4008,
      "step": 316
    },
    {
      "epoch": 0.033369299192084,
      "grad_norm": 0.5390880703926086,
      "learning_rate": 4.835193765796125e-05,
      "loss": 0.3993,
      "step": 317
    },
    {
      "epoch": 0.03347456512013474,
      "grad_norm": 0.498060017824173,
      "learning_rate": 4.834667228306656e-05,
      "loss": 0.3953,
      "step": 318
    },
    {
      "epoch": 0.03357983104818548,
      "grad_norm": 0.49461764097213745,
      "learning_rate": 4.834140690817187e-05,
      "loss": 0.3972,
      "step": 319
    },
    {
      "epoch": 0.033685096976236215,
      "grad_norm": 0.723934531211853,
      "learning_rate": 4.8336141533277176e-05,
      "loss": 0.4582,
      "step": 320
    },
    {
      "epoch": 0.033790362904286955,
      "grad_norm": 0.4396905303001404,
      "learning_rate": 4.833087615838248e-05,
      "loss": 0.404,
      "step": 321
    },
    {
      "epoch": 0.033895628832337696,
      "grad_norm": 0.4418332576751709,
      "learning_rate": 4.832561078348779e-05,
      "loss": 0.5145,
      "step": 322
    },
    {
      "epoch": 0.03400089476038843,
      "grad_norm": 0.5111250281333923,
      "learning_rate": 4.832034540859309e-05,
      "loss": 0.5276,
      "step": 323
    },
    {
      "epoch": 0.03410616068843917,
      "grad_norm": 0.5635156035423279,
      "learning_rate": 4.83150800336984e-05,
      "loss": 0.5484,
      "step": 324
    },
    {
      "epoch": 0.03421142661648991,
      "grad_norm": 0.5792466402053833,
      "learning_rate": 4.830981465880371e-05,
      "loss": 0.5747,
      "step": 325
    },
    {
      "epoch": 0.034316692544540645,
      "grad_norm": 0.4661281406879425,
      "learning_rate": 4.830454928390902e-05,
      "loss": 0.4601,
      "step": 326
    },
    {
      "epoch": 0.034421958472591385,
      "grad_norm": 0.6661891937255859,
      "learning_rate": 4.8299283909014326e-05,
      "loss": 0.4993,
      "step": 327
    },
    {
      "epoch": 0.03452722440064212,
      "grad_norm": 0.5207692384719849,
      "learning_rate": 4.8294018534119635e-05,
      "loss": 0.421,
      "step": 328
    },
    {
      "epoch": 0.03463249032869286,
      "grad_norm": 0.6618428826332092,
      "learning_rate": 4.828875315922494e-05,
      "loss": 0.4163,
      "step": 329
    },
    {
      "epoch": 0.0347377562567436,
      "grad_norm": 0.513272225856781,
      "learning_rate": 4.8283487784330246e-05,
      "loss": 0.3797,
      "step": 330
    },
    {
      "epoch": 0.034843022184794334,
      "grad_norm": 0.4838692545890808,
      "learning_rate": 4.8278222409435555e-05,
      "loss": 0.3843,
      "step": 331
    },
    {
      "epoch": 0.034948288112845075,
      "grad_norm": 0.5403527021408081,
      "learning_rate": 4.8272957034540864e-05,
      "loss": 0.4821,
      "step": 332
    },
    {
      "epoch": 0.035053554040895815,
      "grad_norm": 0.48934701085090637,
      "learning_rate": 4.8267691659646167e-05,
      "loss": 0.4205,
      "step": 333
    },
    {
      "epoch": 0.03515881996894655,
      "grad_norm": 0.5227293968200684,
      "learning_rate": 4.8262426284751476e-05,
      "loss": 0.483,
      "step": 334
    },
    {
      "epoch": 0.03526408589699729,
      "grad_norm": 0.5904392004013062,
      "learning_rate": 4.8257160909856785e-05,
      "loss": 0.3868,
      "step": 335
    },
    {
      "epoch": 0.03536935182504803,
      "grad_norm": 0.4555564522743225,
      "learning_rate": 4.8251895534962094e-05,
      "loss": 0.4235,
      "step": 336
    },
    {
      "epoch": 0.035474617753098764,
      "grad_norm": 0.8526967763900757,
      "learning_rate": 4.8246630160067396e-05,
| "loss": 0.4588, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.035579883681149505, | |
| "grad_norm": 0.45085299015045166, | |
| "learning_rate": 4.8241364785172705e-05, | |
| "loss": 0.4228, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.035685149609200245, | |
| "grad_norm": 0.5043511390686035, | |
| "learning_rate": 4.8236099410278014e-05, | |
| "loss": 0.4632, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.03579041553725098, | |
| "grad_norm": 0.5064621567726135, | |
| "learning_rate": 4.823083403538332e-05, | |
| "loss": 0.4844, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.03589568146530172, | |
| "grad_norm": 0.48965758085250854, | |
| "learning_rate": 4.822556866048863e-05, | |
| "loss": 0.4481, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.03600094739335245, | |
| "grad_norm": 0.4565337300300598, | |
| "learning_rate": 4.8220303285593934e-05, | |
| "loss": 0.4011, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.036106213321403194, | |
| "grad_norm": 0.5424944758415222, | |
| "learning_rate": 4.821503791069924e-05, | |
| "loss": 0.5101, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.036211479249453934, | |
| "grad_norm": 0.4527457058429718, | |
| "learning_rate": 4.820977253580455e-05, | |
| "loss": 0.4097, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.03631674517750467, | |
| "grad_norm": 0.3896700441837311, | |
| "learning_rate": 4.8204507160909854e-05, | |
| "loss": 0.4177, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.03642201110555541, | |
| "grad_norm": 0.5583755373954773, | |
| "learning_rate": 4.8199241786015163e-05, | |
| "loss": 0.4437, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.03652727703360615, | |
| "grad_norm": 0.41155165433883667, | |
| "learning_rate": 4.819397641112047e-05, | |
| "loss": 0.4382, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.03663254296165688, | |
| "grad_norm": 0.36993688344955444, | |
| "learning_rate": 4.818871103622578e-05, | |
| "loss": 0.4839, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.036737808889707624, | |
| "grad_norm": 0.449740469455719, | |
| "learning_rate": 4.818344566133109e-05, | |
| "loss": 0.4251, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.036843074817758364, | |
| "grad_norm": 0.3957495391368866, | |
| "learning_rate": 4.81781802864364e-05, | |
| "loss": 0.4743, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.0369483407458091, | |
| "grad_norm": 0.5629512667655945, | |
| "learning_rate": 4.817291491154171e-05, | |
| "loss": 0.4002, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.03705360667385984, | |
| "grad_norm": 0.4598921239376068, | |
| "learning_rate": 4.816764953664701e-05, | |
| "loss": 0.4692, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.03715887260191058, | |
| "grad_norm": 0.516234278678894, | |
| "learning_rate": 4.816238416175232e-05, | |
| "loss": 0.4175, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.03726413852996131, | |
| "grad_norm": 0.5708214044570923, | |
| "learning_rate": 4.815711878685762e-05, | |
| "loss": 0.4306, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.037369404458012054, | |
| "grad_norm": 0.6185720562934875, | |
| "learning_rate": 4.815185341196293e-05, | |
| "loss": 0.4598, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.037474670386062794, | |
| "grad_norm": 0.5227758884429932, | |
| "learning_rate": 4.814658803706824e-05, | |
| "loss": 0.3782, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.03757993631411353, | |
| "grad_norm": 0.5345552563667297, | |
| "learning_rate": 4.814132266217355e-05, | |
| "loss": 0.418, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.03768520224216427, | |
| "grad_norm": 0.5797765254974365, | |
| "learning_rate": 4.813605728727886e-05, | |
| "loss": 0.5089, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.037790468170215, | |
| "grad_norm": 0.5567287802696228, | |
| "learning_rate": 4.813079191238417e-05, | |
| "loss": 0.4304, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.03789573409826574, | |
| "grad_norm": 0.4520246982574463, | |
| "learning_rate": 4.8125526537489476e-05, | |
| "loss": 0.4626, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.038001000026316484, | |
| "grad_norm": 0.44900500774383545, | |
| "learning_rate": 4.812026116259478e-05, | |
| "loss": 0.3843, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.03810626595436722, | |
| "grad_norm": 0.48296135663986206, | |
| "learning_rate": 4.811499578770009e-05, | |
| "loss": 0.4855, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.03821153188241796, | |
| "grad_norm": 0.4269002377986908, | |
| "learning_rate": 4.810973041280539e-05, | |
| "loss": 0.3795, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.0383167978104687, | |
| "grad_norm": 0.9296995401382446, | |
| "learning_rate": 4.81044650379107e-05, | |
| "loss": 0.4861, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.03842206373851943, | |
| "grad_norm": 0.5746780633926392, | |
| "learning_rate": 4.809919966301601e-05, | |
| "loss": 0.3991, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.03852732966657017, | |
| "grad_norm": 0.47170913219451904, | |
| "learning_rate": 4.8093934288121317e-05, | |
| "loss": 0.4348, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.038632595594620914, | |
| "grad_norm": 0.4327333867549896, | |
| "learning_rate": 4.8088668913226626e-05, | |
| "loss": 0.405, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.03873786152267165, | |
| "grad_norm": 0.4907747507095337, | |
| "learning_rate": 4.8083403538331935e-05, | |
| "loss": 0.4467, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.03884312745072239, | |
| "grad_norm": 0.48626840114593506, | |
| "learning_rate": 4.807813816343724e-05, | |
| "loss": 0.485, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.03894839337877313, | |
| "grad_norm": 0.5155723094940186, | |
| "learning_rate": 4.8072872788542546e-05, | |
| "loss": 0.3931, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.03905365930682386, | |
| "grad_norm": 0.5703728795051575, | |
| "learning_rate": 4.8067607413647855e-05, | |
| "loss": 0.3728, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.0391589252348746, | |
| "grad_norm": 0.5467020273208618, | |
| "learning_rate": 4.8062342038753164e-05, | |
| "loss": 0.477, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.03926419116292534, | |
| "grad_norm": 0.4459872543811798, | |
| "learning_rate": 4.8057076663858466e-05, | |
| "loss": 0.4712, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.03936945709097608, | |
| "grad_norm": 0.511060357093811, | |
| "learning_rate": 4.8051811288963775e-05, | |
| "loss": 0.5146, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.03947472301902682, | |
| "grad_norm": 0.3677018880844116, | |
| "learning_rate": 4.8046545914069084e-05, | |
| "loss": 0.4605, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.03957998894707755, | |
| "grad_norm": 0.47560691833496094, | |
| "learning_rate": 4.804128053917439e-05, | |
| "loss": 0.4479, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.03968525487512829, | |
| "grad_norm": 0.5171210169792175, | |
| "learning_rate": 4.8036015164279695e-05, | |
| "loss": 0.4413, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.03979052080317903, | |
| "grad_norm": 0.448194295167923, | |
| "learning_rate": 4.8030749789385004e-05, | |
| "loss": 0.4637, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.03989578673122977, | |
| "grad_norm": 0.5280170440673828, | |
| "learning_rate": 4.8025484414490313e-05, | |
| "loss": 0.4365, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.04000105265928051, | |
| "grad_norm": 0.490249902009964, | |
| "learning_rate": 4.802021903959562e-05, | |
| "loss": 0.4618, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.04010631858733125, | |
| "grad_norm": 0.5452317595481873, | |
| "learning_rate": 4.801495366470093e-05, | |
| "loss": 0.3972, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.04021158451538198, | |
| "grad_norm": 0.5572560429573059, | |
| "learning_rate": 4.800968828980624e-05, | |
| "loss": 0.4756, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.04031685044343272, | |
| "grad_norm": 0.45014721155166626, | |
| "learning_rate": 4.800442291491154e-05, | |
| "loss": 0.3915, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.04042211637148346, | |
| "grad_norm": 0.6049466729164124, | |
| "learning_rate": 4.799915754001685e-05, | |
| "loss": 0.3675, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.0405273822995342, | |
| "grad_norm": 0.6129103302955627, | |
| "learning_rate": 4.7993892165122154e-05, | |
| "loss": 0.378, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.04063264822758494, | |
| "grad_norm": 0.5461925864219666, | |
| "learning_rate": 4.798862679022746e-05, | |
| "loss": 0.4091, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.04073791415563568, | |
| "grad_norm": 0.41969093680381775, | |
| "learning_rate": 4.798336141533277e-05, | |
| "loss": 0.4843, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.04084318008368641, | |
| "grad_norm": 0.510870635509491, | |
| "learning_rate": 4.797809604043808e-05, | |
| "loss": 0.581, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.04094844601173715, | |
| "grad_norm": 0.5956604480743408, | |
| "learning_rate": 4.797283066554339e-05, | |
| "loss": 0.3163, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.041053711939787886, | |
| "grad_norm": 0.4685046076774597, | |
| "learning_rate": 4.79675652906487e-05, | |
| "loss": 0.4587, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.04115897786783863, | |
| "grad_norm": 0.4563463628292084, | |
| "learning_rate": 4.796229991575401e-05, | |
| "loss": 0.468, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.04126424379588937, | |
| "grad_norm": 0.5047011971473694, | |
| "learning_rate": 4.795703454085931e-05, | |
| "loss": 0.4117, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.0413695097239401, | |
| "grad_norm": 0.6256960034370422, | |
| "learning_rate": 4.795176916596462e-05, | |
| "loss": 0.4522, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.04147477565199084, | |
| "grad_norm": 0.479109525680542, | |
| "learning_rate": 4.794650379106992e-05, | |
| "loss": 0.5458, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.04158004158004158, | |
| "grad_norm": 0.5637032985687256, | |
| "learning_rate": 4.794123841617523e-05, | |
| "loss": 0.4724, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.041685307508092316, | |
| "grad_norm": 0.5758900046348572, | |
| "learning_rate": 4.793597304128054e-05, | |
| "loss": 0.3943, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.04179057343614306, | |
| "grad_norm": 0.41813746094703674, | |
| "learning_rate": 4.793070766638585e-05, | |
| "loss": 0.4937, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.0418958393641938, | |
| "grad_norm": 0.4549589455127716, | |
| "learning_rate": 4.792544229149116e-05, | |
| "loss": 0.4055, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.04200110529224453, | |
| "grad_norm": 0.42384806275367737, | |
| "learning_rate": 4.792017691659647e-05, | |
| "loss": 0.4189, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.04210637122029527, | |
| "grad_norm": 0.4235416352748871, | |
| "learning_rate": 4.7914911541701776e-05, | |
| "loss": 0.4304, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.04221163714834601, | |
| "grad_norm": 0.44901612401008606, | |
| "learning_rate": 4.7909646166807085e-05, | |
| "loss": 0.4575, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.042316903076396746, | |
| "grad_norm": 0.4786452353000641, | |
| "learning_rate": 4.790438079191239e-05, | |
| "loss": 0.4031, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.04242216900444749, | |
| "grad_norm": 0.64895099401474, | |
| "learning_rate": 4.7899115417017696e-05, | |
| "loss": 0.4437, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.04252743493249822, | |
| "grad_norm": 0.7129364609718323, | |
| "learning_rate": 4.7893850042123e-05, | |
| "loss": 0.426, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.04263270086054896, | |
| "grad_norm": 0.5261722207069397, | |
| "learning_rate": 4.788858466722831e-05, | |
| "loss": 0.4704, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.0427379667885997, | |
| "grad_norm": 0.5278510451316833, | |
| "learning_rate": 4.7883319292333616e-05, | |
| "loss": 0.43, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.042843232716650435, | |
| "grad_norm": 0.47645267844200134, | |
| "learning_rate": 4.7878053917438925e-05, | |
| "loss": 0.4399, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.042948498644701176, | |
| "grad_norm": 0.5606099367141724, | |
| "learning_rate": 4.7872788542544234e-05, | |
| "loss": 0.5023, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.04305376457275192, | |
| "grad_norm": 0.5183596611022949, | |
| "learning_rate": 4.786752316764954e-05, | |
| "loss": 0.4431, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.04315903050080265, | |
| "grad_norm": 0.4570636451244354, | |
| "learning_rate": 4.7862257792754845e-05, | |
| "loss": 0.4435, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.04326429642885339, | |
| "grad_norm": 0.5054503679275513, | |
| "learning_rate": 4.7856992417860154e-05, | |
| "loss": 0.4884, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.04336956235690413, | |
| "grad_norm": 0.4896951913833618, | |
| "learning_rate": 4.7851727042965463e-05, | |
| "loss": 0.472, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.043474828284954865, | |
| "grad_norm": 0.6141940951347351, | |
| "learning_rate": 4.7846461668070766e-05, | |
| "loss": 0.426, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.043580094213005606, | |
| "grad_norm": 0.48963436484336853, | |
| "learning_rate": 4.7841196293176075e-05, | |
| "loss": 0.4668, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.04368536014105635, | |
| "grad_norm": 0.5451966524124146, | |
| "learning_rate": 4.7835930918281384e-05, | |
| "loss": 0.4728, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.04379062606910708, | |
| "grad_norm": 0.434573769569397, | |
| "learning_rate": 4.783066554338669e-05, | |
| "loss": 0.4055, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.04389589199715782, | |
| "grad_norm": 0.5499134659767151, | |
| "learning_rate": 4.7825400168492e-05, | |
| "loss": 0.3879, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.044001157925208555, | |
| "grad_norm": 0.5180830955505371, | |
| "learning_rate": 4.7820134793597304e-05, | |
| "loss": 0.4445, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.044106423853259295, | |
| "grad_norm": 0.4541892409324646, | |
| "learning_rate": 4.781486941870261e-05, | |
| "loss": 0.4059, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.044211689781310036, | |
| "grad_norm": 0.3752939999103546, | |
| "learning_rate": 4.780960404380792e-05, | |
| "loss": 0.5885, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.04431695570936077, | |
| "grad_norm": 0.4906155467033386, | |
| "learning_rate": 4.780433866891323e-05, | |
| "loss": 0.4839, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.04442222163741151, | |
| "grad_norm": 0.4721757769584656, | |
| "learning_rate": 4.779907329401854e-05, | |
| "loss": 0.4177, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.04452748756546225, | |
| "grad_norm": 0.42130014300346375, | |
| "learning_rate": 4.779380791912384e-05, | |
| "loss": 0.4295, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.044632753493512985, | |
| "grad_norm": 0.5732069611549377, | |
| "learning_rate": 4.778854254422915e-05, | |
| "loss": 0.3721, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.044738019421563725, | |
| "grad_norm": 0.48826277256011963, | |
| "learning_rate": 4.778327716933446e-05, | |
| "loss": 0.4228, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.044843285349614466, | |
| "grad_norm": 0.5234729051589966, | |
| "learning_rate": 4.777801179443976e-05, | |
| "loss": 0.4014, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.0449485512776652, | |
| "grad_norm": 0.46457454562187195, | |
| "learning_rate": 4.777274641954507e-05, | |
| "loss": 0.5259, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.04505381720571594, | |
| "grad_norm": 0.5036742091178894, | |
| "learning_rate": 4.776748104465038e-05, | |
| "loss": 0.4361, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.04515908313376668, | |
| "grad_norm": 0.5410817265510559, | |
| "learning_rate": 4.776221566975569e-05, | |
| "loss": 0.463, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.045264349061817415, | |
| "grad_norm": 0.4173840284347534, | |
| "learning_rate": 4.7756950294861e-05, | |
| "loss": 0.4048, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.045369614989868155, | |
| "grad_norm": 0.726842999458313, | |
| "learning_rate": 4.775168491996631e-05, | |
| "loss": 0.5549, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.045474880917918896, | |
| "grad_norm": 0.40877723693847656, | |
| "learning_rate": 4.774641954507162e-05, | |
| "loss": 0.4433, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.04558014684596963, | |
| "grad_norm": 0.6194121241569519, | |
| "learning_rate": 4.774115417017692e-05, | |
| "loss": 0.4257, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.04568541277402037, | |
| "grad_norm": 0.5976036787033081, | |
| "learning_rate": 4.773588879528222e-05, | |
| "loss": 0.4709, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.045790678702071104, | |
| "grad_norm": 0.6144199371337891, | |
| "learning_rate": 4.773062342038753e-05, | |
| "loss": 0.3868, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.045895944630121845, | |
| "grad_norm": 0.5125494599342346, | |
| "learning_rate": 4.772535804549284e-05, | |
| "loss": 0.4116, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.046001210558172585, | |
| "grad_norm": 0.5164209604263306, | |
| "learning_rate": 4.772009267059815e-05, | |
| "loss": 0.3564, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.04610647648622332, | |
| "grad_norm": 0.4817107319831848, | |
| "learning_rate": 4.771482729570346e-05, | |
| "loss": 0.4801, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.04621174241427406, | |
| "grad_norm": 0.44076791405677795, | |
| "learning_rate": 4.7709561920808766e-05, | |
| "loss": 0.551, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.0463170083423248, | |
| "grad_norm": 0.634650707244873, | |
| "learning_rate": 4.7704296545914075e-05, | |
| "loss": 0.533, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.046422274270375534, | |
| "grad_norm": 0.4300638437271118, | |
| "learning_rate": 4.7699031171019384e-05, | |
| "loss": 0.4219, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.046527540198426275, | |
| "grad_norm": 0.5052940249443054, | |
| "learning_rate": 4.7693765796124686e-05, | |
| "loss": 0.4419, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.046632806126477015, | |
| "grad_norm": 0.4833763539791107, | |
| "learning_rate": 4.7688500421229995e-05, | |
| "loss": 0.4074, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.04673807205452775, | |
| "grad_norm": 0.4841054677963257, | |
| "learning_rate": 4.76832350463353e-05, | |
| "loss": 0.4357, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.04684333798257849, | |
| "grad_norm": 0.5227946639060974, | |
| "learning_rate": 4.767796967144061e-05, | |
| "loss": 0.4471, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.04694860391062923, | |
| "grad_norm": 0.5761273503303528, | |
| "learning_rate": 4.7672704296545916e-05, | |
| "loss": 0.4422, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.047053869838679964, | |
| "grad_norm": 0.47115081548690796, | |
| "learning_rate": 4.7667438921651225e-05, | |
| "loss": 0.4172, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.047159135766730705, | |
| "grad_norm": 0.5475848913192749, | |
| "learning_rate": 4.7662173546756534e-05, | |
| "loss": 0.4435, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.04726440169478144, | |
| "grad_norm": 0.4437314569950104, | |
| "learning_rate": 4.765690817186184e-05, | |
| "loss": 0.389, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.04736966762283218, | |
| "grad_norm": 0.4307888448238373, | |
| "learning_rate": 4.7651642796967145e-05, | |
| "loss": 0.4354, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.04747493355088292, | |
| "grad_norm": 0.3933163285255432, | |
| "learning_rate": 4.7646377422072454e-05, | |
| "loss": 0.561, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.04758019947893365, | |
| "grad_norm": 0.37329408526420593, | |
| "learning_rate": 4.764111204717776e-05, | |
| "loss": 0.4767, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.047685465406984394, | |
| "grad_norm": 0.554229199886322, | |
| "learning_rate": 4.763584667228307e-05, | |
| "loss": 0.3594, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.047790731335035135, | |
| "grad_norm": 0.4243522882461548, | |
| "learning_rate": 4.7630581297388374e-05, | |
| "loss": 0.44, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.04789599726308587, | |
| "grad_norm": 0.5723696351051331, | |
| "learning_rate": 4.762531592249368e-05, | |
| "loss": 0.4377, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.04800126319113661, | |
| "grad_norm": 0.5366947054862976, | |
| "learning_rate": 4.762005054759899e-05, | |
| "loss": 0.4021, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.04810652911918735, | |
| "grad_norm": 0.5559504628181458, | |
| "learning_rate": 4.76147851727043e-05, | |
| "loss": 0.3775, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.04821179504723808, | |
| "grad_norm": 0.48702389001846313, | |
| "learning_rate": 4.7609519797809604e-05, | |
| "loss": 0.4751, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.048317060975288824, | |
| "grad_norm": 0.36137351393699646, | |
| "learning_rate": 4.760425442291491e-05, | |
| "loss": 0.482, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.048422326903339565, | |
| "grad_norm": 0.4528438150882721, | |
| "learning_rate": 4.759898904802022e-05, | |
| "loss": 0.4059, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.0485275928313903, | |
| "grad_norm": 0.5218043923377991, | |
| "learning_rate": 4.759372367312553e-05, | |
| "loss": 0.4095, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.04863285875944104, | |
| "grad_norm": 0.5252096652984619, | |
| "learning_rate": 4.758845829823084e-05, | |
| "loss": 0.4989, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.04873812468749178, | |
| "grad_norm": 0.3626563549041748, | |
| "learning_rate": 4.758319292333614e-05, | |
| "loss": 0.5983, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.04884339061554251, | |
| "grad_norm": 0.473537415266037, | |
| "learning_rate": 4.757792754844145e-05, | |
| "loss": 0.5459, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.048948656543593254, | |
| "grad_norm": 0.7054407596588135, | |
| "learning_rate": 4.757266217354676e-05, | |
| "loss": 0.3718, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.04905392247164399, | |
| "grad_norm": 0.4829826056957245, | |
| "learning_rate": 4.756739679865206e-05, | |
| "loss": 0.4165, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.04915918839969473, | |
| "grad_norm": 0.5529534816741943, | |
| "learning_rate": 4.756213142375737e-05, | |
| "loss": 0.5058, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.04926445432774547, | |
| "grad_norm": 0.4331270456314087, | |
| "learning_rate": 4.755686604886268e-05, | |
| "loss": 0.4267, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.0493697202557962, | |
| "grad_norm": 0.48735421895980835, | |
| "learning_rate": 4.755160067396799e-05, | |
| "loss": 0.443, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.04947498618384694, | |
| "grad_norm": 0.6138409972190857, | |
| "learning_rate": 4.75463352990733e-05, | |
| "loss": 0.4449, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.049580252111897684, | |
| "grad_norm": 0.4512140154838562, | |
| "learning_rate": 4.754106992417861e-05, | |
| "loss": 0.486, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.04968551803994842, | |
| "grad_norm": 0.5221918225288391, | |
| "learning_rate": 4.7535804549283916e-05, | |
| "loss": 0.4122, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.04979078396799916, | |
| "grad_norm": 0.5450029969215393, | |
| "learning_rate": 4.753053917438922e-05, | |
| "loss": 0.3362, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.0498960498960499, | |
| "grad_norm": 0.5064875483512878, | |
| "learning_rate": 4.752527379949452e-05, | |
| "loss": 0.4868, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.05000131582410063, | |
| "grad_norm": 0.5182908177375793, | |
| "learning_rate": 4.752000842459983e-05, | |
| "loss": 0.4034, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.05010658175215137, | |
| "grad_norm": 0.5384114384651184, | |
| "learning_rate": 4.751474304970514e-05, | |
| "loss": 0.5353, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.050211847680202114, | |
| "grad_norm": 0.5357162952423096, | |
| "learning_rate": 4.750947767481045e-05, | |
| "loss": 0.3976, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.05031711360825285, | |
| "grad_norm": 0.45556405186653137, | |
| "learning_rate": 4.750421229991576e-05, | |
| "loss": 0.403, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.05042237953630359, | |
| "grad_norm": 0.5855860710144043, | |
| "learning_rate": 4.7498946925021066e-05, | |
| "loss": 0.3754, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.05052764546435432, | |
| "grad_norm": 0.5920200943946838, | |
| "learning_rate": 4.7493681550126375e-05, | |
| "loss": 0.3944, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.05063291139240506, | |
| "grad_norm": 0.5460993051528931, | |
| "learning_rate": 4.7488416175231684e-05, | |
| "loss": 0.5356, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.0507381773204558, | |
| "grad_norm": 0.5433392524719238, | |
| "learning_rate": 4.7483150800336986e-05, | |
| "loss": 0.4043, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.05084344324850654, | |
| "grad_norm": 0.6986379027366638, | |
| "learning_rate": 4.7477885425442295e-05, | |
| "loss": 0.4374, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.05094870917655728, | |
| "grad_norm": 0.6336686611175537, | |
| "learning_rate": 4.74726200505476e-05, | |
| "loss": 0.4308, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.05105397510460802, | |
| "grad_norm": 0.5509925484657288, | |
| "learning_rate": 4.7467354675652906e-05, | |
| "loss": 0.4101, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.05115924103265875, | |
| "grad_norm": 0.5978362560272217, | |
| "learning_rate": 4.7462089300758215e-05, | |
| "loss": 0.379, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.05126450696070949, | |
| "grad_norm": 0.5480085015296936, | |
| "learning_rate": 4.7456823925863524e-05, | |
| "loss": 0.3327, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.05136977288876023, | |
| "grad_norm": 0.5396241545677185, | |
| "learning_rate": 4.745155855096883e-05, | |
| "loss": 0.4283, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.05147503881681097, | |
| "grad_norm": 0.43143001198768616, | |
| "learning_rate": 4.744629317607414e-05, | |
| "loss": 0.584, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.05158030474486171, | |
| "grad_norm": 0.4590414762496948, | |
| "learning_rate": 4.7441027801179445e-05, | |
| "loss": 0.3794, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.05168557067291245, | |
| "grad_norm": 0.4620942175388336, | |
| "learning_rate": 4.7435762426284754e-05, | |
| "loss": 0.4421, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.05179083660096318, | |
| "grad_norm": 0.5003826022148132, | |
| "learning_rate": 4.743049705139006e-05, | |
| "loss": 0.4408, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.05189610252901392, | |
| "grad_norm": 0.5184903740882874, | |
| "learning_rate": 4.742523167649537e-05, | |
| "loss": 0.4523, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.052001368457064656, | |
| "grad_norm": 0.5750355124473572, | |
| "learning_rate": 4.7419966301600674e-05, | |
| "loss": 0.3512, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.0521066343851154, | |
| "grad_norm": 0.516768217086792, | |
| "learning_rate": 4.741470092670598e-05, | |
| "loss": 0.3583, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.05221190031316614, | |
| "grad_norm": 0.5511295199394226, | |
| "learning_rate": 4.740943555181129e-05, | |
| "loss": 0.4536, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.05231716624121687, | |
| "grad_norm": 0.4026057720184326, | |
| "learning_rate": 4.74041701769166e-05, | |
| "loss": 0.4834, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.05242243216926761, | |
| "grad_norm": 0.6032986044883728, | |
| "learning_rate": 4.73989048020219e-05, | |
| "loss": 0.3901, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.05252769809731835, | |
| "grad_norm": 0.45538461208343506, | |
| "learning_rate": 4.739363942712721e-05, | |
| "loss": 0.4174, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.052632964025369086, | |
| "grad_norm": 0.564687967300415, | |
| "learning_rate": 4.738837405223252e-05, | |
| "loss": 0.4543, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.05273822995341983, | |
| "grad_norm": 0.5365861058235168, | |
| "learning_rate": 4.738310867733783e-05, | |
| "loss": 0.3998, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.05284349588147057, | |
| "grad_norm": 0.5887376666069031, | |
| "learning_rate": 4.737784330244314e-05, | |
| "loss": 0.4881, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.0529487618095213, | |
| "grad_norm": 0.5137104392051697, | |
| "learning_rate": 4.737257792754845e-05, | |
| "loss": 0.4158, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.05305402773757204, | |
| "grad_norm": 0.7075323462486267, | |
| "learning_rate": 4.736731255265375e-05, | |
| "loss": 0.4249, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.05315929366562278, | |
| "grad_norm": 0.5085923075675964, | |
| "learning_rate": 4.736204717775906e-05, | |
| "loss": 0.3974, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.053264559593673516, | |
| "grad_norm": 0.4885638654232025, | |
| "learning_rate": 4.735678180286436e-05, | |
| "loss": 0.3569, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.05336982552172426, | |
| "grad_norm": 0.5807955265045166, | |
| "learning_rate": 4.735151642796967e-05, | |
| "loss": 0.3868, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.053475091449775, | |
| "grad_norm": 0.4715438485145569, | |
| "learning_rate": 4.734625105307498e-05, | |
| "loss": 0.4592, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.05358035737782573, | |
| "grad_norm": 0.4971379041671753, | |
| "learning_rate": 4.734098567818029e-05, | |
| "loss": 0.4449, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.05368562330587647, | |
| "grad_norm": 0.5600916743278503, | |
| "learning_rate": 4.73357203032856e-05, | |
| "loss": 0.3905, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.053790889233927205, | |
| "grad_norm": 0.5462086200714111, | |
| "learning_rate": 4.733045492839091e-05, | |
| "loss": 0.3757, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.053896155161977946, | |
| "grad_norm": 0.4880779981613159, | |
| "learning_rate": 4.7325189553496216e-05, | |
| "loss": 0.4084, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.05400142109002869, | |
| "grad_norm": 0.5553451180458069, | |
| "learning_rate": 4.731992417860152e-05, | |
| "loss": 0.4088, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.05410668701807942, | |
| "grad_norm": 0.4913026690483093, | |
| "learning_rate": 4.731465880370683e-05, | |
| "loss": 0.4286, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.05421195294613016, | |
| "grad_norm": 0.43161246180534363, | |
| "learning_rate": 4.730939342881213e-05, | |
| "loss": 0.413, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.0543172188741809, | |
| "grad_norm": 0.5062459707260132, | |
| "learning_rate": 4.730412805391744e-05, | |
| "loss": 0.4713, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.054422484802231635, | |
| "grad_norm": 0.4592074751853943, | |
| "learning_rate": 4.729886267902275e-05, | |
| "loss": 0.4902, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.054527750730282376, | |
| "grad_norm": 0.49476075172424316, | |
| "learning_rate": 4.7293597304128056e-05, | |
| "loss": 0.4016, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.05463301665833312, | |
| "grad_norm": 0.4191977381706238, | |
| "learning_rate": 4.7288331929233365e-05, | |
| "loss": 0.4672, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.05473828258638385, | |
| "grad_norm": 0.5030830502510071, | |
| "learning_rate": 4.7283066554338674e-05, | |
| "loss": 0.4905, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.05484354851443459, | |
| "grad_norm": 0.4686654210090637, | |
| "learning_rate": 4.727780117944398e-05, | |
| "loss": 0.4441, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.05494881444248533, | |
| "grad_norm": 0.46608471870422363, | |
| "learning_rate": 4.7272535804549286e-05, | |
| "loss": 0.3742, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.055054080370536065, | |
| "grad_norm": 0.5822672247886658, | |
| "learning_rate": 4.7267270429654595e-05, | |
| "loss": 0.4266, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.055159346298586806, | |
| "grad_norm": 0.4522544741630554, | |
| "learning_rate": 4.7262005054759904e-05, | |
| "loss": 0.4532, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.05526461222663754, | |
| "grad_norm": 0.47990643978118896, | |
| "learning_rate": 4.7256739679865206e-05, | |
| "loss": 0.3853, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.05536987815468828, | |
| "grad_norm": 0.5252960920333862, | |
| "learning_rate": 4.7251474304970515e-05, | |
| "loss": 0.4716, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.05547514408273902, | |
| "grad_norm": 0.45028603076934814, | |
| "learning_rate": 4.7246208930075824e-05, | |
| "loss": 0.4579, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.055580410010789755, | |
| "grad_norm": 0.5253304243087769, | |
| "learning_rate": 4.724094355518113e-05, | |
| "loss": 0.4433, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.055685675938840495, | |
| "grad_norm": 0.48800671100616455, | |
| "learning_rate": 4.723567818028644e-05, | |
| "loss": 0.4228, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.055790941866891236, | |
| "grad_norm": 0.5435435771942139, | |
| "learning_rate": 4.7230412805391744e-05, | |
| "loss": 0.4181, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.05589620779494197, | |
| "grad_norm": 0.5906736254692078, | |
| "learning_rate": 4.722514743049705e-05, | |
| "loss": 0.4003, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.05600147372299271, | |
| "grad_norm": 0.49869149923324585, | |
| "learning_rate": 4.721988205560236e-05, | |
| "loss": 0.4781, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.05610673965104345, | |
| "grad_norm": 0.4748145341873169, | |
| "learning_rate": 4.721461668070767e-05, | |
| "loss": 0.4291, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.056212005579094185, | |
| "grad_norm": 0.471021831035614, | |
| "learning_rate": 4.7209351305812973e-05, | |
| "loss": 0.4683, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.056317271507144925, | |
| "grad_norm": 0.6247691512107849, | |
| "learning_rate": 4.720408593091828e-05, | |
| "loss": 0.3932, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.056422537435195666, | |
| "grad_norm": 0.6917199492454529, | |
| "learning_rate": 4.719882055602359e-05, | |
| "loss": 0.597, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.0565278033632464, | |
| "grad_norm": 0.607105553150177, | |
| "learning_rate": 4.71935551811289e-05, | |
| "loss": 0.5024, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.05663306929129714, | |
| "grad_norm": 0.6015260815620422, | |
| "learning_rate": 4.71882898062342e-05, | |
| "loss": 0.4569, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.05673833521934788, | |
| "grad_norm": 0.6226845979690552, | |
| "learning_rate": 4.718302443133951e-05, | |
| "loss": 0.4134, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.056843601147398615, | |
| "grad_norm": 0.46711722016334534, | |
| "learning_rate": 4.717775905644482e-05, | |
| "loss": 0.4957, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.056948867075449355, | |
| "grad_norm": 0.4069374203681946, | |
| "learning_rate": 4.717249368155013e-05, | |
| "loss": 0.4173, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.05705413300350009, | |
| "grad_norm": 0.47599026560783386, | |
| "learning_rate": 4.716722830665544e-05, | |
| "loss": 0.4865, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.05715939893155083, | |
| "grad_norm": 0.46828117966651917, | |
| "learning_rate": 4.716196293176075e-05, | |
| "loss": 0.4763, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.05726466485960157, | |
| "grad_norm": 0.3772525191307068, | |
| "learning_rate": 4.715669755686605e-05, | |
| "loss": 0.4225, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.057369930787652304, | |
| "grad_norm": 0.44674021005630493, | |
| "learning_rate": 4.715143218197136e-05, | |
| "loss": 0.5063, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.057475196715703045, | |
| "grad_norm": 0.5613642334938049, | |
| "learning_rate": 4.714616680707666e-05, | |
| "loss": 0.5388, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.057580462643753785, | |
| "grad_norm": 0.5140121579170227, | |
| "learning_rate": 4.714090143218197e-05, | |
| "loss": 0.4481, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.05768572857180452, | |
| "grad_norm": 0.4728577435016632, | |
| "learning_rate": 4.713563605728728e-05, | |
| "loss": 0.3896, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.05779099449985526, | |
| "grad_norm": 0.4167439639568329, | |
| "learning_rate": 4.713037068239259e-05, | |
| "loss": 0.3863, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.057896260427906, | |
| "grad_norm": 0.5620428919792175, | |
| "learning_rate": 4.71251053074979e-05, | |
| "loss": 0.4342, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.058001526355956734, | |
| "grad_norm": 0.424396812915802, | |
| "learning_rate": 4.7119839932603206e-05, | |
| "loss": 0.5043, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.058106792284007475, | |
| "grad_norm": 0.4943045675754547, | |
| "learning_rate": 4.7114574557708515e-05, | |
| "loss": 0.3649, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.058212058212058215, | |
| "grad_norm": 0.5179657340049744, | |
| "learning_rate": 4.7109309182813824e-05, | |
| "loss": 0.3986, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.05831732414010895, | |
| "grad_norm": 0.46122902631759644, | |
| "learning_rate": 4.710404380791913e-05, | |
| "loss": 0.4501, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.05842259006815969, | |
| "grad_norm": 0.5129498243331909, | |
| "learning_rate": 4.709877843302443e-05, | |
| "loss": 0.4105, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.05852785599621042, | |
| "grad_norm": 0.5061764121055603, | |
| "learning_rate": 4.709351305812974e-05, | |
| "loss": 0.3993, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.058633121924261164, | |
| "grad_norm": 0.5676811933517456, | |
| "learning_rate": 4.708824768323505e-05, | |
| "loss": 0.3786, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.058738387852311905, | |
| "grad_norm": 0.5383573174476624, | |
| "learning_rate": 4.7082982308340356e-05, | |
| "loss": 0.4541, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.05884365378036264, | |
| "grad_norm": 0.6130087375640869, | |
| "learning_rate": 4.7077716933445665e-05, | |
| "loss": 0.4215, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.05894891970841338, | |
| "grad_norm": 0.6504372954368591, | |
| "learning_rate": 4.7072451558550974e-05, | |
| "loss": 0.3891, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.05905418563646412, | |
| "grad_norm": 0.5079691410064697, | |
| "learning_rate": 4.706718618365628e-05, | |
| "loss": 0.4668, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.05915945156451485, | |
| "grad_norm": 0.528856635093689, | |
| "learning_rate": 4.7061920808761585e-05, | |
| "loss": 0.3965, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.059264717492565594, | |
| "grad_norm": 0.44504040479660034, | |
| "learning_rate": 4.7056655433866894e-05, | |
| "loss": 0.5032, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.059369983420616335, | |
| "grad_norm": 0.5209716558456421, | |
| "learning_rate": 4.70513900589722e-05, | |
| "loss": 0.4837, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.05947524934866707, | |
| "grad_norm": 0.48046526312828064, | |
| "learning_rate": 4.7046124684077505e-05, | |
| "loss": 0.3989, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.05958051527671781, | |
| "grad_norm": 0.5712192058563232, | |
| "learning_rate": 4.7040859309182814e-05, | |
| "loss": 0.4788, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.05968578120476855, | |
| "grad_norm": 0.6029406785964966, | |
| "learning_rate": 4.7035593934288123e-05, | |
| "loss": 0.3974, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.05979104713281928, | |
| "grad_norm": 0.5272865295410156, | |
| "learning_rate": 4.703032855939343e-05, | |
| "loss": 0.4562, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.059896313060870024, | |
| "grad_norm": 0.5821331143379211, | |
| "learning_rate": 4.702506318449874e-05, | |
| "loss": 0.3848, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.06000157898892076, | |
| "grad_norm": 0.45264291763305664, | |
| "learning_rate": 4.7019797809604044e-05, | |
| "loss": 0.491, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.0601068449169715, | |
| "grad_norm": 0.5712417364120483, | |
| "learning_rate": 4.701453243470935e-05, | |
| "loss": 0.4128, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.06021211084502224, | |
| "grad_norm": 0.5191047787666321, | |
| "learning_rate": 4.700926705981466e-05, | |
| "loss": 0.4552, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.06031737677307297, | |
| "grad_norm": 0.4191204607486725, | |
| "learning_rate": 4.700400168491997e-05, | |
| "loss": 0.4669, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.06042264270112371, | |
| "grad_norm": 0.508425235748291, | |
| "learning_rate": 4.699873631002528e-05, | |
| "loss": 0.4031, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.060527908629174454, | |
| "grad_norm": 0.47075721621513367, | |
| "learning_rate": 4.699347093513058e-05, | |
| "loss": 0.4773, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.06063317455722519, | |
| "grad_norm": 0.5133448839187622, | |
| "learning_rate": 4.698820556023589e-05, | |
| "loss": 0.3865, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.06073844048527593, | |
| "grad_norm": 0.5425415635108948, | |
| "learning_rate": 4.69829401853412e-05, | |
| "loss": 0.4117, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.06084370641332667, | |
| "grad_norm": 0.61476731300354, | |
| "learning_rate": 4.69776748104465e-05, | |
| "loss": 0.4307, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.0609489723413774, | |
| "grad_norm": 0.553023099899292, | |
| "learning_rate": 4.697240943555181e-05, | |
| "loss": 0.3579, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.06105423826942814, | |
| "grad_norm": 0.4436430037021637, | |
| "learning_rate": 4.696714406065712e-05, | |
| "loss": 0.4099, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.061159504197478884, | |
| "grad_norm": 0.5598846673965454, | |
| "learning_rate": 4.696187868576243e-05, | |
| "loss": 0.3615, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.06126477012552962, | |
| "grad_norm": 0.6036468744277954, | |
| "learning_rate": 4.695661331086774e-05, | |
| "loss": 0.4438, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.06137003605358036, | |
| "grad_norm": 0.6011479496955872, | |
| "learning_rate": 4.695134793597305e-05, | |
| "loss": 0.4288, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.0614753019816311, | |
| "grad_norm": 0.5292397141456604, | |
| "learning_rate": 4.694608256107835e-05, | |
| "loss": 0.4086, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.06158056790968183, | |
| "grad_norm": 0.5526982545852661, | |
| "learning_rate": 4.694081718618366e-05, | |
| "loss": 0.3941, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.06168583383773257, | |
| "grad_norm": 0.5088376402854919, | |
| "learning_rate": 4.693555181128896e-05, | |
| "loss": 0.4356, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.06179109976578331, | |
| "grad_norm": 0.5751054286956787, | |
| "learning_rate": 4.693028643639427e-05, | |
| "loss": 0.4629, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.06189636569383405, | |
| "grad_norm": 0.47562679648399353, | |
| "learning_rate": 4.692502106149958e-05, | |
| "loss": 0.4875, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.06200163162188479, | |
| "grad_norm": 0.406876802444458, | |
| "learning_rate": 4.691975568660489e-05, | |
| "loss": 0.5099, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.06210689754993552, | |
| "grad_norm": 0.43212106823921204, | |
| "learning_rate": 4.69144903117102e-05, | |
| "loss": 0.5271, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.06221216347798626, | |
| "grad_norm": 0.5265733003616333, | |
| "learning_rate": 4.6909224936815506e-05, | |
| "loss": 0.4456, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.062317429406037, | |
| "grad_norm": 0.37871816754341125, | |
| "learning_rate": 4.6903959561920815e-05, | |
| "loss": 0.3964, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.06242269533408774, | |
| "grad_norm": 0.443781316280365, | |
| "learning_rate": 4.6898694187026124e-05, | |
| "loss": 0.4575, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.06252796126213847, | |
| "grad_norm": 0.5184212923049927, | |
| "learning_rate": 4.6893428812131426e-05, | |
| "loss": 0.4791, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.06263322719018921, | |
| "grad_norm": 0.4982917308807373, | |
| "learning_rate": 4.688816343723673e-05, | |
| "loss": 0.4104, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.06273849311823995, | |
| "grad_norm": 0.43113309144973755, | |
| "learning_rate": 4.688289806234204e-05, | |
| "loss": 0.4384, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.06284375904629069, | |
| "grad_norm": 0.5594951510429382, | |
| "learning_rate": 4.6877632687447346e-05, | |
| "loss": 0.4428, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.06294902497434143, | |
| "grad_norm": 0.408655047416687, | |
| "learning_rate": 4.6872367312552655e-05, | |
| "loss": 0.4328, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.06305429090239217, | |
| "grad_norm": 0.41858869791030884, | |
| "learning_rate": 4.6867101937657964e-05, | |
| "loss": 0.4822, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.0631595568304429, | |
| "grad_norm": 0.5304632186889648, | |
| "learning_rate": 4.6861836562763274e-05, | |
| "loss": 0.4376, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.06326482275849364, | |
| "grad_norm": 0.4693495035171509, | |
| "learning_rate": 4.685657118786858e-05, | |
| "loss": 0.3905, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.06337008868654438, | |
| "grad_norm": 0.5536295771598816, | |
| "learning_rate": 4.6851305812973885e-05, | |
| "loss": 0.4378, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.06347535461459512, | |
| "grad_norm": 0.4618769884109497, | |
| "learning_rate": 4.6846040438079194e-05, | |
| "loss": 0.4642, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.06358062054264586, | |
| "grad_norm": 0.463776171207428, | |
| "learning_rate": 4.68407750631845e-05, | |
| "loss": 0.4518, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.0636858864706966, | |
| "grad_norm": 0.5297257900238037, | |
| "learning_rate": 4.6835509688289805e-05, | |
| "loss": 0.3222, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.06379115239874733, | |
| "grad_norm": 0.47493240237236023, | |
| "learning_rate": 4.6830244313395114e-05, | |
| "loss": 0.4, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.06389641832679807, | |
| "grad_norm": 0.6347471475601196, | |
| "learning_rate": 4.682497893850042e-05, | |
| "loss": 0.4315, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.06400168425484881, | |
| "grad_norm": 0.5118055939674377, | |
| "learning_rate": 4.681971356360573e-05, | |
| "loss": 0.4136, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.06410695018289955, | |
| "grad_norm": 0.5062241554260254, | |
| "learning_rate": 4.681444818871104e-05, | |
| "loss": 0.51, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.0642122161109503, | |
| "grad_norm": 0.45359355211257935, | |
| "learning_rate": 4.680918281381634e-05, | |
| "loss": 0.3897, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.06431748203900102, | |
| "grad_norm": 0.4978649914264679, | |
| "learning_rate": 4.680391743892165e-05, | |
| "loss": 0.4234, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.06442274796705176, | |
| "grad_norm": 0.5025052428245544, | |
| "learning_rate": 4.679865206402696e-05, | |
| "loss": 0.4344, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.0645280138951025, | |
| "grad_norm": 0.4677049517631531, | |
| "learning_rate": 4.679338668913227e-05, | |
| "loss": 0.3997, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.06463327982315324, | |
| "grad_norm": 0.38490286469459534, | |
| "learning_rate": 4.678812131423758e-05, | |
| "loss": 0.4778, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.06473854575120398, | |
| "grad_norm": 0.4486238956451416, | |
| "learning_rate": 4.678285593934288e-05, | |
| "loss": 0.4002, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.06484381167925472, | |
| "grad_norm": 0.48641228675842285, | |
| "learning_rate": 4.677759056444819e-05, | |
| "loss": 0.4302, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.06494907760730545, | |
| "grad_norm": 0.5490376353263855, | |
| "learning_rate": 4.67723251895535e-05, | |
| "loss": 0.4203, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.06505434353535619, | |
| "grad_norm": 0.4899100363254547, | |
| "learning_rate": 4.67670598146588e-05, | |
| "loss": 0.399, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.06515960946340693, | |
| "grad_norm": 0.7570556998252869, | |
| "learning_rate": 4.676179443976411e-05, | |
| "loss": 0.4409, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.06526487539145767, | |
| "grad_norm": 0.5624217391014099, | |
| "learning_rate": 4.675652906486942e-05, | |
| "loss": 0.3867, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.06537014131950841, | |
| "grad_norm": 0.47434237599372864, | |
| "learning_rate": 4.675126368997473e-05, | |
| "loss": 0.3962, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.06547540724755915, | |
| "grad_norm": 0.5388314723968506, | |
| "learning_rate": 4.674599831508004e-05, | |
| "loss": 0.3872, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.06558067317560988, | |
| "grad_norm": 0.49027901887893677, | |
| "learning_rate": 4.674073294018535e-05, | |
| "loss": 0.4786, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.06568593910366062, | |
| "grad_norm": 0.4333001673221588, | |
| "learning_rate": 4.6735467565290656e-05, | |
| "loss": 0.4245, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.06579120503171136, | |
| "grad_norm": 0.4188300669193268, | |
| "learning_rate": 4.673020219039596e-05, | |
| "loss": 0.4713, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.0658964709597621, | |
| "grad_norm": 0.48492878675460815, | |
| "learning_rate": 4.672493681550126e-05, | |
| "loss": 0.4896, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.06600173688781284, | |
| "grad_norm": 0.5120576024055481, | |
| "learning_rate": 4.671967144060657e-05, | |
| "loss": 0.4209, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.06610700281586357, | |
| "grad_norm": 0.5438317060470581, | |
| "learning_rate": 4.671440606571188e-05, | |
| "loss": 0.4494, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.06621226874391431, | |
| "grad_norm": 0.5266952514648438, | |
| "learning_rate": 4.670914069081719e-05, | |
| "loss": 0.5609, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.06631753467196505, | |
| "grad_norm": 0.6691259741783142, | |
| "learning_rate": 4.6703875315922496e-05, | |
| "loss": 0.405, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.06642280060001579, | |
| "grad_norm": 0.6721771955490112, | |
| "learning_rate": 4.6698609941027806e-05, | |
| "loss": 0.537, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.06652806652806653, | |
| "grad_norm": 0.6021822690963745, | |
| "learning_rate": 4.6693344566133115e-05, | |
| "loss": 0.4862, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.06663333245611727, | |
| "grad_norm": 0.42799803614616394, | |
| "learning_rate": 4.6688079191238424e-05, | |
| "loss": 0.4316, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.066738598384168, | |
| "grad_norm": 0.3875657320022583, | |
| "learning_rate": 4.6682813816343726e-05, | |
| "loss": 0.4557, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.06684386431221874, | |
| "grad_norm": 0.4300662577152252, | |
| "learning_rate": 4.6677548441449035e-05, | |
| "loss": 0.5253, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.06694913024026948, | |
| "grad_norm": 0.4926076829433441, | |
| "learning_rate": 4.667228306655434e-05, | |
| "loss": 0.5151, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.06705439616832022, | |
| "grad_norm": 0.457466185092926, | |
| "learning_rate": 4.6667017691659646e-05, | |
| "loss": 0.4296, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.06715966209637096, | |
| "grad_norm": 0.5367447137832642, | |
| "learning_rate": 4.6661752316764955e-05, | |
| "loss": 0.43, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.06726492802442169, | |
| "grad_norm": 0.5215645432472229, | |
| "learning_rate": 4.6656486941870264e-05, | |
| "loss": 0.4355, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.06737019395247243, | |
| "grad_norm": 0.5821287035942078, | |
| "learning_rate": 4.665122156697557e-05, | |
| "loss": 0.3576, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.06747545988052317, | |
| "grad_norm": 0.5504344701766968, | |
| "learning_rate": 4.664595619208088e-05, | |
| "loss": 0.4843, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.06758072580857391, | |
| "grad_norm": 0.4482622742652893, | |
| "learning_rate": 4.6640690817186184e-05, | |
| "loss": 0.4474, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.06768599173662465, | |
| "grad_norm": 0.5162287950515747, | |
| "learning_rate": 4.663542544229149e-05, | |
| "loss": 0.5323, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.06779125766467539, | |
| "grad_norm": 0.5771566033363342, | |
| "learning_rate": 4.66301600673968e-05, | |
| "loss": 0.3508, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.06789652359272612, | |
| "grad_norm": 0.473014235496521, | |
| "learning_rate": 4.6624894692502105e-05, | |
| "loss": 0.3959, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.06800178952077686, | |
| "grad_norm": 0.4953562915325165, | |
| "learning_rate": 4.6619629317607414e-05, | |
| "loss": 0.4301, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.0681070554488276, | |
| "grad_norm": 0.519964337348938, | |
| "learning_rate": 4.661436394271272e-05, | |
| "loss": 0.4395, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.06821232137687834, | |
| "grad_norm": 0.5988878607749939, | |
| "learning_rate": 4.660909856781803e-05, | |
| "loss": 0.4151, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.06831758730492908, | |
| "grad_norm": 0.5311563014984131, | |
| "learning_rate": 4.660383319292334e-05, | |
| "loss": 0.431, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.06842285323297982, | |
| "grad_norm": 0.48196783661842346, | |
| "learning_rate": 4.659856781802864e-05, | |
| "loss": 0.4645, | |
| "step": 650 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 9499, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.570341741428736e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |