| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.3157721616139466, | |
| "eval_steps": 500, | |
| "global_step": 720, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00043857244668603693, | |
| "grad_norm": 1.5650805234909058, | |
| "learning_rate": 1.4492753623188406e-08, | |
| "loss": 0.4933, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0008771448933720739, | |
| "grad_norm": 1.2313175201416016, | |
| "learning_rate": 2.898550724637681e-08, | |
| "loss": 0.4744, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.001315717340058111, | |
| "grad_norm": 1.4933998584747314, | |
| "learning_rate": 4.347826086956521e-08, | |
| "loss": 0.4723, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.0017542897867441477, | |
| "grad_norm": 1.4024336338043213, | |
| "learning_rate": 5.797101449275362e-08, | |
| "loss": 0.4569, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0021928622334301848, | |
| "grad_norm": 1.2763197422027588, | |
| "learning_rate": 7.246376811594203e-08, | |
| "loss": 0.4702, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.002631434680116222, | |
| "grad_norm": 1.3957324028015137, | |
| "learning_rate": 8.695652173913042e-08, | |
| "loss": 0.4788, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.003070007126802259, | |
| "grad_norm": 1.3984495401382446, | |
| "learning_rate": 1.0144927536231885e-07, | |
| "loss": 0.4864, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.0035085795734882954, | |
| "grad_norm": 1.321620225906372, | |
| "learning_rate": 1.1594202898550725e-07, | |
| "loss": 0.4611, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0039471520201743325, | |
| "grad_norm": 1.2592936754226685, | |
| "learning_rate": 1.3043478260869563e-07, | |
| "loss": 0.4508, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.0043857244668603695, | |
| "grad_norm": 1.4119383096694946, | |
| "learning_rate": 1.4492753623188405e-07, | |
| "loss": 0.5231, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0048242969135464065, | |
| "grad_norm": 1.343310832977295, | |
| "learning_rate": 1.5942028985507245e-07, | |
| "loss": 0.4483, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.005262869360232444, | |
| "grad_norm": 1.522481918334961, | |
| "learning_rate": 1.7391304347826085e-07, | |
| "loss": 0.4727, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.005701441806918481, | |
| "grad_norm": 1.2751290798187256, | |
| "learning_rate": 1.8840579710144927e-07, | |
| "loss": 0.4973, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.006140014253604518, | |
| "grad_norm": 1.29275643825531, | |
| "learning_rate": 2.028985507246377e-07, | |
| "loss": 0.4627, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.006578586700290554, | |
| "grad_norm": 1.2619212865829468, | |
| "learning_rate": 2.1739130434782607e-07, | |
| "loss": 0.4771, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.007017159146976591, | |
| "grad_norm": 1.4083727598190308, | |
| "learning_rate": 2.318840579710145e-07, | |
| "loss": 0.4892, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.007455731593662628, | |
| "grad_norm": 1.468392252922058, | |
| "learning_rate": 2.463768115942029e-07, | |
| "loss": 0.4481, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.007894304040348665, | |
| "grad_norm": 1.4284064769744873, | |
| "learning_rate": 2.6086956521739126e-07, | |
| "loss": 0.454, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.008332876487034702, | |
| "grad_norm": 1.2147692441940308, | |
| "learning_rate": 2.753623188405797e-07, | |
| "loss": 0.4357, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.008771448933720739, | |
| "grad_norm": 1.2484781742095947, | |
| "learning_rate": 2.898550724637681e-07, | |
| "loss": 0.47, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.009210021380406776, | |
| "grad_norm": 1.3065662384033203, | |
| "learning_rate": 3.043478260869565e-07, | |
| "loss": 0.515, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.009648593827092813, | |
| "grad_norm": 1.2731220722198486, | |
| "learning_rate": 3.188405797101449e-07, | |
| "loss": 0.4567, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.01008716627377885, | |
| "grad_norm": 1.2372796535491943, | |
| "learning_rate": 3.333333333333333e-07, | |
| "loss": 0.4886, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.010525738720464887, | |
| "grad_norm": 1.2341394424438477, | |
| "learning_rate": 3.478260869565217e-07, | |
| "loss": 0.4618, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.010964311167150924, | |
| "grad_norm": 1.2708252668380737, | |
| "learning_rate": 3.6231884057971015e-07, | |
| "loss": 0.4695, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.011402883613836961, | |
| "grad_norm": 1.326284646987915, | |
| "learning_rate": 3.7681159420289855e-07, | |
| "loss": 0.4807, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.011841456060522998, | |
| "grad_norm": 1.267345666885376, | |
| "learning_rate": 3.9130434782608694e-07, | |
| "loss": 0.4729, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.012280028507209035, | |
| "grad_norm": 1.302316665649414, | |
| "learning_rate": 4.057971014492754e-07, | |
| "loss": 0.4427, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.012718600953895072, | |
| "grad_norm": 1.2135436534881592, | |
| "learning_rate": 4.2028985507246374e-07, | |
| "loss": 0.4371, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.013157173400581108, | |
| "grad_norm": 1.162211298942566, | |
| "learning_rate": 4.3478260869565214e-07, | |
| "loss": 0.4611, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.013595745847267145, | |
| "grad_norm": 1.305415391921997, | |
| "learning_rate": 4.4927536231884053e-07, | |
| "loss": 0.4789, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.014034318293953182, | |
| "grad_norm": 1.545601725578308, | |
| "learning_rate": 4.63768115942029e-07, | |
| "loss": 0.5328, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.014472890740639219, | |
| "grad_norm": 1.4781523942947388, | |
| "learning_rate": 4.782608695652174e-07, | |
| "loss": 0.4827, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.014911463187325256, | |
| "grad_norm": 1.2530491352081299, | |
| "learning_rate": 4.927536231884058e-07, | |
| "loss": 0.4836, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.015350035634011293, | |
| "grad_norm": 1.2580320835113525, | |
| "learning_rate": 5.072463768115942e-07, | |
| "loss": 0.4536, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.01578860808069733, | |
| "grad_norm": 1.2908357381820679, | |
| "learning_rate": 5.217391304347825e-07, | |
| "loss": 0.4578, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.016227180527383367, | |
| "grad_norm": 1.2363778352737427, | |
| "learning_rate": 5.36231884057971e-07, | |
| "loss": 0.4768, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.016665752974069404, | |
| "grad_norm": 1.395221471786499, | |
| "learning_rate": 5.507246376811594e-07, | |
| "loss": 0.4261, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.01710432542075544, | |
| "grad_norm": 1.5333712100982666, | |
| "learning_rate": 5.652173913043477e-07, | |
| "loss": 0.5309, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.017542897867441478, | |
| "grad_norm": 1.4437744617462158, | |
| "learning_rate": 5.797101449275362e-07, | |
| "loss": 0.5116, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.017981470314127515, | |
| "grad_norm": 1.3201457262039185, | |
| "learning_rate": 5.942028985507246e-07, | |
| "loss": 0.4506, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.018420042760813552, | |
| "grad_norm": 1.2283574342727661, | |
| "learning_rate": 6.08695652173913e-07, | |
| "loss": 0.5068, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.01885861520749959, | |
| "grad_norm": 1.355634093284607, | |
| "learning_rate": 6.231884057971014e-07, | |
| "loss": 0.5099, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.019297187654185626, | |
| "grad_norm": 1.284976601600647, | |
| "learning_rate": 6.376811594202898e-07, | |
| "loss": 0.4596, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.019735760100871663, | |
| "grad_norm": 1.3104331493377686, | |
| "learning_rate": 6.521739130434782e-07, | |
| "loss": 0.4222, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.0201743325475577, | |
| "grad_norm": 1.2699097394943237, | |
| "learning_rate": 6.666666666666666e-07, | |
| "loss": 0.4565, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.020612904994243737, | |
| "grad_norm": 1.401367425918579, | |
| "learning_rate": 6.811594202898551e-07, | |
| "loss": 0.4207, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.021051477440929774, | |
| "grad_norm": 1.2467551231384277, | |
| "learning_rate": 6.956521739130434e-07, | |
| "loss": 0.448, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.02149004988761581, | |
| "grad_norm": 1.304906964302063, | |
| "learning_rate": 7.101449275362319e-07, | |
| "loss": 0.451, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.02192862233430185, | |
| "grad_norm": 1.21262526512146, | |
| "learning_rate": 7.246376811594203e-07, | |
| "loss": 0.4607, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.022367194780987885, | |
| "grad_norm": 1.1871509552001953, | |
| "learning_rate": 7.391304347826086e-07, | |
| "loss": 0.4355, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.022805767227673922, | |
| "grad_norm": 1.2784358263015747, | |
| "learning_rate": 7.536231884057971e-07, | |
| "loss": 0.4316, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.02324433967435996, | |
| "grad_norm": 1.2877992391586304, | |
| "learning_rate": 7.681159420289855e-07, | |
| "loss": 0.4737, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.023682912121045997, | |
| "grad_norm": 1.315425157546997, | |
| "learning_rate": 7.826086956521739e-07, | |
| "loss": 0.4885, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.024121484567732034, | |
| "grad_norm": 1.2742252349853516, | |
| "learning_rate": 7.971014492753623e-07, | |
| "loss": 0.4956, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.02456005701441807, | |
| "grad_norm": 1.2137497663497925, | |
| "learning_rate": 8.115942028985508e-07, | |
| "loss": 0.3931, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.024998629461104108, | |
| "grad_norm": 1.2331868410110474, | |
| "learning_rate": 8.260869565217391e-07, | |
| "loss": 0.463, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.025437201907790145, | |
| "grad_norm": 1.3358384370803833, | |
| "learning_rate": 8.405797101449275e-07, | |
| "loss": 0.4744, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.025875774354476182, | |
| "grad_norm": 1.3751511573791504, | |
| "learning_rate": 8.550724637681159e-07, | |
| "loss": 0.4179, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.026314346801162215, | |
| "grad_norm": 1.3184670209884644, | |
| "learning_rate": 8.695652173913043e-07, | |
| "loss": 0.4813, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.026752919247848252, | |
| "grad_norm": 1.2712794542312622, | |
| "learning_rate": 8.840579710144928e-07, | |
| "loss": 0.4931, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.02719149169453429, | |
| "grad_norm": 1.3398113250732422, | |
| "learning_rate": 8.985507246376811e-07, | |
| "loss": 0.54, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.027630064141220326, | |
| "grad_norm": 1.2505016326904297, | |
| "learning_rate": 9.130434782608695e-07, | |
| "loss": 0.4464, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.028068636587906363, | |
| "grad_norm": 1.1980383396148682, | |
| "learning_rate": 9.27536231884058e-07, | |
| "loss": 0.4118, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.0285072090345924, | |
| "grad_norm": 1.27497136592865, | |
| "learning_rate": 9.420289855072463e-07, | |
| "loss": 0.4491, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.028945781481278438, | |
| "grad_norm": 1.2081972360610962, | |
| "learning_rate": 9.565217391304349e-07, | |
| "loss": 0.3582, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.029384353927964475, | |
| "grad_norm": 1.2842012643814087, | |
| "learning_rate": 9.710144927536232e-07, | |
| "loss": 0.4584, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.02982292637465051, | |
| "grad_norm": 1.2653955221176147, | |
| "learning_rate": 9.855072463768117e-07, | |
| "loss": 0.463, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.03026149882133655, | |
| "grad_norm": 1.190528392791748, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4628, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.030700071268022586, | |
| "grad_norm": 1.1633963584899902, | |
| "learning_rate": 9.999994952664242e-07, | |
| "loss": 0.4204, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.031138643714708623, | |
| "grad_norm": 1.4251805543899536, | |
| "learning_rate": 9.999979810667154e-07, | |
| "loss": 0.492, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.03157721616139466, | |
| "grad_norm": 1.4169254302978516, | |
| "learning_rate": 9.99995457403931e-07, | |
| "loss": 0.4918, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.0320157886080807, | |
| "grad_norm": 1.2996279001235962, | |
| "learning_rate": 9.999919242831662e-07, | |
| "loss": 0.4389, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.032454361054766734, | |
| "grad_norm": 1.1896483898162842, | |
| "learning_rate": 9.999873817115539e-07, | |
| "loss": 0.4169, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.032892933501452774, | |
| "grad_norm": 1.3035953044891357, | |
| "learning_rate": 9.999818296982652e-07, | |
| "loss": 0.4625, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.03333150594813881, | |
| "grad_norm": 1.2109572887420654, | |
| "learning_rate": 9.999752682545095e-07, | |
| "loss": 0.4225, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.03377007839482485, | |
| "grad_norm": 1.1753894090652466, | |
| "learning_rate": 9.999676973935336e-07, | |
| "loss": 0.4265, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.03420865084151088, | |
| "grad_norm": 1.2878810167312622, | |
| "learning_rate": 9.99959117130623e-07, | |
| "loss": 0.4736, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.03464722328819692, | |
| "grad_norm": 1.29270601272583, | |
| "learning_rate": 9.999495274831003e-07, | |
| "loss": 0.4767, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.035085795734882956, | |
| "grad_norm": 1.1873911619186401, | |
| "learning_rate": 9.999389284703264e-07, | |
| "loss": 0.4282, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.03552436818156899, | |
| "grad_norm": 1.4078459739685059, | |
| "learning_rate": 9.999273201137004e-07, | |
| "loss": 0.4702, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.03596294062825503, | |
| "grad_norm": 1.209952473640442, | |
| "learning_rate": 9.999147024366583e-07, | |
| "loss": 0.4174, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.036401513074941064, | |
| "grad_norm": 1.1953450441360474, | |
| "learning_rate": 9.999010754646748e-07, | |
| "loss": 0.4217, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.036840085521627104, | |
| "grad_norm": 1.3063303232192993, | |
| "learning_rate": 9.998864392252614e-07, | |
| "loss": 0.4839, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.03727865796831314, | |
| "grad_norm": 1.1942335367202759, | |
| "learning_rate": 9.998707937479682e-07, | |
| "loss": 0.4443, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.03771723041499918, | |
| "grad_norm": 1.6526542901992798, | |
| "learning_rate": 9.99854139064382e-07, | |
| "loss": 0.4641, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.03815580286168521, | |
| "grad_norm": 1.4341564178466797, | |
| "learning_rate": 9.998364752081277e-07, | |
| "loss": 0.4928, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.03859437530837125, | |
| "grad_norm": 1.1596306562423706, | |
| "learning_rate": 9.998178022148676e-07, | |
| "loss": 0.4209, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.039032947755057286, | |
| "grad_norm": 1.2552282810211182, | |
| "learning_rate": 9.997981201223009e-07, | |
| "loss": 0.5077, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.039471520201743326, | |
| "grad_norm": 1.4964027404785156, | |
| "learning_rate": 9.997774289701647e-07, | |
| "loss": 0.4983, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.03991009264842936, | |
| "grad_norm": 1.221358060836792, | |
| "learning_rate": 9.99755728800233e-07, | |
| "loss": 0.4285, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.0403486650951154, | |
| "grad_norm": 1.2171251773834229, | |
| "learning_rate": 9.997330196563169e-07, | |
| "loss": 0.4797, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.040787237541801434, | |
| "grad_norm": 1.2266311645507812, | |
| "learning_rate": 9.99709301584265e-07, | |
| "loss": 0.4371, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.041225809988487475, | |
| "grad_norm": 1.4037154912948608, | |
| "learning_rate": 9.99684574631962e-07, | |
| "loss": 0.5075, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.04166438243517351, | |
| "grad_norm": 1.2918537855148315, | |
| "learning_rate": 9.996588388493306e-07, | |
| "loss": 0.4561, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.04210295488185955, | |
| "grad_norm": 1.422583818435669, | |
| "learning_rate": 9.996320942883295e-07, | |
| "loss": 0.4975, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.04254152732854558, | |
| "grad_norm": 1.3405108451843262, | |
| "learning_rate": 9.996043410029537e-07, | |
| "loss": 0.4725, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.04298009977523162, | |
| "grad_norm": 1.2558434009552002, | |
| "learning_rate": 9.995755790492359e-07, | |
| "loss": 0.4548, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.043418672221917656, | |
| "grad_norm": 1.3062516450881958, | |
| "learning_rate": 9.99545808485244e-07, | |
| "loss": 0.4432, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.0438572446686037, | |
| "grad_norm": 1.277416467666626, | |
| "learning_rate": 9.995150293710838e-07, | |
| "loss": 0.4464, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.04429581711528973, | |
| "grad_norm": 1.2594455480575562, | |
| "learning_rate": 9.99483241768895e-07, | |
| "loss": 0.4271, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.04473438956197577, | |
| "grad_norm": 1.4411756992340088, | |
| "learning_rate": 9.994504457428556e-07, | |
| "loss": 0.4759, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.045172962008661804, | |
| "grad_norm": 1.3192076683044434, | |
| "learning_rate": 9.994166413591784e-07, | |
| "loss": 0.4448, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.045611534455347845, | |
| "grad_norm": 1.247989296913147, | |
| "learning_rate": 9.993818286861122e-07, | |
| "loss": 0.491, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.04605010690203388, | |
| "grad_norm": 1.2985870838165283, | |
| "learning_rate": 9.993460077939414e-07, | |
| "loss": 0.4688, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.04648867934871992, | |
| "grad_norm": 1.3416420221328735, | |
| "learning_rate": 9.993091787549862e-07, | |
| "loss": 0.4958, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.04692725179540595, | |
| "grad_norm": 1.4006417989730835, | |
| "learning_rate": 9.99271341643602e-07, | |
| "loss": 0.4829, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.04736582424209199, | |
| "grad_norm": 1.422197699546814, | |
| "learning_rate": 9.99232496536179e-07, | |
| "loss": 0.4628, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.04780439668877803, | |
| "grad_norm": 1.2422194480895996, | |
| "learning_rate": 9.991926435111437e-07, | |
| "loss": 0.4086, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.04824296913546407, | |
| "grad_norm": 1.185133695602417, | |
| "learning_rate": 9.99151782648956e-07, | |
| "loss": 0.4374, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.0486815415821501, | |
| "grad_norm": 1.2822550535202026, | |
| "learning_rate": 9.99109914032112e-07, | |
| "loss": 0.457, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.04912011402883614, | |
| "grad_norm": 1.3902647495269775, | |
| "learning_rate": 9.99067037745141e-07, | |
| "loss": 0.4281, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.049558686475522175, | |
| "grad_norm": 1.244568943977356, | |
| "learning_rate": 9.990231538746078e-07, | |
| "loss": 0.443, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.049997258922208215, | |
| "grad_norm": 1.1944634914398193, | |
| "learning_rate": 9.989782625091113e-07, | |
| "loss": 0.3956, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.05043583136889425, | |
| "grad_norm": 1.330433964729309, | |
| "learning_rate": 9.989323637392834e-07, | |
| "loss": 0.5167, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.05087440381558029, | |
| "grad_norm": 1.2958663702011108, | |
| "learning_rate": 9.988854576577913e-07, | |
| "loss": 0.5158, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.05131297626226632, | |
| "grad_norm": 1.2222250699996948, | |
| "learning_rate": 9.988375443593354e-07, | |
| "loss": 0.4502, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.051751548708952363, | |
| "grad_norm": 1.3030247688293457, | |
| "learning_rate": 9.987886239406491e-07, | |
| "loss": 0.387, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.0521901211556384, | |
| "grad_norm": 1.281866431236267, | |
| "learning_rate": 9.987386965004997e-07, | |
| "loss": 0.4687, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.05262869360232443, | |
| "grad_norm": 1.273772120475769, | |
| "learning_rate": 9.986877621396877e-07, | |
| "loss": 0.4064, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.05306726604901047, | |
| "grad_norm": 1.3751946687698364, | |
| "learning_rate": 9.986358209610457e-07, | |
| "loss": 0.4919, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.053505838495696505, | |
| "grad_norm": 1.3724075555801392, | |
| "learning_rate": 9.985828730694396e-07, | |
| "loss": 0.4633, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.053944410942382545, | |
| "grad_norm": 1.2193125486373901, | |
| "learning_rate": 9.985289185717683e-07, | |
| "loss": 0.4356, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.05438298338906858, | |
| "grad_norm": 1.424628734588623, | |
| "learning_rate": 9.984739575769617e-07, | |
| "loss": 0.4395, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.05482155583575462, | |
| "grad_norm": 1.2202560901641846, | |
| "learning_rate": 9.984179901959828e-07, | |
| "loss": 0.4765, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.05526012828244065, | |
| "grad_norm": 1.3114961385726929, | |
| "learning_rate": 9.983610165418259e-07, | |
| "loss": 0.4197, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.05569870072912669, | |
| "grad_norm": 1.2029987573623657, | |
| "learning_rate": 9.983030367295173e-07, | |
| "loss": 0.429, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.05613727317581273, | |
| "grad_norm": 1.4868053197860718, | |
| "learning_rate": 9.982440508761143e-07, | |
| "loss": 0.4736, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.05657584562249877, | |
| "grad_norm": 1.212609887123108, | |
| "learning_rate": 9.981840591007051e-07, | |
| "loss": 0.4235, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.0570144180691848, | |
| "grad_norm": 1.3265501260757446, | |
| "learning_rate": 9.981230615244099e-07, | |
| "loss": 0.459, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.05745299051587084, | |
| "grad_norm": 1.2474690675735474, | |
| "learning_rate": 9.980610582703782e-07, | |
| "loss": 0.4498, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.057891562962556875, | |
| "grad_norm": 1.1922861337661743, | |
| "learning_rate": 9.979980494637908e-07, | |
| "loss": 0.4227, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.058330135409242916, | |
| "grad_norm": 1.351237416267395, | |
| "learning_rate": 9.979340352318582e-07, | |
| "loss": 0.441, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.05876870785592895, | |
| "grad_norm": 1.1860822439193726, | |
| "learning_rate": 9.978690157038208e-07, | |
| "loss": 0.412, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.05920728030261499, | |
| "grad_norm": 1.3116803169250488, | |
| "learning_rate": 9.97802991010949e-07, | |
| "loss": 0.5344, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.05964585274930102, | |
| "grad_norm": 1.3794771432876587, | |
| "learning_rate": 9.977359612865422e-07, | |
| "loss": 0.4982, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.060084425195987064, | |
| "grad_norm": 1.197117567062378, | |
| "learning_rate": 9.976679266659292e-07, | |
| "loss": 0.4201, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.0605229976426731, | |
| "grad_norm": 1.2389888763427734, | |
| "learning_rate": 9.97598887286467e-07, | |
| "loss": 0.4692, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.06096157008935914, | |
| "grad_norm": 1.3116774559020996, | |
| "learning_rate": 9.975288432875422e-07, | |
| "loss": 0.4854, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.06140014253604517, | |
| "grad_norm": 1.2350678443908691, | |
| "learning_rate": 9.974577948105684e-07, | |
| "loss": 0.4724, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.06183871498273121, | |
| "grad_norm": 1.3790651559829712, | |
| "learning_rate": 9.973857419989881e-07, | |
| "loss": 0.5353, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.062277287429417245, | |
| "grad_norm": 1.3146638870239258, | |
| "learning_rate": 9.973126849982713e-07, | |
| "loss": 0.4545, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.06271585987610329, | |
| "grad_norm": 1.4369558095932007, | |
| "learning_rate": 9.972386239559152e-07, | |
| "loss": 0.4897, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.06315443232278932, | |
| "grad_norm": 1.3575363159179688, | |
| "learning_rate": 9.97163559021444e-07, | |
| "loss": 0.4034, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.06359300476947535, | |
| "grad_norm": 1.2711018323898315, | |
| "learning_rate": 9.97087490346409e-07, | |
| "loss": 0.4584, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.0640315772161614, | |
| "grad_norm": 1.2842679023742676, | |
| "learning_rate": 9.970104180843878e-07, | |
| "loss": 0.4689, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.06447014966284743, | |
| "grad_norm": 1.2036633491516113, | |
| "learning_rate": 9.969323423909846e-07, | |
| "loss": 0.417, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.06490872210953347, | |
| "grad_norm": 1.199987769126892, | |
| "learning_rate": 9.968532634238287e-07, | |
| "loss": 0.4346, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.0653472945562195, | |
| "grad_norm": 1.280448317527771, | |
| "learning_rate": 9.967731813425752e-07, | |
| "loss": 0.4319, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.06578586700290555, | |
| "grad_norm": 1.2478801012039185, | |
| "learning_rate": 9.966920963089051e-07, | |
| "loss": 0.4424, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.06622443944959158, | |
| "grad_norm": 1.2198948860168457, | |
| "learning_rate": 9.966100084865232e-07, | |
| "loss": 0.4236, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.06666301189627762, | |
| "grad_norm": 1.28062903881073, | |
| "learning_rate": 9.965269180411598e-07, | |
| "loss": 0.4262, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.06710158434296365, | |
| "grad_norm": 1.137345790863037, | |
| "learning_rate": 9.96442825140569e-07, | |
| "loss": 0.4692, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.0675401567896497, | |
| "grad_norm": 1.2888954877853394, | |
| "learning_rate": 9.963577299545286e-07, | |
| "loss": 0.5076, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.06797872923633573, | |
| "grad_norm": 1.2218375205993652, | |
| "learning_rate": 9.962716326548404e-07, | |
| "loss": 0.4651, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.06841730168302176, | |
| "grad_norm": 1.3181045055389404, | |
| "learning_rate": 9.961845334153293e-07, | |
| "loss": 0.4151, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.0688558741297078, | |
| "grad_norm": 1.2577183246612549, | |
| "learning_rate": 9.960964324118425e-07, | |
| "loss": 0.4713, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.06929444657639384, | |
| "grad_norm": 1.2748234272003174, | |
| "learning_rate": 9.960073298222508e-07, | |
| "loss": 0.4551, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.06973301902307988, | |
| "grad_norm": 1.2437751293182373, | |
| "learning_rate": 9.959172258264458e-07, | |
| "loss": 0.4388, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.07017159146976591, | |
| "grad_norm": 1.3476532697677612, | |
| "learning_rate": 9.95826120606342e-07, | |
| "loss": 0.5177, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.07061016391645195, | |
| "grad_norm": 1.3056862354278564, | |
| "learning_rate": 9.957340143458747e-07, | |
| "loss": 0.4781, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.07104873636313798, | |
| "grad_norm": 1.4270120859146118, | |
| "learning_rate": 9.956409072310004e-07, | |
| "loss": 0.4588, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.07148730880982403, | |
| "grad_norm": 1.3080512285232544, | |
| "learning_rate": 9.95546799449696e-07, | |
| "loss": 0.4265, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.07192588125651006, | |
| "grad_norm": 1.4175631999969482, | |
| "learning_rate": 9.954516911919595e-07, | |
| "loss": 0.4487, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.0723644537031961, | |
| "grad_norm": 1.2619612216949463, | |
| "learning_rate": 9.953555826498077e-07, | |
| "loss": 0.4375, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.07280302614988213, | |
| "grad_norm": 1.301414966583252, | |
| "learning_rate": 9.952584740172777e-07, | |
| "loss": 0.4425, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.07324159859656817, | |
| "grad_norm": 1.2964798212051392, | |
| "learning_rate": 9.951603654904254e-07, | |
| "loss": 0.4687, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.07368017104325421, | |
| "grad_norm": 1.2225522994995117, | |
| "learning_rate": 9.950612572673255e-07, | |
| "loss": 0.4119, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.07411874348994024, | |
| "grad_norm": 1.2875328063964844, | |
| "learning_rate": 9.949611495480708e-07, | |
| "loss": 0.4658, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.07455731593662628, | |
| "grad_norm": 1.168454885482788, | |
| "learning_rate": 9.948600425347724e-07, | |
| "loss": 0.409, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.07499588838331232, | |
| "grad_norm": 1.3310086727142334, | |
| "learning_rate": 9.947579364315587e-07, | |
| "loss": 0.4657, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.07543446082999836, | |
| "grad_norm": 1.254629135131836, | |
| "learning_rate": 9.946548314445751e-07, | |
| "loss": 0.4406, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.07587303327668439, | |
| "grad_norm": 1.2079718112945557, | |
| "learning_rate": 9.94550727781984e-07, | |
| "loss": 0.4325, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.07631160572337042, | |
| "grad_norm": 1.2881447076797485, | |
| "learning_rate": 9.944456256539636e-07, | |
| "loss": 0.4459, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.07675017817005647, | |
| "grad_norm": 1.4596824645996094, | |
| "learning_rate": 9.943395252727085e-07, | |
| "loss": 0.4727, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.0771887506167425, | |
| "grad_norm": 1.2925268411636353, | |
| "learning_rate": 9.94232426852428e-07, | |
| "loss": 0.482, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.07762732306342854, | |
| "grad_norm": 1.2629894018173218, | |
| "learning_rate": 9.94124330609347e-07, | |
| "loss": 0.45, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.07806589551011457, | |
| "grad_norm": 1.238906741142273, | |
| "learning_rate": 9.940152367617049e-07, | |
| "loss": 0.4358, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.07850446795680062, | |
| "grad_norm": 1.33396315574646, | |
| "learning_rate": 9.939051455297548e-07, | |
| "loss": 0.4683, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.07894304040348665, | |
| "grad_norm": 1.4251528978347778, | |
| "learning_rate": 9.937940571357636e-07, | |
| "loss": 0.4883, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.07938161285017269, | |
| "grad_norm": 1.2432212829589844, | |
| "learning_rate": 9.936819718040116e-07, | |
| "loss": 0.4295, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.07982018529685872, | |
| "grad_norm": 1.2691503763198853, | |
| "learning_rate": 9.935688897607915e-07, | |
| "loss": 0.49, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.08025875774354477, | |
| "grad_norm": 1.3803023099899292, | |
| "learning_rate": 9.934548112344087e-07, | |
| "loss": 0.4467, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.0806973301902308, | |
| "grad_norm": 1.3414316177368164, | |
| "learning_rate": 9.933397364551805e-07, | |
| "loss": 0.4608, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.08113590263691683, | |
| "grad_norm": 1.3661813735961914, | |
| "learning_rate": 9.93223665655435e-07, | |
| "loss": 0.4901, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.08157447508360287, | |
| "grad_norm": 1.2298403978347778, | |
| "learning_rate": 9.931065990695113e-07, | |
| "loss": 0.4408, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.08201304753028892, | |
| "grad_norm": 1.1648515462875366, | |
| "learning_rate": 9.929885369337596e-07, | |
| "loss": 0.4097, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.08245161997697495, | |
| "grad_norm": 1.1954864263534546, | |
| "learning_rate": 9.928694794865395e-07, | |
| "loss": 0.4162, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.08289019242366098, | |
| "grad_norm": 1.2637189626693726, | |
| "learning_rate": 9.9274942696822e-07, | |
| "loss": 0.4361, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.08332876487034702, | |
| "grad_norm": 1.2444093227386475, | |
| "learning_rate": 9.926283796211794e-07, | |
| "loss": 0.4692, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.08376733731703306, | |
| "grad_norm": 1.282979965209961, | |
| "learning_rate": 9.925063376898044e-07, | |
| "loss": 0.3999, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.0842059097637191, | |
| "grad_norm": 1.2734895944595337, | |
| "learning_rate": 9.923833014204893e-07, | |
| "loss": 0.4343, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.08464448221040513, | |
| "grad_norm": 1.208871603012085, | |
| "learning_rate": 9.922592710616364e-07, | |
| "loss": 0.4414, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.08508305465709116, | |
| "grad_norm": 1.2236772775650024, | |
| "learning_rate": 9.92134246863655e-07, | |
| "loss": 0.4345, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.08552162710377721, | |
| "grad_norm": 1.2935453653335571, | |
| "learning_rate": 9.920082290789607e-07, | |
| "loss": 0.4637, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.08596019955046325, | |
| "grad_norm": 1.307611107826233, | |
| "learning_rate": 9.91881217961975e-07, | |
| "loss": 0.4666, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.08639877199714928, | |
| "grad_norm": 1.2390419244766235, | |
| "learning_rate": 9.917532137691252e-07, | |
| "loss": 0.4072, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.08683734444383531, | |
| "grad_norm": 1.3468120098114014, | |
| "learning_rate": 9.916242167588432e-07, | |
| "loss": 0.4486, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.08727591689052135, | |
| "grad_norm": 1.270461916923523, | |
| "learning_rate": 9.914942271915655e-07, | |
| "loss": 0.4438, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.0877144893372074, | |
| "grad_norm": 1.2275724411010742, | |
| "learning_rate": 9.913632453297325e-07, | |
| "loss": 0.4503, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.08815306178389343, | |
| "grad_norm": 1.2674055099487305, | |
| "learning_rate": 9.912312714377879e-07, | |
| "loss": 0.425, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.08859163423057946, | |
| "grad_norm": 1.2971616983413696, | |
| "learning_rate": 9.910983057821786e-07, | |
| "loss": 0.4633, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.0890302066772655, | |
| "grad_norm": 1.3597564697265625, | |
| "learning_rate": 9.909643486313533e-07, | |
| "loss": 0.4709, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.08946877912395154, | |
| "grad_norm": 1.403801441192627, | |
| "learning_rate": 9.908294002557627e-07, | |
| "loss": 0.4508, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.08990735157063758, | |
| "grad_norm": 1.1720523834228516, | |
| "learning_rate": 9.906934609278588e-07, | |
| "loss": 0.4182, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.09034592401732361, | |
| "grad_norm": 1.4372029304504395, | |
| "learning_rate": 9.90556530922094e-07, | |
| "loss": 0.4495, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.09078449646400964, | |
| "grad_norm": 1.2117644548416138, | |
| "learning_rate": 9.904186105149211e-07, | |
| "loss": 0.4486, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.09122306891069569, | |
| "grad_norm": 1.253118872642517, | |
| "learning_rate": 9.902796999847923e-07, | |
| "loss": 0.4683, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.09166164135738172, | |
| "grad_norm": 1.1815754175186157, | |
| "learning_rate": 9.901397996121587e-07, | |
| "loss": 0.4208, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.09210021380406776, | |
| "grad_norm": 1.1744418144226074, | |
| "learning_rate": 9.899989096794704e-07, | |
| "loss": 0.4086, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.09253878625075379, | |
| "grad_norm": 1.194718599319458, | |
| "learning_rate": 9.898570304711746e-07, | |
| "loss": 0.4343, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.09297735869743984, | |
| "grad_norm": 1.3020581007003784, | |
| "learning_rate": 9.897141622737159e-07, | |
| "loss": 0.4369, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.09341593114412587, | |
| "grad_norm": 1.3153276443481445, | |
| "learning_rate": 9.895703053755363e-07, | |
| "loss": 0.4609, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.0938545035908119, | |
| "grad_norm": 1.3371548652648926, | |
| "learning_rate": 9.89425460067073e-07, | |
| "loss": 0.459, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.09429307603749794, | |
| "grad_norm": 1.2718877792358398, | |
| "learning_rate": 9.892796266407595e-07, | |
| "loss": 0.4669, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.09473164848418399, | |
| "grad_norm": 1.2996882200241089, | |
| "learning_rate": 9.891328053910237e-07, | |
| "loss": 0.4598, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.09517022093087002, | |
| "grad_norm": 1.3045529127120972, | |
| "learning_rate": 9.88984996614288e-07, | |
| "loss": 0.4628, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.09560879337755605, | |
| "grad_norm": 1.2552134990692139, | |
| "learning_rate": 9.888362006089688e-07, | |
| "loss": 0.423, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.09604736582424209, | |
| "grad_norm": 1.3412526845932007, | |
| "learning_rate": 9.886864176754754e-07, | |
| "loss": 0.4708, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.09648593827092813, | |
| "grad_norm": 1.2526757717132568, | |
| "learning_rate": 9.885356481162096e-07, | |
| "loss": 0.4416, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.09692451071761417, | |
| "grad_norm": 1.2230411767959595, | |
| "learning_rate": 9.883838922355653e-07, | |
| "loss": 0.4032, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.0973630831643002, | |
| "grad_norm": 1.3482338190078735, | |
| "learning_rate": 9.882311503399277e-07, | |
| "loss": 0.445, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.09780165561098624, | |
| "grad_norm": 1.3570636510849, | |
| "learning_rate": 9.880774227376725e-07, | |
| "loss": 0.519, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.09824022805767228, | |
| "grad_norm": 1.2754287719726562, | |
| "learning_rate": 9.879227097391658e-07, | |
| "loss": 0.4919, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.09867880050435832, | |
| "grad_norm": 1.3529372215270996, | |
| "learning_rate": 9.87767011656763e-07, | |
| "loss": 0.4193, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.09911737295104435, | |
| "grad_norm": 1.3907129764556885, | |
| "learning_rate": 9.876103288048084e-07, | |
| "loss": 0.4202, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.09955594539773038, | |
| "grad_norm": 1.2041593790054321, | |
| "learning_rate": 9.87452661499634e-07, | |
| "loss": 0.4326, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.09999451784441643, | |
| "grad_norm": 1.3893024921417236, | |
| "learning_rate": 9.872940100595597e-07, | |
| "loss": 0.4933, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.10043309029110246, | |
| "grad_norm": 1.1358956098556519, | |
| "learning_rate": 9.871343748048929e-07, | |
| "loss": 0.3958, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.1008716627377885, | |
| "grad_norm": 1.258034586906433, | |
| "learning_rate": 9.869737560579262e-07, | |
| "loss": 0.4579, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.10131023518447453, | |
| "grad_norm": 1.3062182664871216, | |
| "learning_rate": 9.868121541429386e-07, | |
| "loss": 0.4671, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.10174880763116058, | |
| "grad_norm": 1.2269024848937988, | |
| "learning_rate": 9.866495693861934e-07, | |
| "loss": 0.4541, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.10218738007784661, | |
| "grad_norm": 1.3577250242233276, | |
| "learning_rate": 9.86486002115939e-07, | |
| "loss": 0.5296, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.10262595252453265, | |
| "grad_norm": 1.330257534980774, | |
| "learning_rate": 9.863214526624063e-07, | |
| "loss": 0.4678, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.10306452497121868, | |
| "grad_norm": 1.1935516595840454, | |
| "learning_rate": 9.861559213578107e-07, | |
| "loss": 0.4826, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.10350309741790473, | |
| "grad_norm": 1.4533356428146362, | |
| "learning_rate": 9.859894085363485e-07, | |
| "loss": 0.5029, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.10394166986459076, | |
| "grad_norm": 1.274011254310608, | |
| "learning_rate": 9.85821914534198e-07, | |
| "loss": 0.4471, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.1043802423112768, | |
| "grad_norm": 1.3055455684661865, | |
| "learning_rate": 9.856534396895193e-07, | |
| "loss": 0.4521, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.10481881475796283, | |
| "grad_norm": 1.290225625038147, | |
| "learning_rate": 9.854839843424512e-07, | |
| "loss": 0.4444, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.10525738720464886, | |
| "grad_norm": 1.256276249885559, | |
| "learning_rate": 9.853135488351132e-07, | |
| "loss": 0.4397, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.10569595965133491, | |
| "grad_norm": 1.2009029388427734, | |
| "learning_rate": 9.851421335116036e-07, | |
| "loss": 0.464, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.10613453209802094, | |
| "grad_norm": 1.3538483381271362, | |
| "learning_rate": 9.849697387179987e-07, | |
| "loss": 0.4543, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.10657310454470698, | |
| "grad_norm": 1.314831018447876, | |
| "learning_rate": 9.84796364802352e-07, | |
| "loss": 0.4675, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.10701167699139301, | |
| "grad_norm": 1.334395408630371, | |
| "learning_rate": 9.846220121146943e-07, | |
| "loss": 0.4541, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.10745024943807906, | |
| "grad_norm": 1.2025914192199707, | |
| "learning_rate": 9.844466810070317e-07, | |
| "loss": 0.3998, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.10788882188476509, | |
| "grad_norm": 1.438745141029358, | |
| "learning_rate": 9.842703718333468e-07, | |
| "loss": 0.4303, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.10832739433145112, | |
| "grad_norm": 1.2495355606079102, | |
| "learning_rate": 9.84093084949596e-07, | |
| "loss": 0.4415, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.10876596677813716, | |
| "grad_norm": 1.3335902690887451, | |
| "learning_rate": 9.8391482071371e-07, | |
| "loss": 0.4923, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.1092045392248232, | |
| "grad_norm": 1.359126091003418, | |
| "learning_rate": 9.837355794855923e-07, | |
| "loss": 0.4598, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.10964311167150924, | |
| "grad_norm": 1.1839964389801025, | |
| "learning_rate": 9.835553616271194e-07, | |
| "loss": 0.4053, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.11008168411819527, | |
| "grad_norm": 1.2885066270828247, | |
| "learning_rate": 9.83374167502139e-07, | |
| "loss": 0.4521, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.1105202565648813, | |
| "grad_norm": 1.381494402885437, | |
| "learning_rate": 9.83191997476471e-07, | |
| "loss": 0.5172, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.11095882901156735, | |
| "grad_norm": 1.3523175716400146, | |
| "learning_rate": 9.830088519179035e-07, | |
| "loss": 0.4813, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.11139740145825339, | |
| "grad_norm": 1.1988216638565063, | |
| "learning_rate": 9.82824731196196e-07, | |
| "loss": 0.4377, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.11183597390493942, | |
| "grad_norm": 1.1344822645187378, | |
| "learning_rate": 9.826396356830764e-07, | |
| "loss": 0.4638, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.11227454635162545, | |
| "grad_norm": 1.2201296091079712, | |
| "learning_rate": 9.824535657522397e-07, | |
| "loss": 0.4378, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.1127131187983115, | |
| "grad_norm": 1.4678555727005005, | |
| "learning_rate": 9.822665217793496e-07, | |
| "loss": 0.4644, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.11315169124499753, | |
| "grad_norm": 1.251297950744629, | |
| "learning_rate": 9.820785041420348e-07, | |
| "loss": 0.4329, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.11359026369168357, | |
| "grad_norm": 1.2199487686157227, | |
| "learning_rate": 9.818895132198913e-07, | |
| "loss": 0.4474, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.1140288361383696, | |
| "grad_norm": 1.311081051826477, | |
| "learning_rate": 9.81699549394479e-07, | |
| "loss": 0.4531, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.11446740858505565, | |
| "grad_norm": 1.3128409385681152, | |
| "learning_rate": 9.815086130493221e-07, | |
| "loss": 0.4166, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.11490598103174168, | |
| "grad_norm": 1.300299882888794, | |
| "learning_rate": 9.81316704569909e-07, | |
| "loss": 0.4667, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.11534455347842772, | |
| "grad_norm": 1.1987988948822021, | |
| "learning_rate": 9.811238243436904e-07, | |
| "loss": 0.4499, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.11578312592511375, | |
| "grad_norm": 1.2745237350463867, | |
| "learning_rate": 9.809299727600783e-07, | |
| "loss": 0.4198, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.1162216983717998, | |
| "grad_norm": 1.3099387884140015, | |
| "learning_rate": 9.807351502104468e-07, | |
| "loss": 0.4695, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.11666027081848583, | |
| "grad_norm": 1.3721712827682495, | |
| "learning_rate": 9.805393570881295e-07, | |
| "loss": 0.4978, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.11709884326517186, | |
| "grad_norm": 1.3487995862960815, | |
| "learning_rate": 9.8034259378842e-07, | |
| "loss": 0.4425, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.1175374157118579, | |
| "grad_norm": 1.3157742023468018, | |
| "learning_rate": 9.801448607085704e-07, | |
| "loss": 0.4446, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.11797598815854395, | |
| "grad_norm": 1.368003487586975, | |
| "learning_rate": 9.799461582477909e-07, | |
| "loss": 0.462, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.11841456060522998, | |
| "grad_norm": 1.254192590713501, | |
| "learning_rate": 9.797464868072486e-07, | |
| "loss": 0.4711, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.11885313305191601, | |
| "grad_norm": 1.3297892808914185, | |
| "learning_rate": 9.795458467900672e-07, | |
| "loss": 0.4489, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.11929170549860205, | |
| "grad_norm": 1.3227829933166504, | |
| "learning_rate": 9.793442386013255e-07, | |
| "loss": 0.4494, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.1197302779452881, | |
| "grad_norm": 1.3050018548965454, | |
| "learning_rate": 9.79141662648057e-07, | |
| "loss": 0.4533, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.12016885039197413, | |
| "grad_norm": 1.2946937084197998, | |
| "learning_rate": 9.789381193392498e-07, | |
| "loss": 0.4566, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.12060742283866016, | |
| "grad_norm": 1.1722913980484009, | |
| "learning_rate": 9.787336090858441e-07, | |
| "loss": 0.4402, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.1210459952853462, | |
| "grad_norm": 1.3350423574447632, | |
| "learning_rate": 9.78528132300733e-07, | |
| "loss": 0.4497, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.12148456773203223, | |
| "grad_norm": 1.2697076797485352, | |
| "learning_rate": 9.7832168939876e-07, | |
| "loss": 0.4525, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.12192314017871828, | |
| "grad_norm": 1.3002448081970215, | |
| "learning_rate": 9.781142807967204e-07, | |
| "loss": 0.4955, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.12236171262540431, | |
| "grad_norm": 1.319963812828064, | |
| "learning_rate": 9.779059069133582e-07, | |
| "loss": 0.4588, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.12280028507209034, | |
| "grad_norm": 1.4620566368103027, | |
| "learning_rate": 9.776965681693666e-07, | |
| "loss": 0.4813, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.12323885751877638, | |
| "grad_norm": 1.4124107360839844, | |
| "learning_rate": 9.774862649873868e-07, | |
| "loss": 0.4535, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.12367742996546242, | |
| "grad_norm": 1.3538119792938232, | |
| "learning_rate": 9.772749977920071e-07, | |
| "loss": 0.4501, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.12411600241214846, | |
| "grad_norm": 1.2902806997299194, | |
| "learning_rate": 9.770627670097623e-07, | |
| "loss": 0.4242, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.12455457485883449, | |
| "grad_norm": 1.2163552045822144, | |
| "learning_rate": 9.768495730691321e-07, | |
| "loss": 0.4376, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.12499314730552052, | |
| "grad_norm": 1.2694215774536133, | |
| "learning_rate": 9.766354164005414e-07, | |
| "loss": 0.4893, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.12543171975220657, | |
| "grad_norm": 1.3819620609283447, | |
| "learning_rate": 9.76420297436358e-07, | |
| "loss": 0.4442, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.1258702921988926, | |
| "grad_norm": 1.2240216732025146, | |
| "learning_rate": 9.762042166108932e-07, | |
| "loss": 0.4411, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.12630886464557864, | |
| "grad_norm": 1.3175026178359985, | |
| "learning_rate": 9.759871743604001e-07, | |
| "loss": 0.3919, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.1267474370922647, | |
| "grad_norm": 1.219504714012146, | |
| "learning_rate": 9.757691711230727e-07, | |
| "loss": 0.4241, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.1271860095389507, | |
| "grad_norm": 1.389089584350586, | |
| "learning_rate": 9.75550207339045e-07, | |
| "loss": 0.4885, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.12762458198563675, | |
| "grad_norm": 1.3088550567626953, | |
| "learning_rate": 9.753302834503908e-07, | |
| "loss": 0.4106, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.1280631544323228, | |
| "grad_norm": 1.2590484619140625, | |
| "learning_rate": 9.751093999011216e-07, | |
| "loss": 0.472, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.12850172687900882, | |
| "grad_norm": 1.2284749746322632, | |
| "learning_rate": 9.74887557137187e-07, | |
| "loss": 0.4192, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.12894029932569487, | |
| "grad_norm": 1.2713497877120972, | |
| "learning_rate": 9.746647556064732e-07, | |
| "loss": 0.4552, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.1293788717723809, | |
| "grad_norm": 1.276537537574768, | |
| "learning_rate": 9.744409957588014e-07, | |
| "loss": 0.4545, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.12981744421906694, | |
| "grad_norm": 1.3731132745742798, | |
| "learning_rate": 9.742162780459281e-07, | |
| "loss": 0.4143, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.13025601666575298, | |
| "grad_norm": 1.246085524559021, | |
| "learning_rate": 9.73990602921544e-07, | |
| "loss": 0.4145, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.130694589112439, | |
| "grad_norm": 1.2259469032287598, | |
| "learning_rate": 9.737639708412721e-07, | |
| "loss": 0.4474, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.13113316155912505, | |
| "grad_norm": 1.3381538391113281, | |
| "learning_rate": 9.735363822626676e-07, | |
| "loss": 0.4735, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.1315717340058111, | |
| "grad_norm": 1.2572318315505981, | |
| "learning_rate": 9.73307837645217e-07, | |
| "loss": 0.406, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.13201030645249712, | |
| "grad_norm": 1.3827505111694336, | |
| "learning_rate": 9.730783374503369e-07, | |
| "loss": 0.405, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.13244887889918316, | |
| "grad_norm": 1.2806872129440308, | |
| "learning_rate": 9.728478821413728e-07, | |
| "loss": 0.439, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.13288745134586918, | |
| "grad_norm": 1.2340537309646606, | |
| "learning_rate": 9.726164721835995e-07, | |
| "loss": 0.4199, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.13332602379255523, | |
| "grad_norm": 1.3651092052459717, | |
| "learning_rate": 9.723841080442176e-07, | |
| "loss": 0.4593, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.13376459623924128, | |
| "grad_norm": 1.4623818397521973, | |
| "learning_rate": 9.721507901923559e-07, | |
| "loss": 0.4769, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.1342031686859273, | |
| "grad_norm": 1.2197152376174927, | |
| "learning_rate": 9.719165190990673e-07, | |
| "loss": 0.4333, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.13464174113261335, | |
| "grad_norm": 1.3367750644683838, | |
| "learning_rate": 9.716812952373297e-07, | |
| "loss": 0.4313, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.1350803135792994, | |
| "grad_norm": 1.3246334791183472, | |
| "learning_rate": 9.714451190820449e-07, | |
| "loss": 0.4228, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.1355188860259854, | |
| "grad_norm": 1.2900984287261963, | |
| "learning_rate": 9.71207991110037e-07, | |
| "loss": 0.414, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.13595745847267146, | |
| "grad_norm": 1.2274436950683594, | |
| "learning_rate": 9.709699118000517e-07, | |
| "loss": 0.4576, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.13639603091935748, | |
| "grad_norm": 1.288940191268921, | |
| "learning_rate": 9.707308816327556e-07, | |
| "loss": 0.4692, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.13683460336604353, | |
| "grad_norm": 1.3872264623641968, | |
| "learning_rate": 9.704909010907348e-07, | |
| "loss": 0.4913, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.13727317581272958, | |
| "grad_norm": 1.2140966653823853, | |
| "learning_rate": 9.702499706584943e-07, | |
| "loss": 0.4335, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.1377117482594156, | |
| "grad_norm": 1.399438738822937, | |
| "learning_rate": 9.700080908224567e-07, | |
| "loss": 0.4552, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.13815032070610164, | |
| "grad_norm": 1.334566354751587, | |
| "learning_rate": 9.697652620709615e-07, | |
| "loss": 0.4204, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.1385888931527877, | |
| "grad_norm": 1.1500535011291504, | |
| "learning_rate": 9.695214848942641e-07, | |
| "loss": 0.419, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.1390274655994737, | |
| "grad_norm": 1.257188081741333, | |
| "learning_rate": 9.692767597845349e-07, | |
| "loss": 0.4364, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.13946603804615976, | |
| "grad_norm": 1.3126678466796875, | |
| "learning_rate": 9.690310872358571e-07, | |
| "loss": 0.4729, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.13990461049284578, | |
| "grad_norm": 1.2673767805099487, | |
| "learning_rate": 9.687844677442282e-07, | |
| "loss": 0.4365, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.14034318293953182, | |
| "grad_norm": 1.3040989637374878, | |
| "learning_rate": 9.685369018075562e-07, | |
| "loss": 0.4715, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.14078175538621787, | |
| "grad_norm": 1.3866188526153564, | |
| "learning_rate": 9.682883899256607e-07, | |
| "loss": 0.438, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.1412203278329039, | |
| "grad_norm": 1.2850979566574097, | |
| "learning_rate": 9.680389326002707e-07, | |
| "loss": 0.4325, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.14165890027958994, | |
| "grad_norm": 1.244288682937622, | |
| "learning_rate": 9.677885303350244e-07, | |
| "loss": 0.447, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.14209747272627596, | |
| "grad_norm": 1.327620029449463, | |
| "learning_rate": 9.675371836354673e-07, | |
| "loss": 0.4371, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.142536045172962, | |
| "grad_norm": 1.311292290687561, | |
| "learning_rate": 9.672848930090522e-07, | |
| "loss": 0.4506, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.14297461761964805, | |
| "grad_norm": 1.2936956882476807, | |
| "learning_rate": 9.670316589651367e-07, | |
| "loss": 0.4778, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.14341319006633407, | |
| "grad_norm": 1.2305073738098145, | |
| "learning_rate": 9.667774820149843e-07, | |
| "loss": 0.4253, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.14385176251302012, | |
| "grad_norm": 1.366324782371521, | |
| "learning_rate": 9.665223626717613e-07, | |
| "loss": 0.463, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.14429033495970617, | |
| "grad_norm": 1.3423768281936646, | |
| "learning_rate": 9.66266301450537e-07, | |
| "loss": 0.4535, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.1447289074063922, | |
| "grad_norm": 1.348443627357483, | |
| "learning_rate": 9.66009298868282e-07, | |
| "loss": 0.4935, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.14516747985307824, | |
| "grad_norm": 1.1361678838729858, | |
| "learning_rate": 9.657513554438677e-07, | |
| "loss": 0.3826, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.14560605229976425, | |
| "grad_norm": 1.1119284629821777, | |
| "learning_rate": 9.65492471698065e-07, | |
| "loss": 0.4397, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.1460446247464503, | |
| "grad_norm": 1.3069874048233032, | |
| "learning_rate": 9.652326481535433e-07, | |
| "loss": 0.4402, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.14648319719313635, | |
| "grad_norm": 1.3529688119888306, | |
| "learning_rate": 9.64971885334869e-07, | |
| "loss": 0.4139, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.14692176963982237, | |
| "grad_norm": 1.2486492395401, | |
| "learning_rate": 9.647101837685052e-07, | |
| "loss": 0.3979, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.14736034208650842, | |
| "grad_norm": 1.3289217948913574, | |
| "learning_rate": 9.644475439828102e-07, | |
| "loss": 0.451, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.14779891453319446, | |
| "grad_norm": 1.3076781034469604, | |
| "learning_rate": 9.641839665080363e-07, | |
| "loss": 0.482, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.14823748697988048, | |
| "grad_norm": 1.285366415977478, | |
| "learning_rate": 9.639194518763294e-07, | |
| "loss": 0.4418, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.14867605942656653, | |
| "grad_norm": 1.2070653438568115, | |
| "learning_rate": 9.636540006217268e-07, | |
| "loss": 0.439, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.14911463187325255, | |
| "grad_norm": 1.2946048974990845, | |
| "learning_rate": 9.633876132801577e-07, | |
| "loss": 0.4141, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.1495532043199386, | |
| "grad_norm": 1.31601881980896, | |
| "learning_rate": 9.6312029038944e-07, | |
| "loss": 0.4488, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.14999177676662465, | |
| "grad_norm": 1.2885645627975464, | |
| "learning_rate": 9.628520324892816e-07, | |
| "loss": 0.4398, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.15043034921331067, | |
| "grad_norm": 1.262040138244629, | |
| "learning_rate": 9.625828401212772e-07, | |
| "loss": 0.4839, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.1508689216599967, | |
| "grad_norm": 1.273505687713623, | |
| "learning_rate": 9.623127138289087e-07, | |
| "loss": 0.419, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.15130749410668276, | |
| "grad_norm": 1.374241590499878, | |
| "learning_rate": 9.620416541575432e-07, | |
| "loss": 0.4567, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.15174606655336878, | |
| "grad_norm": 1.118963360786438, | |
| "learning_rate": 9.617696616544325e-07, | |
| "loss": 0.3974, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.15218463900005483, | |
| "grad_norm": 1.2339282035827637, | |
| "learning_rate": 9.614967368687115e-07, | |
| "loss": 0.4428, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.15262321144674085, | |
| "grad_norm": 1.2473440170288086, | |
| "learning_rate": 9.612228803513975e-07, | |
| "loss": 0.4359, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.1530617838934269, | |
| "grad_norm": 1.1782147884368896, | |
| "learning_rate": 9.609480926553887e-07, | |
| "loss": 0.4493, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.15350035634011294, | |
| "grad_norm": 1.233314037322998, | |
| "learning_rate": 9.606723743354637e-07, | |
| "loss": 0.4602, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.15393892878679896, | |
| "grad_norm": 1.267967700958252, | |
| "learning_rate": 9.60395725948279e-07, | |
| "loss": 0.4777, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.154377501233485, | |
| "grad_norm": 1.3795247077941895, | |
| "learning_rate": 9.601181480523702e-07, | |
| "loss": 0.425, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.15481607368017106, | |
| "grad_norm": 1.411054253578186, | |
| "learning_rate": 9.598396412081488e-07, | |
| "loss": 0.479, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.15525464612685708, | |
| "grad_norm": 1.2811428308486938, | |
| "learning_rate": 9.595602059779015e-07, | |
| "loss": 0.4635, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.15569321857354312, | |
| "grad_norm": 1.3159114122390747, | |
| "learning_rate": 9.592798429257899e-07, | |
| "loss": 0.444, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.15613179102022914, | |
| "grad_norm": 1.1596899032592773, | |
| "learning_rate": 9.589985526178484e-07, | |
| "loss": 0.3732, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.1565703634669152, | |
| "grad_norm": 1.2319432497024536, | |
| "learning_rate": 9.587163356219836e-07, | |
| "loss": 0.426, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.15700893591360124, | |
| "grad_norm": 1.502126932144165, | |
| "learning_rate": 9.584331925079734e-07, | |
| "loss": 0.4757, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.15744750836028726, | |
| "grad_norm": 1.2502065896987915, | |
| "learning_rate": 9.58149123847465e-07, | |
| "loss": 0.4382, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.1578860808069733, | |
| "grad_norm": 1.2441209554672241, | |
| "learning_rate": 9.578641302139742e-07, | |
| "loss": 0.431, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.15832465325365933, | |
| "grad_norm": 1.2779874801635742, | |
| "learning_rate": 9.575782121828845e-07, | |
| "loss": 0.4354, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.15876322570034537, | |
| "grad_norm": 1.3990627527236938, | |
| "learning_rate": 9.572913703314454e-07, | |
| "loss": 0.4955, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.15920179814703142, | |
| "grad_norm": 1.2426034212112427, | |
| "learning_rate": 9.570036052387724e-07, | |
| "loss": 0.4749, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.15964037059371744, | |
| "grad_norm": 1.1871353387832642, | |
| "learning_rate": 9.567149174858438e-07, | |
| "loss": 0.4638, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.1600789430404035, | |
| "grad_norm": 1.3922412395477295, | |
| "learning_rate": 9.564253076555013e-07, | |
| "loss": 0.4672, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.16051751548708953, | |
| "grad_norm": 1.1922954320907593, | |
| "learning_rate": 9.561347763324483e-07, | |
| "loss": 0.4251, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.16095608793377555, | |
| "grad_norm": 1.1256937980651855, | |
| "learning_rate": 9.558433241032483e-07, | |
| "loss": 0.4816, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.1613946603804616, | |
| "grad_norm": 1.2206835746765137, | |
| "learning_rate": 9.55550951556324e-07, | |
| "loss": 0.442, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.16183323282714762, | |
| "grad_norm": 1.230305790901184, | |
| "learning_rate": 9.55257659281957e-07, | |
| "loss": 0.4258, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.16227180527383367, | |
| "grad_norm": 1.1930097341537476, | |
| "learning_rate": 9.549634478722843e-07, | |
| "loss": 0.4303, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.16271037772051972, | |
| "grad_norm": 1.1612834930419922, | |
| "learning_rate": 9.546683179213e-07, | |
| "loss": 0.4636, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.16314895016720574, | |
| "grad_norm": 1.4160550832748413, | |
| "learning_rate": 9.54372270024852e-07, | |
| "loss": 0.4658, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.16358752261389178, | |
| "grad_norm": 1.2340151071548462, | |
| "learning_rate": 9.540753047806413e-07, | |
| "loss": 0.4572, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.16402609506057783, | |
| "grad_norm": 1.3071835041046143, | |
| "learning_rate": 9.537774227882215e-07, | |
| "loss": 0.4409, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.16446466750726385, | |
| "grad_norm": 1.381076455116272, | |
| "learning_rate": 9.534786246489966e-07, | |
| "loss": 0.466, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.1649032399539499, | |
| "grad_norm": 1.2368775606155396, | |
| "learning_rate": 9.531789109662204e-07, | |
| "loss": 0.4359, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.16534181240063592, | |
| "grad_norm": 1.3651288747787476, | |
| "learning_rate": 9.528782823449953e-07, | |
| "loss": 0.4409, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.16578038484732197, | |
| "grad_norm": 1.466973900794983, | |
| "learning_rate": 9.525767393922706e-07, | |
| "loss": 0.4804, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.166218957294008, | |
| "grad_norm": 1.3845632076263428, | |
| "learning_rate": 9.522742827168416e-07, | |
| "loss": 0.4723, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.16665752974069403, | |
| "grad_norm": 1.242058277130127, | |
| "learning_rate": 9.519709129293488e-07, | |
| "loss": 0.4361, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.16709610218738008, | |
| "grad_norm": 1.2678145170211792, | |
| "learning_rate": 9.516666306422755e-07, | |
| "loss": 0.4524, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.16753467463406613, | |
| "grad_norm": 1.2828023433685303, | |
| "learning_rate": 9.51361436469948e-07, | |
| "loss": 0.4527, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.16797324708075215, | |
| "grad_norm": 1.3856935501098633, | |
| "learning_rate": 9.510553310285331e-07, | |
| "loss": 0.4676, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.1684118195274382, | |
| "grad_norm": 1.3793553113937378, | |
| "learning_rate": 9.507483149360375e-07, | |
| "loss": 0.4231, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.16885039197412421, | |
| "grad_norm": 1.2514710426330566, | |
| "learning_rate": 9.504403888123066e-07, | |
| "loss": 0.4119, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.16928896442081026, | |
| "grad_norm": 1.2418358325958252, | |
| "learning_rate": 9.50131553279023e-07, | |
| "loss": 0.4563, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.1697275368674963, | |
| "grad_norm": 1.291680097579956, | |
| "learning_rate": 9.498218089597054e-07, | |
| "loss": 0.4411, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.17016610931418233, | |
| "grad_norm": 1.2920475006103516, | |
| "learning_rate": 9.495111564797073e-07, | |
| "loss": 0.4543, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.17060468176086838, | |
| "grad_norm": 1.341176152229309, | |
| "learning_rate": 9.491995964662154e-07, | |
| "loss": 0.4597, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.17104325420755442, | |
| "grad_norm": 1.2180360555648804, | |
| "learning_rate": 9.488871295482491e-07, | |
| "loss": 0.4578, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.17148182665424044, | |
| "grad_norm": 1.3022912740707397, | |
| "learning_rate": 9.485737563566585e-07, | |
| "loss": 0.4191, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.1719203991009265, | |
| "grad_norm": 1.261922001838684, | |
| "learning_rate": 9.482594775241236e-07, | |
| "loss": 0.4556, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.1723589715476125, | |
| "grad_norm": 1.392162799835205, | |
| "learning_rate": 9.479442936851526e-07, | |
| "loss": 0.456, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.17279754399429856, | |
| "grad_norm": 1.3650363683700562, | |
| "learning_rate": 9.476282054760809e-07, | |
| "loss": 0.4934, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.1732361164409846, | |
| "grad_norm": 1.3042758703231812, | |
| "learning_rate": 9.4731121353507e-07, | |
| "loss": 0.4519, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.17367468888767063, | |
| "grad_norm": 1.3327823877334595, | |
| "learning_rate": 9.469933185021058e-07, | |
| "loss": 0.4212, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.17411326133435667, | |
| "grad_norm": 1.2559716701507568, | |
| "learning_rate": 9.466745210189972e-07, | |
| "loss": 0.4643, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.1745518337810427, | |
| "grad_norm": 1.188941478729248, | |
| "learning_rate": 9.463548217293759e-07, | |
| "loss": 0.4303, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.17499040622772874, | |
| "grad_norm": 1.3942642211914062, | |
| "learning_rate": 9.460342212786932e-07, | |
| "loss": 0.4702, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.1754289786744148, | |
| "grad_norm": 1.2623578310012817, | |
| "learning_rate": 9.457127203142206e-07, | |
| "loss": 0.4399, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.1758675511211008, | |
| "grad_norm": 1.3886511325836182, | |
| "learning_rate": 9.453903194850475e-07, | |
| "loss": 0.4714, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.17630612356778685, | |
| "grad_norm": 1.1634944677352905, | |
| "learning_rate": 9.4506701944208e-07, | |
| "loss": 0.4144, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.1767446960144729, | |
| "grad_norm": 1.3607139587402344, | |
| "learning_rate": 9.447428208380395e-07, | |
| "loss": 0.4516, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.17718326846115892, | |
| "grad_norm": 1.351024866104126, | |
| "learning_rate": 9.444177243274617e-07, | |
| "loss": 0.4194, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.17762184090784497, | |
| "grad_norm": 1.2279891967773438, | |
| "learning_rate": 9.440917305666951e-07, | |
| "loss": 0.482, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.178060413354531, | |
| "grad_norm": 1.2650517225265503, | |
| "learning_rate": 9.437648402138998e-07, | |
| "loss": 0.414, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.17849898580121704, | |
| "grad_norm": 1.3450144529342651, | |
| "learning_rate": 9.434370539290459e-07, | |
| "loss": 0.4864, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.17893755824790308, | |
| "grad_norm": 1.2379168272018433, | |
| "learning_rate": 9.431083723739124e-07, | |
| "loss": 0.4029, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.1793761306945891, | |
| "grad_norm": 1.3272401094436646, | |
| "learning_rate": 9.427787962120857e-07, | |
| "loss": 0.4524, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.17981470314127515, | |
| "grad_norm": 1.3062082529067993, | |
| "learning_rate": 9.424483261089583e-07, | |
| "loss": 0.4385, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.1802532755879612, | |
| "grad_norm": 1.2583765983581543, | |
| "learning_rate": 9.421169627317278e-07, | |
| "loss": 0.4247, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.18069184803464722, | |
| "grad_norm": 1.2366396188735962, | |
| "learning_rate": 9.417847067493952e-07, | |
| "loss": 0.4399, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.18113042048133327, | |
| "grad_norm": 1.334314227104187, | |
| "learning_rate": 9.414515588327631e-07, | |
| "loss": 0.438, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.18156899292801928, | |
| "grad_norm": 1.3514381647109985, | |
| "learning_rate": 9.411175196544358e-07, | |
| "loss": 0.5151, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.18200756537470533, | |
| "grad_norm": 1.2838636636734009, | |
| "learning_rate": 9.40782589888816e-07, | |
| "loss": 0.4465, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.18244613782139138, | |
| "grad_norm": 1.25214684009552, | |
| "learning_rate": 9.404467702121051e-07, | |
| "loss": 0.3996, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.1828847102680774, | |
| "grad_norm": 1.323198676109314, | |
| "learning_rate": 9.40110061302301e-07, | |
| "loss": 0.4093, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.18332328271476345, | |
| "grad_norm": 1.3139373064041138, | |
| "learning_rate": 9.397724638391967e-07, | |
| "loss": 0.4789, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.1837618551614495, | |
| "grad_norm": 1.1997904777526855, | |
| "learning_rate": 9.394339785043794e-07, | |
| "loss": 0.4365, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.18420042760813551, | |
| "grad_norm": 1.297034502029419, | |
| "learning_rate": 9.390946059812289e-07, | |
| "loss": 0.4518, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.18463900005482156, | |
| "grad_norm": 1.1115834712982178, | |
| "learning_rate": 9.387543469549155e-07, | |
| "loss": 0.4254, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.18507757250150758, | |
| "grad_norm": 1.141788363456726, | |
| "learning_rate": 9.384132021124004e-07, | |
| "loss": 0.3842, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.18551614494819363, | |
| "grad_norm": 1.3769128322601318, | |
| "learning_rate": 9.380711721424326e-07, | |
| "loss": 0.4629, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.18595471739487968, | |
| "grad_norm": 1.3783560991287231, | |
| "learning_rate": 9.377282577355478e-07, | |
| "loss": 0.4906, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.1863932898415657, | |
| "grad_norm": 1.255490779876709, | |
| "learning_rate": 9.373844595840678e-07, | |
| "loss": 0.4537, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.18683186228825174, | |
| "grad_norm": 1.354235053062439, | |
| "learning_rate": 9.370397783820984e-07, | |
| "loss": 0.4497, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.1872704347349378, | |
| "grad_norm": 1.347273826599121, | |
| "learning_rate": 9.366942148255285e-07, | |
| "loss": 0.4267, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.1877090071816238, | |
| "grad_norm": 1.2848392724990845, | |
| "learning_rate": 9.363477696120283e-07, | |
| "loss": 0.4738, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.18814757962830986, | |
| "grad_norm": 1.2392444610595703, | |
| "learning_rate": 9.360004434410476e-07, | |
| "loss": 0.4834, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.18858615207499588, | |
| "grad_norm": 1.195491909980774, | |
| "learning_rate": 9.356522370138154e-07, | |
| "loss": 0.3701, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.18902472452168192, | |
| "grad_norm": 1.2303284406661987, | |
| "learning_rate": 9.353031510333373e-07, | |
| "loss": 0.4281, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.18946329696836797, | |
| "grad_norm": 1.2569408416748047, | |
| "learning_rate": 9.349531862043951e-07, | |
| "loss": 0.4603, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.189901869415054, | |
| "grad_norm": 1.3959214687347412, | |
| "learning_rate": 9.346023432335449e-07, | |
| "loss": 0.4663, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.19034044186174004, | |
| "grad_norm": 1.31195068359375, | |
| "learning_rate": 9.342506228291156e-07, | |
| "loss": 0.4729, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.1907790143084261, | |
| "grad_norm": 1.33975088596344, | |
| "learning_rate": 9.338980257012074e-07, | |
| "loss": 0.4647, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.1912175867551121, | |
| "grad_norm": 1.283033013343811, | |
| "learning_rate": 9.335445525616909e-07, | |
| "loss": 0.4047, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.19165615920179815, | |
| "grad_norm": 1.341052770614624, | |
| "learning_rate": 9.331902041242053e-07, | |
| "loss": 0.4434, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.19209473164848417, | |
| "grad_norm": 1.3923901319503784, | |
| "learning_rate": 9.328349811041564e-07, | |
| "loss": 0.4544, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.19253330409517022, | |
| "grad_norm": 1.2661969661712646, | |
| "learning_rate": 9.324788842187163e-07, | |
| "loss": 0.4461, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.19297187654185627, | |
| "grad_norm": 1.2110265493392944, | |
| "learning_rate": 9.321219141868215e-07, | |
| "loss": 0.4707, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.1934104489885423, | |
| "grad_norm": 1.4487905502319336, | |
| "learning_rate": 9.317640717291708e-07, | |
| "loss": 0.4779, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.19384902143522834, | |
| "grad_norm": 1.3091298341751099, | |
| "learning_rate": 9.314053575682246e-07, | |
| "loss": 0.4674, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.19428759388191436, | |
| "grad_norm": 1.2794967889785767, | |
| "learning_rate": 9.310457724282033e-07, | |
| "loss": 0.3946, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.1947261663286004, | |
| "grad_norm": 1.2558822631835938, | |
| "learning_rate": 9.306853170350854e-07, | |
| "loss": 0.4358, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.19516473877528645, | |
| "grad_norm": 1.1746054887771606, | |
| "learning_rate": 9.303239921166071e-07, | |
| "loss": 0.434, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.19560331122197247, | |
| "grad_norm": 1.2854063510894775, | |
| "learning_rate": 9.299617984022597e-07, | |
| "loss": 0.4739, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.19604188366865852, | |
| "grad_norm": 1.214192509651184, | |
| "learning_rate": 9.29598736623288e-07, | |
| "loss": 0.4272, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.19648045611534457, | |
| "grad_norm": 1.248357892036438, | |
| "learning_rate": 9.292348075126901e-07, | |
| "loss": 0.431, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.19691902856203058, | |
| "grad_norm": 1.2610982656478882, | |
| "learning_rate": 9.288700118052151e-07, | |
| "loss": 0.4582, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.19735760100871663, | |
| "grad_norm": 1.3101803064346313, | |
| "learning_rate": 9.285043502373615e-07, | |
| "loss": 0.4896, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.19779617345540265, | |
| "grad_norm": 1.4143675565719604, | |
| "learning_rate": 9.281378235473761e-07, | |
| "loss": 0.4681, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.1982347459020887, | |
| "grad_norm": 1.2853251695632935, | |
| "learning_rate": 9.27770432475252e-07, | |
| "loss": 0.4826, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.19867331834877475, | |
| "grad_norm": 1.3457460403442383, | |
| "learning_rate": 9.274021777627276e-07, | |
| "loss": 0.4796, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.19911189079546077, | |
| "grad_norm": 1.3269720077514648, | |
| "learning_rate": 9.270330601532854e-07, | |
| "loss": 0.4544, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.1995504632421468, | |
| "grad_norm": 1.2965871095657349, | |
| "learning_rate": 9.266630803921491e-07, | |
| "loss": 0.4134, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.19998903568883286, | |
| "grad_norm": 1.250986933708191, | |
| "learning_rate": 9.262922392262837e-07, | |
| "loss": 0.4538, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.20042760813551888, | |
| "grad_norm": 1.3280874490737915, | |
| "learning_rate": 9.259205374043933e-07, | |
| "loss": 0.457, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.20086618058220493, | |
| "grad_norm": 1.3324693441390991, | |
| "learning_rate": 9.255479756769193e-07, | |
| "loss": 0.4568, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.20130475302889095, | |
| "grad_norm": 1.4459820985794067, | |
| "learning_rate": 9.251745547960393e-07, | |
| "loss": 0.5189, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.201743325475577, | |
| "grad_norm": 1.2799947261810303, | |
| "learning_rate": 9.248002755156659e-07, | |
| "loss": 0.4259, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.20218189792226304, | |
| "grad_norm": 1.3187311887741089, | |
| "learning_rate": 9.244251385914437e-07, | |
| "loss": 0.4845, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.20262047036894906, | |
| "grad_norm": 1.3244918584823608, | |
| "learning_rate": 9.240491447807501e-07, | |
| "loss": 0.4272, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.2030590428156351, | |
| "grad_norm": 1.355553150177002, | |
| "learning_rate": 9.236722948426918e-07, | |
| "loss": 0.4544, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.20349761526232116, | |
| "grad_norm": 1.241315245628357, | |
| "learning_rate": 9.232945895381039e-07, | |
| "loss": 0.419, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.20393618770900718, | |
| "grad_norm": 1.3401538133621216, | |
| "learning_rate": 9.229160296295487e-07, | |
| "loss": 0.3983, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.20437476015569322, | |
| "grad_norm": 1.4097830057144165, | |
| "learning_rate": 9.225366158813138e-07, | |
| "loss": 0.4733, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.20481333260237924, | |
| "grad_norm": 1.2525343894958496, | |
| "learning_rate": 9.221563490594103e-07, | |
| "loss": 0.4384, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.2052519050490653, | |
| "grad_norm": 1.1235498189926147, | |
| "learning_rate": 9.217752299315724e-07, | |
| "loss": 0.4004, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.20569047749575134, | |
| "grad_norm": 1.2831324338912964, | |
| "learning_rate": 9.213932592672544e-07, | |
| "loss": 0.4974, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.20612904994243736, | |
| "grad_norm": 1.2940030097961426, | |
| "learning_rate": 9.2101043783763e-07, | |
| "loss": 0.4909, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.2065676223891234, | |
| "grad_norm": 1.3095605373382568, | |
| "learning_rate": 9.206267664155906e-07, | |
| "loss": 0.4604, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.20700619483580945, | |
| "grad_norm": 1.2282830476760864, | |
| "learning_rate": 9.202422457757433e-07, | |
| "loss": 0.4773, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.20744476728249547, | |
| "grad_norm": 1.3521174192428589, | |
| "learning_rate": 9.198568766944102e-07, | |
| "loss": 0.4535, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.20788333972918152, | |
| "grad_norm": 1.260043978691101, | |
| "learning_rate": 9.194706599496262e-07, | |
| "loss": 0.4328, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.20832191217586754, | |
| "grad_norm": 1.3410409688949585, | |
| "learning_rate": 9.190835963211376e-07, | |
| "loss": 0.4879, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.2087604846225536, | |
| "grad_norm": 1.3885812759399414, | |
| "learning_rate": 9.186956865904003e-07, | |
| "loss": 0.4449, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.20919905706923964, | |
| "grad_norm": 1.3543665409088135, | |
| "learning_rate": 9.183069315405784e-07, | |
| "loss": 0.5023, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.20963762951592566, | |
| "grad_norm": 1.3189713954925537, | |
| "learning_rate": 9.179173319565432e-07, | |
| "loss": 0.4523, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.2100762019626117, | |
| "grad_norm": 1.262413501739502, | |
| "learning_rate": 9.175268886248704e-07, | |
| "loss": 0.4632, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.21051477440929772, | |
| "grad_norm": 1.3755730390548706, | |
| "learning_rate": 9.171356023338395e-07, | |
| "loss": 0.4475, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.21095334685598377, | |
| "grad_norm": 1.2970229387283325, | |
| "learning_rate": 9.167434738734318e-07, | |
| "loss": 0.4604, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.21139191930266982, | |
| "grad_norm": 1.2604976892471313, | |
| "learning_rate": 9.163505040353287e-07, | |
| "loss": 0.5005, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.21183049174935584, | |
| "grad_norm": 1.4260404109954834, | |
| "learning_rate": 9.159566936129111e-07, | |
| "loss": 0.4655, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.21226906419604188, | |
| "grad_norm": 1.4161478281021118, | |
| "learning_rate": 9.155620434012556e-07, | |
| "loss": 0.4763, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.21270763664272793, | |
| "grad_norm": 1.3012006282806396, | |
| "learning_rate": 9.151665541971356e-07, | |
| "loss": 0.4265, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.21314620908941395, | |
| "grad_norm": 1.3242242336273193, | |
| "learning_rate": 9.147702267990177e-07, | |
| "loss": 0.4468, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.2135847815361, | |
| "grad_norm": 1.350150465965271, | |
| "learning_rate": 9.143730620070608e-07, | |
| "loss": 0.4252, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.21402335398278602, | |
| "grad_norm": 1.3396698236465454, | |
| "learning_rate": 9.139750606231145e-07, | |
| "loss": 0.4238, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.21446192642947207, | |
| "grad_norm": 1.2412779331207275, | |
| "learning_rate": 9.135762234507174e-07, | |
| "loss": 0.4363, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.2149004988761581, | |
| "grad_norm": 1.2845489978790283, | |
| "learning_rate": 9.131765512950958e-07, | |
| "loss": 0.447, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.21533907132284413, | |
| "grad_norm": 1.3290574550628662, | |
| "learning_rate": 9.127760449631612e-07, | |
| "loss": 0.5195, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.21577764376953018, | |
| "grad_norm": 1.325376033782959, | |
| "learning_rate": 9.123747052635098e-07, | |
| "loss": 0.4565, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.21621621621621623, | |
| "grad_norm": 1.39933180809021, | |
| "learning_rate": 9.119725330064201e-07, | |
| "loss": 0.4568, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.21665478866290225, | |
| "grad_norm": 1.3452569246292114, | |
| "learning_rate": 9.115695290038513e-07, | |
| "loss": 0.4797, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.2170933611095883, | |
| "grad_norm": 1.330816388130188, | |
| "learning_rate": 9.111656940694421e-07, | |
| "loss": 0.4302, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.21753193355627432, | |
| "grad_norm": 1.2303009033203125, | |
| "learning_rate": 9.107610290185087e-07, | |
| "loss": 0.4297, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.21797050600296036, | |
| "grad_norm": 1.2003905773162842, | |
| "learning_rate": 9.103555346680433e-07, | |
| "loss": 0.4407, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.2184090784496464, | |
| "grad_norm": 1.2907445430755615, | |
| "learning_rate": 9.099492118367122e-07, | |
| "loss": 0.4384, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.21884765089633243, | |
| "grad_norm": 1.2537270784378052, | |
| "learning_rate": 9.095420613448548e-07, | |
| "loss": 0.4237, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.21928622334301848, | |
| "grad_norm": 1.3271981477737427, | |
| "learning_rate": 9.091340840144807e-07, | |
| "loss": 0.4436, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.21972479578970452, | |
| "grad_norm": 1.2225565910339355, | |
| "learning_rate": 9.087252806692699e-07, | |
| "loss": 0.4514, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.22016336823639054, | |
| "grad_norm": 1.273189663887024, | |
| "learning_rate": 9.083156521345692e-07, | |
| "loss": 0.4481, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.2206019406830766, | |
| "grad_norm": 1.3194257020950317, | |
| "learning_rate": 9.079051992373916e-07, | |
| "loss": 0.4672, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.2210405131297626, | |
| "grad_norm": 1.24362051486969, | |
| "learning_rate": 9.074939228064147e-07, | |
| "loss": 0.4002, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.22147908557644866, | |
| "grad_norm": 1.4185460805892944, | |
| "learning_rate": 9.070818236719785e-07, | |
| "loss": 0.4555, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.2219176580231347, | |
| "grad_norm": 1.2558276653289795, | |
| "learning_rate": 9.066689026660842e-07, | |
| "loss": 0.4683, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.22235623046982073, | |
| "grad_norm": 1.3859413862228394, | |
| "learning_rate": 9.062551606223921e-07, | |
| "loss": 0.4426, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.22279480291650677, | |
| "grad_norm": 1.3655338287353516, | |
| "learning_rate": 9.058405983762201e-07, | |
| "loss": 0.405, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.22323337536319282, | |
| "grad_norm": 1.2751038074493408, | |
| "learning_rate": 9.054252167645425e-07, | |
| "loss": 0.4462, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.22367194780987884, | |
| "grad_norm": 1.3899219036102295, | |
| "learning_rate": 9.050090166259872e-07, | |
| "loss": 0.412, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.2241105202565649, | |
| "grad_norm": 1.3597509860992432, | |
| "learning_rate": 9.045919988008348e-07, | |
| "loss": 0.4391, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.2245490927032509, | |
| "grad_norm": 1.4710191488265991, | |
| "learning_rate": 9.041741641310172e-07, | |
| "loss": 0.44, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.22498766514993696, | |
| "grad_norm": 1.2938218116760254, | |
| "learning_rate": 9.037555134601149e-07, | |
| "loss": 0.433, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.225426237596623, | |
| "grad_norm": 1.3852367401123047, | |
| "learning_rate": 9.033360476333565e-07, | |
| "loss": 0.4891, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.22586481004330902, | |
| "grad_norm": 1.2105050086975098, | |
| "learning_rate": 9.029157674976154e-07, | |
| "loss": 0.4549, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.22630338248999507, | |
| "grad_norm": 1.290735125541687, | |
| "learning_rate": 9.0249467390141e-07, | |
| "loss": 0.436, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.2267419549366811, | |
| "grad_norm": 1.3402304649353027, | |
| "learning_rate": 9.020727676949004e-07, | |
| "loss": 0.4032, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.22718052738336714, | |
| "grad_norm": 1.257806420326233, | |
| "learning_rate": 9.016500497298876e-07, | |
| "loss": 0.4829, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.22761909983005318, | |
| "grad_norm": 1.203140139579773, | |
| "learning_rate": 9.012265208598113e-07, | |
| "loss": 0.4132, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.2280576722767392, | |
| "grad_norm": 1.2450907230377197, | |
| "learning_rate": 9.008021819397486e-07, | |
| "loss": 0.4349, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.22849624472342525, | |
| "grad_norm": 1.3185664415359497, | |
| "learning_rate": 9.003770338264118e-07, | |
| "loss": 0.4742, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.2289348171701113, | |
| "grad_norm": 1.2235329151153564, | |
| "learning_rate": 8.999510773781471e-07, | |
| "loss": 0.4181, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.22937338961679732, | |
| "grad_norm": 1.2654236555099487, | |
| "learning_rate": 8.995243134549326e-07, | |
| "loss": 0.4767, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.22981196206348337, | |
| "grad_norm": 1.2756022214889526, | |
| "learning_rate": 8.990967429183765e-07, | |
| "loss": 0.4335, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.23025053451016939, | |
| "grad_norm": 1.337327241897583, | |
| "learning_rate": 8.986683666317157e-07, | |
| "loss": 0.4182, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.23068910695685543, | |
| "grad_norm": 1.3000355958938599, | |
| "learning_rate": 8.982391854598137e-07, | |
| "loss": 0.4844, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.23112767940354148, | |
| "grad_norm": 1.393452525138855, | |
| "learning_rate": 8.97809200269159e-07, | |
| "loss": 0.4648, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.2315662518502275, | |
| "grad_norm": 1.2905426025390625, | |
| "learning_rate": 8.973784119278639e-07, | |
| "loss": 0.4342, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.23200482429691355, | |
| "grad_norm": 1.1917750835418701, | |
| "learning_rate": 8.969468213056613e-07, | |
| "loss": 0.4271, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.2324433967435996, | |
| "grad_norm": 1.3469759225845337, | |
| "learning_rate": 8.965144292739046e-07, | |
| "loss": 0.4533, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.23288196919028561, | |
| "grad_norm": 1.2097269296646118, | |
| "learning_rate": 8.960812367055646e-07, | |
| "loss": 0.4292, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.23332054163697166, | |
| "grad_norm": 1.1486494541168213, | |
| "learning_rate": 8.95647244475229e-07, | |
| "loss": 0.4925, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.23375911408365768, | |
| "grad_norm": 1.2630372047424316, | |
| "learning_rate": 8.952124534590993e-07, | |
| "loss": 0.4106, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.23419768653034373, | |
| "grad_norm": 1.3153948783874512, | |
| "learning_rate": 8.947768645349903e-07, | |
| "loss": 0.4591, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.23463625897702978, | |
| "grad_norm": 1.280995488166809, | |
| "learning_rate": 8.943404785823269e-07, | |
| "loss": 0.4071, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.2350748314237158, | |
| "grad_norm": 1.2669576406478882, | |
| "learning_rate": 8.939032964821442e-07, | |
| "loss": 0.4185, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.23551340387040184, | |
| "grad_norm": 1.197329044342041, | |
| "learning_rate": 8.93465319117084e-07, | |
| "loss": 0.4476, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.2359519763170879, | |
| "grad_norm": 1.1159907579421997, | |
| "learning_rate": 8.930265473713937e-07, | |
| "loss": 0.429, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.2363905487637739, | |
| "grad_norm": 1.3145620822906494, | |
| "learning_rate": 8.925869821309247e-07, | |
| "loss": 0.4711, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.23682912121045996, | |
| "grad_norm": 1.306536078453064, | |
| "learning_rate": 8.921466242831303e-07, | |
| "loss": 0.4491, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.23726769365714598, | |
| "grad_norm": 1.2363736629486084, | |
| "learning_rate": 8.917054747170642e-07, | |
| "loss": 0.44, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.23770626610383203, | |
| "grad_norm": 1.340304970741272, | |
| "learning_rate": 8.912635343233783e-07, | |
| "loss": 0.4919, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.23814483855051807, | |
| "grad_norm": 1.2518609762191772, | |
| "learning_rate": 8.908208039943213e-07, | |
| "loss": 0.4362, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.2385834109972041, | |
| "grad_norm": 1.2911936044692993, | |
| "learning_rate": 8.903772846237364e-07, | |
| "loss": 0.4416, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.23902198344389014, | |
| "grad_norm": 1.2846801280975342, | |
| "learning_rate": 8.899329771070602e-07, | |
| "loss": 0.4885, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.2394605558905762, | |
| "grad_norm": 1.3153619766235352, | |
| "learning_rate": 8.894878823413207e-07, | |
| "loss": 0.4922, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.2398991283372622, | |
| "grad_norm": 1.4053038358688354, | |
| "learning_rate": 8.890420012251346e-07, | |
| "loss": 0.4912, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.24033770078394825, | |
| "grad_norm": 1.302511215209961, | |
| "learning_rate": 8.885953346587065e-07, | |
| "loss": 0.4019, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.24077627323063427, | |
| "grad_norm": 1.3971174955368042, | |
| "learning_rate": 8.881478835438272e-07, | |
| "loss": 0.4438, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.24121484567732032, | |
| "grad_norm": 1.2062628269195557, | |
| "learning_rate": 8.876996487838711e-07, | |
| "loss": 0.383, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.24165341812400637, | |
| "grad_norm": 1.3044462203979492, | |
| "learning_rate": 8.872506312837944e-07, | |
| "loss": 0.451, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.2420919905706924, | |
| "grad_norm": 1.3488870859146118, | |
| "learning_rate": 8.868008319501341e-07, | |
| "loss": 0.4249, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.24253056301737844, | |
| "grad_norm": 1.1751151084899902, | |
| "learning_rate": 8.863502516910057e-07, | |
| "loss": 0.4581, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.24296913546406446, | |
| "grad_norm": 1.3605632781982422, | |
| "learning_rate": 8.858988914161009e-07, | |
| "loss": 0.4447, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.2434077079107505, | |
| "grad_norm": 1.2312195301055908, | |
| "learning_rate": 8.854467520366864e-07, | |
| "loss": 0.4164, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.24384628035743655, | |
| "grad_norm": 1.3179501295089722, | |
| "learning_rate": 8.849938344656021e-07, | |
| "loss": 0.4674, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.24428485280412257, | |
| "grad_norm": 1.2114101648330688, | |
| "learning_rate": 8.845401396172588e-07, | |
| "loss": 0.4529, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.24472342525080862, | |
| "grad_norm": 1.238842487335205, | |
| "learning_rate": 8.840856684076365e-07, | |
| "loss": 0.4685, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.24516199769749467, | |
| "grad_norm": 1.1789189577102661, | |
| "learning_rate": 8.836304217542828e-07, | |
| "loss": 0.4282, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.24560057014418069, | |
| "grad_norm": 1.304688811302185, | |
| "learning_rate": 8.831744005763107e-07, | |
| "loss": 0.4424, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.24603914259086673, | |
| "grad_norm": 1.3729431629180908, | |
| "learning_rate": 8.827176057943969e-07, | |
| "loss": 0.4683, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.24647771503755275, | |
| "grad_norm": 1.3452732563018799, | |
| "learning_rate": 8.822600383307802e-07, | |
| "loss": 0.4498, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.2469162874842388, | |
| "grad_norm": 1.27022385597229, | |
| "learning_rate": 8.818016991092594e-07, | |
| "loss": 0.4617, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.24735485993092485, | |
| "grad_norm": 1.2468392848968506, | |
| "learning_rate": 8.813425890551909e-07, | |
| "loss": 0.4356, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.24779343237761087, | |
| "grad_norm": 1.2570581436157227, | |
| "learning_rate": 8.808827090954881e-07, | |
| "loss": 0.3973, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.24823200482429691, | |
| "grad_norm": 1.2952882051467896, | |
| "learning_rate": 8.804220601586183e-07, | |
| "loss": 0.4624, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.24867057727098296, | |
| "grad_norm": 1.2446449995040894, | |
| "learning_rate": 8.799606431746013e-07, | |
| "loss": 0.3957, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.24910914971766898, | |
| "grad_norm": 1.4092421531677246, | |
| "learning_rate": 8.794984590750079e-07, | |
| "loss": 0.4968, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.24954772216435503, | |
| "grad_norm": 1.3203057050704956, | |
| "learning_rate": 8.790355087929573e-07, | |
| "loss": 0.4874, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.24998629461104105, | |
| "grad_norm": 1.468639850616455, | |
| "learning_rate": 8.785717932631155e-07, | |
| "loss": 0.4666, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.2504248670577271, | |
| "grad_norm": 1.3520934581756592, | |
| "learning_rate": 8.781073134216943e-07, | |
| "loss": 0.4469, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.25086343950441314, | |
| "grad_norm": 1.266510248184204, | |
| "learning_rate": 8.776420702064473e-07, | |
| "loss": 0.3974, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.2513020119510992, | |
| "grad_norm": 1.2830793857574463, | |
| "learning_rate": 8.771760645566705e-07, | |
| "loss": 0.4821, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.2517405843977852, | |
| "grad_norm": 1.3247817754745483, | |
| "learning_rate": 8.767092974131984e-07, | |
| "loss": 0.4426, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.25217915684447123, | |
| "grad_norm": 1.340187907218933, | |
| "learning_rate": 8.762417697184032e-07, | |
| "loss": 0.4512, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.2526177292911573, | |
| "grad_norm": 1.2662535905838013, | |
| "learning_rate": 8.757734824161929e-07, | |
| "loss": 0.4661, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.2530563017378433, | |
| "grad_norm": 1.2929002046585083, | |
| "learning_rate": 8.753044364520083e-07, | |
| "loss": 0.4576, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.2534948741845294, | |
| "grad_norm": 1.3643664121627808, | |
| "learning_rate": 8.748346327728228e-07, | |
| "loss": 0.4642, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.2539334466312154, | |
| "grad_norm": 1.4776318073272705, | |
| "learning_rate": 8.74364072327139e-07, | |
| "loss": 0.4554, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.2543720190779014, | |
| "grad_norm": 1.1963412761688232, | |
| "learning_rate": 8.738927560649876e-07, | |
| "loss": 0.4026, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.25481059152458746, | |
| "grad_norm": 1.3636819124221802, | |
| "learning_rate": 8.734206849379253e-07, | |
| "loss": 0.4415, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.2552491639712735, | |
| "grad_norm": 1.3011325597763062, | |
| "learning_rate": 8.729478598990323e-07, | |
| "loss": 0.4289, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.25568773641795955, | |
| "grad_norm": 1.2742562294006348, | |
| "learning_rate": 8.724742819029116e-07, | |
| "loss": 0.4317, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.2561263088646456, | |
| "grad_norm": 1.4198004007339478, | |
| "learning_rate": 8.719999519056859e-07, | |
| "loss": 0.4656, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.2565648813113316, | |
| "grad_norm": 1.376230001449585, | |
| "learning_rate": 8.715248708649963e-07, | |
| "loss": 0.459, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.25700345375801764, | |
| "grad_norm": 1.1574681997299194, | |
| "learning_rate": 8.710490397400005e-07, | |
| "loss": 0.4235, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.2574420262047037, | |
| "grad_norm": 1.2351993322372437, | |
| "learning_rate": 8.7057245949137e-07, | |
| "loss": 0.4214, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.25788059865138974, | |
| "grad_norm": 1.2393819093704224, | |
| "learning_rate": 8.70095131081289e-07, | |
| "loss": 0.415, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.2583191710980758, | |
| "grad_norm": 1.267637014389038, | |
| "learning_rate": 8.696170554734523e-07, | |
| "loss": 0.4274, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.2587577435447618, | |
| "grad_norm": 1.3031575679779053, | |
| "learning_rate": 8.691382336330631e-07, | |
| "loss": 0.4582, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.2591963159914478, | |
| "grad_norm": 1.3642998933792114, | |
| "learning_rate": 8.686586665268313e-07, | |
| "loss": 0.465, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.25963488843813387, | |
| "grad_norm": 1.3312978744506836, | |
| "learning_rate": 8.681783551229713e-07, | |
| "loss": 0.4679, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.2600734608848199, | |
| "grad_norm": 1.3136823177337646, | |
| "learning_rate": 8.676973003912004e-07, | |
| "loss": 0.485, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.26051203333150597, | |
| "grad_norm": 1.2545801401138306, | |
| "learning_rate": 8.672155033027364e-07, | |
| "loss": 0.4402, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.26095060577819196, | |
| "grad_norm": 1.227077603340149, | |
| "learning_rate": 8.667329648302959e-07, | |
| "loss": 0.4597, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.261389178224878, | |
| "grad_norm": 1.3195209503173828, | |
| "learning_rate": 8.662496859480925e-07, | |
| "loss": 0.4567, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.26182775067156405, | |
| "grad_norm": 1.235021710395813, | |
| "learning_rate": 8.657656676318345e-07, | |
| "loss": 0.4215, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.2622663231182501, | |
| "grad_norm": 1.2290730476379395, | |
| "learning_rate": 8.652809108587231e-07, | |
| "loss": 0.4139, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.26270489556493615, | |
| "grad_norm": 1.2141824960708618, | |
| "learning_rate": 8.647954166074503e-07, | |
| "loss": 0.4234, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.2631434680116222, | |
| "grad_norm": 1.3154791593551636, | |
| "learning_rate": 8.64309185858197e-07, | |
| "loss": 0.4791, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.2635820404583082, | |
| "grad_norm": 1.2648977041244507, | |
| "learning_rate": 8.638222195926313e-07, | |
| "loss": 0.4227, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.26402061290499423, | |
| "grad_norm": 1.231683373451233, | |
| "learning_rate": 8.633345187939061e-07, | |
| "loss": 0.4806, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.2644591853516803, | |
| "grad_norm": 1.3356053829193115, | |
| "learning_rate": 8.628460844466572e-07, | |
| "loss": 0.4733, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.26489775779836633, | |
| "grad_norm": 1.3214858770370483, | |
| "learning_rate": 8.623569175370016e-07, | |
| "loss": 0.4439, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.2653363302450524, | |
| "grad_norm": 1.3323317766189575, | |
| "learning_rate": 8.61867019052535e-07, | |
| "loss": 0.4439, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.26577490269173837, | |
| "grad_norm": 1.388413906097412, | |
| "learning_rate": 8.613763899823303e-07, | |
| "loss": 0.46, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.2662134751384244, | |
| "grad_norm": 1.3175569772720337, | |
| "learning_rate": 8.608850313169355e-07, | |
| "loss": 0.3937, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.26665204758511046, | |
| "grad_norm": 1.3802878856658936, | |
| "learning_rate": 8.603929440483713e-07, | |
| "loss": 0.4245, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.2670906200317965, | |
| "grad_norm": 1.294541835784912, | |
| "learning_rate": 8.599001291701294e-07, | |
| "loss": 0.4257, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.26752919247848256, | |
| "grad_norm": 1.3904340267181396, | |
| "learning_rate": 8.59406587677171e-07, | |
| "loss": 0.47, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.26796776492516855, | |
| "grad_norm": 1.310609221458435, | |
| "learning_rate": 8.589123205659237e-07, | |
| "loss": 0.4693, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.2684063373718546, | |
| "grad_norm": 1.508705496788025, | |
| "learning_rate": 8.584173288342806e-07, | |
| "loss": 0.4818, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.26884490981854064, | |
| "grad_norm": 1.258613109588623, | |
| "learning_rate": 8.579216134815972e-07, | |
| "loss": 0.4391, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.2692834822652267, | |
| "grad_norm": 1.309943437576294, | |
| "learning_rate": 8.574251755086905e-07, | |
| "loss": 0.4345, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.26972205471191274, | |
| "grad_norm": 1.2155766487121582, | |
| "learning_rate": 8.569280159178358e-07, | |
| "loss": 0.4451, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.2701606271585988, | |
| "grad_norm": 1.3451566696166992, | |
| "learning_rate": 8.564301357127662e-07, | |
| "loss": 0.446, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.2705991996052848, | |
| "grad_norm": 1.581193447113037, | |
| "learning_rate": 8.559315358986684e-07, | |
| "loss": 0.455, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.2710377720519708, | |
| "grad_norm": 1.3008910417556763, | |
| "learning_rate": 8.554322174821833e-07, | |
| "loss": 0.4209, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.2714763444986569, | |
| "grad_norm": 1.2218880653381348, | |
| "learning_rate": 8.549321814714017e-07, | |
| "loss": 0.4627, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.2719149169453429, | |
| "grad_norm": 1.2023916244506836, | |
| "learning_rate": 8.544314288758634e-07, | |
| "loss": 0.4502, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.27235348939202897, | |
| "grad_norm": 1.3902664184570312, | |
| "learning_rate": 8.539299607065551e-07, | |
| "loss": 0.4435, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.27279206183871496, | |
| "grad_norm": 1.35061776638031, | |
| "learning_rate": 8.534277779759081e-07, | |
| "loss": 0.4476, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.273230634285401, | |
| "grad_norm": 1.2908838987350464, | |
| "learning_rate": 8.529248816977963e-07, | |
| "loss": 0.4829, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.27366920673208706, | |
| "grad_norm": 1.2177608013153076, | |
| "learning_rate": 8.524212728875342e-07, | |
| "loss": 0.4346, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.2741077791787731, | |
| "grad_norm": 1.3742519617080688, | |
| "learning_rate": 8.51916952561875e-07, | |
| "loss": 0.463, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.27454635162545915, | |
| "grad_norm": 1.1771491765975952, | |
| "learning_rate": 8.514119217390084e-07, | |
| "loss": 0.4665, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.27498492407214514, | |
| "grad_norm": 1.4156662225723267, | |
| "learning_rate": 8.509061814385581e-07, | |
| "loss": 0.5086, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.2754234965188312, | |
| "grad_norm": 1.4472264051437378, | |
| "learning_rate": 8.503997326815811e-07, | |
| "loss": 0.4758, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.27586206896551724, | |
| "grad_norm": 1.3086676597595215, | |
| "learning_rate": 8.498925764905635e-07, | |
| "loss": 0.4232, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.2763006414122033, | |
| "grad_norm": 1.2598360776901245, | |
| "learning_rate": 8.493847138894208e-07, | |
| "loss": 0.4545, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.27673921385888933, | |
| "grad_norm": 1.3427696228027344, | |
| "learning_rate": 8.488761459034941e-07, | |
| "loss": 0.4266, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.2771777863055754, | |
| "grad_norm": 1.257368803024292, | |
| "learning_rate": 8.483668735595486e-07, | |
| "loss": 0.4527, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.27761635875226137, | |
| "grad_norm": 1.3200676441192627, | |
| "learning_rate": 8.478568978857721e-07, | |
| "loss": 0.5008, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.2780549311989474, | |
| "grad_norm": 1.3147386312484741, | |
| "learning_rate": 8.473462199117715e-07, | |
| "loss": 0.4622, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.27849350364563347, | |
| "grad_norm": 1.241818904876709, | |
| "learning_rate": 8.468348406685724e-07, | |
| "loss": 0.4707, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.2789320760923195, | |
| "grad_norm": 1.1343870162963867, | |
| "learning_rate": 8.463227611886157e-07, | |
| "loss": 0.4625, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.27937064853900556, | |
| "grad_norm": 1.3791996240615845, | |
| "learning_rate": 8.458099825057565e-07, | |
| "loss": 0.4212, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.27980922098569155, | |
| "grad_norm": 1.2124180793762207, | |
| "learning_rate": 8.45296505655261e-07, | |
| "loss": 0.4738, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.2802477934323776, | |
| "grad_norm": 1.332551121711731, | |
| "learning_rate": 8.447823316738054e-07, | |
| "loss": 0.445, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.28068636587906365, | |
| "grad_norm": 1.3134421110153198, | |
| "learning_rate": 8.442674615994731e-07, | |
| "loss": 0.4764, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.2811249383257497, | |
| "grad_norm": 1.2080031633377075, | |
| "learning_rate": 8.43751896471753e-07, | |
| "loss": 0.4158, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.28156351077243574, | |
| "grad_norm": 1.3157594203948975, | |
| "learning_rate": 8.432356373315371e-07, | |
| "loss": 0.4535, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.28200208321912174, | |
| "grad_norm": 1.3137787580490112, | |
| "learning_rate": 8.427186852211188e-07, | |
| "loss": 0.4377, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.2824406556658078, | |
| "grad_norm": 1.255462408065796, | |
| "learning_rate": 8.422010411841905e-07, | |
| "loss": 0.3872, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.28287922811249383, | |
| "grad_norm": 1.292777180671692, | |
| "learning_rate": 8.416827062658415e-07, | |
| "loss": 0.4944, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.2833178005591799, | |
| "grad_norm": 1.4436018466949463, | |
| "learning_rate": 8.411636815125558e-07, | |
| "loss": 0.4772, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.2837563730058659, | |
| "grad_norm": 1.2209681272506714, | |
| "learning_rate": 8.406439679722104e-07, | |
| "loss": 0.4437, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.2841949454525519, | |
| "grad_norm": 1.3344581127166748, | |
| "learning_rate": 8.401235666940727e-07, | |
| "loss": 0.4715, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.28463351789923796, | |
| "grad_norm": 1.1497730016708374, | |
| "learning_rate": 8.396024787287988e-07, | |
| "loss": 0.4298, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.285072090345924, | |
| "grad_norm": 1.341781735420227, | |
| "learning_rate": 8.390807051284309e-07, | |
| "loss": 0.4257, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.28551066279261006, | |
| "grad_norm": 1.280103087425232, | |
| "learning_rate": 8.385582469463959e-07, | |
| "loss": 0.4368, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.2859492352392961, | |
| "grad_norm": 1.2544479370117188, | |
| "learning_rate": 8.380351052375023e-07, | |
| "loss": 0.4723, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.28638780768598215, | |
| "grad_norm": 1.1628601551055908, | |
| "learning_rate": 8.375112810579389e-07, | |
| "loss": 0.4515, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.28682638013266815, | |
| "grad_norm": 1.3643077611923218, | |
| "learning_rate": 8.369867754652724e-07, | |
| "loss": 0.4235, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.2872649525793542, | |
| "grad_norm": 1.205430030822754, | |
| "learning_rate": 8.36461589518445e-07, | |
| "loss": 0.4375, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.28770352502604024, | |
| "grad_norm": 1.2593382596969604, | |
| "learning_rate": 8.359357242777728e-07, | |
| "loss": 0.4494, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.2881420974727263, | |
| "grad_norm": 1.226905107498169, | |
| "learning_rate": 8.354091808049431e-07, | |
| "loss": 0.4476, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.28858066991941234, | |
| "grad_norm": 1.2723194360733032, | |
| "learning_rate": 8.348819601630124e-07, | |
| "loss": 0.4706, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.28901924236609833, | |
| "grad_norm": 1.2584813833236694, | |
| "learning_rate": 8.343540634164047e-07, | |
| "loss": 0.4518, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.2894578148127844, | |
| "grad_norm": 1.19351327419281, | |
| "learning_rate": 8.338254916309089e-07, | |
| "loss": 0.3974, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.2898963872594704, | |
| "grad_norm": 1.3015458583831787, | |
| "learning_rate": 8.332962458736765e-07, | |
| "loss": 0.451, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.29033495970615647, | |
| "grad_norm": 1.206496000289917, | |
| "learning_rate": 8.327663272132202e-07, | |
| "loss": 0.4202, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.2907735321528425, | |
| "grad_norm": 1.1564853191375732, | |
| "learning_rate": 8.322357367194108e-07, | |
| "loss": 0.4161, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.2912121045995285, | |
| "grad_norm": 1.2696150541305542, | |
| "learning_rate": 8.317044754634756e-07, | |
| "loss": 0.4461, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.29165067704621456, | |
| "grad_norm": 1.227945327758789, | |
| "learning_rate": 8.311725445179964e-07, | |
| "loss": 0.4303, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.2920892494929006, | |
| "grad_norm": 1.2026177644729614, | |
| "learning_rate": 8.306399449569066e-07, | |
| "loss": 0.4329, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.29252782193958665, | |
| "grad_norm": 1.330443263053894, | |
| "learning_rate": 8.301066778554897e-07, | |
| "loss": 0.4263, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.2929663943862727, | |
| "grad_norm": 1.2969058752059937, | |
| "learning_rate": 8.295727442903772e-07, | |
| "loss": 0.4558, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.29340496683295875, | |
| "grad_norm": 1.3471171855926514, | |
| "learning_rate": 8.290381453395457e-07, | |
| "loss": 0.4873, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.29384353927964474, | |
| "grad_norm": 1.3648743629455566, | |
| "learning_rate": 8.285028820823153e-07, | |
| "loss": 0.4626, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.2942821117263308, | |
| "grad_norm": 1.481045126914978, | |
| "learning_rate": 8.279669555993475e-07, | |
| "loss": 0.4924, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.29472068417301683, | |
| "grad_norm": 1.1982755661010742, | |
| "learning_rate": 8.274303669726426e-07, | |
| "loss": 0.4365, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.2951592566197029, | |
| "grad_norm": 1.2949172258377075, | |
| "learning_rate": 8.268931172855378e-07, | |
| "loss": 0.4929, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.29559782906638893, | |
| "grad_norm": 1.2132493257522583, | |
| "learning_rate": 8.263552076227047e-07, | |
| "loss": 0.413, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.2960364015130749, | |
| "grad_norm": 1.3627359867095947, | |
| "learning_rate": 8.258166390701481e-07, | |
| "loss": 0.486, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.29647497395976097, | |
| "grad_norm": 1.3661818504333496, | |
| "learning_rate": 8.25277412715202e-07, | |
| "loss": 0.4704, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.296913546406447, | |
| "grad_norm": 1.2108001708984375, | |
| "learning_rate": 8.247375296465293e-07, | |
| "loss": 0.402, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.29735211885313306, | |
| "grad_norm": 1.4531514644622803, | |
| "learning_rate": 8.241969909541183e-07, | |
| "loss": 0.4896, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.2977906912998191, | |
| "grad_norm": 1.3670005798339844, | |
| "learning_rate": 8.236557977292813e-07, | |
| "loss": 0.4511, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.2982292637465051, | |
| "grad_norm": 1.4220718145370483, | |
| "learning_rate": 8.231139510646515e-07, | |
| "loss": 0.4765, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.29866783619319115, | |
| "grad_norm": 1.2339059114456177, | |
| "learning_rate": 8.22571452054182e-07, | |
| "loss": 0.4748, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.2991064086398772, | |
| "grad_norm": 1.2986462116241455, | |
| "learning_rate": 8.220283017931427e-07, | |
| "loss": 0.4738, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.29954498108656324, | |
| "grad_norm": 1.2862370014190674, | |
| "learning_rate": 8.214845013781183e-07, | |
| "loss": 0.4658, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.2999835535332493, | |
| "grad_norm": 1.3527040481567383, | |
| "learning_rate": 8.209400519070057e-07, | |
| "loss": 0.4905, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.3004221259799353, | |
| "grad_norm": 1.2541968822479248, | |
| "learning_rate": 8.203949544790129e-07, | |
| "loss": 0.4487, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.30086069842662133, | |
| "grad_norm": 1.3078125715255737, | |
| "learning_rate": 8.198492101946562e-07, | |
| "loss": 0.4253, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.3012992708733074, | |
| "grad_norm": 1.210807204246521, | |
| "learning_rate": 8.193028201557567e-07, | |
| "loss": 0.414, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.3017378433199934, | |
| "grad_norm": 1.3149670362472534, | |
| "learning_rate": 8.187557854654406e-07, | |
| "loss": 0.4341, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.3021764157666795, | |
| "grad_norm": 1.2983028888702393, | |
| "learning_rate": 8.182081072281346e-07, | |
| "loss": 0.471, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.3026149882133655, | |
| "grad_norm": 1.1206141710281372, | |
| "learning_rate": 8.176597865495653e-07, | |
| "loss": 0.4074, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.3030535606600515, | |
| "grad_norm": 1.276363492012024, | |
| "learning_rate": 8.171108245367561e-07, | |
| "loss": 0.4624, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.30349213310673756, | |
| "grad_norm": 1.3328988552093506, | |
| "learning_rate": 8.165612222980251e-07, | |
| "loss": 0.4394, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.3039307055534236, | |
| "grad_norm": 1.4199296236038208, | |
| "learning_rate": 8.160109809429834e-07, | |
| "loss": 0.4394, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.30436927800010966, | |
| "grad_norm": 1.363855004310608, | |
| "learning_rate": 8.154601015825318e-07, | |
| "loss": 0.4267, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.3048078504467957, | |
| "grad_norm": 1.2294096946716309, | |
| "learning_rate": 8.149085853288597e-07, | |
| "loss": 0.4361, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.3052464228934817, | |
| "grad_norm": 1.3705590963363647, | |
| "learning_rate": 8.143564332954425e-07, | |
| "loss": 0.465, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.30568499534016774, | |
| "grad_norm": 1.2903246879577637, | |
| "learning_rate": 8.138036465970384e-07, | |
| "loss": 0.4469, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.3061235677868538, | |
| "grad_norm": 1.2863341569900513, | |
| "learning_rate": 8.132502263496875e-07, | |
| "loss": 0.4226, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.30656214023353984, | |
| "grad_norm": 1.333102822303772, | |
| "learning_rate": 8.126961736707091e-07, | |
| "loss": 0.4326, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.3070007126802259, | |
| "grad_norm": 1.2394230365753174, | |
| "learning_rate": 8.121414896786991e-07, | |
| "loss": 0.4383, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.3074392851269119, | |
| "grad_norm": 1.4252898693084717, | |
| "learning_rate": 8.115861754935279e-07, | |
| "loss": 0.4935, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.3078778575735979, | |
| "grad_norm": 1.2032008171081543, | |
| "learning_rate": 8.110302322363387e-07, | |
| "loss": 0.4592, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.30831643002028397, | |
| "grad_norm": 1.212705135345459, | |
| "learning_rate": 8.10473661029544e-07, | |
| "loss": 0.4354, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.30875500246697, | |
| "grad_norm": 1.3495712280273438, | |
| "learning_rate": 8.099164629968247e-07, | |
| "loss": 0.4419, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.30919357491365607, | |
| "grad_norm": 1.325252652168274, | |
| "learning_rate": 8.093586392631271e-07, | |
| "loss": 0.4354, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.3096321473603421, | |
| "grad_norm": 1.2952181100845337, | |
| "learning_rate": 8.088001909546606e-07, | |
| "loss": 0.4495, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.3100707198070281, | |
| "grad_norm": 1.4086304903030396, | |
| "learning_rate": 8.082411191988956e-07, | |
| "loss": 0.4763, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.31050929225371415, | |
| "grad_norm": 1.5200958251953125, | |
| "learning_rate": 8.076814251245612e-07, | |
| "loss": 0.4554, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.3109478647004002, | |
| "grad_norm": 1.2769299745559692, | |
| "learning_rate": 8.071211098616433e-07, | |
| "loss": 0.4089, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.31138643714708625, | |
| "grad_norm": 1.294661283493042, | |
| "learning_rate": 8.06560174541381e-07, | |
| "loss": 0.5231, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.3118250095937723, | |
| "grad_norm": 1.3333851099014282, | |
| "learning_rate": 8.059986202962666e-07, | |
| "loss": 0.4307, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.3122635820404583, | |
| "grad_norm": 1.2841682434082031, | |
| "learning_rate": 8.054364482600405e-07, | |
| "loss": 0.4259, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.31270215448714433, | |
| "grad_norm": 1.257988452911377, | |
| "learning_rate": 8.048736595676916e-07, | |
| "loss": 0.4293, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.3131407269338304, | |
| "grad_norm": 1.2623865604400635, | |
| "learning_rate": 8.043102553554531e-07, | |
| "loss": 0.4311, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.31357929938051643, | |
| "grad_norm": 1.398133635520935, | |
| "learning_rate": 8.037462367608012e-07, | |
| "loss": 0.4415, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.3140178718272025, | |
| "grad_norm": 1.3084006309509277, | |
| "learning_rate": 8.031816049224523e-07, | |
| "loss": 0.4699, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.31445644427388847, | |
| "grad_norm": 1.1205732822418213, | |
| "learning_rate": 8.026163609803611e-07, | |
| "loss": 0.4297, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.3148950167205745, | |
| "grad_norm": 1.3080092668533325, | |
| "learning_rate": 8.020505060757178e-07, | |
| "loss": 0.3942, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.31533358916726056, | |
| "grad_norm": 1.2918123006820679, | |
| "learning_rate": 8.014840413509464e-07, | |
| "loss": 0.4946, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.3157721616139466, | |
| "grad_norm": 1.257453441619873, | |
| "learning_rate": 8.009169679497019e-07, | |
| "loss": 0.4411, | |
| "step": 720 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 2280, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 40, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.522795283589628e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
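
The `log_history` array and the trailer fields (`max_steps`, `save_steps`, `train_batch_size`, `total_flos`) above follow the standard Hugging Face Trainer state schema, so the file can be inspected programmatically. Below is a minimal sketch, assuming the JSON is saved locally as `trainer_state.json` (a hypothetical path; the actual filename is not given in this document), that loads the state and plots training loss and learning rate against step.

```python
# Minimal sketch: parse a Hugging Face Trainer state file and plot loss / learning rate.
# Assumes the JSON shown above is saved as "trainer_state.json" (hypothetical path).
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Each log entry carries "step", "loss", "learning_rate", and "grad_norm";
# evaluation entries (if any) may omit "loss", so filter defensively.
history = [entry for entry in state["log_history"] if "loss" in entry]
steps = [entry["step"] for entry in history]
losses = [entry["loss"] for entry in history]
lrs = [entry["learning_rate"] for entry in history]

fig, (ax_loss, ax_lr) = plt.subplots(1, 2, figsize=(10, 4))
ax_loss.plot(steps, losses)
ax_loss.set_xlabel("step")
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lrs)
ax_lr.set_xlabel("step")
ax_lr.set_ylabel("learning rate")
fig.tight_layout()
plt.show()
```

With the 720 logged steps above (of `max_steps` 2280), this would show the loss hovering around 0.40-0.50 while the learning rate climbs through warmup and then decays toward 8e-07.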