| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.004443325581607889, |
| "eval_steps": 335, |
| "global_step": 1337, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 3.323354960065736e-06, |
| "grad_norm": 2.593838930130005, |
| "learning_rate": 2e-05, |
| "loss": 1.1307, |
| "step": 1 |
| }, |
| { |
| "epoch": 3.323354960065736e-06, |
| "eval_loss": 0.82511967420578, |
| "eval_runtime": 3228.7238, |
| "eval_samples_per_second": 39.24, |
| "eval_steps_per_second": 19.62, |
| "step": 1 |
| }, |
| { |
| "epoch": 6.646709920131472e-06, |
| "grad_norm": 1.9493608474731445, |
| "learning_rate": 4e-05, |
| "loss": 0.8578, |
| "step": 2 |
| }, |
| { |
| "epoch": 9.970064880197207e-06, |
| "grad_norm": 2.583665370941162, |
| "learning_rate": 6e-05, |
| "loss": 0.6069, |
| "step": 3 |
| }, |
| { |
| "epoch": 1.3293419840262944e-05, |
| "grad_norm": 3.607430934906006, |
| "learning_rate": 8e-05, |
| "loss": 0.7483, |
| "step": 4 |
| }, |
| { |
| "epoch": 1.661677480032868e-05, |
| "grad_norm": 2.763707399368286, |
| "learning_rate": 0.0001, |
| "loss": 1.0399, |
| "step": 5 |
| }, |
| { |
| "epoch": 1.9940129760394414e-05, |
| "grad_norm": 6.582963466644287, |
| "learning_rate": 0.00012, |
| "loss": 1.3938, |
| "step": 6 |
| }, |
| { |
| "epoch": 2.326348472046015e-05, |
| "grad_norm": 3.642049551010132, |
| "learning_rate": 0.00014, |
| "loss": 0.6619, |
| "step": 7 |
| }, |
| { |
| "epoch": 2.6586839680525888e-05, |
| "grad_norm": 2.402776002883911, |
| "learning_rate": 0.00016, |
| "loss": 0.9859, |
| "step": 8 |
| }, |
| { |
| "epoch": 2.9910194640591625e-05, |
| "grad_norm": 3.6133182048797607, |
| "learning_rate": 0.00018, |
| "loss": 1.3822, |
| "step": 9 |
| }, |
| { |
| "epoch": 3.323354960065736e-05, |
| "grad_norm": 2.660515546798706, |
| "learning_rate": 0.0002, |
| "loss": 0.9685, |
| "step": 10 |
| }, |
| { |
| "epoch": 3.65569045607231e-05, |
| "grad_norm": 3.0625710487365723, |
| "learning_rate": 0.0001999997197615636, |
| "loss": 1.1394, |
| "step": 11 |
| }, |
| { |
| "epoch": 3.988025952078883e-05, |
| "grad_norm": 3.345867872238159, |
| "learning_rate": 0.0001999988790478251, |
| "loss": 0.7105, |
| "step": 12 |
| }, |
| { |
| "epoch": 4.3203614480854566e-05, |
| "grad_norm": 3.0021090507507324, |
| "learning_rate": 0.00019999747786349646, |
| "loss": 0.7867, |
| "step": 13 |
| }, |
| { |
| "epoch": 4.65269694409203e-05, |
| "grad_norm": 2.038267135620117, |
| "learning_rate": 0.000199995516216431, |
| "loss": 0.7967, |
| "step": 14 |
| }, |
| { |
| "epoch": 4.985032440098604e-05, |
| "grad_norm": 2.1804399490356445, |
| "learning_rate": 0.00019999299411762334, |
| "loss": 0.6677, |
| "step": 15 |
| }, |
| { |
| "epoch": 5.3173679361051776e-05, |
| "grad_norm": 2.740802526473999, |
| "learning_rate": 0.0001999899115812092, |
| "loss": 0.6612, |
| "step": 16 |
| }, |
| { |
| "epoch": 5.649703432111751e-05, |
| "grad_norm": 2.2491555213928223, |
| "learning_rate": 0.00019998626862446556, |
| "loss": 0.7864, |
| "step": 17 |
| }, |
| { |
| "epoch": 5.982038928118325e-05, |
| "grad_norm": 2.0781562328338623, |
| "learning_rate": 0.0001999820652678103, |
| "loss": 0.9538, |
| "step": 18 |
| }, |
| { |
| "epoch": 6.314374424124898e-05, |
| "grad_norm": 1.8750240802764893, |
| "learning_rate": 0.00019997730153480228, |
| "loss": 0.6619, |
| "step": 19 |
| }, |
| { |
| "epoch": 6.646709920131472e-05, |
| "grad_norm": 1.6040363311767578, |
| "learning_rate": 0.00019997197745214108, |
| "loss": 0.7326, |
| "step": 20 |
| }, |
| { |
| "epoch": 6.979045416138045e-05, |
| "grad_norm": 2.0579588413238525, |
| "learning_rate": 0.000199966093049667, |
| "loss": 0.7618, |
| "step": 21 |
| }, |
| { |
| "epoch": 7.31138091214462e-05, |
| "grad_norm": 2.5934383869171143, |
| "learning_rate": 0.00019995964836036075, |
| "loss": 1.2188, |
| "step": 22 |
| }, |
| { |
| "epoch": 7.643716408151193e-05, |
| "grad_norm": 1.8951530456542969, |
| "learning_rate": 0.00019995264342034328, |
| "loss": 0.8864, |
| "step": 23 |
| }, |
| { |
| "epoch": 7.976051904157766e-05, |
| "grad_norm": 2.207963705062866, |
| "learning_rate": 0.00019994507826887574, |
| "loss": 0.6334, |
| "step": 24 |
| }, |
| { |
| "epoch": 8.30838740016434e-05, |
| "grad_norm": 1.5225898027420044, |
| "learning_rate": 0.00019993695294835898, |
| "loss": 0.5909, |
| "step": 25 |
| }, |
| { |
| "epoch": 8.640722896170913e-05, |
| "grad_norm": 2.108294725418091, |
| "learning_rate": 0.00019992826750433356, |
| "loss": 0.7502, |
| "step": 26 |
| }, |
| { |
| "epoch": 8.973058392177487e-05, |
| "grad_norm": 2.806995153427124, |
| "learning_rate": 0.00019991902198547942, |
| "loss": 0.9968, |
| "step": 27 |
| }, |
| { |
| "epoch": 9.30539388818406e-05, |
| "grad_norm": 2.3967955112457275, |
| "learning_rate": 0.00019990921644361546, |
| "loss": 0.5702, |
| "step": 28 |
| }, |
| { |
| "epoch": 9.637729384190635e-05, |
| "grad_norm": 3.112006187438965, |
| "learning_rate": 0.0001998988509336996, |
| "loss": 0.9814, |
| "step": 29 |
| }, |
| { |
| "epoch": 9.970064880197208e-05, |
| "grad_norm": 3.5534136295318604, |
| "learning_rate": 0.00019988792551382806, |
| "loss": 1.1793, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.00010302400376203781, |
| "grad_norm": 2.224628448486328, |
| "learning_rate": 0.0001998764402452353, |
| "loss": 0.7663, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.00010634735872210355, |
| "grad_norm": 4.616275310516357, |
| "learning_rate": 0.0001998643951922936, |
| "loss": 1.038, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.00010967071368216928, |
| "grad_norm": 2.729283332824707, |
| "learning_rate": 0.00019985179042251267, |
| "loss": 1.1306, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.00011299406864223503, |
| "grad_norm": 2.5778391361236572, |
| "learning_rate": 0.00019983862600653936, |
| "loss": 0.9213, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.00011631742360230076, |
| "grad_norm": 2.805159568786621, |
| "learning_rate": 0.00019982490201815716, |
| "loss": 1.2159, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.0001196407785623665, |
| "grad_norm": 3.2575483322143555, |
| "learning_rate": 0.00019981061853428588, |
| "loss": 1.14, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.00012296413352243224, |
| "grad_norm": 2.3567512035369873, |
| "learning_rate": 0.00019979577563498108, |
| "loss": 0.5381, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.00012628748848249796, |
| "grad_norm": 2.3204903602600098, |
| "learning_rate": 0.00019978037340343384, |
| "loss": 0.7389, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.0001296108434425637, |
| "grad_norm": 2.385420083999634, |
| "learning_rate": 0.00019976441192597012, |
| "loss": 0.9223, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.00013293419840262945, |
| "grad_norm": 2.0204904079437256, |
| "learning_rate": 0.00019974789129205024, |
| "loss": 1.0554, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.00013625755336269516, |
| "grad_norm": 3.0869300365448, |
| "learning_rate": 0.00019973081159426856, |
| "loss": 1.0152, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.0001395809083227609, |
| "grad_norm": 2.0119707584381104, |
| "learning_rate": 0.0001997131729283529, |
| "loss": 0.7922, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.00014290426328282665, |
| "grad_norm": 2.9331247806549072, |
| "learning_rate": 0.00019969497539316374, |
| "loss": 0.9397, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.0001462276182428924, |
| "grad_norm": 1.8829960823059082, |
| "learning_rate": 0.00019967621909069424, |
| "loss": 0.7462, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.0001495509732029581, |
| "grad_norm": 1.8125828504562378, |
| "learning_rate": 0.00019965690412606906, |
| "loss": 0.9771, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.00015287432816302385, |
| "grad_norm": 1.8464730978012085, |
| "learning_rate": 0.00019963703060754407, |
| "loss": 1.0249, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.0001561976831230896, |
| "grad_norm": 1.6242536306381226, |
| "learning_rate": 0.0001996165986465058, |
| "loss": 0.7156, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.00015952103808315532, |
| "grad_norm": 2.4028480052948, |
| "learning_rate": 0.00019959560835747066, |
| "loss": 0.7822, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.00016284439304322106, |
| "grad_norm": 1.5756831169128418, |
| "learning_rate": 0.00019957405985808436, |
| "loss": 1.0894, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.0001661677480032868, |
| "grad_norm": 1.5813134908676147, |
| "learning_rate": 0.00019955195326912123, |
| "loss": 0.9756, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.00016949110296335255, |
| "grad_norm": 2.885624408721924, |
| "learning_rate": 0.00019952928871448363, |
| "loss": 1.2237, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.00017281445792341826, |
| "grad_norm": 2.5551652908325195, |
| "learning_rate": 0.00019950606632120112, |
| "loss": 0.8768, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.000176137812883484, |
| "grad_norm": 1.6047443151474, |
| "learning_rate": 0.00019948228621942984, |
| "loss": 1.289, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.00017946116784354975, |
| "grad_norm": 3.01027250289917, |
| "learning_rate": 0.00019945794854245178, |
| "loss": 0.8267, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.00018278452280361547, |
| "grad_norm": 2.2793943881988525, |
| "learning_rate": 0.000199433053426674, |
| "loss": 0.7214, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.0001861078777636812, |
| "grad_norm": 1.9147419929504395, |
| "learning_rate": 0.00019940760101162783, |
| "loss": 1.3664, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.00018943123272374695, |
| "grad_norm": 1.797659993171692, |
| "learning_rate": 0.0001993815914399682, |
| "loss": 0.9188, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.0001927545876838127, |
| "grad_norm": 1.291646122932434, |
| "learning_rate": 0.00019935502485747273, |
| "loss": 1.1547, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.00019607794264387841, |
| "grad_norm": 4.202106475830078, |
| "learning_rate": 0.00019932790141304096, |
| "loss": 0.4626, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.00019940129760394416, |
| "grad_norm": 2.5573880672454834, |
| "learning_rate": 0.00019930022125869357, |
| "loss": 0.6246, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0002027246525640099, |
| "grad_norm": 2.0912930965423584, |
| "learning_rate": 0.00019927198454957137, |
| "loss": 0.9089, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.00020604800752407562, |
| "grad_norm": 2.2763779163360596, |
| "learning_rate": 0.0001992431914439346, |
| "loss": 0.7671, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.00020937136248414136, |
| "grad_norm": 1.7758549451828003, |
| "learning_rate": 0.00019921384210316197, |
| "loss": 0.6254, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.0002126947174442071, |
| "grad_norm": 2.030876398086548, |
| "learning_rate": 0.0001991839366917497, |
| "loss": 1.4996, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.00021601807240427285, |
| "grad_norm": 3.710184097290039, |
| "learning_rate": 0.0001991534753773108, |
| "loss": 1.0784, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.00021934142736433857, |
| "grad_norm": 2.3565335273742676, |
| "learning_rate": 0.0001991224583305738, |
| "loss": 1.3819, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.0002226647823244043, |
| "grad_norm": 1.755561351776123, |
| "learning_rate": 0.00019909088572538214, |
| "loss": 0.605, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.00022598813728447005, |
| "grad_norm": 2.827974319458008, |
| "learning_rate": 0.00019905875773869292, |
| "loss": 1.2538, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.00022931149224453577, |
| "grad_norm": 1.7999393939971924, |
| "learning_rate": 0.00019902607455057612, |
| "loss": 0.8563, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.0002326348472046015, |
| "grad_norm": 1.5709620714187622, |
| "learning_rate": 0.00019899283634421342, |
| "loss": 0.89, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.00023595820216466726, |
| "grad_norm": 2.1778719425201416, |
| "learning_rate": 0.00019895904330589724, |
| "loss": 0.641, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.000239281557124733, |
| "grad_norm": 5.8329176902771, |
| "learning_rate": 0.00019892469562502984, |
| "loss": 1.005, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.00024260491208479872, |
| "grad_norm": 2.6885933876037598, |
| "learning_rate": 0.00019888979349412197, |
| "loss": 0.6898, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.0002459282670448645, |
| "grad_norm": 3.8890256881713867, |
| "learning_rate": 0.000198854337108792, |
| "loss": 1.1845, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.0002492516220049302, |
| "grad_norm": 2.175297260284424, |
| "learning_rate": 0.0001988183266677648, |
| "loss": 0.5369, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.0002525749769649959, |
| "grad_norm": 3.844905138015747, |
| "learning_rate": 0.00019878176237287054, |
| "loss": 1.4376, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.00025589833192506166, |
| "grad_norm": 2.4426724910736084, |
| "learning_rate": 0.00019874464442904362, |
| "loss": 0.7001, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.0002592216868851274, |
| "grad_norm": 1.2583106756210327, |
| "learning_rate": 0.00019870697304432154, |
| "loss": 0.5028, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.00026254504184519315, |
| "grad_norm": 2.751328706741333, |
| "learning_rate": 0.00019866874842984372, |
| "loss": 0.6537, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.0002658683968052589, |
| "grad_norm": 3.057467222213745, |
| "learning_rate": 0.0001986299707998503, |
| "loss": 0.8409, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.00026919175176532464, |
| "grad_norm": 3.0413947105407715, |
| "learning_rate": 0.0001985906403716809, |
| "loss": 0.7247, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.00027251510672539033, |
| "grad_norm": 1.5867135524749756, |
| "learning_rate": 0.00019855075736577345, |
| "loss": 0.8003, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.00027583846168545607, |
| "grad_norm": 2.737339496612549, |
| "learning_rate": 0.00019851032200566301, |
| "loss": 0.9347, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.0002791618166455218, |
| "grad_norm": 2.1772475242614746, |
| "learning_rate": 0.00019846933451798043, |
| "loss": 0.9581, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.00028248517160558756, |
| "grad_norm": 2.1294620037078857, |
| "learning_rate": 0.00019842779513245105, |
| "loss": 0.6898, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.0002858085265656533, |
| "grad_norm": 1.3660725355148315, |
| "learning_rate": 0.00019838570408189357, |
| "loss": 0.7278, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.00028913188152571905, |
| "grad_norm": 1.9248489141464233, |
| "learning_rate": 0.00019834306160221857, |
| "loss": 0.4563, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.0002924552364857848, |
| "grad_norm": 2.1242995262145996, |
| "learning_rate": 0.00019829986793242727, |
| "loss": 1.1302, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.0002957785914458505, |
| "grad_norm": 1.845307469367981, |
| "learning_rate": 0.00019825612331461027, |
| "loss": 0.7694, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.0002991019464059162, |
| "grad_norm": 2.346177339553833, |
| "learning_rate": 0.00019821182799394595, |
| "loss": 0.9094, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.00030242530136598197, |
| "grad_norm": 1.6282812356948853, |
| "learning_rate": 0.0001981669822186994, |
| "loss": 0.3128, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.0003057486563260477, |
| "grad_norm": 1.948099970817566, |
| "learning_rate": 0.00019812158624022075, |
| "loss": 1.2383, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.00030907201128611345, |
| "grad_norm": 2.2840962409973145, |
| "learning_rate": 0.000198075640312944, |
| "loss": 0.9425, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.0003123953662461792, |
| "grad_norm": 2.1497271060943604, |
| "learning_rate": 0.0001980291446943855, |
| "loss": 1.0945, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.00031571872120624494, |
| "grad_norm": 3.017632246017456, |
| "learning_rate": 0.00019798209964514237, |
| "loss": 0.8373, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.00031904207616631063, |
| "grad_norm": 2.1620545387268066, |
| "learning_rate": 0.00019793450542889124, |
| "loss": 0.9743, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.0003223654311263764, |
| "grad_norm": 2.597017526626587, |
| "learning_rate": 0.0001978863623123867, |
| "loss": 0.8165, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.0003256887860864421, |
| "grad_norm": 1.6289966106414795, |
| "learning_rate": 0.00019783767056545976, |
| "loss": 0.6275, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.00032901214104650786, |
| "grad_norm": 2.287369966506958, |
| "learning_rate": 0.00019778843046101643, |
| "loss": 0.717, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.0003323354960065736, |
| "grad_norm": 3.0594425201416016, |
| "learning_rate": 0.00019773864227503612, |
| "loss": 1.1728, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.00033565885096663935, |
| "grad_norm": 2.5068864822387695, |
| "learning_rate": 0.00019768830628657004, |
| "loss": 1.0075, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.0003389822059267051, |
| "grad_norm": 2.005697727203369, |
| "learning_rate": 0.0001976374227777398, |
| "loss": 0.6371, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.0003423055608867708, |
| "grad_norm": 1.5179638862609863, |
| "learning_rate": 0.00019758599203373574, |
| "loss": 0.5727, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.0003456289158468365, |
| "grad_norm": 2.4509246349334717, |
| "learning_rate": 0.0001975340143428152, |
| "loss": 1.027, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.00034895227080690227, |
| "grad_norm": 1.57172429561615, |
| "learning_rate": 0.00019748148999630116, |
| "loss": 0.6933, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.000352275625766968, |
| "grad_norm": 2.8001174926757812, |
| "learning_rate": 0.00019742841928858048, |
| "loss": 0.5884, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.00035559898072703376, |
| "grad_norm": 1.4794889688491821, |
| "learning_rate": 0.00019737480251710209, |
| "loss": 0.5695, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.0003589223356870995, |
| "grad_norm": 1.9946357011795044, |
| "learning_rate": 0.0001973206399823757, |
| "loss": 1.0238, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.00036224569064716524, |
| "grad_norm": 1.5432323217391968, |
| "learning_rate": 0.00019726593198796975, |
| "loss": 0.8537, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.00036556904560723093, |
| "grad_norm": 2.5868325233459473, |
| "learning_rate": 0.0001972106788405099, |
| "loss": 1.4079, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0003688924005672967, |
| "grad_norm": 1.8962173461914062, |
| "learning_rate": 0.00019715488084967727, |
| "loss": 0.6017, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.0003722157555273624, |
| "grad_norm": 1.5898348093032837, |
| "learning_rate": 0.00019709853832820665, |
| "loss": 0.9094, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.00037553911048742816, |
| "grad_norm": 2.134303569793701, |
| "learning_rate": 0.0001970416515918849, |
| "loss": 0.7571, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.0003788624654474939, |
| "grad_norm": 2.37445068359375, |
| "learning_rate": 0.000196984220959549, |
| "loss": 0.8375, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.00038218582040755965, |
| "grad_norm": 2.937535285949707, |
| "learning_rate": 0.00019692624675308435, |
| "loss": 0.9742, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.0003855091753676254, |
| "grad_norm": 2.049798011779785, |
| "learning_rate": 0.000196867729297423, |
| "loss": 0.8425, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.0003888325303276911, |
| "grad_norm": 2.130037546157837, |
| "learning_rate": 0.00019680866892054174, |
| "loss": 0.7941, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.00039215588528775683, |
| "grad_norm": 1.7274867296218872, |
| "learning_rate": 0.00019674906595346027, |
| "loss": 0.635, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.00039547924024782257, |
| "grad_norm": 2.4574530124664307, |
| "learning_rate": 0.00019668892073023954, |
| "loss": 1.0262, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.0003988025952078883, |
| "grad_norm": 3.3932619094848633, |
| "learning_rate": 0.00019662823358797951, |
| "loss": 1.021, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.00040212595016795406, |
| "grad_norm": 3.7460312843322754, |
| "learning_rate": 0.0001965670048668177, |
| "loss": 0.666, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.0004054493051280198, |
| "grad_norm": 2.45210337638855, |
| "learning_rate": 0.00019650523490992683, |
| "loss": 0.7837, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.00040877266008808555, |
| "grad_norm": 1.7856807708740234, |
| "learning_rate": 0.00019644292406351322, |
| "loss": 0.7383, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.00041209601504815124, |
| "grad_norm": 1.8551324605941772, |
| "learning_rate": 0.0001963800726768148, |
| "loss": 0.7183, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.000415419370008217, |
| "grad_norm": 1.6712207794189453, |
| "learning_rate": 0.00019631668110209907, |
| "loss": 1.2395, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.0004187427249682827, |
| "grad_norm": 2.197774648666382, |
| "learning_rate": 0.00019625274969466106, |
| "loss": 0.5523, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.00042206607992834847, |
| "grad_norm": 2.847198009490967, |
| "learning_rate": 0.00019618827881282168, |
| "loss": 1.0058, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.0004253894348884142, |
| "grad_norm": 4.338975429534912, |
| "learning_rate": 0.00019612326881792512, |
| "loss": 1.3433, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.00042871278984847995, |
| "grad_norm": 1.8630313873291016, |
| "learning_rate": 0.0001960577200743375, |
| "loss": 0.586, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.0004320361448085457, |
| "grad_norm": 2.66900634765625, |
| "learning_rate": 0.0001959916329494443, |
| "loss": 0.9871, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.0004353594997686114, |
| "grad_norm": 2.0445942878723145, |
| "learning_rate": 0.00019592500781364866, |
| "loss": 0.8108, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.00043868285472867713, |
| "grad_norm": 2.0626654624938965, |
| "learning_rate": 0.00019585784504036894, |
| "loss": 1.0747, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.0004420062096887429, |
| "grad_norm": 1.622671127319336, |
| "learning_rate": 0.00019579014500603704, |
| "loss": 1.1958, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.0004453295646488086, |
| "grad_norm": 1.9433460235595703, |
| "learning_rate": 0.00019572190809009595, |
| "loss": 0.855, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.00044865291960887436, |
| "grad_norm": 1.5850857496261597, |
| "learning_rate": 0.00019565313467499783, |
| "loss": 0.842, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.0004519762745689401, |
| "grad_norm": 1.862047553062439, |
| "learning_rate": 0.00019558382514620176, |
| "loss": 1.0577, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.00045529962952900585, |
| "grad_norm": 2.423488140106201, |
| "learning_rate": 0.00019551397989217158, |
| "loss": 0.9635, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.00045862298448907154, |
| "grad_norm": 1.421524167060852, |
| "learning_rate": 0.00019544359930437382, |
| "loss": 0.6248, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.0004619463394491373, |
| "grad_norm": 2.424243211746216, |
| "learning_rate": 0.0001953726837772754, |
| "loss": 0.8793, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.000465269694409203, |
| "grad_norm": 1.9087179899215698, |
| "learning_rate": 0.0001953012337083415, |
| "loss": 1.0438, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.00046859304936926877, |
| "grad_norm": 1.9860990047454834, |
| "learning_rate": 0.0001952292494980331, |
| "loss": 0.8836, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.0004719164043293345, |
| "grad_norm": 1.922594428062439, |
| "learning_rate": 0.00019515673154980515, |
| "loss": 0.7314, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.00047523975928940026, |
| "grad_norm": 2.4743568897247314, |
| "learning_rate": 0.00019508368027010395, |
| "loss": 0.7213, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.000478563114249466, |
| "grad_norm": 2.92726731300354, |
| "learning_rate": 0.00019501009606836503, |
| "loss": 1.0991, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.0004818864692095317, |
| "grad_norm": 3.6583187580108643, |
| "learning_rate": 0.00019493597935701086, |
| "loss": 1.1298, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.00048520982416959743, |
| "grad_norm": 1.9078856706619263, |
| "learning_rate": 0.00019486133055144839, |
| "loss": 0.9963, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.0004885331791296632, |
| "grad_norm": 2.1639506816864014, |
| "learning_rate": 0.00019478615007006698, |
| "loss": 0.9563, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.000491856534089729, |
| "grad_norm": 3.626147985458374, |
| "learning_rate": 0.0001947104383342358, |
| "loss": 0.5332, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.0004951798890497946, |
| "grad_norm": 2.0015130043029785, |
| "learning_rate": 0.00019463419576830164, |
| "loss": 0.4274, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.0004985032440098604, |
| "grad_norm": 2.0864193439483643, |
| "learning_rate": 0.00019455742279958645, |
| "loss": 0.5563, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.0005018265989699261, |
| "grad_norm": 2.014056921005249, |
| "learning_rate": 0.00019448011985838496, |
| "loss": 0.8894, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.0005051499539299918, |
| "grad_norm": 2.897721529006958, |
| "learning_rate": 0.00019440228737796226, |
| "loss": 0.7464, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.0005084733088900576, |
| "grad_norm": 1.6992590427398682, |
| "learning_rate": 0.00019432392579455142, |
| "loss": 1.0114, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.0005117966638501233, |
| "grad_norm": 2.061690092086792, |
| "learning_rate": 0.000194245035547351, |
| "loss": 0.8523, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.0005151200188101891, |
| "grad_norm": 1.9756520986557007, |
| "learning_rate": 0.00019416561707852255, |
| "loss": 0.8939, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.0005184433737702548, |
| "grad_norm": 3.0679516792297363, |
| "learning_rate": 0.00019408567083318827, |
| "loss": 0.8342, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.0005217667287303206, |
| "grad_norm": 2.8347623348236084, |
| "learning_rate": 0.00019400519725942835, |
| "loss": 0.9611, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.0005250900836903863, |
| "grad_norm": 1.632202386856079, |
| "learning_rate": 0.00019392419680827857, |
| "loss": 0.523, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.000528413438650452, |
| "grad_norm": 1.9706271886825562, |
| "learning_rate": 0.00019384266993372772, |
| "loss": 1.2523, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.0005317367936105178, |
| "grad_norm": 1.5293477773666382, |
| "learning_rate": 0.0001937606170927151, |
| "loss": 0.9098, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.0005350601485705835, |
| "grad_norm": 2.504441976547241, |
| "learning_rate": 0.00019367803874512785, |
| "loss": 0.6785, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.0005383835035306493, |
| "grad_norm": 2.0011444091796875, |
| "learning_rate": 0.00019359493535379857, |
| "loss": 0.7819, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.0005417068584907149, |
| "grad_norm": 2.698025941848755, |
| "learning_rate": 0.0001935113073845025, |
| "loss": 1.2558, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.0005450302134507807, |
| "grad_norm": 1.6795804500579834, |
| "learning_rate": 0.0001934271553059551, |
| "loss": 0.9447, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.0005483535684108464, |
| "grad_norm": 2.05863094329834, |
| "learning_rate": 0.0001933424795898093, |
| "loss": 0.9352, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.0005516769233709121, |
| "grad_norm": 2.321521520614624, |
| "learning_rate": 0.0001932572807106529, |
| "loss": 0.8903, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.0005550002783309779, |
| "grad_norm": 1.3765385150909424, |
| "learning_rate": 0.00019317155914600593, |
| "loss": 1.0314, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.0005583236332910436, |
| "grad_norm": 2.1478731632232666, |
| "learning_rate": 0.0001930853153763179, |
| "loss": 1.0972, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.0005616469882511094, |
| "grad_norm": 1.5712205171585083, |
| "learning_rate": 0.00019299854988496525, |
| "loss": 0.6613, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.0005649703432111751, |
| "grad_norm": 2.106233596801758, |
| "learning_rate": 0.00019291126315824846, |
| "loss": 0.674, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.0005682936981712409, |
| "grad_norm": 2.7500600814819336, |
| "learning_rate": 0.00019282345568538939, |
| "loss": 0.7301, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.0005716170531313066, |
| "grad_norm": 2.4417264461517334, |
| "learning_rate": 0.00019273512795852873, |
| "loss": 0.8071, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.0005749404080913723, |
| "grad_norm": 2.697852849960327, |
| "learning_rate": 0.00019264628047272289, |
| "loss": 0.7752, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.0005782637630514381, |
| "grad_norm": 2.2733891010284424, |
| "learning_rate": 0.00019255691372594148, |
| "loss": 0.7162, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.0005815871180115038, |
| "grad_norm": 2.801717758178711, |
| "learning_rate": 0.0001924670282190645, |
| "loss": 0.9177, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.0005849104729715696, |
| "grad_norm": 2.3935840129852295, |
| "learning_rate": 0.00019237662445587936, |
| "loss": 1.0933, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.0005882338279316352, |
| "grad_norm": 3.074425220489502, |
| "learning_rate": 0.00019228570294307828, |
| "loss": 0.7686, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.000591557182891701, |
| "grad_norm": 2.155723810195923, |
| "learning_rate": 0.00019219426419025534, |
| "loss": 0.9334, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.0005948805378517667, |
| "grad_norm": 2.251619577407837, |
| "learning_rate": 0.00019210230870990352, |
| "loss": 0.7433, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.0005982038928118324, |
| "grad_norm": 1.779147982597351, |
| "learning_rate": 0.0001920098370174121, |
| "loss": 0.8434, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.0006015272477718982, |
| "grad_norm": 1.5720431804656982, |
| "learning_rate": 0.00019191684963106349, |
| "loss": 1.1236, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.0006048506027319639, |
| "grad_norm": 2.1575276851654053, |
| "learning_rate": 0.0001918233470720305, |
| "loss": 0.6812, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.0006081739576920297, |
| "grad_norm": 2.104848623275757, |
| "learning_rate": 0.00019172932986437333, |
| "loss": 0.8865, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.0006114973126520954, |
| "grad_norm": 2.6855051517486572, |
| "learning_rate": 0.00019163479853503672, |
| "loss": 0.864, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.0006148206676121612, |
| "grad_norm": 6.135538578033447, |
| "learning_rate": 0.00019153975361384687, |
| "loss": 1.0433, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.0006181440225722269, |
| "grad_norm": 1.9011839628219604, |
| "learning_rate": 0.00019144419563350858, |
| "loss": 1.0538, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.0006214673775322927, |
| "grad_norm": 1.792335033416748, |
| "learning_rate": 0.00019134812512960233, |
| "loss": 0.9006, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.0006247907324923584, |
| "grad_norm": 2.9424028396606445, |
| "learning_rate": 0.00019125154264058094, |
| "loss": 1.0332, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.0006281140874524241, |
| "grad_norm": 2.4385499954223633, |
| "learning_rate": 0.00019115444870776695, |
| "loss": 0.3531, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.0006314374424124899, |
| "grad_norm": 1.7163375616073608, |
| "learning_rate": 0.00019105684387534948, |
| "loss": 1.258, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.0006347607973725555, |
| "grad_norm": 2.8776745796203613, |
| "learning_rate": 0.00019095872869038098, |
| "loss": 0.5629, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.0006380841523326213, |
| "grad_norm": 2.397474765777588, |
| "learning_rate": 0.00019086010370277437, |
| "loss": 1.0474, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.000641407507292687, |
| "grad_norm": 2.938826084136963, |
| "learning_rate": 0.00019076096946529992, |
| "loss": 1.0489, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.0006447308622527527, |
| "grad_norm": 1.9875274896621704, |
| "learning_rate": 0.0001906613265335821, |
| "loss": 1.1448, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.0006480542172128185, |
| "grad_norm": 2.6492698192596436, |
| "learning_rate": 0.00019056117546609647, |
| "loss": 1.0026, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.0006513775721728842, |
| "grad_norm": 1.7564932107925415, |
| "learning_rate": 0.00019046051682416662, |
| "loss": 1.268, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.00065470092713295, |
| "grad_norm": 2.032909631729126, |
| "learning_rate": 0.00019035935117196097, |
| "loss": 0.6964, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.0006580242820930157, |
| "grad_norm": 3.4252007007598877, |
| "learning_rate": 0.00019025767907648958, |
| "loss": 1.0756, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.0006613476370530815, |
| "grad_norm": 2.259429454803467, |
| "learning_rate": 0.00019015550110760106, |
| "loss": 1.1985, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.0006646709920131472, |
| "grad_norm": 2.154845714569092, |
| "learning_rate": 0.00019005281783797927, |
| "loss": 0.8484, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.000667994346973213, |
| "grad_norm": 1.7857868671417236, |
| "learning_rate": 0.0001899496298431402, |
| "loss": 1.178, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.0006713177019332787, |
| "grad_norm": 1.5021724700927734, |
| "learning_rate": 0.0001898459377014287, |
| "loss": 0.9586, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.0006746410568933444, |
| "grad_norm": 2.9662904739379883, |
| "learning_rate": 0.00018974174199401525, |
| "loss": 1.0109, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.0006779644118534102, |
| "grad_norm": 2.3649721145629883, |
| "learning_rate": 0.00018963704330489262, |
| "loss": 0.7838, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.0006812877668134758, |
| "grad_norm": 2.1048192977905273, |
| "learning_rate": 0.00018953184222087285, |
| "loss": 1.3349, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.0006846111217735416, |
| "grad_norm": 2.5267252922058105, |
| "learning_rate": 0.00018942613933158365, |
| "loss": 1.2447, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.0006879344767336073, |
| "grad_norm": 2.2604713439941406, |
| "learning_rate": 0.00018931993522946526, |
| "loss": 0.9698, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.000691257831693673, |
| "grad_norm": 2.060702085494995, |
| "learning_rate": 0.00018921323050976712, |
| "loss": 1.1335, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.0006945811866537388, |
| "grad_norm": 1.4325411319732666, |
| "learning_rate": 0.0001891060257705445, |
| "loss": 0.8489, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.0006979045416138045, |
| "grad_norm": 1.696060299873352, |
| "learning_rate": 0.00018899832161265514, |
| "loss": 1.0021, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.0007012278965738703, |
| "grad_norm": 2.194124221801758, |
| "learning_rate": 0.000188890118639756, |
| "loss": 0.8379, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.000704551251533936, |
| "grad_norm": 2.39493727684021, |
| "learning_rate": 0.00018878141745829965, |
| "loss": 0.8481, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.0007078746064940018, |
| "grad_norm": 2.1452245712280273, |
| "learning_rate": 0.0001886722186775311, |
| "loss": 0.7282, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.0007111979614540675, |
| "grad_norm": 4.161812782287598, |
| "learning_rate": 0.0001885625229094843, |
| "loss": 1.1232, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.0007145213164141333, |
| "grad_norm": 2.658095359802246, |
| "learning_rate": 0.00018845233076897864, |
| "loss": 0.7978, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.000717844671374199, |
| "grad_norm": 2.6832375526428223, |
| "learning_rate": 0.00018834164287361553, |
| "loss": 1.0845, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.0007211680263342647, |
| "grad_norm": 1.4918650388717651, |
| "learning_rate": 0.0001882304598437751, |
| "loss": 1.1464, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.0007244913812943305, |
| "grad_norm": 2.185304641723633, |
| "learning_rate": 0.00018811878230261245, |
| "loss": 0.9031, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.0007278147362543961, |
| "grad_norm": 2.20607590675354, |
| "learning_rate": 0.00018800661087605444, |
| "loss": 0.7687, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.0007311380912144619, |
| "grad_norm": 1.7606959342956543, |
| "learning_rate": 0.0001878939461927959, |
| "loss": 0.7721, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.0007344614461745276, |
| "grad_norm": 1.9390268325805664, |
| "learning_rate": 0.0001877807888842964, |
| "loss": 0.7985, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.0007377848011345934, |
| "grad_norm": 1.9534977674484253, |
| "learning_rate": 0.00018766713958477644, |
| "loss": 0.7436, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.0007411081560946591, |
| "grad_norm": 2.0108580589294434, |
| "learning_rate": 0.00018755299893121404, |
| "loss": 0.8744, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.0007444315110547248, |
| "grad_norm": 2.3469605445861816, |
| "learning_rate": 0.0001874383675633412, |
| "loss": 0.8103, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.0007477548660147906, |
| "grad_norm": 2.6010794639587402, |
| "learning_rate": 0.00018732324612364022, |
| "loss": 0.6754, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.0007510782209748563, |
| "grad_norm": 2.374936819076538, |
| "learning_rate": 0.00018720763525734017, |
| "loss": 0.9731, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.0007544015759349221, |
| "grad_norm": 1.8234738111495972, |
| "learning_rate": 0.00018709153561241316, |
| "loss": 0.9814, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.0007577249308949878, |
| "grad_norm": 2.046567678451538, |
| "learning_rate": 0.0001869749478395709, |
| "loss": 1.3241, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.0007610482858550536, |
| "grad_norm": 2.928182601928711, |
| "learning_rate": 0.00018685787259226086, |
| "loss": 0.8543, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.0007643716408151193, |
| "grad_norm": 3.1443798542022705, |
| "learning_rate": 0.00018674031052666272, |
| "loss": 0.7922, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.000767694995775185, |
| "grad_norm": 3.132957935333252, |
| "learning_rate": 0.00018662226230168472, |
| "loss": 0.732, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.0007710183507352508, |
| "grad_norm": 4.542943477630615, |
| "learning_rate": 0.0001865037285789598, |
| "loss": 1.0421, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.0007743417056953165, |
| "grad_norm": 2.511564016342163, |
| "learning_rate": 0.0001863847100228421, |
| "loss": 1.0499, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.0007776650606553822, |
| "grad_norm": 2.294689655303955, |
| "learning_rate": 0.0001862652073004031, |
| "loss": 0.731, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.0007809884156154479, |
| "grad_norm": 2.206822395324707, |
| "learning_rate": 0.00018614522108142788, |
| "loss": 1.0678, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.0007843117705755137, |
| "grad_norm": 1.5954980850219727, |
| "learning_rate": 0.00018602475203841152, |
| "loss": 1.0332, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.0007876351255355794, |
| "grad_norm": 2.328080177307129, |
| "learning_rate": 0.0001859038008465551, |
| "loss": 0.7923, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.0007909584804956451, |
| "grad_norm": 3.6944761276245117, |
| "learning_rate": 0.00018578236818376207, |
| "loss": 0.68, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.0007942818354557109, |
| "grad_norm": 2.6158041954040527, |
| "learning_rate": 0.00018566045473063442, |
| "loss": 0.9602, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.0007976051904157766, |
| "grad_norm": 3.4564719200134277, |
| "learning_rate": 0.0001855380611704689, |
| "loss": 0.8778, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.0008009285453758424, |
| "grad_norm": 2.708306074142456, |
| "learning_rate": 0.00018541518818925308, |
| "loss": 0.9371, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.0008042519003359081, |
| "grad_norm": 2.519165515899658, |
| "learning_rate": 0.0001852918364756616, |
| "loss": 0.8222, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.0008075752552959739, |
| "grad_norm": 2.298029899597168, |
| "learning_rate": 0.00018516800672105228, |
| "loss": 0.7957, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.0008108986102560396, |
| "grad_norm": 3.7880380153656006, |
| "learning_rate": 0.00018504369961946227, |
| "loss": 0.6957, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.0008142219652161053, |
| "grad_norm": 1.516716718673706, |
| "learning_rate": 0.00018491891586760412, |
| "loss": 1.2474, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.0008175453201761711, |
| "grad_norm": 2.2521932125091553, |
| "learning_rate": 0.00018479365616486194, |
| "loss": 1.1017, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.0008208686751362368, |
| "grad_norm": 2.2281289100646973, |
| "learning_rate": 0.0001846679212132873, |
| "loss": 0.8114, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.0008241920300963025, |
| "grad_norm": 2.63161301612854, |
| "learning_rate": 0.00018454171171759562, |
| "loss": 0.562, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.0008275153850563682, |
| "grad_norm": 2.087157964706421, |
| "learning_rate": 0.0001844150283851619, |
| "loss": 0.9418, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.000830838740016434, |
| "grad_norm": 2.3542277812957764, |
| "learning_rate": 0.00018428787192601692, |
| "loss": 0.9547, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.0008341620949764997, |
| "grad_norm": 2.7823688983917236, |
| "learning_rate": 0.00018416024305284318, |
| "loss": 1.1647, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.0008374854499365654, |
| "grad_norm": 3.1965949535369873, |
| "learning_rate": 0.00018403214248097108, |
| "loss": 0.9733, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.0008408088048966312, |
| "grad_norm": 2.3127360343933105, |
| "learning_rate": 0.00018390357092837464, |
| "loss": 0.6744, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.0008441321598566969, |
| "grad_norm": 2.6081366539001465, |
| "learning_rate": 0.0001837745291156677, |
| "loss": 0.4712, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.0008474555148167627, |
| "grad_norm": 2.655521869659424, |
| "learning_rate": 0.00018364501776609978, |
| "loss": 0.8526, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.0008507788697768284, |
| "grad_norm": 2.5998477935791016, |
| "learning_rate": 0.000183515037605552, |
| "loss": 0.6903, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.0008541022247368942, |
| "grad_norm": 1.3501770496368408, |
| "learning_rate": 0.00018338458936253323, |
| "loss": 0.6203, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.0008574255796969599, |
| "grad_norm": 2.938946008682251, |
| "learning_rate": 0.00018325367376817553, |
| "loss": 1.0738, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.0008607489346570257, |
| "grad_norm": 1.3566104173660278, |
| "learning_rate": 0.00018312229155623063, |
| "loss": 0.8905, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.0008640722896170914, |
| "grad_norm": 1.6955430507659912, |
| "learning_rate": 0.0001829904434630654, |
| "loss": 0.8945, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.0008673956445771571, |
| "grad_norm": 2.2695062160491943, |
| "learning_rate": 0.00018285813022765794, |
| "loss": 1.3401, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.0008707189995372228, |
| "grad_norm": 2.285875082015991, |
| "learning_rate": 0.00018272535259159333, |
| "loss": 0.5588, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.0008740423544972885, |
| "grad_norm": 2.5898141860961914, |
| "learning_rate": 0.0001825921112990595, |
| "loss": 0.5749, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.0008773657094573543, |
| "grad_norm": 1.8657327890396118, |
| "learning_rate": 0.0001824584070968431, |
| "loss": 1.1243, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.00088068906441742, |
| "grad_norm": 3.04009747505188, |
| "learning_rate": 0.00018232424073432523, |
| "loss": 0.6605, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.0008840124193774857, |
| "grad_norm": 1.8760170936584473, |
| "learning_rate": 0.0001821896129634773, |
| "loss": 0.8554, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.0008873357743375515, |
| "grad_norm": 3.241586685180664, |
| "learning_rate": 0.00018205452453885692, |
| "loss": 1.3335, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.0008906591292976172, |
| "grad_norm": 2.223585367202759, |
| "learning_rate": 0.0001819189762176034, |
| "loss": 0.7643, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.000893982484257683, |
| "grad_norm": 2.3313605785369873, |
| "learning_rate": 0.00018178296875943377, |
| "loss": 0.9619, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.0008973058392177487, |
| "grad_norm": 2.2432057857513428, |
| "learning_rate": 0.00018164650292663837, |
| "loss": 0.9027, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.0009006291941778145, |
| "grad_norm": 2.1724648475646973, |
| "learning_rate": 0.00018150957948407655, |
| "loss": 0.8207, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.0009039525491378802, |
| "grad_norm": 1.8221163749694824, |
| "learning_rate": 0.00018137219919917268, |
| "loss": 0.564, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.000907275904097946, |
| "grad_norm": 2.9687201976776123, |
| "learning_rate": 0.0001812343628419114, |
| "loss": 0.6431, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.0009105992590580117, |
| "grad_norm": 1.6138179302215576, |
| "learning_rate": 0.0001810960711848336, |
| "loss": 0.8986, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.0009139226140180774, |
| "grad_norm": 2.7505204677581787, |
| "learning_rate": 0.0001809573250030321, |
| "loss": 0.559, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.0009172459689781431, |
| "grad_norm": 1.9008560180664062, |
| "learning_rate": 0.00018081812507414707, |
| "loss": 0.7741, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.0009205693239382088, |
| "grad_norm": 1.8047677278518677, |
| "learning_rate": 0.00018067847217836204, |
| "loss": 0.8689, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.0009238926788982746, |
| "grad_norm": 2.580085039138794, |
| "learning_rate": 0.00018053836709839907, |
| "loss": 1.1604, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.0009272160338583403, |
| "grad_norm": 2.055727243423462, |
| "learning_rate": 0.00018039781061951482, |
| "loss": 0.8672, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.000930539388818406, |
| "grad_norm": 3.0733115673065186, |
| "learning_rate": 0.0001802568035294958, |
| "loss": 0.9887, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.0009338627437784718, |
| "grad_norm": 2.3724474906921387, |
| "learning_rate": 0.00018011534661865414, |
| "loss": 0.8757, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.0009371860987385375, |
| "grad_norm": 2.3270065784454346, |
| "learning_rate": 0.00017997344067982314, |
| "loss": 0.8757, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.0009405094536986033, |
| "grad_norm": 1.917435646057129, |
| "learning_rate": 0.00017983108650835273, |
| "loss": 1.0723, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.000943832808658669, |
| "grad_norm": 2.466601848602295, |
| "learning_rate": 0.00017968828490210517, |
| "loss": 0.8578, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.0009471561636187348, |
| "grad_norm": 1.9273431301116943, |
| "learning_rate": 0.00017954503666145038, |
| "loss": 0.5289, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.0009504795185788005, |
| "grad_norm": 1.703373670578003, |
| "learning_rate": 0.00017940134258926166, |
| "loss": 0.9778, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.0009538028735388663, |
| "grad_norm": 2.1906208992004395, |
| "learning_rate": 0.00017925720349091107, |
| "loss": 1.0434, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.000957126228498932, |
| "grad_norm": 1.8268455266952515, |
| "learning_rate": 0.00017911262017426482, |
| "loss": 0.5991, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.0009604495834589977, |
| "grad_norm": 2.182431697845459, |
| "learning_rate": 0.00017896759344967906, |
| "loss": 1.0621, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.0009637729384190634, |
| "grad_norm": 2.1958398818969727, |
| "learning_rate": 0.00017882212412999505, |
| "loss": 1.0184, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.0009670962933791291, |
| "grad_norm": 2.3882758617401123, |
| "learning_rate": 0.0001786762130305346, |
| "loss": 0.9169, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.0009704196483391949, |
| "grad_norm": 2.064112901687622, |
| "learning_rate": 0.00017852986096909574, |
| "loss": 1.1755, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.0009737430032992606, |
| "grad_norm": 1.9005621671676636, |
| "learning_rate": 0.00017838306876594788, |
| "loss": 0.9455, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.0009770663582593265, |
| "grad_norm": 2.704106330871582, |
| "learning_rate": 0.00017823583724382744, |
| "loss": 0.8015, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.000980389713219392, |
| "grad_norm": 1.8319103717803955, |
| "learning_rate": 0.000178088167227933, |
| "loss": 1.1968, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.000983713068179458, |
| "grad_norm": 2.681913137435913, |
| "learning_rate": 0.00017794005954592084, |
| "loss": 0.7402, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.0009870364231395236, |
| "grad_norm": 1.8737455606460571, |
| "learning_rate": 0.00017779151502790027, |
| "loss": 1.0434, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.0009903597780995892, |
| "grad_norm": 2.2911226749420166, |
| "learning_rate": 0.00017764253450642902, |
| "loss": 0.562, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.000993683133059655, |
| "grad_norm": 2.5499420166015625, |
| "learning_rate": 0.00017749311881650837, |
| "loss": 1.1258, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.0009970064880197207, |
| "grad_norm": 1.6716042757034302, |
| "learning_rate": 0.00017734326879557876, |
| "loss": 0.5659, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.0010003298429797866, |
| "grad_norm": 4.81651496887207, |
| "learning_rate": 0.00017719298528351489, |
| "loss": 0.9533, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.0010036531979398522, |
| "grad_norm": 2.0948708057403564, |
| "learning_rate": 0.00017704226912262108, |
| "loss": 0.9046, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.001006976552899918, |
| "grad_norm": 3.2504518032073975, |
| "learning_rate": 0.00017689112115762656, |
| "loss": 1.1282, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.0010102999078599837, |
| "grad_norm": 2.3687779903411865, |
| "learning_rate": 0.00017673954223568073, |
| "loss": 1.3698, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.0010136232628200495, |
| "grad_norm": 1.7902189493179321, |
| "learning_rate": 0.00017658753320634835, |
| "loss": 1.0331, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.0010169466177801152, |
| "grad_norm": 2.5034868717193604, |
| "learning_rate": 0.00017643509492160493, |
| "loss": 0.9221, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.001020269972740181, |
| "grad_norm": 2.9678382873535156, |
| "learning_rate": 0.00017628222823583175, |
| "loss": 1.1739, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.0010235933277002467, |
| "grad_norm": 2.8975117206573486, |
| "learning_rate": 0.0001761289340058113, |
| "loss": 0.8452, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.0010269166826603125, |
| "grad_norm": 2.479252576828003, |
| "learning_rate": 0.0001759752130907222, |
| "loss": 0.6775, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.0010302400376203781, |
| "grad_norm": 1.6846801042556763, |
| "learning_rate": 0.0001758210663521347, |
| "loss": 0.8358, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.0010335633925804438, |
| "grad_norm": 1.4105985164642334, |
| "learning_rate": 0.00017566649465400555, |
| "loss": 1.1666, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.0010368867475405096, |
| "grad_norm": 1.7234609127044678, |
| "learning_rate": 0.00017551149886267347, |
| "loss": 1.0065, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.0010402101025005753, |
| "grad_norm": 2.2938060760498047, |
| "learning_rate": 0.00017535607984685392, |
| "loss": 0.7772, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.0010435334574606411, |
| "grad_norm": 1.7764406204223633, |
| "learning_rate": 0.00017520023847763456, |
| "loss": 0.5399, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.0010468568124207068, |
| "grad_norm": 3.057908058166504, |
| "learning_rate": 0.0001750439756284703, |
| "loss": 0.7325, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.0010501801673807726, |
| "grad_norm": 2.3120014667510986, |
| "learning_rate": 0.00017488729217517818, |
| "loss": 0.7843, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.0010535035223408382, |
| "grad_norm": 2.835329055786133, |
| "learning_rate": 0.00017473018899593276, |
| "loss": 0.7952, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.001056826877300904, |
| "grad_norm": 3.063936233520508, |
| "learning_rate": 0.00017457266697126103, |
| "loss": 1.0372, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.0010601502322609697, |
| "grad_norm": 2.0668740272521973, |
| "learning_rate": 0.0001744147269840375, |
| "loss": 1.2189, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.0010634735872210356, |
| "grad_norm": 1.467910885810852, |
| "learning_rate": 0.00017425636991947926, |
| "loss": 0.4099, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.0010667969421811012, |
| "grad_norm": 2.1655707359313965, |
| "learning_rate": 0.00017409759666514108, |
| "loss": 1.064, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.001070120297141167, |
| "grad_norm": 2.4154369831085205, |
| "learning_rate": 0.00017393840811091025, |
| "loss": 0.827, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.0010734436521012327, |
| "grad_norm": 1.3061712980270386, |
| "learning_rate": 0.00017377880514900186, |
| "loss": 0.4219, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.0010767670070612986, |
| "grad_norm": 2.6192286014556885, |
| "learning_rate": 0.00017361878867395357, |
| "loss": 0.8404, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.0010800903620213642, |
| "grad_norm": 2.863708257675171, |
| "learning_rate": 0.00017345835958262076, |
| "loss": 0.7554, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.0010834137169814298, |
| "grad_norm": 3.0085036754608154, |
| "learning_rate": 0.00017329751877417132, |
| "loss": 0.8845, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.0010867370719414957, |
| "grad_norm": 2.4132509231567383, |
| "learning_rate": 0.00017313626715008087, |
| "loss": 0.7935, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.0010900604269015613, |
| "grad_norm": 1.584530234336853, |
| "learning_rate": 0.00017297460561412737, |
| "loss": 0.8238, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.0010933837818616272, |
| "grad_norm": 3.4459643363952637, |
| "learning_rate": 0.0001728125350723864, |
| "loss": 0.7411, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.0010967071368216928, |
| "grad_norm": 1.6007535457611084, |
| "learning_rate": 0.00017265005643322584, |
| "loss": 1.0634, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.0011000304917817587, |
| "grad_norm": 2.6975879669189453, |
| "learning_rate": 0.00017248717060730094, |
| "loss": 1.0681, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.0011033538467418243, |
| "grad_norm": 2.289832353591919, |
| "learning_rate": 0.00017232387850754904, |
| "loss": 0.6341, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.0011066772017018901, |
| "grad_norm": 1.5257370471954346, |
| "learning_rate": 0.00017216018104918455, |
| "loss": 0.5519, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.0011100005566619558, |
| "grad_norm": 3.0421066284179688, |
| "learning_rate": 0.00017199607914969396, |
| "loss": 1.0758, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.0011133239116220216, |
| "grad_norm": 2.0122852325439453, |
| "learning_rate": 0.0001718315737288304, |
| "loss": 0.7425, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.0011133239116220216, |
| "eval_loss": 0.8063258528709412, |
| "eval_runtime": 3233.4173, |
| "eval_samples_per_second": 39.183, |
| "eval_steps_per_second": 19.592, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.0011166472665820873, |
| "grad_norm": 2.3147132396698, |
| "learning_rate": 0.0001716666657086087, |
| "loss": 1.1424, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.0011199706215421531, |
| "grad_norm": 3.1851842403411865, |
| "learning_rate": 0.00017150135601330023, |
| "loss": 0.7141, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.0011232939765022187, |
| "grad_norm": 2.0654189586639404, |
| "learning_rate": 0.00017133564556942753, |
| "loss": 1.054, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.0011266173314622846, |
| "grad_norm": 2.193204402923584, |
| "learning_rate": 0.0001711695353057594, |
| "loss": 0.4185, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.0011299406864223502, |
| "grad_norm": 2.66573166847229, |
| "learning_rate": 0.00017100302615330538, |
| "loss": 1.0129, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.0011332640413824159, |
| "grad_norm": 1.7640445232391357, |
| "learning_rate": 0.00017083611904531077, |
| "loss": 0.9847, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.0011365873963424817, |
| "grad_norm": 2.9297056198120117, |
| "learning_rate": 0.00017066881491725137, |
| "loss": 0.5878, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.0011399107513025474, |
| "grad_norm": 2.2123494148254395, |
| "learning_rate": 0.00017050111470682806, |
| "loss": 1.0239, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.0011432341062626132, |
| "grad_norm": 3.016484498977661, |
| "learning_rate": 0.00017033301935396176, |
| "loss": 0.8833, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.0011465574612226788, |
| "grad_norm": 2.979496717453003, |
| "learning_rate": 0.00017016452980078803, |
| "loss": 0.9933, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.0011498808161827447, |
| "grad_norm": 3.2114806175231934, |
| "learning_rate": 0.00016999564699165184, |
| "loss": 1.1358, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.0011532041711428103, |
| "grad_norm": 2.6296467781066895, |
| "learning_rate": 0.00016982637187310236, |
| "loss": 0.975, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.0011565275261028762, |
| "grad_norm": 1.9657410383224487, |
| "learning_rate": 0.0001696567053938874, |
| "loss": 1.032, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.0011598508810629418, |
| "grad_norm": 2.1968419551849365, |
| "learning_rate": 0.0001694866485049483, |
| "loss": 1.0387, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.0011631742360230077, |
| "grad_norm": 1.9351694583892822, |
| "learning_rate": 0.0001693162021594147, |
| "loss": 1.0679, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.0011664975909830733, |
| "grad_norm": 2.5958166122436523, |
| "learning_rate": 0.0001691453673125989, |
| "loss": 0.8852, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.0011698209459431392, |
| "grad_norm": 2.6189615726470947, |
| "learning_rate": 0.00016897414492199068, |
| "loss": 0.8748, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.0011731443009032048, |
| "grad_norm": 2.212649345397949, |
| "learning_rate": 0.00016880253594725195, |
| "loss": 0.8637, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.0011764676558632704, |
| "grad_norm": 2.4734244346618652, |
| "learning_rate": 0.0001686305413502114, |
| "loss": 0.6893, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.0011797910108233363, |
| "grad_norm": 2.0774455070495605, |
| "learning_rate": 0.00016845816209485885, |
| "loss": 1.0388, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.001183114365783402, |
| "grad_norm": 2.198000192642212, |
| "learning_rate": 0.00016828539914734024, |
| "loss": 0.3543, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.0011864377207434678, |
| "grad_norm": 3.1604952812194824, |
| "learning_rate": 0.0001681122534759519, |
| "loss": 1.2221, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.0011897610757035334, |
| "grad_norm": 1.7176005840301514, |
| "learning_rate": 0.0001679387260511353, |
| "loss": 1.0015, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.0011930844306635993, |
| "grad_norm": 1.6220836639404297, |
| "learning_rate": 0.0001677648178454715, |
| "loss": 1.1473, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.0011964077856236649, |
| "grad_norm": 1.8516310453414917, |
| "learning_rate": 0.0001675905298336758, |
| "loss": 1.0743, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.0011997311405837307, |
| "grad_norm": 1.4496151208877563, |
| "learning_rate": 0.00016741586299259215, |
| "loss": 1.1718, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.0012030544955437964, |
| "grad_norm": 2.137094497680664, |
| "learning_rate": 0.00016724081830118786, |
| "loss": 0.9015, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.0012063778505038622, |
| "grad_norm": 2.0566606521606445, |
| "learning_rate": 0.0001670653967405479, |
| "loss": 0.6874, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.0012097012054639279, |
| "grad_norm": 1.8700917959213257, |
| "learning_rate": 0.00016688959929386958, |
| "loss": 0.9722, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.0012130245604239937, |
| "grad_norm": 2.447117328643799, |
| "learning_rate": 0.0001667134269464569, |
| "loss": 0.7384, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.0012163479153840594, |
| "grad_norm": 1.885358452796936, |
| "learning_rate": 0.00016653688068571514, |
| "loss": 1.0383, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.0012196712703441252, |
| "grad_norm": 2.7440185546875, |
| "learning_rate": 0.00016635996150114526, |
| "loss": 0.916, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.0012229946253041908, |
| "grad_norm": 2.909104347229004, |
| "learning_rate": 0.00016618267038433836, |
| "loss": 1.2791, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.0012263179802642565, |
| "grad_norm": 2.072293519973755, |
| "learning_rate": 0.00016600500832897016, |
| "loss": 0.9197, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.0012296413352243223, |
| "grad_norm": 2.057619333267212, |
| "learning_rate": 0.0001658269763307954, |
| "loss": 1.1913, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.001232964690184388, |
| "grad_norm": 2.2635629177093506, |
| "learning_rate": 0.00016564857538764222, |
| "loss": 0.4376, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.0012362880451444538, |
| "grad_norm": 2.7420809268951416, |
| "learning_rate": 0.00016546980649940668, |
| "loss": 0.9921, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.0012396114001045194, |
| "grad_norm": 2.0869131088256836, |
| "learning_rate": 0.0001652906706680471, |
| "loss": 0.8394, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.0012429347550645853, |
| "grad_norm": 2.557790756225586, |
| "learning_rate": 0.00016511116889757825, |
| "loss": 0.444, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.001246258110024651, |
| "grad_norm": 3.326352596282959, |
| "learning_rate": 0.00016493130219406615, |
| "loss": 1.2891, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.0012495814649847168, |
| "grad_norm": 2.856938123703003, |
| "learning_rate": 0.00016475107156562206, |
| "loss": 0.7033, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.0012529048199447824, |
| "grad_norm": 2.09859037399292, |
| "learning_rate": 0.00016457047802239698, |
| "loss": 0.7436, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.0012562281749048483, |
| "grad_norm": 2.375894069671631, |
| "learning_rate": 0.0001643895225765759, |
| "loss": 0.7112, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.001259551529864914, |
| "grad_norm": 2.7644810676574707, |
| "learning_rate": 0.00016420820624237227, |
| "loss": 0.9923, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.0012628748848249798, |
| "grad_norm": 1.7642817497253418, |
| "learning_rate": 0.0001640265300360222, |
| "loss": 0.6766, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.0012661982397850454, |
| "grad_norm": 1.8854440450668335, |
| "learning_rate": 0.00016384449497577888, |
| "loss": 1.0343, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.001269521594745111, |
| "grad_norm": 2.546734571456909, |
| "learning_rate": 0.00016366210208190664, |
| "loss": 1.1507, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.0012728449497051769, |
| "grad_norm": 2.9949944019317627, |
| "learning_rate": 0.00016347935237667546, |
| "loss": 0.5915, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.0012761683046652425, |
| "grad_norm": 2.4460384845733643, |
| "learning_rate": 0.00016329624688435527, |
| "loss": 0.7061, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.0012794916596253084, |
| "grad_norm": 2.156528949737549, |
| "learning_rate": 0.0001631127866312099, |
| "loss": 1.0332, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.001282815014585374, |
| "grad_norm": 4.042215824127197, |
| "learning_rate": 0.00016292897264549172, |
| "loss": 0.9354, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.0012861383695454399, |
| "grad_norm": 2.446014165878296, |
| "learning_rate": 0.00016274480595743554, |
| "loss": 1.0042, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.0012894617245055055, |
| "grad_norm": 3.4464094638824463, |
| "learning_rate": 0.00016256028759925313, |
| "loss": 0.9816, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.0012927850794655713, |
| "grad_norm": 2.033539295196533, |
| "learning_rate": 0.00016237541860512713, |
| "loss": 0.7215, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.001296108434425637, |
| "grad_norm": 2.1186728477478027, |
| "learning_rate": 0.00016219020001120556, |
| "loss": 1.2443, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.0012994317893857028, |
| "grad_norm": 2.625006675720215, |
| "learning_rate": 0.00016200463285559574, |
| "loss": 1.0826, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.0013027551443457685, |
| "grad_norm": 2.270512104034424, |
| "learning_rate": 0.00016181871817835876, |
| "loss": 0.8646, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.0013060784993058343, |
| "grad_norm": 1.2360811233520508, |
| "learning_rate": 0.0001616324570215033, |
| "loss": 0.8564, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.0013094018542659, |
| "grad_norm": 3.1289613246917725, |
| "learning_rate": 0.0001614458504289801, |
| "loss": 0.7, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.0013127252092259658, |
| "grad_norm": 2.8300228118896484, |
| "learning_rate": 0.00016125889944667597, |
| "loss": 0.7768, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.0013160485641860314, |
| "grad_norm": 1.623810887336731, |
| "learning_rate": 0.00016107160512240793, |
| "loss": 1.2405, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.001319371919146097, |
| "grad_norm": 1.4173606634140015, |
| "learning_rate": 0.0001608839685059173, |
| "loss": 0.8267, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.001322695274106163, |
| "grad_norm": 2.3122427463531494, |
| "learning_rate": 0.00016069599064886397, |
| "loss": 0.7878, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.0013260186290662286, |
| "grad_norm": 2.017801284790039, |
| "learning_rate": 0.00016050767260482034, |
| "loss": 0.7762, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.0013293419840262944, |
| "grad_norm": 1.9952462911605835, |
| "learning_rate": 0.00016031901542926547, |
| "loss": 0.9665, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.00133266533898636, |
| "grad_norm": 1.493338704109192, |
| "learning_rate": 0.00016013002017957923, |
| "loss": 0.6265, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.001335988693946426, |
| "grad_norm": 2.571161985397339, |
| "learning_rate": 0.00015994068791503629, |
| "loss": 0.601, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.0013393120489064915, |
| "grad_norm": 2.7708725929260254, |
| "learning_rate": 0.00015975101969680018, |
| "loss": 0.8486, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.0013426354038665574, |
| "grad_norm": 2.8990213871002197, |
| "learning_rate": 0.0001595610165879174, |
| "loss": 0.7763, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.001345958758826623, |
| "grad_norm": 3.1342248916625977, |
| "learning_rate": 0.00015937067965331148, |
| "loss": 0.9712, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.0013492821137866889, |
| "grad_norm": 2.5845744609832764, |
| "learning_rate": 0.00015918000995977685, |
| "loss": 0.5654, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.0013526054687467545, |
| "grad_norm": 2.0698084831237793, |
| "learning_rate": 0.00015898900857597305, |
| "loss": 1.1681, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.0013559288237068204, |
| "grad_norm": 1.965105652809143, |
| "learning_rate": 0.00015879767657241874, |
| "loss": 0.9746, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.001359252178666886, |
| "grad_norm": 3.326399564743042, |
| "learning_rate": 0.00015860601502148549, |
| "loss": 0.5961, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.0013625755336269516, |
| "grad_norm": 2.5008418560028076, |
| "learning_rate": 0.00015841402499739197, |
| "loss": 0.9563, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.0013658988885870175, |
| "grad_norm": 2.0401337146759033, |
| "learning_rate": 0.00015822170757619789, |
| "loss": 1.0539, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.0013692222435470831, |
| "grad_norm": 2.2083499431610107, |
| "learning_rate": 0.00015802906383579788, |
| "loss": 0.8539, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.001372545598507149, |
| "grad_norm": 2.2163169384002686, |
| "learning_rate": 0.00015783609485591556, |
| "loss": 0.9977, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.0013758689534672146, |
| "grad_norm": 1.962361454963684, |
| "learning_rate": 0.00015764280171809747, |
| "loss": 0.8299, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.0013791923084272805, |
| "grad_norm": 1.924648404121399, |
| "learning_rate": 0.0001574491855057069, |
| "loss": 0.8988, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.001382515663387346, |
| "grad_norm": 2.6560895442962646, |
| "learning_rate": 0.00015725524730391796, |
| "loss": 0.7632, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.001385839018347412, |
| "grad_norm": 2.312373399734497, |
| "learning_rate": 0.00015706098819970942, |
| "loss": 1.2274, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.0013891623733074776, |
| "grad_norm": 2.8376545906066895, |
| "learning_rate": 0.00015686640928185862, |
| "loss": 1.0709, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.0013924857282675434, |
| "grad_norm": 1.7969470024108887, |
| "learning_rate": 0.00015667151164093545, |
| "loss": 0.6818, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.001395809083227609, |
| "grad_norm": 2.3826024532318115, |
| "learning_rate": 0.00015647629636929606, |
| "loss": 0.991, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.001399132438187675, |
| "grad_norm": 1.66337251663208, |
| "learning_rate": 0.00015628076456107687, |
| "loss": 0.5347, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.0014024557931477406, |
| "grad_norm": 2.6910243034362793, |
| "learning_rate": 0.0001560849173121885, |
| "loss": 1.0328, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.0014057791481078064, |
| "grad_norm": 3.5220468044281006, |
| "learning_rate": 0.0001558887557203095, |
| "loss": 0.8558, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.001409102503067872, |
| "grad_norm": 1.8120965957641602, |
| "learning_rate": 0.00015569228088488016, |
| "loss": 0.7371, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.0014124258580279377, |
| "grad_norm": 3.545508861541748, |
| "learning_rate": 0.00015549549390709652, |
| "loss": 0.8421, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.0014157492129880035, |
| "grad_norm": 2.4664230346679688, |
| "learning_rate": 0.00015529839588990411, |
| "loss": 0.4116, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.0014190725679480692, |
| "grad_norm": 2.0859506130218506, |
| "learning_rate": 0.0001551009879379917, |
| "loss": 1.0685, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.001422395922908135, |
| "grad_norm": 2.197068452835083, |
| "learning_rate": 0.00015490327115778524, |
| "loss": 1.0368, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.0014257192778682007, |
| "grad_norm": 2.6117873191833496, |
| "learning_rate": 0.0001547052466574415, |
| "loss": 0.8388, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.0014290426328282665, |
| "grad_norm": 1.4545451402664185, |
| "learning_rate": 0.00015450691554684204, |
| "loss": 0.5815, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.0014323659877883321, |
| "grad_norm": 1.6503546237945557, |
| "learning_rate": 0.00015430827893758687, |
| "loss": 1.2627, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.001435689342748398, |
| "grad_norm": 5.260714530944824, |
| "learning_rate": 0.00015410933794298827, |
| "loss": 0.5394, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.0014390126977084636, |
| "grad_norm": 1.7504278421401978, |
| "learning_rate": 0.00015391009367806446, |
| "loss": 0.8148, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.0014423360526685295, |
| "grad_norm": 1.788711428642273, |
| "learning_rate": 0.00015371054725953347, |
| "loss": 0.6245, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.0014456594076285951, |
| "grad_norm": 1.9077836275100708, |
| "learning_rate": 0.00015351069980580682, |
| "loss": 0.5702, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.001448982762588661, |
| "grad_norm": 1.910592794418335, |
| "learning_rate": 0.00015331055243698332, |
| "loss": 0.7232, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.0014523061175487266, |
| "grad_norm": 2.3402035236358643, |
| "learning_rate": 0.00015311010627484266, |
| "loss": 0.7282, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.0014556294725087922, |
| "grad_norm": 2.232607841491699, |
| "learning_rate": 0.00015290936244283918, |
| "loss": 1.2627, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.001458952827468858, |
| "grad_norm": 1.9575645923614502, |
| "learning_rate": 0.00015270832206609568, |
| "loss": 1.0406, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.0014622761824289237, |
| "grad_norm": 2.150543212890625, |
| "learning_rate": 0.00015250698627139697, |
| "loss": 0.9826, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.0014655995373889896, |
| "grad_norm": 4.266528606414795, |
| "learning_rate": 0.00015230535618718357, |
| "loss": 0.8738, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.0014689228923490552, |
| "grad_norm": 2.0403382778167725, |
| "learning_rate": 0.00015210343294354557, |
| "loss": 0.7778, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.001472246247309121, |
| "grad_norm": 1.740125060081482, |
| "learning_rate": 0.00015190121767221594, |
| "loss": 0.9677, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.0014755696022691867, |
| "grad_norm": 2.5546271800994873, |
| "learning_rate": 0.00015169871150656457, |
| "loss": 1.2232, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.0014788929572292526, |
| "grad_norm": 2.9307358264923096, |
| "learning_rate": 0.00015149591558159166, |
| "loss": 0.5073, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.0014822163121893182, |
| "grad_norm": 1.6386537551879883, |
| "learning_rate": 0.0001512928310339215, |
| "loss": 0.5687, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.001485539667149384, |
| "grad_norm": 2.8957998752593994, |
| "learning_rate": 0.00015108945900179602, |
| "loss": 0.767, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.0014888630221094497, |
| "grad_norm": 2.1520543098449707, |
| "learning_rate": 0.00015088580062506835, |
| "loss": 0.8657, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.0014921863770695155, |
| "grad_norm": 1.4322130680084229, |
| "learning_rate": 0.00015068185704519667, |
| "loss": 0.9973, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.0014955097320295812, |
| "grad_norm": 1.5489296913146973, |
| "learning_rate": 0.0001504776294052375, |
| "loss": 0.7799, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.001498833086989647, |
| "grad_norm": 2.5609688758850098, |
| "learning_rate": 0.00015027311884983964, |
| "loss": 0.9496, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.0015021564419497127, |
| "grad_norm": 2.324089288711548, |
| "learning_rate": 0.00015006832652523735, |
| "loss": 0.9417, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.0015054797969097783, |
| "grad_norm": 1.6559861898422241, |
| "learning_rate": 0.00014986325357924432, |
| "loss": 1.0362, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.0015088031518698441, |
| "grad_norm": 1.5636197328567505, |
| "learning_rate": 0.00014965790116124692, |
| "loss": 1.0802, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.0015121265068299098, |
| "grad_norm": 1.774526834487915, |
| "learning_rate": 0.00014945227042219804, |
| "loss": 0.706, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.0015154498617899756, |
| "grad_norm": 2.7354817390441895, |
| "learning_rate": 0.00014924636251461033, |
| "loss": 0.5476, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.0015187732167500413, |
| "grad_norm": 1.8853740692138672, |
| "learning_rate": 0.00014904017859255006, |
| "loss": 1.112, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.0015220965717101071, |
| "grad_norm": 3.530038356781006, |
| "learning_rate": 0.0001488337198116304, |
| "loss": 0.9669, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.0015254199266701728, |
| "grad_norm": 2.1883273124694824, |
| "learning_rate": 0.00014862698732900508, |
| "loss": 1.0206, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.0015287432816302386, |
| "grad_norm": 2.3160412311553955, |
| "learning_rate": 0.0001484199823033618, |
| "loss": 0.9228, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.0015320666365903042, |
| "grad_norm": 1.904786467552185, |
| "learning_rate": 0.00014821270589491592, |
| "loss": 1.1042, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.00153538999155037, |
| "grad_norm": 2.208003044128418, |
| "learning_rate": 0.00014800515926540375, |
| "loss": 0.7847, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.0015387133465104357, |
| "grad_norm": 2.0928616523742676, |
| "learning_rate": 0.00014779734357807614, |
| "loss": 0.5636, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.0015420367014705016, |
| "grad_norm": 2.1676676273345947, |
| "learning_rate": 0.000147589259997692, |
| "loss": 0.9899, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.0015453600564305672, |
| "grad_norm": 1.9746110439300537, |
| "learning_rate": 0.00014738090969051163, |
| "loss": 0.8615, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.001548683411390633, |
| "grad_norm": 2.691312789916992, |
| "learning_rate": 0.00014717229382429028, |
| "loss": 0.9095, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.0015520067663506987, |
| "grad_norm": 2.8194122314453125, |
| "learning_rate": 0.0001469634135682717, |
| "loss": 1.0815, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.0015553301213107643, |
| "grad_norm": 2.1670734882354736, |
| "learning_rate": 0.0001467542700931814, |
| "loss": 1.1359, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.0015586534762708302, |
| "grad_norm": 3.914708375930786, |
| "learning_rate": 0.0001465448645712202, |
| "loss": 0.7727, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.0015619768312308958, |
| "grad_norm": 1.640748381614685, |
| "learning_rate": 0.00014633519817605758, |
| "loss": 0.333, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.0015653001861909617, |
| "grad_norm": 2.2876367568969727, |
| "learning_rate": 0.00014612527208282522, |
| "loss": 0.7169, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.0015686235411510273, |
| "grad_norm": 1.4500001668930054, |
| "learning_rate": 0.00014591508746811032, |
| "loss": 0.9855, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.0015719468961110932, |
| "grad_norm": 3.451514482498169, |
| "learning_rate": 0.0001457046455099491, |
| "loss": 0.8154, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.0015752702510711588, |
| "grad_norm": 2.2254014015197754, |
| "learning_rate": 0.00014549394738781993, |
| "loss": 0.9397, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.0015785936060312246, |
| "grad_norm": 2.63394832611084, |
| "learning_rate": 0.00014528299428263712, |
| "loss": 0.709, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.0015819169609912903, |
| "grad_norm": 2.099602460861206, |
| "learning_rate": 0.0001450717873767441, |
| "loss": 1.0745, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.0015852403159513561, |
| "grad_norm": 2.0736916065216064, |
| "learning_rate": 0.00014486032785390667, |
| "loss": 1.2907, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.0015885636709114218, |
| "grad_norm": 2.344252347946167, |
| "learning_rate": 0.0001446486168993065, |
| "loss": 0.9257, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.0015918870258714876, |
| "grad_norm": 2.2529079914093018, |
| "learning_rate": 0.00014443665569953466, |
| "loss": 0.5821, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.0015952103808315533, |
| "grad_norm": 2.390965223312378, |
| "learning_rate": 0.00014422444544258454, |
| "loss": 0.9467, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.001598533735791619, |
| "grad_norm": 2.4676353931427, |
| "learning_rate": 0.0001440119873178456, |
| "loss": 1.1481, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.0016018570907516847, |
| "grad_norm": 2.723836660385132, |
| "learning_rate": 0.0001437992825160965, |
| "loss": 1.0146, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.0016051804457117504, |
| "grad_norm": 2.4351353645324707, |
| "learning_rate": 0.00014358633222949843, |
| "loss": 1.0365, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.0016085038006718162, |
| "grad_norm": 2.6160218715667725, |
| "learning_rate": 0.0001433731376515885, |
| "loss": 0.8535, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.0016118271556318819, |
| "grad_norm": 1.719913125038147, |
| "learning_rate": 0.00014315969997727305, |
| "loss": 0.7818, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.0016151505105919477, |
| "grad_norm": 1.9794899225234985, |
| "learning_rate": 0.00014294602040282087, |
| "loss": 0.6927, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.0016184738655520134, |
| "grad_norm": 2.530505657196045, |
| "learning_rate": 0.00014273210012585651, |
| "loss": 1.0023, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.0016217972205120792, |
| "grad_norm": 3.295813798904419, |
| "learning_rate": 0.00014251794034535374, |
| "loss": 1.0951, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.0016251205754721448, |
| "grad_norm": 1.7799811363220215, |
| "learning_rate": 0.00014230354226162849, |
| "loss": 1.0959, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.0016284439304322107, |
| "grad_norm": 3.7634365558624268, |
| "learning_rate": 0.00014208890707633252, |
| "loss": 0.7226, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.0016317672853922763, |
| "grad_norm": 2.918199062347412, |
| "learning_rate": 0.00014187403599244639, |
| "loss": 0.9726, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.0016350906403523422, |
| "grad_norm": 1.9790452718734741, |
| "learning_rate": 0.00014165893021427276, |
| "loss": 0.7381, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.0016384139953124078, |
| "grad_norm": 3.541562557220459, |
| "learning_rate": 0.00014144359094742985, |
| "loss": 1.0652, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.0016417373502724737, |
| "grad_norm": 2.492720603942871, |
| "learning_rate": 0.00014122801939884445, |
| "loss": 0.6157, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.0016450607052325393, |
| "grad_norm": 3.0369317531585693, |
| "learning_rate": 0.0001410122167767452, |
| "loss": 1.0933, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.001648384060192605, |
| "grad_norm": 3.2473337650299072, |
| "learning_rate": 0.00014079618429065587, |
| "loss": 0.9726, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.0016517074151526708, |
| "grad_norm": 2.1378862857818604, |
| "learning_rate": 0.0001405799231513886, |
| "loss": 1.0105, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.0016550307701127364, |
| "grad_norm": 2.4640915393829346, |
| "learning_rate": 0.0001403634345710371, |
| "loss": 0.7305, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.0016583541250728023, |
| "grad_norm": 2.5675315856933594, |
| "learning_rate": 0.00014014671976296975, |
| "loss": 0.9592, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.001661677480032868, |
| "grad_norm": 2.35866117477417, |
| "learning_rate": 0.00013992977994182297, |
| "loss": 1.2306, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.0016650008349929338, |
| "grad_norm": 1.8059924840927124, |
| "learning_rate": 0.00013971261632349423, |
| "loss": 0.9314, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.0016683241899529994, |
| "grad_norm": 2.0500893592834473, |
| "learning_rate": 0.0001394952301251354, |
| "loss": 0.9083, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.0016716475449130653, |
| "grad_norm": 1.9817473888397217, |
| "learning_rate": 0.00013927762256514588, |
| "loss": 1.1285, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.0016749708998731309, |
| "grad_norm": 2.559354782104492, |
| "learning_rate": 0.00013905979486316568, |
| "loss": 0.9594, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.0016782942548331967, |
| "grad_norm": 2.1031811237335205, |
| "learning_rate": 0.00013884174824006873, |
| "loss": 0.9267, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.0016816176097932624, |
| "grad_norm": 2.237828254699707, |
| "learning_rate": 0.0001386234839179559, |
| "loss": 0.7867, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.0016849409647533282, |
| "grad_norm": 3.0826919078826904, |
| "learning_rate": 0.00013840500312014826, |
| "loss": 0.9198, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.0016882643197133939, |
| "grad_norm": 1.821419358253479, |
| "learning_rate": 0.00013818630707118014, |
| "loss": 0.877, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.0016915876746734595, |
| "grad_norm": 2.015178918838501, |
| "learning_rate": 0.00013796739699679228, |
| "loss": 0.7542, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.0016949110296335254, |
| "grad_norm": 2.8974575996398926, |
| "learning_rate": 0.00013774827412392507, |
| "loss": 0.9196, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.001698234384593591, |
| "grad_norm": 2.040759801864624, |
| "learning_rate": 0.00013752893968071149, |
| "loss": 0.8026, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.0017015577395536568, |
| "grad_norm": 1.8575818538665771, |
| "learning_rate": 0.00013730939489647043, |
| "loss": 0.7079, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.0017048810945137225, |
| "grad_norm": 2.015794038772583, |
| "learning_rate": 0.00013708964100169957, |
| "loss": 1.1416, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.0017082044494737883, |
| "grad_norm": 2.1127171516418457, |
| "learning_rate": 0.0001368696792280687, |
| "loss": 0.9396, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.001711527804433854, |
| "grad_norm": 2.133606195449829, |
| "learning_rate": 0.00013664951080841268, |
| "loss": 1.1934, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.0017148511593939198, |
| "grad_norm": 1.995360016822815, |
| "learning_rate": 0.00013642913697672462, |
| "loss": 0.7406, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.0017181745143539854, |
| "grad_norm": 2.1345937252044678, |
| "learning_rate": 0.00013620855896814878, |
| "loss": 1.0595, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.0017214978693140513, |
| "grad_norm": 2.163135051727295, |
| "learning_rate": 0.000135987778018974, |
| "loss": 0.8264, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.001724821224274117, |
| "grad_norm": 1.7292685508728027, |
| "learning_rate": 0.00013576679536662638, |
| "loss": 0.7376, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.0017281445792341828, |
| "grad_norm": 1.779617190361023, |
| "learning_rate": 0.0001355456122496626, |
| "loss": 0.2976, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.0017314679341942484, |
| "grad_norm": 2.1159169673919678, |
| "learning_rate": 0.00013532422990776287, |
| "loss": 1.1358, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.0017347912891543143, |
| "grad_norm": 2.535615921020508, |
| "learning_rate": 0.00013510264958172398, |
| "loss": 0.9962, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.00173811464411438, |
| "grad_norm": 2.4328877925872803, |
| "learning_rate": 0.00013488087251345245, |
| "loss": 0.7934, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.0017414379990744455, |
| "grad_norm": 3.112191915512085, |
| "learning_rate": 0.00013465889994595747, |
| "loss": 0.8322, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.0017447613540345114, |
| "grad_norm": 1.626070499420166, |
| "learning_rate": 0.00013443673312334392, |
| "loss": 0.4358, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.001748084708994577, |
| "grad_norm": 1.7329679727554321, |
| "learning_rate": 0.00013421437329080548, |
| "loss": 0.6419, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.0017514080639546429, |
| "grad_norm": 2.1917316913604736, |
| "learning_rate": 0.00013399182169461757, |
| "loss": 1.0182, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.0017547314189147085, |
| "grad_norm": 2.6607604026794434, |
| "learning_rate": 0.0001337690795821304, |
| "loss": 0.7061, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.0017580547738747744, |
| "grad_norm": 2.403113603591919, |
| "learning_rate": 0.00013354614820176205, |
| "loss": 0.9044, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.00176137812883484, |
| "grad_norm": 1.7576875686645508, |
| "learning_rate": 0.0001333230288029913, |
| "loss": 0.8398, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.0017647014837949059, |
| "grad_norm": 1.7336602210998535, |
| "learning_rate": 0.00013309972263635076, |
| "loss": 1.3434, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.0017680248387549715, |
| "grad_norm": 1.9603030681610107, |
| "learning_rate": 0.00013287623095341991, |
| "loss": 0.7383, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.0017713481937150373, |
| "grad_norm": 2.057232141494751, |
| "learning_rate": 0.00013265255500681796, |
| "loss": 1.0643, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.001774671548675103, |
| "grad_norm": 1.9266340732574463, |
| "learning_rate": 0.00013242869605019676, |
| "loss": 0.9176, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.0017779949036351688, |
| "grad_norm": 2.640422821044922, |
| "learning_rate": 0.00013220465533823406, |
| "loss": 0.8926, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.0017813182585952345, |
| "grad_norm": 2.180840015411377, |
| "learning_rate": 0.00013198043412662627, |
| "loss": 0.9526, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.0017846416135553, |
| "grad_norm": 2.1570563316345215, |
| "learning_rate": 0.00013175603367208134, |
| "loss": 0.7101, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.001787964968515366, |
| "grad_norm": 2.518481731414795, |
| "learning_rate": 0.00013153145523231197, |
| "loss": 0.7608, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.0017912883234754316, |
| "grad_norm": 2.682793140411377, |
| "learning_rate": 0.00013130670006602837, |
| "loss": 0.9513, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.0017946116784354974, |
| "grad_norm": 2.468629837036133, |
| "learning_rate": 0.00013108176943293126, |
| "loss": 0.8469, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.001797935033395563, |
| "grad_norm": 2.705810308456421, |
| "learning_rate": 0.0001308566645937048, |
| "loss": 1.0462, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.001801258388355629, |
| "grad_norm": 1.938127875328064, |
| "learning_rate": 0.00013063138681000962, |
| "loss": 0.9872, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.0018045817433156946, |
| "grad_norm": 1.6162564754486084, |
| "learning_rate": 0.00013040593734447555, |
| "loss": 0.8097, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.0018079050982757604, |
| "grad_norm": 1.9742683172225952, |
| "learning_rate": 0.00013018031746069467, |
| "loss": 0.7124, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.001811228453235826, |
| "grad_norm": 2.3947043418884277, |
| "learning_rate": 0.0001299545284232143, |
| "loss": 0.54, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.001814551808195892, |
| "grad_norm": 3.0326342582702637, |
| "learning_rate": 0.00012972857149752976, |
| "loss": 0.6806, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.0018178751631559575, |
| "grad_norm": 2.3208701610565186, |
| "learning_rate": 0.0001295024479500774, |
| "loss": 0.8071, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.0018211985181160234, |
| "grad_norm": 2.1220314502716064, |
| "learning_rate": 0.00012927615904822736, |
| "loss": 0.8911, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.001824521873076089, |
| "grad_norm": 4.058460712432861, |
| "learning_rate": 0.00012904970606027657, |
| "loss": 0.6629, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.0018278452280361549, |
| "grad_norm": 1.9857665300369263, |
| "learning_rate": 0.00012882309025544173, |
| "loss": 1.1803, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.0018311685829962205, |
| "grad_norm": 2.9957692623138428, |
| "learning_rate": 0.000128596312903852, |
| "loss": 0.8082, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.0018344919379562862, |
| "grad_norm": 3.009734869003296, |
| "learning_rate": 0.00012836937527654194, |
| "loss": 0.6776, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.001837815292916352, |
| "grad_norm": 2.615323066711426, |
| "learning_rate": 0.00012814227864544453, |
| "loss": 1.3133, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.0018411386478764176, |
| "grad_norm": 2.3974363803863525, |
| "learning_rate": 0.0001279150242833838, |
| "loss": 0.5645, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.0018444620028364835, |
| "grad_norm": 2.873770236968994, |
| "learning_rate": 0.00012768761346406793, |
| "loss": 0.4937, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.0018477853577965491, |
| "grad_norm": 2.761345863342285, |
| "learning_rate": 0.000127460047462082, |
| "loss": 0.6471, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.001851108712756615, |
| "grad_norm": 2.254004955291748, |
| "learning_rate": 0.00012723232755288075, |
| "loss": 0.5895, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.0018544320677166806, |
| "grad_norm": 3.007963180541992, |
| "learning_rate": 0.0001270044550127816, |
| "loss": 1.0542, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.0018577554226767465, |
| "grad_norm": 2.213001251220703, |
| "learning_rate": 0.00012677643111895756, |
| "loss": 0.8269, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.001861078777636812, |
| "grad_norm": 2.1694979667663574, |
| "learning_rate": 0.00012654825714942972, |
| "loss": 0.7593, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.001864402132596878, |
| "grad_norm": 2.7547688484191895, |
| "learning_rate": 0.00012631993438306043, |
| "loss": 1.9703, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.0018677254875569436, |
| "grad_norm": 2.476234197616577, |
| "learning_rate": 0.00012609146409954598, |
| "loss": 0.8481, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.0018710488425170094, |
| "grad_norm": 3.1867456436157227, |
| "learning_rate": 0.00012586284757940946, |
| "loss": 0.9079, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.001874372197477075, |
| "grad_norm": 2.296586513519287, |
| "learning_rate": 0.0001256340861039936, |
| "loss": 0.7001, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.001877695552437141, |
| "grad_norm": 1.6829555034637451, |
| "learning_rate": 0.00012540518095545365, |
| "loss": 0.7896, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.0018810189073972066, |
| "grad_norm": 3.4515936374664307, |
| "learning_rate": 0.00012517613341674988, |
| "loss": 0.8062, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.0018843422623572722, |
| "grad_norm": 1.9574947357177734, |
| "learning_rate": 0.00012494694477164085, |
| "loss": 0.945, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.001887665617317338, |
| "grad_norm": 2.0612564086914062, |
| "learning_rate": 0.00012471761630467593, |
| "loss": 0.82, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.0018909889722774037, |
| "grad_norm": 1.8187974691390991, |
| "learning_rate": 0.0001244881493011881, |
| "loss": 0.6127, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.0018943123272374695, |
| "grad_norm": 2.5469536781311035, |
| "learning_rate": 0.00012425854504728687, |
| "loss": 1.0473, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.0018976356821975352, |
| "grad_norm": 2.1312432289123535, |
| "learning_rate": 0.00012402880482985097, |
| "loss": 0.6154, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.001900959037157601, |
| "grad_norm": 2.154513359069824, |
| "learning_rate": 0.0001237989299365212, |
| "loss": 0.851, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.0019042823921176667, |
| "grad_norm": 2.3860623836517334, |
| "learning_rate": 0.00012356892165569315, |
| "loss": 0.9685, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.0019076057470777325, |
| "grad_norm": 1.7248111963272095, |
| "learning_rate": 0.00012333878127651006, |
| "loss": 0.8971, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.0019109291020377981, |
| "grad_norm": 2.4173145294189453, |
| "learning_rate": 0.00012310851008885551, |
| "loss": 0.3288, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.001914252456997864, |
| "grad_norm": 2.560663938522339, |
| "learning_rate": 0.00012287810938334628, |
| "loss": 0.9095, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.0019175758119579296, |
| "grad_norm": 2.4700520038604736, |
| "learning_rate": 0.00012264758045132503, |
| "loss": 0.8354, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.0019208991669179955, |
| "grad_norm": 3.193286895751953, |
| "learning_rate": 0.0001224169245848531, |
| "loss": 0.9986, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.0019242225218780611, |
| "grad_norm": 2.184664249420166, |
| "learning_rate": 0.00012218614307670325, |
| "loss": 0.7368, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.0019275458768381268, |
| "grad_norm": 2.0635182857513428, |
| "learning_rate": 0.00012195523722035251, |
| "loss": 1.0547, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.0019308692317981926, |
| "grad_norm": 2.0145578384399414, |
| "learning_rate": 0.00012172420830997477, |
| "loss": 0.8455, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.0019341925867582582, |
| "grad_norm": 2.438504219055176, |
| "learning_rate": 0.00012149305764043368, |
| "loss": 0.9734, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.001937515941718324, |
| "grad_norm": 2.3351311683654785, |
| "learning_rate": 0.00012126178650727527, |
| "loss": 0.4809, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.0019408392966783897, |
| "grad_norm": 2.146331548690796, |
| "learning_rate": 0.0001210303962067207, |
| "loss": 1.2014, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.0019441626516384556, |
| "grad_norm": 2.121534824371338, |
| "learning_rate": 0.00012079888803565916, |
| "loss": 0.7174, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.0019474860065985212, |
| "grad_norm": 2.191533088684082, |
| "learning_rate": 0.00012056726329164036, |
| "loss": 0.579, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.001950809361558587, |
| "grad_norm": 3.195350170135498, |
| "learning_rate": 0.00012033552327286748, |
| "loss": 0.7127, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.001954132716518653, |
| "grad_norm": 2.790376663208008, |
| "learning_rate": 0.00012010366927818966, |
| "loss": 0.7117, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.0019574560714787186, |
| "grad_norm": 2.871413469314575, |
| "learning_rate": 0.00011987170260709497, |
| "loss": 0.8917, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.001960779426438784, |
| "grad_norm": 2.7113215923309326, |
| "learning_rate": 0.00011963962455970292, |
| "loss": 0.9844, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.00196410278139885, |
| "grad_norm": 2.5417799949645996, |
| "learning_rate": 0.00011940743643675733, |
| "loss": 0.7809, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.001967426136358916, |
| "grad_norm": 2.755305528640747, |
| "learning_rate": 0.00011917513953961892, |
| "loss": 0.5988, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.0019707494913189815, |
| "grad_norm": 3.3853535652160645, |
| "learning_rate": 0.00011894273517025802, |
| "loss": 0.7682, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.001974072846279047, |
| "grad_norm": 2.008025884628296, |
| "learning_rate": 0.00011871022463124743, |
| "loss": 0.9777, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.001977396201239113, |
| "grad_norm": 2.048288106918335, |
| "learning_rate": 0.00011847760922575495, |
| "loss": 1.1554, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.0019807195561991784, |
| "grad_norm": 2.378748893737793, |
| "learning_rate": 0.00011824489025753611, |
| "loss": 0.7135, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.0019840429111592445, |
| "grad_norm": 1.7821803092956543, |
| "learning_rate": 0.0001180120690309269, |
| "loss": 0.7473, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.00198736626611931, |
| "grad_norm": 2.244962453842163, |
| "learning_rate": 0.00011777914685083649, |
| "loss": 0.9208, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.0019906896210793758, |
| "grad_norm": 3.296584367752075, |
| "learning_rate": 0.00011754612502273976, |
| "loss": 0.6115, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.0019940129760394414, |
| "grad_norm": 2.3598029613494873, |
| "learning_rate": 0.00011731300485267023, |
| "loss": 0.4963, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.0019973363309995075, |
| "grad_norm": 1.8640865087509155, |
| "learning_rate": 0.00011707978764721253, |
| "loss": 1.0341, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.002000659685959573, |
| "grad_norm": 2.823103666305542, |
| "learning_rate": 0.0001168464747134951, |
| "loss": 0.5334, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.0020039830409196388, |
| "grad_norm": 2.356562614440918, |
| "learning_rate": 0.00011661306735918303, |
| "loss": 1.1563, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.0020073063958797044, |
| "grad_norm": 1.7850453853607178, |
| "learning_rate": 0.00011637956689247058, |
| "loss": 0.6332, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.0020106297508397705, |
| "grad_norm": 2.6029694080352783, |
| "learning_rate": 0.00011614597462207382, |
| "loss": 0.5515, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.002013953105799836, |
| "grad_norm": 2.554975986480713, |
| "learning_rate": 0.00011591229185722339, |
| "loss": 1.0411, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.0020172764607599017, |
| "grad_norm": 2.723120927810669, |
| "learning_rate": 0.0001156785199076572, |
| "loss": 0.9845, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.0020205998157199674, |
| "grad_norm": 2.5275723934173584, |
| "learning_rate": 0.00011544466008361295, |
| "loss": 0.7504, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.002023923170680033, |
| "grad_norm": 2.436131000518799, |
| "learning_rate": 0.00011521071369582079, |
| "loss": 0.834, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.002027246525640099, |
| "grad_norm": 1.6245455741882324, |
| "learning_rate": 0.00011497668205549621, |
| "loss": 1.1013, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.0020305698806001647, |
| "grad_norm": 1.9569754600524902, |
| "learning_rate": 0.00011474256647433239, |
| "loss": 0.7834, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.0020338932355602303, |
| "grad_norm": 2.2459635734558105, |
| "learning_rate": 0.00011450836826449305, |
| "loss": 0.7966, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.002037216590520296, |
| "grad_norm": 2.3233418464660645, |
| "learning_rate": 0.00011427408873860495, |
| "loss": 0.6925, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.002040539945480362, |
| "grad_norm": 2.3365554809570312, |
| "learning_rate": 0.00011403972920975066, |
| "loss": 1.1375, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.0020438633004404277, |
| "grad_norm": 2.217907667160034, |
| "learning_rate": 0.00011380529099146114, |
| "loss": 0.7229, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.0020471866554004933, |
| "grad_norm": 2.2354955673217773, |
| "learning_rate": 0.0001135707753977084, |
| "loss": 0.9491, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.002050510010360559, |
| "grad_norm": 1.5122630596160889, |
| "learning_rate": 0.00011333618374289812, |
| "loss": 0.9403, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.002053833365320625, |
| "grad_norm": 1.8312718868255615, |
| "learning_rate": 0.00011310151734186221, |
| "loss": 0.718, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.0020571567202806906, |
| "grad_norm": 4.088409900665283, |
| "learning_rate": 0.00011286677750985164, |
| "loss": 1.0302, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.0020604800752407563, |
| "grad_norm": 1.87000572681427, |
| "learning_rate": 0.00011263196556252882, |
| "loss": 0.7077, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.002063803430200822, |
| "grad_norm": 2.2027151584625244, |
| "learning_rate": 0.00011239708281596048, |
| "loss": 0.8584, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.0020671267851608876, |
| "grad_norm": 1.6802822351455688, |
| "learning_rate": 0.00011216213058661005, |
| "loss": 0.4266, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.0020704501401209536, |
| "grad_norm": 1.5808541774749756, |
| "learning_rate": 0.00011192711019133043, |
| "loss": 1.0296, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.0020737734950810193, |
| "grad_norm": 2.912220001220703, |
| "learning_rate": 0.0001116920229473566, |
| "loss": 1.05, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.002077096850041085, |
| "grad_norm": 2.2467291355133057, |
| "learning_rate": 0.00011145687017229817, |
| "loss": 0.9888, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.0020804202050011505, |
| "grad_norm": 2.39890718460083, |
| "learning_rate": 0.00011122165318413212, |
| "loss": 0.9139, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.0020837435599612166, |
| "grad_norm": 1.633890151977539, |
| "learning_rate": 0.0001109863733011952, |
| "loss": 0.9724, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.0020870669149212822, |
| "grad_norm": 1.9472523927688599, |
| "learning_rate": 0.00011075103184217676, |
| "loss": 1.016, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.002090390269881348, |
| "grad_norm": 2.565326452255249, |
| "learning_rate": 0.00011051563012611126, |
| "loss": 1.1732, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.0020937136248414135, |
| "grad_norm": 2.2524020671844482, |
| "learning_rate": 0.00011028016947237089, |
| "loss": 0.4214, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.0020970369798014796, |
| "grad_norm": 3.7713682651519775, |
| "learning_rate": 0.00011004465120065814, |
| "loss": 0.9991, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.002100360334761545, |
| "grad_norm": 2.0051424503326416, |
| "learning_rate": 0.00010980907663099844, |
| "loss": 0.8795, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.002103683689721611, |
| "grad_norm": 1.7558032274246216, |
| "learning_rate": 0.00010957344708373278, |
| "loss": 0.9593, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.0021070070446816765, |
| "grad_norm": 1.7094303369522095, |
| "learning_rate": 0.00010933776387951033, |
| "loss": 1.0733, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.0021103303996417425, |
| "grad_norm": 2.2374813556671143, |
| "learning_rate": 0.00010910202833928089, |
| "loss": 0.8686, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.002113653754601808, |
| "grad_norm": 3.0024795532226562, |
| "learning_rate": 0.00010886624178428762, |
| "loss": 0.8538, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.002116977109561874, |
| "grad_norm": 1.9901098012924194, |
| "learning_rate": 0.0001086304055360597, |
| "loss": 0.8818, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.0021203004645219395, |
| "grad_norm": 1.961928367614746, |
| "learning_rate": 0.00010839452091640469, |
| "loss": 1.0803, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.002123623819482005, |
| "grad_norm": 3.1986398696899414, |
| "learning_rate": 0.00010815858924740139, |
| "loss": 0.6213, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.002126947174442071, |
| "grad_norm": 2.1918091773986816, |
| "learning_rate": 0.00010792261185139221, |
| "loss": 0.8908, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.002130270529402137, |
| "grad_norm": 2.5319199562072754, |
| "learning_rate": 0.00010768659005097585, |
| "loss": 0.9992, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.0021335938843622024, |
| "grad_norm": 2.3293981552124023, |
| "learning_rate": 0.00010745052516899996, |
| "loss": 1.0111, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.002136917239322268, |
| "grad_norm": 2.176724910736084, |
| "learning_rate": 0.0001072144185285536, |
| "loss": 0.86, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.002140240594282334, |
| "grad_norm": 1.5879802703857422, |
| "learning_rate": 0.00010697827145295991, |
| "loss": 0.6246, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.0021435639492423998, |
| "grad_norm": 1.701689600944519, |
| "learning_rate": 0.00010674208526576857, |
| "loss": 0.5481, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.0021468873042024654, |
| "grad_norm": 2.197331190109253, |
| "learning_rate": 0.00010650586129074857, |
| "loss": 0.6809, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.002150210659162531, |
| "grad_norm": 2.1718311309814453, |
| "learning_rate": 0.00010626960085188067, |
| "loss": 1.0801, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.002153534014122597, |
| "grad_norm": 4.355485916137695, |
| "learning_rate": 0.00010603330527334998, |
| "loss": 1.2853, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.0021568573690826627, |
| "grad_norm": 2.317735195159912, |
| "learning_rate": 0.00010579697587953858, |
| "loss": 0.9521, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.0021601807240427284, |
| "grad_norm": 1.9010009765625, |
| "learning_rate": 0.00010556061399501802, |
| "loss": 0.82, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.002163504079002794, |
| "grad_norm": 1.3154947757720947, |
| "learning_rate": 0.00010532422094454204, |
| "loss": 1.2106, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.0021668274339628596, |
| "grad_norm": 2.130319833755493, |
| "learning_rate": 0.00010508779805303905, |
| "loss": 0.9213, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.0021701507889229257, |
| "grad_norm": 1.8011289834976196, |
| "learning_rate": 0.00010485134664560461, |
| "loss": 1.0765, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.0021734741438829914, |
| "grad_norm": 2.1197900772094727, |
| "learning_rate": 0.00010461486804749418, |
| "loss": 0.9027, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.002176797498843057, |
| "grad_norm": 1.9010964632034302, |
| "learning_rate": 0.00010437836358411568, |
| "loss": 0.6381, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.0021801208538031226, |
| "grad_norm": 1.9410107135772705, |
| "learning_rate": 0.00010414183458102186, |
| "loss": 0.7964, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.0021834442087631887, |
| "grad_norm": 2.4888432025909424, |
| "learning_rate": 0.0001039052823639031, |
| "loss": 0.9221, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.0021867675637232543, |
| "grad_norm": 2.1054494380950928, |
| "learning_rate": 0.00010366870825857989, |
| "loss": 0.7669, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.00219009091868332, |
| "grad_norm": 1.6643797159194946, |
| "learning_rate": 0.00010343211359099532, |
| "loss": 0.9111, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.0021934142736433856, |
| "grad_norm": 1.8980937004089355, |
| "learning_rate": 0.00010319549968720787, |
| "loss": 1.103, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.0021967376286034517, |
| "grad_norm": 2.2484617233276367, |
| "learning_rate": 0.00010295886787338369, |
| "loss": 0.93, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.0022000609835635173, |
| "grad_norm": 2.264021873474121, |
| "learning_rate": 0.00010272221947578937, |
| "loss": 0.8636, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.002203384338523583, |
| "grad_norm": 2.902461051940918, |
| "learning_rate": 0.00010248555582078445, |
| "loss": 1.0284, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.0022067076934836486, |
| "grad_norm": 1.2113059759140015, |
| "learning_rate": 0.00010224887823481402, |
| "loss": 0.7502, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.002210031048443714, |
| "grad_norm": 3.4769859313964844, |
| "learning_rate": 0.0001020121880444012, |
| "loss": 0.4294, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.0022133544034037803, |
| "grad_norm": 3.4410877227783203, |
| "learning_rate": 0.00010177548657613971, |
| "loss": 0.9235, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.002216677758363846, |
| "grad_norm": 3.706423044204712, |
| "learning_rate": 0.00010153877515668662, |
| "loss": 0.9124, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.0022200011133239115, |
| "grad_norm": 1.6656898260116577, |
| "learning_rate": 0.00010130205511275464, |
| "loss": 1.0897, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.002223324468283977, |
| "grad_norm": 1.8999181985855103, |
| "learning_rate": 0.00010106532777110486, |
| "loss": 0.9479, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.0022266478232440432, |
| "grad_norm": 2.498556137084961, |
| "learning_rate": 0.00010082859445853934, |
| "loss": 0.8578, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.0022266478232440432, |
| "eval_loss": 0.7900153994560242, |
| "eval_runtime": 3240.8445, |
| "eval_samples_per_second": 39.094, |
| "eval_steps_per_second": 19.547, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.002229971178204109, |
| "grad_norm": 1.8369241952896118, |
| "learning_rate": 0.00010059185650189348, |
| "loss": 0.6654, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.0022332945331641745, |
| "grad_norm": 1.9373723268508911, |
| "learning_rate": 0.0001003551152280288, |
| "loss": 0.5924, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.00223661788812424, |
| "grad_norm": 2.284693956375122, |
| "learning_rate": 0.00010011837196382543, |
| "loss": 0.7141, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.0022399412430843062, |
| "grad_norm": 1.8173497915267944, |
| "learning_rate": 9.988162803617458e-05, |
| "loss": 0.8533, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.002243264598044372, |
| "grad_norm": 2.2727203369140625, |
| "learning_rate": 9.96448847719712e-05, |
| "loss": 1.0409, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.0022465879530044375, |
| "grad_norm": 1.7468798160552979, |
| "learning_rate": 9.940814349810653e-05, |
| "loss": 0.9378, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.002249911307964503, |
| "grad_norm": 2.3897597789764404, |
| "learning_rate": 9.917140554146067e-05, |
| "loss": 0.9678, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.002253234662924569, |
| "grad_norm": 1.4605112075805664, |
| "learning_rate": 9.893467222889516e-05, |
| "loss": 0.8242, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.002256558017884635, |
| "grad_norm": 2.9371156692504883, |
| "learning_rate": 9.869794488724537e-05, |
| "loss": 0.97, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.0022598813728447005, |
| "grad_norm": 1.554976224899292, |
| "learning_rate": 9.846122484331343e-05, |
| "loss": 0.6133, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.002263204727804766, |
| "grad_norm": 1.649769902229309, |
| "learning_rate": 9.822451342386031e-05, |
| "loss": 1.0024, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.0022665280827648317, |
| "grad_norm": 2.1759161949157715, |
| "learning_rate": 9.798781195559883e-05, |
| "loss": 0.4422, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.002269851437724898, |
| "grad_norm": 2.7241005897521973, |
| "learning_rate": 9.7751121765186e-05, |
| "loss": 0.6119, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.0022731747926849634, |
| "grad_norm": 2.4562528133392334, |
| "learning_rate": 9.751444417921555e-05, |
| "loss": 0.9385, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.002276498147645029, |
| "grad_norm": 4.837019920349121, |
| "learning_rate": 9.727778052421064e-05, |
| "loss": 0.4189, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.0022798215026050947, |
| "grad_norm": 1.7064907550811768, |
| "learning_rate": 9.704113212661635e-05, |
| "loss": 1.1713, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.0022831448575651608, |
| "grad_norm": 3.0644757747650146, |
| "learning_rate": 9.680450031279216e-05, |
| "loss": 0.8532, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.0022864682125252264, |
| "grad_norm": 2.6350553035736084, |
| "learning_rate": 9.65678864090047e-05, |
| "loss": 0.7621, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.002289791567485292, |
| "grad_norm": 1.530420184135437, |
| "learning_rate": 9.633129174142014e-05, |
| "loss": 0.8986, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.0022931149224453577, |
| "grad_norm": 1.633294939994812, |
| "learning_rate": 9.609471763609692e-05, |
| "loss": 0.4471, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.0022964382774054238, |
| "grad_norm": 2.1451008319854736, |
| "learning_rate": 9.585816541897816e-05, |
| "loss": 0.7268, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.0022997616323654894, |
| "grad_norm": 2.0823404788970947, |
| "learning_rate": 9.562163641588437e-05, |
| "loss": 0.6338, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.002303084987325555, |
| "grad_norm": 1.8083529472351074, |
| "learning_rate": 9.538513195250583e-05, |
| "loss": 0.6431, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.0023064083422856207, |
| "grad_norm": 2.6677329540252686, |
| "learning_rate": 9.514865335439541e-05, |
| "loss": 0.7905, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.0023097316972456863, |
| "grad_norm": 3.05940580368042, |
| "learning_rate": 9.491220194696096e-05, |
| "loss": 0.7371, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.0023130550522057524, |
| "grad_norm": 1.8619906902313232, |
| "learning_rate": 9.467577905545795e-05, |
| "loss": 1.0022, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.002316378407165818, |
| "grad_norm": 2.6758852005004883, |
| "learning_rate": 9.443938600498196e-05, |
| "loss": 1.0803, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.0023197017621258836, |
| "grad_norm": 3.5494561195373535, |
| "learning_rate": 9.420302412046145e-05, |
| "loss": 0.9802, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.0023230251170859493, |
| "grad_norm": 1.923540472984314, |
| "learning_rate": 9.396669472665004e-05, |
| "loss": 1.0205, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.0023263484720460153, |
| "grad_norm": 2.2519900798797607, |
| "learning_rate": 9.373039914811935e-05, |
| "loss": 0.9522, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.002329671827006081, |
| "grad_norm": 2.2550559043884277, |
| "learning_rate": 9.349413870925144e-05, |
| "loss": 1.0383, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.0023329951819661466, |
| "grad_norm": 2.535550355911255, |
| "learning_rate": 9.325791473423144e-05, |
| "loss": 0.7763, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.0023363185369262122, |
| "grad_norm": 2.5544021129608154, |
| "learning_rate": 9.30217285470401e-05, |
| "loss": 0.6332, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.0023396418918862783, |
| "grad_norm": 2.3451223373413086, |
| "learning_rate": 9.278558147144642e-05, |
| "loss": 0.958, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.002342965246846344, |
| "grad_norm": 3.874997138977051, |
| "learning_rate": 9.254947483100006e-05, |
| "loss": 1.3982, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.0023462886018064096, |
| "grad_norm": 1.9717652797698975, |
| "learning_rate": 9.231340994902417e-05, |
| "loss": 0.8799, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.0023496119567664752, |
| "grad_norm": 2.044839382171631, |
| "learning_rate": 9.207738814860783e-05, |
| "loss": 1.2354, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.002352935311726541, |
| "grad_norm": 2.498375415802002, |
| "learning_rate": 9.184141075259863e-05, |
| "loss": 0.8478, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.002356258666686607, |
| "grad_norm": 1.493669867515564, |
| "learning_rate": 9.160547908359532e-05, |
| "loss": 1.0594, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.0023595820216466726, |
| "grad_norm": 2.704451084136963, |
| "learning_rate": 9.136959446394034e-05, |
| "loss": 0.9091, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.002362905376606738, |
| "grad_norm": 1.4863560199737549, |
| "learning_rate": 9.113375821571239e-05, |
| "loss": 1.3247, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.002366228731566804, |
| "grad_norm": 2.655653715133667, |
| "learning_rate": 9.089797166071914e-05, |
| "loss": 0.6064, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.00236955208652687, |
| "grad_norm": 2.025148868560791, |
| "learning_rate": 9.066223612048969e-05, |
| "loss": 0.4657, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.0023728754414869355, |
| "grad_norm": 2.5605967044830322, |
| "learning_rate": 9.04265529162672e-05, |
| "loss": 1.1225, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.002376198796447001, |
| "grad_norm": 1.5156453847885132, |
| "learning_rate": 9.019092336900156e-05, |
| "loss": 0.5417, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.002379522151407067, |
| "grad_norm": 1.3675994873046875, |
| "learning_rate": 8.99553487993419e-05, |
| "loss": 0.7597, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.002382845506367133, |
| "grad_norm": 1.4033820629119873, |
| "learning_rate": 8.971983052762913e-05, |
| "loss": 0.8266, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.0023861688613271985, |
| "grad_norm": 3.831524133682251, |
| "learning_rate": 8.948436987388876e-05, |
| "loss": 1.0312, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.002389492216287264, |
| "grad_norm": 2.5193397998809814, |
| "learning_rate": 8.924896815782326e-05, |
| "loss": 0.388, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.0023928155712473298, |
| "grad_norm": 2.961416482925415, |
| "learning_rate": 8.901362669880481e-05, |
| "loss": 0.9089, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.0023961389262073954, |
| "grad_norm": 2.4046037197113037, |
| "learning_rate": 8.87783468158679e-05, |
| "loss": 0.674, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.0023994622811674615, |
| "grad_norm": 2.0201237201690674, |
| "learning_rate": 8.854312982770185e-05, |
| "loss": 0.4603, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.002402785636127527, |
| "grad_norm": 1.9336488246917725, |
| "learning_rate": 8.830797705264344e-05, |
| "loss": 1.2791, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.0024061089910875928, |
| "grad_norm": 2.432431936264038, |
| "learning_rate": 8.80728898086696e-05, |
| "loss": 0.555, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.0024094323460476584, |
| "grad_norm": 2.049858331680298, |
| "learning_rate": 8.783786941338997e-05, |
| "loss": 1.2561, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.0024127557010077245, |
| "grad_norm": 1.9385666847229004, |
| "learning_rate": 8.760291718403955e-05, |
| "loss": 1.2627, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.00241607905596779, |
| "grad_norm": 2.3782167434692383, |
| "learning_rate": 8.736803443747117e-05, |
| "loss": 0.493, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.0024194024109278557, |
| "grad_norm": 2.8201656341552734, |
| "learning_rate": 8.713322249014841e-05, |
| "loss": 1.219, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.0024227257658879214, |
| "grad_norm": 1.9326146841049194, |
| "learning_rate": 8.689848265813782e-05, |
| "loss": 0.671, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.0024260491208479874, |
| "grad_norm": 2.4845776557922363, |
| "learning_rate": 8.666381625710192e-05, |
| "loss": 1.1306, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.002429372475808053, |
| "grad_norm": 1.897662878036499, |
| "learning_rate": 8.642922460229161e-05, |
| "loss": 1.0483, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.0024326958307681187, |
| "grad_norm": 2.1385698318481445, |
| "learning_rate": 8.619470900853887e-05, |
| "loss": 0.8237, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.0024360191857281843, |
| "grad_norm": 2.609041690826416, |
| "learning_rate": 8.596027079024935e-05, |
| "loss": 1.3002, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.0024393425406882504, |
| "grad_norm": 2.091109037399292, |
| "learning_rate": 8.57259112613951e-05, |
| "loss": 1.1823, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.002442665895648316, |
| "grad_norm": 4.9723711013793945, |
| "learning_rate": 8.549163173550698e-05, |
| "loss": 0.9495, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.0024459892506083817, |
| "grad_norm": 2.0263137817382812, |
| "learning_rate": 8.525743352566764e-05, |
| "loss": 0.945, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.0024493126055684473, |
| "grad_norm": 2.4975996017456055, |
| "learning_rate": 8.50233179445038e-05, |
| "loss": 0.5585, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.002452635960528513, |
| "grad_norm": 1.869718313217163, |
| "learning_rate": 8.478928630417921e-05, |
| "loss": 1.1208, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.002455959315488579, |
| "grad_norm": 2.218090295791626, |
| "learning_rate": 8.455533991638712e-05, |
| "loss": 0.9037, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.0024592826704486447, |
| "grad_norm": 1.9609618186950684, |
| "learning_rate": 8.432148009234284e-05, |
| "loss": 0.6614, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.0024626060254087103, |
| "grad_norm": 2.544095277786255, |
| "learning_rate": 8.408770814277663e-05, |
| "loss": 0.7293, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.002465929380368776, |
| "grad_norm": 2.0442986488342285, |
| "learning_rate": 8.385402537792621e-05, |
| "loss": 0.797, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.002469252735328842, |
| "grad_norm": 1.888458251953125, |
| "learning_rate": 8.362043310752943e-05, |
| "loss": 0.8964, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.0024725760902889076, |
| "grad_norm": 1.9157485961914062, |
| "learning_rate": 8.338693264081697e-05, |
| "loss": 0.8633, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.0024758994452489733, |
| "grad_norm": 3.2599165439605713, |
| "learning_rate": 8.315352528650495e-05, |
| "loss": 1.0014, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.002479222800209039, |
| "grad_norm": 2.07326340675354, |
| "learning_rate": 8.292021235278753e-05, |
| "loss": 0.8836, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.002482546155169105, |
| "grad_norm": 2.074749231338501, |
| "learning_rate": 8.268699514732979e-05, |
| "loss": 1.0107, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.0024858695101291706, |
| "grad_norm": 2.413445472717285, |
| "learning_rate": 8.245387497726027e-05, |
| "loss": 1.0935, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.0024891928650892362, |
| "grad_norm": 2.770019769668579, |
| "learning_rate": 8.222085314916355e-05, |
| "loss": 0.6449, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.002492516220049302, |
| "grad_norm": 1.7852829694747925, |
| "learning_rate": 8.198793096907309e-05, |
| "loss": 0.658, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.0024958395750093675, |
| "grad_norm": 1.9140889644622803, |
| "learning_rate": 8.175510974246391e-05, |
| "loss": 0.7751, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.0024991629299694336, |
| "grad_norm": 1.8890185356140137, |
| "learning_rate": 8.152239077424507e-05, |
| "loss": 0.854, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.002502486284929499, |
| "grad_norm": 2.610456943511963, |
| "learning_rate": 8.12897753687526e-05, |
| "loss": 0.6235, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.002505809639889565, |
| "grad_norm": 1.5817598104476929, |
| "learning_rate": 8.105726482974199e-05, |
| "loss": 0.6668, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.0025091329948496305, |
| "grad_norm": 2.368863344192505, |
| "learning_rate": 8.082486046038111e-05, |
| "loss": 0.8256, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.0025124563498096966, |
| "grad_norm": 2.1765031814575195, |
| "learning_rate": 8.059256356324268e-05, |
| "loss": 0.908, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.002515779704769762, |
| "grad_norm": 1.7256314754486084, |
| "learning_rate": 8.036037544029709e-05, |
| "loss": 0.577, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.002519103059729828, |
| "grad_norm": 1.280367136001587, |
| "learning_rate": 8.012829739290508e-05, |
| "loss": 0.6491, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.0025224264146898935, |
| "grad_norm": 1.6887412071228027, |
| "learning_rate": 7.989633072181037e-05, |
| "loss": 1.1735, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.0025257497696499595, |
| "grad_norm": 2.1677637100219727, |
| "learning_rate": 7.966447672713254e-05, |
| "loss": 0.9427, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.002529073124610025, |
| "grad_norm": 2.13200044631958, |
| "learning_rate": 7.943273670835966e-05, |
| "loss": 0.5779, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.002532396479570091, |
| "grad_norm": 1.6203086376190186, |
| "learning_rate": 7.920111196434085e-05, |
| "loss": 0.8072, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.0025357198345301564, |
| "grad_norm": 3.0133414268493652, |
| "learning_rate": 7.896960379327934e-05, |
| "loss": 0.7679, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.002539043189490222, |
| "grad_norm": 1.7745375633239746, |
| "learning_rate": 7.873821349272478e-05, |
| "loss": 0.8275, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.002542366544450288, |
| "grad_norm": 2.3216776847839355, |
| "learning_rate": 7.850694235956633e-05, |
| "loss": 0.6882, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.0025456898994103538, |
| "grad_norm": 1.5814892053604126, |
| "learning_rate": 7.827579169002524e-05, |
| "loss": 0.8818, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.0025490132543704194, |
| "grad_norm": 2.209172487258911, |
| "learning_rate": 7.80447627796475e-05, |
| "loss": 0.5781, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.002552336609330485, |
| "grad_norm": 2.160571813583374, |
| "learning_rate": 7.781385692329675e-05, |
| "loss": 0.8048, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.002555659964290551, |
| "grad_norm": 2.0445640087127686, |
| "learning_rate": 7.758307541514695e-05, |
| "loss": 1.2058, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.0025589833192506167, |
| "grad_norm": 2.0943262577056885, |
| "learning_rate": 7.7352419548675e-05, |
| "loss": 1.0538, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.0025623066742106824, |
| "grad_norm": 4.018516540527344, |
| "learning_rate": 7.712189061665375e-05, |
| "loss": 0.4696, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.002565630029170748, |
| "grad_norm": 1.7685644626617432, |
| "learning_rate": 7.68914899111445e-05, |
| "loss": 1.128, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.002568953384130814, |
| "grad_norm": 1.4300886392593384, |
| "learning_rate": 7.666121872348995e-05, |
| "loss": 0.8966, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.0025722767390908797, |
| "grad_norm": 1.8490028381347656, |
| "learning_rate": 7.643107834430686e-05, |
| "loss": 0.9422, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.0025756000940509454, |
| "grad_norm": 1.758457064628601, |
| "learning_rate": 7.620107006347883e-05, |
| "loss": 1.0323, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.002578923449011011, |
| "grad_norm": 2.2684803009033203, |
| "learning_rate": 7.597119517014905e-05, |
| "loss": 0.7242, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.0025822468039710766, |
| "grad_norm": 1.4495296478271484, |
| "learning_rate": 7.574145495271315e-05, |
| "loss": 0.9589, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.0025855701589311427, |
| "grad_norm": 1.9905943870544434, |
| "learning_rate": 7.55118506988119e-05, |
| "loss": 0.7841, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.0025888935138912083, |
| "grad_norm": 1.9174848794937134, |
| "learning_rate": 7.52823836953241e-05, |
| "loss": 0.5076, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.002592216868851274, |
| "grad_norm": 1.315011978149414, |
| "learning_rate": 7.505305522835916e-05, |
| "loss": 0.3087, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.0025955402238113396, |
| "grad_norm": 2.5037121772766113, |
| "learning_rate": 7.482386658325018e-05, |
| "loss": 1.1691, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.0025988635787714057, |
| "grad_norm": 1.9181026220321655, |
| "learning_rate": 7.459481904454642e-05, |
| "loss": 0.6995, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.0026021869337314713, |
| "grad_norm": 1.811360478401184, |
| "learning_rate": 7.43659138960064e-05, |
| "loss": 0.9057, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.002605510288691537, |
| "grad_norm": 3.2156643867492676, |
| "learning_rate": 7.413715242059058e-05, |
| "loss": 0.6813, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.0026088336436516026, |
| "grad_norm": 2.809575080871582, |
| "learning_rate": 7.390853590045406e-05, |
| "loss": 0.8422, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.0026121569986116686, |
| "grad_norm": 3.667475461959839, |
| "learning_rate": 7.368006561693959e-05, |
| "loss": 0.9714, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.0026154803535717343, |
| "grad_norm": 1.7948848009109497, |
| "learning_rate": 7.345174285057032e-05, |
| "loss": 0.7066, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.0026188037085318, |
| "grad_norm": 2.3704257011413574, |
| "learning_rate": 7.322356888104247e-05, |
| "loss": 0.7432, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.0026221270634918655, |
| "grad_norm": 1.6095038652420044, |
| "learning_rate": 7.299554498721839e-05, |
| "loss": 1.0732, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.0026254504184519316, |
| "grad_norm": 1.8659001588821411, |
| "learning_rate": 7.276767244711929e-05, |
| "loss": 1.3812, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.0026287737734119973, |
| "grad_norm": 2.7053465843200684, |
| "learning_rate": 7.253995253791803e-05, |
| "loss": 0.8775, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.002632097128372063, |
| "grad_norm": 1.9934276342391968, |
| "learning_rate": 7.231238653593208e-05, |
| "loss": 1.1197, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.0026354204833321285, |
| "grad_norm": 2.0284664630889893, |
| "learning_rate": 7.208497571661625e-05, |
| "loss": 0.9916, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.002638743838292194, |
| "grad_norm": 2.268935203552246, |
| "learning_rate": 7.185772135455553e-05, |
| "loss": 0.9107, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.0026420671932522602, |
| "grad_norm": 1.5755196809768677, |
| "learning_rate": 7.163062472345807e-05, |
| "loss": 0.8286, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.002645390548212326, |
| "grad_norm": 2.701737403869629, |
| "learning_rate": 7.140368709614804e-05, |
| "loss": 0.6317, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.0026487139031723915, |
| "grad_norm": 1.6806485652923584, |
| "learning_rate": 7.117690974455828e-05, |
| "loss": 1.0092, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.002652037258132457, |
| "grad_norm": 2.387301445007324, |
| "learning_rate": 7.095029393972341e-05, |
| "loss": 0.9981, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.002655360613092523, |
| "grad_norm": 2.0945944786071777, |
| "learning_rate": 7.072384095177269e-05, |
| "loss": 0.8833, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.002658683968052589, |
| "grad_norm": 2.2880685329437256, |
| "learning_rate": 7.049755204992262e-05, |
| "loss": 0.6492, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.0026620073230126545, |
| "grad_norm": 2.2428207397460938, |
| "learning_rate": 7.027142850247023e-05, |
| "loss": 1.0001, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.00266533067797272, |
| "grad_norm": 2.195579767227173, |
| "learning_rate": 7.00454715767857e-05, |
| "loss": 0.8971, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.002668654032932786, |
| "grad_norm": 2.64233660697937, |
| "learning_rate": 6.981968253930532e-05, |
| "loss": 0.9304, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.002671977387892852, |
| "grad_norm": 1.573728322982788, |
| "learning_rate": 6.959406265552446e-05, |
| "loss": 0.7636, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.0026753007428529174, |
| "grad_norm": 2.323046922683716, |
| "learning_rate": 6.936861318999039e-05, |
| "loss": 0.8272, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.002678624097812983, |
| "grad_norm": 1.2111873626708984, |
| "learning_rate": 6.914333540629521e-05, |
| "loss": 0.8412, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.0026819474527730487, |
| "grad_norm": 2.6255946159362793, |
| "learning_rate": 6.891823056706877e-05, |
| "loss": 1.2073, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.002685270807733115, |
| "grad_norm": 2.2799253463745117, |
| "learning_rate": 6.869329993397165e-05, |
| "loss": 1.0982, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.0026885941626931804, |
| "grad_norm": 2.6791117191314697, |
| "learning_rate": 6.846854476768804e-05, |
| "loss": 0.8172, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.002691917517653246, |
| "grad_norm": 1.7381466627120972, |
| "learning_rate": 6.824396632791867e-05, |
| "loss": 1.0518, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.0026952408726133117, |
| "grad_norm": 2.056283712387085, |
| "learning_rate": 6.801956587337378e-05, |
| "loss": 0.7701, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.0026985642275733778, |
| "grad_norm": 2.5469250679016113, |
| "learning_rate": 6.779534466176595e-05, |
| "loss": 0.77, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.0027018875825334434, |
| "grad_norm": 1.608278512954712, |
| "learning_rate": 6.757130394980324e-05, |
| "loss": 1.0236, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.002705210937493509, |
| "grad_norm": 2.222339153289795, |
| "learning_rate": 6.734744499318209e-05, |
| "loss": 0.7726, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.0027085342924535747, |
| "grad_norm": 1.7149579524993896, |
| "learning_rate": 6.712376904658008e-05, |
| "loss": 0.8896, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.0027118576474136407, |
| "grad_norm": 1.6997548341751099, |
| "learning_rate": 6.690027736364922e-05, |
| "loss": 0.9059, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.0027151810023737064, |
| "grad_norm": 2.0666792392730713, |
| "learning_rate": 6.667697119700876e-05, |
| "loss": 0.9987, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.002718504357333772, |
| "grad_norm": 2.7630252838134766, |
| "learning_rate": 6.645385179823798e-05, |
| "loss": 0.8782, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.0027218277122938376, |
| "grad_norm": 1.464687466621399, |
| "learning_rate": 6.623092041786963e-05, |
| "loss": 0.7638, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.0027251510672539033, |
| "grad_norm": 2.7846906185150146, |
| "learning_rate": 6.600817830538244e-05, |
| "loss": 1.2357, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.0027284744222139693, |
| "grad_norm": 1.8688273429870605, |
| "learning_rate": 6.578562670919453e-05, |
| "loss": 0.7761, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.002731797777174035, |
| "grad_norm": 2.552701473236084, |
| "learning_rate": 6.556326687665608e-05, |
| "loss": 0.8297, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.0027351211321341006, |
| "grad_norm": 2.456305503845215, |
| "learning_rate": 6.534110005404255e-05, |
| "loss": 0.7151, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.0027384444870941663, |
| "grad_norm": 2.453496217727661, |
| "learning_rate": 6.511912748654759e-05, |
| "loss": 0.59, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.0027417678420542323, |
| "grad_norm": 1.7119109630584717, |
| "learning_rate": 6.489735041827605e-05, |
| "loss": 1.0155, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.002745091197014298, |
| "grad_norm": 1.677862524986267, |
| "learning_rate": 6.467577009223717e-05, |
| "loss": 0.7779, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.0027484145519743636, |
| "grad_norm": 2.11332106590271, |
| "learning_rate": 6.445438775033743e-05, |
| "loss": 1.1967, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.0027517379069344292, |
| "grad_norm": 1.6521720886230469, |
| "learning_rate": 6.423320463337363e-05, |
| "loss": 0.9303, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.0027550612618944953, |
| "grad_norm": 1.7343634366989136, |
| "learning_rate": 6.401222198102603e-05, |
| "loss": 1.0159, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.002758384616854561, |
| "grad_norm": 1.9725018739700317, |
| "learning_rate": 6.379144103185123e-05, |
| "loss": 0.7249, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.0027617079718146266, |
| "grad_norm": 1.848870873451233, |
| "learning_rate": 6.357086302327542e-05, |
| "loss": 0.8119, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.002765031326774692, |
| "grad_norm": 2.3503406047821045, |
| "learning_rate": 6.335048919158733e-05, |
| "loss": 0.6752, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.0027683546817347583, |
| "grad_norm": 1.880751132965088, |
| "learning_rate": 6.31303207719313e-05, |
| "loss": 0.8967, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.002771678036694824, |
| "grad_norm": 2.5321950912475586, |
| "learning_rate": 6.291035899830043e-05, |
| "loss": 1.4429, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.0027750013916548895, |
| "grad_norm": 2.1685030460357666, |
| "learning_rate": 6.26906051035296e-05, |
| "loss": 0.8103, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.002778324746614955, |
| "grad_norm": 2.246095895767212, |
| "learning_rate": 6.247106031928854e-05, |
| "loss": 0.9691, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.002781648101575021, |
| "grad_norm": 2.2341558933258057, |
| "learning_rate": 6.225172587607496e-05, |
| "loss": 0.9346, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.002784971456535087, |
| "grad_norm": 1.7885407209396362, |
| "learning_rate": 6.203260300320773e-05, |
| "loss": 0.7622, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.0027882948114951525, |
| "grad_norm": 2.0753839015960693, |
| "learning_rate": 6.181369292881987e-05, |
| "loss": 0.9163, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.002791618166455218, |
| "grad_norm": 2.8468055725097656, |
| "learning_rate": 6.159499687985175e-05, |
| "loss": 1.0923, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.002794941521415284, |
| "grad_norm": 1.672538161277771, |
| "learning_rate": 6.137651608204411e-05, |
| "loss": 0.8471, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.00279826487637535, |
| "grad_norm": 1.954453945159912, |
| "learning_rate": 6.115825175993129e-05, |
| "loss": 1.0673, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.0028015882313354155, |
| "grad_norm": 2.2643611431121826, |
| "learning_rate": 6.0940205136834314e-05, |
| "loss": 0.8641, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.002804911586295481, |
| "grad_norm": 2.73639178276062, |
| "learning_rate": 6.0722377434854115e-05, |
| "loss": 0.3082, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.0028082349412555468, |
| "grad_norm": 1.2507221698760986, |
| "learning_rate": 6.0504769874864606e-05, |
| "loss": 0.6386, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.002811558296215613, |
| "grad_norm": 2.7501723766326904, |
| "learning_rate": 6.0287383676505796e-05, |
| "loss": 0.675, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.0028148816511756785, |
| "grad_norm": 2.5215868949890137, |
| "learning_rate": 6.0070220058177083e-05, |
| "loss": 1.4006, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.002818205006135744, |
| "grad_norm": 2.644489288330078, |
| "learning_rate": 5.985328023703026e-05, |
| "loss": 1.0212, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.0028215283610958097, |
| "grad_norm": 2.0919172763824463, |
| "learning_rate": 5.963656542896292e-05, |
| "loss": 0.9604, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.0028248517160558754, |
| "grad_norm": 1.9078031778335571, |
| "learning_rate": 5.942007684861141e-05, |
| "loss": 0.6867, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.0028281750710159414, |
| "grad_norm": 1.341930866241455, |
| "learning_rate": 5.920381570934415e-05, |
| "loss": 0.4825, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.002831498425976007, |
| "grad_norm": 1.4135596752166748, |
| "learning_rate": 5.898778322325482e-05, |
| "loss": 0.9042, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.0028348217809360727, |
| "grad_norm": 3.0637757778167725, |
| "learning_rate": 5.8771980601155584e-05, |
| "loss": 0.8, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.0028381451358961383, |
| "grad_norm": 1.1350816488265991, |
| "learning_rate": 5.8556409052570184e-05, |
| "loss": 0.6352, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.0028414684908562044, |
| "grad_norm": 2.974297523498535, |
| "learning_rate": 5.834106978572726e-05, |
| "loss": 0.953, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.00284479184581627, |
| "grad_norm": 1.6131561994552612, |
| "learning_rate": 5.812596400755368e-05, |
| "loss": 1.1661, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.0028481152007763357, |
| "grad_norm": 1.42369544506073, |
| "learning_rate": 5.791109292366749e-05, |
| "loss": 1.1922, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.0028514385557364013, |
| "grad_norm": 2.268709421157837, |
| "learning_rate": 5.769645773837156e-05, |
| "loss": 0.7496, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.0028547619106964674, |
| "grad_norm": 1.760610580444336, |
| "learning_rate": 5.7482059654646304e-05, |
| "loss": 0.8179, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.002858085265656533, |
| "grad_norm": 1.400349736213684, |
| "learning_rate": 5.7267899874143495e-05, |
| "loss": 0.9362, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.0028614086206165987, |
| "grad_norm": 1.8297139406204224, |
| "learning_rate": 5.7053979597179175e-05, |
| "loss": 0.8381, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.0028647319755766643, |
| "grad_norm": 1.8903241157531738, |
| "learning_rate": 5.684030002272694e-05, |
| "loss": 1.1997, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.00286805533053673, |
| "grad_norm": 1.5943444967269897, |
| "learning_rate": 5.66268623484115e-05, |
| "loss": 0.7329, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.002871378685496796, |
| "grad_norm": 1.3991549015045166, |
| "learning_rate": 5.641366777050159e-05, |
| "loss": 1.0264, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.0028747020404568616, |
| "grad_norm": 3.293590784072876, |
| "learning_rate": 5.6200717483903545e-05, |
| "loss": 1.2638, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.0028780253954169273, |
| "grad_norm": 2.305203914642334, |
| "learning_rate": 5.598801268215443e-05, |
| "loss": 0.7039, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.002881348750376993, |
| "grad_norm": 1.9197697639465332, |
| "learning_rate": 5.5775554557415465e-05, |
| "loss": 0.7887, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.002884672105337059, |
| "grad_norm": 1.9447755813598633, |
| "learning_rate": 5.556334430046537e-05, |
| "loss": 0.9385, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.0028879954602971246, |
| "grad_norm": 2.6985504627227783, |
| "learning_rate": 5.53513831006935e-05, |
| "loss": 0.5904, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.0028913188152571902, |
| "grad_norm": 1.594799518585205, |
| "learning_rate": 5.5139672146093376e-05, |
| "loss": 1.0022, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.002894642170217256, |
| "grad_norm": 2.485776901245117, |
| "learning_rate": 5.492821262325595e-05, |
| "loss": 0.8321, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.002897965525177322, |
| "grad_norm": 2.4602620601654053, |
| "learning_rate": 5.471700571736287e-05, |
| "loss": 0.8259, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.0029012888801373876, |
| "grad_norm": 1.3913767337799072, |
| "learning_rate": 5.450605261218009e-05, |
| "loss": 0.9241, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.002904612235097453, |
| "grad_norm": 2.930417537689209, |
| "learning_rate": 5.429535449005096e-05, |
| "loss": 0.68, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.002907935590057519, |
| "grad_norm": 1.7724130153656006, |
| "learning_rate": 5.408491253188965e-05, |
| "loss": 0.719, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.0029112589450175845, |
| "grad_norm": 1.5435396432876587, |
| "learning_rate": 5.3874727917174805e-05, |
| "loss": 0.8378, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.0029145822999776506, |
| "grad_norm": 2.9708385467529297, |
| "learning_rate": 5.366480182394242e-05, |
| "loss": 0.583, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.002917905654937716, |
| "grad_norm": 2.3581833839416504, |
| "learning_rate": 5.3455135428779826e-05, |
| "loss": 0.8518, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.002921229009897782, |
| "grad_norm": 1.5053647756576538, |
| "learning_rate": 5.324572990681862e-05, |
| "loss": 0.5481, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.0029245523648578475, |
| "grad_norm": 2.0453686714172363, |
| "learning_rate": 5.303658643172828e-05, |
| "loss": 1.4061, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.0029278757198179135, |
| "grad_norm": 2.7578492164611816, |
| "learning_rate": 5.282770617570973e-05, |
| "loss": 0.9779, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.002931199074777979, |
| "grad_norm": 2.1508371829986572, |
| "learning_rate": 5.2619090309488416e-05, |
| "loss": 1.0649, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.002934522429738045, |
| "grad_norm": 2.0045487880706787, |
| "learning_rate": 5.2410740002308035e-05, |
| "loss": 0.8897, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.0029378457846981104, |
| "grad_norm": 3.2993271350860596, |
| "learning_rate": 5.2202656421923876e-05, |
| "loss": 1.004, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.0029411691396581765, |
| "grad_norm": 2.9482834339141846, |
| "learning_rate": 5.1994840734596264e-05, |
| "loss": 0.9076, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.002944492494618242, |
| "grad_norm": 1.955139398574829, |
| "learning_rate": 5.1787294105084095e-05, |
| "loss": 0.9145, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.0029478158495783078, |
| "grad_norm": 2.491614580154419, |
| "learning_rate": 5.1580017696638226e-05, |
| "loss": 0.662, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.0029511392045383734, |
| "grad_norm": 2.719644069671631, |
| "learning_rate": 5.137301267099498e-05, |
| "loss": 1.1147, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.0029544625594984395, |
| "grad_norm": 2.648268699645996, |
| "learning_rate": 5.1166280188369655e-05, |
| "loss": 0.6992, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.002957785914458505, |
| "grad_norm": 1.7123095989227295, |
| "learning_rate": 5.095982140744995e-05, |
| "loss": 0.8088, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.0029611092694185707, |
| "grad_norm": 1.9856046438217163, |
| "learning_rate": 5.0753637485389685e-05, |
| "loss": 0.7292, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.0029644326243786364, |
| "grad_norm": 1.634596586227417, |
| "learning_rate": 5.0547729577802004e-05, |
| "loss": 1.0181, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.002967755979338702, |
| "grad_norm": 2.8171486854553223, |
| "learning_rate": 5.034209883875307e-05, |
| "loss": 1.1075, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.002971079334298768, |
| "grad_norm": 1.6797412633895874, |
| "learning_rate": 5.013674642075573e-05, |
| "loss": 0.9165, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.0029744026892588337, |
| "grad_norm": 1.9247677326202393, |
| "learning_rate": 4.9931673474762666e-05, |
| "loss": 1.1835, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.0029777260442188994, |
| "grad_norm": 2.5648396015167236, |
| "learning_rate": 4.972688115016039e-05, |
| "loss": 0.8235, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.002981049399178965, |
| "grad_norm": 3.0656604766845703, |
| "learning_rate": 4.952237059476251e-05, |
| "loss": 1.1572, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.002984372754139031, |
| "grad_norm": 1.6321872472763062, |
| "learning_rate": 4.931814295480335e-05, |
| "loss": 1.1099, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.0029876961090990967, |
| "grad_norm": 1.9796544313430786, |
| "learning_rate": 4.9114199374931655e-05, |
| "loss": 0.9264, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.0029910194640591623, |
| "grad_norm": 2.6657955646514893, |
| "learning_rate": 4.891054099820406e-05, |
| "loss": 0.8, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.002994342819019228, |
| "grad_norm": 2.9986019134521484, |
| "learning_rate": 4.870716896607851e-05, |
| "loss": 1.0711, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.002997666173979294, |
| "grad_norm": 2.200094223022461, |
| "learning_rate": 4.8504084418408355e-05, |
| "loss": 0.8344, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.0030009895289393597, |
| "grad_norm": 3.1483426094055176, |
| "learning_rate": 4.830128849343542e-05, |
| "loss": 0.9605, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.0030043128838994253, |
| "grad_norm": 2.294978141784668, |
| "learning_rate": 4.809878232778406e-05, |
| "loss": 0.6189, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.003007636238859491, |
| "grad_norm": 1.5292420387268066, |
| "learning_rate": 4.789656705645447e-05, |
| "loss": 0.872, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.0030109595938195566, |
| "grad_norm": 2.268393039703369, |
| "learning_rate": 4.769464381281643e-05, |
| "loss": 0.9634, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.0030142829487796226, |
| "grad_norm": 2.076624631881714, |
| "learning_rate": 4.7493013728603074e-05, |
| "loss": 1.0022, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.0030176063037396883, |
| "grad_norm": 2.2546167373657227, |
| "learning_rate": 4.729167793390435e-05, |
| "loss": 0.9459, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.003020929658699754, |
| "grad_norm": 1.8075573444366455, |
| "learning_rate": 4.709063755716083e-05, |
| "loss": 1.3215, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.0030242530136598196, |
| "grad_norm": 1.9737260341644287, |
| "learning_rate": 4.6889893725157375e-05, |
| "loss": 0.8568, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.0030275763686198856, |
| "grad_norm": 3.899272918701172, |
| "learning_rate": 4.668944756301666e-05, |
| "loss": 0.973, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.0030308997235799513, |
| "grad_norm": 1.8497694730758667, |
| "learning_rate": 4.648930019419321e-05, |
| "loss": 1.0238, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.003034223078540017, |
| "grad_norm": 1.7761645317077637, |
| "learning_rate": 4.628945274046659e-05, |
| "loss": 1.1853, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.0030375464335000825, |
| "grad_norm": 2.692018985748291, |
| "learning_rate": 4.608990632193557e-05, |
| "loss": 0.8397, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.0030408697884601486, |
| "grad_norm": 2.3758668899536133, |
| "learning_rate": 4.589066205701177e-05, |
| "loss": 1.1698, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.0030441931434202142, |
| "grad_norm": 1.9619114398956299, |
| "learning_rate": 4.569172106241312e-05, |
| "loss": 0.8451, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.00304751649838028, |
| "grad_norm": 1.6542222499847412, |
| "learning_rate": 4.5493084453157954e-05, |
| "loss": 0.6891, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.0030508398533403455, |
| "grad_norm": 2.2412753105163574, |
| "learning_rate": 4.529475334255855e-05, |
| "loss": 0.7928, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.003054163208300411, |
| "grad_norm": 1.8066473007202148, |
| "learning_rate": 4.5096728842214795e-05, |
| "loss": 0.9513, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.003057486563260477, |
| "grad_norm": 1.6183894872665405, |
| "learning_rate": 4.489901206200832e-05, |
| "loss": 0.8679, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.003060809918220543, |
| "grad_norm": 1.7738823890686035, |
| "learning_rate": 4.4701604110095916e-05, |
| "loss": 1.0069, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.0030641332731806085, |
| "grad_norm": 1.9727370738983154, |
| "learning_rate": 4.450450609290347e-05, |
| "loss": 0.5319, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.003067456628140674, |
| "grad_norm": 1.3664517402648926, |
| "learning_rate": 4.430771911511986e-05, |
| "loss": 0.2944, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.00307077998310074, |
| "grad_norm": 2.3164656162261963, |
| "learning_rate": 4.4111244279690536e-05, |
| "loss": 1.3496, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.003074103338060806, |
| "grad_norm": 1.6333527565002441, |
| "learning_rate": 4.3915082687811515e-05, |
| "loss": 0.7681, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.0030774266930208715, |
| "grad_norm": 2.8815577030181885, |
| "learning_rate": 4.371923543892316e-05, |
| "loss": 1.0568, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.003080750047980937, |
| "grad_norm": 1.6626960039138794, |
| "learning_rate": 4.352370363070396e-05, |
| "loss": 0.7345, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.003084073402941003, |
| "grad_norm": 2.0292208194732666, |
| "learning_rate": 4.332848835906457e-05, |
| "loss": 0.5221, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.003087396757901069, |
| "grad_norm": 2.0600383281707764, |
| "learning_rate": 4.313359071814137e-05, |
| "loss": 0.7379, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.0030907201128611344, |
| "grad_norm": 2.089693069458008, |
| "learning_rate": 4.293901180029059e-05, |
| "loss": 0.7173, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.0030940434678212, |
| "grad_norm": 2.7445740699768066, |
| "learning_rate": 4.274475269608208e-05, |
| "loss": 1.1091, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.003097366822781266, |
| "grad_norm": 2.0086019039154053, |
| "learning_rate": 4.2550814494293114e-05, |
| "loss": 1.3061, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.0031006901777413318, |
| "grad_norm": 3.048259735107422, |
| "learning_rate": 4.2357198281902556e-05, |
| "loss": 0.9055, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.0031040135327013974, |
| "grad_norm": 2.275136947631836, |
| "learning_rate": 4.2163905144084456e-05, |
| "loss": 0.7963, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.003107336887661463, |
| "grad_norm": 1.90250563621521, |
| "learning_rate": 4.197093616420212e-05, |
| "loss": 0.906, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.0031106602426215287, |
| "grad_norm": 1.2324166297912598, |
| "learning_rate": 4.1778292423802165e-05, |
| "loss": 1.0246, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.0031139835975815947, |
| "grad_norm": 1.7340452671051025, |
| "learning_rate": 4.158597500260804e-05, |
| "loss": 1.2704, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.0031173069525416604, |
| "grad_norm": 1.7009319067001343, |
| "learning_rate": 4.139398497851453e-05, |
| "loss": 0.7377, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.003120630307501726, |
| "grad_norm": 1.9392496347427368, |
| "learning_rate": 4.120232342758128e-05, |
| "loss": 0.8505, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.0031239536624617916, |
| "grad_norm": 1.9137873649597168, |
| "learning_rate": 4.1010991424026936e-05, |
| "loss": 0.854, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.0031272770174218577, |
| "grad_norm": 2.3392927646636963, |
| "learning_rate": 4.081999004022317e-05, |
| "loss": 0.7184, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.0031306003723819233, |
| "grad_norm": 2.563201904296875, |
| "learning_rate": 4.062932034668856e-05, |
| "loss": 1.0486, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.003133923727341989, |
| "grad_norm": 3.3593509197235107, |
| "learning_rate": 4.0438983412082624e-05, |
| "loss": 0.8378, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.0031372470823020546, |
| "grad_norm": 2.170193672180176, |
| "learning_rate": 4.0248980303199854e-05, |
| "loss": 0.7698, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.0031405704372621207, |
| "grad_norm": 1.4478988647460938, |
| "learning_rate": 4.005931208496373e-05, |
| "loss": 0.8382, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.0031438937922221863, |
| "grad_norm": 1.5125523805618286, |
| "learning_rate": 3.986997982042079e-05, |
| "loss": 0.3141, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.003147217147182252, |
| "grad_norm": 2.468640089035034, |
| "learning_rate": 3.968098457073456e-05, |
| "loss": 0.7174, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.0031505405021423176, |
| "grad_norm": 1.7223023176193237, |
| "learning_rate": 3.9492327395179707e-05, |
| "loss": 0.9229, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.0031538638571023832, |
| "grad_norm": 1.9831416606903076, |
| "learning_rate": 3.930400935113607e-05, |
| "loss": 0.6094, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.0031571872120624493, |
| "grad_norm": 3.7181596755981445, |
| "learning_rate": 3.9116031494082715e-05, |
| "loss": 0.4995, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.003160510567022515, |
| "grad_norm": 2.800658941268921, |
| "learning_rate": 3.89283948775921e-05, |
| "loss": 1.1526, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.0031638339219825806, |
| "grad_norm": 2.679004430770874, |
| "learning_rate": 3.8741100553324036e-05, |
| "loss": 1.1475, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.003167157276942646, |
| "grad_norm": 1.9589389562606812, |
| "learning_rate": 3.855414957101987e-05, |
| "loss": 0.8868, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.0031704806319027123, |
| "grad_norm": 3.259084701538086, |
| "learning_rate": 3.8367542978496726e-05, |
| "loss": 0.7888, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.003173803986862778, |
| "grad_norm": 1.8688689470291138, |
| "learning_rate": 3.8181281821641245e-05, |
| "loss": 0.868, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.0031771273418228435, |
| "grad_norm": 1.5200146436691284, |
| "learning_rate": 3.799536714440426e-05, |
| "loss": 0.8957, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.003180450696782909, |
| "grad_norm": 2.1697998046875, |
| "learning_rate": 3.780979998879448e-05, |
| "loss": 0.719, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.0031837740517429752, |
| "grad_norm": 2.2164082527160645, |
| "learning_rate": 3.762458139487287e-05, |
| "loss": 0.9671, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.003187097406703041, |
| "grad_norm": 1.7509944438934326, |
| "learning_rate": 3.743971240074689e-05, |
| "loss": 0.4619, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.0031904207616631065, |
| "grad_norm": 2.366628646850586, |
| "learning_rate": 3.725519404256447e-05, |
| "loss": 1.1947, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.003193744116623172, |
| "grad_norm": 1.7423028945922852, |
| "learning_rate": 3.707102735450831e-05, |
| "loss": 0.4799, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.003197067471583238, |
| "grad_norm": 3.3096704483032227, |
| "learning_rate": 3.688721336879012e-05, |
| "loss": 0.9332, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.003200390826543304, |
| "grad_norm": 2.0674052238464355, |
| "learning_rate": 3.6703753115644735e-05, |
| "loss": 0.7861, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.0032037141815033695, |
| "grad_norm": 2.0808584690093994, |
| "learning_rate": 3.6520647623324525e-05, |
| "loss": 0.5346, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.003207037536463435, |
| "grad_norm": 2.2569382190704346, |
| "learning_rate": 3.633789791809339e-05, |
| "loss": 0.7376, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.0032103608914235008, |
| "grad_norm": 2.037261724472046, |
| "learning_rate": 3.615550502422115e-05, |
| "loss": 0.9662, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.003213684246383567, |
| "grad_norm": 1.5911048650741577, |
| "learning_rate": 3.5973469963977805e-05, |
| "loss": 0.5487, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.0032170076013436325, |
| "grad_norm": 1.9614652395248413, |
| "learning_rate": 3.579179375762773e-05, |
| "loss": 1.0662, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.003220330956303698, |
| "grad_norm": 2.467716932296753, |
| "learning_rate": 3.5610477423424124e-05, |
| "loss": 0.6805, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.0032236543112637637, |
| "grad_norm": 2.4678971767425537, |
| "learning_rate": 3.542952197760305e-05, |
| "loss": 0.6498, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.00322697766622383, |
| "grad_norm": 1.838238000869751, |
| "learning_rate": 3.524892843437793e-05, |
| "loss": 0.8032, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.0032303010211838954, |
| "grad_norm": 4.720175743103027, |
| "learning_rate": 3.506869780593387e-05, |
| "loss": 0.6131, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.003233624376143961, |
| "grad_norm": 2.0438411235809326, |
| "learning_rate": 3.488883110242175e-05, |
| "loss": 1.0154, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.0032369477311040267, |
| "grad_norm": 1.9405808448791504, |
| "learning_rate": 3.4709329331952946e-05, |
| "loss": 0.8456, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.0032402710860640923, |
| "grad_norm": 1.3455164432525635, |
| "learning_rate": 3.453019350059333e-05, |
| "loss": 0.6341, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.0032435944410241584, |
| "grad_norm": 2.457744836807251, |
| "learning_rate": 3.435142461235778e-05, |
| "loss": 0.6841, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.003246917795984224, |
| "grad_norm": 2.1893134117126465, |
| "learning_rate": 3.417302366920465e-05, |
| "loss": 1.0017, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.0032502411509442897, |
| "grad_norm": 2.831120491027832, |
| "learning_rate": 3.399499167102985e-05, |
| "loss": 0.842, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.0032535645059043553, |
| "grad_norm": 1.5025831460952759, |
| "learning_rate": 3.381732961566166e-05, |
| "loss": 0.67, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.0032568878608644214, |
| "grad_norm": 1.6793795824050903, |
| "learning_rate": 3.364003849885476e-05, |
| "loss": 0.8812, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.003260211215824487, |
| "grad_norm": 1.9502850770950317, |
| "learning_rate": 3.346311931428485e-05, |
| "loss": 0.8764, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.0032635345707845527, |
| "grad_norm": 1.9033828973770142, |
| "learning_rate": 3.32865730535431e-05, |
| "loss": 0.8055, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.0032668579257446183, |
| "grad_norm": 2.4927141666412354, |
| "learning_rate": 3.3110400706130427e-05, |
| "loss": 0.5764, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.0032701812807046844, |
| "grad_norm": 1.6894636154174805, |
| "learning_rate": 3.2934603259452104e-05, |
| "loss": 0.9428, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.00327350463566475, |
| "grad_norm": 2.994685649871826, |
| "learning_rate": 3.275918169881216e-05, |
| "loss": 0.6822, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.0032768279906248156, |
| "grad_norm": 2.1028683185577393, |
| "learning_rate": 3.258413700740783e-05, |
| "loss": 0.5847, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.0032801513455848813, |
| "grad_norm": 2.150242328643799, |
| "learning_rate": 3.2409470166324216e-05, |
| "loss": 0.8227, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.0032834747005449473, |
| "grad_norm": 1.834642767906189, |
| "learning_rate": 3.223518215452852e-05, |
| "loss": 0.5538, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.003286798055505013, |
| "grad_norm": 2.7726798057556152, |
| "learning_rate": 3.2061273948864736e-05, |
| "loss": 1.0142, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.0032901214104650786, |
| "grad_norm": 1.5941482782363892, |
| "learning_rate": 3.188774652404813e-05, |
| "loss": 1.2037, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.0032934447654251442, |
| "grad_norm": 1.4523831605911255, |
| "learning_rate": 3.171460085265978e-05, |
| "loss": 0.8859, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.00329676812038521, |
| "grad_norm": 2.7575342655181885, |
| "learning_rate": 3.154183790514117e-05, |
| "loss": 0.9737, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.003300091475345276, |
| "grad_norm": 1.9906915426254272, |
| "learning_rate": 3.1369458649788644e-05, |
| "loss": 0.6769, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.0033034148303053416, |
| "grad_norm": 2.669388771057129, |
| "learning_rate": 3.1197464052748024e-05, |
| "loss": 1.038, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.0033067381852654072, |
| "grad_norm": 2.2965517044067383, |
| "learning_rate": 3.102585507800936e-05, |
| "loss": 0.5346, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.003310061540225473, |
| "grad_norm": 1.4986422061920166, |
| "learning_rate": 3.0854632687401154e-05, |
| "loss": 1.1781, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.003313384895185539, |
| "grad_norm": 2.397091865539551, |
| "learning_rate": 3.0683797840585317e-05, |
| "loss": 0.7228, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.0033167082501456046, |
| "grad_norm": 1.880570888519287, |
| "learning_rate": 3.051335149505171e-05, |
| "loss": 0.965, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.00332003160510567, |
| "grad_norm": 1.5805224180221558, |
| "learning_rate": 3.0343294606112628e-05, |
| "loss": 0.5373, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.003323354960065736, |
| "grad_norm": 2.085787534713745, |
| "learning_rate": 3.0173628126897657e-05, |
| "loss": 1.0924, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.003326678315025802, |
| "grad_norm": 2.0339345932006836, |
| "learning_rate": 3.0004353008348186e-05, |
| "loss": 0.9347, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.0033300016699858675, |
| "grad_norm": 2.8665969371795654, |
| "learning_rate": 2.983547019921199e-05, |
| "loss": 0.6028, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.003333325024945933, |
| "grad_norm": 1.641309380531311, |
| "learning_rate": 2.9666980646038278e-05, |
| "loss": 0.9894, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.003336648379905999, |
| "grad_norm": 2.332895278930664, |
| "learning_rate": 2.9498885293171942e-05, |
| "loss": 1.0866, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.0033399717348660644, |
| "grad_norm": 2.141460418701172, |
| "learning_rate": 2.9331185082748634e-05, |
| "loss": 0.8964, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.0033399717348660644, |
| "eval_loss": 0.7770960330963135, |
| "eval_runtime": 3252.7632, |
| "eval_samples_per_second": 38.95, |
| "eval_steps_per_second": 19.475, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.0033432950898261305, |
| "grad_norm": 1.9887334108352661, |
| "learning_rate": 2.9163880954689228e-05, |
| "loss": 0.8261, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.003346618444786196, |
| "grad_norm": 2.993713140487671, |
| "learning_rate": 2.8996973846694642e-05, |
| "loss": 0.6206, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.0033499417997462618, |
| "grad_norm": 1.2360352277755737, |
| "learning_rate": 2.8830464694240634e-05, |
| "loss": 1.2049, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.0033532651547063274, |
| "grad_norm": 2.6683409214019775, |
| "learning_rate": 2.8664354430572492e-05, |
| "loss": 0.8874, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.0033565885096663935, |
| "grad_norm": 1.849593162536621, |
| "learning_rate": 2.8498643986699803e-05, |
| "loss": 1.0419, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.003359911864626459, |
| "grad_norm": 1.955989956855774, |
| "learning_rate": 2.8333334291391323e-05, |
| "loss": 1.3201, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.0033632352195865248, |
| "grad_norm": 1.4712536334991455, |
| "learning_rate": 2.8168426271169623e-05, |
| "loss": 0.8732, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.0033665585745465904, |
| "grad_norm": 1.8448526859283447, |
| "learning_rate": 2.8003920850306085e-05, |
| "loss": 1.313, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.0033698819295066565, |
| "grad_norm": 2.3865597248077393, |
| "learning_rate": 2.783981895081549e-05, |
| "loss": 1.0349, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.003373205284466722, |
| "grad_norm": 3.1011106967926025, |
| "learning_rate": 2.767612149245099e-05, |
| "loss": 1.1726, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.0033765286394267877, |
| "grad_norm": 1.5871694087982178, |
| "learning_rate": 2.751282939269908e-05, |
| "loss": 0.7144, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.0033798519943868534, |
| "grad_norm": 2.121737480163574, |
| "learning_rate": 2.734994356677416e-05, |
| "loss": 1.1891, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.003383175349346919, |
| "grad_norm": 1.8265715837478638, |
| "learning_rate": 2.7187464927613606e-05, |
| "loss": 0.6414, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.003386498704306985, |
| "grad_norm": 2.1911258697509766, |
| "learning_rate": 2.702539438587267e-05, |
| "loss": 0.9227, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.0033898220592670507, |
| "grad_norm": 2.096592903137207, |
| "learning_rate": 2.6863732849919164e-05, |
| "loss": 0.268, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.0033931454142271163, |
| "grad_norm": 1.7348281145095825, |
| "learning_rate": 2.670248122582869e-05, |
| "loss": 0.9092, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.003396468769187182, |
| "grad_norm": 2.108294725418091, |
| "learning_rate": 2.6541640417379276e-05, |
| "loss": 0.98, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.003399792124147248, |
| "grad_norm": 2.166422128677368, |
| "learning_rate": 2.6381211326046428e-05, |
| "loss": 1.0629, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.0034031154791073137, |
| "grad_norm": 1.3211616277694702, |
| "learning_rate": 2.6221194850998155e-05, |
| "loss": 1.1247, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.0034064388340673793, |
| "grad_norm": 1.4853568077087402, |
| "learning_rate": 2.6061591889089776e-05, |
| "loss": 0.3974, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.003409762189027445, |
| "grad_norm": 1.7520192861557007, |
| "learning_rate": 2.590240333485897e-05, |
| "loss": 0.8057, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.003413085543987511, |
| "grad_norm": 2.130795955657959, |
| "learning_rate": 2.5743630080520765e-05, |
| "loss": 0.8887, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.0034164088989475767, |
| "grad_norm": 2.033766269683838, |
| "learning_rate": 2.558527301596251e-05, |
| "loss": 0.6583, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.0034197322539076423, |
| "grad_norm": 1.8105857372283936, |
| "learning_rate": 2.5427333028738988e-05, |
| "loss": 0.4101, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.003423055608867708, |
| "grad_norm": 2.780566930770874, |
| "learning_rate": 2.5269811004067256e-05, |
| "loss": 0.8233, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.003426378963827774, |
| "grad_norm": 1.9028615951538086, |
| "learning_rate": 2.5112707824821846e-05, |
| "loss": 0.8525, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.0034297023187878396, |
| "grad_norm": 3.4555652141571045, |
| "learning_rate": 2.495602437152975e-05, |
| "loss": 0.8126, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.0034330256737479053, |
| "grad_norm": 1.3704328536987305, |
| "learning_rate": 2.4799761522365438e-05, |
| "loss": 0.7354, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.003436349028707971, |
| "grad_norm": 2.7514312267303467, |
| "learning_rate": 2.4643920153146116e-05, |
| "loss": 1.19, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.0034396723836680365, |
| "grad_norm": 2.007598638534546, |
| "learning_rate": 2.448850113732658e-05, |
| "loss": 1.0406, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.0034429957386281026, |
| "grad_norm": 1.9599459171295166, |
| "learning_rate": 2.4333505345994433e-05, |
| "loss": 1.0742, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.0034463190935881682, |
| "grad_norm": 1.525658130645752, |
| "learning_rate": 2.4178933647865344e-05, |
| "loss": 1.0423, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.003449642448548234, |
| "grad_norm": 2.253969430923462, |
| "learning_rate": 2.4024786909277806e-05, |
| "loss": 0.7478, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.0034529658035082995, |
| "grad_norm": 2.9593136310577393, |
| "learning_rate": 2.3871065994188723e-05, |
| "loss": 0.9221, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.0034562891584683656, |
| "grad_norm": 1.6113935708999634, |
| "learning_rate": 2.3717771764168262e-05, |
| "loss": 1.0702, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.003459612513428431, |
| "grad_norm": 1.477838397026062, |
| "learning_rate": 2.3564905078395073e-05, |
| "loss": 0.8423, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.003462935868388497, |
| "grad_norm": 1.8534512519836426, |
| "learning_rate": 2.3412466793651654e-05, |
| "loss": 0.6136, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.0034662592233485625, |
| "grad_norm": 1.2977598905563354, |
| "learning_rate": 2.32604577643193e-05, |
| "loss": 1.0533, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.0034695825783086286, |
| "grad_norm": 1.402443289756775, |
| "learning_rate": 2.310887884237346e-05, |
| "loss": 0.6865, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.003472905933268694, |
| "grad_norm": 1.5745853185653687, |
| "learning_rate": 2.2957730877378947e-05, |
| "loss": 0.8526, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.00347622928822876, |
| "grad_norm": 2.4579102993011475, |
| "learning_rate": 2.280701471648512e-05, |
| "loss": 0.5886, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.0034795526431888255, |
| "grad_norm": 2.511986017227173, |
| "learning_rate": 2.2656731204421255e-05, |
| "loss": 1.0093, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.003482875998148891, |
| "grad_norm": 1.5310604572296143, |
| "learning_rate": 2.2506881183491647e-05, |
| "loss": 0.5451, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.003486199353108957, |
| "grad_norm": 2.594639539718628, |
| "learning_rate": 2.2357465493571016e-05, |
| "loss": 1.3524, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.003489522708069023, |
| "grad_norm": 2.8036839962005615, |
| "learning_rate": 2.2208484972099743e-05, |
| "loss": 0.8427, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.0034928460630290884, |
| "grad_norm": 1.729481816291809, |
| "learning_rate": 2.2059940454079175e-05, |
| "loss": 0.5532, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.003496169417989154, |
| "grad_norm": 1.573091745376587, |
| "learning_rate": 2.191183277206703e-05, |
| "loss": 0.5843, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.00349949277294922, |
| "grad_norm": 2.2913410663604736, |
| "learning_rate": 2.176416275617259e-05, |
| "loss": 0.6285, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.0035028161279092858, |
| "grad_norm": 2.0742692947387695, |
| "learning_rate": 2.1616931234052108e-05, |
| "loss": 1.4509, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.0035061394828693514, |
| "grad_norm": 2.3040852546691895, |
| "learning_rate": 2.1470139030904312e-05, |
| "loss": 0.8464, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.003509462837829417, |
| "grad_norm": 2.0199368000030518, |
| "learning_rate": 2.132378696946542e-05, |
| "loss": 1.0534, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.003512786192789483, |
| "grad_norm": 2.384861469268799, |
| "learning_rate": 2.1177875870004993e-05, |
| "loss": 1.1861, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.0035161095477495487, |
| "grad_norm": 1.7507251501083374, |
| "learning_rate": 2.103240655032095e-05, |
| "loss": 0.6772, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.0035194329027096144, |
| "grad_norm": 1.9110218286514282, |
| "learning_rate": 2.0887379825735176e-05, |
| "loss": 0.4735, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.00352275625766968, |
| "grad_norm": 2.0105197429656982, |
| "learning_rate": 2.074279650908897e-05, |
| "loss": 0.6666, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.0035260796126297457, |
| "grad_norm": 2.125974416732788, |
| "learning_rate": 2.0598657410738343e-05, |
| "loss": 1.151, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.0035294029675898117, |
| "grad_norm": 1.5914584398269653, |
| "learning_rate": 2.0454963338549625e-05, |
| "loss": 0.7659, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.0035327263225498774, |
| "grad_norm": 2.65944504737854, |
| "learning_rate": 2.0311715097894855e-05, |
| "loss": 0.6658, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.003536049677509943, |
| "grad_norm": 1.8550827503204346, |
| "learning_rate": 2.0168913491647255e-05, |
| "loss": 1.1008, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.0035393730324700086, |
| "grad_norm": 1.9186201095581055, |
| "learning_rate": 2.0026559320176875e-05, |
| "loss": 1.0043, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.0035426963874300747, |
| "grad_norm": 2.0259082317352295, |
| "learning_rate": 1.9884653381345875e-05, |
| "loss": 1.2005, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.0035460197423901403, |
| "grad_norm": 2.626133680343628, |
| "learning_rate": 1.9743196470504234e-05, |
| "loss": 0.9398, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.003549343097350206, |
| "grad_norm": 2.803363561630249, |
| "learning_rate": 1.9602189380485214e-05, |
| "loss": 0.7764, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.0035526664523102716, |
| "grad_norm": 2.6616218090057373, |
| "learning_rate": 1.9461632901600935e-05, |
| "loss": 0.867, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.0035559898072703377, |
| "grad_norm": 1.805355191230774, |
| "learning_rate": 1.9321527821637987e-05, |
| "loss": 0.9621, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.0035593131622304033, |
| "grad_norm": 1.9301918745040894, |
| "learning_rate": 1.9181874925852926e-05, |
| "loss": 1.1156, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.003562636517190469, |
| "grad_norm": 1.2671960592269897, |
| "learning_rate": 1.904267499696791e-05, |
| "loss": 1.0542, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.0035659598721505346, |
| "grad_norm": 1.832224726676941, |
| "learning_rate": 1.8903928815166426e-05, |
| "loss": 0.9923, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.0035692832271106, |
| "grad_norm": 2.3667545318603516, |
| "learning_rate": 1.8765637158088623e-05, |
| "loss": 0.7825, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.0035726065820706663, |
| "grad_norm": 1.8500324487686157, |
| "learning_rate": 1.8627800800827332e-05, |
| "loss": 0.4897, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.003575929937030732, |
| "grad_norm": 2.8491454124450684, |
| "learning_rate": 1.8490420515923445e-05, |
| "loss": 1.0215, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.0035792532919907975, |
| "grad_norm": 2.6926255226135254, |
| "learning_rate": 1.8353497073361647e-05, |
| "loss": 0.8692, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.003582576646950863, |
| "grad_norm": 3.2963078022003174, |
| "learning_rate": 1.821703124056623e-05, |
| "loss": 1.2669, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.0035859000019109293, |
| "grad_norm": 1.8386890888214111, |
| "learning_rate": 1.808102378239659e-05, |
| "loss": 1.0086, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.003589223356870995, |
| "grad_norm": 2.384105920791626, |
| "learning_rate": 1.794547546114308e-05, |
| "loss": 1.6574, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.0035925467118310605, |
| "grad_norm": 2.209704875946045, |
| "learning_rate": 1.78103870365227e-05, |
| "loss": 1.2232, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.003595870066791126, |
| "grad_norm": 2.2206103801727295, |
| "learning_rate": 1.7675759265674797e-05, |
| "loss": 1.0329, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.0035991934217511922, |
| "grad_norm": 1.4707410335540771, |
| "learning_rate": 1.7541592903156933e-05, |
| "loss": 0.5543, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.003602516776711258, |
| "grad_norm": 1.7666308879852295, |
| "learning_rate": 1.7407888700940523e-05, |
| "loss": 0.6173, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.0036058401316713235, |
| "grad_norm": 3.515272378921509, |
| "learning_rate": 1.7274647408406698e-05, |
| "loss": 0.7079, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.003609163486631389, |
| "grad_norm": 1.8757985830307007, |
| "learning_rate": 1.7141869772342088e-05, |
| "loss": 0.6742, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.003612486841591455, |
| "grad_norm": 1.7394914627075195, |
| "learning_rate": 1.7009556536934602e-05, |
| "loss": 0.9227, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.003615810196551521, |
| "grad_norm": 1.7292208671569824, |
| "learning_rate": 1.6877708443769392e-05, |
| "loss": 0.6633, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.0036191335515115865, |
| "grad_norm": 2.7111852169036865, |
| "learning_rate": 1.6746326231824495e-05, |
| "loss": 1.4088, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.003622456906471652, |
| "grad_norm": 2.8773698806762695, |
| "learning_rate": 1.661541063746679e-05, |
| "loss": 0.9911, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.0036257802614317177, |
| "grad_norm": 2.6539015769958496, |
| "learning_rate": 1.648496239444799e-05, |
| "loss": 0.8291, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.003629103616391784, |
| "grad_norm": 1.9189116954803467, |
| "learning_rate": 1.635498223390022e-05, |
| "loss": 1.0237, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.0036324269713518494, |
| "grad_norm": 1.969369888305664, |
| "learning_rate": 1.6225470884332304e-05, |
| "loss": 1.0304, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.003635750326311915, |
| "grad_norm": 2.1338069438934326, |
| "learning_rate": 1.6096429071625374e-05, |
| "loss": 1.0386, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.0036390736812719807, |
| "grad_norm": 2.693995714187622, |
| "learning_rate": 1.5967857519028928e-05, |
| "loss": 0.6724, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.003642397036232047, |
| "grad_norm": 1.5616436004638672, |
| "learning_rate": 1.5839756947156846e-05, |
| "loss": 1.0765, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.0036457203911921124, |
| "grad_norm": 2.3733086585998535, |
| "learning_rate": 1.5712128073983146e-05, |
| "loss": 0.6867, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.003649043746152178, |
| "grad_norm": 1.7389389276504517, |
| "learning_rate": 1.5584971614838128e-05, |
| "loss": 0.9531, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.0036523671011122437, |
| "grad_norm": 2.9344327449798584, |
| "learning_rate": 1.5458288282404398e-05, |
| "loss": 0.6522, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.0036556904560723098, |
| "grad_norm": 2.1339237689971924, |
| "learning_rate": 1.533207878671269e-05, |
| "loss": 1.0749, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.0036590138110323754, |
| "grad_norm": 2.975815534591675, |
| "learning_rate": 1.5206343835138092e-05, |
| "loss": 0.4487, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.003662337165992441, |
| "grad_norm": 1.2044059038162231, |
| "learning_rate": 1.5081084132395907e-05, |
| "loss": 0.6741, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.0036656605209525067, |
| "grad_norm": 1.7217930555343628, |
| "learning_rate": 1.4956300380537747e-05, |
| "loss": 1.0937, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.0036689838759125723, |
| "grad_norm": 1.3124263286590576, |
| "learning_rate": 1.4831993278947742e-05, |
| "loss": 0.4858, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.0036723072308726384, |
| "grad_norm": 2.2098429203033447, |
| "learning_rate": 1.4708163524338436e-05, |
| "loss": 1.0121, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.003675630585832704, |
| "grad_norm": 1.8337208032608032, |
| "learning_rate": 1.4584811810746935e-05, |
| "loss": 1.2821, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.0036789539407927696, |
| "grad_norm": 1.871624231338501, |
| "learning_rate": 1.4461938829531107e-05, |
| "loss": 0.6309, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.0036822772957528353, |
| "grad_norm": 2.333550453186035, |
| "learning_rate": 1.4339545269365585e-05, |
| "loss": 0.9257, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.0036856006507129013, |
| "grad_norm": 1.6626560688018799, |
| "learning_rate": 1.4217631816237952e-05, |
| "loss": 0.5538, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.003688924005672967, |
| "grad_norm": 1.7753632068634033, |
| "learning_rate": 1.4096199153444934e-05, |
| "loss": 0.6881, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.0036922473606330326, |
| "grad_norm": 2.1906261444091797, |
| "learning_rate": 1.3975247961588478e-05, |
| "loss": 1.2149, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.0036955707155930983, |
| "grad_norm": 1.9990803003311157, |
| "learning_rate": 1.385477891857211e-05, |
| "loss": 0.9241, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.0036988940705531643, |
| "grad_norm": 1.9631175994873047, |
| "learning_rate": 1.3734792699596921e-05, |
| "loss": 0.9384, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.00370221742551323, |
| "grad_norm": 1.601910948753357, |
| "learning_rate": 1.361528997715792e-05, |
| "loss": 1.0611, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.0037055407804732956, |
| "grad_norm": 1.8392131328582764, |
| "learning_rate": 1.3496271421040219e-05, |
| "loss": 0.9894, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.0037088641354333612, |
| "grad_norm": 3.042781352996826, |
| "learning_rate": 1.3377737698315295e-05, |
| "loss": 0.9245, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.003712187490393427, |
| "grad_norm": 2.590090751647949, |
| "learning_rate": 1.3259689473337289e-05, |
| "loss": 1.0378, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.003715510845353493, |
| "grad_norm": 2.383646011352539, |
| "learning_rate": 1.3142127407739168e-05, |
| "loss": 0.5865, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.0037188342003135586, |
| "grad_norm": 1.675218939781189, |
| "learning_rate": 1.3025052160429118e-05, |
| "loss": 0.442, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.003722157555273624, |
| "grad_norm": 2.6803250312805176, |
| "learning_rate": 1.290846438758687e-05, |
| "loss": 1.1026, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.00372548091023369, |
| "grad_norm": 2.088174343109131, |
| "learning_rate": 1.2792364742659856e-05, |
| "loss": 1.0888, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.003728804265193756, |
| "grad_norm": 2.361299514770508, |
| "learning_rate": 1.2676753876359781e-05, |
| "loss": 0.9856, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.0037321276201538215, |
| "grad_norm": 1.9991151094436646, |
| "learning_rate": 1.2561632436658811e-05, |
| "loss": 0.7982, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.003735450975113887, |
| "grad_norm": 3.7411093711853027, |
| "learning_rate": 1.2447001068785969e-05, |
| "loss": 1.2467, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.003738774330073953, |
| "grad_norm": 2.4900856018066406, |
| "learning_rate": 1.233286041522358e-05, |
| "loss": 0.707, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.003742097685034019, |
| "grad_norm": 2.1829333305358887, |
| "learning_rate": 1.2219211115703621e-05, |
| "loss": 0.6891, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.0037454210399940845, |
| "grad_norm": 1.6782306432724, |
| "learning_rate": 1.210605380720411e-05, |
| "loss": 1.1077, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.00374874439495415, |
| "grad_norm": 2.4409914016723633, |
| "learning_rate": 1.1993389123945597e-05, |
| "loss": 0.8018, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.003752067749914216, |
| "grad_norm": 1.8187549114227295, |
| "learning_rate": 1.1881217697387547e-05, |
| "loss": 1.0921, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.003755391104874282, |
| "grad_norm": 1.4356580972671509, |
| "learning_rate": 1.1769540156224923e-05, |
| "loss": 0.7464, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.0037587144598343475, |
| "grad_norm": 2.073961019515991, |
| "learning_rate": 1.1658357126384479e-05, |
| "loss": 0.6749, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.003762037814794413, |
| "grad_norm": 1.284589409828186, |
| "learning_rate": 1.1547669231021395e-05, |
| "loss": 0.4601, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.0037653611697544788, |
| "grad_norm": 2.1579086780548096, |
| "learning_rate": 1.1437477090515713e-05, |
| "loss": 1.0621, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.0037686845247145444, |
| "grad_norm": 1.9478920698165894, |
| "learning_rate": 1.1327781322468889e-05, |
| "loss": 0.8049, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.0037720078796746105, |
| "grad_norm": 1.604015827178955, |
| "learning_rate": 1.1218582541700362e-05, |
| "loss": 1.1651, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.003775331234634676, |
| "grad_norm": 2.576106309890747, |
| "learning_rate": 1.1109881360244034e-05, |
| "loss": 1.2095, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.0037786545895947417, |
| "grad_norm": 1.4734787940979004, |
| "learning_rate": 1.100167838734486e-05, |
| "loss": 0.8627, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.0037819779445548074, |
| "grad_norm": 1.465071439743042, |
| "learning_rate": 1.0893974229455538e-05, |
| "loss": 1.0974, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.0037853012995148734, |
| "grad_norm": 1.63118577003479, |
| "learning_rate": 1.0786769490232895e-05, |
| "loss": 1.0472, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.003788624654474939, |
| "grad_norm": 1.9454184770584106, |
| "learning_rate": 1.0680064770534748e-05, |
| "loss": 1.0103, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.0037919480094350047, |
| "grad_norm": 1.2677627801895142, |
| "learning_rate": 1.0573860668416358e-05, |
| "loss": 0.7591, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.0037952713643950703, |
| "grad_norm": 1.8168962001800537, |
| "learning_rate": 1.046815777912713e-05, |
| "loss": 0.3897, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.0037985947193551364, |
| "grad_norm": 2.0347893238067627, |
| "learning_rate": 1.0362956695107361e-05, |
| "loss": 0.9761, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.003801918074315202, |
| "grad_norm": 1.2987099885940552, |
| "learning_rate": 1.0258258005984777e-05, |
| "loss": 0.7144, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.0038052414292752677, |
| "grad_norm": 2.079082489013672, |
| "learning_rate": 1.015406229857132e-05, |
| "loss": 0.8902, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.0038085647842353333, |
| "grad_norm": 1.4583004713058472, |
| "learning_rate": 1.0050370156859811e-05, |
| "loss": 0.8563, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.003811888139195399, |
| "grad_norm": 2.9463703632354736, |
| "learning_rate": 9.947182162020729e-06, |
| "loss": 0.5976, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.003815211494155465, |
| "grad_norm": 1.276196002960205, |
| "learning_rate": 9.844498892398946e-06, |
| "loss": 1.2324, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.0038185348491155307, |
| "grad_norm": 2.1213057041168213, |
| "learning_rate": 9.742320923510428e-06, |
| "loss": 1.2369, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.0038218582040755963, |
| "grad_norm": 2.704526424407959, |
| "learning_rate": 9.640648828039045e-06, |
| "loss": 0.9242, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.003825181559035662, |
| "grad_norm": 1.3100472688674927, |
| "learning_rate": 9.539483175833397e-06, |
| "loss": 0.6016, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.003828504913995728, |
| "grad_norm": 2.6044392585754395, |
| "learning_rate": 9.438824533903545e-06, |
| "loss": 0.6004, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.0038318282689557936, |
| "grad_norm": 1.9593836069107056, |
| "learning_rate": 9.338673466417924e-06, |
| "loss": 1.0782, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.0038351516239158593, |
| "grad_norm": 2.4404966831207275, |
| "learning_rate": 9.2390305347001e-06, |
| "loss": 0.9982, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.003838474978875925, |
| "grad_norm": 1.8588268756866455, |
| "learning_rate": 9.139896297225626e-06, |
| "loss": 0.65, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.003841798333835991, |
| "grad_norm": 1.6673916578292847, |
| "learning_rate": 9.041271309619048e-06, |
| "loss": 1.3098, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.0038451216887960566, |
| "grad_norm": 2.00077223777771, |
| "learning_rate": 8.943156124650531e-06, |
| "loss": 1.1602, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.0038484450437561222, |
| "grad_norm": 3.087786912918091, |
| "learning_rate": 8.845551292233045e-06, |
| "loss": 0.9981, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.003851768398716188, |
| "grad_norm": 1.7945916652679443, |
| "learning_rate": 8.748457359419093e-06, |
| "loss": 0.9974, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.0038550917536762535, |
| "grad_norm": 2.521268129348755, |
| "learning_rate": 8.651874870397692e-06, |
| "loss": 0.9116, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.0038584151086363196, |
| "grad_norm": 2.505439281463623, |
| "learning_rate": 8.555804366491405e-06, |
| "loss": 0.9118, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.003861738463596385, |
| "grad_norm": 2.009557008743286, |
| "learning_rate": 8.460246386153147e-06, |
| "loss": 0.6388, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.003865061818556451, |
| "grad_norm": 3.128729820251465, |
| "learning_rate": 8.365201464963302e-06, |
| "loss": 1.0031, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.0038683851735165165, |
| "grad_norm": 1.6357756853103638, |
| "learning_rate": 8.27067013562669e-06, |
| "loss": 0.8037, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.0038717085284765826, |
| "grad_norm": 2.719797134399414, |
| "learning_rate": 8.17665292796952e-06, |
| "loss": 1.0075, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.003875031883436648, |
| "grad_norm": 2.046483278274536, |
| "learning_rate": 8.083150368936532e-06, |
| "loss": 0.2509, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.003878355238396714, |
| "grad_norm": 1.5247260332107544, |
| "learning_rate": 7.990162982587924e-06, |
| "loss": 1.1599, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.0038816785933567795, |
| "grad_norm": 1.7768243551254272, |
| "learning_rate": 7.897691290096498e-06, |
| "loss": 0.6034, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.0038850019483168455, |
| "grad_norm": 3.438926935195923, |
| "learning_rate": 7.805735809744696e-06, |
| "loss": 0.6389, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.003888325303276911, |
| "grad_norm": 1.7144073247909546, |
| "learning_rate": 7.714297056921715e-06, |
| "loss": 1.159, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.003891648658236977, |
| "grad_norm": 1.8290139436721802, |
| "learning_rate": 7.623375544120648e-06, |
| "loss": 0.9152, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.0038949720131970424, |
| "grad_norm": 2.1877503395080566, |
| "learning_rate": 7.532971780935516e-06, |
| "loss": 1.0348, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.003898295368157108, |
| "grad_norm": 1.77268648147583, |
| "learning_rate": 7.443086274058497e-06, |
| "loss": 0.7937, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.003901618723117174, |
| "grad_norm": 1.3810285329818726, |
| "learning_rate": 7.353719527277125e-06, |
| "loss": 0.9105, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.0039049420780772398, |
| "grad_norm": 1.8284751176834106, |
| "learning_rate": 7.2648720414712716e-06, |
| "loss": 1.1995, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.003908265433037306, |
| "grad_norm": 1.7051944732666016, |
| "learning_rate": 7.1765443146106e-06, |
| "loss": 0.7849, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.003911588787997371, |
| "grad_norm": 1.6046769618988037, |
| "learning_rate": 7.088736841751575e-06, |
| "loss": 0.8096, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.003914912142957437, |
| "grad_norm": 2.0210366249084473, |
| "learning_rate": 7.001450115034758e-06, |
| "loss": 0.6858, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.003918235497917502, |
| "grad_norm": 1.8352065086364746, |
| "learning_rate": 6.914684623682099e-06, |
| "loss": 0.8634, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.003921558852877568, |
| "grad_norm": 1.6586220264434814, |
| "learning_rate": 6.828440853994089e-06, |
| "loss": 1.0516, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.0039248822078376345, |
| "grad_norm": 1.8991930484771729, |
| "learning_rate": 6.7427192893471105e-06, |
| "loss": 1.3651, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.0039282055627977, |
| "grad_norm": 2.0748491287231445, |
| "learning_rate": 6.657520410190721e-06, |
| "loss": 0.9304, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.003931528917757766, |
| "grad_norm": 2.1835572719573975, |
| "learning_rate": 6.572844694044911e-06, |
| "loss": 0.7364, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.003934852272717832, |
| "grad_norm": 1.8947280645370483, |
| "learning_rate": 6.488692615497516e-06, |
| "loss": 0.9096, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.003938175627677897, |
| "grad_norm": 1.584531307220459, |
| "learning_rate": 6.405064646201464e-06, |
| "loss": 0.8091, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.003941498982637963, |
| "grad_norm": 2.568542718887329, |
| "learning_rate": 6.321961254872166e-06, |
| "loss": 0.8995, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.003944822337598028, |
| "grad_norm": 1.460935354232788, |
| "learning_rate": 6.239382907284941e-06, |
| "loss": 0.6506, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.003948145692558094, |
| "grad_norm": 2.127983570098877, |
| "learning_rate": 6.157330066272282e-06, |
| "loss": 0.8965, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.00395146904751816, |
| "grad_norm": 1.676879644393921, |
| "learning_rate": 6.0758031917214296e-06, |
| "loss": 1.0078, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.003954792402478226, |
| "grad_norm": 1.7694756984710693, |
| "learning_rate": 5.994802740571659e-06, |
| "loss": 0.5985, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.003958115757438292, |
| "grad_norm": 2.5703816413879395, |
| "learning_rate": 5.914329166811727e-06, |
| "loss": 0.9092, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.003961439112398357, |
| "grad_norm": 1.5351899862289429, |
| "learning_rate": 5.834382921477466e-06, |
| "loss": 1.1089, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.003964762467358423, |
| "grad_norm": 2.5164711475372314, |
| "learning_rate": 5.754964452649037e-06, |
| "loss": 0.9535, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.003968085822318489, |
| "grad_norm": 2.1561896800994873, |
| "learning_rate": 5.67607420544859e-06, |
| "loss": 1.2628, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.003971409177278554, |
| "grad_norm": 2.350405693054199, |
| "learning_rate": 5.597712622037754e-06, |
| "loss": 0.826, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.00397473253223862, |
| "grad_norm": 2.584944009780884, |
| "learning_rate": 5.519880141615042e-06, |
| "loss": 0.9049, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.003978055887198686, |
| "grad_norm": 1.9516925811767578, |
| "learning_rate": 5.442577200413546e-06, |
| "loss": 1.0605, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.0039813792421587516, |
| "grad_norm": 3.750558853149414, |
| "learning_rate": 5.365804231698368e-06, |
| "loss": 0.9567, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.003984702597118818, |
| "grad_norm": 1.9897552728652954, |
| "learning_rate": 5.289561665764198e-06, |
| "loss": 0.6373, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.003988025952078883, |
| "grad_norm": 2.0267670154571533, |
| "learning_rate": 5.213849929933024e-06, |
| "loss": 0.7988, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.003991349307038949, |
| "grad_norm": 1.9218477010726929, |
| "learning_rate": 5.138669448551614e-06, |
| "loss": 0.8301, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.003994672661999015, |
| "grad_norm": 2.785154104232788, |
| "learning_rate": 5.0640206429891535e-06, |
| "loss": 0.8819, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.00399799601695908, |
| "grad_norm": 1.7020105123519897, |
| "learning_rate": 4.989903931634965e-06, |
| "loss": 0.9052, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.004001319371919146, |
| "grad_norm": 1.9669575691223145, |
| "learning_rate": 4.916319729896057e-06, |
| "loss": 0.8468, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.004004642726879211, |
| "grad_norm": 2.421363353729248, |
| "learning_rate": 4.843268450194871e-06, |
| "loss": 0.7444, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.0040079660818392775, |
| "grad_norm": 1.5673800706863403, |
| "learning_rate": 4.770750501966925e-06, |
| "loss": 0.9701, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.004011289436799344, |
| "grad_norm": 1.9752105474472046, |
| "learning_rate": 4.698766291658552e-06, |
| "loss": 1.0881, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.004014612791759409, |
| "grad_norm": 1.9096660614013672, |
| "learning_rate": 4.627316222724598e-06, |
| "loss": 0.9221, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.004017936146719475, |
| "grad_norm": 3.038297653198242, |
| "learning_rate": 4.556400695626173e-06, |
| "loss": 0.7838, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.004021259501679541, |
| "grad_norm": 2.406052350997925, |
| "learning_rate": 4.486020107828448e-06, |
| "loss": 1.0768, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.004024582856639606, |
| "grad_norm": 2.2107632160186768, |
| "learning_rate": 4.416174853798283e-06, |
| "loss": 0.9042, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.004027906211599672, |
| "grad_norm": 1.4963706731796265, |
| "learning_rate": 4.34686532500218e-06, |
| "loss": 0.7624, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.004031229566559737, |
| "grad_norm": 1.8679277896881104, |
| "learning_rate": 4.2780919099040585e-06, |
| "loss": 0.9459, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.0040345529215198035, |
| "grad_norm": 2.617995023727417, |
| "learning_rate": 4.2098549939629696e-06, |
| "loss": 1.009, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.0040378762764798695, |
| "grad_norm": 1.7749000787734985, |
| "learning_rate": 4.14215495963105e-06, |
| "loss": 0.8112, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.004041199631439935, |
| "grad_norm": 1.2091432809829712, |
| "learning_rate": 4.074992186351367e-06, |
| "loss": 0.6515, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.004044522986400001, |
| "grad_norm": 2.0824778079986572, |
| "learning_rate": 4.008367050555683e-06, |
| "loss": 1.0088, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.004047846341360066, |
| "grad_norm": 1.503562569618225, |
| "learning_rate": 3.942279925662506e-06, |
| "loss": 0.8859, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.004051169696320132, |
| "grad_norm": 2.2736172676086426, |
| "learning_rate": 3.876731182074888e-06, |
| "loss": 0.9602, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.004054493051280198, |
| "grad_norm": 1.9392001628875732, |
| "learning_rate": 3.811721187178352e-06, |
| "loss": 1.2851, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.004057816406240263, |
| "grad_norm": 1.8600444793701172, |
| "learning_rate": 3.747250305338934e-06, |
| "loss": 0.3464, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.004061139761200329, |
| "grad_norm": 3.741283655166626, |
| "learning_rate": 3.6833188979009447e-06, |
| "loss": 0.9047, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.0040644631161603955, |
| "grad_norm": 2.066908359527588, |
| "learning_rate": 3.6199273231852016e-06, |
| "loss": 0.7508, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.004067786471120461, |
| "grad_norm": 1.472381591796875, |
| "learning_rate": 3.5570759364867976e-06, |
| "loss": 1.1071, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.004071109826080527, |
| "grad_norm": 2.138390302658081, |
| "learning_rate": 3.494765090073193e-06, |
| "loss": 0.7563, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.004074433181040592, |
| "grad_norm": 1.5176939964294434, |
| "learning_rate": 3.432995133182315e-06, |
| "loss": 1.1034, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.004077756536000658, |
| "grad_norm": 1.9056531190872192, |
| "learning_rate": 3.3717664120204717e-06, |
| "loss": 0.7667, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.004081079890960724, |
| "grad_norm": 3.191318988800049, |
| "learning_rate": 3.3110792697604755e-06, |
| "loss": 0.8358, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.004084403245920789, |
| "grad_norm": 1.8594859838485718, |
| "learning_rate": 3.250934046539722e-06, |
| "loss": 0.2147, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.004087726600880855, |
| "grad_norm": 1.8989686965942383, |
| "learning_rate": 3.1913310794582817e-06, |
| "loss": 0.7407, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.0040910499558409206, |
| "grad_norm": 2.4154255390167236, |
| "learning_rate": 3.1322707025770114e-06, |
| "loss": 0.7876, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.004094373310800987, |
| "grad_norm": 2.14886736869812, |
| "learning_rate": 3.073753246915656e-06, |
| "loss": 0.8468, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.004097696665761053, |
| "grad_norm": 2.599923849105835, |
| "learning_rate": 3.015779040451017e-06, |
| "loss": 0.8232, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.004101020020721118, |
| "grad_norm": 1.8031104803085327, |
| "learning_rate": 2.958348408115108e-06, |
| "loss": 0.9074, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.004104343375681184, |
| "grad_norm": 1.2037352323532104, |
| "learning_rate": 2.9014616717933595e-06, |
| "loss": 0.494, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.00410766673064125, |
| "grad_norm": 1.8279937505722046, |
| "learning_rate": 2.8451191503227614e-06, |
| "loss": 0.7566, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.004110990085601315, |
| "grad_norm": 1.9116507768630981, |
| "learning_rate": 2.7893211594901215e-06, |
| "loss": 0.937, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.004114313440561381, |
| "grad_norm": 1.6439839601516724, |
| "learning_rate": 2.7340680120302554e-06, |
| "loss": 0.7953, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.0041176367955214465, |
| "grad_norm": 2.841966390609741, |
| "learning_rate": 2.6793600176243105e-06, |
| "loss": 0.7789, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.004120960150481513, |
| "grad_norm": 1.5409972667694092, |
| "learning_rate": 2.625197482897912e-06, |
| "loss": 0.4756, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.004124283505441579, |
| "grad_norm": 1.8044242858886719, |
| "learning_rate": 2.5715807114195525e-06, |
| "loss": 0.7198, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.004127606860401644, |
| "grad_norm": 2.6371781826019287, |
| "learning_rate": 2.5185100036988484e-06, |
| "loss": 0.6385, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.00413093021536171, |
| "grad_norm": 1.3817503452301025, |
| "learning_rate": 2.4659856571848105e-06, |
| "loss": 0.9401, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.004134253570321775, |
| "grad_norm": 1.8377162218093872, |
| "learning_rate": 2.4140079662642865e-06, |
| "loss": 0.8917, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.004137576925281841, |
| "grad_norm": 1.7400139570236206, |
| "learning_rate": 2.362577222260198e-06, |
| "loss": 1.108, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.004140900280241907, |
| "grad_norm": 1.966605544090271, |
| "learning_rate": 2.3116937134299745e-06, |
| "loss": 0.6442, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.0041442236352019724, |
| "grad_norm": 1.370110273361206, |
| "learning_rate": 2.2613577249639083e-06, |
| "loss": 0.777, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.0041475469901620385, |
| "grad_norm": 1.5771291255950928, |
| "learning_rate": 2.2115695389835712e-06, |
| "loss": 0.6103, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.004150870345122105, |
| "grad_norm": 2.013594388961792, |
| "learning_rate": 2.1623294345402447e-06, |
| "loss": 1.0112, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.00415419370008217, |
| "grad_norm": 2.0027196407318115, |
| "learning_rate": 2.1136376876133234e-06, |
| "loss": 0.8395, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.004157517055042236, |
| "grad_norm": 2.6950972080230713, |
| "learning_rate": 2.0654945711087834e-06, |
| "loss": 1.2351, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.004160840410002301, |
| "grad_norm": 2.4111666679382324, |
| "learning_rate": 2.0179003548576602e-06, |
| "loss": 1.0297, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.004164163764962367, |
| "grad_norm": 1.8260390758514404, |
| "learning_rate": 1.970855305614516e-06, |
| "loss": 1.159, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.004167487119922433, |
| "grad_norm": 2.396212577819824, |
| "learning_rate": 1.9243596870559988e-06, |
| "loss": 0.9679, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.004170810474882498, |
| "grad_norm": 2.532935380935669, |
| "learning_rate": 1.8784137597792738e-06, |
| "loss": 0.9187, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.0041741338298425645, |
| "grad_norm": 1.9746872186660767, |
| "learning_rate": 1.8330177813006388e-06, |
| "loss": 0.9043, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.0041774571848026305, |
| "grad_norm": 1.886202335357666, |
| "learning_rate": 1.7881720060540786e-06, |
| "loss": 0.8272, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.004180780539762696, |
| "grad_norm": 1.5669914484024048, |
| "learning_rate": 1.7438766853897558e-06, |
| "loss": 0.798, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.004184103894722762, |
| "grad_norm": 1.6605589389801025, |
| "learning_rate": 1.7001320675727239e-06, |
| "loss": 1.1453, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.004187427249682827, |
| "grad_norm": 2.4264414310455322, |
| "learning_rate": 1.656938397781449e-06, |
| "loss": 0.727, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.004190750604642893, |
| "grad_norm": 2.188891887664795, |
| "learning_rate": 1.6142959181064343e-06, |
| "loss": 0.9834, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.004194073959602959, |
| "grad_norm": 1.7915462255477905, |
| "learning_rate": 1.5722048675489541e-06, |
| "loss": 0.8014, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.004197397314563024, |
| "grad_norm": 2.2112960815429688, |
| "learning_rate": 1.5306654820195886e-06, |
| "loss": 0.669, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.00420072066952309, |
| "grad_norm": 1.741981029510498, |
| "learning_rate": 1.489677994336991e-06, |
| "loss": 0.9008, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.004204044024483156, |
| "grad_norm": 1.3700710535049438, |
| "learning_rate": 1.4492426342265552e-06, |
| "loss": 0.7386, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.004207367379443222, |
| "grad_norm": 2.1248631477355957, |
| "learning_rate": 1.4093596283191179e-06, |
| "loss": 0.977, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.004210690734403288, |
| "grad_norm": 2.1686811447143555, |
| "learning_rate": 1.3700292001497028e-06, |
| "loss": 0.8883, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.004214014089363353, |
| "grad_norm": 1.8819714784622192, |
| "learning_rate": 1.3312515701562667e-06, |
| "loss": 0.5279, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.004217337444323419, |
| "grad_norm": 1.7714366912841797, |
| "learning_rate": 1.2930269556784558e-06, |
| "loss": 1.1745, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.004220660799283485, |
| "grad_norm": 2.3303427696228027, |
| "learning_rate": 1.2553555709564068e-06, |
| "loss": 1.0176, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.00422398415424355, |
| "grad_norm": 1.934847354888916, |
| "learning_rate": 1.2182376271294815e-06, |
| "loss": 1.0643, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.004227307509203616, |
| "grad_norm": 1.9978758096694946, |
| "learning_rate": 1.1816733322352114e-06, |
| "loss": 1.093, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.0042306308641636816, |
| "grad_norm": 1.9631693363189697, |
| "learning_rate": 1.1456628912079992e-06, |
| "loss": 1.091, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.004233954219123748, |
| "grad_norm": 1.8431519269943237, |
| "learning_rate": 1.110206505878031e-06, |
| "loss": 0.8671, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.004237277574083814, |
| "grad_norm": 2.2053067684173584, |
| "learning_rate": 1.0753043749701652e-06, |
| "loss": 0.8372, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.004240600929043879, |
| "grad_norm": 2.2077174186706543, |
| "learning_rate": 1.040956694102746e-06, |
| "loss": 1.0352, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.004243924284003945, |
| "grad_norm": 1.8683050870895386, |
| "learning_rate": 1.0071636557866137e-06, |
| "loss": 0.9413, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.00424724763896401, |
| "grad_norm": 1.6869767904281616, |
| "learning_rate": 9.73925449423896e-07, |
| "loss": 0.8921, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.004250570993924076, |
| "grad_norm": 2.4182016849517822, |
| "learning_rate": 9.412422613070848e-07, |
| "loss": 0.9755, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.004253894348884142, |
| "grad_norm": 1.7648463249206543, |
| "learning_rate": 9.091142746178726e-07, |
| "loss": 1.1577, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.0042572177038442075, |
| "grad_norm": 1.8752923011779785, |
| "learning_rate": 8.775416694262073e-07, |
| "loss": 0.96, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.004260541058804274, |
| "grad_norm": 1.7734326124191284, |
| "learning_rate": 8.465246226892154e-07, |
| "loss": 0.6681, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.00426386441376434, |
| "grad_norm": 2.1282567977905273, |
| "learning_rate": 8.160633082502922e-07, |
| "loss": 0.8363, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.004267187768724405, |
| "grad_norm": 1.7843672037124634, |
| "learning_rate": 7.861578968380578e-07, |
| "loss": 1.0503, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.004270511123684471, |
| "grad_norm": 1.9426231384277344, |
| "learning_rate": 7.568085560654247e-07, |
| "loss": 1.0447, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.004273834478644536, |
| "grad_norm": 2.154215097427368, |
| "learning_rate": 7.28015450428654e-07, |
| "loss": 1.0945, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.004277157833604602, |
| "grad_norm": 1.881326675415039, |
| "learning_rate": 6.99778741306456e-07, |
| "loss": 0.8126, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.004280481188564668, |
| "grad_norm": 2.788193941116333, |
| "learning_rate": 6.720985869590357e-07, |
| "loss": 0.6972, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.0042838045435247335, |
| "grad_norm": 2.3980252742767334, |
| "learning_rate": 6.44975142527282e-07, |
| "loss": 1.0364, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.0042871278984847995, |
| "grad_norm": 2.5638821125030518, |
| "learning_rate": 6.184085600318024e-07, |
| "loss": 0.9393, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.004290451253444865, |
| "grad_norm": 3.0566511154174805, |
| "learning_rate": 5.92398988372167e-07, |
| "loss": 1.0036, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.004293774608404931, |
| "grad_norm": 2.413839101791382, |
| "learning_rate": 5.669465733259994e-07, |
| "loss": 1.3389, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.004297097963364997, |
| "grad_norm": 2.7977092266082764, |
| "learning_rate": 5.420514575482205e-07, |
| "loss": 1.091, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.004300421318325062, |
| "grad_norm": 3.4734342098236084, |
| "learning_rate": 5.177137805701615e-07, |
| "loss": 0.7818, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.004303744673285128, |
| "grad_norm": 2.988787889480591, |
| "learning_rate": 4.939336787988857e-07, |
| "loss": 0.7218, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.004307068028245194, |
| "grad_norm": 1.6737980842590332, |
| "learning_rate": 4.7071128551637867e-07, |
| "loss": 1.1564, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.004310391383205259, |
| "grad_norm": 2.376605749130249, |
| "learning_rate": 4.4804673087877057e-07, |
| "loss": 0.3682, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.0043137147381653255, |
| "grad_norm": 2.320552110671997, |
| "learning_rate": 4.2594014191565945e-07, |
| "loss": 0.9988, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.004317038093125391, |
| "grad_norm": 2.8528194427490234, |
| "learning_rate": 4.043916425293559e-07, |
| "loss": 1.3724, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.004320361448085457, |
| "grad_norm": 3.585374593734741, |
| "learning_rate": 3.83401353494206e-07, |
| "loss": 0.9507, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.004323684803045523, |
| "grad_norm": 2.1745338439941406, |
| "learning_rate": 3.6296939245593633e-07, |
| "loss": 0.6232, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.004327008158005588, |
| "grad_norm": 1.2730436325073242, |
| "learning_rate": 3.4309587393096534e-07, |
| "loss": 0.6022, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.004330331512965654, |
| "grad_norm": 2.007568120956421, |
| "learning_rate": 3.2378090930577087e-07, |
| "loss": 1.1188, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.004333654867925719, |
| "grad_norm": 3.0893633365631104, |
| "learning_rate": 3.0502460683624613e-07, |
| "loss": 0.5077, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.004336978222885785, |
| "grad_norm": 1.57036554813385, |
| "learning_rate": 2.868270716471444e-07, |
| "loss": 0.5061, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.004340301577845851, |
| "grad_norm": 1.7737019062042236, |
| "learning_rate": 2.6918840573144644e-07, |
| "loss": 1.0445, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.004343624932805917, |
| "grad_norm": 2.042353391647339, |
| "learning_rate": 2.521087079497719e-07, |
| "loss": 0.9395, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.004346948287765983, |
| "grad_norm": 2.071798086166382, |
| "learning_rate": 2.3558807402989103e-07, |
| "loss": 1.0835, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.004350271642726049, |
| "grad_norm": 1.6557015180587769, |
| "learning_rate": 2.1962659656614703e-07, |
| "loss": 1.0824, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.004353594997686114, |
| "grad_norm": 1.98556649684906, |
| "learning_rate": 2.042243650189124e-07, |
| "loss": 1.0463, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.00435691835264618, |
| "grad_norm": 1.9155919551849365, |
| "learning_rate": 1.8938146571413352e-07, |
| "loss": 0.8835, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.004360241707606245, |
| "grad_norm": 2.178790807723999, |
| "learning_rate": 1.750979818428422e-07, |
| "loss": 0.6642, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.004363565062566311, |
| "grad_norm": 1.4291096925735474, |
| "learning_rate": 1.6137399346064508e-07, |
| "loss": 0.7367, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.004366888417526377, |
| "grad_norm": 3.2246644496917725, |
| "learning_rate": 1.4820957748733488e-07, |
| "loss": 0.8849, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.004370211772486443, |
| "grad_norm": 2.7322123050689697, |
| "learning_rate": 1.3560480770642425e-07, |
| "loss": 1.2786, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.004373535127446509, |
| "grad_norm": 2.1076912879943848, |
| "learning_rate": 1.235597547647127e-07, |
| "loss": 1.0382, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.004376858482406574, |
| "grad_norm": 2.021298885345459, |
| "learning_rate": 1.120744861719536e-07, |
| "loss": 0.7905, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.00438018183736664, |
| "grad_norm": 2.3875844478607178, |
| "learning_rate": 1.0114906630040999e-07, |
| "loss": 0.7163, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.004383505192326706, |
| "grad_norm": 1.4649884700775146, |
| "learning_rate": 9.078355638453273e-08, |
| "loss": 1.1184, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.004386828547286771, |
| "grad_norm": 1.641730546951294, |
| "learning_rate": 8.097801452061626e-08, |
| "loss": 0.7022, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.004390151902246837, |
| "grad_norm": 2.3859140872955322, |
| "learning_rate": 7.173249566645446e-08, |
| "loss": 1.2353, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.004393475257206903, |
| "grad_norm": 1.9294756650924683, |
| "learning_rate": 6.304705164104086e-08, |
| "loss": 0.857, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.0043967986121669685, |
| "grad_norm": 2.262242078781128, |
| "learning_rate": 5.4921731124280007e-08, |
| "loss": 0.8556, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.004400121967127035, |
| "grad_norm": 1.5056575536727905, |
| "learning_rate": 4.735657965672102e-08, |
| "loss": 0.8407, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.0044034453220871, |
| "grad_norm": 2.3266441822052, |
| "learning_rate": 4.035163963926891e-08, |
| "loss": 0.6501, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.004406768677047166, |
| "grad_norm": 2.55777907371521, |
| "learning_rate": 3.390695033300695e-08, |
| "loss": 1.2284, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.004410092032007232, |
| "grad_norm": 2.0342135429382324, |
| "learning_rate": 2.8022547858930213e-08, |
| "loss": 0.7973, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.004413415386967297, |
| "grad_norm": 1.9018234014511108, |
| "learning_rate": 2.2698465197745766e-08, |
| "loss": 0.7389, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.004416738741927363, |
| "grad_norm": 1.6029261350631714, |
| "learning_rate": 1.7934732189706093e-08, |
| "loss": 0.796, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.004420062096887428, |
| "grad_norm": 1.9555233716964722, |
| "learning_rate": 1.3731375534442593e-08, |
| "loss": 0.5044, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.0044233854518474945, |
| "grad_norm": 1.6823757886886597, |
| "learning_rate": 1.0088418790787923e-08, |
| "loss": 0.5384, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.0044267088068075605, |
| "grad_norm": 1.7445098161697388, |
| "learning_rate": 7.005882376676098e-09, |
| "loss": 0.8662, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.004430032161767626, |
| "grad_norm": 1.3536102771759033, |
| "learning_rate": 4.483783568998145e-09, |
| "loss": 0.6267, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.004433355516727692, |
| "grad_norm": 2.434354543685913, |
| "learning_rate": 2.5221365035577037e-09, |
| "loss": 0.9421, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.004436678871687758, |
| "grad_norm": 3.5411949157714844, |
| "learning_rate": 1.1209521749155904e-09, |
| "loss": 0.6256, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.004440002226647823, |
| "grad_norm": 1.8648262023925781, |
| "learning_rate": 2.8023843640090364e-10, |
| "loss": 0.8315, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.004443325581607889, |
| "grad_norm": 3.0049996376037598, |
| "learning_rate": 0.0, |
| "loss": 0.7712, |
| "step": 1337 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1337, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 335, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.018183511703552e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|