| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.6954505940307157, |
| "eval_steps": 500, |
| "global_step": 300, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0023181686467690526, |
| "grad_norm": 28.501070022583008, |
| "learning_rate": 2e-05, |
| "loss": 2.6862, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.004636337293538105, |
| "grad_norm": 2.4246978759765625, |
| "learning_rate": 1.998453209590101e-05, |
| "loss": 1.2503, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.006954505940307157, |
| "grad_norm": 8.621129989624023, |
| "learning_rate": 1.996906419180201e-05, |
| "loss": 1.9701, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.00927267458707621, |
| "grad_norm": 2.628929853439331, |
| "learning_rate": 1.995359628770302e-05, |
| "loss": 1.4985, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.011590843233845263, |
| "grad_norm": 5.25876522064209, |
| "learning_rate": 1.993812838360402e-05, |
| "loss": 1.3525, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.013909011880614315, |
| "grad_norm": 1.959221363067627, |
| "learning_rate": 1.992266047950503e-05, |
| "loss": 1.2634, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.016227180527383367, |
| "grad_norm": 4.436636447906494, |
| "learning_rate": 1.9907192575406035e-05, |
| "loss": 1.3113, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.01854534917415242, |
| "grad_norm": 2.8270750045776367, |
| "learning_rate": 1.989172467130704e-05, |
| "loss": 1.2802, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.02086351782092147, |
| "grad_norm": 1.8319437503814697, |
| "learning_rate": 1.9876256767208045e-05, |
| "loss": 1.2364, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.023181686467690525, |
| "grad_norm": 1.2635818719863892, |
| "learning_rate": 1.986078886310905e-05, |
| "loss": 1.2281, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.025499855114459576, |
| "grad_norm": 1.3797353506088257, |
| "learning_rate": 1.9845320959010055e-05, |
| "loss": 1.2345, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.02781802376122863, |
| "grad_norm": 1.020139455795288, |
| "learning_rate": 1.982985305491106e-05, |
| "loss": 1.2106, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.030136192407997683, |
| "grad_norm": 1.2911367416381836, |
| "learning_rate": 1.9814385150812065e-05, |
| "loss": 1.2277, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.032454361054766734, |
| "grad_norm": 1.878492832183838, |
| "learning_rate": 1.979891724671307e-05, |
| "loss": 1.1971, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.034772529701535784, |
| "grad_norm": 1.455005407333374, |
| "learning_rate": 1.978344934261408e-05, |
| "loss": 1.1909, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.03709069834830484, |
| "grad_norm": 1.5538443326950073, |
| "learning_rate": 1.976798143851508e-05, |
| "loss": 1.227, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.03940886699507389, |
| "grad_norm": 1.4034634828567505, |
| "learning_rate": 1.975251353441609e-05, |
| "loss": 1.1373, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.04172703564184294, |
| "grad_norm": 1.3893576860427856, |
| "learning_rate": 1.973704563031709e-05, |
| "loss": 1.178, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.044045204288612, |
| "grad_norm": 1.352433204650879, |
| "learning_rate": 1.97215777262181e-05, |
| "loss": 1.163, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.04636337293538105, |
| "grad_norm": 1.179675579071045, |
| "learning_rate": 1.9706109822119105e-05, |
| "loss": 1.1223, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0486815415821501, |
| "grad_norm": 1.208991289138794, |
| "learning_rate": 1.9690641918020112e-05, |
| "loss": 1.1867, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.05099971022891915, |
| "grad_norm": 0.8430147171020508, |
| "learning_rate": 1.9675174013921115e-05, |
| "loss": 1.1488, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.05331787887568821, |
| "grad_norm": 0.7868801355361938, |
| "learning_rate": 1.9659706109822122e-05, |
| "loss": 1.1035, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.05563604752245726, |
| "grad_norm": 0.9440180063247681, |
| "learning_rate": 1.9644238205723125e-05, |
| "loss": 1.1582, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.05795421616922631, |
| "grad_norm": 1.0409374237060547, |
| "learning_rate": 1.9628770301624132e-05, |
| "loss": 1.1304, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.06027238481599537, |
| "grad_norm": 1.4376304149627686, |
| "learning_rate": 1.961330239752514e-05, |
| "loss": 1.1535, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.06259055346276442, |
| "grad_norm": 0.7775300741195679, |
| "learning_rate": 1.9597834493426142e-05, |
| "loss": 1.0941, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.06490872210953347, |
| "grad_norm": 1.294155478477478, |
| "learning_rate": 1.958236658932715e-05, |
| "loss": 1.1859, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.06722689075630252, |
| "grad_norm": 1.0435543060302734, |
| "learning_rate": 1.9566898685228152e-05, |
| "loss": 1.147, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.06954505940307157, |
| "grad_norm": 1.1211307048797607, |
| "learning_rate": 1.955143078112916e-05, |
| "loss": 1.1148, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.07186322804984063, |
| "grad_norm": 0.9777933955192566, |
| "learning_rate": 1.9535962877030165e-05, |
| "loss": 1.1266, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.07418139669660968, |
| "grad_norm": 0.9920445680618286, |
| "learning_rate": 1.952049497293117e-05, |
| "loss": 1.1535, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.07649956534337873, |
| "grad_norm": 0.7202315330505371, |
| "learning_rate": 1.9505027068832175e-05, |
| "loss": 1.139, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.07881773399014778, |
| "grad_norm": 0.6207343339920044, |
| "learning_rate": 1.9489559164733182e-05, |
| "loss": 1.1105, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.08113590263691683, |
| "grad_norm": 1.078873634338379, |
| "learning_rate": 1.9474091260634185e-05, |
| "loss": 1.1407, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.08345407128368589, |
| "grad_norm": 0.9423937201499939, |
| "learning_rate": 1.9458623356535192e-05, |
| "loss": 1.145, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.08577223993045494, |
| "grad_norm": 0.8272204399108887, |
| "learning_rate": 1.9443155452436195e-05, |
| "loss": 1.1141, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.088090408577224, |
| "grad_norm": 0.9147409796714783, |
| "learning_rate": 1.9427687548337202e-05, |
| "loss": 1.1113, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.09040857722399305, |
| "grad_norm": 1.4252681732177734, |
| "learning_rate": 1.941221964423821e-05, |
| "loss": 1.1633, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0927267458707621, |
| "grad_norm": 0.8701033592224121, |
| "learning_rate": 1.9396751740139212e-05, |
| "loss": 1.1243, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.09504491451753115, |
| "grad_norm": 0.9681833386421204, |
| "learning_rate": 1.938128383604022e-05, |
| "loss": 1.1518, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.0973630831643002, |
| "grad_norm": 1.1157395839691162, |
| "learning_rate": 1.9365815931941222e-05, |
| "loss": 1.1178, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.09968125181106925, |
| "grad_norm": 0.7797672152519226, |
| "learning_rate": 1.935034802784223e-05, |
| "loss": 1.0887, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.1019994204578383, |
| "grad_norm": 1.3890780210494995, |
| "learning_rate": 1.9334880123743235e-05, |
| "loss": 1.1307, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.10431758910460737, |
| "grad_norm": 0.8613622784614563, |
| "learning_rate": 1.9319412219644242e-05, |
| "loss": 1.1662, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.10663575775137642, |
| "grad_norm": 1.0830810070037842, |
| "learning_rate": 1.9303944315545245e-05, |
| "loss": 1.1796, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.10895392639814547, |
| "grad_norm": 0.8859057426452637, |
| "learning_rate": 1.9288476411446252e-05, |
| "loss": 1.1584, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.11127209504491452, |
| "grad_norm": 0.6870171427726746, |
| "learning_rate": 1.9273008507347255e-05, |
| "loss": 1.1035, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.11359026369168357, |
| "grad_norm": 1.181731104850769, |
| "learning_rate": 1.9257540603248262e-05, |
| "loss": 1.1241, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.11590843233845262, |
| "grad_norm": 1.8238871097564697, |
| "learning_rate": 1.924207269914927e-05, |
| "loss": 1.2359, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.11822660098522167, |
| "grad_norm": 0.9966158270835876, |
| "learning_rate": 1.9226604795050272e-05, |
| "loss": 1.1387, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.12054476963199073, |
| "grad_norm": 0.9722117185592651, |
| "learning_rate": 1.921113689095128e-05, |
| "loss": 1.1725, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.12286293827875978, |
| "grad_norm": 1.1024895906448364, |
| "learning_rate": 1.9195668986852282e-05, |
| "loss": 1.1424, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.12518110692552883, |
| "grad_norm": 1.157772421836853, |
| "learning_rate": 1.918020108275329e-05, |
| "loss": 1.1627, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.12749927557229787, |
| "grad_norm": 1.0060945749282837, |
| "learning_rate": 1.9164733178654292e-05, |
| "loss": 1.1714, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.12981744421906694, |
| "grad_norm": 1.3089208602905273, |
| "learning_rate": 1.91492652745553e-05, |
| "loss": 1.1394, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.132135612865836, |
| "grad_norm": 1.0314549207687378, |
| "learning_rate": 1.9133797370456305e-05, |
| "loss": 1.1214, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.13445378151260504, |
| "grad_norm": 0.848561704158783, |
| "learning_rate": 1.9118329466357312e-05, |
| "loss": 1.1364, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.1367719501593741, |
| "grad_norm": 1.1316096782684326, |
| "learning_rate": 1.9102861562258315e-05, |
| "loss": 1.0673, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.13909011880614314, |
| "grad_norm": 0.910849928855896, |
| "learning_rate": 1.9087393658159322e-05, |
| "loss": 1.0848, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.1414082874529122, |
| "grad_norm": 1.2117191553115845, |
| "learning_rate": 1.9071925754060325e-05, |
| "loss": 1.2018, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.14372645609968127, |
| "grad_norm": 1.3396297693252563, |
| "learning_rate": 1.9056457849961332e-05, |
| "loss": 1.1773, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.1460446247464503, |
| "grad_norm": 1.0479182004928589, |
| "learning_rate": 1.904098994586234e-05, |
| "loss": 1.1557, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.14836279339321937, |
| "grad_norm": 1.0438235998153687, |
| "learning_rate": 1.9025522041763342e-05, |
| "loss": 1.0985, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.1506809620399884, |
| "grad_norm": 1.3567838668823242, |
| "learning_rate": 1.901005413766435e-05, |
| "loss": 1.0874, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.15299913068675747, |
| "grad_norm": 1.0052990913391113, |
| "learning_rate": 1.8994586233565352e-05, |
| "loss": 1.1008, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.1553172993335265, |
| "grad_norm": 1.06718111038208, |
| "learning_rate": 1.897911832946636e-05, |
| "loss": 1.1004, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.15763546798029557, |
| "grad_norm": 1.326567530632019, |
| "learning_rate": 1.8963650425367365e-05, |
| "loss": 1.1033, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.15995363662706463, |
| "grad_norm": 1.1070104837417603, |
| "learning_rate": 1.894818252126837e-05, |
| "loss": 1.1287, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.16227180527383367, |
| "grad_norm": 1.0842565298080444, |
| "learning_rate": 1.8932714617169375e-05, |
| "loss": 1.1005, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.16458997392060273, |
| "grad_norm": 0.9498984813690186, |
| "learning_rate": 1.8917246713070382e-05, |
| "loss": 1.0714, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.16690814256737177, |
| "grad_norm": 1.0374786853790283, |
| "learning_rate": 1.8901778808971385e-05, |
| "loss": 1.1789, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.16922631121414083, |
| "grad_norm": 0.8770291209220886, |
| "learning_rate": 1.8886310904872392e-05, |
| "loss": 1.087, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.17154447986090987, |
| "grad_norm": 1.4603461027145386, |
| "learning_rate": 1.88708430007734e-05, |
| "loss": 1.1119, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.17386264850767894, |
| "grad_norm": 1.0779688358306885, |
| "learning_rate": 1.8855375096674402e-05, |
| "loss": 1.1346, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.176180817154448, |
| "grad_norm": 1.1575367450714111, |
| "learning_rate": 1.883990719257541e-05, |
| "loss": 1.043, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.17849898580121704, |
| "grad_norm": 0.991324245929718, |
| "learning_rate": 1.8824439288476412e-05, |
| "loss": 1.0807, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.1808171544479861, |
| "grad_norm": 1.2354373931884766, |
| "learning_rate": 1.880897138437742e-05, |
| "loss": 1.0485, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.18313532309475514, |
| "grad_norm": 1.3966253995895386, |
| "learning_rate": 1.8793503480278422e-05, |
| "loss": 1.0349, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.1854534917415242, |
| "grad_norm": 1.1162493228912354, |
| "learning_rate": 1.877803557617943e-05, |
| "loss": 1.1038, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.18777166038829324, |
| "grad_norm": 1.117507815361023, |
| "learning_rate": 1.8762567672080435e-05, |
| "loss": 1.0844, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.1900898290350623, |
| "grad_norm": 1.083954930305481, |
| "learning_rate": 1.8747099767981442e-05, |
| "loss": 1.1057, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.19240799768183137, |
| "grad_norm": 1.0554256439208984, |
| "learning_rate": 1.8731631863882445e-05, |
| "loss": 1.0824, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.1947261663286004, |
| "grad_norm": 1.1306427717208862, |
| "learning_rate": 1.8716163959783452e-05, |
| "loss": 1.0552, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.19704433497536947, |
| "grad_norm": 1.1089762449264526, |
| "learning_rate": 1.8700696055684455e-05, |
| "loss": 1.0374, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.1993625036221385, |
| "grad_norm": 1.0922352075576782, |
| "learning_rate": 1.8685228151585462e-05, |
| "loss": 1.2775, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.20168067226890757, |
| "grad_norm": 4.459912300109863, |
| "learning_rate": 1.866976024748647e-05, |
| "loss": 1.061, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.2039988409156766, |
| "grad_norm": 0.9974443316459656, |
| "learning_rate": 1.8654292343387472e-05, |
| "loss": 1.0759, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.20631700956244567, |
| "grad_norm": 0.914336621761322, |
| "learning_rate": 1.863882443928848e-05, |
| "loss": 1.0549, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.20863517820921473, |
| "grad_norm": 0.8654055595397949, |
| "learning_rate": 1.8623356535189482e-05, |
| "loss": 1.0634, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.21095334685598377, |
| "grad_norm": 0.7419248819351196, |
| "learning_rate": 1.860788863109049e-05, |
| "loss": 1.0512, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.21327151550275283, |
| "grad_norm": 1.4844622611999512, |
| "learning_rate": 1.8592420726991492e-05, |
| "loss": 1.0969, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.21558968414952187, |
| "grad_norm": 1.29688560962677, |
| "learning_rate": 1.85769528228925e-05, |
| "loss": 1.0711, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.21790785279629094, |
| "grad_norm": 1.421087622642517, |
| "learning_rate": 1.8561484918793505e-05, |
| "loss": 1.0772, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.22022602144305997, |
| "grad_norm": 1.200110673904419, |
| "learning_rate": 1.8546017014694512e-05, |
| "loss": 1.1029, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.22254419008982904, |
| "grad_norm": 1.025266408920288, |
| "learning_rate": 1.8530549110595515e-05, |
| "loss": 1.1009, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.2248623587365981, |
| "grad_norm": 1.1353425979614258, |
| "learning_rate": 1.8515081206496522e-05, |
| "loss": 1.1109, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.22718052738336714, |
| "grad_norm": 1.0217199325561523, |
| "learning_rate": 1.849961330239753e-05, |
| "loss": 1.1158, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.2294986960301362, |
| "grad_norm": 0.9707551598548889, |
| "learning_rate": 1.8484145398298532e-05, |
| "loss": 1.0189, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.23181686467690524, |
| "grad_norm": 0.8363978266716003, |
| "learning_rate": 1.846867749419954e-05, |
| "loss": 1.086, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2341350333236743, |
| "grad_norm": 1.1158548593521118, |
| "learning_rate": 1.8453209590100542e-05, |
| "loss": 1.0169, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.23645320197044334, |
| "grad_norm": 0.8453333973884583, |
| "learning_rate": 1.843774168600155e-05, |
| "loss": 1.0584, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.2387713706172124, |
| "grad_norm": 1.572540044784546, |
| "learning_rate": 1.8422273781902552e-05, |
| "loss": 1.105, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.24108953926398147, |
| "grad_norm": 0.9463809132575989, |
| "learning_rate": 1.840680587780356e-05, |
| "loss": 1.0552, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.2434077079107505, |
| "grad_norm": 0.8801397681236267, |
| "learning_rate": 1.8391337973704565e-05, |
| "loss": 1.0615, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.24572587655751957, |
| "grad_norm": 1.251951813697815, |
| "learning_rate": 1.837587006960557e-05, |
| "loss": 1.1217, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.2480440452042886, |
| "grad_norm": 1.297305703163147, |
| "learning_rate": 1.8360402165506575e-05, |
| "loss": 1.1102, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.25036221385105767, |
| "grad_norm": 0.9023735523223877, |
| "learning_rate": 1.8344934261407582e-05, |
| "loss": 1.0786, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.2526803824978267, |
| "grad_norm": 0.7729614973068237, |
| "learning_rate": 1.8329466357308585e-05, |
| "loss": 1.0636, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.25499855114459574, |
| "grad_norm": 1.458217978477478, |
| "learning_rate": 1.8313998453209592e-05, |
| "loss": 1.0998, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.25731671979136483, |
| "grad_norm": 1.295067548751831, |
| "learning_rate": 1.82985305491106e-05, |
| "loss": 1.0756, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.25963488843813387, |
| "grad_norm": 0.7502389550209045, |
| "learning_rate": 1.8283062645011602e-05, |
| "loss": 1.048, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.2619530570849029, |
| "grad_norm": 0.7939056158065796, |
| "learning_rate": 1.826759474091261e-05, |
| "loss": 1.1759, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.264271225731672, |
| "grad_norm": 0.8996245861053467, |
| "learning_rate": 1.8252126836813612e-05, |
| "loss": 1.0799, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.26658939437844104, |
| "grad_norm": 1.1998958587646484, |
| "learning_rate": 1.823665893271462e-05, |
| "loss": 1.0622, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.2689075630252101, |
| "grad_norm": 0.7442781329154968, |
| "learning_rate": 1.8221191028615622e-05, |
| "loss": 1.1025, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.2712257316719791, |
| "grad_norm": 1.0310958623886108, |
| "learning_rate": 1.820572312451663e-05, |
| "loss": 1.0557, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.2735439003187482, |
| "grad_norm": 1.071614384651184, |
| "learning_rate": 1.8190255220417635e-05, |
| "loss": 1.1166, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.27586206896551724, |
| "grad_norm": 0.8248165845870972, |
| "learning_rate": 1.8174787316318642e-05, |
| "loss": 1.1389, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.2781802376122863, |
| "grad_norm": 0.9798437356948853, |
| "learning_rate": 1.8159319412219645e-05, |
| "loss": 1.0331, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.28049840625905537, |
| "grad_norm": 0.7473148703575134, |
| "learning_rate": 1.8143851508120652e-05, |
| "loss": 1.0211, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.2828165749058244, |
| "grad_norm": 1.3669893741607666, |
| "learning_rate": 1.812838360402166e-05, |
| "loss": 1.1495, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.28513474355259344, |
| "grad_norm": 0.7082749009132385, |
| "learning_rate": 1.8112915699922662e-05, |
| "loss": 1.0244, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.28745291219936253, |
| "grad_norm": 1.2198262214660645, |
| "learning_rate": 1.809744779582367e-05, |
| "loss": 1.1052, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.28977108084613157, |
| "grad_norm": 2.2517826557159424, |
| "learning_rate": 1.8081979891724672e-05, |
| "loss": 1.0324, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.2920892494929006, |
| "grad_norm": 0.9831060767173767, |
| "learning_rate": 1.806651198762568e-05, |
| "loss": 1.1176, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.29440741813966964, |
| "grad_norm": 0.8122763633728027, |
| "learning_rate": 1.8051044083526682e-05, |
| "loss": 1.0238, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.29672558678643873, |
| "grad_norm": 1.0002597570419312, |
| "learning_rate": 1.803557617942769e-05, |
| "loss": 1.0847, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.29904375543320777, |
| "grad_norm": 0.8262125253677368, |
| "learning_rate": 1.8020108275328692e-05, |
| "loss": 1.1149, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.3013619240799768, |
| "grad_norm": 1.2185602188110352, |
| "learning_rate": 1.80046403712297e-05, |
| "loss": 1.0688, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.3036800927267459, |
| "grad_norm": 1.2163466215133667, |
| "learning_rate": 1.7989172467130705e-05, |
| "loss": 1.0366, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.30599826137351493, |
| "grad_norm": 0.6977062225341797, |
| "learning_rate": 1.7973704563031712e-05, |
| "loss": 1.0853, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.30831643002028397, |
| "grad_norm": 0.8096152544021606, |
| "learning_rate": 1.7958236658932715e-05, |
| "loss": 1.0104, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.310634598667053, |
| "grad_norm": 0.9192125201225281, |
| "learning_rate": 1.7942768754833722e-05, |
| "loss": 1.0596, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.3129527673138221, |
| "grad_norm": 0.7752702236175537, |
| "learning_rate": 1.792730085073473e-05, |
| "loss": 1.1281, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.31527093596059114, |
| "grad_norm": 1.0385123491287231, |
| "learning_rate": 1.7911832946635732e-05, |
| "loss": 1.0857, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.3175891046073602, |
| "grad_norm": 2.8603484630584717, |
| "learning_rate": 1.789636504253674e-05, |
| "loss": 1.1425, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.31990727325412927, |
| "grad_norm": 1.104943037033081, |
| "learning_rate": 1.7880897138437742e-05, |
| "loss": 1.1464, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.3222254419008983, |
| "grad_norm": 1.7259231805801392, |
| "learning_rate": 1.786542923433875e-05, |
| "loss": 1.0472, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.32454361054766734, |
| "grad_norm": 0.9174676537513733, |
| "learning_rate": 1.7849961330239752e-05, |
| "loss": 1.0523, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.3268617791944364, |
| "grad_norm": 0.9572336673736572, |
| "learning_rate": 1.783449342614076e-05, |
| "loss": 1.0327, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.32917994784120547, |
| "grad_norm": 0.6567716598510742, |
| "learning_rate": 1.7819025522041766e-05, |
| "loss": 1.0979, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.3314981164879745, |
| "grad_norm": 1.8695584535598755, |
| "learning_rate": 1.780355761794277e-05, |
| "loss": 1.0659, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.33381628513474354, |
| "grad_norm": 0.8160743713378906, |
| "learning_rate": 1.7788089713843776e-05, |
| "loss": 1.0746, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.33613445378151263, |
| "grad_norm": 0.7144508957862854, |
| "learning_rate": 1.7772621809744782e-05, |
| "loss": 1.002, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.33845262242828167, |
| "grad_norm": 0.8914051055908203, |
| "learning_rate": 1.7757153905645786e-05, |
| "loss": 1.0334, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.3407707910750507, |
| "grad_norm": 1.1182371377944946, |
| "learning_rate": 1.7741686001546792e-05, |
| "loss": 1.0535, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.34308895972181974, |
| "grad_norm": 0.6911827325820923, |
| "learning_rate": 1.77262180974478e-05, |
| "loss": 1.0783, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.34540712836858883, |
| "grad_norm": 1.141491413116455, |
| "learning_rate": 1.7710750193348802e-05, |
| "loss": 1.0522, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.34772529701535787, |
| "grad_norm": 1.103798747062683, |
| "learning_rate": 1.769528228924981e-05, |
| "loss": 1.1178, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.3500434656621269, |
| "grad_norm": 1.1297893524169922, |
| "learning_rate": 1.7679814385150812e-05, |
| "loss": 1.19, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.352361634308896, |
| "grad_norm": 0.8850527405738831, |
| "learning_rate": 1.766434648105182e-05, |
| "loss": 0.9929, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.35467980295566504, |
| "grad_norm": 1.096604585647583, |
| "learning_rate": 1.7648878576952822e-05, |
| "loss": 1.0992, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.35699797160243407, |
| "grad_norm": 0.9644438624382019, |
| "learning_rate": 1.763341067285383e-05, |
| "loss": 1.0421, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.3593161402492031, |
| "grad_norm": 1.0480139255523682, |
| "learning_rate": 1.7617942768754836e-05, |
| "loss": 1.0495, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.3616343088959722, |
| "grad_norm": 3.11247181892395, |
| "learning_rate": 1.7602474864655842e-05, |
| "loss": 1.0407, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.36395247754274124, |
| "grad_norm": 0.9178793430328369, |
| "learning_rate": 1.7587006960556846e-05, |
| "loss": 1.0208, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.3662706461895103, |
| "grad_norm": 1.0008949041366577, |
| "learning_rate": 1.7571539056457852e-05, |
| "loss": 1.036, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.36858881483627937, |
| "grad_norm": 0.9334746599197388, |
| "learning_rate": 1.755607115235886e-05, |
| "loss": 1.1097, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.3709069834830484, |
| "grad_norm": 0.9296855330467224, |
| "learning_rate": 1.7540603248259862e-05, |
| "loss": 1.0597, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.37322515212981744, |
| "grad_norm": 0.7528197765350342, |
| "learning_rate": 1.752513534416087e-05, |
| "loss": 1.0252, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.3755433207765865, |
| "grad_norm": 0.7995203733444214, |
| "learning_rate": 1.7509667440061872e-05, |
| "loss": 1.0446, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.37786148942335557, |
| "grad_norm": 0.7773709297180176, |
| "learning_rate": 1.749419953596288e-05, |
| "loss": 1.0527, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.3801796580701246, |
| "grad_norm": 0.9108691811561584, |
| "learning_rate": 1.7478731631863882e-05, |
| "loss": 1.0571, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.38249782671689364, |
| "grad_norm": 0.8565751910209656, |
| "learning_rate": 1.746326372776489e-05, |
| "loss": 1.1003, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.38481599536366273, |
| "grad_norm": 1.3683419227600098, |
| "learning_rate": 1.7447795823665896e-05, |
| "loss": 1.0697, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.38713416401043177, |
| "grad_norm": 0.6560070514678955, |
| "learning_rate": 1.74323279195669e-05, |
| "loss": 0.9556, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.3894523326572008, |
| "grad_norm": 1.370263934135437, |
| "learning_rate": 1.7416860015467906e-05, |
| "loss": 1.0848, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.39177050130396984, |
| "grad_norm": 1.013763427734375, |
| "learning_rate": 1.7401392111368912e-05, |
| "loss": 1.0643, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.39408866995073893, |
| "grad_norm": 0.8194316029548645, |
| "learning_rate": 1.7385924207269916e-05, |
| "loss": 1.0708, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.39640683859750797, |
| "grad_norm": 0.9241949319839478, |
| "learning_rate": 1.7370456303170922e-05, |
| "loss": 1.0502, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.398725007244277, |
| "grad_norm": 0.9724448323249817, |
| "learning_rate": 1.735498839907193e-05, |
| "loss": 1.0152, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.4010431758910461, |
| "grad_norm": 0.6559419631958008, |
| "learning_rate": 1.7339520494972932e-05, |
| "loss": 1.0148, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.40336134453781514, |
| "grad_norm": 1.1617038249969482, |
| "learning_rate": 1.732405259087394e-05, |
| "loss": 1.0972, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.4056795131845842, |
| "grad_norm": 0.7249406576156616, |
| "learning_rate": 1.7308584686774942e-05, |
| "loss": 0.9664, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.4079976818313532, |
| "grad_norm": 1.2099742889404297, |
| "learning_rate": 1.729311678267595e-05, |
| "loss": 1.0314, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.4103158504781223, |
| "grad_norm": 0.8690075278282166, |
| "learning_rate": 1.7277648878576952e-05, |
| "loss": 1.0789, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.41263401912489134, |
| "grad_norm": 0.7662826180458069, |
| "learning_rate": 1.726218097447796e-05, |
| "loss": 1.014, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.4149521877716604, |
| "grad_norm": 1.22348952293396, |
| "learning_rate": 1.7246713070378966e-05, |
| "loss": 0.9765, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.41727035641842947, |
| "grad_norm": 1.0363351106643677, |
| "learning_rate": 1.7231245166279972e-05, |
| "loss": 1.0487, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.4195885250651985, |
| "grad_norm": 0.8833026885986328, |
| "learning_rate": 1.7215777262180976e-05, |
| "loss": 1.0261, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.42190669371196754, |
| "grad_norm": 0.8683452606201172, |
| "learning_rate": 1.7200309358081982e-05, |
| "loss": 1.0609, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.4242248623587366, |
| "grad_norm": 0.8211922645568848, |
| "learning_rate": 1.718484145398299e-05, |
| "loss": 0.9942, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.42654303100550567, |
| "grad_norm": 0.8936122059822083, |
| "learning_rate": 1.7169373549883992e-05, |
| "loss": 1.0591, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.4288611996522747, |
| "grad_norm": 0.9455772042274475, |
| "learning_rate": 1.7153905645785e-05, |
| "loss": 1.0628, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.43117936829904374, |
| "grad_norm": 1.0464543104171753, |
| "learning_rate": 1.7138437741686002e-05, |
| "loss": 0.9742, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.43349753694581283, |
| "grad_norm": 1.4931954145431519, |
| "learning_rate": 1.712296983758701e-05, |
| "loss": 1.0534, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.43581570559258187, |
| "grad_norm": 0.7873006463050842, |
| "learning_rate": 1.7107501933488012e-05, |
| "loss": 1.0499, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.4381338742393509, |
| "grad_norm": 0.7451059222221375, |
| "learning_rate": 1.709203402938902e-05, |
| "loss": 1.0006, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.44045204288611994, |
| "grad_norm": 0.8252111673355103, |
| "learning_rate": 1.7076566125290022e-05, |
| "loss": 1.057, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.44277021153288904, |
| "grad_norm": 0.6444481015205383, |
| "learning_rate": 1.706109822119103e-05, |
| "loss": 1.0098, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.44508838017965807, |
| "grad_norm": 0.8497568368911743, |
| "learning_rate": 1.7045630317092036e-05, |
| "loss": 1.0186, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.4474065488264271, |
| "grad_norm": 1.0328199863433838, |
| "learning_rate": 1.7030162412993042e-05, |
| "loss": 1.0474, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.4497247174731962, |
| "grad_norm": 0.7315878868103027, |
| "learning_rate": 1.7014694508894046e-05, |
| "loss": 0.9985, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.45204288611996524, |
| "grad_norm": 1.0060752630233765, |
| "learning_rate": 1.6999226604795052e-05, |
| "loss": 1.0985, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.4543610547667343, |
| "grad_norm": 0.8653793334960938, |
| "learning_rate": 1.698375870069606e-05, |
| "loss": 0.999, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.4566792234135033, |
| "grad_norm": 1.0214215517044067, |
| "learning_rate": 1.6968290796597062e-05, |
| "loss": 1.0759, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.4589973920602724, |
| "grad_norm": 0.7069177627563477, |
| "learning_rate": 1.695282289249807e-05, |
| "loss": 1.1569, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.46131556070704144, |
| "grad_norm": 1.8065637350082397, |
| "learning_rate": 1.6937354988399072e-05, |
| "loss": 1.0398, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.4636337293538105, |
| "grad_norm": 8.820870399475098, |
| "learning_rate": 1.692188708430008e-05, |
| "loss": 1.0801, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.46595189800057957, |
| "grad_norm": 0.7957196235656738, |
| "learning_rate": 1.6906419180201082e-05, |
| "loss": 1.01, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.4682700666473486, |
| "grad_norm": 0.9078807830810547, |
| "learning_rate": 1.689095127610209e-05, |
| "loss": 1.0068, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "grad_norm": 0.8792912364006042, |
| "learning_rate": 1.6875483372003096e-05, |
| "loss": 1.0169, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.4729064039408867, |
| "grad_norm": 1.0289100408554077, |
| "learning_rate": 1.68600154679041e-05, |
| "loss": 0.9956, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.47522457258765577, |
| "grad_norm": 2.788477897644043, |
| "learning_rate": 1.6844547563805106e-05, |
| "loss": 1.0839, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.4775427412344248, |
| "grad_norm": 0.841396152973175, |
| "learning_rate": 1.6829079659706112e-05, |
| "loss": 1.0735, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.47986090988119384, |
| "grad_norm": 1.1330881118774414, |
| "learning_rate": 1.681361175560712e-05, |
| "loss": 1.0486, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.48217907852796293, |
| "grad_norm": 1.2185862064361572, |
| "learning_rate": 1.6798143851508122e-05, |
| "loss": 1.0962, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.48449724717473197, |
| "grad_norm": 1.2216246128082275, |
| "learning_rate": 1.678267594740913e-05, |
| "loss": 1.0202, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.486815415821501, |
| "grad_norm": 0.7822516560554504, |
| "learning_rate": 1.6767208043310132e-05, |
| "loss": 1.0278, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.48913358446827004, |
| "grad_norm": 1.021760106086731, |
| "learning_rate": 1.675174013921114e-05, |
| "loss": 1.0626, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.49145175311503914, |
| "grad_norm": 1.334328055381775, |
| "learning_rate": 1.6736272235112142e-05, |
| "loss": 1.1798, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.4937699217618082, |
| "grad_norm": 0.7392475605010986, |
| "learning_rate": 1.672080433101315e-05, |
| "loss": 1.0713, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.4960880904085772, |
| "grad_norm": 0.768805205821991, |
| "learning_rate": 1.6705336426914152e-05, |
| "loss": 0.9839, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.4984062590553463, |
| "grad_norm": 0.7203591465950012, |
| "learning_rate": 1.668986852281516e-05, |
| "loss": 0.9926, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.5007244277021153, |
| "grad_norm": 1.2835793495178223, |
| "learning_rate": 1.6674400618716166e-05, |
| "loss": 1.024, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.5030425963488844, |
| "grad_norm": 0.8296486139297485, |
| "learning_rate": 1.6658932714617173e-05, |
| "loss": 1.0421, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.5053607649956534, |
| "grad_norm": 0.8933680653572083, |
| "learning_rate": 1.6643464810518176e-05, |
| "loss": 0.9958, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.5076789336424224, |
| "grad_norm": 0.6815921068191528, |
| "learning_rate": 1.6627996906419182e-05, |
| "loss": 1.0334, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.5099971022891915, |
| "grad_norm": 0.795447051525116, |
| "learning_rate": 1.661252900232019e-05, |
| "loss": 1.0553, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.5123152709359606, |
| "grad_norm": 1.1784237623214722, |
| "learning_rate": 1.6597061098221192e-05, |
| "loss": 0.9998, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.5146334395827297, |
| "grad_norm": 0.9474261403083801, |
| "learning_rate": 1.65815931941222e-05, |
| "loss": 1.0704, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.5169516082294987, |
| "grad_norm": 0.9175812602043152, |
| "learning_rate": 1.6566125290023202e-05, |
| "loss": 1.0779, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.5192697768762677, |
| "grad_norm": 0.8006009459495544, |
| "learning_rate": 1.655065738592421e-05, |
| "loss": 1.024, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.5215879455230368, |
| "grad_norm": 0.7539005875587463, |
| "learning_rate": 1.6535189481825212e-05, |
| "loss": 1.0301, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.5239061141698058, |
| "grad_norm": 0.8373304009437561, |
| "learning_rate": 1.651972157772622e-05, |
| "loss": 1.0519, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.5262242828165749, |
| "grad_norm": 0.7653727531433105, |
| "learning_rate": 1.6504253673627222e-05, |
| "loss": 0.9657, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.528542451463344, |
| "grad_norm": 0.6552687287330627, |
| "learning_rate": 1.648878576952823e-05, |
| "loss": 1.0106, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.530860620110113, |
| "grad_norm": 1.428830862045288, |
| "learning_rate": 1.6473317865429236e-05, |
| "loss": 1.0327, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.5331787887568821, |
| "grad_norm": 1.0795942544937134, |
| "learning_rate": 1.6457849961330243e-05, |
| "loss": 1.0854, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.5354969574036511, |
| "grad_norm": 0.5399507284164429, |
| "learning_rate": 1.644238205723125e-05, |
| "loss": 0.9932, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.5378151260504201, |
| "grad_norm": 2.8251047134399414, |
| "learning_rate": 1.6426914153132253e-05, |
| "loss": 1.002, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.5401332946971892, |
| "grad_norm": 0.7555001974105835, |
| "learning_rate": 1.641144624903326e-05, |
| "loss": 0.9898, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.5424514633439582, |
| "grad_norm": 0.9090583324432373, |
| "learning_rate": 1.6395978344934263e-05, |
| "loss": 0.9762, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.5447696319907274, |
| "grad_norm": 0.8169143199920654, |
| "learning_rate": 1.638051044083527e-05, |
| "loss": 1.0588, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.5470878006374964, |
| "grad_norm": 0.7842413783073425, |
| "learning_rate": 1.6365042536736273e-05, |
| "loss": 0.9432, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.5494059692842654, |
| "grad_norm": 2.24771785736084, |
| "learning_rate": 1.634957463263728e-05, |
| "loss": 1.0636, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.5517241379310345, |
| "grad_norm": 0.9846341013908386, |
| "learning_rate": 1.6334106728538283e-05, |
| "loss": 0.9743, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.5540423065778035, |
| "grad_norm": 0.7598584294319153, |
| "learning_rate": 1.631863882443929e-05, |
| "loss": 1.0192, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.5563604752245725, |
| "grad_norm": 1.200215458869934, |
| "learning_rate": 1.6303170920340296e-05, |
| "loss": 1.1051, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5586786438713416, |
| "grad_norm": 0.8878689408302307, |
| "learning_rate": 1.62877030162413e-05, |
| "loss": 1.0622, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.5609968125181107, |
| "grad_norm": 1.18966543674469, |
| "learning_rate": 1.6272235112142306e-05, |
| "loss": 1.0396, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.5633149811648798, |
| "grad_norm": 0.7161230444908142, |
| "learning_rate": 1.6256767208043313e-05, |
| "loss": 1.0611, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.5656331498116488, |
| "grad_norm": 0.806091845035553, |
| "learning_rate": 1.624129930394432e-05, |
| "loss": 1.0862, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.5679513184584178, |
| "grad_norm": 0.9060055613517761, |
| "learning_rate": 1.6225831399845323e-05, |
| "loss": 0.9447, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.5702694871051869, |
| "grad_norm": 0.6209578514099121, |
| "learning_rate": 1.621036349574633e-05, |
| "loss": 1.0291, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.5725876557519559, |
| "grad_norm": 0.8875139951705933, |
| "learning_rate": 1.6194895591647333e-05, |
| "loss": 0.9596, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.5749058243987251, |
| "grad_norm": 0.8894098997116089, |
| "learning_rate": 1.617942768754834e-05, |
| "loss": 1.0363, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.5772239930454941, |
| "grad_norm": 0.8011065125465393, |
| "learning_rate": 1.6163959783449343e-05, |
| "loss": 0.9735, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.5795421616922631, |
| "grad_norm": 1.0448037385940552, |
| "learning_rate": 1.614849187935035e-05, |
| "loss": 1.0154, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.5818603303390322, |
| "grad_norm": 0.7367164492607117, |
| "learning_rate": 1.6133023975251353e-05, |
| "loss": 1.0509, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.5841784989858012, |
| "grad_norm": 0.8820902705192566, |
| "learning_rate": 1.611755607115236e-05, |
| "loss": 1.074, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.5864966676325702, |
| "grad_norm": 0.8512645363807678, |
| "learning_rate": 1.6102088167053366e-05, |
| "loss": 1.0478, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.5888148362793393, |
| "grad_norm": 0.8832964897155762, |
| "learning_rate": 1.6086620262954373e-05, |
| "loss": 1.0502, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.5911330049261084, |
| "grad_norm": 0.7311517596244812, |
| "learning_rate": 1.6071152358855376e-05, |
| "loss": 0.9904, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.5934511735728775, |
| "grad_norm": 0.9509069919586182, |
| "learning_rate": 1.6055684454756383e-05, |
| "loss": 1.0628, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.5957693422196465, |
| "grad_norm": 0.5056537389755249, |
| "learning_rate": 1.604021655065739e-05, |
| "loss": 0.9579, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.5980875108664155, |
| "grad_norm": 0.6654573082923889, |
| "learning_rate": 1.6024748646558393e-05, |
| "loss": 0.9752, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.6004056795131846, |
| "grad_norm": 0.7242197394371033, |
| "learning_rate": 1.60092807424594e-05, |
| "loss": 1.0986, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.6027238481599536, |
| "grad_norm": 0.8016011118888855, |
| "learning_rate": 1.5993812838360403e-05, |
| "loss": 1.0193, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.6050420168067226, |
| "grad_norm": 0.8806138038635254, |
| "learning_rate": 1.597834493426141e-05, |
| "loss": 1.0192, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.6073601854534918, |
| "grad_norm": 1.1127492189407349, |
| "learning_rate": 1.5962877030162413e-05, |
| "loss": 1.1279, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.6096783541002608, |
| "grad_norm": 0.8649683594703674, |
| "learning_rate": 1.594740912606342e-05, |
| "loss": 1.0059, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.6119965227470299, |
| "grad_norm": 0.7879909873008728, |
| "learning_rate": 1.5931941221964423e-05, |
| "loss": 1.0077, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.6143146913937989, |
| "grad_norm": 0.8802973031997681, |
| "learning_rate": 1.591647331786543e-05, |
| "loss": 1.0484, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.6166328600405679, |
| "grad_norm": 1.8282607793807983, |
| "learning_rate": 1.5901005413766436e-05, |
| "loss": 0.9435, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.618951028687337, |
| "grad_norm": 0.643280565738678, |
| "learning_rate": 1.5885537509667443e-05, |
| "loss": 0.948, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.621269197334106, |
| "grad_norm": 0.727376401424408, |
| "learning_rate": 1.587006960556845e-05, |
| "loss": 0.9687, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.6235873659808752, |
| "grad_norm": 0.9891621470451355, |
| "learning_rate": 1.5854601701469453e-05, |
| "loss": 0.995, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.6259055346276442, |
| "grad_norm": 1.4208780527114868, |
| "learning_rate": 1.583913379737046e-05, |
| "loss": 1.107, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.6282237032744132, |
| "grad_norm": 0.8574293851852417, |
| "learning_rate": 1.5823665893271463e-05, |
| "loss": 1.0016, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.6305418719211823, |
| "grad_norm": 1.4257714748382568, |
| "learning_rate": 1.580819798917247e-05, |
| "loss": 1.3383, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.6328600405679513, |
| "grad_norm": 0.7138167023658752, |
| "learning_rate": 1.5792730085073473e-05, |
| "loss": 1.0534, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.6351782092147203, |
| "grad_norm": 1.5973683595657349, |
| "learning_rate": 1.577726218097448e-05, |
| "loss": 0.9784, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.6374963778614894, |
| "grad_norm": 0.6794442534446716, |
| "learning_rate": 1.5761794276875483e-05, |
| "loss": 0.9809, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.6398145465082585, |
| "grad_norm": 0.7616905570030212, |
| "learning_rate": 1.574632637277649e-05, |
| "loss": 0.9981, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.6421327151550276, |
| "grad_norm": 1.709405541419983, |
| "learning_rate": 1.5730858468677496e-05, |
| "loss": 1.0105, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.6444508838017966, |
| "grad_norm": 0.6796721816062927, |
| "learning_rate": 1.57153905645785e-05, |
| "loss": 0.9856, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.6467690524485656, |
| "grad_norm": 0.7686854600906372, |
| "learning_rate": 1.5699922660479506e-05, |
| "loss": 1.0534, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.6490872210953347, |
| "grad_norm": 1.0257889032363892, |
| "learning_rate": 1.5684454756380513e-05, |
| "loss": 0.9869, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.6514053897421037, |
| "grad_norm": 0.7100695371627808, |
| "learning_rate": 1.566898685228152e-05, |
| "loss": 1.0433, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.6537235583888727, |
| "grad_norm": 0.7201927900314331, |
| "learning_rate": 1.5653518948182523e-05, |
| "loss": 0.9854, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.6560417270356419, |
| "grad_norm": 1.5743852853775024, |
| "learning_rate": 1.563805104408353e-05, |
| "loss": 1.0076, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.6583598956824109, |
| "grad_norm": 0.7456634640693665, |
| "learning_rate": 1.5622583139984533e-05, |
| "loss": 1.0214, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.66067806432918, |
| "grad_norm": 0.6395049691200256, |
| "learning_rate": 1.560711523588554e-05, |
| "loss": 1.0321, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.662996232975949, |
| "grad_norm": 0.9406479001045227, |
| "learning_rate": 1.5591647331786543e-05, |
| "loss": 1.0387, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.665314401622718, |
| "grad_norm": 0.668521523475647, |
| "learning_rate": 1.557617942768755e-05, |
| "loss": 0.9458, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.6676325702694871, |
| "grad_norm": 0.8241714239120483, |
| "learning_rate": 1.5560711523588553e-05, |
| "loss": 0.994, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.6699507389162561, |
| "grad_norm": 0.7906151413917542, |
| "learning_rate": 1.554524361948956e-05, |
| "loss": 1.0127, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.6722689075630253, |
| "grad_norm": 3.7441999912261963, |
| "learning_rate": 1.5529775715390566e-05, |
| "loss": 1.1235, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.6745870762097943, |
| "grad_norm": 0.7488934397697449, |
| "learning_rate": 1.5514307811291573e-05, |
| "loss": 1.0132, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.6769052448565633, |
| "grad_norm": 0.6223219037055969, |
| "learning_rate": 1.5498839907192576e-05, |
| "loss": 0.9831, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.6792234135033324, |
| "grad_norm": 0.9072495698928833, |
| "learning_rate": 1.5483372003093583e-05, |
| "loss": 0.9978, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.6815415821501014, |
| "grad_norm": 1.0984013080596924, |
| "learning_rate": 1.546790409899459e-05, |
| "loss": 0.9942, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.6838597507968704, |
| "grad_norm": 0.8855274319648743, |
| "learning_rate": 1.5452436194895593e-05, |
| "loss": 1.0135, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.6861779194436395, |
| "grad_norm": 0.7710789442062378, |
| "learning_rate": 1.54369682907966e-05, |
| "loss": 1.034, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.6884960880904086, |
| "grad_norm": 19.71253204345703, |
| "learning_rate": 1.5421500386697603e-05, |
| "loss": 0.9598, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.6908142567371777, |
| "grad_norm": 0.930057942867279, |
| "learning_rate": 1.540603248259861e-05, |
| "loss": 0.9755, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.6931324253839467, |
| "grad_norm": 1.0376302003860474, |
| "learning_rate": 1.5390564578499613e-05, |
| "loss": 1.0324, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.6954505940307157, |
| "grad_norm": 0.8160769939422607, |
| "learning_rate": 1.537509667440062e-05, |
| "loss": 1.0805, |
| "step": 300 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1293, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.0925139234599731e+19, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |