| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 10, | |
| "global_step": 7808, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00012807377049180329, | |
| "grad_norm": 13.030157089233398, | |
| "learning_rate": 0.0, | |
| "loss": 0.6785, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0012807377049180327, | |
| "grad_norm": 22.160924911499023, | |
| "learning_rate": 3.837953091684436e-07, | |
| "loss": 0.7023, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0025614754098360654, | |
| "grad_norm": 28.483007431030273, | |
| "learning_rate": 8.102345415778253e-07, | |
| "loss": 0.5318, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0038422131147540983, | |
| "grad_norm": 14.279166221618652, | |
| "learning_rate": 1.236673773987207e-06, | |
| "loss": 0.7028, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.005122950819672131, | |
| "grad_norm": 19.40032196044922, | |
| "learning_rate": 1.6631130063965886e-06, | |
| "loss": 0.7097, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.006403688524590164, | |
| "grad_norm": 17.553495407104492, | |
| "learning_rate": 2.08955223880597e-06, | |
| "loss": 0.7789, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.007684426229508197, | |
| "grad_norm": 5.795664310455322, | |
| "learning_rate": 2.515991471215352e-06, | |
| "loss": 0.5146, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.008965163934426229, | |
| "grad_norm": 15.086921691894531, | |
| "learning_rate": 2.9424307036247335e-06, | |
| "loss": 0.703, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.010245901639344262, | |
| "grad_norm": 11.061614990234375, | |
| "learning_rate": 3.3688699360341154e-06, | |
| "loss": 0.5794, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.011526639344262296, | |
| "grad_norm": 11.43583869934082, | |
| "learning_rate": 3.7953091684434973e-06, | |
| "loss": 0.6144, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.012807377049180328, | |
| "grad_norm": 5.861094951629639, | |
| "learning_rate": 4.221748400852878e-06, | |
| "loss": 0.5769, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.01408811475409836, | |
| "grad_norm": 28.796695709228516, | |
| "learning_rate": 4.64818763326226e-06, | |
| "loss": 0.5953, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.015368852459016393, | |
| "grad_norm": 17.27574348449707, | |
| "learning_rate": 5.074626865671642e-06, | |
| "loss": 0.4116, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.016649590163934427, | |
| "grad_norm": 20.032840728759766, | |
| "learning_rate": 5.501066098081024e-06, | |
| "loss": 0.7965, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.017930327868852458, | |
| "grad_norm": 35.11494827270508, | |
| "learning_rate": 5.927505330490405e-06, | |
| "loss": 0.8488, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.019211065573770492, | |
| "grad_norm": 17.658639907836914, | |
| "learning_rate": 6.353944562899788e-06, | |
| "loss": 0.446, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.020491803278688523, | |
| "grad_norm": 10.555081367492676, | |
| "learning_rate": 6.780383795309169e-06, | |
| "loss": 0.4964, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.021772540983606557, | |
| "grad_norm": 30.8939266204834, | |
| "learning_rate": 7.20682302771855e-06, | |
| "loss": 0.5762, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.02305327868852459, | |
| "grad_norm": 14.771651268005371, | |
| "learning_rate": 7.633262260127933e-06, | |
| "loss": 0.5545, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.024334016393442622, | |
| "grad_norm": 0.9534880518913269, | |
| "learning_rate": 8.059701492537314e-06, | |
| "loss": 0.3119, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.025614754098360656, | |
| "grad_norm": 13.96252727508545, | |
| "learning_rate": 8.486140724946695e-06, | |
| "loss": 0.6571, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.026895491803278687, | |
| "grad_norm": 6.706875801086426, | |
| "learning_rate": 8.912579957356077e-06, | |
| "loss": 0.8117, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.02817622950819672, | |
| "grad_norm": 57.71232604980469, | |
| "learning_rate": 9.339019189765458e-06, | |
| "loss": 0.4906, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.029456967213114756, | |
| "grad_norm": 15.123934745788574, | |
| "learning_rate": 9.765458422174841e-06, | |
| "loss": 0.4204, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.030737704918032786, | |
| "grad_norm": 3.7344789505004883, | |
| "learning_rate": 1.0191897654584222e-05, | |
| "loss": 0.6303, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.03201844262295082, | |
| "grad_norm": 0.1564660370349884, | |
| "learning_rate": 1.0618336886993603e-05, | |
| "loss": 0.3297, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.033299180327868855, | |
| "grad_norm": 22.37810516357422, | |
| "learning_rate": 1.1044776119402986e-05, | |
| "loss": 0.3784, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.034579918032786885, | |
| "grad_norm": 4.665563106536865, | |
| "learning_rate": 1.1471215351812369e-05, | |
| "loss": 0.7696, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.035860655737704916, | |
| "grad_norm": 9.491741180419922, | |
| "learning_rate": 1.189765458422175e-05, | |
| "loss": 0.5464, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.037141393442622954, | |
| "grad_norm": 18.859682083129883, | |
| "learning_rate": 1.2324093816631131e-05, | |
| "loss": 0.5767, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.038422131147540985, | |
| "grad_norm": 7.065849304199219, | |
| "learning_rate": 1.2750533049040512e-05, | |
| "loss": 0.5723, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.039702868852459015, | |
| "grad_norm": 43.178043365478516, | |
| "learning_rate": 1.3176972281449893e-05, | |
| "loss": 0.6343, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.040983606557377046, | |
| "grad_norm": 9.827512741088867, | |
| "learning_rate": 1.3603411513859277e-05, | |
| "loss": 0.5718, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.042264344262295084, | |
| "grad_norm": 2.420236349105835, | |
| "learning_rate": 1.4029850746268658e-05, | |
| "loss": 0.5491, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.043545081967213115, | |
| "grad_norm": 8.602315902709961, | |
| "learning_rate": 1.445628997867804e-05, | |
| "loss": 0.5566, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.044825819672131145, | |
| "grad_norm": 19.52743148803711, | |
| "learning_rate": 1.488272921108742e-05, | |
| "loss": 0.4385, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.04610655737704918, | |
| "grad_norm": 59.86263656616211, | |
| "learning_rate": 1.5309168443496803e-05, | |
| "loss": 0.6635, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.047387295081967214, | |
| "grad_norm": 35.44069290161133, | |
| "learning_rate": 1.5735607675906184e-05, | |
| "loss": 0.7269, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.048668032786885244, | |
| "grad_norm": 20.887710571289062, | |
| "learning_rate": 1.616204690831557e-05, | |
| "loss": 0.5448, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.04994877049180328, | |
| "grad_norm": 22.93721580505371, | |
| "learning_rate": 1.658848614072495e-05, | |
| "loss": 0.7712, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.05122950819672131, | |
| "grad_norm": 11.434581756591797, | |
| "learning_rate": 1.701492537313433e-05, | |
| "loss": 0.4954, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.052510245901639344, | |
| "grad_norm": 3.9810707569122314, | |
| "learning_rate": 1.7441364605543712e-05, | |
| "loss": 0.4475, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.053790983606557374, | |
| "grad_norm": 8.25676155090332, | |
| "learning_rate": 1.7867803837953093e-05, | |
| "loss": 0.5226, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.05507172131147541, | |
| "grad_norm": 40.57249069213867, | |
| "learning_rate": 1.8294243070362474e-05, | |
| "loss": 0.5507, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.05635245901639344, | |
| "grad_norm": 0.5660319924354553, | |
| "learning_rate": 1.872068230277186e-05, | |
| "loss": 0.4327, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.057633196721311473, | |
| "grad_norm": 37.062320709228516, | |
| "learning_rate": 1.914712153518124e-05, | |
| "loss": 0.3592, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.05891393442622951, | |
| "grad_norm": 22.973651885986328, | |
| "learning_rate": 1.957356076759062e-05, | |
| "loss": 0.4596, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.06019467213114754, | |
| "grad_norm": 24.05460548400879, | |
| "learning_rate": 2e-05, | |
| "loss": 0.5558, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.06147540983606557, | |
| "grad_norm": 0.19256171584129333, | |
| "learning_rate": 1.9972748330835264e-05, | |
| "loss": 0.3182, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.0627561475409836, | |
| "grad_norm": 3.1222236156463623, | |
| "learning_rate": 1.994549666167053e-05, | |
| "loss": 0.6922, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.06403688524590163, | |
| "grad_norm": 35.97098922729492, | |
| "learning_rate": 1.9918244992505793e-05, | |
| "loss": 0.9556, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.06531762295081968, | |
| "grad_norm": 0.13992930948734283, | |
| "learning_rate": 1.9890993323341056e-05, | |
| "loss": 0.6617, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.06659836065573771, | |
| "grad_norm": 5.176881313323975, | |
| "learning_rate": 1.986374165417632e-05, | |
| "loss": 0.8168, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.06787909836065574, | |
| "grad_norm": 43.677433013916016, | |
| "learning_rate": 1.9836489985011584e-05, | |
| "loss": 0.7252, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.06915983606557377, | |
| "grad_norm": 15.75368881225586, | |
| "learning_rate": 1.9809238315846847e-05, | |
| "loss": 0.6547, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.0704405737704918, | |
| "grad_norm": 14.22448444366455, | |
| "learning_rate": 1.9781986646682113e-05, | |
| "loss": 0.4269, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.07172131147540983, | |
| "grad_norm": 20.48627471923828, | |
| "learning_rate": 1.9754734977517372e-05, | |
| "loss": 1.0193, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.07300204918032786, | |
| "grad_norm": 51.78612518310547, | |
| "learning_rate": 1.9727483308352638e-05, | |
| "loss": 0.5702, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.07428278688524591, | |
| "grad_norm": 0.18359607458114624, | |
| "learning_rate": 1.97002316391879e-05, | |
| "loss": 0.3638, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.07556352459016394, | |
| "grad_norm": 74.03116607666016, | |
| "learning_rate": 1.9672979970023163e-05, | |
| "loss": 0.7977, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.07684426229508197, | |
| "grad_norm": 12.116443634033203, | |
| "learning_rate": 1.964572830085843e-05, | |
| "loss": 0.8353, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.078125, | |
| "grad_norm": 36.37770080566406, | |
| "learning_rate": 1.9618476631693692e-05, | |
| "loss": 0.534, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.07940573770491803, | |
| "grad_norm": 1.0840022563934326, | |
| "learning_rate": 1.9591224962528958e-05, | |
| "loss": 0.6757, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.08068647540983606, | |
| "grad_norm": 18.524744033813477, | |
| "learning_rate": 1.956397329336422e-05, | |
| "loss": 0.6852, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.08196721311475409, | |
| "grad_norm": 0.5453315377235413, | |
| "learning_rate": 1.9536721624199483e-05, | |
| "loss": 0.3746, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.08324795081967214, | |
| "grad_norm": 3.647247076034546, | |
| "learning_rate": 1.950946995503475e-05, | |
| "loss": 0.3404, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.08452868852459017, | |
| "grad_norm": 2.8789007663726807, | |
| "learning_rate": 1.9482218285870012e-05, | |
| "loss": 0.558, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.0858094262295082, | |
| "grad_norm": 59.30935287475586, | |
| "learning_rate": 1.9454966616705274e-05, | |
| "loss": 0.2678, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.08709016393442623, | |
| "grad_norm": 36.327701568603516, | |
| "learning_rate": 1.942771494754054e-05, | |
| "loss": 0.8112, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.08837090163934426, | |
| "grad_norm": 30.401525497436523, | |
| "learning_rate": 1.9400463278375803e-05, | |
| "loss": 0.8637, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.08965163934426229, | |
| "grad_norm": 65.09701538085938, | |
| "learning_rate": 1.9373211609211066e-05, | |
| "loss": 0.4575, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.09093237704918032, | |
| "grad_norm": 0.17984363436698914, | |
| "learning_rate": 1.9345959940046332e-05, | |
| "loss": 0.6336, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.09221311475409837, | |
| "grad_norm": 8.531198501586914, | |
| "learning_rate": 1.931870827088159e-05, | |
| "loss": 0.6553, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.0934938524590164, | |
| "grad_norm": 1.3908320665359497, | |
| "learning_rate": 1.9291456601716857e-05, | |
| "loss": 0.5388, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.09477459016393443, | |
| "grad_norm": 27.024486541748047, | |
| "learning_rate": 1.926420493255212e-05, | |
| "loss": 0.5134, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.09605532786885246, | |
| "grad_norm": 1.363821268081665, | |
| "learning_rate": 1.9236953263387382e-05, | |
| "loss": 0.4295, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.09733606557377049, | |
| "grad_norm": 18.301353454589844, | |
| "learning_rate": 1.9209701594222648e-05, | |
| "loss": 0.8292, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.09861680327868852, | |
| "grad_norm": 7.517091751098633, | |
| "learning_rate": 1.918244992505791e-05, | |
| "loss": 0.5639, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.09989754098360656, | |
| "grad_norm": 0.8409481048583984, | |
| "learning_rate": 1.9155198255893174e-05, | |
| "loss": 0.4416, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.1011782786885246, | |
| "grad_norm": 10.660968780517578, | |
| "learning_rate": 1.912794658672844e-05, | |
| "loss": 0.6898, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.10245901639344263, | |
| "grad_norm": 13.175348281860352, | |
| "learning_rate": 1.9100694917563702e-05, | |
| "loss": 0.4468, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.10373975409836066, | |
| "grad_norm": 11.351682662963867, | |
| "learning_rate": 1.9073443248398965e-05, | |
| "loss": 1.0168, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.10502049180327869, | |
| "grad_norm": 2.7584354877471924, | |
| "learning_rate": 1.904619157923423e-05, | |
| "loss": 0.4303, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.10630122950819672, | |
| "grad_norm": 34.519954681396484, | |
| "learning_rate": 1.9018939910069493e-05, | |
| "loss": 0.1582, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.10758196721311475, | |
| "grad_norm": 1.5243237018585205, | |
| "learning_rate": 1.8991688240904756e-05, | |
| "loss": 0.5226, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.1088627049180328, | |
| "grad_norm": 12.513932228088379, | |
| "learning_rate": 1.8964436571740022e-05, | |
| "loss": 0.6845, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.11014344262295082, | |
| "grad_norm": 32.45783996582031, | |
| "learning_rate": 1.8937184902575285e-05, | |
| "loss": 1.0624, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.11142418032786885, | |
| "grad_norm": 0.5410599112510681, | |
| "learning_rate": 1.8909933233410547e-05, | |
| "loss": 0.7797, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.11270491803278689, | |
| "grad_norm": 40.27082443237305, | |
| "learning_rate": 1.888268156424581e-05, | |
| "loss": 0.6134, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.11398565573770492, | |
| "grad_norm": 14.060335159301758, | |
| "learning_rate": 1.8855429895081076e-05, | |
| "loss": 0.4326, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.11526639344262295, | |
| "grad_norm": 13.475322723388672, | |
| "learning_rate": 1.882817822591634e-05, | |
| "loss": 0.7133, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.11654713114754098, | |
| "grad_norm": 3.2171595096588135, | |
| "learning_rate": 1.88009265567516e-05, | |
| "loss": 0.6357, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.11782786885245902, | |
| "grad_norm": 34.33395767211914, | |
| "learning_rate": 1.8773674887586867e-05, | |
| "loss": 0.6751, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.11910860655737705, | |
| "grad_norm": 0.17749445140361786, | |
| "learning_rate": 1.874642321842213e-05, | |
| "loss": 0.3697, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.12038934426229508, | |
| "grad_norm": 49.89470291137695, | |
| "learning_rate": 1.8719171549257392e-05, | |
| "loss": 0.5144, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.12167008196721311, | |
| "grad_norm": 15.842961311340332, | |
| "learning_rate": 1.869191988009266e-05, | |
| "loss": 0.7409, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.12295081967213115, | |
| "grad_norm": 5.076769828796387, | |
| "learning_rate": 1.866466821092792e-05, | |
| "loss": 0.2575, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.12423155737704918, | |
| "grad_norm": 6.906425476074219, | |
| "learning_rate": 1.8637416541763184e-05, | |
| "loss": 0.4676, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.1255122950819672, | |
| "grad_norm": 0.2420882135629654, | |
| "learning_rate": 1.861016487259845e-05, | |
| "loss": 0.5278, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.12679303278688525, | |
| "grad_norm": 42.10707473754883, | |
| "learning_rate": 1.8582913203433712e-05, | |
| "loss": 0.2199, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.12807377049180327, | |
| "grad_norm": 67.881103515625, | |
| "learning_rate": 1.8555661534268975e-05, | |
| "loss": 0.7105, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.1293545081967213, | |
| "grad_norm": 0.4502294361591339, | |
| "learning_rate": 1.852840986510424e-05, | |
| "loss": 0.7214, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.13063524590163936, | |
| "grad_norm": 0.19563625752925873, | |
| "learning_rate": 1.8501158195939504e-05, | |
| "loss": 0.344, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.13191598360655737, | |
| "grad_norm": 70.44747161865234, | |
| "learning_rate": 1.8473906526774766e-05, | |
| "loss": 0.5475, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.13319672131147542, | |
| "grad_norm": 68.5734634399414, | |
| "learning_rate": 1.844665485761003e-05, | |
| "loss": 0.866, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.13447745901639344, | |
| "grad_norm": 5.665011405944824, | |
| "learning_rate": 1.841940318844529e-05, | |
| "loss": 0.4777, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.13575819672131148, | |
| "grad_norm": 34.88306427001953, | |
| "learning_rate": 1.8392151519280557e-05, | |
| "loss": 0.6516, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.1370389344262295, | |
| "grad_norm": 5.857304096221924, | |
| "learning_rate": 1.836489985011582e-05, | |
| "loss": 0.3298, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.13831967213114754, | |
| "grad_norm": 0.40846720337867737, | |
| "learning_rate": 1.8337648180951083e-05, | |
| "loss": 0.6535, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.1396004098360656, | |
| "grad_norm": 26.644474029541016, | |
| "learning_rate": 1.831039651178635e-05, | |
| "loss": 0.5543, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.1408811475409836, | |
| "grad_norm": 1.7488807439804077, | |
| "learning_rate": 1.828314484262161e-05, | |
| "loss": 0.6122, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.14216188524590165, | |
| "grad_norm": 63.28523254394531, | |
| "learning_rate": 1.8255893173456874e-05, | |
| "loss": 0.805, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.14344262295081966, | |
| "grad_norm": 56.30666732788086, | |
| "learning_rate": 1.822864150429214e-05, | |
| "loss": 0.8171, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.1447233606557377, | |
| "grad_norm": 52.1702880859375, | |
| "learning_rate": 1.8201389835127403e-05, | |
| "loss": 0.5012, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.14600409836065573, | |
| "grad_norm": 6.9870452880859375, | |
| "learning_rate": 1.817413816596267e-05, | |
| "loss": 0.3778, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.14728483606557377, | |
| "grad_norm": 48.00603103637695, | |
| "learning_rate": 1.814688649679793e-05, | |
| "loss": 0.4939, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.14856557377049182, | |
| "grad_norm": 0.6154949069023132, | |
| "learning_rate": 1.8119634827633194e-05, | |
| "loss": 0.3668, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.14984631147540983, | |
| "grad_norm": 1.350846529006958, | |
| "learning_rate": 1.809238315846846e-05, | |
| "loss": 0.8219, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.15112704918032788, | |
| "grad_norm": 17.47528648376465, | |
| "learning_rate": 1.8065131489303723e-05, | |
| "loss": 0.643, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.1524077868852459, | |
| "grad_norm": 2.0453598499298096, | |
| "learning_rate": 1.8037879820138985e-05, | |
| "loss": 0.6053, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.15368852459016394, | |
| "grad_norm": 28.069385528564453, | |
| "learning_rate": 1.8010628150974248e-05, | |
| "loss": 0.7856, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.15496926229508196, | |
| "grad_norm": 4.573185920715332, | |
| "learning_rate": 1.798337648180951e-05, | |
| "loss": 0.4821, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.15625, | |
| "grad_norm": 49.39838790893555, | |
| "learning_rate": 1.7956124812644776e-05, | |
| "loss": 0.596, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.15753073770491804, | |
| "grad_norm": 5.040111064910889, | |
| "learning_rate": 1.792887314348004e-05, | |
| "loss": 0.5817, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.15881147540983606, | |
| "grad_norm": 75.80863189697266, | |
| "learning_rate": 1.79016214743153e-05, | |
| "loss": 1.0482, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.1600922131147541, | |
| "grad_norm": 8.544283866882324, | |
| "learning_rate": 1.7874369805150568e-05, | |
| "loss": 0.616, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.16137295081967212, | |
| "grad_norm": 11.687309265136719, | |
| "learning_rate": 1.784711813598583e-05, | |
| "loss": 0.4421, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.16265368852459017, | |
| "grad_norm": 11.043490409851074, | |
| "learning_rate": 1.7819866466821093e-05, | |
| "loss": 0.2868, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.16393442622950818, | |
| "grad_norm": 3.897243022918701, | |
| "learning_rate": 1.779261479765636e-05, | |
| "loss": 0.4681, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.16521516393442623, | |
| "grad_norm": 0.32223525643348694, | |
| "learning_rate": 1.776536312849162e-05, | |
| "loss": 0.3196, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.16649590163934427, | |
| "grad_norm": 0.1265946328639984, | |
| "learning_rate": 1.7738111459326884e-05, | |
| "loss": 0.638, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.1677766393442623, | |
| "grad_norm": 40.56721115112305, | |
| "learning_rate": 1.771085979016215e-05, | |
| "loss": 0.5043, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.16905737704918034, | |
| "grad_norm": 6.785134315490723, | |
| "learning_rate": 1.7683608120997413e-05, | |
| "loss": 0.4148, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.17033811475409835, | |
| "grad_norm": 33.5522346496582, | |
| "learning_rate": 1.7656356451832675e-05, | |
| "loss": 1.1591, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.1716188524590164, | |
| "grad_norm": 7.858984470367432, | |
| "learning_rate": 1.762910478266794e-05, | |
| "loss": 0.9522, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.1728995901639344, | |
| "grad_norm": 32.17461013793945, | |
| "learning_rate": 1.7601853113503204e-05, | |
| "loss": 0.5926, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.17418032786885246, | |
| "grad_norm": 11.334968566894531, | |
| "learning_rate": 1.7574601444338467e-05, | |
| "loss": 0.7914, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.1754610655737705, | |
| "grad_norm": 13.335744857788086, | |
| "learning_rate": 1.754734977517373e-05, | |
| "loss": 0.8537, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.17674180327868852, | |
| "grad_norm": 19.00205421447754, | |
| "learning_rate": 1.7520098106008992e-05, | |
| "loss": 0.4838, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.17802254098360656, | |
| "grad_norm": 8.699183464050293, | |
| "learning_rate": 1.7492846436844258e-05, | |
| "loss": 0.666, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.17930327868852458, | |
| "grad_norm": 1.6320335865020752, | |
| "learning_rate": 1.746559476767952e-05, | |
| "loss": 0.5107, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.18058401639344263, | |
| "grad_norm": 1.2799221277236938, | |
| "learning_rate": 1.7438343098514787e-05, | |
| "loss": 0.4847, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.18186475409836064, | |
| "grad_norm": 2.808711528778076, | |
| "learning_rate": 1.741109142935005e-05, | |
| "loss": 0.4733, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.1831454918032787, | |
| "grad_norm": 18.037717819213867, | |
| "learning_rate": 1.7383839760185312e-05, | |
| "loss": 0.6985, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.18442622950819673, | |
| "grad_norm": 2.3388659954071045, | |
| "learning_rate": 1.7356588091020578e-05, | |
| "loss": 0.2029, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.18570696721311475, | |
| "grad_norm": 15.241260528564453, | |
| "learning_rate": 1.732933642185584e-05, | |
| "loss": 0.2651, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.1869877049180328, | |
| "grad_norm": 27.643362045288086, | |
| "learning_rate": 1.7302084752691103e-05, | |
| "loss": 0.6641, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.1882684426229508, | |
| "grad_norm": 51.026947021484375, | |
| "learning_rate": 1.727483308352637e-05, | |
| "loss": 0.5786, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.18954918032786885, | |
| "grad_norm": 62.00007247924805, | |
| "learning_rate": 1.7247581414361632e-05, | |
| "loss": 0.3976, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.19082991803278687, | |
| "grad_norm": 80.54548645019531, | |
| "learning_rate": 1.7220329745196894e-05, | |
| "loss": 0.4812, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.19211065573770492, | |
| "grad_norm": 108.28478240966797, | |
| "learning_rate": 1.719307807603216e-05, | |
| "loss": 0.3106, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.19339139344262296, | |
| "grad_norm": 31.335493087768555, | |
| "learning_rate": 1.7165826406867423e-05, | |
| "loss": 0.4389, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.19467213114754098, | |
| "grad_norm": 2.4842689037323, | |
| "learning_rate": 1.7138574737702686e-05, | |
| "loss": 0.4561, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.19595286885245902, | |
| "grad_norm": 34.57732391357422, | |
| "learning_rate": 1.7111323068537948e-05, | |
| "loss": 0.6538, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.19723360655737704, | |
| "grad_norm": 89.62613677978516, | |
| "learning_rate": 1.708407139937321e-05, | |
| "loss": 0.3808, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.19851434426229508, | |
| "grad_norm": 0.6716292500495911, | |
| "learning_rate": 1.7056819730208477e-05, | |
| "loss": 0.5189, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.19979508196721313, | |
| "grad_norm": 32.78571319580078, | |
| "learning_rate": 1.702956806104374e-05, | |
| "loss": 1.2536, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.20107581967213115, | |
| "grad_norm": 5.39422607421875, | |
| "learning_rate": 1.7002316391879002e-05, | |
| "loss": 0.4674, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.2023565573770492, | |
| "grad_norm": 0.295356422662735, | |
| "learning_rate": 1.6975064722714268e-05, | |
| "loss": 0.9923, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.2036372950819672, | |
| "grad_norm": 2.7056820392608643, | |
| "learning_rate": 1.694781305354953e-05, | |
| "loss": 0.1946, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.20491803278688525, | |
| "grad_norm": 2.453801393508911, | |
| "learning_rate": 1.6920561384384793e-05, | |
| "loss": 0.4442, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.20619877049180327, | |
| "grad_norm": 5.696882247924805, | |
| "learning_rate": 1.689330971522006e-05, | |
| "loss": 0.5623, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.2074795081967213, | |
| "grad_norm": 17.160661697387695, | |
| "learning_rate": 1.6866058046055322e-05, | |
| "loss": 1.097, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.20876024590163936, | |
| "grad_norm": 23.408737182617188, | |
| "learning_rate": 1.6838806376890585e-05, | |
| "loss": 0.5359, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.21004098360655737, | |
| "grad_norm": 0.7226897478103638, | |
| "learning_rate": 1.681155470772585e-05, | |
| "loss": 0.3844, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.21132172131147542, | |
| "grad_norm": 28.273542404174805, | |
| "learning_rate": 1.6784303038561113e-05, | |
| "loss": 0.6221, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.21260245901639344, | |
| "grad_norm": 0.6800060272216797, | |
| "learning_rate": 1.6757051369396376e-05, | |
| "loss": 0.4647, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.21388319672131148, | |
| "grad_norm": 7.838409423828125, | |
| "learning_rate": 1.6729799700231642e-05, | |
| "loss": 0.7482, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.2151639344262295, | |
| "grad_norm": 68.58909606933594, | |
| "learning_rate": 1.6702548031066905e-05, | |
| "loss": 0.7191, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.21644467213114754, | |
| "grad_norm": 10.408316612243652, | |
| "learning_rate": 1.6675296361902167e-05, | |
| "loss": 0.9812, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.2177254098360656, | |
| "grad_norm": 45.571781158447266, | |
| "learning_rate": 1.664804469273743e-05, | |
| "loss": 0.4901, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.2190061475409836, | |
| "grad_norm": 14.653166770935059, | |
| "learning_rate": 1.6620793023572696e-05, | |
| "loss": 0.4544, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.22028688524590165, | |
| "grad_norm": 75.12469482421875, | |
| "learning_rate": 1.659354135440796e-05, | |
| "loss": 0.5393, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.22156762295081966, | |
| "grad_norm": 20.70387077331543, | |
| "learning_rate": 1.656628968524322e-05, | |
| "loss": 0.7864, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.2228483606557377, | |
| "grad_norm": 2.0562734603881836, | |
| "learning_rate": 1.6539038016078487e-05, | |
| "loss": 0.6331, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.22412909836065573, | |
| "grad_norm": 13.042604446411133, | |
| "learning_rate": 1.651178634691375e-05, | |
| "loss": 0.5563, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.22540983606557377, | |
| "grad_norm": 33.89776611328125, | |
| "learning_rate": 1.6484534677749012e-05, | |
| "loss": 0.4452, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.22669057377049182, | |
| "grad_norm": 16.996103286743164, | |
| "learning_rate": 1.645728300858428e-05, | |
| "loss": 0.6087, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.22797131147540983, | |
| "grad_norm": 2.9814796447753906, | |
| "learning_rate": 1.643003133941954e-05, | |
| "loss": 0.3813, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.22925204918032788, | |
| "grad_norm": 0.20661257207393646, | |
| "learning_rate": 1.6402779670254804e-05, | |
| "loss": 0.3531, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.2305327868852459, | |
| "grad_norm": 0.23248881101608276, | |
| "learning_rate": 1.637552800109007e-05, | |
| "loss": 0.4496, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.23181352459016394, | |
| "grad_norm": 55.3471565246582, | |
| "learning_rate": 1.6348276331925332e-05, | |
| "loss": 0.5625, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.23309426229508196, | |
| "grad_norm": 11.669384002685547, | |
| "learning_rate": 1.6321024662760595e-05, | |
| "loss": 0.4075, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.234375, | |
| "grad_norm": 65.76184844970703, | |
| "learning_rate": 1.629377299359586e-05, | |
| "loss": 0.3711, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.23565573770491804, | |
| "grad_norm": 1.0016331672668457, | |
| "learning_rate": 1.6266521324431124e-05, | |
| "loss": 0.1958, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.23693647540983606, | |
| "grad_norm": 9.233772277832031, | |
| "learning_rate": 1.6239269655266386e-05, | |
| "loss": 0.5992, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.2382172131147541, | |
| "grad_norm": 7.4546732902526855, | |
| "learning_rate": 1.621201798610165e-05, | |
| "loss": 0.775, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.23949795081967212, | |
| "grad_norm": 0.771056056022644, | |
| "learning_rate": 1.618476631693691e-05, | |
| "loss": 0.6516, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.24077868852459017, | |
| "grad_norm": 13.350895881652832, | |
| "learning_rate": 1.6157514647772177e-05, | |
| "loss": 0.6574, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.24205942622950818, | |
| "grad_norm": 1.9616976976394653, | |
| "learning_rate": 1.613026297860744e-05, | |
| "loss": 0.7321, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.24334016393442623, | |
| "grad_norm": 0.2918919622898102, | |
| "learning_rate": 1.6103011309442703e-05, | |
| "loss": 1.1059, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.24462090163934427, | |
| "grad_norm": 13.870285987854004, | |
| "learning_rate": 1.607575964027797e-05, | |
| "loss": 0.4844, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.2459016393442623, | |
| "grad_norm": 19.64275360107422, | |
| "learning_rate": 1.604850797111323e-05, | |
| "loss": 0.3566, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.24718237704918034, | |
| "grad_norm": 54.27963638305664, | |
| "learning_rate": 1.6021256301948497e-05, | |
| "loss": 0.5747, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.24846311475409835, | |
| "grad_norm": 163.11248779296875, | |
| "learning_rate": 1.599400463278376e-05, | |
| "loss": 1.0376, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.2497438524590164, | |
| "grad_norm": 3.2400197982788086, | |
| "learning_rate": 1.5966752963619023e-05, | |
| "loss": 0.3435, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.2510245901639344, | |
| "grad_norm": 0.17113502323627472, | |
| "learning_rate": 1.593950129445429e-05, | |
| "loss": 0.5393, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.25230532786885246, | |
| "grad_norm": 22.859413146972656, | |
| "learning_rate": 1.591224962528955e-05, | |
| "loss": 0.3003, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.2535860655737705, | |
| "grad_norm": 1.3010896444320679, | |
| "learning_rate": 1.5884997956124814e-05, | |
| "loss": 0.626, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.25486680327868855, | |
| "grad_norm": 2.824781656265259, | |
| "learning_rate": 1.585774628696008e-05, | |
| "loss": 0.5887, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.25614754098360654, | |
| "grad_norm": 52.8790397644043, | |
| "learning_rate": 1.5830494617795342e-05, | |
| "loss": 0.5767, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.2574282786885246, | |
| "grad_norm": 10.472972869873047, | |
| "learning_rate": 1.5803242948630605e-05, | |
| "loss": 0.5818, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.2587090163934426, | |
| "grad_norm": 0.7781365513801575, | |
| "learning_rate": 1.5775991279465868e-05, | |
| "loss": 0.9479, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.25998975409836067, | |
| "grad_norm": 5.116518974304199, | |
| "learning_rate": 1.574873961030113e-05, | |
| "loss": 0.6473, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.2612704918032787, | |
| "grad_norm": 31.682783126831055, | |
| "learning_rate": 1.5721487941136396e-05, | |
| "loss": 1.0271, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.2625512295081967, | |
| "grad_norm": 0.6573253273963928, | |
| "learning_rate": 1.569423627197166e-05, | |
| "loss": 0.3799, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.26383196721311475, | |
| "grad_norm": 5.006514072418213, | |
| "learning_rate": 1.566698460280692e-05, | |
| "loss": 0.406, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.2651127049180328, | |
| "grad_norm": 30.3986873626709, | |
| "learning_rate": 1.5639732933642188e-05, | |
| "loss": 0.7609, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.26639344262295084, | |
| "grad_norm": 8.392414093017578, | |
| "learning_rate": 1.561248126447745e-05, | |
| "loss": 0.3631, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.2676741803278688, | |
| "grad_norm": 0.6506038308143616, | |
| "learning_rate": 1.5585229595312713e-05, | |
| "loss": 0.5563, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.26895491803278687, | |
| "grad_norm": 34.08297348022461, | |
| "learning_rate": 1.555797792614798e-05, | |
| "loss": 0.3359, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.2702356557377049, | |
| "grad_norm": 30.52340316772461, | |
| "learning_rate": 1.553072625698324e-05, | |
| "loss": 0.4175, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.27151639344262296, | |
| "grad_norm": 19.159420013427734, | |
| "learning_rate": 1.5503474587818504e-05, | |
| "loss": 0.5232, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.272797131147541, | |
| "grad_norm": 1.3067234754562378, | |
| "learning_rate": 1.547622291865377e-05, | |
| "loss": 0.2685, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.274077868852459, | |
| "grad_norm": 29.783512115478516, | |
| "learning_rate": 1.5448971249489033e-05, | |
| "loss": 0.7583, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.27535860655737704, | |
| "grad_norm": 55.58544921875, | |
| "learning_rate": 1.5421719580324295e-05, | |
| "loss": 0.7714, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.2766393442622951, | |
| "grad_norm": 6.930970191955566, | |
| "learning_rate": 1.539446791115956e-05, | |
| "loss": 0.4346, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.27792008196721313, | |
| "grad_norm": 7.723865509033203, | |
| "learning_rate": 1.5367216241994824e-05, | |
| "loss": 0.6486, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.2792008196721312, | |
| "grad_norm": 0.23200243711471558, | |
| "learning_rate": 1.5339964572830087e-05, | |
| "loss": 0.514, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.28048155737704916, | |
| "grad_norm": 29.773784637451172, | |
| "learning_rate": 1.531271290366535e-05, | |
| "loss": 0.9953, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.2817622950819672, | |
| "grad_norm": 19.467941284179688, | |
| "learning_rate": 1.5285461234500615e-05, | |
| "loss": 0.6698, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.28304303278688525, | |
| "grad_norm": 0.44849446415901184, | |
| "learning_rate": 1.5258209565335878e-05, | |
| "loss": 0.3486, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.2843237704918033, | |
| "grad_norm": 3.40317702293396, | |
| "learning_rate": 1.523095789617114e-05, | |
| "loss": 0.3062, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.2856045081967213, | |
| "grad_norm": 23.58439826965332, | |
| "learning_rate": 1.5203706227006405e-05, | |
| "loss": 0.4546, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.28688524590163933, | |
| "grad_norm": 0.14240220189094543, | |
| "learning_rate": 1.517645455784167e-05, | |
| "loss": 0.466, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.2881659836065574, | |
| "grad_norm": 22.152645111083984, | |
| "learning_rate": 1.5149202888676932e-05, | |
| "loss": 0.9443, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.2894467213114754, | |
| "grad_norm": 40.078433990478516, | |
| "learning_rate": 1.5121951219512196e-05, | |
| "loss": 0.7763, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.29072745901639346, | |
| "grad_norm": 22.58036231994629, | |
| "learning_rate": 1.509469955034746e-05, | |
| "loss": 0.5156, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.29200819672131145, | |
| "grad_norm": 9.161020278930664, | |
| "learning_rate": 1.5067447881182725e-05, | |
| "loss": 0.5463, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.2932889344262295, | |
| "grad_norm": 8.112720489501953, | |
| "learning_rate": 1.5040196212017987e-05, | |
| "loss": 0.8208, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.29456967213114754, | |
| "grad_norm": 2.3632164001464844, | |
| "learning_rate": 1.5012944542853252e-05, | |
| "loss": 0.5992, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.2958504098360656, | |
| "grad_norm": 5.630832672119141, | |
| "learning_rate": 1.4985692873688516e-05, | |
| "loss": 0.6949, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.29713114754098363, | |
| "grad_norm": 75.62430572509766, | |
| "learning_rate": 1.4958441204523779e-05, | |
| "loss": 0.5873, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.2984118852459016, | |
| "grad_norm": 11.58348274230957, | |
| "learning_rate": 1.4931189535359043e-05, | |
| "loss": 0.704, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.29969262295081966, | |
| "grad_norm": 20.816808700561523, | |
| "learning_rate": 1.4903937866194304e-05, | |
| "loss": 0.2416, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.3009733606557377, | |
| "grad_norm": 0.5709815621376038, | |
| "learning_rate": 1.4876686197029568e-05, | |
| "loss": 0.4159, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.30225409836065575, | |
| "grad_norm": 0.5212659239768982, | |
| "learning_rate": 1.4849434527864833e-05, | |
| "loss": 0.4472, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.30353483606557374, | |
| "grad_norm": 10.903100967407227, | |
| "learning_rate": 1.4822182858700095e-05, | |
| "loss": 0.5868, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.3048155737704918, | |
| "grad_norm": 60.755706787109375, | |
| "learning_rate": 1.479493118953536e-05, | |
| "loss": 0.8967, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.30609631147540983, | |
| "grad_norm": 0.22794629633426666, | |
| "learning_rate": 1.4767679520370624e-05, | |
| "loss": 0.7226, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.3073770491803279, | |
| "grad_norm": 52.29710006713867, | |
| "learning_rate": 1.4740427851205888e-05, | |
| "loss": 0.7622, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.3086577868852459, | |
| "grad_norm": 0.6769666075706482, | |
| "learning_rate": 1.471317618204115e-05, | |
| "loss": 0.6245, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.3099385245901639, | |
| "grad_norm": 1.508181095123291, | |
| "learning_rate": 1.4685924512876415e-05, | |
| "loss": 0.3395, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.31121926229508196, | |
| "grad_norm": 78.36157989501953, | |
| "learning_rate": 1.465867284371168e-05, | |
| "loss": 0.6791, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.3125, | |
| "grad_norm": 0.1883663535118103, | |
| "learning_rate": 1.4631421174546942e-05, | |
| "loss": 0.2169, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.31378073770491804, | |
| "grad_norm": 42.14516067504883, | |
| "learning_rate": 1.4604169505382206e-05, | |
| "loss": 1.2172, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.3150614754098361, | |
| "grad_norm": 11.31810474395752, | |
| "learning_rate": 1.457691783621747e-05, | |
| "loss": 0.5506, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.3163422131147541, | |
| "grad_norm": 5.650265216827393, | |
| "learning_rate": 1.4549666167052733e-05, | |
| "loss": 0.595, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.3176229508196721, | |
| "grad_norm": 1.0849229097366333, | |
| "learning_rate": 1.4522414497887998e-05, | |
| "loss": 0.6437, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.31890368852459017, | |
| "grad_norm": 25.959819793701172, | |
| "learning_rate": 1.4495162828723262e-05, | |
| "loss": 0.6475, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.3201844262295082, | |
| "grad_norm": 0.33578041195869446, | |
| "learning_rate": 1.4467911159558523e-05, | |
| "loss": 0.7596, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.32146516393442626, | |
| "grad_norm": 3.292280673980713, | |
| "learning_rate": 1.4440659490393787e-05, | |
| "loss": 0.6943, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.32274590163934425, | |
| "grad_norm": 12.404119491577148, | |
| "learning_rate": 1.4413407821229052e-05, | |
| "loss": 0.8402, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.3240266393442623, | |
| "grad_norm": 23.83495330810547, | |
| "learning_rate": 1.4386156152064314e-05, | |
| "loss": 0.8554, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.32530737704918034, | |
| "grad_norm": 13.377483367919922, | |
| "learning_rate": 1.4358904482899578e-05, | |
| "loss": 0.7793, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.3265881147540984, | |
| "grad_norm": 73.42285919189453, | |
| "learning_rate": 1.4331652813734843e-05, | |
| "loss": 0.4669, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.32786885245901637, | |
| "grad_norm": 40.041080474853516, | |
| "learning_rate": 1.4304401144570105e-05, | |
| "loss": 0.721, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.3291495901639344, | |
| "grad_norm": 0.547555148601532, | |
| "learning_rate": 1.427714947540537e-05, | |
| "loss": 0.4564, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.33043032786885246, | |
| "grad_norm": 1.5647186040878296, | |
| "learning_rate": 1.4249897806240634e-05, | |
| "loss": 0.4615, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.3317110655737705, | |
| "grad_norm": 35.03215789794922, | |
| "learning_rate": 1.4222646137075897e-05, | |
| "loss": 0.8154, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.33299180327868855, | |
| "grad_norm": 2.925804615020752, | |
| "learning_rate": 1.4195394467911161e-05, | |
| "loss": 0.4514, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.33427254098360654, | |
| "grad_norm": 67.04120635986328, | |
| "learning_rate": 1.4168142798746425e-05, | |
| "loss": 0.3808, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.3355532786885246, | |
| "grad_norm": 44.40123748779297, | |
| "learning_rate": 1.4140891129581688e-05, | |
| "loss": 0.4096, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.3368340163934426, | |
| "grad_norm": 0.8358442187309265, | |
| "learning_rate": 1.4113639460416952e-05, | |
| "loss": 0.5851, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.33811475409836067, | |
| "grad_norm": 0.4117409884929657, | |
| "learning_rate": 1.4086387791252217e-05, | |
| "loss": 0.4103, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.3393954918032787, | |
| "grad_norm": 34.275489807128906, | |
| "learning_rate": 1.405913612208748e-05, | |
| "loss": 0.5521, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.3406762295081967, | |
| "grad_norm": 0.12396706640720367, | |
| "learning_rate": 1.4031884452922742e-05, | |
| "loss": 0.1471, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.34195696721311475, | |
| "grad_norm": 26.100513458251953, | |
| "learning_rate": 1.4004632783758006e-05, | |
| "loss": 0.6545, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.3432377049180328, | |
| "grad_norm": 1.4054203033447266, | |
| "learning_rate": 1.3977381114593269e-05, | |
| "loss": 0.3345, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.34451844262295084, | |
| "grad_norm": 18.780344009399414, | |
| "learning_rate": 1.3950129445428533e-05, | |
| "loss": 0.5751, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.3457991803278688, | |
| "grad_norm": 2.4345474243164062, | |
| "learning_rate": 1.3922877776263797e-05, | |
| "loss": 0.518, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.34707991803278687, | |
| "grad_norm": 153.76368713378906, | |
| "learning_rate": 1.389562610709906e-05, | |
| "loss": 0.9605, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.3483606557377049, | |
| "grad_norm": 23.214303970336914, | |
| "learning_rate": 1.3868374437934324e-05, | |
| "loss": 0.5311, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.34964139344262296, | |
| "grad_norm": 3.1090455055236816, | |
| "learning_rate": 1.3841122768769589e-05, | |
| "loss": 0.7492, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.350922131147541, | |
| "grad_norm": 18.95741081237793, | |
| "learning_rate": 1.3813871099604851e-05, | |
| "loss": 0.8132, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.352202868852459, | |
| "grad_norm": 35.78852081298828, | |
| "learning_rate": 1.3786619430440116e-05, | |
| "loss": 0.4757, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.35348360655737704, | |
| "grad_norm": 0.2885892391204834, | |
| "learning_rate": 1.375936776127538e-05, | |
| "loss": 0.4375, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.3547643442622951, | |
| "grad_norm": 32.26221466064453, | |
| "learning_rate": 1.3732116092110643e-05, | |
| "loss": 0.7023, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.35604508196721313, | |
| "grad_norm": 23.65122413635254, | |
| "learning_rate": 1.3704864422945907e-05, | |
| "loss": 0.1926, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.3573258196721312, | |
| "grad_norm": 22.145179748535156, | |
| "learning_rate": 1.3677612753781171e-05, | |
| "loss": 0.7968, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.35860655737704916, | |
| "grad_norm": 15.272971153259277, | |
| "learning_rate": 1.3650361084616435e-05, | |
| "loss": 0.4302, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.3598872950819672, | |
| "grad_norm": 69.59125518798828, | |
| "learning_rate": 1.3623109415451698e-05, | |
| "loss": 0.5592, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.36116803278688525, | |
| "grad_norm": 0.19557702541351318, | |
| "learning_rate": 1.359585774628696e-05, | |
| "loss": 0.5795, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.3624487704918033, | |
| "grad_norm": 28.615272521972656, | |
| "learning_rate": 1.3568606077122223e-05, | |
| "loss": 0.517, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.3637295081967213, | |
| "grad_norm": 4.395263195037842, | |
| "learning_rate": 1.3541354407957488e-05, | |
| "loss": 0.6146, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.36501024590163933, | |
| "grad_norm": 0.7227006554603577, | |
| "learning_rate": 1.3514102738792752e-05, | |
| "loss": 0.5504, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.3662909836065574, | |
| "grad_norm": 15.734036445617676, | |
| "learning_rate": 1.3486851069628015e-05, | |
| "loss": 0.59, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.3675717213114754, | |
| "grad_norm": 1.6639937162399292, | |
| "learning_rate": 1.3459599400463279e-05, | |
| "loss": 0.5332, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.36885245901639346, | |
| "grad_norm": 0.7330634593963623, | |
| "learning_rate": 1.3432347731298543e-05, | |
| "loss": 0.4509, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.37013319672131145, | |
| "grad_norm": 42.200531005859375, | |
| "learning_rate": 1.3405096062133806e-05, | |
| "loss": 0.5252, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.3714139344262295, | |
| "grad_norm": 65.34646606445312, | |
| "learning_rate": 1.337784439296907e-05, | |
| "loss": 0.4143, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.37269467213114754, | |
| "grad_norm": 0.17863045632839203, | |
| "learning_rate": 1.3350592723804335e-05, | |
| "loss": 0.4167, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.3739754098360656, | |
| "grad_norm": 28.605680465698242, | |
| "learning_rate": 1.3323341054639599e-05, | |
| "loss": 0.6769, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.37525614754098363, | |
| "grad_norm": 0.0853688195347786, | |
| "learning_rate": 1.3296089385474861e-05, | |
| "loss": 0.5126, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.3765368852459016, | |
| "grad_norm": 63.26204299926758, | |
| "learning_rate": 1.3268837716310126e-05, | |
| "loss": 1.1696, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.37781762295081966, | |
| "grad_norm": 45.06633377075195, | |
| "learning_rate": 1.324158604714539e-05, | |
| "loss": 0.6522, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.3790983606557377, | |
| "grad_norm": 36.450233459472656, | |
| "learning_rate": 1.3214334377980653e-05, | |
| "loss": 0.91, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.38037909836065575, | |
| "grad_norm": 58.59020233154297, | |
| "learning_rate": 1.3187082708815917e-05, | |
| "loss": 0.5549, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.38165983606557374, | |
| "grad_norm": 13.287269592285156, | |
| "learning_rate": 1.3159831039651181e-05, | |
| "loss": 0.4198, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.3829405737704918, | |
| "grad_norm": 20.24810218811035, | |
| "learning_rate": 1.3132579370486442e-05, | |
| "loss": 0.6179, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.38422131147540983, | |
| "grad_norm": 18.099557876586914, | |
| "learning_rate": 1.3105327701321707e-05, | |
| "loss": 0.8484, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.3855020491803279, | |
| "grad_norm": 41.92770004272461, | |
| "learning_rate": 1.307807603215697e-05, | |
| "loss": 0.929, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.3867827868852459, | |
| "grad_norm": 11.101128578186035, | |
| "learning_rate": 1.3050824362992234e-05, | |
| "loss": 0.6886, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.3880635245901639, | |
| "grad_norm": 0.5516038537025452, | |
| "learning_rate": 1.3023572693827498e-05, | |
| "loss": 0.232, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.38934426229508196, | |
| "grad_norm": 19.20160675048828, | |
| "learning_rate": 1.299632102466276e-05, | |
| "loss": 0.632, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.390625, | |
| "grad_norm": 89.39508056640625, | |
| "learning_rate": 1.2969069355498025e-05, | |
| "loss": 0.5, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.39190573770491804, | |
| "grad_norm": 3.156262159347534, | |
| "learning_rate": 1.294181768633329e-05, | |
| "loss": 0.5471, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.3931864754098361, | |
| "grad_norm": 1.8074102401733398, | |
| "learning_rate": 1.2914566017168553e-05, | |
| "loss": 0.4993, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.3944672131147541, | |
| "grad_norm": 10.57691764831543, | |
| "learning_rate": 1.2887314348003816e-05, | |
| "loss": 0.3536, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.3957479508196721, | |
| "grad_norm": 31.425968170166016, | |
| "learning_rate": 1.286006267883908e-05, | |
| "loss": 0.4929, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.39702868852459017, | |
| "grad_norm": 1.107421636581421, | |
| "learning_rate": 1.2832811009674345e-05, | |
| "loss": 0.5302, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.3983094262295082, | |
| "grad_norm": 31.851308822631836, | |
| "learning_rate": 1.2805559340509607e-05, | |
| "loss": 0.6524, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.39959016393442626, | |
| "grad_norm": 33.0150146484375, | |
| "learning_rate": 1.2778307671344872e-05, | |
| "loss": 0.5296, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.40087090163934425, | |
| "grad_norm": 60.5539665222168, | |
| "learning_rate": 1.2751056002180136e-05, | |
| "loss": 0.7329, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.4021516393442623, | |
| "grad_norm": 26.929574966430664, | |
| "learning_rate": 1.2723804333015399e-05, | |
| "loss": 0.5035, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.40343237704918034, | |
| "grad_norm": 28.021299362182617, | |
| "learning_rate": 1.2696552663850661e-05, | |
| "loss": 0.603, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.4047131147540984, | |
| "grad_norm": 59.49539566040039, | |
| "learning_rate": 1.2669300994685924e-05, | |
| "loss": 0.5007, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.40599385245901637, | |
| "grad_norm": 31.815570831298828, | |
| "learning_rate": 1.2642049325521188e-05, | |
| "loss": 0.4406, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.4072745901639344, | |
| "grad_norm": 60.27109146118164, | |
| "learning_rate": 1.2614797656356453e-05, | |
| "loss": 0.5205, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.40855532786885246, | |
| "grad_norm": 3.3493058681488037, | |
| "learning_rate": 1.2587545987191717e-05, | |
| "loss": 0.6267, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.4098360655737705, | |
| "grad_norm": 23.72585678100586, | |
| "learning_rate": 1.256029431802698e-05, | |
| "loss": 0.6263, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.41111680327868855, | |
| "grad_norm": 24.219833374023438, | |
| "learning_rate": 1.2533042648862244e-05, | |
| "loss": 0.4589, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.41239754098360654, | |
| "grad_norm": 0.2840415835380554, | |
| "learning_rate": 1.2505790979697508e-05, | |
| "loss": 0.3652, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.4136782786885246, | |
| "grad_norm": 17.429651260375977, | |
| "learning_rate": 1.247853931053277e-05, | |
| "loss": 0.7563, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.4149590163934426, | |
| "grad_norm": 0.5108852386474609, | |
| "learning_rate": 1.2451287641368035e-05, | |
| "loss": 0.3132, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.41623975409836067, | |
| "grad_norm": 50.98451614379883, | |
| "learning_rate": 1.24240359722033e-05, | |
| "loss": 0.5077, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.4175204918032787, | |
| "grad_norm": 1.3974177837371826, | |
| "learning_rate": 1.2396784303038562e-05, | |
| "loss": 0.4276, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.4188012295081967, | |
| "grad_norm": 12.84176254272461, | |
| "learning_rate": 1.2369532633873826e-05, | |
| "loss": 0.5647, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.42008196721311475, | |
| "grad_norm": 21.05103302001953, | |
| "learning_rate": 1.234228096470909e-05, | |
| "loss": 0.1421, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.4213627049180328, | |
| "grad_norm": 0.3647187352180481, | |
| "learning_rate": 1.2315029295544353e-05, | |
| "loss": 0.6179, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.42264344262295084, | |
| "grad_norm": 90.5313720703125, | |
| "learning_rate": 1.2287777626379618e-05, | |
| "loss": 0.8233, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.4239241803278688, | |
| "grad_norm": 19.75844955444336, | |
| "learning_rate": 1.226052595721488e-05, | |
| "loss": 0.5078, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.42520491803278687, | |
| "grad_norm": 0.42248353362083435, | |
| "learning_rate": 1.2233274288050143e-05, | |
| "loss": 0.3436, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.4264856557377049, | |
| "grad_norm": 59.313232421875, | |
| "learning_rate": 1.2206022618885407e-05, | |
| "loss": 0.5244, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.42776639344262296, | |
| "grad_norm": 14.109567642211914, | |
| "learning_rate": 1.2178770949720671e-05, | |
| "loss": 0.7947, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.429047131147541, | |
| "grad_norm": 16.229310989379883, | |
| "learning_rate": 1.2151519280555934e-05, | |
| "loss": 0.5386, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.430327868852459, | |
| "grad_norm": 25.23029136657715, | |
| "learning_rate": 1.2124267611391198e-05, | |
| "loss": 0.5719, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.43160860655737704, | |
| "grad_norm": 1.4985939264297485, | |
| "learning_rate": 1.2097015942226463e-05, | |
| "loss": 0.3424, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.4328893442622951, | |
| "grad_norm": 24.808349609375, | |
| "learning_rate": 1.2069764273061725e-05, | |
| "loss": 0.8804, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.43417008196721313, | |
| "grad_norm": 30.150056838989258, | |
| "learning_rate": 1.204251260389699e-05, | |
| "loss": 0.3999, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.4354508196721312, | |
| "grad_norm": 59.782325744628906, | |
| "learning_rate": 1.2015260934732254e-05, | |
| "loss": 0.4612, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.43673155737704916, | |
| "grad_norm": 55.766117095947266, | |
| "learning_rate": 1.1988009265567517e-05, | |
| "loss": 0.3971, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.4380122950819672, | |
| "grad_norm": 69.8100814819336, | |
| "learning_rate": 1.1960757596402781e-05, | |
| "loss": 0.5867, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.43929303278688525, | |
| "grad_norm": 24.89929962158203, | |
| "learning_rate": 1.1933505927238045e-05, | |
| "loss": 0.793, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.4405737704918033, | |
| "grad_norm": 21.96668243408203, | |
| "learning_rate": 1.1906254258073308e-05, | |
| "loss": 0.8675, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.4418545081967213, | |
| "grad_norm": 59.37974548339844, | |
| "learning_rate": 1.1879002588908572e-05, | |
| "loss": 0.5162, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.44313524590163933, | |
| "grad_norm": 0.49646639823913574, | |
| "learning_rate": 1.1851750919743837e-05, | |
| "loss": 0.6127, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.4444159836065574, | |
| "grad_norm": 8.308236122131348, | |
| "learning_rate": 1.1824499250579097e-05, | |
| "loss": 0.6185, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.4456967213114754, | |
| "grad_norm": 2.5998694896698, | |
| "learning_rate": 1.1797247581414362e-05, | |
| "loss": 0.5386, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.44697745901639346, | |
| "grad_norm": 39.297706604003906, | |
| "learning_rate": 1.1769995912249626e-05, | |
| "loss": 0.7987, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.44825819672131145, | |
| "grad_norm": 7.121617794036865, | |
| "learning_rate": 1.1742744243084889e-05, | |
| "loss": 0.8963, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.4495389344262295, | |
| "grad_norm": 1.0637052059173584, | |
| "learning_rate": 1.1715492573920153e-05, | |
| "loss": 0.3344, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.45081967213114754, | |
| "grad_norm": 65.03225708007812, | |
| "learning_rate": 1.1688240904755417e-05, | |
| "loss": 0.6484, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.4521004098360656, | |
| "grad_norm": 0.4046671986579895, | |
| "learning_rate": 1.166098923559068e-05, | |
| "loss": 0.2954, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.45338114754098363, | |
| "grad_norm": 10.253545761108398, | |
| "learning_rate": 1.1633737566425944e-05, | |
| "loss": 0.1934, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.4546618852459016, | |
| "grad_norm": 99.9068832397461, | |
| "learning_rate": 1.1606485897261209e-05, | |
| "loss": 0.7702, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.45594262295081966, | |
| "grad_norm": 58.01685333251953, | |
| "learning_rate": 1.1579234228096471e-05, | |
| "loss": 0.5098, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.4572233606557377, | |
| "grad_norm": 116.0182876586914, | |
| "learning_rate": 1.1551982558931736e-05, | |
| "loss": 0.7526, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.45850409836065575, | |
| "grad_norm": 0.7602908611297607, | |
| "learning_rate": 1.1524730889767e-05, | |
| "loss": 0.3513, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.45978483606557374, | |
| "grad_norm": 23.507183074951172, | |
| "learning_rate": 1.1497479220602264e-05, | |
| "loss": 0.5627, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.4610655737704918, | |
| "grad_norm": 0.25320929288864136, | |
| "learning_rate": 1.1470227551437527e-05, | |
| "loss": 0.7757, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.46234631147540983, | |
| "grad_norm": 2.4358434677124023, | |
| "learning_rate": 1.1442975882272791e-05, | |
| "loss": 0.5189, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.4636270491803279, | |
| "grad_norm": 3.7247753143310547, | |
| "learning_rate": 1.1415724213108055e-05, | |
| "loss": 0.5093, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.4649077868852459, | |
| "grad_norm": 39.57719421386719, | |
| "learning_rate": 1.1388472543943316e-05, | |
| "loss": 0.7375, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.4661885245901639, | |
| "grad_norm": 68.47445678710938, | |
| "learning_rate": 1.136122087477858e-05, | |
| "loss": 0.5694, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.46746926229508196, | |
| "grad_norm": 18.36240577697754, | |
| "learning_rate": 1.1333969205613843e-05, | |
| "loss": 0.668, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.46875, | |
| "grad_norm": 38.88651657104492, | |
| "learning_rate": 1.1306717536449108e-05, | |
| "loss": 0.6662, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.47003073770491804, | |
| "grad_norm": 22.401813507080078, | |
| "learning_rate": 1.1279465867284372e-05, | |
| "loss": 0.7211, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.4713114754098361, | |
| "grad_norm": 0.3502928912639618, | |
| "learning_rate": 1.1252214198119635e-05, | |
| "loss": 0.4916, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.4725922131147541, | |
| "grad_norm": 4.397254467010498, | |
| "learning_rate": 1.1224962528954899e-05, | |
| "loss": 0.7776, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.4738729508196721, | |
| "grad_norm": 3.871940851211548, | |
| "learning_rate": 1.1197710859790163e-05, | |
| "loss": 0.707, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.47515368852459017, | |
| "grad_norm": 33.0516242980957, | |
| "learning_rate": 1.1170459190625428e-05, | |
| "loss": 0.383, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.4764344262295082, | |
| "grad_norm": 26.215961456298828, | |
| "learning_rate": 1.114320752146069e-05, | |
| "loss": 0.4088, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.47771516393442626, | |
| "grad_norm": 32.82633972167969, | |
| "learning_rate": 1.1115955852295954e-05, | |
| "loss": 0.4577, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.47899590163934425, | |
| "grad_norm": 186.45492553710938, | |
| "learning_rate": 1.1088704183131219e-05, | |
| "loss": 0.4099, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.4802766393442623, | |
| "grad_norm": 129.46585083007812, | |
| "learning_rate": 1.1061452513966481e-05, | |
| "loss": 0.6375, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.48155737704918034, | |
| "grad_norm": 0.7614141702651978, | |
| "learning_rate": 1.1034200844801746e-05, | |
| "loss": 0.5316, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.4828381147540984, | |
| "grad_norm": 34.36369323730469, | |
| "learning_rate": 1.100694917563701e-05, | |
| "loss": 0.7375, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.48411885245901637, | |
| "grad_norm": 0.140080064535141, | |
| "learning_rate": 1.0979697506472273e-05, | |
| "loss": 0.6295, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.4853995901639344, | |
| "grad_norm": 7.306354999542236, | |
| "learning_rate": 1.0952445837307537e-05, | |
| "loss": 0.9806, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.48668032786885246, | |
| "grad_norm": 85.7445068359375, | |
| "learning_rate": 1.0925194168142798e-05, | |
| "loss": 0.7028, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.4879610655737705, | |
| "grad_norm": 1.7156010866165161, | |
| "learning_rate": 1.0897942498978062e-05, | |
| "loss": 0.3287, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.48924180327868855, | |
| "grad_norm": 4.566237926483154, | |
| "learning_rate": 1.0870690829813327e-05, | |
| "loss": 0.4033, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.49052254098360654, | |
| "grad_norm": 46.89541244506836, | |
| "learning_rate": 1.084343916064859e-05, | |
| "loss": 0.823, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.4918032786885246, | |
| "grad_norm": 12.144411087036133, | |
| "learning_rate": 1.0816187491483854e-05, | |
| "loss": 0.5362, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.4930840163934426, | |
| "grad_norm": 18.448686599731445, | |
| "learning_rate": 1.0788935822319118e-05, | |
| "loss": 0.4256, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.49436475409836067, | |
| "grad_norm": 0.24063384532928467, | |
| "learning_rate": 1.0761684153154382e-05, | |
| "loss": 0.2343, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.4956454918032787, | |
| "grad_norm": 65.46757507324219, | |
| "learning_rate": 1.0734432483989645e-05, | |
| "loss": 0.5689, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.4969262295081967, | |
| "grad_norm": 11.055042266845703, | |
| "learning_rate": 1.0707180814824909e-05, | |
| "loss": 0.6077, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.49820696721311475, | |
| "grad_norm": 0.7104390263557434, | |
| "learning_rate": 1.0679929145660173e-05, | |
| "loss": 0.4516, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.4994877049180328, | |
| "grad_norm": 33.67184066772461, | |
| "learning_rate": 1.0652677476495436e-05, | |
| "loss": 0.895, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.5007684426229508, | |
| "grad_norm": 2.971726417541504, | |
| "learning_rate": 1.06254258073307e-05, | |
| "loss": 0.6966, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.5020491803278688, | |
| "grad_norm": 0.6927921772003174, | |
| "learning_rate": 1.0598174138165965e-05, | |
| "loss": 0.6475, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.5033299180327869, | |
| "grad_norm": 18.608713150024414, | |
| "learning_rate": 1.0570922469001227e-05, | |
| "loss": 0.6699, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.5046106557377049, | |
| "grad_norm": 4.135254859924316, | |
| "learning_rate": 1.0543670799836492e-05, | |
| "loss": 0.4654, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.5058913934426229, | |
| "grad_norm": 18.821929931640625, | |
| "learning_rate": 1.0516419130671756e-05, | |
| "loss": 0.8085, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.507172131147541, | |
| "grad_norm": 45.03554916381836, | |
| "learning_rate": 1.0489167461507017e-05, | |
| "loss": 0.4745, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.508452868852459, | |
| "grad_norm": 170.6229705810547, | |
| "learning_rate": 1.0461915792342281e-05, | |
| "loss": 0.6601, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.5097336065573771, | |
| "grad_norm": 23.49982261657715, | |
| "learning_rate": 1.0434664123177546e-05, | |
| "loss": 0.379, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.5110143442622951, | |
| "grad_norm": 2.7527880668640137, | |
| "learning_rate": 1.0407412454012808e-05, | |
| "loss": 0.427, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.5122950819672131, | |
| "grad_norm": 0.854061484336853, | |
| "learning_rate": 1.0380160784848072e-05, | |
| "loss": 0.8099, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.5135758196721312, | |
| "grad_norm": 77.43912506103516, | |
| "learning_rate": 1.0352909115683337e-05, | |
| "loss": 0.2461, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.5148565573770492, | |
| "grad_norm": 0.2251901924610138, | |
| "learning_rate": 1.03256574465186e-05, | |
| "loss": 0.6852, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.5161372950819673, | |
| "grad_norm": 0.30500558018684387, | |
| "learning_rate": 1.0298405777353864e-05, | |
| "loss": 0.2133, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.5174180327868853, | |
| "grad_norm": 258.4718017578125, | |
| "learning_rate": 1.0271154108189128e-05, | |
| "loss": 0.6098, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.5186987704918032, | |
| "grad_norm": 38.535884857177734, | |
| "learning_rate": 1.024390243902439e-05, | |
| "loss": 1.0855, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.5199795081967213, | |
| "grad_norm": 13.258109092712402, | |
| "learning_rate": 1.0216650769859655e-05, | |
| "loss": 0.8256, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.5212602459016393, | |
| "grad_norm": 45.26698684692383, | |
| "learning_rate": 1.018939910069492e-05, | |
| "loss": 0.579, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.5225409836065574, | |
| "grad_norm": 9.766562461853027, | |
| "learning_rate": 1.0162147431530182e-05, | |
| "loss": 0.3964, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.5238217213114754, | |
| "grad_norm": 12.843767166137695, | |
| "learning_rate": 1.0134895762365446e-05, | |
| "loss": 0.5889, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.5251024590163934, | |
| "grad_norm": 10.034939765930176, | |
| "learning_rate": 1.010764409320071e-05, | |
| "loss": 0.5689, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.5263831967213115, | |
| "grad_norm": 18.635753631591797, | |
| "learning_rate": 1.0080392424035975e-05, | |
| "loss": 0.3298, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.5276639344262295, | |
| "grad_norm": 6.539854049682617, | |
| "learning_rate": 1.0053140754871236e-05, | |
| "loss": 0.8252, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.5289446721311475, | |
| "grad_norm": 19.9680118560791, | |
| "learning_rate": 1.00258890857065e-05, | |
| "loss": 0.5432, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.5302254098360656, | |
| "grad_norm": 38.84269714355469, | |
| "learning_rate": 9.998637416541764e-06, | |
| "loss": 1.0371, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.5315061475409836, | |
| "grad_norm": 8.018956184387207, | |
| "learning_rate": 9.971385747377029e-06, | |
| "loss": 1.1152, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.5327868852459017, | |
| "grad_norm": 1.0766541957855225, | |
| "learning_rate": 9.944134078212291e-06, | |
| "loss": 0.5306, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.5340676229508197, | |
| "grad_norm": 0.5119646787643433, | |
| "learning_rate": 9.916882409047554e-06, | |
| "loss": 0.4113, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.5353483606557377, | |
| "grad_norm": 60.749359130859375, | |
| "learning_rate": 9.889630739882818e-06, | |
| "loss": 0.4195, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.5366290983606558, | |
| "grad_norm": 65.9601058959961, | |
| "learning_rate": 9.862379070718083e-06, | |
| "loss": 0.3612, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.5379098360655737, | |
| "grad_norm": 10.21090316772461, | |
| "learning_rate": 9.835127401553345e-06, | |
| "loss": 0.6783, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.5391905737704918, | |
| "grad_norm": 1.9999886751174927, | |
| "learning_rate": 9.80787573238861e-06, | |
| "loss": 0.1912, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.5404713114754098, | |
| "grad_norm": 1.5724451541900635, | |
| "learning_rate": 9.780624063223874e-06, | |
| "loss": 0.5334, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.5417520491803278, | |
| "grad_norm": 64.44462585449219, | |
| "learning_rate": 9.753372394059137e-06, | |
| "loss": 0.6993, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.5430327868852459, | |
| "grad_norm": 61.30992126464844, | |
| "learning_rate": 9.726120724894401e-06, | |
| "loss": 0.4775, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.5443135245901639, | |
| "grad_norm": 0.6172360777854919, | |
| "learning_rate": 9.698869055729663e-06, | |
| "loss": 0.829, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.545594262295082, | |
| "grad_norm": 73.66020202636719, | |
| "learning_rate": 9.671617386564928e-06, | |
| "loss": 0.6753, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.546875, | |
| "grad_norm": 14.051901817321777, | |
| "learning_rate": 9.644365717400192e-06, | |
| "loss": 0.9101, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.548155737704918, | |
| "grad_norm": 8.695210456848145, | |
| "learning_rate": 9.617114048235455e-06, | |
| "loss": 0.3771, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.5494364754098361, | |
| "grad_norm": 0.41656801104545593, | |
| "learning_rate": 9.589862379070719e-06, | |
| "loss": 0.498, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.5507172131147541, | |
| "grad_norm": 0.6697580814361572, | |
| "learning_rate": 9.562610709905983e-06, | |
| "loss": 0.6485, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.5519979508196722, | |
| "grad_norm": 7.877650260925293, | |
| "learning_rate": 9.535359040741246e-06, | |
| "loss": 0.6239, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.5532786885245902, | |
| "grad_norm": 7.576630115509033, | |
| "learning_rate": 9.508107371576509e-06, | |
| "loss": 0.9483, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.5545594262295082, | |
| "grad_norm": 21.719369888305664, | |
| "learning_rate": 9.480855702411773e-06, | |
| "loss": 0.4436, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.5558401639344263, | |
| "grad_norm": 21.08763885498047, | |
| "learning_rate": 9.453604033247037e-06, | |
| "loss": 0.4597, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.5571209016393442, | |
| "grad_norm": 18.030412673950195, | |
| "learning_rate": 9.4263523640823e-06, | |
| "loss": 0.77, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.5584016393442623, | |
| "grad_norm": 18.394670486450195, | |
| "learning_rate": 9.399100694917564e-06, | |
| "loss": 0.2567, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.5596823770491803, | |
| "grad_norm": 9.325862884521484, | |
| "learning_rate": 9.371849025752829e-06, | |
| "loss": 0.514, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.5609631147540983, | |
| "grad_norm": 0.574291467666626, | |
| "learning_rate": 9.344597356588093e-06, | |
| "loss": 0.4198, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.5622438524590164, | |
| "grad_norm": 0.7731497883796692, | |
| "learning_rate": 9.317345687423355e-06, | |
| "loss": 0.5641, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.5635245901639344, | |
| "grad_norm": 10.017977714538574, | |
| "learning_rate": 9.290094018258618e-06, | |
| "loss": 0.4151, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.5648053278688525, | |
| "grad_norm": 9.897109031677246, | |
| "learning_rate": 9.262842349093882e-06, | |
| "loss": 0.5924, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.5660860655737705, | |
| "grad_norm": 0.5375286936759949, | |
| "learning_rate": 9.235590679929147e-06, | |
| "loss": 0.2296, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.5673668032786885, | |
| "grad_norm": 15.379401206970215, | |
| "learning_rate": 9.20833901076441e-06, | |
| "loss": 0.3977, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.5686475409836066, | |
| "grad_norm": 33.24900436401367, | |
| "learning_rate": 9.181087341599674e-06, | |
| "loss": 0.6875, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.5699282786885246, | |
| "grad_norm": 17.754283905029297, | |
| "learning_rate": 9.153835672434938e-06, | |
| "loss": 0.7589, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.5712090163934426, | |
| "grad_norm": 9.958178520202637, | |
| "learning_rate": 9.1265840032702e-06, | |
| "loss": 1.0191, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.5724897540983607, | |
| "grad_norm": 0.880713701248169, | |
| "learning_rate": 9.099332334105465e-06, | |
| "loss": 0.3109, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.5737704918032787, | |
| "grad_norm": 19.9377498626709, | |
| "learning_rate": 9.072080664940728e-06, | |
| "loss": 1.0805, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.5750512295081968, | |
| "grad_norm": 45.10675811767578, | |
| "learning_rate": 9.044828995775992e-06, | |
| "loss": 0.485, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.5763319672131147, | |
| "grad_norm": 2.320873498916626, | |
| "learning_rate": 9.017577326611256e-06, | |
| "loss": 0.7725, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.5776127049180327, | |
| "grad_norm": 8.428739547729492, | |
| "learning_rate": 8.990325657446519e-06, | |
| "loss": 0.9644, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.5788934426229508, | |
| "grad_norm": 31.189594268798828, | |
| "learning_rate": 8.963073988281783e-06, | |
| "loss": 1.0522, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.5801741803278688, | |
| "grad_norm": 3.0397989749908447, | |
| "learning_rate": 8.935822319117047e-06, | |
| "loss": 0.7336, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.5814549180327869, | |
| "grad_norm": 28.37086296081543, | |
| "learning_rate": 8.90857064995231e-06, | |
| "loss": 1.0115, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.5827356557377049, | |
| "grad_norm": 24.746339797973633, | |
| "learning_rate": 8.881318980787574e-06, | |
| "loss": 0.6436, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.5840163934426229, | |
| "grad_norm": 41.54196548461914, | |
| "learning_rate": 8.854067311622839e-06, | |
| "loss": 0.7121, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.585297131147541, | |
| "grad_norm": 45.90923309326172, | |
| "learning_rate": 8.826815642458101e-06, | |
| "loss": 0.8686, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.586577868852459, | |
| "grad_norm": 16.441612243652344, | |
| "learning_rate": 8.799563973293364e-06, | |
| "loss": 0.8231, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.5878586065573771, | |
| "grad_norm": 16.66089630126953, | |
| "learning_rate": 8.772312304128628e-06, | |
| "loss": 0.5949, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.5891393442622951, | |
| "grad_norm": 23.114477157592773, | |
| "learning_rate": 8.745060634963893e-06, | |
| "loss": 0.8395, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.5904200819672131, | |
| "grad_norm": 22.976099014282227, | |
| "learning_rate": 8.717808965799155e-06, | |
| "loss": 0.8844, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.5917008196721312, | |
| "grad_norm": 12.82754898071289, | |
| "learning_rate": 8.69055729663442e-06, | |
| "loss": 0.8147, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.5929815573770492, | |
| "grad_norm": 43.78225326538086, | |
| "learning_rate": 8.663305627469684e-06, | |
| "loss": 0.7544, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.5942622950819673, | |
| "grad_norm": 19.483823776245117, | |
| "learning_rate": 8.636053958304948e-06, | |
| "loss": 0.6818, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.5955430327868853, | |
| "grad_norm": 8.231918334960938, | |
| "learning_rate": 8.60880228914021e-06, | |
| "loss": 0.4572, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.5968237704918032, | |
| "grad_norm": 8.501511573791504, | |
| "learning_rate": 8.581550619975473e-06, | |
| "loss": 0.7072, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.5981045081967213, | |
| "grad_norm": 28.3646297454834, | |
| "learning_rate": 8.554298950810738e-06, | |
| "loss": 0.7487, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.5993852459016393, | |
| "grad_norm": 10.618340492248535, | |
| "learning_rate": 8.527047281646002e-06, | |
| "loss": 0.4582, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.6006659836065574, | |
| "grad_norm": 48.34426498413086, | |
| "learning_rate": 8.499795612481265e-06, | |
| "loss": 0.743, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.6019467213114754, | |
| "grad_norm": 19.851808547973633, | |
| "learning_rate": 8.472543943316529e-06, | |
| "loss": 1.0467, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.6032274590163934, | |
| "grad_norm": 14.339262962341309, | |
| "learning_rate": 8.445292274151793e-06, | |
| "loss": 0.7851, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.6045081967213115, | |
| "grad_norm": 28.62281608581543, | |
| "learning_rate": 8.418040604987056e-06, | |
| "loss": 0.7838, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.6057889344262295, | |
| "grad_norm": 19.882169723510742, | |
| "learning_rate": 8.390788935822319e-06, | |
| "loss": 0.8792, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.6070696721311475, | |
| "grad_norm": 6.609494686126709, | |
| "learning_rate": 8.363537266657583e-06, | |
| "loss": 0.4227, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.6083504098360656, | |
| "grad_norm": 15.172801971435547, | |
| "learning_rate": 8.336285597492847e-06, | |
| "loss": 0.8444, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.6096311475409836, | |
| "grad_norm": 46.852413177490234, | |
| "learning_rate": 8.309033928328112e-06, | |
| "loss": 0.5798, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.6109118852459017, | |
| "grad_norm": 41.42491912841797, | |
| "learning_rate": 8.281782259163374e-06, | |
| "loss": 0.8357, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.6121926229508197, | |
| "grad_norm": 19.07272720336914, | |
| "learning_rate": 8.254530589998639e-06, | |
| "loss": 0.7716, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.6134733606557377, | |
| "grad_norm": 6.932359218597412, | |
| "learning_rate": 8.227278920833903e-06, | |
| "loss": 1.1022, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.6147540983606558, | |
| "grad_norm": 39.23098373413086, | |
| "learning_rate": 8.200027251669165e-06, | |
| "loss": 0.7454, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.6160348360655737, | |
| "grad_norm": 25.000465393066406, | |
| "learning_rate": 8.172775582504428e-06, | |
| "loss": 0.6045, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.6173155737704918, | |
| "grad_norm": 16.970958709716797, | |
| "learning_rate": 8.145523913339692e-06, | |
| "loss": 0.8267, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.6185963114754098, | |
| "grad_norm": 15.70919132232666, | |
| "learning_rate": 8.118272244174957e-06, | |
| "loss": 0.6148, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.6198770491803278, | |
| "grad_norm": 25.68458366394043, | |
| "learning_rate": 8.09102057501022e-06, | |
| "loss": 0.6463, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.6211577868852459, | |
| "grad_norm": 13.340360641479492, | |
| "learning_rate": 8.063768905845484e-06, | |
| "loss": 0.645, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.6224385245901639, | |
| "grad_norm": 124.80747985839844, | |
| "learning_rate": 8.036517236680748e-06, | |
| "loss": 0.5991, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.623719262295082, | |
| "grad_norm": 20.25383186340332, | |
| "learning_rate": 8.00926556751601e-06, | |
| "loss": 0.5449, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.625, | |
| "grad_norm": 19.14507484436035, | |
| "learning_rate": 7.982013898351275e-06, | |
| "loss": 0.8269, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.626280737704918, | |
| "grad_norm": 15.882426261901855, | |
| "learning_rate": 7.954762229186538e-06, | |
| "loss": 0.8284, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.6275614754098361, | |
| "grad_norm": 22.384090423583984, | |
| "learning_rate": 7.927510560021802e-06, | |
| "loss": 0.6205, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.6288422131147541, | |
| "grad_norm": 32.309017181396484, | |
| "learning_rate": 7.900258890857066e-06, | |
| "loss": 1.0006, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.6301229508196722, | |
| "grad_norm": 0.9309699535369873, | |
| "learning_rate": 7.873007221692329e-06, | |
| "loss": 0.8793, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.6314036885245902, | |
| "grad_norm": 18.254060745239258, | |
| "learning_rate": 7.845755552527593e-06, | |
| "loss": 0.5832, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.6326844262295082, | |
| "grad_norm": 1.1032278537750244, | |
| "learning_rate": 7.818503883362857e-06, | |
| "loss": 0.6085, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.6339651639344263, | |
| "grad_norm": 4.0901360511779785, | |
| "learning_rate": 7.79125221419812e-06, | |
| "loss": 0.7917, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.6352459016393442, | |
| "grad_norm": 8.3672456741333, | |
| "learning_rate": 7.764000545033384e-06, | |
| "loss": 0.4282, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.6365266393442623, | |
| "grad_norm": 25.113962173461914, | |
| "learning_rate": 7.736748875868647e-06, | |
| "loss": 0.719, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.6378073770491803, | |
| "grad_norm": 16.38678741455078, | |
| "learning_rate": 7.709497206703911e-06, | |
| "loss": 0.8253, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.6390881147540983, | |
| "grad_norm": 38.32978439331055, | |
| "learning_rate": 7.682245537539174e-06, | |
| "loss": 0.7423, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.6403688524590164, | |
| "grad_norm": 36.88998794555664, | |
| "learning_rate": 7.654993868374438e-06, | |
| "loss": 0.6787, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.6416495901639344, | |
| "grad_norm": 31.3937931060791, | |
| "learning_rate": 7.627742199209703e-06, | |
| "loss": 0.4184, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.6429303278688525, | |
| "grad_norm": 43.30199432373047, | |
| "learning_rate": 7.600490530044966e-06, | |
| "loss": 0.7261, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.6442110655737705, | |
| "grad_norm": 24.66848373413086, | |
| "learning_rate": 7.5732388608802296e-06, | |
| "loss": 0.782, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.6454918032786885, | |
| "grad_norm": 0.7670093774795532, | |
| "learning_rate": 7.545987191715494e-06, | |
| "loss": 0.7826, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.6467725409836066, | |
| "grad_norm": 28.53043556213379, | |
| "learning_rate": 7.5187355225507565e-06, | |
| "loss": 0.7738, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.6480532786885246, | |
| "grad_norm": 39.68383026123047, | |
| "learning_rate": 7.49148385338602e-06, | |
| "loss": 0.6153, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.6493340163934426, | |
| "grad_norm": 1.5401833057403564, | |
| "learning_rate": 7.464232184221284e-06, | |
| "loss": 0.4508, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.6506147540983607, | |
| "grad_norm": 18.586135864257812, | |
| "learning_rate": 7.436980515056548e-06, | |
| "loss": 0.7714, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.6518954918032787, | |
| "grad_norm": 4.915235996246338, | |
| "learning_rate": 7.409728845891811e-06, | |
| "loss": 0.7213, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.6531762295081968, | |
| "grad_norm": 13.506136894226074, | |
| "learning_rate": 7.3824771767270756e-06, | |
| "loss": 0.8002, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.6544569672131147, | |
| "grad_norm": 24.696321487426758, | |
| "learning_rate": 7.355225507562339e-06, | |
| "loss": 0.6098, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.6557377049180327, | |
| "grad_norm": 1.202723503112793, | |
| "learning_rate": 7.3279738383976025e-06, | |
| "loss": 0.5947, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.6570184426229508, | |
| "grad_norm": 27.57708168029785, | |
| "learning_rate": 7.300722169232866e-06, | |
| "loss": 0.7283, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.6582991803278688, | |
| "grad_norm": 27.763059616088867, | |
| "learning_rate": 7.2734705000681294e-06, | |
| "loss": 0.5104, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.6595799180327869, | |
| "grad_norm": 22.97685432434082, | |
| "learning_rate": 7.246218830903393e-06, | |
| "loss": 0.7658, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.6608606557377049, | |
| "grad_norm": 20.47222137451172, | |
| "learning_rate": 7.218967161738657e-06, | |
| "loss": 0.3926, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.6621413934426229, | |
| "grad_norm": 34.984249114990234, | |
| "learning_rate": 7.191715492573921e-06, | |
| "loss": 0.9199, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 0.663422131147541, | |
| "grad_norm": 32.431888580322266, | |
| "learning_rate": 7.164463823409184e-06, | |
| "loss": 0.4709, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.664702868852459, | |
| "grad_norm": 11.426986694335938, | |
| "learning_rate": 7.1372121542444485e-06, | |
| "loss": 0.8031, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.6659836065573771, | |
| "grad_norm": 27.146059036254883, | |
| "learning_rate": 7.109960485079712e-06, | |
| "loss": 0.7132, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.6672643442622951, | |
| "grad_norm": 11.636030197143555, | |
| "learning_rate": 7.082708815914975e-06, | |
| "loss": 0.5368, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 0.6685450819672131, | |
| "grad_norm": 16.758148193359375, | |
| "learning_rate": 7.055457146750239e-06, | |
| "loss": 0.6143, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.6698258196721312, | |
| "grad_norm": 0.33391350507736206, | |
| "learning_rate": 7.028205477585502e-06, | |
| "loss": 0.5793, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 0.6711065573770492, | |
| "grad_norm": 25.285449981689453, | |
| "learning_rate": 7.000953808420766e-06, | |
| "loss": 0.64, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.6723872950819673, | |
| "grad_norm": 1.447174072265625, | |
| "learning_rate": 6.97370213925603e-06, | |
| "loss": 0.8713, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.6736680327868853, | |
| "grad_norm": 34.83108139038086, | |
| "learning_rate": 6.946450470091294e-06, | |
| "loss": 0.6408, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.6749487704918032, | |
| "grad_norm": 13.1771821975708, | |
| "learning_rate": 6.919198800926558e-06, | |
| "loss": 0.6303, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 0.6762295081967213, | |
| "grad_norm": 31.439207077026367, | |
| "learning_rate": 6.8919471317618214e-06, | |
| "loss": 0.6238, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.6775102459016393, | |
| "grad_norm": 11.551750183105469, | |
| "learning_rate": 6.864695462597084e-06, | |
| "loss": 0.9247, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 0.6787909836065574, | |
| "grad_norm": 18.42095947265625, | |
| "learning_rate": 6.8374437934323475e-06, | |
| "loss": 1.0127, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.6800717213114754, | |
| "grad_norm": 0.4005849361419678, | |
| "learning_rate": 6.810192124267612e-06, | |
| "loss": 0.675, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.6813524590163934, | |
| "grad_norm": 13.756119728088379, | |
| "learning_rate": 6.782940455102875e-06, | |
| "loss": 0.5458, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.6826331967213115, | |
| "grad_norm": 15.997631072998047, | |
| "learning_rate": 6.75568878593814e-06, | |
| "loss": 0.4342, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 0.6839139344262295, | |
| "grad_norm": 16.906126022338867, | |
| "learning_rate": 6.728437116773403e-06, | |
| "loss": 0.6665, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.6851946721311475, | |
| "grad_norm": 12.170743942260742, | |
| "learning_rate": 6.701185447608667e-06, | |
| "loss": 0.9264, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.6864754098360656, | |
| "grad_norm": 35.61259841918945, | |
| "learning_rate": 6.673933778443931e-06, | |
| "loss": 0.6735, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 0.6877561475409836, | |
| "grad_norm": 13.542879104614258, | |
| "learning_rate": 6.646682109279194e-06, | |
| "loss": 0.8259, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.6890368852459017, | |
| "grad_norm": 39.6423225402832, | |
| "learning_rate": 6.619430440114457e-06, | |
| "loss": 0.7956, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 0.6903176229508197, | |
| "grad_norm": 30.907363891601562, | |
| "learning_rate": 6.592178770949721e-06, | |
| "loss": 0.7366, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 0.6915983606557377, | |
| "grad_norm": 12.479640007019043, | |
| "learning_rate": 6.564927101784985e-06, | |
| "loss": 0.3273, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.6928790983606558, | |
| "grad_norm": 19.15838623046875, | |
| "learning_rate": 6.537675432620248e-06, | |
| "loss": 0.4575, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 0.6941598360655737, | |
| "grad_norm": 20.0745792388916, | |
| "learning_rate": 6.510423763455513e-06, | |
| "loss": 0.8541, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 0.6954405737704918, | |
| "grad_norm": 30.12567901611328, | |
| "learning_rate": 6.483172094290776e-06, | |
| "loss": 0.3144, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 0.6967213114754098, | |
| "grad_norm": 8.731266975402832, | |
| "learning_rate": 6.4559204251260395e-06, | |
| "loss": 0.4327, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 0.6980020491803278, | |
| "grad_norm": 1.370941400527954, | |
| "learning_rate": 6.428668755961304e-06, | |
| "loss": 0.3519, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.6992827868852459, | |
| "grad_norm": 28.71232795715332, | |
| "learning_rate": 6.4014170867965665e-06, | |
| "loss": 0.6712, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 0.7005635245901639, | |
| "grad_norm": 20.623737335205078, | |
| "learning_rate": 6.37416541763183e-06, | |
| "loss": 0.6607, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 0.701844262295082, | |
| "grad_norm": 7.713385581970215, | |
| "learning_rate": 6.346913748467094e-06, | |
| "loss": 0.5568, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 0.703125, | |
| "grad_norm": 10.449071884155273, | |
| "learning_rate": 6.319662079302358e-06, | |
| "loss": 0.8054, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 0.704405737704918, | |
| "grad_norm": 11.34548568725586, | |
| "learning_rate": 6.292410410137621e-06, | |
| "loss": 0.8166, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.7056864754098361, | |
| "grad_norm": 2.661618947982788, | |
| "learning_rate": 6.2651587409728855e-06, | |
| "loss": 0.6567, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 0.7069672131147541, | |
| "grad_norm": 4.278378486633301, | |
| "learning_rate": 6.237907071808149e-06, | |
| "loss": 0.7271, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 0.7082479508196722, | |
| "grad_norm": 56.11579513549805, | |
| "learning_rate": 6.210655402643413e-06, | |
| "loss": 0.8394, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 0.7095286885245902, | |
| "grad_norm": 25.923078536987305, | |
| "learning_rate": 6.183403733478676e-06, | |
| "loss": 0.7055, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 0.7108094262295082, | |
| "grad_norm": 7.200451850891113, | |
| "learning_rate": 6.156152064313939e-06, | |
| "loss": 0.6715, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.7120901639344263, | |
| "grad_norm": 25.070093154907227, | |
| "learning_rate": 6.128900395149203e-06, | |
| "loss": 0.6701, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 0.7133709016393442, | |
| "grad_norm": 0.7995045781135559, | |
| "learning_rate": 6.101648725984467e-06, | |
| "loss": 0.7706, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 0.7146516393442623, | |
| "grad_norm": 14.150104522705078, | |
| "learning_rate": 6.074397056819731e-06, | |
| "loss": 0.8404, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 0.7159323770491803, | |
| "grad_norm": 21.669960021972656, | |
| "learning_rate": 6.047145387654995e-06, | |
| "loss": 0.5122, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 0.7172131147540983, | |
| "grad_norm": 10.61308765411377, | |
| "learning_rate": 6.0198937184902585e-06, | |
| "loss": 0.7182, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.7184938524590164, | |
| "grad_norm": 12.267438888549805, | |
| "learning_rate": 5.992642049325522e-06, | |
| "loss": 0.7212, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 0.7197745901639344, | |
| "grad_norm": 12.50552749633789, | |
| "learning_rate": 5.9653903801607846e-06, | |
| "loss": 0.6373, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 0.7210553278688525, | |
| "grad_norm": 3.3687191009521484, | |
| "learning_rate": 5.938138710996049e-06, | |
| "loss": 0.7845, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 0.7223360655737705, | |
| "grad_norm": 4.029101848602295, | |
| "learning_rate": 5.910887041831312e-06, | |
| "loss": 0.6061, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 0.7236168032786885, | |
| "grad_norm": 11.404590606689453, | |
| "learning_rate": 5.883635372666576e-06, | |
| "loss": 0.2602, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.7248975409836066, | |
| "grad_norm": 14.377605438232422, | |
| "learning_rate": 5.85638370350184e-06, | |
| "loss": 0.6709, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 0.7261782786885246, | |
| "grad_norm": 54.396888732910156, | |
| "learning_rate": 5.829132034337104e-06, | |
| "loss": 0.7768, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 0.7274590163934426, | |
| "grad_norm": 11.300426483154297, | |
| "learning_rate": 5.801880365172368e-06, | |
| "loss": 0.6319, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 0.7287397540983607, | |
| "grad_norm": 25.368356704711914, | |
| "learning_rate": 5.774628696007631e-06, | |
| "loss": 0.6695, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 0.7300204918032787, | |
| "grad_norm": 8.80262279510498, | |
| "learning_rate": 5.747377026842894e-06, | |
| "loss": 0.8748, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.7313012295081968, | |
| "grad_norm": 14.3671236038208, | |
| "learning_rate": 5.7201253576781575e-06, | |
| "loss": 0.7312, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 0.7325819672131147, | |
| "grad_norm": 20.28556251525879, | |
| "learning_rate": 5.692873688513422e-06, | |
| "loss": 0.6096, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 0.7338627049180327, | |
| "grad_norm": 22.88327980041504, | |
| "learning_rate": 5.665622019348685e-06, | |
| "loss": 1.0243, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 0.7351434426229508, | |
| "grad_norm": 12.539216041564941, | |
| "learning_rate": 5.63837035018395e-06, | |
| "loss": 0.6279, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 0.7364241803278688, | |
| "grad_norm": 37.97767639160156, | |
| "learning_rate": 5.611118681019213e-06, | |
| "loss": 0.7142, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.7377049180327869, | |
| "grad_norm": 2.0420548915863037, | |
| "learning_rate": 5.5838670118544766e-06, | |
| "loss": 0.5773, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.7389856557377049, | |
| "grad_norm": 12.780746459960938, | |
| "learning_rate": 5.556615342689741e-06, | |
| "loss": 0.6708, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 0.7402663934426229, | |
| "grad_norm": 2.0761895179748535, | |
| "learning_rate": 5.5293636735250035e-06, | |
| "loss": 0.6491, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 0.741547131147541, | |
| "grad_norm": 41.1733512878418, | |
| "learning_rate": 5.502112004360267e-06, | |
| "loss": 0.9564, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 0.742827868852459, | |
| "grad_norm": 25.633703231811523, | |
| "learning_rate": 5.474860335195531e-06, | |
| "loss": 0.7985, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.7441086065573771, | |
| "grad_norm": 9.461475372314453, | |
| "learning_rate": 5.447608666030795e-06, | |
| "loss": 0.5234, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 0.7453893442622951, | |
| "grad_norm": 18.90468978881836, | |
| "learning_rate": 5.420356996866058e-06, | |
| "loss": 0.4353, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 0.7466700819672131, | |
| "grad_norm": 8.587220191955566, | |
| "learning_rate": 5.3931053277013226e-06, | |
| "loss": 0.6629, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 0.7479508196721312, | |
| "grad_norm": 15.917558670043945, | |
| "learning_rate": 5.365853658536586e-06, | |
| "loss": 0.7589, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 0.7492315573770492, | |
| "grad_norm": 6.725412368774414, | |
| "learning_rate": 5.3386019893718495e-06, | |
| "loss": 0.5328, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.7505122950819673, | |
| "grad_norm": 18.641759872436523, | |
| "learning_rate": 5.311350320207113e-06, | |
| "loss": 0.5993, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 0.7517930327868853, | |
| "grad_norm": 30.297088623046875, | |
| "learning_rate": 5.2840986510423764e-06, | |
| "loss": 0.4351, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 0.7530737704918032, | |
| "grad_norm": 22.469974517822266, | |
| "learning_rate": 5.25684698187764e-06, | |
| "loss": 0.5852, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 0.7543545081967213, | |
| "grad_norm": 5.6571173667907715, | |
| "learning_rate": 5.229595312712904e-06, | |
| "loss": 0.6621, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 0.7556352459016393, | |
| "grad_norm": 32.2354736328125, | |
| "learning_rate": 5.202343643548168e-06, | |
| "loss": 0.8106, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.7569159836065574, | |
| "grad_norm": 15.729165077209473, | |
| "learning_rate": 5.175091974383431e-06, | |
| "loss": 0.519, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 0.7581967213114754, | |
| "grad_norm": 20.02010726928711, | |
| "learning_rate": 5.1478403052186955e-06, | |
| "loss": 0.599, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 0.7594774590163934, | |
| "grad_norm": 1.9774470329284668, | |
| "learning_rate": 5.120588636053959e-06, | |
| "loss": 0.3835, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 0.7607581967213115, | |
| "grad_norm": 10.6248779296875, | |
| "learning_rate": 5.093336966889222e-06, | |
| "loss": 0.5819, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 0.7620389344262295, | |
| "grad_norm": 8.844250679016113, | |
| "learning_rate": 5.066085297724486e-06, | |
| "loss": 0.524, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.7633196721311475, | |
| "grad_norm": 24.882261276245117, | |
| "learning_rate": 5.038833628559749e-06, | |
| "loss": 0.5727, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 0.7646004098360656, | |
| "grad_norm": 16.70749855041504, | |
| "learning_rate": 5.011581959395013e-06, | |
| "loss": 0.651, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 0.7658811475409836, | |
| "grad_norm": 25.65505027770996, | |
| "learning_rate": 4.984330290230277e-06, | |
| "loss": 0.563, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 0.7671618852459017, | |
| "grad_norm": 27.863927841186523, | |
| "learning_rate": 4.957078621065541e-06, | |
| "loss": 0.4771, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 0.7684426229508197, | |
| "grad_norm": 0.7001621723175049, | |
| "learning_rate": 4.929826951900804e-06, | |
| "loss": 0.5382, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.7697233606557377, | |
| "grad_norm": 16.65908432006836, | |
| "learning_rate": 4.902575282736068e-06, | |
| "loss": 0.829, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 0.7710040983606558, | |
| "grad_norm": 6.999290943145752, | |
| "learning_rate": 4.875323613571332e-06, | |
| "loss": 0.3504, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 0.7722848360655737, | |
| "grad_norm": 21.872570037841797, | |
| "learning_rate": 4.848071944406595e-06, | |
| "loss": 0.6681, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 0.7735655737704918, | |
| "grad_norm": 12.923929214477539, | |
| "learning_rate": 4.820820275241859e-06, | |
| "loss": 0.7376, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 0.7748463114754098, | |
| "grad_norm": 24.330562591552734, | |
| "learning_rate": 4.793568606077122e-06, | |
| "loss": 0.6243, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.7761270491803278, | |
| "grad_norm": 9.132780075073242, | |
| "learning_rate": 4.766316936912387e-06, | |
| "loss": 0.9117, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 0.7774077868852459, | |
| "grad_norm": 9.875121116638184, | |
| "learning_rate": 4.73906526774765e-06, | |
| "loss": 0.6056, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 0.7786885245901639, | |
| "grad_norm": 14.28087329864502, | |
| "learning_rate": 4.711813598582914e-06, | |
| "loss": 0.8161, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 0.779969262295082, | |
| "grad_norm": 4.369551658630371, | |
| "learning_rate": 4.684561929418177e-06, | |
| "loss": 0.5907, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 0.78125, | |
| "grad_norm": 30.508066177368164, | |
| "learning_rate": 4.6573102602534405e-06, | |
| "loss": 0.5567, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.782530737704918, | |
| "grad_norm": 24.87715721130371, | |
| "learning_rate": 4.630058591088705e-06, | |
| "loss": 0.6462, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 0.7838114754098361, | |
| "grad_norm": 15.003620147705078, | |
| "learning_rate": 4.602806921923968e-06, | |
| "loss": 0.5864, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 0.7850922131147541, | |
| "grad_norm": 21.42226219177246, | |
| "learning_rate": 4.575555252759232e-06, | |
| "loss": 0.7504, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 0.7863729508196722, | |
| "grad_norm": 2.328996181488037, | |
| "learning_rate": 4.548303583594495e-06, | |
| "loss": 0.6912, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 0.7876536885245902, | |
| "grad_norm": 10.392210960388184, | |
| "learning_rate": 4.52105191442976e-06, | |
| "loss": 0.8199, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.7889344262295082, | |
| "grad_norm": 8.533187866210938, | |
| "learning_rate": 4.493800245265023e-06, | |
| "loss": 0.5175, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 0.7902151639344263, | |
| "grad_norm": 11.740133285522461, | |
| "learning_rate": 4.4665485761002865e-06, | |
| "loss": 0.9367, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 0.7914959016393442, | |
| "grad_norm": 26.58624267578125, | |
| "learning_rate": 4.43929690693555e-06, | |
| "loss": 0.6825, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 0.7927766393442623, | |
| "grad_norm": 1.783715844154358, | |
| "learning_rate": 4.412045237770814e-06, | |
| "loss": 0.5531, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 0.7940573770491803, | |
| "grad_norm": 10.153545379638672, | |
| "learning_rate": 4.384793568606078e-06, | |
| "loss": 0.7688, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.7953381147540983, | |
| "grad_norm": 12.468855857849121, | |
| "learning_rate": 4.357541899441341e-06, | |
| "loss": 0.5524, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 0.7966188524590164, | |
| "grad_norm": 3.6368038654327393, | |
| "learning_rate": 4.330290230276605e-06, | |
| "loss": 0.6093, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 0.7978995901639344, | |
| "grad_norm": 12.435820579528809, | |
| "learning_rate": 4.303038561111868e-06, | |
| "loss": 0.6865, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 0.7991803278688525, | |
| "grad_norm": 9.179264068603516, | |
| "learning_rate": 4.2757868919471325e-06, | |
| "loss": 0.6233, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 0.8004610655737705, | |
| "grad_norm": 46.63306427001953, | |
| "learning_rate": 4.248535222782396e-06, | |
| "loss": 0.5358, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.8017418032786885, | |
| "grad_norm": 7.405709266662598, | |
| "learning_rate": 4.2212835536176595e-06, | |
| "loss": 0.5872, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 0.8030225409836066, | |
| "grad_norm": 20.083263397216797, | |
| "learning_rate": 4.194031884452923e-06, | |
| "loss": 0.4936, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 0.8043032786885246, | |
| "grad_norm": 2.6786341667175293, | |
| "learning_rate": 4.166780215288187e-06, | |
| "loss": 0.6548, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 0.8055840163934426, | |
| "grad_norm": 13.946334838867188, | |
| "learning_rate": 4.13952854612345e-06, | |
| "loss": 0.4651, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 0.8068647540983607, | |
| "grad_norm": 39.37618637084961, | |
| "learning_rate": 4.112276876958714e-06, | |
| "loss": 0.7712, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.8081454918032787, | |
| "grad_norm": 18.16588020324707, | |
| "learning_rate": 4.085025207793978e-06, | |
| "loss": 0.7139, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 0.8094262295081968, | |
| "grad_norm": 12.700222969055176, | |
| "learning_rate": 4.057773538629242e-06, | |
| "loss": 0.5219, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 0.8107069672131147, | |
| "grad_norm": 28.98236656188965, | |
| "learning_rate": 4.030521869464505e-06, | |
| "loss": 0.7143, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 0.8119877049180327, | |
| "grad_norm": 24.590084075927734, | |
| "learning_rate": 4.003270200299769e-06, | |
| "loss": 0.46, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 0.8132684426229508, | |
| "grad_norm": 24.325733184814453, | |
| "learning_rate": 3.976018531135032e-06, | |
| "loss": 0.7554, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.8145491803278688, | |
| "grad_norm": 8.794258117675781, | |
| "learning_rate": 3.948766861970296e-06, | |
| "loss": 0.4404, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 0.8158299180327869, | |
| "grad_norm": 0.7277682423591614, | |
| "learning_rate": 3.921515192805559e-06, | |
| "loss": 0.6449, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 0.8171106557377049, | |
| "grad_norm": 22.101137161254883, | |
| "learning_rate": 3.894263523640824e-06, | |
| "loss": 0.7285, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 0.8183913934426229, | |
| "grad_norm": 22.26101303100586, | |
| "learning_rate": 3.867011854476087e-06, | |
| "loss": 0.7699, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 0.819672131147541, | |
| "grad_norm": 17.823871612548828, | |
| "learning_rate": 3.839760185311351e-06, | |
| "loss": 0.6389, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.820952868852459, | |
| "grad_norm": 35.937286376953125, | |
| "learning_rate": 3.812508516146614e-06, | |
| "loss": 0.7776, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 0.8222336065573771, | |
| "grad_norm": 1.8482409715652466, | |
| "learning_rate": 3.785256846981878e-06, | |
| "loss": 0.7117, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 0.8235143442622951, | |
| "grad_norm": 1.5273475646972656, | |
| "learning_rate": 3.758005177817142e-06, | |
| "loss": 0.8126, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 0.8247950819672131, | |
| "grad_norm": 17.53533935546875, | |
| "learning_rate": 3.7307535086524054e-06, | |
| "loss": 0.8421, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 0.8260758196721312, | |
| "grad_norm": 9.50154972076416, | |
| "learning_rate": 3.703501839487669e-06, | |
| "loss": 0.5142, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.8273565573770492, | |
| "grad_norm": 12.528085708618164, | |
| "learning_rate": 3.6762501703229327e-06, | |
| "loss": 0.7004, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 0.8286372950819673, | |
| "grad_norm": 19.719446182250977, | |
| "learning_rate": 3.648998501158196e-06, | |
| "loss": 0.5631, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 0.8299180327868853, | |
| "grad_norm": 21.097314834594727, | |
| "learning_rate": 3.62174683199346e-06, | |
| "loss": 0.6893, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 0.8311987704918032, | |
| "grad_norm": 9.299731254577637, | |
| "learning_rate": 3.594495162828723e-06, | |
| "loss": 0.389, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 0.8324795081967213, | |
| "grad_norm": 5.358484268188477, | |
| "learning_rate": 3.567243493663987e-06, | |
| "loss": 0.59, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.8337602459016393, | |
| "grad_norm": 17.12688446044922, | |
| "learning_rate": 3.539991824499251e-06, | |
| "loss": 0.4049, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 0.8350409836065574, | |
| "grad_norm": 22.643938064575195, | |
| "learning_rate": 3.512740155334515e-06, | |
| "loss": 0.5396, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 0.8363217213114754, | |
| "grad_norm": 15.25439167022705, | |
| "learning_rate": 3.485488486169778e-06, | |
| "loss": 0.7493, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 0.8376024590163934, | |
| "grad_norm": 0.7836318016052246, | |
| "learning_rate": 3.4582368170050418e-06, | |
| "loss": 0.6233, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 0.8388831967213115, | |
| "grad_norm": 7.646884918212891, | |
| "learning_rate": 3.4309851478403057e-06, | |
| "loss": 0.6084, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 0.8401639344262295, | |
| "grad_norm": 0.6499843001365662, | |
| "learning_rate": 3.4037334786755696e-06, | |
| "loss": 0.6373, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 0.8414446721311475, | |
| "grad_norm": 4.813564777374268, | |
| "learning_rate": 3.3764818095108326e-06, | |
| "loss": 0.8403, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 0.8427254098360656, | |
| "grad_norm": 20.393510818481445, | |
| "learning_rate": 3.3492301403460965e-06, | |
| "loss": 0.7535, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 0.8440061475409836, | |
| "grad_norm": 15.966805458068848, | |
| "learning_rate": 3.32197847118136e-06, | |
| "loss": 0.8566, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 0.8452868852459017, | |
| "grad_norm": 4.333749294281006, | |
| "learning_rate": 3.294726802016624e-06, | |
| "loss": 0.742, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.8465676229508197, | |
| "grad_norm": 10.89919376373291, | |
| "learning_rate": 3.2674751328518873e-06, | |
| "loss": 0.5804, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 0.8478483606557377, | |
| "grad_norm": 9.388509750366211, | |
| "learning_rate": 3.240223463687151e-06, | |
| "loss": 0.5165, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 0.8491290983606558, | |
| "grad_norm": 4.184054374694824, | |
| "learning_rate": 3.2129717945224147e-06, | |
| "loss": 0.5183, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 0.8504098360655737, | |
| "grad_norm": 7.253784656524658, | |
| "learning_rate": 3.1857201253576786e-06, | |
| "loss": 0.7198, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 0.8516905737704918, | |
| "grad_norm": 11.86843490600586, | |
| "learning_rate": 3.1584684561929417e-06, | |
| "loss": 0.4899, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 0.8529713114754098, | |
| "grad_norm": 10.385624885559082, | |
| "learning_rate": 3.1312167870282056e-06, | |
| "loss": 0.8167, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 0.8542520491803278, | |
| "grad_norm": 21.208568572998047, | |
| "learning_rate": 3.1039651178634695e-06, | |
| "loss": 0.5819, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 0.8555327868852459, | |
| "grad_norm": 0.44370558857917786, | |
| "learning_rate": 3.0767134486987333e-06, | |
| "loss": 0.7301, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 0.8568135245901639, | |
| "grad_norm": 8.252354621887207, | |
| "learning_rate": 3.0494617795339964e-06, | |
| "loss": 0.488, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 0.858094262295082, | |
| "grad_norm": 13.996326446533203, | |
| "learning_rate": 3.0222101103692603e-06, | |
| "loss": 0.7691, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.859375, | |
| "grad_norm": 35.99543380737305, | |
| "learning_rate": 2.994958441204524e-06, | |
| "loss": 0.5266, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 0.860655737704918, | |
| "grad_norm": 19.631608963012695, | |
| "learning_rate": 2.9677067720397877e-06, | |
| "loss": 0.9918, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 0.8619364754098361, | |
| "grad_norm": 33.22177505493164, | |
| "learning_rate": 2.940455102875051e-06, | |
| "loss": 0.6044, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 0.8632172131147541, | |
| "grad_norm": 20.986621856689453, | |
| "learning_rate": 2.913203433710315e-06, | |
| "loss": 0.3522, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 0.8644979508196722, | |
| "grad_norm": 14.024015426635742, | |
| "learning_rate": 2.8859517645455785e-06, | |
| "loss": 0.6916, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.8657786885245902, | |
| "grad_norm": 11.796330451965332, | |
| "learning_rate": 2.8587000953808424e-06, | |
| "loss": 0.704, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 0.8670594262295082, | |
| "grad_norm": 20.83628273010254, | |
| "learning_rate": 2.831448426216106e-06, | |
| "loss": 0.8603, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 0.8683401639344263, | |
| "grad_norm": 18.570674896240234, | |
| "learning_rate": 2.8041967570513693e-06, | |
| "loss": 0.6714, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 0.8696209016393442, | |
| "grad_norm": 8.486098289489746, | |
| "learning_rate": 2.7769450878866332e-06, | |
| "loss": 0.7107, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 0.8709016393442623, | |
| "grad_norm": 2.3732173442840576, | |
| "learning_rate": 2.749693418721897e-06, | |
| "loss": 0.7988, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.8721823770491803, | |
| "grad_norm": 2.5915911197662354, | |
| "learning_rate": 2.72244174955716e-06, | |
| "loss": 0.6847, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 0.8734631147540983, | |
| "grad_norm": 30.59233856201172, | |
| "learning_rate": 2.695190080392424e-06, | |
| "loss": 0.6228, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 0.8747438524590164, | |
| "grad_norm": 9.502323150634766, | |
| "learning_rate": 2.667938411227688e-06, | |
| "loss": 0.5615, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 0.8760245901639344, | |
| "grad_norm": 17.929569244384766, | |
| "learning_rate": 2.640686742062952e-06, | |
| "loss": 0.5991, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 0.8773053278688525, | |
| "grad_norm": 14.03685474395752, | |
| "learning_rate": 2.613435072898215e-06, | |
| "loss": 0.7148, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 0.8785860655737705, | |
| "grad_norm": 14.739727020263672, | |
| "learning_rate": 2.586183403733479e-06, | |
| "loss": 0.5939, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 0.8798668032786885, | |
| "grad_norm": 13.458857536315918, | |
| "learning_rate": 2.5589317345687427e-06, | |
| "loss": 0.5892, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 0.8811475409836066, | |
| "grad_norm": 13.960780143737793, | |
| "learning_rate": 2.531680065404006e-06, | |
| "loss": 0.5841, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 0.8824282786885246, | |
| "grad_norm": 13.514850616455078, | |
| "learning_rate": 2.5044283962392696e-06, | |
| "loss": 0.5341, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 0.8837090163934426, | |
| "grad_norm": 12.330262184143066, | |
| "learning_rate": 2.4771767270745335e-06, | |
| "loss": 0.8722, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.8849897540983607, | |
| "grad_norm": 14.698627471923828, | |
| "learning_rate": 2.449925057909797e-06, | |
| "loss": 0.3832, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 0.8862704918032787, | |
| "grad_norm": 25.05308723449707, | |
| "learning_rate": 2.4226733887450605e-06, | |
| "loss": 0.9005, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 0.8875512295081968, | |
| "grad_norm": 16.247404098510742, | |
| "learning_rate": 2.3954217195803244e-06, | |
| "loss": 0.4334, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 0.8888319672131147, | |
| "grad_norm": 24.826126098632812, | |
| "learning_rate": 2.3681700504155883e-06, | |
| "loss": 0.4239, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 0.8901127049180327, | |
| "grad_norm": 30.12708282470703, | |
| "learning_rate": 2.3409183812508517e-06, | |
| "loss": 0.9281, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 0.8913934426229508, | |
| "grad_norm": 33.377967834472656, | |
| "learning_rate": 2.3136667120861156e-06, | |
| "loss": 1.0247, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 0.8926741803278688, | |
| "grad_norm": 9.090696334838867, | |
| "learning_rate": 2.286415042921379e-06, | |
| "loss": 0.6998, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 0.8939549180327869, | |
| "grad_norm": 18.32761001586914, | |
| "learning_rate": 2.259163373756643e-06, | |
| "loss": 0.6415, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 0.8952356557377049, | |
| "grad_norm": 3.1769232749938965, | |
| "learning_rate": 2.2319117045919065e-06, | |
| "loss": 0.5628, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 0.8965163934426229, | |
| "grad_norm": 11.886983871459961, | |
| "learning_rate": 2.2046600354271704e-06, | |
| "loss": 0.9674, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.897797131147541, | |
| "grad_norm": 2.503143072128296, | |
| "learning_rate": 2.177408366262434e-06, | |
| "loss": 0.5693, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 0.899077868852459, | |
| "grad_norm": 29.00408935546875, | |
| "learning_rate": 2.1501566970976973e-06, | |
| "loss": 0.6684, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 0.9003586065573771, | |
| "grad_norm": 14.518806457519531, | |
| "learning_rate": 2.1229050279329612e-06, | |
| "loss": 0.8301, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 0.9016393442622951, | |
| "grad_norm": 46.252830505371094, | |
| "learning_rate": 2.0956533587682247e-06, | |
| "loss": 1.0172, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 0.9029200819672131, | |
| "grad_norm": 19.148435592651367, | |
| "learning_rate": 2.068401689603488e-06, | |
| "loss": 0.7153, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 0.9042008196721312, | |
| "grad_norm": 17.86318588256836, | |
| "learning_rate": 2.041150020438752e-06, | |
| "loss": 0.7907, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 0.9054815573770492, | |
| "grad_norm": 14.341056823730469, | |
| "learning_rate": 2.0138983512740155e-06, | |
| "loss": 0.4611, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 0.9067622950819673, | |
| "grad_norm": 8.442182540893555, | |
| "learning_rate": 1.9866466821092794e-06, | |
| "loss": 0.8596, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 0.9080430327868853, | |
| "grad_norm": 15.53111743927002, | |
| "learning_rate": 1.959395012944543e-06, | |
| "loss": 0.5854, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 0.9093237704918032, | |
| "grad_norm": 8.210785865783691, | |
| "learning_rate": 1.932143343779807e-06, | |
| "loss": 0.855, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.9106045081967213, | |
| "grad_norm": 11.097797393798828, | |
| "learning_rate": 1.9048916746150703e-06, | |
| "loss": 0.7989, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 0.9118852459016393, | |
| "grad_norm": 6.103325843811035, | |
| "learning_rate": 1.8776400054503342e-06, | |
| "loss": 0.4565, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 0.9131659836065574, | |
| "grad_norm": 15.080409049987793, | |
| "learning_rate": 1.8503883362855976e-06, | |
| "loss": 0.4197, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 0.9144467213114754, | |
| "grad_norm": 23.386219024658203, | |
| "learning_rate": 1.8231366671208613e-06, | |
| "loss": 0.6161, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 0.9157274590163934, | |
| "grad_norm": 13.018634796142578, | |
| "learning_rate": 1.795884997956125e-06, | |
| "loss": 0.4554, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 0.9170081967213115, | |
| "grad_norm": 9.674510955810547, | |
| "learning_rate": 1.7686333287913887e-06, | |
| "loss": 0.7063, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 0.9182889344262295, | |
| "grad_norm": 13.369217872619629, | |
| "learning_rate": 1.7413816596266522e-06, | |
| "loss": 0.6227, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 0.9195696721311475, | |
| "grad_norm": 19.81302833557129, | |
| "learning_rate": 1.714129990461916e-06, | |
| "loss": 0.5844, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 0.9208504098360656, | |
| "grad_norm": 13.579237937927246, | |
| "learning_rate": 1.6868783212971795e-06, | |
| "loss": 0.632, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 0.9221311475409836, | |
| "grad_norm": 9.165477752685547, | |
| "learning_rate": 1.6596266521324434e-06, | |
| "loss": 0.5509, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.9234118852459017, | |
| "grad_norm": 18.232845306396484, | |
| "learning_rate": 1.6323749829677069e-06, | |
| "loss": 0.6403, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 0.9246926229508197, | |
| "grad_norm": 18.56736946105957, | |
| "learning_rate": 1.6051233138029706e-06, | |
| "loss": 0.7943, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 0.9259733606557377, | |
| "grad_norm": 8.743745803833008, | |
| "learning_rate": 1.5778716446382343e-06, | |
| "loss": 0.4279, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 0.9272540983606558, | |
| "grad_norm": 2.6923177242279053, | |
| "learning_rate": 1.550619975473498e-06, | |
| "loss": 0.5608, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 0.9285348360655737, | |
| "grad_norm": 29.790340423583984, | |
| "learning_rate": 1.5233683063087614e-06, | |
| "loss": 0.4361, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 0.9298155737704918, | |
| "grad_norm": 1.7628939151763916, | |
| "learning_rate": 1.4961166371440253e-06, | |
| "loss": 0.6859, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 0.9310963114754098, | |
| "grad_norm": 10.456538200378418, | |
| "learning_rate": 1.4688649679792888e-06, | |
| "loss": 0.7482, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 0.9323770491803278, | |
| "grad_norm": 28.223440170288086, | |
| "learning_rate": 1.4416132988145527e-06, | |
| "loss": 0.675, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 0.9336577868852459, | |
| "grad_norm": 6.400082111358643, | |
| "learning_rate": 1.4143616296498161e-06, | |
| "loss": 0.6709, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 0.9349385245901639, | |
| "grad_norm": 16.48478889465332, | |
| "learning_rate": 1.3871099604850798e-06, | |
| "loss": 0.3667, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.936219262295082, | |
| "grad_norm": 14.860025405883789, | |
| "learning_rate": 1.3598582913203435e-06, | |
| "loss": 0.7024, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 0.9375, | |
| "grad_norm": 14.933452606201172, | |
| "learning_rate": 1.3326066221556072e-06, | |
| "loss": 0.6838, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 0.938780737704918, | |
| "grad_norm": 23.65451431274414, | |
| "learning_rate": 1.3053549529908707e-06, | |
| "loss": 0.5882, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 0.9400614754098361, | |
| "grad_norm": 23.98202133178711, | |
| "learning_rate": 1.2781032838261346e-06, | |
| "loss": 0.7442, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 0.9413422131147541, | |
| "grad_norm": 38.25538635253906, | |
| "learning_rate": 1.250851614661398e-06, | |
| "loss": 0.6544, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 0.9426229508196722, | |
| "grad_norm": 1.7557686567306519, | |
| "learning_rate": 1.223599945496662e-06, | |
| "loss": 0.3867, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 0.9439036885245902, | |
| "grad_norm": 10.53632926940918, | |
| "learning_rate": 1.1963482763319254e-06, | |
| "loss": 0.9491, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 0.9451844262295082, | |
| "grad_norm": 8.34455680847168, | |
| "learning_rate": 1.169096607167189e-06, | |
| "loss": 0.4885, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 0.9464651639344263, | |
| "grad_norm": 3.460608720779419, | |
| "learning_rate": 1.1418449380024528e-06, | |
| "loss": 0.4286, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 0.9477459016393442, | |
| "grad_norm": 20.152204513549805, | |
| "learning_rate": 1.1145932688377165e-06, | |
| "loss": 0.7888, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.9490266393442623, | |
| "grad_norm": 12.72758960723877, | |
| "learning_rate": 1.0873415996729801e-06, | |
| "loss": 0.6784, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 0.9503073770491803, | |
| "grad_norm": 13.164525985717773, | |
| "learning_rate": 1.0600899305082438e-06, | |
| "loss": 0.3325, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 0.9515881147540983, | |
| "grad_norm": 15.550426483154297, | |
| "learning_rate": 1.0328382613435075e-06, | |
| "loss": 0.5533, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 0.9528688524590164, | |
| "grad_norm": 4.542503356933594, | |
| "learning_rate": 1.005586592178771e-06, | |
| "loss": 0.5264, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 0.9541495901639344, | |
| "grad_norm": 22.304424285888672, | |
| "learning_rate": 9.783349230140347e-07, | |
| "loss": 0.7576, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 0.9554303278688525, | |
| "grad_norm": 24.396604537963867, | |
| "learning_rate": 9.510832538492983e-07, | |
| "loss": 0.6932, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 0.9567110655737705, | |
| "grad_norm": 5.150862216949463, | |
| "learning_rate": 9.23831584684562e-07, | |
| "loss": 0.5392, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 0.9579918032786885, | |
| "grad_norm": 6.6292829513549805, | |
| "learning_rate": 8.965799155198257e-07, | |
| "loss": 0.43, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 0.9592725409836066, | |
| "grad_norm": 35.094058990478516, | |
| "learning_rate": 8.693282463550894e-07, | |
| "loss": 0.4879, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 0.9605532786885246, | |
| "grad_norm": 31.886293411254883, | |
| "learning_rate": 8.42076577190353e-07, | |
| "loss": 0.4554, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.9618340163934426, | |
| "grad_norm": 10.12392807006836, | |
| "learning_rate": 8.148249080256167e-07, | |
| "loss": 0.8466, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 0.9631147540983607, | |
| "grad_norm": 23.29629898071289, | |
| "learning_rate": 7.875732388608803e-07, | |
| "loss": 0.6954, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 0.9643954918032787, | |
| "grad_norm": 34.42799758911133, | |
| "learning_rate": 7.60321569696144e-07, | |
| "loss": 0.4265, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 0.9656762295081968, | |
| "grad_norm": 20.460311889648438, | |
| "learning_rate": 7.330699005314076e-07, | |
| "loss": 0.542, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 0.9669569672131147, | |
| "grad_norm": 1.3875937461853027, | |
| "learning_rate": 7.058182313666713e-07, | |
| "loss": 0.6803, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 0.9682377049180327, | |
| "grad_norm": 20.104841232299805, | |
| "learning_rate": 6.78566562201935e-07, | |
| "loss": 0.8385, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 0.9695184426229508, | |
| "grad_norm": 17.50690269470215, | |
| "learning_rate": 6.513148930371987e-07, | |
| "loss": 0.9916, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 0.9707991803278688, | |
| "grad_norm": 72.95804595947266, | |
| "learning_rate": 6.240632238724622e-07, | |
| "loss": 0.8251, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 0.9720799180327869, | |
| "grad_norm": 11.275779724121094, | |
| "learning_rate": 5.968115547077259e-07, | |
| "loss": 0.4506, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 0.9733606557377049, | |
| "grad_norm": 20.942705154418945, | |
| "learning_rate": 5.695598855429896e-07, | |
| "loss": 0.6638, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.9746413934426229, | |
| "grad_norm": 8.423453330993652, | |
| "learning_rate": 5.423082163782532e-07, | |
| "loss": 0.6953, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 0.975922131147541, | |
| "grad_norm": 24.83681297302246, | |
| "learning_rate": 5.150565472135169e-07, | |
| "loss": 0.7618, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 0.977202868852459, | |
| "grad_norm": 18.958438873291016, | |
| "learning_rate": 4.878048780487805e-07, | |
| "loss": 0.734, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 0.9784836065573771, | |
| "grad_norm": 12.136439323425293, | |
| "learning_rate": 4.605532088840442e-07, | |
| "loss": 0.5637, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 0.9797643442622951, | |
| "grad_norm": 7.522444725036621, | |
| "learning_rate": 4.3330153971930786e-07, | |
| "loss": 0.8323, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 0.9810450819672131, | |
| "grad_norm": 34.33516311645508, | |
| "learning_rate": 4.060498705545715e-07, | |
| "loss": 0.591, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 0.9823258196721312, | |
| "grad_norm": 6.395289421081543, | |
| "learning_rate": 3.787982013898352e-07, | |
| "loss": 0.5533, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 0.9836065573770492, | |
| "grad_norm": 7.777110576629639, | |
| "learning_rate": 3.515465322250988e-07, | |
| "loss": 0.7885, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 0.9848872950819673, | |
| "grad_norm": 18.54967498779297, | |
| "learning_rate": 3.242948630603625e-07, | |
| "loss": 0.5966, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 0.9861680327868853, | |
| "grad_norm": 13.985085487365723, | |
| "learning_rate": 2.970431938956261e-07, | |
| "loss": 0.7105, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.9874487704918032, | |
| "grad_norm": 37.31953811645508, | |
| "learning_rate": 2.697915247308898e-07, | |
| "loss": 0.8736, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 0.9887295081967213, | |
| "grad_norm": 9.107115745544434, | |
| "learning_rate": 2.4253985556615344e-07, | |
| "loss": 0.699, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 0.9900102459016393, | |
| "grad_norm": 14.522866249084473, | |
| "learning_rate": 2.152881864014171e-07, | |
| "loss": 0.7096, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 0.9912909836065574, | |
| "grad_norm": 12.966835975646973, | |
| "learning_rate": 1.8803651723668075e-07, | |
| "loss": 0.6627, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 0.9925717213114754, | |
| "grad_norm": 33.506622314453125, | |
| "learning_rate": 1.607848480719444e-07, | |
| "loss": 0.6537, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 0.9938524590163934, | |
| "grad_norm": 14.853964805603027, | |
| "learning_rate": 1.3353317890720807e-07, | |
| "loss": 0.4942, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 0.9951331967213115, | |
| "grad_norm": 5.332017421722412, | |
| "learning_rate": 1.0628150974247172e-07, | |
| "loss": 0.4523, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 0.9964139344262295, | |
| "grad_norm": 24.917579650878906, | |
| "learning_rate": 7.902984057773541e-08, | |
| "loss": 0.706, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 0.9976946721311475, | |
| "grad_norm": 21.65096664428711, | |
| "learning_rate": 5.177817141299905e-08, | |
| "loss": 0.4827, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 0.9989754098360656, | |
| "grad_norm": 3.226344347000122, | |
| "learning_rate": 2.452650224826271e-08, | |
| "loss": 0.8295, | |
| "step": 7800 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 7808, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8217558262480896.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |