| { | |
| "best_metric": 0.060996126383543015, | |
| "best_model_checkpoint": "./eurosat_outpus/checkpoint-10125", | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 10125, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0049382716049382715, | |
| "grad_norm": 38.450260162353516, | |
| "learning_rate": 1.9980246913580248e-05, | |
| "loss": 0.1979, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.009876543209876543, | |
| "grad_norm": 22.966312408447266, | |
| "learning_rate": 1.9960493827160498e-05, | |
| "loss": 0.3363, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.014814814814814815, | |
| "grad_norm": 73.729248046875, | |
| "learning_rate": 1.9940740740740744e-05, | |
| "loss": 0.323, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.019753086419753086, | |
| "grad_norm": 58.143798828125, | |
| "learning_rate": 1.992098765432099e-05, | |
| "loss": 0.3155, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.024691358024691357, | |
| "grad_norm": 38.21614074707031, | |
| "learning_rate": 1.9901234567901237e-05, | |
| "loss": 0.1625, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.02962962962962963, | |
| "grad_norm": 9.119422912597656, | |
| "learning_rate": 1.9881481481481483e-05, | |
| "loss": 0.4599, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0345679012345679, | |
| "grad_norm": 0.6812440156936646, | |
| "learning_rate": 1.986172839506173e-05, | |
| "loss": 0.1372, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.03950617283950617, | |
| "grad_norm": 17.003955841064453, | |
| "learning_rate": 1.9841975308641976e-05, | |
| "loss": 0.184, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.044444444444444446, | |
| "grad_norm": 4.515798568725586, | |
| "learning_rate": 1.9822222222222226e-05, | |
| "loss": 0.327, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.04938271604938271, | |
| "grad_norm": 134.1114959716797, | |
| "learning_rate": 1.9802469135802472e-05, | |
| "loss": 0.1908, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.05432098765432099, | |
| "grad_norm": 61.6785774230957, | |
| "learning_rate": 1.978271604938272e-05, | |
| "loss": 0.4477, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.05925925925925926, | |
| "grad_norm": 0.12833823263645172, | |
| "learning_rate": 1.9762962962962965e-05, | |
| "loss": 0.2536, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.06419753086419754, | |
| "grad_norm": 108.99272155761719, | |
| "learning_rate": 1.974320987654321e-05, | |
| "loss": 0.3968, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.0691358024691358, | |
| "grad_norm": 35.6202507019043, | |
| "learning_rate": 1.9723456790123458e-05, | |
| "loss": 0.268, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.07407407407407407, | |
| "grad_norm": 0.6642296314239502, | |
| "learning_rate": 1.9703703703703704e-05, | |
| "loss": 0.2327, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.07901234567901234, | |
| "grad_norm": 110.14276123046875, | |
| "learning_rate": 1.968395061728395e-05, | |
| "loss": 0.5562, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.08395061728395062, | |
| "grad_norm": 78.44914245605469, | |
| "learning_rate": 1.96641975308642e-05, | |
| "loss": 0.2922, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.08888888888888889, | |
| "grad_norm": 63.069766998291016, | |
| "learning_rate": 1.9644444444444447e-05, | |
| "loss": 0.3735, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.09382716049382717, | |
| "grad_norm": 246.51309204101562, | |
| "learning_rate": 1.9624691358024693e-05, | |
| "loss": 0.3787, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.09876543209876543, | |
| "grad_norm": 30.573638916015625, | |
| "learning_rate": 1.960493827160494e-05, | |
| "loss": 0.3193, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.1037037037037037, | |
| "grad_norm": 0.3898200988769531, | |
| "learning_rate": 1.9585185185185186e-05, | |
| "loss": 0.2929, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.10864197530864197, | |
| "grad_norm": 85.34624481201172, | |
| "learning_rate": 1.9565432098765432e-05, | |
| "loss": 0.3892, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.11358024691358025, | |
| "grad_norm": 2.596446990966797, | |
| "learning_rate": 1.954567901234568e-05, | |
| "loss": 0.2047, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.11851851851851852, | |
| "grad_norm": 16.085424423217773, | |
| "learning_rate": 1.952592592592593e-05, | |
| "loss": 0.3302, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.12345679012345678, | |
| "grad_norm": 49.19840621948242, | |
| "learning_rate": 1.9506172839506175e-05, | |
| "loss": 0.1766, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.12839506172839507, | |
| "grad_norm": 49.17105484008789, | |
| "learning_rate": 1.948641975308642e-05, | |
| "loss": 0.3533, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.13333333333333333, | |
| "grad_norm": 31.642642974853516, | |
| "learning_rate": 1.9466666666666668e-05, | |
| "loss": 0.262, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.1382716049382716, | |
| "grad_norm": 49.4565544128418, | |
| "learning_rate": 1.9446913580246914e-05, | |
| "loss": 0.3649, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.14320987654320988, | |
| "grad_norm": 33.3835563659668, | |
| "learning_rate": 1.942716049382716e-05, | |
| "loss": 0.2047, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.14814814814814814, | |
| "grad_norm": 30.190998077392578, | |
| "learning_rate": 1.9407407407407407e-05, | |
| "loss": 0.2901, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.15308641975308643, | |
| "grad_norm": 73.48704528808594, | |
| "learning_rate": 1.9387654320987657e-05, | |
| "loss": 0.399, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.1580246913580247, | |
| "grad_norm": 2.583846092224121, | |
| "learning_rate": 1.9367901234567903e-05, | |
| "loss": 0.3736, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.16296296296296298, | |
| "grad_norm": 100.03057861328125, | |
| "learning_rate": 1.934814814814815e-05, | |
| "loss": 0.2624, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.16790123456790124, | |
| "grad_norm": 0.5729751586914062, | |
| "learning_rate": 1.93283950617284e-05, | |
| "loss": 0.4662, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.1728395061728395, | |
| "grad_norm": 133.75845336914062, | |
| "learning_rate": 1.9308641975308646e-05, | |
| "loss": 0.4739, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.17777777777777778, | |
| "grad_norm": 0.10531154274940491, | |
| "learning_rate": 1.928888888888889e-05, | |
| "loss": 0.2592, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.18271604938271604, | |
| "grad_norm": 4.886446952819824, | |
| "learning_rate": 1.9269135802469135e-05, | |
| "loss": 0.2601, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.18765432098765433, | |
| "grad_norm": 20.537151336669922, | |
| "learning_rate": 1.9249382716049385e-05, | |
| "loss": 0.1298, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.1925925925925926, | |
| "grad_norm": 101.08270263671875, | |
| "learning_rate": 1.922962962962963e-05, | |
| "loss": 0.2034, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.19753086419753085, | |
| "grad_norm": 101.60489654541016, | |
| "learning_rate": 1.9209876543209878e-05, | |
| "loss": 0.5103, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.20246913580246914, | |
| "grad_norm": 4.052034854888916, | |
| "learning_rate": 1.9190123456790124e-05, | |
| "loss": 0.3028, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.2074074074074074, | |
| "grad_norm": 21.401437759399414, | |
| "learning_rate": 1.9170370370370374e-05, | |
| "loss": 0.4247, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.2123456790123457, | |
| "grad_norm": 24.329212188720703, | |
| "learning_rate": 1.915061728395062e-05, | |
| "loss": 0.3295, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.21728395061728395, | |
| "grad_norm": 6.972232341766357, | |
| "learning_rate": 1.9130864197530867e-05, | |
| "loss": 0.1283, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.2222222222222222, | |
| "grad_norm": 2.0192437171936035, | |
| "learning_rate": 1.9111111111111113e-05, | |
| "loss": 0.3414, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.2271604938271605, | |
| "grad_norm": 209.81227111816406, | |
| "learning_rate": 1.909135802469136e-05, | |
| "loss": 0.418, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.23209876543209876, | |
| "grad_norm": 64.07678985595703, | |
| "learning_rate": 1.9071604938271606e-05, | |
| "loss": 0.4821, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.23703703703703705, | |
| "grad_norm": 6.7498087882995605, | |
| "learning_rate": 1.9051851851851852e-05, | |
| "loss": 0.5004, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.2419753086419753, | |
| "grad_norm": 67.15694427490234, | |
| "learning_rate": 1.9032098765432102e-05, | |
| "loss": 0.2125, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.24691358024691357, | |
| "grad_norm": 49.803070068359375, | |
| "learning_rate": 1.901234567901235e-05, | |
| "loss": 0.2238, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.2518518518518518, | |
| "grad_norm": 76.97516632080078, | |
| "learning_rate": 1.8992592592592595e-05, | |
| "loss": 0.6817, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.25679012345679014, | |
| "grad_norm": 45.78963088989258, | |
| "learning_rate": 1.897283950617284e-05, | |
| "loss": 0.2176, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.2617283950617284, | |
| "grad_norm": 1.871187448501587, | |
| "learning_rate": 1.8953086419753087e-05, | |
| "loss": 0.0952, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.26666666666666666, | |
| "grad_norm": 52.718875885009766, | |
| "learning_rate": 1.8933333333333334e-05, | |
| "loss": 0.306, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.2716049382716049, | |
| "grad_norm": 23.83916473388672, | |
| "learning_rate": 1.891358024691358e-05, | |
| "loss": 0.306, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.2765432098765432, | |
| "grad_norm": 38.469512939453125, | |
| "learning_rate": 1.889382716049383e-05, | |
| "loss": 0.3775, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.2814814814814815, | |
| "grad_norm": 71.19271087646484, | |
| "learning_rate": 1.8874074074074076e-05, | |
| "loss": 0.2412, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.28641975308641976, | |
| "grad_norm": 10.515379905700684, | |
| "learning_rate": 1.8854320987654323e-05, | |
| "loss": 0.3623, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.291358024691358, | |
| "grad_norm": 56.489166259765625, | |
| "learning_rate": 1.883456790123457e-05, | |
| "loss": 0.2472, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.2962962962962963, | |
| "grad_norm": 11.128917694091797, | |
| "learning_rate": 1.8814814814814816e-05, | |
| "loss": 0.512, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.3012345679012346, | |
| "grad_norm": 2.7650094032287598, | |
| "learning_rate": 1.8795061728395062e-05, | |
| "loss": 0.518, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.30617283950617286, | |
| "grad_norm": 55.65047073364258, | |
| "learning_rate": 1.877530864197531e-05, | |
| "loss": 0.2817, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.3111111111111111, | |
| "grad_norm": 8.692935943603516, | |
| "learning_rate": 1.8755555555555558e-05, | |
| "loss": 0.2106, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.3160493827160494, | |
| "grad_norm": 2.446716785430908, | |
| "learning_rate": 1.8735802469135805e-05, | |
| "loss": 0.2604, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.32098765432098764, | |
| "grad_norm": 12.735766410827637, | |
| "learning_rate": 1.871604938271605e-05, | |
| "loss": 0.5116, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.32592592592592595, | |
| "grad_norm": 1.8498376607894897, | |
| "learning_rate": 1.8696296296296297e-05, | |
| "loss": 0.0587, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.3308641975308642, | |
| "grad_norm": 0.5433443188667297, | |
| "learning_rate": 1.8676543209876544e-05, | |
| "loss": 0.5793, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.3358024691358025, | |
| "grad_norm": 0.06385264545679092, | |
| "learning_rate": 1.865679012345679e-05, | |
| "loss": 0.2238, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.34074074074074073, | |
| "grad_norm": 129.50604248046875, | |
| "learning_rate": 1.8637037037037037e-05, | |
| "loss": 0.4826, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.345679012345679, | |
| "grad_norm": 20.5740909576416, | |
| "learning_rate": 1.8617283950617286e-05, | |
| "loss": 0.4072, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.3506172839506173, | |
| "grad_norm": 1.4352848529815674, | |
| "learning_rate": 1.8597530864197533e-05, | |
| "loss": 0.1077, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.35555555555555557, | |
| "grad_norm": 1.2378454208374023, | |
| "learning_rate": 1.857777777777778e-05, | |
| "loss": 0.2087, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.36049382716049383, | |
| "grad_norm": 54.489768981933594, | |
| "learning_rate": 1.8558024691358025e-05, | |
| "loss": 0.34, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.3654320987654321, | |
| "grad_norm": 85.84687042236328, | |
| "learning_rate": 1.8538271604938275e-05, | |
| "loss": 0.145, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.37037037037037035, | |
| "grad_norm": 20.322895050048828, | |
| "learning_rate": 1.851851851851852e-05, | |
| "loss": 0.4289, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.37530864197530867, | |
| "grad_norm": 1.6802163124084473, | |
| "learning_rate": 1.8498765432098768e-05, | |
| "loss": 0.4687, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.3802469135802469, | |
| "grad_norm": 125.9644546508789, | |
| "learning_rate": 1.8479012345679014e-05, | |
| "loss": 0.5029, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.3851851851851852, | |
| "grad_norm": 46.97697830200195, | |
| "learning_rate": 1.845925925925926e-05, | |
| "loss": 0.4326, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.39012345679012345, | |
| "grad_norm": 82.32715606689453, | |
| "learning_rate": 1.8439506172839507e-05, | |
| "loss": 0.3779, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.3950617283950617, | |
| "grad_norm": 48.87428665161133, | |
| "learning_rate": 1.8419753086419754e-05, | |
| "loss": 0.4167, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.4260449707508087, | |
| "learning_rate": 1.8400000000000003e-05, | |
| "loss": 0.3357, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.4049382716049383, | |
| "grad_norm": 3.1416447162628174, | |
| "learning_rate": 1.838024691358025e-05, | |
| "loss": 0.0848, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.40987654320987654, | |
| "grad_norm": 0.17075039446353912, | |
| "learning_rate": 1.8360493827160496e-05, | |
| "loss": 0.5083, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.4148148148148148, | |
| "grad_norm": 78.5146713256836, | |
| "learning_rate": 1.8340740740740743e-05, | |
| "loss": 0.3346, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.41975308641975306, | |
| "grad_norm": 38.72228240966797, | |
| "learning_rate": 1.832098765432099e-05, | |
| "loss": 0.2796, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.4246913580246914, | |
| "grad_norm": 28.315433502197266, | |
| "learning_rate": 1.8301234567901235e-05, | |
| "loss": 0.4984, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.42962962962962964, | |
| "grad_norm": 1.3758037090301514, | |
| "learning_rate": 1.8281481481481482e-05, | |
| "loss": 0.2027, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.4345679012345679, | |
| "grad_norm": 33.141361236572266, | |
| "learning_rate": 1.826172839506173e-05, | |
| "loss": 0.1499, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.43950617283950616, | |
| "grad_norm": 48.69041442871094, | |
| "learning_rate": 1.8241975308641978e-05, | |
| "loss": 0.1608, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.4444444444444444, | |
| "grad_norm": 35.90753173828125, | |
| "learning_rate": 1.8222222222222224e-05, | |
| "loss": 0.2102, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.44938271604938274, | |
| "grad_norm": 27.275602340698242, | |
| "learning_rate": 1.820246913580247e-05, | |
| "loss": 0.3688, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.454320987654321, | |
| "grad_norm": 14.521764755249023, | |
| "learning_rate": 1.8182716049382717e-05, | |
| "loss": 0.3542, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.45925925925925926, | |
| "grad_norm": 22.390480041503906, | |
| "learning_rate": 1.8162962962962963e-05, | |
| "loss": 0.1098, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.4641975308641975, | |
| "grad_norm": 5.19728422164917, | |
| "learning_rate": 1.814320987654321e-05, | |
| "loss": 0.2809, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.4691358024691358, | |
| "grad_norm": 0.5096778869628906, | |
| "learning_rate": 1.812345679012346e-05, | |
| "loss": 0.1538, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.4740740740740741, | |
| "grad_norm": 107.34992980957031, | |
| "learning_rate": 1.8103703703703706e-05, | |
| "loss": 0.5366, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.47901234567901235, | |
| "grad_norm": 37.320709228515625, | |
| "learning_rate": 1.8083950617283952e-05, | |
| "loss": 0.1668, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.4839506172839506, | |
| "grad_norm": 20.405574798583984, | |
| "learning_rate": 1.80641975308642e-05, | |
| "loss": 0.3588, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.4888888888888889, | |
| "grad_norm": 1.3000644445419312, | |
| "learning_rate": 1.8044444444444445e-05, | |
| "loss": 0.088, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.49382716049382713, | |
| "grad_norm": 37.02173614501953, | |
| "learning_rate": 1.802469135802469e-05, | |
| "loss": 0.2648, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.49876543209876545, | |
| "grad_norm": 48.47230529785156, | |
| "learning_rate": 1.8004938271604938e-05, | |
| "loss": 0.5109, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.5037037037037037, | |
| "grad_norm": 51.70542907714844, | |
| "learning_rate": 1.7985185185185188e-05, | |
| "loss": 0.3476, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.508641975308642, | |
| "grad_norm": 2.4657256603240967, | |
| "learning_rate": 1.7965432098765434e-05, | |
| "loss": 0.3445, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.5135802469135803, | |
| "grad_norm": 96.39098358154297, | |
| "learning_rate": 1.794567901234568e-05, | |
| "loss": 0.2845, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.5185185185185185, | |
| "grad_norm": 45.08651351928711, | |
| "learning_rate": 1.7925925925925927e-05, | |
| "loss": 0.2077, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.5234567901234568, | |
| "grad_norm": 0.06106605753302574, | |
| "learning_rate": 1.7906172839506177e-05, | |
| "loss": 0.0399, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.528395061728395, | |
| "grad_norm": 36.55531692504883, | |
| "learning_rate": 1.788641975308642e-05, | |
| "loss": 0.3436, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.5333333333333333, | |
| "grad_norm": 2.2626407146453857, | |
| "learning_rate": 1.7866666666666666e-05, | |
| "loss": 0.6299, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.5382716049382716, | |
| "grad_norm": 16.667465209960938, | |
| "learning_rate": 1.7846913580246913e-05, | |
| "loss": 0.2992, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.5432098765432098, | |
| "grad_norm": 41.49295425415039, | |
| "learning_rate": 1.7827160493827162e-05, | |
| "loss": 0.2554, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.5481481481481482, | |
| "grad_norm": 4.2133002281188965, | |
| "learning_rate": 1.780740740740741e-05, | |
| "loss": 0.2452, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.5530864197530864, | |
| "grad_norm": 49.12704086303711, | |
| "learning_rate": 1.7787654320987655e-05, | |
| "loss": 0.3656, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.5580246913580247, | |
| "grad_norm": 21.075599670410156, | |
| "learning_rate": 1.7767901234567905e-05, | |
| "loss": 0.0648, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.562962962962963, | |
| "grad_norm": 0.5144210457801819, | |
| "learning_rate": 1.774814814814815e-05, | |
| "loss": 0.2793, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.5679012345679012, | |
| "grad_norm": 53.27878189086914, | |
| "learning_rate": 1.7728395061728398e-05, | |
| "loss": 0.206, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.5728395061728395, | |
| "grad_norm": 36.761356353759766, | |
| "learning_rate": 1.7708641975308644e-05, | |
| "loss": 0.3469, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.5777777777777777, | |
| "grad_norm": 3.539717435836792, | |
| "learning_rate": 1.768888888888889e-05, | |
| "loss": 0.2327, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.582716049382716, | |
| "grad_norm": 3.940678596496582, | |
| "learning_rate": 1.7669135802469137e-05, | |
| "loss": 0.2148, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.5876543209876544, | |
| "grad_norm": 44.36384963989258, | |
| "learning_rate": 1.7649382716049383e-05, | |
| "loss": 0.3014, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.5925925925925926, | |
| "grad_norm": 0.4438416063785553, | |
| "learning_rate": 1.7629629629629633e-05, | |
| "loss": 0.2904, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.5975308641975309, | |
| "grad_norm": 0.08722967654466629, | |
| "learning_rate": 1.760987654320988e-05, | |
| "loss": 0.6003, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.6024691358024692, | |
| "grad_norm": 3.851921319961548, | |
| "learning_rate": 1.7590123456790126e-05, | |
| "loss": 0.1097, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.6074074074074074, | |
| "grad_norm": 2.105475425720215, | |
| "learning_rate": 1.7570370370370372e-05, | |
| "loss": 0.0446, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.6123456790123457, | |
| "grad_norm": 1.8762763738632202, | |
| "learning_rate": 1.755061728395062e-05, | |
| "loss": 0.3621, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.6172839506172839, | |
| "grad_norm": 0.8981475234031677, | |
| "learning_rate": 1.7530864197530865e-05, | |
| "loss": 0.6072, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.6222222222222222, | |
| "grad_norm": 0.05930788442492485, | |
| "learning_rate": 1.751111111111111e-05, | |
| "loss": 0.2451, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.6271604938271605, | |
| "grad_norm": 47.51054763793945, | |
| "learning_rate": 1.7491358024691358e-05, | |
| "loss": 0.2657, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.6320987654320988, | |
| "grad_norm": 84.59910583496094, | |
| "learning_rate": 1.7471604938271608e-05, | |
| "loss": 0.2821, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.6370370370370371, | |
| "grad_norm": 92.97787475585938, | |
| "learning_rate": 1.7451851851851854e-05, | |
| "loss": 0.3573, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.6419753086419753, | |
| "grad_norm": 134.259033203125, | |
| "learning_rate": 1.74320987654321e-05, | |
| "loss": 0.2441, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.6469135802469136, | |
| "grad_norm": 61.10758972167969, | |
| "learning_rate": 1.7412345679012347e-05, | |
| "loss": 0.3629, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.6518518518518519, | |
| "grad_norm": 0.031939879059791565, | |
| "learning_rate": 1.7392592592592593e-05, | |
| "loss": 0.0489, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.6567901234567901, | |
| "grad_norm": 52.49007034301758, | |
| "learning_rate": 1.737283950617284e-05, | |
| "loss": 0.2687, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.6617283950617284, | |
| "grad_norm": 4.723176002502441, | |
| "learning_rate": 1.7353086419753086e-05, | |
| "loss": 0.2252, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 2.503265619277954, | |
| "learning_rate": 1.7333333333333336e-05, | |
| "loss": 0.3459, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.671604938271605, | |
| "grad_norm": 68.56127166748047, | |
| "learning_rate": 1.7313580246913582e-05, | |
| "loss": 0.4752, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.6765432098765433, | |
| "grad_norm": 96.8653793334961, | |
| "learning_rate": 1.729382716049383e-05, | |
| "loss": 0.1921, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.6814814814814815, | |
| "grad_norm": 139.44691467285156, | |
| "learning_rate": 1.7274074074074075e-05, | |
| "loss": 0.2266, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.6864197530864198, | |
| "grad_norm": 7.88108491897583, | |
| "learning_rate": 1.725432098765432e-05, | |
| "loss": 0.1687, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.691358024691358, | |
| "grad_norm": 61.542091369628906, | |
| "learning_rate": 1.7234567901234568e-05, | |
| "loss": 0.1411, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.6962962962962963, | |
| "grad_norm": 0.7576116919517517, | |
| "learning_rate": 1.7214814814814814e-05, | |
| "loss": 0.4797, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.7012345679012346, | |
| "grad_norm": 14.038137435913086, | |
| "learning_rate": 1.7195061728395064e-05, | |
| "loss": 0.0933, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.7061728395061728, | |
| "grad_norm": 46.00447463989258, | |
| "learning_rate": 1.717530864197531e-05, | |
| "loss": 0.3333, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.7111111111111111, | |
| "grad_norm": 180.21914672851562, | |
| "learning_rate": 1.7155555555555557e-05, | |
| "loss": 0.4032, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.7160493827160493, | |
| "grad_norm": 211.60653686523438, | |
| "learning_rate": 1.7135802469135806e-05, | |
| "loss": 0.3602, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.7209876543209877, | |
| "grad_norm": 10.442931175231934, | |
| "learning_rate": 1.7116049382716053e-05, | |
| "loss": 0.3413, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.725925925925926, | |
| "grad_norm": 54.73400115966797, | |
| "learning_rate": 1.70962962962963e-05, | |
| "loss": 0.1263, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.7308641975308642, | |
| "grad_norm": 7.259425163269043, | |
| "learning_rate": 1.7076543209876542e-05, | |
| "loss": 0.2431, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.7358024691358025, | |
| "grad_norm": 96.37651824951172, | |
| "learning_rate": 1.7056790123456792e-05, | |
| "loss": 0.5599, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.7407407407407407, | |
| "grad_norm": 9.702010154724121, | |
| "learning_rate": 1.7037037037037038e-05, | |
| "loss": 0.3493, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.745679012345679, | |
| "grad_norm": 29.10769271850586, | |
| "learning_rate": 1.7017283950617285e-05, | |
| "loss": 0.3369, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.7506172839506173, | |
| "grad_norm": 77.2637939453125, | |
| "learning_rate": 1.699753086419753e-05, | |
| "loss": 0.5669, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.7555555555555555, | |
| "grad_norm": 0.2619607150554657, | |
| "learning_rate": 1.697777777777778e-05, | |
| "loss": 0.2248, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.7604938271604939, | |
| "grad_norm": 49.25140380859375, | |
| "learning_rate": 1.6958024691358027e-05, | |
| "loss": 0.1465, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.7654320987654321, | |
| "grad_norm": 1.6038424968719482, | |
| "learning_rate": 1.6938271604938274e-05, | |
| "loss": 0.2798, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.7703703703703704, | |
| "grad_norm": 0.2095940262079239, | |
| "learning_rate": 1.691851851851852e-05, | |
| "loss": 0.359, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.7753086419753087, | |
| "grad_norm": 53.154632568359375, | |
| "learning_rate": 1.6898765432098766e-05, | |
| "loss": 0.1937, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.7802469135802469, | |
| "grad_norm": 6.8274006843566895, | |
| "learning_rate": 1.6879012345679013e-05, | |
| "loss": 0.2881, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.7851851851851852, | |
| "grad_norm": 115.4723892211914, | |
| "learning_rate": 1.685925925925926e-05, | |
| "loss": 0.2592, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.7901234567901234, | |
| "grad_norm": 0.015067143365740776, | |
| "learning_rate": 1.683950617283951e-05, | |
| "loss": 0.3331, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.7950617283950617, | |
| "grad_norm": 28.81291961669922, | |
| "learning_rate": 1.6819753086419755e-05, | |
| "loss": 0.5361, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.010893706232309341, | |
| "learning_rate": 1.6800000000000002e-05, | |
| "loss": 0.1873, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.8049382716049382, | |
| "grad_norm": 1.351131796836853, | |
| "learning_rate": 1.6780246913580248e-05, | |
| "loss": 0.2294, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.8098765432098766, | |
| "grad_norm": 60.61597442626953, | |
| "learning_rate": 1.6760493827160495e-05, | |
| "loss": 0.2917, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.8148148148148148, | |
| "grad_norm": 11.661639213562012, | |
| "learning_rate": 1.674074074074074e-05, | |
| "loss": 0.4087, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.8197530864197531, | |
| "grad_norm": 251.9644012451172, | |
| "learning_rate": 1.6720987654320987e-05, | |
| "loss": 0.2226, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.8246913580246914, | |
| "grad_norm": 7.840044975280762, | |
| "learning_rate": 1.6701234567901237e-05, | |
| "loss": 0.5515, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.8296296296296296, | |
| "grad_norm": 0.08511721342802048, | |
| "learning_rate": 1.6681481481481484e-05, | |
| "loss": 0.2206, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.8345679012345679, | |
| "grad_norm": 19.307905197143555, | |
| "learning_rate": 1.666172839506173e-05, | |
| "loss": 0.2081, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.8395061728395061, | |
| "grad_norm": 1.045444130897522, | |
| "learning_rate": 1.6641975308641976e-05, | |
| "loss": 0.1815, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.8444444444444444, | |
| "grad_norm": 5.953945636749268, | |
| "learning_rate": 1.6622222222222223e-05, | |
| "loss": 0.2312, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.8493827160493828, | |
| "grad_norm": 5.905419826507568, | |
| "learning_rate": 1.660246913580247e-05, | |
| "loss": 0.1001, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.854320987654321, | |
| "grad_norm": 118.84114837646484, | |
| "learning_rate": 1.6582716049382715e-05, | |
| "loss": 0.2355, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.8592592592592593, | |
| "grad_norm": 27.624740600585938, | |
| "learning_rate": 1.6562962962962965e-05, | |
| "loss": 0.1446, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.8641975308641975, | |
| "grad_norm": 126.23757934570312, | |
| "learning_rate": 1.654320987654321e-05, | |
| "loss": 0.2595, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.8691358024691358, | |
| "grad_norm": 2.478506326675415, | |
| "learning_rate": 1.6523456790123458e-05, | |
| "loss": 0.0814, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.8740740740740741, | |
| "grad_norm": 42.80133819580078, | |
| "learning_rate": 1.6503703703703704e-05, | |
| "loss": 0.1934, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.8790123456790123, | |
| "grad_norm": 0.015840064734220505, | |
| "learning_rate": 1.648395061728395e-05, | |
| "loss": 0.2246, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.8839506172839506, | |
| "grad_norm": 112.66703796386719, | |
| "learning_rate": 1.6464197530864197e-05, | |
| "loss": 0.2628, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 33.77766036987305, | |
| "learning_rate": 1.6444444444444444e-05, | |
| "loss": 0.3193, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.8938271604938272, | |
| "grad_norm": 236.83761596679688, | |
| "learning_rate": 1.6424691358024693e-05, | |
| "loss": 0.3724, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.8987654320987655, | |
| "grad_norm": 57.66241455078125, | |
| "learning_rate": 1.640493827160494e-05, | |
| "loss": 0.3194, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.9037037037037037, | |
| "grad_norm": 142.6712646484375, | |
| "learning_rate": 1.6385185185185186e-05, | |
| "loss": 0.2389, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.908641975308642, | |
| "grad_norm": 0.11197575181722641, | |
| "learning_rate": 1.6365432098765433e-05, | |
| "loss": 0.268, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.9135802469135802, | |
| "grad_norm": 407.26885986328125, | |
| "learning_rate": 1.6345679012345682e-05, | |
| "loss": 0.2186, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.9185185185185185, | |
| "grad_norm": 0.057163987308740616, | |
| "learning_rate": 1.632592592592593e-05, | |
| "loss": 0.4509, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.9234567901234568, | |
| "grad_norm": 66.4487075805664, | |
| "learning_rate": 1.6306172839506175e-05, | |
| "loss": 0.3745, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.928395061728395, | |
| "grad_norm": 115.2850570678711, | |
| "learning_rate": 1.628641975308642e-05, | |
| "loss": 0.4606, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.9333333333333333, | |
| "grad_norm": 66.02615356445312, | |
| "learning_rate": 1.6266666666666668e-05, | |
| "loss": 0.2206, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.9382716049382716, | |
| "grad_norm": 2.386338949203491, | |
| "learning_rate": 1.6246913580246914e-05, | |
| "loss": 0.364, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.9432098765432099, | |
| "grad_norm": 57.060977935791016, | |
| "learning_rate": 1.622716049382716e-05, | |
| "loss": 0.2837, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.9481481481481482, | |
| "grad_norm": 0.7722509503364563, | |
| "learning_rate": 1.620740740740741e-05, | |
| "loss": 0.6167, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.9530864197530864, | |
| "grad_norm": 0.762596845626831, | |
| "learning_rate": 1.6187654320987657e-05, | |
| "loss": 0.1728, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.9580246913580247, | |
| "grad_norm": 40.202091217041016, | |
| "learning_rate": 1.6167901234567903e-05, | |
| "loss": 0.2449, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.9629629629629629, | |
| "grad_norm": 57.35947799682617, | |
| "learning_rate": 1.614814814814815e-05, | |
| "loss": 0.4488, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.9679012345679012, | |
| "grad_norm": 68.08243560791016, | |
| "learning_rate": 1.6128395061728396e-05, | |
| "loss": 0.3488, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.9728395061728395, | |
| "grad_norm": 1.9619942903518677, | |
| "learning_rate": 1.6108641975308642e-05, | |
| "loss": 0.2035, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.9777777777777777, | |
| "grad_norm": 0.8691776990890503, | |
| "learning_rate": 1.608888888888889e-05, | |
| "loss": 0.2162, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.9827160493827161, | |
| "grad_norm": 0.5446602702140808, | |
| "learning_rate": 1.606913580246914e-05, | |
| "loss": 0.4364, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.9876543209876543, | |
| "grad_norm": 10.081711769104004, | |
| "learning_rate": 1.6049382716049385e-05, | |
| "loss": 0.2213, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.9925925925925926, | |
| "grad_norm": 0.02493743598461151, | |
| "learning_rate": 1.602962962962963e-05, | |
| "loss": 0.0608, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.9975308641975309, | |
| "grad_norm": 2.9489526748657227, | |
| "learning_rate": 1.6009876543209878e-05, | |
| "loss": 0.3004, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.9677777777777777, | |
| "eval_loss": 0.11802458763122559, | |
| "eval_runtime": 32.902, | |
| "eval_samples_per_second": 164.124, | |
| "eval_steps_per_second": 20.515, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 1.002469135802469, | |
| "grad_norm": 138.59349060058594, | |
| "learning_rate": 1.5990123456790124e-05, | |
| "loss": 0.2046, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 1.0074074074074073, | |
| "grad_norm": 0.05510491877794266, | |
| "learning_rate": 1.597037037037037e-05, | |
| "loss": 0.1356, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.0123456790123457, | |
| "grad_norm": 14.264396667480469, | |
| "learning_rate": 1.5950617283950617e-05, | |
| "loss": 0.1624, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.017283950617284, | |
| "grad_norm": 0.9380566477775574, | |
| "learning_rate": 1.5930864197530867e-05, | |
| "loss": 0.2289, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 1.0222222222222221, | |
| "grad_norm": 0.017738979309797287, | |
| "learning_rate": 1.5911111111111113e-05, | |
| "loss": 0.4301, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 1.0271604938271606, | |
| "grad_norm": 0.030082279816269875, | |
| "learning_rate": 1.589135802469136e-05, | |
| "loss": 0.1049, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 1.0320987654320988, | |
| "grad_norm": 0.28669413924217224, | |
| "learning_rate": 1.5871604938271606e-05, | |
| "loss": 0.1245, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 1.037037037037037, | |
| "grad_norm": 7.697299003601074, | |
| "learning_rate": 1.5851851851851852e-05, | |
| "loss": 0.6147, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.0419753086419754, | |
| "grad_norm": 99.23163604736328, | |
| "learning_rate": 1.58320987654321e-05, | |
| "loss": 0.1613, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 1.0469135802469136, | |
| "grad_norm": 52.61363220214844, | |
| "learning_rate": 1.5812345679012345e-05, | |
| "loss": 0.3256, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 1.0518518518518518, | |
| "grad_norm": 87.68861389160156, | |
| "learning_rate": 1.5792592592592595e-05, | |
| "loss": 0.2956, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 1.05679012345679, | |
| "grad_norm": 30.490577697753906, | |
| "learning_rate": 1.577283950617284e-05, | |
| "loss": 0.2226, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 1.0617283950617284, | |
| "grad_norm": 1.5879323482513428, | |
| "learning_rate": 1.5753086419753088e-05, | |
| "loss": 0.3573, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.0666666666666667, | |
| "grad_norm": 0.36435502767562866, | |
| "learning_rate": 1.5733333333333334e-05, | |
| "loss": 0.167, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.0716049382716049, | |
| "grad_norm": 0.3206441104412079, | |
| "learning_rate": 1.5713580246913584e-05, | |
| "loss": 0.2698, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 1.0765432098765433, | |
| "grad_norm": 17.28899574279785, | |
| "learning_rate": 1.569382716049383e-05, | |
| "loss": 0.383, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 1.0814814814814815, | |
| "grad_norm": 31.972209930419922, | |
| "learning_rate": 1.5674074074074073e-05, | |
| "loss": 0.2109, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 1.0864197530864197, | |
| "grad_norm": 35.79594802856445, | |
| "learning_rate": 1.565432098765432e-05, | |
| "loss": 0.2666, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.0913580246913581, | |
| "grad_norm": 3.0720813274383545, | |
| "learning_rate": 1.563456790123457e-05, | |
| "loss": 0.0663, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 1.0962962962962963, | |
| "grad_norm": 46.16384506225586, | |
| "learning_rate": 1.5614814814814816e-05, | |
| "loss": 0.1775, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.1012345679012345, | |
| "grad_norm": 3.8352577686309814, | |
| "learning_rate": 1.5595061728395062e-05, | |
| "loss": 0.1719, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 1.106172839506173, | |
| "grad_norm": 24.50127601623535, | |
| "learning_rate": 1.5575308641975312e-05, | |
| "loss": 0.4285, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.1111111111111112, | |
| "grad_norm": 45.77573776245117, | |
| "learning_rate": 1.555555555555556e-05, | |
| "loss": 0.3723, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.1160493827160494, | |
| "grad_norm": 51.60211181640625, | |
| "learning_rate": 1.5535802469135805e-05, | |
| "loss": 0.1194, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 1.1209876543209876, | |
| "grad_norm": 48.674163818359375, | |
| "learning_rate": 1.551604938271605e-05, | |
| "loss": 0.3845, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 1.125925925925926, | |
| "grad_norm": 0.43790122866630554, | |
| "learning_rate": 1.5496296296296298e-05, | |
| "loss": 0.1622, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 1.1308641975308642, | |
| "grad_norm": 0.4926997125148773, | |
| "learning_rate": 1.5476543209876544e-05, | |
| "loss": 0.1739, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 1.1358024691358024, | |
| "grad_norm": 27.840295791625977, | |
| "learning_rate": 1.545679012345679e-05, | |
| "loss": 0.1265, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.1407407407407408, | |
| "grad_norm": 148.9844207763672, | |
| "learning_rate": 1.543703703703704e-05, | |
| "loss": 0.2187, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 1.145679012345679, | |
| "grad_norm": 63.56736373901367, | |
| "learning_rate": 1.5417283950617286e-05, | |
| "loss": 0.2227, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 1.1506172839506172, | |
| "grad_norm": 32.42955780029297, | |
| "learning_rate": 1.5397530864197533e-05, | |
| "loss": 0.1863, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 1.1555555555555554, | |
| "grad_norm": 72.6145248413086, | |
| "learning_rate": 1.537777777777778e-05, | |
| "loss": 0.3744, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 1.1604938271604939, | |
| "grad_norm": 4.558436393737793, | |
| "learning_rate": 1.5358024691358026e-05, | |
| "loss": 0.2796, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.165432098765432, | |
| "grad_norm": 0.5049192905426025, | |
| "learning_rate": 1.5338271604938272e-05, | |
| "loss": 0.1426, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 1.1703703703703703, | |
| "grad_norm": 0.11132398992776871, | |
| "learning_rate": 1.531851851851852e-05, | |
| "loss": 0.1231, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 1.1753086419753087, | |
| "grad_norm": 26.840200424194336, | |
| "learning_rate": 1.5298765432098768e-05, | |
| "loss": 0.2786, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 1.180246913580247, | |
| "grad_norm": 0.15319669246673584, | |
| "learning_rate": 1.5279012345679015e-05, | |
| "loss": 0.5859, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 1.1851851851851851, | |
| "grad_norm": 39.83156204223633, | |
| "learning_rate": 1.525925925925926e-05, | |
| "loss": 0.4391, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.1901234567901235, | |
| "grad_norm": 0.38840270042419434, | |
| "learning_rate": 1.5239506172839507e-05, | |
| "loss": 0.1187, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 1.1950617283950618, | |
| "grad_norm": 0.025911659002304077, | |
| "learning_rate": 1.5219753086419755e-05, | |
| "loss": 0.0865, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 81.05162048339844, | |
| "learning_rate": 1.5200000000000002e-05, | |
| "loss": 0.3289, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 1.2049382716049384, | |
| "grad_norm": 72.2834701538086, | |
| "learning_rate": 1.5180246913580248e-05, | |
| "loss": 0.5105, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 1.2098765432098766, | |
| "grad_norm": 0.06509275734424591, | |
| "learning_rate": 1.5160493827160495e-05, | |
| "loss": 0.2435, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.2148148148148148, | |
| "grad_norm": 12.417915344238281, | |
| "learning_rate": 1.5140740740740743e-05, | |
| "loss": 0.3175, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 1.219753086419753, | |
| "grad_norm": 64.59101104736328, | |
| "learning_rate": 1.5120987654320989e-05, | |
| "loss": 0.4517, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 1.2246913580246914, | |
| "grad_norm": 43.42831802368164, | |
| "learning_rate": 1.5101234567901236e-05, | |
| "loss": 0.1514, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 1.2296296296296296, | |
| "grad_norm": 0.5973836779594421, | |
| "learning_rate": 1.5081481481481484e-05, | |
| "loss": 0.1027, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 1.2345679012345678, | |
| "grad_norm": 41.84488296508789, | |
| "learning_rate": 1.506172839506173e-05, | |
| "loss": 0.2706, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.2395061728395063, | |
| "grad_norm": 135.85255432128906, | |
| "learning_rate": 1.5041975308641976e-05, | |
| "loss": 0.204, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 1.2444444444444445, | |
| "grad_norm": 14.007678985595703, | |
| "learning_rate": 1.5022222222222223e-05, | |
| "loss": 0.4253, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 1.2493827160493827, | |
| "grad_norm": 34.2636833190918, | |
| "learning_rate": 1.5002469135802471e-05, | |
| "loss": 0.21, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 1.2543209876543209, | |
| "grad_norm": 19.363365173339844, | |
| "learning_rate": 1.4982716049382717e-05, | |
| "loss": 0.2031, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 1.2592592592592593, | |
| "grad_norm": 0.3058103919029236, | |
| "learning_rate": 1.4962962962962964e-05, | |
| "loss": 0.2789, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.2641975308641975, | |
| "grad_norm": 70.8534164428711, | |
| "learning_rate": 1.4943209876543212e-05, | |
| "loss": 0.4306, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 1.269135802469136, | |
| "grad_norm": 0.1311403512954712, | |
| "learning_rate": 1.4923456790123458e-05, | |
| "loss": 0.4098, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 1.2740740740740741, | |
| "grad_norm": 84.89444732666016, | |
| "learning_rate": 1.4903703703703705e-05, | |
| "loss": 0.2931, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 1.2790123456790123, | |
| "grad_norm": 0.9064738154411316, | |
| "learning_rate": 1.4883950617283951e-05, | |
| "loss": 0.3069, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 1.2839506172839505, | |
| "grad_norm": 0.491811603307724, | |
| "learning_rate": 1.4864197530864199e-05, | |
| "loss": 0.2636, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.2888888888888888, | |
| "grad_norm": 35.797969818115234, | |
| "learning_rate": 1.4844444444444445e-05, | |
| "loss": 0.2673, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 1.2938271604938272, | |
| "grad_norm": 0.0416533537209034, | |
| "learning_rate": 1.4824691358024692e-05, | |
| "loss": 0.0711, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 1.2987654320987654, | |
| "grad_norm": 4.76767635345459, | |
| "learning_rate": 1.480493827160494e-05, | |
| "loss": 0.2506, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 1.3037037037037038, | |
| "grad_norm": 32.206031799316406, | |
| "learning_rate": 1.4785185185185186e-05, | |
| "loss": 0.453, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 1.308641975308642, | |
| "grad_norm": 131.6813201904297, | |
| "learning_rate": 1.4765432098765433e-05, | |
| "loss": 0.1793, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.3135802469135802, | |
| "grad_norm": 7.119224548339844, | |
| "learning_rate": 1.4745679012345679e-05, | |
| "loss": 0.0779, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 1.3185185185185184, | |
| "grad_norm": 139.8772735595703, | |
| "learning_rate": 1.4725925925925927e-05, | |
| "loss": 0.4545, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 1.3234567901234568, | |
| "grad_norm": 0.4141978919506073, | |
| "learning_rate": 1.4706172839506174e-05, | |
| "loss": 0.2352, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 1.328395061728395, | |
| "grad_norm": 42.8140869140625, | |
| "learning_rate": 1.468641975308642e-05, | |
| "loss": 0.1611, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 16.763948440551758, | |
| "learning_rate": 1.4666666666666666e-05, | |
| "loss": 0.0735, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.3382716049382717, | |
| "grad_norm": 140.94900512695312, | |
| "learning_rate": 1.4646913580246916e-05, | |
| "loss": 0.1474, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 1.34320987654321, | |
| "grad_norm": 0.9029823541641235, | |
| "learning_rate": 1.4627160493827162e-05, | |
| "loss": 0.0437, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 1.348148148148148, | |
| "grad_norm": 46.620086669921875, | |
| "learning_rate": 1.4607407407407407e-05, | |
| "loss": 0.1856, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 1.3530864197530863, | |
| "grad_norm": 64.09046173095703, | |
| "learning_rate": 1.4587654320987657e-05, | |
| "loss": 0.1532, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 1.3580246913580247, | |
| "grad_norm": 104.23167419433594, | |
| "learning_rate": 1.4567901234567903e-05, | |
| "loss": 0.2386, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.362962962962963, | |
| "grad_norm": 0.36242911219596863, | |
| "learning_rate": 1.454814814814815e-05, | |
| "loss": 0.4831, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 1.3679012345679014, | |
| "grad_norm": 0.5484885573387146, | |
| "learning_rate": 1.4528395061728396e-05, | |
| "loss": 0.0836, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 1.3728395061728396, | |
| "grad_norm": 51.26658630371094, | |
| "learning_rate": 1.4508641975308644e-05, | |
| "loss": 0.1736, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 1.3777777777777778, | |
| "grad_norm": 20.211082458496094, | |
| "learning_rate": 1.448888888888889e-05, | |
| "loss": 0.3063, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 1.382716049382716, | |
| "grad_norm": 0.7425023913383484, | |
| "learning_rate": 1.4469135802469137e-05, | |
| "loss": 0.1025, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.3876543209876544, | |
| "grad_norm": 159.22314453125, | |
| "learning_rate": 1.4449382716049385e-05, | |
| "loss": 0.2052, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 1.3925925925925926, | |
| "grad_norm": 47.53805923461914, | |
| "learning_rate": 1.4429629629629631e-05, | |
| "loss": 0.1378, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.3975308641975308, | |
| "grad_norm": 0.2027841955423355, | |
| "learning_rate": 1.4409876543209878e-05, | |
| "loss": 0.0507, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 1.4024691358024692, | |
| "grad_norm": 0.18290477991104126, | |
| "learning_rate": 1.4390123456790124e-05, | |
| "loss": 0.2193, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 1.4074074074074074, | |
| "grad_norm": 126.16277313232422, | |
| "learning_rate": 1.4370370370370372e-05, | |
| "loss": 0.3206, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.4123456790123456, | |
| "grad_norm": 127.88780975341797, | |
| "learning_rate": 1.4350617283950619e-05, | |
| "loss": 0.4142, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 1.4172839506172838, | |
| "grad_norm": 3.724766254425049, | |
| "learning_rate": 1.4330864197530865e-05, | |
| "loss": 0.0783, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 1.4222222222222223, | |
| "grad_norm": 199.94883728027344, | |
| "learning_rate": 1.4311111111111111e-05, | |
| "loss": 0.3896, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.4271604938271605, | |
| "grad_norm": 116.74020385742188, | |
| "learning_rate": 1.429135802469136e-05, | |
| "loss": 0.2982, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 1.4320987654320987, | |
| "grad_norm": 2.576690673828125, | |
| "learning_rate": 1.4271604938271606e-05, | |
| "loss": 0.1678, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.4370370370370371, | |
| "grad_norm": 95.74549865722656, | |
| "learning_rate": 1.4251851851851852e-05, | |
| "loss": 0.2808, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 1.4419753086419753, | |
| "grad_norm": 43.24068069458008, | |
| "learning_rate": 1.42320987654321e-05, | |
| "loss": 0.3589, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.4469135802469135, | |
| "grad_norm": 40.1359977722168, | |
| "learning_rate": 1.4212345679012347e-05, | |
| "loss": 0.1566, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 1.4518518518518517, | |
| "grad_norm": 7.546663284301758, | |
| "learning_rate": 1.4192592592592593e-05, | |
| "loss": 0.1562, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.4567901234567902, | |
| "grad_norm": 117.94816589355469, | |
| "learning_rate": 1.417283950617284e-05, | |
| "loss": 0.3526, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.4617283950617284, | |
| "grad_norm": 107.50965881347656, | |
| "learning_rate": 1.4153086419753088e-05, | |
| "loss": 0.2148, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 1.4666666666666668, | |
| "grad_norm": 16.908262252807617, | |
| "learning_rate": 1.4133333333333334e-05, | |
| "loss": 0.451, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 1.471604938271605, | |
| "grad_norm": 53.356773376464844, | |
| "learning_rate": 1.411358024691358e-05, | |
| "loss": 0.3616, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 1.4765432098765432, | |
| "grad_norm": 44.207054138183594, | |
| "learning_rate": 1.4093827160493829e-05, | |
| "loss": 0.0903, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 1.4814814814814814, | |
| "grad_norm": 78.0193862915039, | |
| "learning_rate": 1.4074074074074075e-05, | |
| "loss": 0.2323, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.4864197530864198, | |
| "grad_norm": 1.2068320512771606, | |
| "learning_rate": 1.4054320987654321e-05, | |
| "loss": 0.2748, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 1.491358024691358, | |
| "grad_norm": 15.009058952331543, | |
| "learning_rate": 1.4034567901234568e-05, | |
| "loss": 0.2607, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 1.4962962962962962, | |
| "grad_norm": 1.3016469478607178, | |
| "learning_rate": 1.4014814814814816e-05, | |
| "loss": 0.0402, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 1.5012345679012347, | |
| "grad_norm": 64.81990814208984, | |
| "learning_rate": 1.3995061728395062e-05, | |
| "loss": 0.4051, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.5061728395061729, | |
| "grad_norm": 18.911441802978516, | |
| "learning_rate": 1.3975308641975309e-05, | |
| "loss": 0.2663, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.511111111111111, | |
| "grad_norm": 89.58609771728516, | |
| "learning_rate": 1.3955555555555558e-05, | |
| "loss": 0.2006, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.5160493827160493, | |
| "grad_norm": 84.76557922363281, | |
| "learning_rate": 1.3935802469135805e-05, | |
| "loss": 0.1644, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 1.5209876543209877, | |
| "grad_norm": 0.690521240234375, | |
| "learning_rate": 1.391604938271605e-05, | |
| "loss": 0.3695, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 1.525925925925926, | |
| "grad_norm": 0.9079038500785828, | |
| "learning_rate": 1.3896296296296296e-05, | |
| "loss": 0.1316, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 1.5308641975308643, | |
| "grad_norm": 0.0010949569987133145, | |
| "learning_rate": 1.3876543209876546e-05, | |
| "loss": 0.1599, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.5358024691358025, | |
| "grad_norm": 0.017062200233340263, | |
| "learning_rate": 1.3856790123456792e-05, | |
| "loss": 0.2102, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 1.5407407407407407, | |
| "grad_norm": 54.44521713256836, | |
| "learning_rate": 1.3837037037037038e-05, | |
| "loss": 0.2856, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.545679012345679, | |
| "grad_norm": 124.57701873779297, | |
| "learning_rate": 1.3817283950617285e-05, | |
| "loss": 0.6973, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 1.5506172839506172, | |
| "grad_norm": 73.95056915283203, | |
| "learning_rate": 1.3797530864197533e-05, | |
| "loss": 0.134, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.5555555555555556, | |
| "grad_norm": 114.4755630493164, | |
| "learning_rate": 1.377777777777778e-05, | |
| "loss": 0.4007, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.5604938271604938, | |
| "grad_norm": 5.708268165588379, | |
| "learning_rate": 1.3758024691358026e-05, | |
| "loss": 0.2191, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.5654320987654322, | |
| "grad_norm": 39.35977554321289, | |
| "learning_rate": 1.3738271604938274e-05, | |
| "loss": 0.1217, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 1.5703703703703704, | |
| "grad_norm": 1.868407130241394, | |
| "learning_rate": 1.371851851851852e-05, | |
| "loss": 0.1177, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.5753086419753086, | |
| "grad_norm": 7.092827320098877, | |
| "learning_rate": 1.3698765432098767e-05, | |
| "loss": 0.1979, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 1.5802469135802468, | |
| "grad_norm": 0.005435746628791094, | |
| "learning_rate": 1.3679012345679013e-05, | |
| "loss": 0.1564, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.585185185185185, | |
| "grad_norm": 80.7311019897461, | |
| "learning_rate": 1.3659259259259261e-05, | |
| "loss": 0.2003, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 1.5901234567901235, | |
| "grad_norm": 0.9620011448860168, | |
| "learning_rate": 1.3639506172839507e-05, | |
| "loss": 0.1261, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.5950617283950619, | |
| "grad_norm": 95.69831085205078, | |
| "learning_rate": 1.3619753086419754e-05, | |
| "loss": 0.1809, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 61.438812255859375, | |
| "learning_rate": 1.3600000000000002e-05, | |
| "loss": 0.506, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.6049382716049383, | |
| "grad_norm": 325.63250732421875, | |
| "learning_rate": 1.3580246913580248e-05, | |
| "loss": 0.3093, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.6098765432098765, | |
| "grad_norm": 17.00379180908203, | |
| "learning_rate": 1.3560493827160495e-05, | |
| "loss": 0.2099, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.6148148148148147, | |
| "grad_norm": 100.260498046875, | |
| "learning_rate": 1.3540740740740741e-05, | |
| "loss": 0.6063, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 1.6197530864197531, | |
| "grad_norm": 0.09998781979084015, | |
| "learning_rate": 1.352098765432099e-05, | |
| "loss": 0.3561, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.6246913580246913, | |
| "grad_norm": 0.34626302123069763, | |
| "learning_rate": 1.3501234567901236e-05, | |
| "loss": 0.0074, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 1.6296296296296298, | |
| "grad_norm": 0.034202978014945984, | |
| "learning_rate": 1.3481481481481482e-05, | |
| "loss": 0.4788, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.634567901234568, | |
| "grad_norm": 18.52402687072754, | |
| "learning_rate": 1.346172839506173e-05, | |
| "loss": 0.1834, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 1.6395061728395062, | |
| "grad_norm": 1.5653138160705566, | |
| "learning_rate": 1.3441975308641976e-05, | |
| "loss": 0.354, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.6444444444444444, | |
| "grad_norm": 69.99710845947266, | |
| "learning_rate": 1.3422222222222223e-05, | |
| "loss": 0.3642, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 1.6493827160493826, | |
| "grad_norm": 50.67994689941406, | |
| "learning_rate": 1.340246913580247e-05, | |
| "loss": 0.1864, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.654320987654321, | |
| "grad_norm": 0.31549400091171265, | |
| "learning_rate": 1.3382716049382717e-05, | |
| "loss": 0.3157, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.6592592592592592, | |
| "grad_norm": 111.24998474121094, | |
| "learning_rate": 1.3362962962962964e-05, | |
| "loss": 0.6087, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.6641975308641976, | |
| "grad_norm": 23.009380340576172, | |
| "learning_rate": 1.334320987654321e-05, | |
| "loss": 0.1866, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 1.6691358024691358, | |
| "grad_norm": 88.22378540039062, | |
| "learning_rate": 1.3323456790123456e-05, | |
| "loss": 0.1728, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.674074074074074, | |
| "grad_norm": 0.3229973316192627, | |
| "learning_rate": 1.3303703703703705e-05, | |
| "loss": 0.4118, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.6790123456790123, | |
| "grad_norm": 5.422463893890381, | |
| "learning_rate": 1.3283950617283951e-05, | |
| "loss": 0.2223, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.6839506172839505, | |
| "grad_norm": 0.07091034948825836, | |
| "learning_rate": 1.3264197530864197e-05, | |
| "loss": 0.5162, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.6888888888888889, | |
| "grad_norm": 0.41538941860198975, | |
| "learning_rate": 1.3244444444444447e-05, | |
| "loss": 0.4052, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.6938271604938273, | |
| "grad_norm": 7.8336181640625, | |
| "learning_rate": 1.3224691358024694e-05, | |
| "loss": 0.1862, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.6987654320987655, | |
| "grad_norm": 7.325730800628662, | |
| "learning_rate": 1.3204938271604938e-05, | |
| "loss": 0.1988, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.7037037037037037, | |
| "grad_norm": 39.67108154296875, | |
| "learning_rate": 1.3185185185185185e-05, | |
| "loss": 0.3016, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.708641975308642, | |
| "grad_norm": 0.42901355028152466, | |
| "learning_rate": 1.3165432098765434e-05, | |
| "loss": 0.008, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.7135802469135801, | |
| "grad_norm": 99.74118041992188, | |
| "learning_rate": 1.314567901234568e-05, | |
| "loss": 0.3562, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.7185185185185186, | |
| "grad_norm": 41.35346221923828, | |
| "learning_rate": 1.3125925925925927e-05, | |
| "loss": 0.2514, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.7234567901234568, | |
| "grad_norm": 59.84602355957031, | |
| "learning_rate": 1.3106172839506175e-05, | |
| "loss": 0.3048, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.7283950617283952, | |
| "grad_norm": 2.039802312850952, | |
| "learning_rate": 1.3086419753086422e-05, | |
| "loss": 0.2926, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.7333333333333334, | |
| "grad_norm": 66.14095306396484, | |
| "learning_rate": 1.3066666666666668e-05, | |
| "loss": 0.3515, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.7382716049382716, | |
| "grad_norm": 5.856687068939209, | |
| "learning_rate": 1.3046913580246914e-05, | |
| "loss": 0.2199, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.7432098765432098, | |
| "grad_norm": 89.60210418701172, | |
| "learning_rate": 1.3027160493827163e-05, | |
| "loss": 0.3104, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.748148148148148, | |
| "grad_norm": 2.4179534912109375, | |
| "learning_rate": 1.3007407407407409e-05, | |
| "loss": 0.2304, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.7530864197530864, | |
| "grad_norm": 39.764408111572266, | |
| "learning_rate": 1.2987654320987655e-05, | |
| "loss": 0.3049, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.7580246913580246, | |
| "grad_norm": 66.1130599975586, | |
| "learning_rate": 1.2967901234567903e-05, | |
| "loss": 0.1726, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.762962962962963, | |
| "grad_norm": 33.54975509643555, | |
| "learning_rate": 1.294814814814815e-05, | |
| "loss": 0.2627, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.7679012345679013, | |
| "grad_norm": 0.5882616639137268, | |
| "learning_rate": 1.2928395061728396e-05, | |
| "loss": 0.1133, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.7728395061728395, | |
| "grad_norm": 0.09102596342563629, | |
| "learning_rate": 1.2908641975308643e-05, | |
| "loss": 0.1391, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.7777777777777777, | |
| "grad_norm": 0.2745858430862427, | |
| "learning_rate": 1.288888888888889e-05, | |
| "loss": 0.1178, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.7827160493827159, | |
| "grad_norm": 0.22387881577014923, | |
| "learning_rate": 1.2869135802469137e-05, | |
| "loss": 0.2893, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.7876543209876543, | |
| "grad_norm": 0.3061552047729492, | |
| "learning_rate": 1.2849382716049383e-05, | |
| "loss": 0.2718, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.7925925925925927, | |
| "grad_norm": 40.53445053100586, | |
| "learning_rate": 1.282962962962963e-05, | |
| "loss": 0.0972, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.797530864197531, | |
| "grad_norm": 0.2346036285161972, | |
| "learning_rate": 1.2809876543209878e-05, | |
| "loss": 0.1796, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.8024691358024691, | |
| "grad_norm": 84.19086456298828, | |
| "learning_rate": 1.2790123456790124e-05, | |
| "loss": 0.2555, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.8074074074074074, | |
| "grad_norm": 26.573976516723633, | |
| "learning_rate": 1.277037037037037e-05, | |
| "loss": 0.1533, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.8123456790123456, | |
| "grad_norm": 0.0031530587002635, | |
| "learning_rate": 1.2750617283950619e-05, | |
| "loss": 0.1559, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.817283950617284, | |
| "grad_norm": 72.7174072265625, | |
| "learning_rate": 1.2730864197530865e-05, | |
| "loss": 0.1383, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.8222222222222222, | |
| "grad_norm": 0.07971396297216415, | |
| "learning_rate": 1.2711111111111112e-05, | |
| "loss": 0.3888, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.8271604938271606, | |
| "grad_norm": 82.53282165527344, | |
| "learning_rate": 1.2691358024691358e-05, | |
| "loss": 0.113, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.8320987654320988, | |
| "grad_norm": 0.34782519936561584, | |
| "learning_rate": 1.2671604938271606e-05, | |
| "loss": 0.2208, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.837037037037037, | |
| "grad_norm": 6.04480504989624, | |
| "learning_rate": 1.2651851851851852e-05, | |
| "loss": 0.3451, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.8419753086419752, | |
| "grad_norm": 15.001103401184082, | |
| "learning_rate": 1.2632098765432099e-05, | |
| "loss": 0.0905, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.8469135802469134, | |
| "grad_norm": 47.090877532958984, | |
| "learning_rate": 1.2612345679012347e-05, | |
| "loss": 0.2327, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.8518518518518519, | |
| "grad_norm": 0.032411132007837296, | |
| "learning_rate": 1.2592592592592593e-05, | |
| "loss": 0.268, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.8567901234567903, | |
| "grad_norm": 54.430667877197266, | |
| "learning_rate": 1.257283950617284e-05, | |
| "loss": 0.213, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.8617283950617285, | |
| "grad_norm": 0.37125247716903687, | |
| "learning_rate": 1.2553086419753086e-05, | |
| "loss": 0.1433, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.8666666666666667, | |
| "grad_norm": 0.05495602637529373, | |
| "learning_rate": 1.2533333333333336e-05, | |
| "loss": 0.3747, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.871604938271605, | |
| "grad_norm": 35.28487777709961, | |
| "learning_rate": 1.2513580246913582e-05, | |
| "loss": 0.503, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.876543209876543, | |
| "grad_norm": 60.75400924682617, | |
| "learning_rate": 1.2493827160493827e-05, | |
| "loss": 0.1602, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.8814814814814815, | |
| "grad_norm": 137.60702514648438, | |
| "learning_rate": 1.2474074074074073e-05, | |
| "loss": 0.2931, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.8864197530864197, | |
| "grad_norm": 60.11787796020508, | |
| "learning_rate": 1.2454320987654323e-05, | |
| "loss": 0.354, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.8913580246913582, | |
| "grad_norm": 19.017499923706055, | |
| "learning_rate": 1.243456790123457e-05, | |
| "loss": 0.1684, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.8962962962962964, | |
| "grad_norm": 43.31821823120117, | |
| "learning_rate": 1.2414814814814816e-05, | |
| "loss": 0.1728, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.9012345679012346, | |
| "grad_norm": 602.893798828125, | |
| "learning_rate": 1.2395061728395064e-05, | |
| "loss": 0.2077, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.9061728395061728, | |
| "grad_norm": 12.869080543518066, | |
| "learning_rate": 1.237530864197531e-05, | |
| "loss": 0.2419, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.911111111111111, | |
| "grad_norm": 0.9421246647834778, | |
| "learning_rate": 1.2355555555555557e-05, | |
| "loss": 0.3389, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.9160493827160494, | |
| "grad_norm": 3.65885591506958, | |
| "learning_rate": 1.2335802469135803e-05, | |
| "loss": 0.5007, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.9209876543209876, | |
| "grad_norm": 3.625490665435791, | |
| "learning_rate": 1.2316049382716051e-05, | |
| "loss": 0.1538, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.925925925925926, | |
| "grad_norm": 92.34613800048828, | |
| "learning_rate": 1.2296296296296298e-05, | |
| "loss": 0.4137, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.9308641975308642, | |
| "grad_norm": 0.5257686376571655, | |
| "learning_rate": 1.2276543209876544e-05, | |
| "loss": 0.2876, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 1.9358024691358025, | |
| "grad_norm": 0.39652788639068604, | |
| "learning_rate": 1.2256790123456792e-05, | |
| "loss": 0.254, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.9407407407407407, | |
| "grad_norm": 26.36481285095215, | |
| "learning_rate": 1.2237037037037039e-05, | |
| "loss": 0.271, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 1.9456790123456789, | |
| "grad_norm": 0.03053528629243374, | |
| "learning_rate": 1.2217283950617285e-05, | |
| "loss": 0.0742, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.9506172839506173, | |
| "grad_norm": 0.09434489160776138, | |
| "learning_rate": 1.2197530864197531e-05, | |
| "loss": 0.1895, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.9555555555555557, | |
| "grad_norm": 69.78058624267578, | |
| "learning_rate": 1.217777777777778e-05, | |
| "loss": 0.4513, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 1.960493827160494, | |
| "grad_norm": 0.07707086950540543, | |
| "learning_rate": 1.2158024691358026e-05, | |
| "loss": 0.054, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 1.9654320987654321, | |
| "grad_norm": 37.1689453125, | |
| "learning_rate": 1.2138271604938272e-05, | |
| "loss": 0.0594, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 1.9703703703703703, | |
| "grad_norm": 48.61039352416992, | |
| "learning_rate": 1.211851851851852e-05, | |
| "loss": 0.1572, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 1.9753086419753085, | |
| "grad_norm": 163.54615783691406, | |
| "learning_rate": 1.2098765432098767e-05, | |
| "loss": 0.1604, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.980246913580247, | |
| "grad_norm": 85.144775390625, | |
| "learning_rate": 1.2079012345679013e-05, | |
| "loss": 0.1157, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 1.9851851851851852, | |
| "grad_norm": 15.836172103881836, | |
| "learning_rate": 1.205925925925926e-05, | |
| "loss": 0.1904, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 1.9901234567901236, | |
| "grad_norm": 2.649322748184204, | |
| "learning_rate": 1.2039506172839508e-05, | |
| "loss": 0.2893, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 1.9950617283950618, | |
| "grad_norm": 1.9400321245193481, | |
| "learning_rate": 1.2019753086419754e-05, | |
| "loss": 0.2295, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 74.16377258300781, | |
| "learning_rate": 1.2e-05, | |
| "loss": 0.539, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.9798148148148148, | |
| "eval_loss": 0.08341296017169952, | |
| "eval_runtime": 32.2756, | |
| "eval_samples_per_second": 167.309, | |
| "eval_steps_per_second": 20.914, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.004938271604938, | |
| "grad_norm": 0.23466235399246216, | |
| "learning_rate": 1.1980246913580247e-05, | |
| "loss": 0.0651, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 2.0098765432098764, | |
| "grad_norm": 10.602593421936035, | |
| "learning_rate": 1.1960493827160495e-05, | |
| "loss": 0.2293, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 2.0148148148148146, | |
| "grad_norm": 101.87135314941406, | |
| "learning_rate": 1.1940740740740741e-05, | |
| "loss": 0.3077, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 2.0197530864197533, | |
| "grad_norm": 27.52354621887207, | |
| "learning_rate": 1.1920987654320988e-05, | |
| "loss": 0.1848, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 2.0246913580246915, | |
| "grad_norm": 90.54155731201172, | |
| "learning_rate": 1.1901234567901236e-05, | |
| "loss": 0.2108, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.0296296296296297, | |
| "grad_norm": 0.018464339897036552, | |
| "learning_rate": 1.1881481481481482e-05, | |
| "loss": 0.1732, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 2.034567901234568, | |
| "grad_norm": 0.21476837992668152, | |
| "learning_rate": 1.1861728395061728e-05, | |
| "loss": 0.5227, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 2.039506172839506, | |
| "grad_norm": 95.82560729980469, | |
| "learning_rate": 1.1841975308641975e-05, | |
| "loss": 0.1769, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 2.0444444444444443, | |
| "grad_norm": 6.9548468589782715, | |
| "learning_rate": 1.1822222222222225e-05, | |
| "loss": 0.2134, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 2.049382716049383, | |
| "grad_norm": 80.2332763671875, | |
| "learning_rate": 1.180246913580247e-05, | |
| "loss": 0.2451, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.054320987654321, | |
| "grad_norm": 19.164928436279297, | |
| "learning_rate": 1.1782716049382716e-05, | |
| "loss": 0.1896, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 2.0592592592592593, | |
| "grad_norm": 0.12828746438026428, | |
| "learning_rate": 1.1762962962962965e-05, | |
| "loss": 0.077, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 2.0641975308641975, | |
| "grad_norm": 3.3232741355895996, | |
| "learning_rate": 1.1743209876543212e-05, | |
| "loss": 0.0855, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 2.0691358024691358, | |
| "grad_norm": 0.32502618432044983, | |
| "learning_rate": 1.1723456790123458e-05, | |
| "loss": 0.2269, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 2.074074074074074, | |
| "grad_norm": 1.072849154472351, | |
| "learning_rate": 1.1703703703703703e-05, | |
| "loss": 0.2473, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.079012345679012, | |
| "grad_norm": 3.3251664638519287, | |
| "learning_rate": 1.1683950617283953e-05, | |
| "loss": 0.2367, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 2.083950617283951, | |
| "grad_norm": 0.1870512068271637, | |
| "learning_rate": 1.1664197530864199e-05, | |
| "loss": 0.2782, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 2.088888888888889, | |
| "grad_norm": 3.8792381286621094, | |
| "learning_rate": 1.1644444444444446e-05, | |
| "loss": 0.1886, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 2.093827160493827, | |
| "grad_norm": 47.594451904296875, | |
| "learning_rate": 1.1624691358024694e-05, | |
| "loss": 0.3145, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 2.0987654320987654, | |
| "grad_norm": 158.525634765625, | |
| "learning_rate": 1.160493827160494e-05, | |
| "loss": 0.3143, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.1037037037037036, | |
| "grad_norm": 74.01322174072266, | |
| "learning_rate": 1.1585185185185186e-05, | |
| "loss": 0.1924, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 2.108641975308642, | |
| "grad_norm": 75.74314880371094, | |
| "learning_rate": 1.1565432098765433e-05, | |
| "loss": 0.3617, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 2.11358024691358, | |
| "grad_norm": 22.196048736572266, | |
| "learning_rate": 1.1545679012345681e-05, | |
| "loss": 0.2283, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 2.1185185185185187, | |
| "grad_norm": 0.7152767777442932, | |
| "learning_rate": 1.1525925925925927e-05, | |
| "loss": 0.3129, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 2.123456790123457, | |
| "grad_norm": 0.11401913315057755, | |
| "learning_rate": 1.1506172839506174e-05, | |
| "loss": 0.2689, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.128395061728395, | |
| "grad_norm": 52.53899002075195, | |
| "learning_rate": 1.148641975308642e-05, | |
| "loss": 0.0563, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 2.1333333333333333, | |
| "grad_norm": 42.3081169128418, | |
| "learning_rate": 1.1466666666666668e-05, | |
| "loss": 0.2296, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 2.1382716049382715, | |
| "grad_norm": 10.208148002624512, | |
| "learning_rate": 1.1446913580246915e-05, | |
| "loss": 0.3501, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 2.1432098765432097, | |
| "grad_norm": 20.181745529174805, | |
| "learning_rate": 1.1427160493827161e-05, | |
| "loss": 0.0309, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 2.148148148148148, | |
| "grad_norm": 0.01720772311091423, | |
| "learning_rate": 1.1407407407407409e-05, | |
| "loss": 0.1887, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 2.1530864197530866, | |
| "grad_norm": 6.094252109527588, | |
| "learning_rate": 1.1387654320987655e-05, | |
| "loss": 0.0933, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 2.1580246913580248, | |
| "grad_norm": 0.02691926248371601, | |
| "learning_rate": 1.1367901234567902e-05, | |
| "loss": 0.1443, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 2.162962962962963, | |
| "grad_norm": 0.3429844081401825, | |
| "learning_rate": 1.1348148148148148e-05, | |
| "loss": 0.253, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 2.167901234567901, | |
| "grad_norm": 36.565834045410156, | |
| "learning_rate": 1.1328395061728396e-05, | |
| "loss": 0.3124, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 2.1728395061728394, | |
| "grad_norm": 0.1142088919878006, | |
| "learning_rate": 1.1308641975308643e-05, | |
| "loss": 0.2102, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.1777777777777776, | |
| "grad_norm": 1.0915874242782593, | |
| "learning_rate": 1.1288888888888889e-05, | |
| "loss": 0.117, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 2.1827160493827162, | |
| "grad_norm": 0.015154359862208366, | |
| "learning_rate": 1.1269135802469137e-05, | |
| "loss": 0.2591, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 2.1876543209876544, | |
| "grad_norm": 0.0378662571310997, | |
| "learning_rate": 1.1249382716049384e-05, | |
| "loss": 0.4314, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 2.1925925925925926, | |
| "grad_norm": 39.53334045410156, | |
| "learning_rate": 1.122962962962963e-05, | |
| "loss": 0.0796, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 2.197530864197531, | |
| "grad_norm": 33.39299011230469, | |
| "learning_rate": 1.1209876543209876e-05, | |
| "loss": 0.0708, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 2.202469135802469, | |
| "grad_norm": 32.73172378540039, | |
| "learning_rate": 1.1190123456790124e-05, | |
| "loss": 0.0602, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 2.2074074074074073, | |
| "grad_norm": 27.3021297454834, | |
| "learning_rate": 1.117037037037037e-05, | |
| "loss": 0.0563, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 2.212345679012346, | |
| "grad_norm": 33.85374450683594, | |
| "learning_rate": 1.1150617283950617e-05, | |
| "loss": 0.3346, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 2.217283950617284, | |
| "grad_norm": 46.218204498291016, | |
| "learning_rate": 1.1130864197530864e-05, | |
| "loss": 0.2087, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 47.22572326660156, | |
| "learning_rate": 1.1111111111111113e-05, | |
| "loss": 0.2552, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.2271604938271605, | |
| "grad_norm": 0.1430201381444931, | |
| "learning_rate": 1.1091358024691358e-05, | |
| "loss": 0.2517, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 2.2320987654320987, | |
| "grad_norm": 11.38235092163086, | |
| "learning_rate": 1.1071604938271604e-05, | |
| "loss": 0.1918, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 2.237037037037037, | |
| "grad_norm": 37.20140838623047, | |
| "learning_rate": 1.1051851851851854e-05, | |
| "loss": 0.1549, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 2.241975308641975, | |
| "grad_norm": 0.10535780340433121, | |
| "learning_rate": 1.10320987654321e-05, | |
| "loss": 0.0271, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 2.246913580246914, | |
| "grad_norm": 0.6121019124984741, | |
| "learning_rate": 1.1012345679012347e-05, | |
| "loss": 0.389, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 2.251851851851852, | |
| "grad_norm": 35.94973373413086, | |
| "learning_rate": 1.0992592592592592e-05, | |
| "loss": 0.3603, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 2.25679012345679, | |
| "grad_norm": 95.45260620117188, | |
| "learning_rate": 1.0972839506172841e-05, | |
| "loss": 0.4025, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 2.2617283950617284, | |
| "grad_norm": 0.17219342291355133, | |
| "learning_rate": 1.0953086419753088e-05, | |
| "loss": 0.2335, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 2.2666666666666666, | |
| "grad_norm": 1.9040601253509521, | |
| "learning_rate": 1.0933333333333334e-05, | |
| "loss": 0.3124, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 2.271604938271605, | |
| "grad_norm": 77.7896957397461, | |
| "learning_rate": 1.0913580246913582e-05, | |
| "loss": 0.2387, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 2.276543209876543, | |
| "grad_norm": 0.5370518565177917, | |
| "learning_rate": 1.0893827160493829e-05, | |
| "loss": 0.1187, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 2.2814814814814817, | |
| "grad_norm": 113.6650619506836, | |
| "learning_rate": 1.0874074074074075e-05, | |
| "loss": 0.3598, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 2.28641975308642, | |
| "grad_norm": 0.025056390091776848, | |
| "learning_rate": 1.0854320987654322e-05, | |
| "loss": 0.1631, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 2.291358024691358, | |
| "grad_norm": 0.0650627464056015, | |
| "learning_rate": 1.083456790123457e-05, | |
| "loss": 0.257, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 2.2962962962962963, | |
| "grad_norm": 34.378414154052734, | |
| "learning_rate": 1.0814814814814816e-05, | |
| "loss": 0.2349, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 2.3012345679012345, | |
| "grad_norm": 0.046463072299957275, | |
| "learning_rate": 1.0795061728395062e-05, | |
| "loss": 0.0695, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 2.3061728395061727, | |
| "grad_norm": 81.86966705322266, | |
| "learning_rate": 1.077530864197531e-05, | |
| "loss": 0.2093, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 2.311111111111111, | |
| "grad_norm": 0.004781852941960096, | |
| "learning_rate": 1.0755555555555557e-05, | |
| "loss": 0.1424, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 2.3160493827160495, | |
| "grad_norm": 0.817314624786377, | |
| "learning_rate": 1.0735802469135803e-05, | |
| "loss": 0.0413, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 2.3209876543209877, | |
| "grad_norm": 5.055154800415039, | |
| "learning_rate": 1.071604938271605e-05, | |
| "loss": 0.0046, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 2.325925925925926, | |
| "grad_norm": 133.45437622070312, | |
| "learning_rate": 1.0696296296296298e-05, | |
| "loss": 0.3131, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 2.330864197530864, | |
| "grad_norm": 0.014058091677725315, | |
| "learning_rate": 1.0676543209876544e-05, | |
| "loss": 0.1227, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 2.3358024691358024, | |
| "grad_norm": 4.482833385467529, | |
| "learning_rate": 1.065679012345679e-05, | |
| "loss": 0.1694, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 2.3407407407407406, | |
| "grad_norm": 0.8238074779510498, | |
| "learning_rate": 1.0637037037037037e-05, | |
| "loss": 0.1315, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 2.3456790123456788, | |
| "grad_norm": 55.907318115234375, | |
| "learning_rate": 1.0617283950617285e-05, | |
| "loss": 0.0988, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 2.3506172839506174, | |
| "grad_norm": 119.31465911865234, | |
| "learning_rate": 1.0597530864197531e-05, | |
| "loss": 0.2308, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 2.3555555555555556, | |
| "grad_norm": 5.956635475158691, | |
| "learning_rate": 1.0577777777777778e-05, | |
| "loss": 0.1726, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 2.360493827160494, | |
| "grad_norm": 1.8036092519760132, | |
| "learning_rate": 1.0558024691358026e-05, | |
| "loss": 0.2904, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 2.365432098765432, | |
| "grad_norm": 16.762969970703125, | |
| "learning_rate": 1.0538271604938272e-05, | |
| "loss": 0.039, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 2.3703703703703702, | |
| "grad_norm": 0.5352030992507935, | |
| "learning_rate": 1.0518518518518519e-05, | |
| "loss": 0.6986, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 2.375308641975309, | |
| "grad_norm": 72.20184326171875, | |
| "learning_rate": 1.0498765432098765e-05, | |
| "loss": 0.1986, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 2.380246913580247, | |
| "grad_norm": 39.09406661987305, | |
| "learning_rate": 1.0479012345679013e-05, | |
| "loss": 0.2384, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 2.3851851851851853, | |
| "grad_norm": 101.78142547607422, | |
| "learning_rate": 1.045925925925926e-05, | |
| "loss": 0.1049, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 2.3901234567901235, | |
| "grad_norm": 31.242937088012695, | |
| "learning_rate": 1.0439506172839506e-05, | |
| "loss": 0.3993, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 2.3950617283950617, | |
| "grad_norm": 107.1478271484375, | |
| "learning_rate": 1.0419753086419756e-05, | |
| "loss": 0.1895, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 0.6550659537315369, | |
| "learning_rate": 1.04e-05, | |
| "loss": 0.2174, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 2.404938271604938, | |
| "grad_norm": 37.14043045043945, | |
| "learning_rate": 1.0380246913580247e-05, | |
| "loss": 0.2233, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 2.4098765432098768, | |
| "grad_norm": 10.13899040222168, | |
| "learning_rate": 1.0360493827160493e-05, | |
| "loss": 0.4372, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 2.414814814814815, | |
| "grad_norm": 0.8044024705886841, | |
| "learning_rate": 1.0340740740740743e-05, | |
| "loss": 0.3235, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 2.419753086419753, | |
| "grad_norm": 0.08543165773153305, | |
| "learning_rate": 1.032098765432099e-05, | |
| "loss": 0.0319, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 2.4246913580246914, | |
| "grad_norm": 25.276649475097656, | |
| "learning_rate": 1.0301234567901236e-05, | |
| "loss": 0.2608, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 2.4296296296296296, | |
| "grad_norm": 53.250003814697266, | |
| "learning_rate": 1.0281481481481484e-05, | |
| "loss": 0.2555, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 2.434567901234568, | |
| "grad_norm": 0.0675877258181572, | |
| "learning_rate": 1.026172839506173e-05, | |
| "loss": 0.1439, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 2.439506172839506, | |
| "grad_norm": 0.07533666491508484, | |
| "learning_rate": 1.0241975308641977e-05, | |
| "loss": 0.2685, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 2.4444444444444446, | |
| "grad_norm": 0.0232541486620903, | |
| "learning_rate": 1.0222222222222223e-05, | |
| "loss": 0.1896, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 2.449382716049383, | |
| "grad_norm": 0.4157695770263672, | |
| "learning_rate": 1.0202469135802471e-05, | |
| "loss": 0.4117, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 2.454320987654321, | |
| "grad_norm": 6.473262786865234, | |
| "learning_rate": 1.0182716049382717e-05, | |
| "loss": 0.1608, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 2.4592592592592593, | |
| "grad_norm": 47.35124588012695, | |
| "learning_rate": 1.0162962962962964e-05, | |
| "loss": 0.1861, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 2.4641975308641975, | |
| "grad_norm": 0.0442415289580822, | |
| "learning_rate": 1.014320987654321e-05, | |
| "loss": 0.2317, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 2.4691358024691357, | |
| "grad_norm": 0.02038310095667839, | |
| "learning_rate": 1.0123456790123458e-05, | |
| "loss": 0.5267, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.474074074074074, | |
| "grad_norm": 166.4259033203125, | |
| "learning_rate": 1.0103703703703705e-05, | |
| "loss": 0.2363, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 2.4790123456790125, | |
| "grad_norm": 68.62043762207031, | |
| "learning_rate": 1.0083950617283951e-05, | |
| "loss": 0.2097, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 2.4839506172839507, | |
| "grad_norm": 2.836273431777954, | |
| "learning_rate": 1.00641975308642e-05, | |
| "loss": 0.2101, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 2.488888888888889, | |
| "grad_norm": 4.900826930999756, | |
| "learning_rate": 1.0044444444444446e-05, | |
| "loss": 0.191, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 2.493827160493827, | |
| "grad_norm": 22.4804744720459, | |
| "learning_rate": 1.0024691358024692e-05, | |
| "loss": 0.182, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 2.4987654320987653, | |
| "grad_norm": 0.00806320272386074, | |
| "learning_rate": 1.0004938271604938e-05, | |
| "loss": 0.0106, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 2.5037037037037035, | |
| "grad_norm": 0.13981568813323975, | |
| "learning_rate": 9.985185185185185e-06, | |
| "loss": 0.2085, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 2.5086419753086417, | |
| "grad_norm": 115.363037109375, | |
| "learning_rate": 9.965432098765433e-06, | |
| "loss": 0.3881, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 2.5135802469135804, | |
| "grad_norm": 0.5273131132125854, | |
| "learning_rate": 9.945679012345681e-06, | |
| "loss": 0.3149, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 2.5185185185185186, | |
| "grad_norm": 0.044860485941171646, | |
| "learning_rate": 9.925925925925927e-06, | |
| "loss": 0.1487, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 2.523456790123457, | |
| "grad_norm": 0.0039957864210009575, | |
| "learning_rate": 9.906172839506174e-06, | |
| "loss": 0.1385, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 2.528395061728395, | |
| "grad_norm": 0.014863072894513607, | |
| "learning_rate": 9.88641975308642e-06, | |
| "loss": 0.1111, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 2.533333333333333, | |
| "grad_norm": 75.10174560546875, | |
| "learning_rate": 9.866666666666668e-06, | |
| "loss": 0.1741, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 2.538271604938272, | |
| "grad_norm": 0.048640429973602295, | |
| "learning_rate": 9.846913580246915e-06, | |
| "loss": 0.1827, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 2.5432098765432096, | |
| "grad_norm": 0.25287771224975586, | |
| "learning_rate": 9.827160493827161e-06, | |
| "loss": 0.2889, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 2.5481481481481483, | |
| "grad_norm": 3.0355021953582764, | |
| "learning_rate": 9.807407407407407e-06, | |
| "loss": 0.0549, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 2.5530864197530865, | |
| "grad_norm": 0.008490847423672676, | |
| "learning_rate": 9.787654320987655e-06, | |
| "loss": 0.1945, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 2.5580246913580247, | |
| "grad_norm": 0.055667582899332047, | |
| "learning_rate": 9.767901234567902e-06, | |
| "loss": 0.178, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 2.562962962962963, | |
| "grad_norm": 2.11090350151062, | |
| "learning_rate": 9.748148148148148e-06, | |
| "loss": 0.1497, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 2.567901234567901, | |
| "grad_norm": 48.44843292236328, | |
| "learning_rate": 9.728395061728396e-06, | |
| "loss": 0.3233, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 2.5728395061728397, | |
| "grad_norm": 16.53707504272461, | |
| "learning_rate": 9.708641975308643e-06, | |
| "loss": 0.0269, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 2.5777777777777775, | |
| "grad_norm": 85.8476791381836, | |
| "learning_rate": 9.688888888888889e-06, | |
| "loss": 0.4162, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 2.582716049382716, | |
| "grad_norm": 333.21466064453125, | |
| "learning_rate": 9.669135802469136e-06, | |
| "loss": 0.161, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 2.5876543209876544, | |
| "grad_norm": 46.150047302246094, | |
| "learning_rate": 9.649382716049384e-06, | |
| "loss": 0.1367, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 2.5925925925925926, | |
| "grad_norm": 23.23380470275879, | |
| "learning_rate": 9.62962962962963e-06, | |
| "loss": 0.049, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 2.5975308641975308, | |
| "grad_norm": 0.01312983874231577, | |
| "learning_rate": 9.609876543209878e-06, | |
| "loss": 0.4376, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 2.602469135802469, | |
| "grad_norm": 0.1367645114660263, | |
| "learning_rate": 9.590123456790124e-06, | |
| "loss": 0.0646, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 2.6074074074074076, | |
| "grad_norm": 0.16247719526290894, | |
| "learning_rate": 9.570370370370371e-06, | |
| "loss": 0.3247, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 2.612345679012346, | |
| "grad_norm": 140.21865844726562, | |
| "learning_rate": 9.550617283950619e-06, | |
| "loss": 0.4135, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 2.617283950617284, | |
| "grad_norm": 60.00096893310547, | |
| "learning_rate": 9.530864197530865e-06, | |
| "loss": 0.1836, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 2.6222222222222222, | |
| "grad_norm": 0.0217946358025074, | |
| "learning_rate": 9.511111111111112e-06, | |
| "loss": 0.3697, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 2.6271604938271604, | |
| "grad_norm": 75.67610931396484, | |
| "learning_rate": 9.491358024691358e-06, | |
| "loss": 0.0953, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 2.6320987654320986, | |
| "grad_norm": 19.351255416870117, | |
| "learning_rate": 9.471604938271606e-06, | |
| "loss": 0.0855, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 2.637037037037037, | |
| "grad_norm": 7.155949115753174, | |
| "learning_rate": 9.451851851851853e-06, | |
| "loss": 0.5717, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 2.6419753086419755, | |
| "grad_norm": 143.97991943359375, | |
| "learning_rate": 9.432098765432099e-06, | |
| "loss": 0.0894, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 2.6469135802469137, | |
| "grad_norm": 66.95204162597656, | |
| "learning_rate": 9.412345679012347e-06, | |
| "loss": 0.1136, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 2.651851851851852, | |
| "grad_norm": 5.1548590660095215, | |
| "learning_rate": 9.392592592592593e-06, | |
| "loss": 0.066, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 2.65679012345679, | |
| "grad_norm": 164.66404724121094, | |
| "learning_rate": 9.37283950617284e-06, | |
| "loss": 0.2865, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 2.6617283950617283, | |
| "grad_norm": 200.15574645996094, | |
| "learning_rate": 9.353086419753086e-06, | |
| "loss": 0.2895, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 233.70343017578125, | |
| "learning_rate": 9.333333333333334e-06, | |
| "loss": 0.052, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 2.6716049382716047, | |
| "grad_norm": 140.56007385253906, | |
| "learning_rate": 9.31358024691358e-06, | |
| "loss": 0.2882, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 2.6765432098765434, | |
| "grad_norm": 281.7587585449219, | |
| "learning_rate": 9.293827160493827e-06, | |
| "loss": 0.1121, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 2.6814814814814816, | |
| "grad_norm": 0.00958334095776081, | |
| "learning_rate": 9.274074074074075e-06, | |
| "loss": 0.0447, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 2.68641975308642, | |
| "grad_norm": 0.6552028059959412, | |
| "learning_rate": 9.254320987654322e-06, | |
| "loss": 0.0727, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 2.691358024691358, | |
| "grad_norm": 0.01010242011398077, | |
| "learning_rate": 9.23456790123457e-06, | |
| "loss": 0.1756, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 2.696296296296296, | |
| "grad_norm": 0.013218900188803673, | |
| "learning_rate": 9.214814814814816e-06, | |
| "loss": 0.2895, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 2.701234567901235, | |
| "grad_norm": 44.7857780456543, | |
| "learning_rate": 9.195061728395062e-06, | |
| "loss": 0.323, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 2.7061728395061726, | |
| "grad_norm": 2.435910701751709, | |
| "learning_rate": 9.175308641975309e-06, | |
| "loss": 0.473, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 2.7111111111111112, | |
| "grad_norm": 5.467461585998535, | |
| "learning_rate": 9.155555555555557e-06, | |
| "loss": 0.4263, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 2.7160493827160495, | |
| "grad_norm": 0.020925594493746758, | |
| "learning_rate": 9.135802469135803e-06, | |
| "loss": 0.1927, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.7209876543209877, | |
| "grad_norm": 0.850062906742096, | |
| "learning_rate": 9.11604938271605e-06, | |
| "loss": 0.2724, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 2.725925925925926, | |
| "grad_norm": 0.8104738593101501, | |
| "learning_rate": 9.096296296296298e-06, | |
| "loss": 0.0688, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 2.730864197530864, | |
| "grad_norm": 183.3977813720703, | |
| "learning_rate": 9.076543209876544e-06, | |
| "loss": 0.403, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 2.7358024691358027, | |
| "grad_norm": 0.39399421215057373, | |
| "learning_rate": 9.05679012345679e-06, | |
| "loss": 0.2956, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 2.7407407407407405, | |
| "grad_norm": 17.86000633239746, | |
| "learning_rate": 9.037037037037037e-06, | |
| "loss": 0.2467, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 2.745679012345679, | |
| "grad_norm": 0.007520174607634544, | |
| "learning_rate": 9.017283950617285e-06, | |
| "loss": 0.0734, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 2.7506172839506173, | |
| "grad_norm": 42.2265739440918, | |
| "learning_rate": 8.997530864197531e-06, | |
| "loss": 0.1445, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 2.7555555555555555, | |
| "grad_norm": 55.289222717285156, | |
| "learning_rate": 8.977777777777778e-06, | |
| "loss": 0.1346, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 2.7604938271604937, | |
| "grad_norm": 1.1563366651535034, | |
| "learning_rate": 8.958024691358024e-06, | |
| "loss": 0.1427, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 2.765432098765432, | |
| "grad_norm": 31.966625213623047, | |
| "learning_rate": 8.938271604938272e-06, | |
| "loss": 0.1432, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 2.7703703703703706, | |
| "grad_norm": 26.22989273071289, | |
| "learning_rate": 8.91851851851852e-06, | |
| "loss": 0.1465, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 2.775308641975309, | |
| "grad_norm": 2.2528607845306396, | |
| "learning_rate": 8.898765432098767e-06, | |
| "loss": 0.1046, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 2.780246913580247, | |
| "grad_norm": 41.7017707824707, | |
| "learning_rate": 8.879012345679013e-06, | |
| "loss": 0.3095, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 2.785185185185185, | |
| "grad_norm": 80.6755142211914, | |
| "learning_rate": 8.85925925925926e-06, | |
| "loss": 0.1785, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 2.7901234567901234, | |
| "grad_norm": 49.54252624511719, | |
| "learning_rate": 8.839506172839508e-06, | |
| "loss": 0.1924, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 2.7950617283950616, | |
| "grad_norm": 0.05363411456346512, | |
| "learning_rate": 8.819753086419754e-06, | |
| "loss": 0.1327, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 8.126516342163086, | |
| "learning_rate": 8.8e-06, | |
| "loss": 0.121, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 2.8049382716049385, | |
| "grad_norm": 0.02661011926829815, | |
| "learning_rate": 8.780246913580249e-06, | |
| "loss": 0.0073, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 2.8098765432098767, | |
| "grad_norm": 8.132286071777344, | |
| "learning_rate": 8.760493827160495e-06, | |
| "loss": 0.1296, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 2.814814814814815, | |
| "grad_norm": 62.083099365234375, | |
| "learning_rate": 8.740740740740741e-06, | |
| "loss": 0.2036, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 2.819753086419753, | |
| "grad_norm": 17.057275772094727, | |
| "learning_rate": 8.720987654320988e-06, | |
| "loss": 0.236, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 2.8246913580246913, | |
| "grad_norm": 0.07913421094417572, | |
| "learning_rate": 8.701234567901236e-06, | |
| "loss": 0.0186, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 2.8296296296296295, | |
| "grad_norm": 59.11501693725586, | |
| "learning_rate": 8.681481481481482e-06, | |
| "loss": 0.2352, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 2.8345679012345677, | |
| "grad_norm": 0.05783538892865181, | |
| "learning_rate": 8.661728395061729e-06, | |
| "loss": 0.325, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 2.8395061728395063, | |
| "grad_norm": 0.07834266871213913, | |
| "learning_rate": 8.641975308641975e-06, | |
| "loss": 0.0508, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 2.8444444444444446, | |
| "grad_norm": 2.788255214691162, | |
| "learning_rate": 8.622222222222223e-06, | |
| "loss": 0.0728, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 2.8493827160493828, | |
| "grad_norm": 41.630611419677734, | |
| "learning_rate": 8.602469135802471e-06, | |
| "loss": 0.2255, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 2.854320987654321, | |
| "grad_norm": 0.47825512290000916, | |
| "learning_rate": 8.582716049382716e-06, | |
| "loss": 0.1858, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 2.859259259259259, | |
| "grad_norm": 0.4730166494846344, | |
| "learning_rate": 8.562962962962964e-06, | |
| "loss": 0.0417, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 2.8641975308641974, | |
| "grad_norm": 0.00964848231524229, | |
| "learning_rate": 8.54320987654321e-06, | |
| "loss": 0.2487, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 2.8691358024691356, | |
| "grad_norm": 4.990635395050049, | |
| "learning_rate": 8.523456790123458e-06, | |
| "loss": 0.1967, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 2.8740740740740742, | |
| "grad_norm": 0.06853197515010834, | |
| "learning_rate": 8.503703703703705e-06, | |
| "loss": 0.1847, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 2.8790123456790124, | |
| "grad_norm": 14.369994163513184, | |
| "learning_rate": 8.483950617283951e-06, | |
| "loss": 0.4819, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 2.8839506172839506, | |
| "grad_norm": 1.4478572607040405, | |
| "learning_rate": 8.464197530864198e-06, | |
| "loss": 0.2011, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 2.888888888888889, | |
| "grad_norm": 197.60943603515625, | |
| "learning_rate": 8.444444444444446e-06, | |
| "loss": 0.2301, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 2.893827160493827, | |
| "grad_norm": 0.3465060293674469, | |
| "learning_rate": 8.424691358024692e-06, | |
| "loss": 0.0814, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 2.8987654320987657, | |
| "grad_norm": 0.22260437905788422, | |
| "learning_rate": 8.404938271604938e-06, | |
| "loss": 0.1913, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 2.9037037037037035, | |
| "grad_norm": 3.2895030975341797, | |
| "learning_rate": 8.385185185185187e-06, | |
| "loss": 0.161, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 2.908641975308642, | |
| "grad_norm": 75.78804016113281, | |
| "learning_rate": 8.365432098765433e-06, | |
| "loss": 0.2146, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 2.9135802469135803, | |
| "grad_norm": 37.905670166015625, | |
| "learning_rate": 8.34567901234568e-06, | |
| "loss": 0.0171, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 2.9185185185185185, | |
| "grad_norm": 1.2207163572311401, | |
| "learning_rate": 8.325925925925926e-06, | |
| "loss": 0.0008, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 2.9234567901234567, | |
| "grad_norm": 0.26251447200775146, | |
| "learning_rate": 8.306172839506174e-06, | |
| "loss": 0.2391, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 2.928395061728395, | |
| "grad_norm": 184.48342895507812, | |
| "learning_rate": 8.28641975308642e-06, | |
| "loss": 0.4721, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 2.9333333333333336, | |
| "grad_norm": 2.430443048477173, | |
| "learning_rate": 8.266666666666667e-06, | |
| "loss": 0.3217, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 2.9382716049382713, | |
| "grad_norm": 167.15850830078125, | |
| "learning_rate": 8.246913580246915e-06, | |
| "loss": 0.1661, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 2.94320987654321, | |
| "grad_norm": 3.9648666381835938, | |
| "learning_rate": 8.227160493827161e-06, | |
| "loss": 0.0846, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 2.948148148148148, | |
| "grad_norm": 0.18866649270057678, | |
| "learning_rate": 8.207407407407409e-06, | |
| "loss": 0.0355, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 2.9530864197530864, | |
| "grad_norm": 0.19261124730110168, | |
| "learning_rate": 8.187654320987654e-06, | |
| "loss": 0.1842, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 2.9580246913580246, | |
| "grad_norm": 0.13655029237270355, | |
| "learning_rate": 8.167901234567902e-06, | |
| "loss": 0.0244, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 2.962962962962963, | |
| "grad_norm": 0.24857792258262634, | |
| "learning_rate": 8.148148148148148e-06, | |
| "loss": 0.2052, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.9679012345679014, | |
| "grad_norm": 85.19855499267578, | |
| "learning_rate": 8.128395061728396e-06, | |
| "loss": 0.187, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 2.9728395061728397, | |
| "grad_norm": 190.1832733154297, | |
| "learning_rate": 8.108641975308643e-06, | |
| "loss": 0.5081, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 2.977777777777778, | |
| "grad_norm": 0.0004998709191568196, | |
| "learning_rate": 8.08888888888889e-06, | |
| "loss": 0.4187, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 2.982716049382716, | |
| "grad_norm": 0.019353624433279037, | |
| "learning_rate": 8.069135802469137e-06, | |
| "loss": 0.0796, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 2.9876543209876543, | |
| "grad_norm": 0.00627252459526062, | |
| "learning_rate": 8.049382716049384e-06, | |
| "loss": 0.4005, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 2.9925925925925925, | |
| "grad_norm": 159.71725463867188, | |
| "learning_rate": 8.02962962962963e-06, | |
| "loss": 0.028, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 2.9975308641975307, | |
| "grad_norm": 2.6106536388397217, | |
| "learning_rate": 8.009876543209876e-06, | |
| "loss": 0.024, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.9831481481481481, | |
| "eval_loss": 0.07001630961894989, | |
| "eval_runtime": 32.6621, | |
| "eval_samples_per_second": 165.329, | |
| "eval_steps_per_second": 20.666, | |
| "step": 6075 | |
| }, | |
| { | |
| "epoch": 3.0024691358024693, | |
| "grad_norm": 1.3359025716781616, | |
| "learning_rate": 7.990123456790125e-06, | |
| "loss": 0.0996, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 3.0074074074074075, | |
| "grad_norm": 0.05273491516709328, | |
| "learning_rate": 7.970370370370371e-06, | |
| "loss": 0.012, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 3.0123456790123457, | |
| "grad_norm": 0.23167039453983307, | |
| "learning_rate": 7.950617283950617e-06, | |
| "loss": 0.1334, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 3.017283950617284, | |
| "grad_norm": 0.03928215801715851, | |
| "learning_rate": 7.930864197530865e-06, | |
| "loss": 0.1258, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 3.022222222222222, | |
| "grad_norm": 109.73241424560547, | |
| "learning_rate": 7.911111111111112e-06, | |
| "loss": 0.1747, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 3.0271604938271603, | |
| "grad_norm": 2.945659637451172, | |
| "learning_rate": 7.89135802469136e-06, | |
| "loss": 0.1064, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 3.0320987654320986, | |
| "grad_norm": 19.941844940185547, | |
| "learning_rate": 7.871604938271605e-06, | |
| "loss": 0.1372, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 3.037037037037037, | |
| "grad_norm": 0.11880356073379517, | |
| "learning_rate": 7.851851851851853e-06, | |
| "loss": 0.0212, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 3.0419753086419754, | |
| "grad_norm": 1.0245414972305298, | |
| "learning_rate": 7.832098765432099e-06, | |
| "loss": 0.2392, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 3.0469135802469136, | |
| "grad_norm": 0.23312650620937347, | |
| "learning_rate": 7.812345679012347e-06, | |
| "loss": 0.0706, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 3.051851851851852, | |
| "grad_norm": 63.500797271728516, | |
| "learning_rate": 7.792592592592594e-06, | |
| "loss": 0.2912, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 3.05679012345679, | |
| "grad_norm": 4.3201727867126465, | |
| "learning_rate": 7.77283950617284e-06, | |
| "loss": 0.1027, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 3.0617283950617282, | |
| "grad_norm": 0.009072243236005306, | |
| "learning_rate": 7.753086419753088e-06, | |
| "loss": 0.0177, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 3.066666666666667, | |
| "grad_norm": 7.860177993774414, | |
| "learning_rate": 7.733333333333334e-06, | |
| "loss": 0.1266, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 3.071604938271605, | |
| "grad_norm": 125.65026092529297, | |
| "learning_rate": 7.71358024691358e-06, | |
| "loss": 0.1102, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 3.0765432098765433, | |
| "grad_norm": 64.10157012939453, | |
| "learning_rate": 7.693827160493827e-06, | |
| "loss": 0.2813, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 3.0814814814814815, | |
| "grad_norm": 0.023331521078944206, | |
| "learning_rate": 7.674074074074075e-06, | |
| "loss": 0.4718, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 3.0864197530864197, | |
| "grad_norm": 0.9373367428779602, | |
| "learning_rate": 7.654320987654322e-06, | |
| "loss": 0.1784, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 3.091358024691358, | |
| "grad_norm": 0.09618625789880753, | |
| "learning_rate": 7.634567901234568e-06, | |
| "loss": 0.1675, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 3.096296296296296, | |
| "grad_norm": 53.146034240722656, | |
| "learning_rate": 7.614814814814816e-06, | |
| "loss": 0.3012, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 3.1012345679012348, | |
| "grad_norm": 0.9176463484764099, | |
| "learning_rate": 7.5950617283950625e-06, | |
| "loss": 0.0438, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 3.106172839506173, | |
| "grad_norm": 0.6210525035858154, | |
| "learning_rate": 7.57530864197531e-06, | |
| "loss": 0.2127, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 3.111111111111111, | |
| "grad_norm": 171.12738037109375, | |
| "learning_rate": 7.555555555555556e-06, | |
| "loss": 0.3021, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 3.1160493827160494, | |
| "grad_norm": 0.15432004630565643, | |
| "learning_rate": 7.535802469135803e-06, | |
| "loss": 0.2258, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 3.1209876543209876, | |
| "grad_norm": 6.785965919494629, | |
| "learning_rate": 7.51604938271605e-06, | |
| "loss": 0.0524, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 3.1259259259259258, | |
| "grad_norm": 14.042142868041992, | |
| "learning_rate": 7.496296296296297e-06, | |
| "loss": 0.1315, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 3.1308641975308644, | |
| "grad_norm": 0.005698219407349825, | |
| "learning_rate": 7.476543209876543e-06, | |
| "loss": 0.59, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 3.1358024691358026, | |
| "grad_norm": 0.2984008193016052, | |
| "learning_rate": 7.456790123456791e-06, | |
| "loss": 0.1643, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 3.140740740740741, | |
| "grad_norm": 33.20651626586914, | |
| "learning_rate": 7.437037037037038e-06, | |
| "loss": 0.0757, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 3.145679012345679, | |
| "grad_norm": 39.41627883911133, | |
| "learning_rate": 7.417283950617284e-06, | |
| "loss": 0.2282, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 3.1506172839506172, | |
| "grad_norm": 0.06810309737920761, | |
| "learning_rate": 7.3975308641975315e-06, | |
| "loss": 0.0338, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 3.1555555555555554, | |
| "grad_norm": 0.4489476680755615, | |
| "learning_rate": 7.377777777777778e-06, | |
| "loss": 0.258, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 3.1604938271604937, | |
| "grad_norm": 3.387746572494507, | |
| "learning_rate": 7.358024691358025e-06, | |
| "loss": 0.0842, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 3.1654320987654323, | |
| "grad_norm": 2.4589788913726807, | |
| "learning_rate": 7.3382716049382715e-06, | |
| "loss": 0.1025, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 3.1703703703703705, | |
| "grad_norm": 11.912010192871094, | |
| "learning_rate": 7.31851851851852e-06, | |
| "loss": 0.1159, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 3.1753086419753087, | |
| "grad_norm": 0.0014852778986096382, | |
| "learning_rate": 7.298765432098765e-06, | |
| "loss": 0.1174, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 3.180246913580247, | |
| "grad_norm": 0.23326246440410614, | |
| "learning_rate": 7.279012345679013e-06, | |
| "loss": 0.1595, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 3.185185185185185, | |
| "grad_norm": 0.023275885730981827, | |
| "learning_rate": 7.2592592592592605e-06, | |
| "loss": 0.3177, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 3.1901234567901233, | |
| "grad_norm": 0.0346212200820446, | |
| "learning_rate": 7.239506172839507e-06, | |
| "loss": 0.1329, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 3.1950617283950615, | |
| "grad_norm": 0.14802587032318115, | |
| "learning_rate": 7.219753086419754e-06, | |
| "loss": 0.0812, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "grad_norm": 0.2590476870536804, | |
| "learning_rate": 7.2000000000000005e-06, | |
| "loss": 0.0625, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 3.2049382716049384, | |
| "grad_norm": 0.7991506457328796, | |
| "learning_rate": 7.180246913580248e-06, | |
| "loss": 0.0811, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 3.2098765432098766, | |
| "grad_norm": 76.12113189697266, | |
| "learning_rate": 7.160493827160494e-06, | |
| "loss": 0.0727, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 3.214814814814815, | |
| "grad_norm": 24.764394760131836, | |
| "learning_rate": 7.140740740740741e-06, | |
| "loss": 0.3267, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 3.219753086419753, | |
| "grad_norm": 59.69222640991211, | |
| "learning_rate": 7.120987654320988e-06, | |
| "loss": 0.1661, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 3.224691358024691, | |
| "grad_norm": 0.007727318909019232, | |
| "learning_rate": 7.101234567901235e-06, | |
| "loss": 0.1388, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 3.2296296296296294, | |
| "grad_norm": 1.3282524347305298, | |
| "learning_rate": 7.081481481481482e-06, | |
| "loss": 0.0129, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 3.234567901234568, | |
| "grad_norm": 58.830318450927734, | |
| "learning_rate": 7.061728395061729e-06, | |
| "loss": 0.075, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 3.2395061728395063, | |
| "grad_norm": 0.0027803820557892323, | |
| "learning_rate": 7.041975308641976e-06, | |
| "loss": 0.0688, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 3.2444444444444445, | |
| "grad_norm": 7.03369140625, | |
| "learning_rate": 7.022222222222222e-06, | |
| "loss": 0.2156, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 3.2493827160493827, | |
| "grad_norm": 0.3327115476131439, | |
| "learning_rate": 7.0024691358024695e-06, | |
| "loss": 0.2499, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 3.254320987654321, | |
| "grad_norm": 0.007271229289472103, | |
| "learning_rate": 6.982716049382716e-06, | |
| "loss": 0.1749, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 3.259259259259259, | |
| "grad_norm": 0.011601006612181664, | |
| "learning_rate": 6.962962962962964e-06, | |
| "loss": 0.4342, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 3.2641975308641973, | |
| "grad_norm": 1.5765591859817505, | |
| "learning_rate": 6.943209876543211e-06, | |
| "loss": 0.02, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 3.269135802469136, | |
| "grad_norm": 10.005110740661621, | |
| "learning_rate": 6.923456790123458e-06, | |
| "loss": 0.1143, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 3.274074074074074, | |
| "grad_norm": 0.1242939829826355, | |
| "learning_rate": 6.903703703703705e-06, | |
| "loss": 0.3571, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 3.2790123456790123, | |
| "grad_norm": 57.85032272338867, | |
| "learning_rate": 6.883950617283951e-06, | |
| "loss": 0.3811, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 3.2839506172839505, | |
| "grad_norm": 1.068203091621399, | |
| "learning_rate": 6.8641975308641985e-06, | |
| "loss": 0.1045, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 3.2888888888888888, | |
| "grad_norm": 0.03020775318145752, | |
| "learning_rate": 6.844444444444445e-06, | |
| "loss": 0.0945, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 3.2938271604938274, | |
| "grad_norm": 18.36736297607422, | |
| "learning_rate": 6.824691358024692e-06, | |
| "loss": 0.2015, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 3.2987654320987656, | |
| "grad_norm": 0.0009854953968897462, | |
| "learning_rate": 6.8049382716049385e-06, | |
| "loss": 0.2278, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 3.303703703703704, | |
| "grad_norm": 0.02513027749955654, | |
| "learning_rate": 6.785185185185186e-06, | |
| "loss": 0.2392, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 3.308641975308642, | |
| "grad_norm": 29.72653579711914, | |
| "learning_rate": 6.765432098765433e-06, | |
| "loss": 0.2054, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 3.31358024691358, | |
| "grad_norm": 0.006469042040407658, | |
| "learning_rate": 6.745679012345679e-06, | |
| "loss": 0.0055, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 3.3185185185185184, | |
| "grad_norm": 129.7929229736328, | |
| "learning_rate": 6.725925925925927e-06, | |
| "loss": 0.0842, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 3.3234567901234566, | |
| "grad_norm": 0.4482802748680115, | |
| "learning_rate": 6.706172839506173e-06, | |
| "loss": 0.1121, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 3.3283950617283953, | |
| "grad_norm": 10.919482231140137, | |
| "learning_rate": 6.68641975308642e-06, | |
| "loss": 0.2323, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 3.3333333333333335, | |
| "grad_norm": 0.10504257678985596, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.1925, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 3.3382716049382717, | |
| "grad_norm": 26.70441436767578, | |
| "learning_rate": 6.646913580246914e-06, | |
| "loss": 0.3749, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 3.34320987654321, | |
| "grad_norm": 1.2347007989883423, | |
| "learning_rate": 6.62716049382716e-06, | |
| "loss": 0.1701, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 3.348148148148148, | |
| "grad_norm": 6.345317840576172, | |
| "learning_rate": 6.6074074074074075e-06, | |
| "loss": 0.0607, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 3.3530864197530863, | |
| "grad_norm": 13.622949600219727, | |
| "learning_rate": 6.587654320987656e-06, | |
| "loss": 0.1763, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 3.3580246913580245, | |
| "grad_norm": 16.68195152282715, | |
| "learning_rate": 6.567901234567902e-06, | |
| "loss": 0.2754, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 3.362962962962963, | |
| "grad_norm": 0.2912677526473999, | |
| "learning_rate": 6.548148148148149e-06, | |
| "loss": 0.2011, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 3.3679012345679014, | |
| "grad_norm": 76.45751953125, | |
| "learning_rate": 6.528395061728396e-06, | |
| "loss": 0.3157, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 3.3728395061728396, | |
| "grad_norm": 0.0012998235179111362, | |
| "learning_rate": 6.508641975308643e-06, | |
| "loss": 0.1313, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 3.3777777777777778, | |
| "grad_norm": 170.02474975585938, | |
| "learning_rate": 6.488888888888889e-06, | |
| "loss": 0.1319, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 3.382716049382716, | |
| "grad_norm": 87.3119888305664, | |
| "learning_rate": 6.4691358024691365e-06, | |
| "loss": 0.2838, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 3.387654320987654, | |
| "grad_norm": 25.350370407104492, | |
| "learning_rate": 6.449382716049383e-06, | |
| "loss": 0.1525, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 3.3925925925925924, | |
| "grad_norm": 0.22812433540821075, | |
| "learning_rate": 6.42962962962963e-06, | |
| "loss": 0.0099, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 3.397530864197531, | |
| "grad_norm": 0.06566119194030762, | |
| "learning_rate": 6.409876543209877e-06, | |
| "loss": 0.0049, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 3.4024691358024692, | |
| "grad_norm": 0.003955530468374491, | |
| "learning_rate": 6.390123456790124e-06, | |
| "loss": 0.3611, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 3.4074074074074074, | |
| "grad_norm": 46.40278244018555, | |
| "learning_rate": 6.370370370370371e-06, | |
| "loss": 0.2929, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 3.4123456790123456, | |
| "grad_norm": 0.0017953283386304975, | |
| "learning_rate": 6.350617283950617e-06, | |
| "loss": 0.0162, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 3.417283950617284, | |
| "grad_norm": 0.001457493519410491, | |
| "learning_rate": 6.330864197530865e-06, | |
| "loss": 0.1854, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 3.422222222222222, | |
| "grad_norm": 0.0005978038534522057, | |
| "learning_rate": 6.311111111111111e-06, | |
| "loss": 0.2118, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 3.4271604938271603, | |
| "grad_norm": 3.947251558303833, | |
| "learning_rate": 6.291358024691358e-06, | |
| "loss": 0.0656, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 3.432098765432099, | |
| "grad_norm": 13.78681755065918, | |
| "learning_rate": 6.271604938271606e-06, | |
| "loss": 0.0259, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 3.437037037037037, | |
| "grad_norm": 0.04035714268684387, | |
| "learning_rate": 6.251851851851852e-06, | |
| "loss": 0.0107, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 3.4419753086419753, | |
| "grad_norm": 0.024245211854577065, | |
| "learning_rate": 6.2320987654321e-06, | |
| "loss": 0.1175, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 3.4469135802469135, | |
| "grad_norm": 0.04458506777882576, | |
| "learning_rate": 6.212345679012346e-06, | |
| "loss": 0.2044, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 3.4518518518518517, | |
| "grad_norm": 161.80392456054688, | |
| "learning_rate": 6.192592592592594e-06, | |
| "loss": 0.2394, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 3.45679012345679, | |
| "grad_norm": 0.04583211988210678, | |
| "learning_rate": 6.17283950617284e-06, | |
| "loss": 0.0821, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 3.4617283950617286, | |
| "grad_norm": 0.14376536011695862, | |
| "learning_rate": 6.153086419753087e-06, | |
| "loss": 0.3085, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 3.466666666666667, | |
| "grad_norm": 92.59646606445312, | |
| "learning_rate": 6.133333333333334e-06, | |
| "loss": 0.2538, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 3.471604938271605, | |
| "grad_norm": 83.26078033447266, | |
| "learning_rate": 6.113580246913581e-06, | |
| "loss": 0.3145, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 3.476543209876543, | |
| "grad_norm": 74.77570343017578, | |
| "learning_rate": 6.093827160493828e-06, | |
| "loss": 0.1775, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 3.4814814814814814, | |
| "grad_norm": 0.038955166935920715, | |
| "learning_rate": 6.0740740740740745e-06, | |
| "loss": 0.1509, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 3.4864197530864196, | |
| "grad_norm": 97.1812973022461, | |
| "learning_rate": 6.054320987654322e-06, | |
| "loss": 0.2155, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 3.4913580246913583, | |
| "grad_norm": 73.86189270019531, | |
| "learning_rate": 6.034567901234568e-06, | |
| "loss": 0.2615, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 3.4962962962962965, | |
| "grad_norm": 0.0055229514837265015, | |
| "learning_rate": 6.014814814814815e-06, | |
| "loss": 0.2428, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 3.5012345679012347, | |
| "grad_norm": 0.0022700978443026543, | |
| "learning_rate": 5.995061728395062e-06, | |
| "loss": 0.2049, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 3.506172839506173, | |
| "grad_norm": 1.260072946548462, | |
| "learning_rate": 5.975308641975309e-06, | |
| "loss": 0.181, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 3.511111111111111, | |
| "grad_norm": 1.283315896987915, | |
| "learning_rate": 5.955555555555555e-06, | |
| "loss": 0.0807, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 3.5160493827160493, | |
| "grad_norm": 82.1073989868164, | |
| "learning_rate": 5.935802469135803e-06, | |
| "loss": 0.1029, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 3.5209876543209875, | |
| "grad_norm": 8.620868682861328, | |
| "learning_rate": 5.916049382716051e-06, | |
| "loss": 0.2403, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 3.525925925925926, | |
| "grad_norm": 6.648277282714844, | |
| "learning_rate": 5.896296296296296e-06, | |
| "loss": 0.0663, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 3.5308641975308643, | |
| "grad_norm": 0.3625084459781647, | |
| "learning_rate": 5.876543209876544e-06, | |
| "loss": 0.1895, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 3.5358024691358025, | |
| "grad_norm": 25.613967895507812, | |
| "learning_rate": 5.856790123456791e-06, | |
| "loss": 0.0466, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 3.5407407407407407, | |
| "grad_norm": 0.6308773756027222, | |
| "learning_rate": 5.837037037037038e-06, | |
| "loss": 0.0887, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 3.545679012345679, | |
| "grad_norm": 28.219980239868164, | |
| "learning_rate": 5.817283950617284e-06, | |
| "loss": 0.071, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 3.550617283950617, | |
| "grad_norm": 42.56242752075195, | |
| "learning_rate": 5.797530864197532e-06, | |
| "loss": 0.345, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 3.5555555555555554, | |
| "grad_norm": 0.07085005193948746, | |
| "learning_rate": 5.777777777777778e-06, | |
| "loss": 0.3513, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 3.560493827160494, | |
| "grad_norm": 0.4435485005378723, | |
| "learning_rate": 5.758024691358025e-06, | |
| "loss": 0.0908, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 3.565432098765432, | |
| "grad_norm": 0.009900487028062344, | |
| "learning_rate": 5.7382716049382725e-06, | |
| "loss": 0.1456, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 3.5703703703703704, | |
| "grad_norm": 0.001979109598323703, | |
| "learning_rate": 5.718518518518519e-06, | |
| "loss": 0.0493, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 3.5753086419753086, | |
| "grad_norm": 0.20845463871955872, | |
| "learning_rate": 5.698765432098766e-06, | |
| "loss": 0.264, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 3.580246913580247, | |
| "grad_norm": 0.7934794425964355, | |
| "learning_rate": 5.6790123456790125e-06, | |
| "loss": 0.0509, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 3.585185185185185, | |
| "grad_norm": 0.045501917600631714, | |
| "learning_rate": 5.65925925925926e-06, | |
| "loss": 0.0933, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 3.5901234567901232, | |
| "grad_norm": 0.040048014372587204, | |
| "learning_rate": 5.639506172839506e-06, | |
| "loss": 0.0733, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 3.595061728395062, | |
| "grad_norm": 197.66177368164062, | |
| "learning_rate": 5.619753086419753e-06, | |
| "loss": 0.2655, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "grad_norm": 0.03324214369058609, | |
| "learning_rate": 5.600000000000001e-06, | |
| "loss": 0.0812, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 3.6049382716049383, | |
| "grad_norm": 124.81009674072266, | |
| "learning_rate": 5.580246913580247e-06, | |
| "loss": 0.1874, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 3.6098765432098765, | |
| "grad_norm": 14.227179527282715, | |
| "learning_rate": 5.560493827160495e-06, | |
| "loss": 0.1483, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 3.6148148148148147, | |
| "grad_norm": 28.93998146057129, | |
| "learning_rate": 5.540740740740741e-06, | |
| "loss": 0.2179, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 3.6197530864197534, | |
| "grad_norm": 109.27143096923828, | |
| "learning_rate": 5.520987654320989e-06, | |
| "loss": 0.2175, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 3.624691358024691, | |
| "grad_norm": 3.306696653366089, | |
| "learning_rate": 5.501234567901234e-06, | |
| "loss": 0.1275, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 3.6296296296296298, | |
| "grad_norm": 53.0710563659668, | |
| "learning_rate": 5.481481481481482e-06, | |
| "loss": 0.2602, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 3.634567901234568, | |
| "grad_norm": 0.00018215861928183585, | |
| "learning_rate": 5.461728395061729e-06, | |
| "loss": 0.1973, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 3.639506172839506, | |
| "grad_norm": 14.688875198364258, | |
| "learning_rate": 5.441975308641976e-06, | |
| "loss": 0.1937, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 3.6444444444444444, | |
| "grad_norm": 121.82637023925781, | |
| "learning_rate": 5.422222222222223e-06, | |
| "loss": 0.1325, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 3.6493827160493826, | |
| "grad_norm": 0.004047624301165342, | |
| "learning_rate": 5.40246913580247e-06, | |
| "loss": 0.1085, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 3.6543209876543212, | |
| "grad_norm": 108.3661880493164, | |
| "learning_rate": 5.382716049382717e-06, | |
| "loss": 0.2458, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 3.659259259259259, | |
| "grad_norm": 0.029978841543197632, | |
| "learning_rate": 5.362962962962963e-06, | |
| "loss": 0.2308, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 3.6641975308641976, | |
| "grad_norm": 32.663150787353516, | |
| "learning_rate": 5.3432098765432105e-06, | |
| "loss": 0.192, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 3.669135802469136, | |
| "grad_norm": 0.000704328587744385, | |
| "learning_rate": 5.323456790123457e-06, | |
| "loss": 0.2431, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 3.674074074074074, | |
| "grad_norm": 81.13653564453125, | |
| "learning_rate": 5.303703703703704e-06, | |
| "loss": 0.1404, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 3.6790123456790123, | |
| "grad_norm": 0.0007958766655065119, | |
| "learning_rate": 5.2839506172839505e-06, | |
| "loss": 0.0767, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 3.6839506172839505, | |
| "grad_norm": 112.87112426757812, | |
| "learning_rate": 5.264197530864198e-06, | |
| "loss": 0.1402, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 3.688888888888889, | |
| "grad_norm": 41.893638610839844, | |
| "learning_rate": 5.244444444444445e-06, | |
| "loss": 0.1472, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 3.6938271604938273, | |
| "grad_norm": 3.585242748260498, | |
| "learning_rate": 5.224691358024691e-06, | |
| "loss": 0.0414, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 3.6987654320987655, | |
| "grad_norm": 69.6523208618164, | |
| "learning_rate": 5.2049382716049394e-06, | |
| "loss": 0.1479, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 3.7037037037037037, | |
| "grad_norm": 0.9416589736938477, | |
| "learning_rate": 5.185185185185185e-06, | |
| "loss": 0.1811, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 3.708641975308642, | |
| "grad_norm": 193.36740112304688, | |
| "learning_rate": 5.165432098765433e-06, | |
| "loss": 0.1734, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 3.71358024691358, | |
| "grad_norm": 83.2663803100586, | |
| "learning_rate": 5.145679012345679e-06, | |
| "loss": 0.3664, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 3.7185185185185183, | |
| "grad_norm": 1.504310131072998, | |
| "learning_rate": 5.125925925925927e-06, | |
| "loss": 0.3391, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 3.723456790123457, | |
| "grad_norm": 63.3848876953125, | |
| "learning_rate": 5.106172839506173e-06, | |
| "loss": 0.2681, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 3.728395061728395, | |
| "grad_norm": 0.005675642751157284, | |
| "learning_rate": 5.08641975308642e-06, | |
| "loss": 0.193, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 3.7333333333333334, | |
| "grad_norm": 0.013251741416752338, | |
| "learning_rate": 5.0666666666666676e-06, | |
| "loss": 0.1873, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 3.7382716049382716, | |
| "grad_norm": 0.0012360225664451718, | |
| "learning_rate": 5.046913580246914e-06, | |
| "loss": 0.2134, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 3.74320987654321, | |
| "grad_norm": 127.34367370605469, | |
| "learning_rate": 5.027160493827161e-06, | |
| "loss": 0.4686, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 3.748148148148148, | |
| "grad_norm": 0.01218173187226057, | |
| "learning_rate": 5.007407407407408e-06, | |
| "loss": 0.0103, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 3.753086419753086, | |
| "grad_norm": 0.03588619455695152, | |
| "learning_rate": 4.987654320987655e-06, | |
| "loss": 0.0478, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 3.758024691358025, | |
| "grad_norm": 126.76322937011719, | |
| "learning_rate": 4.967901234567902e-06, | |
| "loss": 0.1531, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 3.762962962962963, | |
| "grad_norm": 39.57160949707031, | |
| "learning_rate": 4.9481481481481485e-06, | |
| "loss": 0.0445, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 3.7679012345679013, | |
| "grad_norm": 0.4843272566795349, | |
| "learning_rate": 4.928395061728396e-06, | |
| "loss": 0.0298, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 3.7728395061728395, | |
| "grad_norm": 33.181583404541016, | |
| "learning_rate": 4.908641975308642e-06, | |
| "loss": 0.3563, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 3.7777777777777777, | |
| "grad_norm": 27.694658279418945, | |
| "learning_rate": 4.888888888888889e-06, | |
| "loss": 0.142, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 3.782716049382716, | |
| "grad_norm": 0.008271468803286552, | |
| "learning_rate": 4.869135802469136e-06, | |
| "loss": 0.1445, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 3.787654320987654, | |
| "grad_norm": 180.6202850341797, | |
| "learning_rate": 4.849382716049383e-06, | |
| "loss": 0.3204, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 3.7925925925925927, | |
| "grad_norm": 58.78599548339844, | |
| "learning_rate": 4.82962962962963e-06, | |
| "loss": 0.2717, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 3.797530864197531, | |
| "grad_norm": 48.85298538208008, | |
| "learning_rate": 4.8098765432098774e-06, | |
| "loss": 0.3752, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 3.802469135802469, | |
| "grad_norm": 119.5743637084961, | |
| "learning_rate": 4.790123456790124e-06, | |
| "loss": 0.266, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 3.8074074074074074, | |
| "grad_norm": 38.25589370727539, | |
| "learning_rate": 4.770370370370371e-06, | |
| "loss": 0.1581, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 3.8123456790123456, | |
| "grad_norm": 4.294593811035156, | |
| "learning_rate": 4.7506172839506175e-06, | |
| "loss": 0.0615, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 3.817283950617284, | |
| "grad_norm": 0.23868466913700104, | |
| "learning_rate": 4.730864197530865e-06, | |
| "loss": 0.2377, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 3.822222222222222, | |
| "grad_norm": 1.3772286176681519, | |
| "learning_rate": 4.711111111111111e-06, | |
| "loss": 0.1767, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 3.8271604938271606, | |
| "grad_norm": 0.004857083782553673, | |
| "learning_rate": 4.691358024691358e-06, | |
| "loss": 0.081, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 3.832098765432099, | |
| "grad_norm": 62.059326171875, | |
| "learning_rate": 4.6716049382716056e-06, | |
| "loss": 0.1362, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 3.837037037037037, | |
| "grad_norm": 0.022881271317601204, | |
| "learning_rate": 4.651851851851853e-06, | |
| "loss": 0.0713, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 3.8419753086419752, | |
| "grad_norm": 39.1450309753418, | |
| "learning_rate": 4.632098765432099e-06, | |
| "loss": 0.0745, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 3.8469135802469134, | |
| "grad_norm": 4.154773712158203, | |
| "learning_rate": 4.6123456790123464e-06, | |
| "loss": 0.1029, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 3.851851851851852, | |
| "grad_norm": 68.09147644042969, | |
| "learning_rate": 4.592592592592593e-06, | |
| "loss": 0.0558, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 3.8567901234567903, | |
| "grad_norm": 0.14514310657978058, | |
| "learning_rate": 4.57283950617284e-06, | |
| "loss": 0.0501, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 3.8617283950617285, | |
| "grad_norm": 1.0181536674499512, | |
| "learning_rate": 4.5530864197530865e-06, | |
| "loss": 0.0579, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 3.8666666666666667, | |
| "grad_norm": 141.15499877929688, | |
| "learning_rate": 4.533333333333334e-06, | |
| "loss": 0.2657, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 3.871604938271605, | |
| "grad_norm": 0.6955594420433044, | |
| "learning_rate": 4.513580246913581e-06, | |
| "loss": 0.2284, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 3.876543209876543, | |
| "grad_norm": 125.45293426513672, | |
| "learning_rate": 4.493827160493827e-06, | |
| "loss": 0.446, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 3.8814814814814813, | |
| "grad_norm": 0.0857425257563591, | |
| "learning_rate": 4.4740740740740746e-06, | |
| "loss": 0.0597, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 3.88641975308642, | |
| "grad_norm": 44.19774627685547, | |
| "learning_rate": 4.454320987654322e-06, | |
| "loss": 0.2066, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 3.891358024691358, | |
| "grad_norm": 61.00041580200195, | |
| "learning_rate": 4.434567901234568e-06, | |
| "loss": 0.1439, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 3.8962962962962964, | |
| "grad_norm": 0.8123835325241089, | |
| "learning_rate": 4.4148148148148154e-06, | |
| "loss": 0.2655, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 3.9012345679012346, | |
| "grad_norm": 0.0009880654979497194, | |
| "learning_rate": 4.395061728395062e-06, | |
| "loss": 0.1153, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 3.906172839506173, | |
| "grad_norm": 0.0027614731807261705, | |
| "learning_rate": 4.375308641975309e-06, | |
| "loss": 0.0693, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 3.911111111111111, | |
| "grad_norm": 99.65026092529297, | |
| "learning_rate": 4.3555555555555555e-06, | |
| "loss": 0.3998, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 3.916049382716049, | |
| "grad_norm": 17.23603057861328, | |
| "learning_rate": 4.335802469135803e-06, | |
| "loss": 0.2811, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 3.920987654320988, | |
| "grad_norm": 13.379603385925293, | |
| "learning_rate": 4.31604938271605e-06, | |
| "loss": 0.0989, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 3.925925925925926, | |
| "grad_norm": 0.12741827964782715, | |
| "learning_rate": 4.296296296296296e-06, | |
| "loss": 0.0431, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 3.9308641975308642, | |
| "grad_norm": 164.3784637451172, | |
| "learning_rate": 4.2765432098765436e-06, | |
| "loss": 0.3376, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 3.9358024691358025, | |
| "grad_norm": 0.002450704574584961, | |
| "learning_rate": 4.256790123456791e-06, | |
| "loss": 0.039, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 3.9407407407407407, | |
| "grad_norm": 9.37784194946289, | |
| "learning_rate": 4.237037037037037e-06, | |
| "loss": 0.3296, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 3.945679012345679, | |
| "grad_norm": 0.9755693078041077, | |
| "learning_rate": 4.2172839506172844e-06, | |
| "loss": 0.2798, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 3.950617283950617, | |
| "grad_norm": 17.373695373535156, | |
| "learning_rate": 4.197530864197531e-06, | |
| "loss": 0.044, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 3.9555555555555557, | |
| "grad_norm": 40.896148681640625, | |
| "learning_rate": 4.177777777777778e-06, | |
| "loss": 0.1641, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 3.960493827160494, | |
| "grad_norm": 7.210272312164307, | |
| "learning_rate": 4.158024691358025e-06, | |
| "loss": 0.0641, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 3.965432098765432, | |
| "grad_norm": 0.3746698498725891, | |
| "learning_rate": 4.138271604938272e-06, | |
| "loss": 0.1236, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 3.9703703703703703, | |
| "grad_norm": 2.9503226280212402, | |
| "learning_rate": 4.118518518518519e-06, | |
| "loss": 0.0634, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 3.9753086419753085, | |
| "grad_norm": 1.2919955253601074, | |
| "learning_rate": 4.098765432098766e-06, | |
| "loss": 0.0069, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 3.980246913580247, | |
| "grad_norm": 0.08173320442438126, | |
| "learning_rate": 4.0790123456790126e-06, | |
| "loss": 0.1177, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 3.985185185185185, | |
| "grad_norm": 0.10468322783708572, | |
| "learning_rate": 4.05925925925926e-06, | |
| "loss": 0.0602, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 3.9901234567901236, | |
| "grad_norm": 0.1967976987361908, | |
| "learning_rate": 4.039506172839506e-06, | |
| "loss": 0.1996, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 3.995061728395062, | |
| "grad_norm": 16.828914642333984, | |
| "learning_rate": 4.0197530864197534e-06, | |
| "loss": 0.0063, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 142.36537170410156, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.2674, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.9833333333333333, | |
| "eval_loss": 0.06853805482387543, | |
| "eval_runtime": 32.7103, | |
| "eval_samples_per_second": 165.086, | |
| "eval_steps_per_second": 20.636, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 4.004938271604939, | |
| "grad_norm": 0.028582552447915077, | |
| "learning_rate": 3.980246913580247e-06, | |
| "loss": 0.3409, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 4.009876543209876, | |
| "grad_norm": 0.12553012371063232, | |
| "learning_rate": 3.960493827160494e-06, | |
| "loss": 0.1076, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 4.014814814814815, | |
| "grad_norm": 0.08727646619081497, | |
| "learning_rate": 3.940740740740741e-06, | |
| "loss": 0.2658, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 4.019753086419753, | |
| "grad_norm": 40.70219802856445, | |
| "learning_rate": 3.920987654320988e-06, | |
| "loss": 0.1109, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 4.0246913580246915, | |
| "grad_norm": 0.04967527464032173, | |
| "learning_rate": 3.901234567901235e-06, | |
| "loss": 0.2816, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 4.029629629629629, | |
| "grad_norm": 4.632954120635986, | |
| "learning_rate": 3.8814814814814816e-06, | |
| "loss": 0.0101, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 4.034567901234568, | |
| "grad_norm": 11.988831520080566, | |
| "learning_rate": 3.861728395061729e-06, | |
| "loss": 0.1071, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 4.0395061728395065, | |
| "grad_norm": 0.002083718776702881, | |
| "learning_rate": 3.841975308641976e-06, | |
| "loss": 0.3421, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 4.044444444444444, | |
| "grad_norm": 7.259564399719238, | |
| "learning_rate": 3.8222222222222224e-06, | |
| "loss": 0.0545, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 4.049382716049383, | |
| "grad_norm": 0.12477586418390274, | |
| "learning_rate": 3.8024691358024697e-06, | |
| "loss": 0.056, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 4.054320987654321, | |
| "grad_norm": 131.77743530273438, | |
| "learning_rate": 3.7827160493827165e-06, | |
| "loss": 0.1617, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 4.059259259259259, | |
| "grad_norm": 0.1798364818096161, | |
| "learning_rate": 3.7629629629629633e-06, | |
| "loss": 0.0063, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 4.064197530864197, | |
| "grad_norm": 111.68184661865234, | |
| "learning_rate": 3.74320987654321e-06, | |
| "loss": 0.071, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 4.069135802469136, | |
| "grad_norm": 75.00855255126953, | |
| "learning_rate": 3.723456790123457e-06, | |
| "loss": 0.4207, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 4.074074074074074, | |
| "grad_norm": 0.0791148990392685, | |
| "learning_rate": 3.7037037037037037e-06, | |
| "loss": 0.0377, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 4.079012345679012, | |
| "grad_norm": 123.85789489746094, | |
| "learning_rate": 3.6839506172839506e-06, | |
| "loss": 0.282, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 4.083950617283951, | |
| "grad_norm": 0.0917818695306778, | |
| "learning_rate": 3.6641975308641982e-06, | |
| "loss": 0.2107, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 4.088888888888889, | |
| "grad_norm": 93.7401123046875, | |
| "learning_rate": 3.644444444444445e-06, | |
| "loss": 0.4766, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 4.093827160493827, | |
| "grad_norm": 4.973775863647461, | |
| "learning_rate": 3.624691358024692e-06, | |
| "loss": 0.1966, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 4.098765432098766, | |
| "grad_norm": 13.099119186401367, | |
| "learning_rate": 3.6049382716049387e-06, | |
| "loss": 0.0284, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 4.103703703703704, | |
| "grad_norm": 0.14128296077251434, | |
| "learning_rate": 3.5851851851851855e-06, | |
| "loss": 0.3451, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 4.108641975308642, | |
| "grad_norm": 19.09874153137207, | |
| "learning_rate": 3.5654320987654323e-06, | |
| "loss": 0.3137, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 4.11358024691358, | |
| "grad_norm": 33.85554504394531, | |
| "learning_rate": 3.545679012345679e-06, | |
| "loss": 0.1776, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 4.118518518518519, | |
| "grad_norm": 0.02345465123653412, | |
| "learning_rate": 3.525925925925926e-06, | |
| "loss": 0.2006, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 4.1234567901234565, | |
| "grad_norm": 90.08519744873047, | |
| "learning_rate": 3.5061728395061736e-06, | |
| "loss": 0.2977, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 4.128395061728395, | |
| "grad_norm": 41.20042037963867, | |
| "learning_rate": 3.4864197530864204e-06, | |
| "loss": 0.2238, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 4.133333333333334, | |
| "grad_norm": 1.0883228778839111, | |
| "learning_rate": 3.4666666666666672e-06, | |
| "loss": 0.0693, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 4.1382716049382715, | |
| "grad_norm": 0.03349454700946808, | |
| "learning_rate": 3.446913580246914e-06, | |
| "loss": 0.1569, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 4.14320987654321, | |
| "grad_norm": 18.927202224731445, | |
| "learning_rate": 3.427160493827161e-06, | |
| "loss": 0.2259, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 4.148148148148148, | |
| "grad_norm": 41.818538665771484, | |
| "learning_rate": 3.4074074074074077e-06, | |
| "loss": 0.2041, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 4.153086419753087, | |
| "grad_norm": 0.26372233033180237, | |
| "learning_rate": 3.3876543209876545e-06, | |
| "loss": 0.1225, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 4.158024691358024, | |
| "grad_norm": 45.54108810424805, | |
| "learning_rate": 3.3679012345679013e-06, | |
| "loss": 0.2084, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 4.162962962962963, | |
| "grad_norm": 0.014255751855671406, | |
| "learning_rate": 3.348148148148148e-06, | |
| "loss": 0.0153, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 4.167901234567902, | |
| "grad_norm": 0.8963614106178284, | |
| "learning_rate": 3.3283950617283953e-06, | |
| "loss": 0.0802, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 4.172839506172839, | |
| "grad_norm": 32.044166564941406, | |
| "learning_rate": 3.3086419753086426e-06, | |
| "loss": 0.1971, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 4.177777777777778, | |
| "grad_norm": 0.006651794072240591, | |
| "learning_rate": 3.2888888888888894e-06, | |
| "loss": 0.0366, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 4.182716049382716, | |
| "grad_norm": 1.5995298624038696, | |
| "learning_rate": 3.2691358024691362e-06, | |
| "loss": 0.2041, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 4.187654320987654, | |
| "grad_norm": 0.07189402729272842, | |
| "learning_rate": 3.249382716049383e-06, | |
| "loss": 0.1008, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 4.192592592592592, | |
| "grad_norm": 0.014369451440870762, | |
| "learning_rate": 3.22962962962963e-06, | |
| "loss": 0.1384, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 4.197530864197531, | |
| "grad_norm": 2.7586021423339844, | |
| "learning_rate": 3.2098765432098767e-06, | |
| "loss": 0.1149, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 4.2024691358024695, | |
| "grad_norm": 0.25027868151664734, | |
| "learning_rate": 3.1901234567901235e-06, | |
| "loss": 0.1085, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 4.207407407407407, | |
| "grad_norm": 21.993419647216797, | |
| "learning_rate": 3.1703703703703707e-06, | |
| "loss": 0.1086, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 4.212345679012346, | |
| "grad_norm": 108.14185333251953, | |
| "learning_rate": 3.1506172839506175e-06, | |
| "loss": 0.274, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 4.217283950617284, | |
| "grad_norm": 0.006499402225017548, | |
| "learning_rate": 3.1308641975308648e-06, | |
| "loss": 0.1101, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 4.222222222222222, | |
| "grad_norm": 25.40144920349121, | |
| "learning_rate": 3.1111111111111116e-06, | |
| "loss": 0.3034, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 4.22716049382716, | |
| "grad_norm": 0.04093475639820099, | |
| "learning_rate": 3.0913580246913584e-06, | |
| "loss": 0.1373, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 4.232098765432099, | |
| "grad_norm": 0.3943523168563843, | |
| "learning_rate": 3.0716049382716052e-06, | |
| "loss": 0.1059, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 4.237037037037037, | |
| "grad_norm": 34.58479309082031, | |
| "learning_rate": 3.051851851851852e-06, | |
| "loss": 0.1032, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 4.241975308641975, | |
| "grad_norm": 79.955810546875, | |
| "learning_rate": 3.032098765432099e-06, | |
| "loss": 0.1232, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 4.246913580246914, | |
| "grad_norm": 47.233482360839844, | |
| "learning_rate": 3.012345679012346e-06, | |
| "loss": 0.1098, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 4.2518518518518515, | |
| "grad_norm": 138.7650909423828, | |
| "learning_rate": 2.992592592592593e-06, | |
| "loss": 0.1554, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 4.25679012345679, | |
| "grad_norm": 34.47438430786133, | |
| "learning_rate": 2.9728395061728397e-06, | |
| "loss": 0.1909, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 4.261728395061729, | |
| "grad_norm": 0.10936783254146576, | |
| "learning_rate": 2.953086419753087e-06, | |
| "loss": 0.1279, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 4.266666666666667, | |
| "grad_norm": 66.3951416015625, | |
| "learning_rate": 2.9333333333333338e-06, | |
| "loss": 0.4878, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 4.271604938271605, | |
| "grad_norm": 0.7240855097770691, | |
| "learning_rate": 2.9135802469135806e-06, | |
| "loss": 0.171, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 4.276543209876543, | |
| "grad_norm": 84.10567474365234, | |
| "learning_rate": 2.8938271604938274e-06, | |
| "loss": 0.265, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 4.281481481481482, | |
| "grad_norm": 0.03191656991839409, | |
| "learning_rate": 2.874074074074074e-06, | |
| "loss": 0.3997, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 4.286419753086419, | |
| "grad_norm": 0.05699067562818527, | |
| "learning_rate": 2.854320987654321e-06, | |
| "loss": 0.0334, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 4.291358024691358, | |
| "grad_norm": 0.03787963092327118, | |
| "learning_rate": 2.8345679012345683e-06, | |
| "loss": 0.0026, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 4.296296296296296, | |
| "grad_norm": 0.32715028524398804, | |
| "learning_rate": 2.814814814814815e-06, | |
| "loss": 0.0851, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 4.3012345679012345, | |
| "grad_norm": 1.704313039779663, | |
| "learning_rate": 2.795061728395062e-06, | |
| "loss": 0.2827, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 4.306172839506173, | |
| "grad_norm": 35.010746002197266, | |
| "learning_rate": 2.7753086419753087e-06, | |
| "loss": 0.307, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 4.311111111111111, | |
| "grad_norm": 50.50590133666992, | |
| "learning_rate": 2.755555555555556e-06, | |
| "loss": 0.1594, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 4.3160493827160495, | |
| "grad_norm": 31.76420783996582, | |
| "learning_rate": 2.7358024691358028e-06, | |
| "loss": 0.1536, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 4.320987654320987, | |
| "grad_norm": 0.11124283820390701, | |
| "learning_rate": 2.7160493827160496e-06, | |
| "loss": 0.1278, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 4.325925925925926, | |
| "grad_norm": 29.00436019897461, | |
| "learning_rate": 2.6962962962962964e-06, | |
| "loss": 0.0417, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 4.330864197530865, | |
| "grad_norm": 0.002402759389951825, | |
| "learning_rate": 2.6765432098765436e-06, | |
| "loss": 0.077, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 4.335802469135802, | |
| "grad_norm": 5.55736780166626, | |
| "learning_rate": 2.6567901234567904e-06, | |
| "loss": 0.1247, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 4.340740740740741, | |
| "grad_norm": 0.024351775646209717, | |
| "learning_rate": 2.6370370370370373e-06, | |
| "loss": 0.1003, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 4.345679012345679, | |
| "grad_norm": 0.009600900113582611, | |
| "learning_rate": 2.617283950617284e-06, | |
| "loss": 0.1143, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 4.350617283950617, | |
| "grad_norm": 0.001896082772873342, | |
| "learning_rate": 2.597530864197531e-06, | |
| "loss": 0.0972, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 4.355555555555555, | |
| "grad_norm": 0.0376252606511116, | |
| "learning_rate": 2.577777777777778e-06, | |
| "loss": 0.1537, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 4.360493827160494, | |
| "grad_norm": 0.010516272857785225, | |
| "learning_rate": 2.558024691358025e-06, | |
| "loss": 0.0149, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 4.3654320987654325, | |
| "grad_norm": 30.120134353637695, | |
| "learning_rate": 2.5382716049382718e-06, | |
| "loss": 0.0042, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 4.37037037037037, | |
| "grad_norm": 0.48482951521873474, | |
| "learning_rate": 2.5185185185185186e-06, | |
| "loss": 0.1258, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 4.375308641975309, | |
| "grad_norm": 9.926421165466309, | |
| "learning_rate": 2.4987654320987654e-06, | |
| "loss": 0.1866, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 4.380246913580247, | |
| "grad_norm": 0.024937864392995834, | |
| "learning_rate": 2.4790123456790126e-06, | |
| "loss": 0.0231, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 4.385185185185185, | |
| "grad_norm": 0.40552499890327454, | |
| "learning_rate": 2.4592592592592594e-06, | |
| "loss": 0.0423, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 4.390123456790123, | |
| "grad_norm": 1.134421944618225, | |
| "learning_rate": 2.4395061728395063e-06, | |
| "loss": 0.1767, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 4.395061728395062, | |
| "grad_norm": 0.06691499054431915, | |
| "learning_rate": 2.419753086419753e-06, | |
| "loss": 0.2377, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "grad_norm": 1.1887983083724976, | |
| "learning_rate": 2.4000000000000003e-06, | |
| "loss": 0.1737, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 4.404938271604938, | |
| "grad_norm": 1.4004325866699219, | |
| "learning_rate": 2.380246913580247e-06, | |
| "loss": 0.162, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 4.409876543209877, | |
| "grad_norm": 5.580018520355225, | |
| "learning_rate": 2.360493827160494e-06, | |
| "loss": 0.251, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 4.4148148148148145, | |
| "grad_norm": 0.007224132306873798, | |
| "learning_rate": 2.3407407407407408e-06, | |
| "loss": 0.1454, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 4.419753086419753, | |
| "grad_norm": 154.13819885253906, | |
| "learning_rate": 2.3209876543209876e-06, | |
| "loss": 0.3889, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 4.424691358024692, | |
| "grad_norm": 32.98945236206055, | |
| "learning_rate": 2.301234567901235e-06, | |
| "loss": 0.2466, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 4.42962962962963, | |
| "grad_norm": 0.0013707876205444336, | |
| "learning_rate": 2.2814814814814816e-06, | |
| "loss": 0.2529, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 4.434567901234568, | |
| "grad_norm": 80.57937622070312, | |
| "learning_rate": 2.2617283950617284e-06, | |
| "loss": 0.1712, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 4.439506172839506, | |
| "grad_norm": 129.87698364257812, | |
| "learning_rate": 2.2419753086419753e-06, | |
| "loss": 0.1409, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 4.444444444444445, | |
| "grad_norm": 61.0521354675293, | |
| "learning_rate": 2.222222222222222e-06, | |
| "loss": 0.1277, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 4.449382716049382, | |
| "grad_norm": 0.05561920627951622, | |
| "learning_rate": 2.2024691358024693e-06, | |
| "loss": 0.1921, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 4.454320987654321, | |
| "grad_norm": 0.02089673839509487, | |
| "learning_rate": 2.182716049382716e-06, | |
| "loss": 0.0877, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 4.459259259259259, | |
| "grad_norm": 0.0033945185132324696, | |
| "learning_rate": 2.162962962962963e-06, | |
| "loss": 0.1127, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 4.4641975308641975, | |
| "grad_norm": 0.00884201843291521, | |
| "learning_rate": 2.1432098765432098e-06, | |
| "loss": 0.1677, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 4.469135802469136, | |
| "grad_norm": 16.309391021728516, | |
| "learning_rate": 2.123456790123457e-06, | |
| "loss": 0.1119, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 4.474074074074074, | |
| "grad_norm": 0.035716574639081955, | |
| "learning_rate": 2.103703703703704e-06, | |
| "loss": 0.068, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 4.4790123456790125, | |
| "grad_norm": 0.009720105677843094, | |
| "learning_rate": 2.0839506172839506e-06, | |
| "loss": 0.0933, | |
| "step": 9070 | |
| }, | |
| { | |
| "epoch": 4.48395061728395, | |
| "grad_norm": 0.2953310012817383, | |
| "learning_rate": 2.0641975308641974e-06, | |
| "loss": 0.0775, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 4.488888888888889, | |
| "grad_norm": 4.523210525512695, | |
| "learning_rate": 2.0444444444444447e-06, | |
| "loss": 0.2808, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 4.493827160493828, | |
| "grad_norm": 2.265265464782715, | |
| "learning_rate": 2.0246913580246915e-06, | |
| "loss": 0.0274, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 4.498765432098765, | |
| "grad_norm": 2.9944541454315186, | |
| "learning_rate": 2.0049382716049383e-06, | |
| "loss": 0.1563, | |
| "step": 9110 | |
| }, | |
| { | |
| "epoch": 4.503703703703704, | |
| "grad_norm": 15.32995891571045, | |
| "learning_rate": 1.985185185185185e-06, | |
| "loss": 0.0304, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 4.508641975308642, | |
| "grad_norm": 124.7613754272461, | |
| "learning_rate": 1.9654320987654324e-06, | |
| "loss": 0.2997, | |
| "step": 9130 | |
| }, | |
| { | |
| "epoch": 4.51358024691358, | |
| "grad_norm": 0.20713317394256592, | |
| "learning_rate": 1.945679012345679e-06, | |
| "loss": 0.1026, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 4.518518518518518, | |
| "grad_norm": 38.10224533081055, | |
| "learning_rate": 1.925925925925926e-06, | |
| "loss": 0.0983, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 4.523456790123457, | |
| "grad_norm": 0.042433250695466995, | |
| "learning_rate": 1.906172839506173e-06, | |
| "loss": 0.0291, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 4.528395061728395, | |
| "grad_norm": 3.1156327724456787, | |
| "learning_rate": 1.8864197530864198e-06, | |
| "loss": 0.0577, | |
| "step": 9170 | |
| }, | |
| { | |
| "epoch": 4.533333333333333, | |
| "grad_norm": 0.026819046586751938, | |
| "learning_rate": 1.8666666666666669e-06, | |
| "loss": 0.1211, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 4.538271604938272, | |
| "grad_norm": 0.4800088107585907, | |
| "learning_rate": 1.8469135802469137e-06, | |
| "loss": 0.0023, | |
| "step": 9190 | |
| }, | |
| { | |
| "epoch": 4.54320987654321, | |
| "grad_norm": 0.050341859459877014, | |
| "learning_rate": 1.8271604938271605e-06, | |
| "loss": 0.036, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 4.548148148148148, | |
| "grad_norm": 0.11272630095481873, | |
| "learning_rate": 1.8074074074074075e-06, | |
| "loss": 0.0335, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 4.553086419753086, | |
| "grad_norm": 44.774688720703125, | |
| "learning_rate": 1.7876543209876545e-06, | |
| "loss": 0.1142, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 4.558024691358025, | |
| "grad_norm": 0.0022994689643383026, | |
| "learning_rate": 1.7679012345679014e-06, | |
| "loss": 0.0641, | |
| "step": 9230 | |
| }, | |
| { | |
| "epoch": 4.562962962962963, | |
| "grad_norm": 0.9468904733657837, | |
| "learning_rate": 1.7481481481481482e-06, | |
| "loss": 0.1574, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 4.567901234567901, | |
| "grad_norm": 0.022345565259456635, | |
| "learning_rate": 1.7283950617283952e-06, | |
| "loss": 0.1025, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 4.57283950617284, | |
| "grad_norm": 12.888065338134766, | |
| "learning_rate": 1.7086419753086422e-06, | |
| "loss": 0.1864, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 4.5777777777777775, | |
| "grad_norm": 94.58697509765625, | |
| "learning_rate": 1.688888888888889e-06, | |
| "loss": 0.1861, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 4.582716049382716, | |
| "grad_norm": 66.434326171875, | |
| "learning_rate": 1.6691358024691359e-06, | |
| "loss": 0.0646, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 4.587654320987655, | |
| "grad_norm": 0.005768710281699896, | |
| "learning_rate": 1.6493827160493827e-06, | |
| "loss": 0.1047, | |
| "step": 9290 | |
| }, | |
| { | |
| "epoch": 4.592592592592593, | |
| "grad_norm": 0.08475484699010849, | |
| "learning_rate": 1.62962962962963e-06, | |
| "loss": 0.1706, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 4.597530864197531, | |
| "grad_norm": 0.871222972869873, | |
| "learning_rate": 1.6098765432098767e-06, | |
| "loss": 0.0384, | |
| "step": 9310 | |
| }, | |
| { | |
| "epoch": 4.602469135802469, | |
| "grad_norm": 35.023040771484375, | |
| "learning_rate": 1.5901234567901235e-06, | |
| "loss": 0.1562, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 4.607407407407408, | |
| "grad_norm": 0.08310205489397049, | |
| "learning_rate": 1.5703703703703704e-06, | |
| "loss": 0.1636, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 4.612345679012345, | |
| "grad_norm": 0.008625690825283527, | |
| "learning_rate": 1.5506172839506172e-06, | |
| "loss": 0.1299, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 4.617283950617284, | |
| "grad_norm": 0.07079397141933441, | |
| "learning_rate": 1.5308641975308644e-06, | |
| "loss": 0.2401, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 4.622222222222222, | |
| "grad_norm": 0.002696413081139326, | |
| "learning_rate": 1.5111111111111112e-06, | |
| "loss": 0.1377, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 4.62716049382716, | |
| "grad_norm": 52.69441604614258, | |
| "learning_rate": 1.491358024691358e-06, | |
| "loss": 0.3121, | |
| "step": 9370 | |
| }, | |
| { | |
| "epoch": 4.632098765432099, | |
| "grad_norm": 192.6532745361328, | |
| "learning_rate": 1.4716049382716049e-06, | |
| "loss": 0.0441, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 4.637037037037037, | |
| "grad_norm": 249.43846130371094, | |
| "learning_rate": 1.451851851851852e-06, | |
| "loss": 0.299, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 4.6419753086419755, | |
| "grad_norm": 0.05828845128417015, | |
| "learning_rate": 1.432098765432099e-06, | |
| "loss": 0.0683, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 4.646913580246913, | |
| "grad_norm": 176.3085174560547, | |
| "learning_rate": 1.4123456790123457e-06, | |
| "loss": 0.0396, | |
| "step": 9410 | |
| }, | |
| { | |
| "epoch": 4.651851851851852, | |
| "grad_norm": 3.0951056480407715, | |
| "learning_rate": 1.3925925925925925e-06, | |
| "loss": 0.0874, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 4.6567901234567906, | |
| "grad_norm": 1.2149375677108765, | |
| "learning_rate": 1.3728395061728398e-06, | |
| "loss": 0.1504, | |
| "step": 9430 | |
| }, | |
| { | |
| "epoch": 4.661728395061728, | |
| "grad_norm": 0.05385606735944748, | |
| "learning_rate": 1.3530864197530866e-06, | |
| "loss": 0.0918, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 4.666666666666667, | |
| "grad_norm": 11.512873649597168, | |
| "learning_rate": 1.3333333333333334e-06, | |
| "loss": 0.0947, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 4.671604938271605, | |
| "grad_norm": 0.024780087172985077, | |
| "learning_rate": 1.3135802469135802e-06, | |
| "loss": 0.0753, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 4.676543209876543, | |
| "grad_norm": 0.2996337115764618, | |
| "learning_rate": 1.2938271604938275e-06, | |
| "loss": 0.171, | |
| "step": 9470 | |
| }, | |
| { | |
| "epoch": 4.681481481481481, | |
| "grad_norm": 0.09016973525285721, | |
| "learning_rate": 1.2740740740740743e-06, | |
| "loss": 0.0803, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 4.68641975308642, | |
| "grad_norm": 0.24141840636730194, | |
| "learning_rate": 1.254320987654321e-06, | |
| "loss": 0.1636, | |
| "step": 9490 | |
| }, | |
| { | |
| "epoch": 4.6913580246913575, | |
| "grad_norm": 0.0026981073897331953, | |
| "learning_rate": 1.234567901234568e-06, | |
| "loss": 0.1209, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 4.696296296296296, | |
| "grad_norm": 0.0028422910254448652, | |
| "learning_rate": 1.214814814814815e-06, | |
| "loss": 0.0334, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 4.701234567901235, | |
| "grad_norm": 100.68513488769531, | |
| "learning_rate": 1.1950617283950618e-06, | |
| "loss": 0.3581, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 4.706172839506173, | |
| "grad_norm": 0.001111358986236155, | |
| "learning_rate": 1.1753086419753088e-06, | |
| "loss": 0.0474, | |
| "step": 9530 | |
| }, | |
| { | |
| "epoch": 4.711111111111111, | |
| "grad_norm": 60.36039733886719, | |
| "learning_rate": 1.1555555555555556e-06, | |
| "loss": 0.4299, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 4.716049382716049, | |
| "grad_norm": 0.0019079376943409443, | |
| "learning_rate": 1.1358024691358026e-06, | |
| "loss": 0.0945, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 4.720987654320988, | |
| "grad_norm": 0.46460771560668945, | |
| "learning_rate": 1.1160493827160494e-06, | |
| "loss": 0.312, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 4.725925925925926, | |
| "grad_norm": 1.906554937362671, | |
| "learning_rate": 1.0962962962962965e-06, | |
| "loss": 0.0951, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 4.730864197530864, | |
| "grad_norm": 1.5617965459823608, | |
| "learning_rate": 1.0765432098765433e-06, | |
| "loss": 0.1714, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 4.735802469135803, | |
| "grad_norm": 5.5619893074035645, | |
| "learning_rate": 1.0567901234567903e-06, | |
| "loss": 0.008, | |
| "step": 9590 | |
| }, | |
| { | |
| "epoch": 4.7407407407407405, | |
| "grad_norm": 0.01501123234629631, | |
| "learning_rate": 1.0370370370370371e-06, | |
| "loss": 0.4485, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 4.745679012345679, | |
| "grad_norm": 22.644359588623047, | |
| "learning_rate": 1.0172839506172842e-06, | |
| "loss": 0.0708, | |
| "step": 9610 | |
| }, | |
| { | |
| "epoch": 4.750617283950618, | |
| "grad_norm": 0.0668986439704895, | |
| "learning_rate": 9.97530864197531e-07, | |
| "loss": 0.2169, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 4.7555555555555555, | |
| "grad_norm": 0.5103172063827515, | |
| "learning_rate": 9.77777777777778e-07, | |
| "loss": 0.1709, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 4.760493827160494, | |
| "grad_norm": 63.763214111328125, | |
| "learning_rate": 9.580246913580248e-07, | |
| "loss": 0.3668, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 4.765432098765432, | |
| "grad_norm": 0.013139153830707073, | |
| "learning_rate": 9.382716049382717e-07, | |
| "loss": 0.0545, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 4.770370370370371, | |
| "grad_norm": 0.009220450185239315, | |
| "learning_rate": 9.185185185185185e-07, | |
| "loss": 0.1341, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 4.775308641975308, | |
| "grad_norm": 0.03191829100251198, | |
| "learning_rate": 8.987654320987656e-07, | |
| "loss": 0.1266, | |
| "step": 9670 | |
| }, | |
| { | |
| "epoch": 4.780246913580247, | |
| "grad_norm": 37.74824523925781, | |
| "learning_rate": 8.790123456790124e-07, | |
| "loss": 0.1043, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 4.785185185185185, | |
| "grad_norm": 0.002283359644934535, | |
| "learning_rate": 8.592592592592593e-07, | |
| "loss": 0.033, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 4.790123456790123, | |
| "grad_norm": 0.457742840051651, | |
| "learning_rate": 8.395061728395062e-07, | |
| "loss": 0.1186, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 4.795061728395062, | |
| "grad_norm": 0.031063128262758255, | |
| "learning_rate": 8.197530864197531e-07, | |
| "loss": 0.1125, | |
| "step": 9710 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "grad_norm": 0.012924841605126858, | |
| "learning_rate": 8.000000000000001e-07, | |
| "loss": 0.0156, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 4.8049382716049385, | |
| "grad_norm": 0.11566291004419327, | |
| "learning_rate": 7.802469135802469e-07, | |
| "loss": 0.1286, | |
| "step": 9730 | |
| }, | |
| { | |
| "epoch": 4.809876543209876, | |
| "grad_norm": 0.0004868748364970088, | |
| "learning_rate": 7.604938271604939e-07, | |
| "loss": 0.0012, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 4.814814814814815, | |
| "grad_norm": 81.78207397460938, | |
| "learning_rate": 7.407407407407407e-07, | |
| "loss": 0.1942, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 4.8197530864197535, | |
| "grad_norm": 35.868988037109375, | |
| "learning_rate": 7.209876543209878e-07, | |
| "loss": 0.0298, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 4.824691358024691, | |
| "grad_norm": 147.98873901367188, | |
| "learning_rate": 7.012345679012346e-07, | |
| "loss": 0.2483, | |
| "step": 9770 | |
| }, | |
| { | |
| "epoch": 4.82962962962963, | |
| "grad_norm": 0.013545212335884571, | |
| "learning_rate": 6.814814814814816e-07, | |
| "loss": 0.1292, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 4.834567901234568, | |
| "grad_norm": 0.09124937653541565, | |
| "learning_rate": 6.617283950617284e-07, | |
| "loss": 0.0697, | |
| "step": 9790 | |
| }, | |
| { | |
| "epoch": 4.839506172839506, | |
| "grad_norm": 0.005743750836700201, | |
| "learning_rate": 6.419753086419754e-07, | |
| "loss": 0.0975, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 4.844444444444444, | |
| "grad_norm": 60.90267562866211, | |
| "learning_rate": 6.222222222222223e-07, | |
| "loss": 0.0291, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 4.849382716049383, | |
| "grad_norm": 0.005148892290890217, | |
| "learning_rate": 6.024691358024692e-07, | |
| "loss": 0.0886, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 4.8543209876543205, | |
| "grad_norm": 134.9575958251953, | |
| "learning_rate": 5.827160493827161e-07, | |
| "loss": 0.2035, | |
| "step": 9830 | |
| }, | |
| { | |
| "epoch": 4.859259259259259, | |
| "grad_norm": 3.4503517150878906, | |
| "learning_rate": 5.62962962962963e-07, | |
| "loss": 0.1403, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 4.864197530864198, | |
| "grad_norm": 0.5870628356933594, | |
| "learning_rate": 5.432098765432099e-07, | |
| "loss": 0.0645, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 4.869135802469136, | |
| "grad_norm": 2.804311513900757, | |
| "learning_rate": 5.234567901234569e-07, | |
| "loss": 0.0234, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 4.874074074074074, | |
| "grad_norm": 0.07958123087882996, | |
| "learning_rate": 5.037037037037038e-07, | |
| "loss": 0.0569, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 4.879012345679012, | |
| "grad_norm": 0.013184885494410992, | |
| "learning_rate": 4.839506172839507e-07, | |
| "loss": 0.0514, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 4.883950617283951, | |
| "grad_norm": 0.04747697710990906, | |
| "learning_rate": 4.6419753086419757e-07, | |
| "loss": 0.0002, | |
| "step": 9890 | |
| }, | |
| { | |
| "epoch": 4.888888888888889, | |
| "grad_norm": 3.1284358501434326, | |
| "learning_rate": 4.444444444444445e-07, | |
| "loss": 0.0066, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 4.893827160493827, | |
| "grad_norm": 0.6298085451126099, | |
| "learning_rate": 4.246913580246914e-07, | |
| "loss": 0.0583, | |
| "step": 9910 | |
| }, | |
| { | |
| "epoch": 4.898765432098766, | |
| "grad_norm": 0.012326150201261044, | |
| "learning_rate": 4.0493827160493833e-07, | |
| "loss": 0.0099, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 4.9037037037037035, | |
| "grad_norm": 2.6905531883239746, | |
| "learning_rate": 3.8518518518518525e-07, | |
| "loss": 0.1259, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 4.908641975308642, | |
| "grad_norm": 120.47846221923828, | |
| "learning_rate": 3.6543209876543217e-07, | |
| "loss": 0.1349, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 4.91358024691358, | |
| "grad_norm": 0.0025870108511298895, | |
| "learning_rate": 3.45679012345679e-07, | |
| "loss": 0.1368, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 4.9185185185185185, | |
| "grad_norm": 0.8233745694160461, | |
| "learning_rate": 3.259259259259259e-07, | |
| "loss": 0.1123, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 4.923456790123457, | |
| "grad_norm": 0.0019518863409757614, | |
| "learning_rate": 3.061728395061729e-07, | |
| "loss": 0.285, | |
| "step": 9970 | |
| }, | |
| { | |
| "epoch": 4.928395061728395, | |
| "grad_norm": 0.3376046121120453, | |
| "learning_rate": 2.864197530864198e-07, | |
| "loss": 0.4414, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 4.933333333333334, | |
| "grad_norm": 0.006334675010293722, | |
| "learning_rate": 2.666666666666667e-07, | |
| "loss": 0.0841, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 4.938271604938271, | |
| "grad_norm": 0.002394834766164422, | |
| "learning_rate": 2.469135802469136e-07, | |
| "loss": 0.0574, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 4.94320987654321, | |
| "grad_norm": 0.0032636672258377075, | |
| "learning_rate": 2.2716049382716051e-07, | |
| "loss": 0.2294, | |
| "step": 10010 | |
| }, | |
| { | |
| "epoch": 4.948148148148148, | |
| "grad_norm": 113.65235137939453, | |
| "learning_rate": 2.074074074074074e-07, | |
| "loss": 0.1907, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 4.953086419753086, | |
| "grad_norm": 0.006610922981053591, | |
| "learning_rate": 1.8765432098765433e-07, | |
| "loss": 0.2999, | |
| "step": 10030 | |
| }, | |
| { | |
| "epoch": 4.958024691358025, | |
| "grad_norm": 21.574785232543945, | |
| "learning_rate": 1.6790123456790125e-07, | |
| "loss": 0.1753, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 4.962962962962963, | |
| "grad_norm": 0.019113583490252495, | |
| "learning_rate": 1.4814814814814817e-07, | |
| "loss": 0.1539, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 4.9679012345679014, | |
| "grad_norm": 142.03480529785156, | |
| "learning_rate": 1.2839506172839507e-07, | |
| "loss": 0.201, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 4.972839506172839, | |
| "grad_norm": 0.005930395796895027, | |
| "learning_rate": 1.0864197530864197e-07, | |
| "loss": 0.3736, | |
| "step": 10070 | |
| }, | |
| { | |
| "epoch": 4.977777777777778, | |
| "grad_norm": 0.011048276908695698, | |
| "learning_rate": 8.88888888888889e-08, | |
| "loss": 0.1982, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 4.9827160493827165, | |
| "grad_norm": 0.11679836362600327, | |
| "learning_rate": 6.913580246913582e-08, | |
| "loss": 0.2382, | |
| "step": 10090 | |
| }, | |
| { | |
| "epoch": 4.987654320987654, | |
| "grad_norm": 114.29679870605469, | |
| "learning_rate": 4.938271604938272e-08, | |
| "loss": 0.5543, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 4.992592592592593, | |
| "grad_norm": 0.07527362555265427, | |
| "learning_rate": 2.9629629629629632e-08, | |
| "loss": 0.0568, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 4.997530864197531, | |
| "grad_norm": 1.4482346773147583, | |
| "learning_rate": 9.876543209876544e-09, | |
| "loss": 0.2086, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.9862962962962963, | |
| "eval_loss": 0.060996126383543015, | |
| "eval_runtime": 32.7337, | |
| "eval_samples_per_second": 164.968, | |
| "eval_steps_per_second": 20.621, | |
| "step": 10125 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "step": 10125, | |
| "total_flos": 2.013785167306752e+18, | |
| "train_loss": 0.2160879238260289, | |
| "train_runtime": 1485.1852, | |
| "train_samples_per_second": 54.539, | |
| "train_steps_per_second": 6.817 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 10125, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.013785167306752e+18, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |