| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.4995098039215686, | |
| "eval_steps": 500, | |
| "global_step": 5099, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004901960784313725, | |
| "grad_norm": 4.5476274490356445, | |
| "learning_rate": 7.84313725490196e-06, | |
| "loss": 0.6369, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.00980392156862745, | |
| "grad_norm": 2.52302885055542, | |
| "learning_rate": 1.568627450980392e-05, | |
| "loss": 0.484, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.014705882352941176, | |
| "grad_norm": 3.1443543434143066, | |
| "learning_rate": 2.3529411764705884e-05, | |
| "loss": 0.3252, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0196078431372549, | |
| "grad_norm": 2.1440389156341553, | |
| "learning_rate": 3.137254901960784e-05, | |
| "loss": 0.2779, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.024509803921568627, | |
| "grad_norm": 1.7569645643234253, | |
| "learning_rate": 3.9215686274509805e-05, | |
| "loss": 0.2387, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.029411764705882353, | |
| "grad_norm": 1.7137173414230347, | |
| "learning_rate": 4.705882352941177e-05, | |
| "loss": 0.2146, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.03431372549019608, | |
| "grad_norm": 1.0686582326889038, | |
| "learning_rate": 5.490196078431373e-05, | |
| "loss": 0.1638, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.0392156862745098, | |
| "grad_norm": 1.834192156791687, | |
| "learning_rate": 6.274509803921569e-05, | |
| "loss": 0.1594, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.04411764705882353, | |
| "grad_norm": 2.2320666313171387, | |
| "learning_rate": 7.058823529411765e-05, | |
| "loss": 0.156, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.049019607843137254, | |
| "grad_norm": 1.2987866401672363, | |
| "learning_rate": 7.843137254901961e-05, | |
| "loss": 0.1447, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.05392156862745098, | |
| "grad_norm": 1.5711545944213867, | |
| "learning_rate": 8.627450980392158e-05, | |
| "loss": 0.1449, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.058823529411764705, | |
| "grad_norm": 0.8892576098442078, | |
| "learning_rate": 9.411764705882353e-05, | |
| "loss": 0.1513, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.06372549019607843, | |
| "grad_norm": 1.1401337385177612, | |
| "learning_rate": 0.00010196078431372549, | |
| "loss": 0.1128, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.06862745098039216, | |
| "grad_norm": 1.375543475151062, | |
| "learning_rate": 0.00010980392156862746, | |
| "loss": 0.1186, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.07352941176470588, | |
| "grad_norm": 1.6076676845550537, | |
| "learning_rate": 0.00011764705882352942, | |
| "loss": 0.1396, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.0784313725490196, | |
| "grad_norm": 1.2637161016464233, | |
| "learning_rate": 0.00012549019607843137, | |
| "loss": 0.1187, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.08333333333333333, | |
| "grad_norm": 0.5812987685203552, | |
| "learning_rate": 0.00013333333333333334, | |
| "loss": 0.1155, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.08823529411764706, | |
| "grad_norm": 0.7302483916282654, | |
| "learning_rate": 0.0001411764705882353, | |
| "loss": 0.1068, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.09313725490196079, | |
| "grad_norm": 0.6003187894821167, | |
| "learning_rate": 0.00014901960784313728, | |
| "loss": 0.1084, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.09803921568627451, | |
| "grad_norm": 1.3157514333724976, | |
| "learning_rate": 0.00015686274509803922, | |
| "loss": 0.1127, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.10294117647058823, | |
| "grad_norm": 0.8480639457702637, | |
| "learning_rate": 0.0001647058823529412, | |
| "loss": 0.1025, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.10784313725490197, | |
| "grad_norm": 1.0640238523483276, | |
| "learning_rate": 0.00017254901960784316, | |
| "loss": 0.1214, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.11274509803921569, | |
| "grad_norm": 0.7853420972824097, | |
| "learning_rate": 0.0001803921568627451, | |
| "loss": 0.1103, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.11764705882352941, | |
| "grad_norm": 0.760675847530365, | |
| "learning_rate": 0.00018823529411764707, | |
| "loss": 0.1071, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.12254901960784313, | |
| "grad_norm": 1.1404098272323608, | |
| "learning_rate": 0.000196078431372549, | |
| "loss": 0.1004, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.12745098039215685, | |
| "grad_norm": 0.6359620690345764, | |
| "learning_rate": 0.0001999994742235753, | |
| "loss": 0.1065, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.1323529411764706, | |
| "grad_norm": 0.7933241724967957, | |
| "learning_rate": 0.00019999526804535039, | |
| "loss": 0.0943, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.13725490196078433, | |
| "grad_norm": 1.599077582359314, | |
| "learning_rate": 0.00019998685586582082, | |
| "loss": 0.1304, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.14215686274509803, | |
| "grad_norm": 0.8844221234321594, | |
| "learning_rate": 0.00019997423803881975, | |
| "loss": 0.0917, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.14705882352941177, | |
| "grad_norm": 1.2456647157669067, | |
| "learning_rate": 0.00019995741509507825, | |
| "loss": 0.111, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.15196078431372548, | |
| "grad_norm": 0.6590626239776611, | |
| "learning_rate": 0.00019993638774220307, | |
| "loss": 0.1022, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.1568627450980392, | |
| "grad_norm": 0.7061448693275452, | |
| "learning_rate": 0.00019991115686464675, | |
| "loss": 0.0938, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.16176470588235295, | |
| "grad_norm": 1.0512727499008179, | |
| "learning_rate": 0.00019988172352367056, | |
| "loss": 0.1059, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.16666666666666666, | |
| "grad_norm": 0.6884363889694214, | |
| "learning_rate": 0.00019984808895729978, | |
| "loss": 0.0801, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.1715686274509804, | |
| "grad_norm": 0.8961064219474792, | |
| "learning_rate": 0.00019981025458027169, | |
| "loss": 0.0872, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.17647058823529413, | |
| "grad_norm": 0.7410668730735779, | |
| "learning_rate": 0.00019976822198397595, | |
| "loss": 0.0935, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.18137254901960784, | |
| "grad_norm": 0.8089532256126404, | |
| "learning_rate": 0.00019972199293638777, | |
| "loss": 0.0806, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.18627450980392157, | |
| "grad_norm": 0.6644020676612854, | |
| "learning_rate": 0.00019967156938199355, | |
| "loss": 0.0885, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.19117647058823528, | |
| "grad_norm": 0.8422799110412598, | |
| "learning_rate": 0.00019961695344170895, | |
| "loss": 0.0952, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.19607843137254902, | |
| "grad_norm": 0.8162615299224854, | |
| "learning_rate": 0.00019955814741278986, | |
| "loss": 0.0802, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.20098039215686275, | |
| "grad_norm": 0.7302709221839905, | |
| "learning_rate": 0.0001994951537687357, | |
| "loss": 0.0884, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.20588235294117646, | |
| "grad_norm": 0.7032344937324524, | |
| "learning_rate": 0.00019942797515918527, | |
| "loss": 0.0896, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.2107843137254902, | |
| "grad_norm": 0.8042428493499756, | |
| "learning_rate": 0.00019935661440980554, | |
| "loss": 0.0811, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.21568627450980393, | |
| "grad_norm": 0.6656658053398132, | |
| "learning_rate": 0.00019928107452217255, | |
| "loss": 0.0856, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.22058823529411764, | |
| "grad_norm": 1.2202825546264648, | |
| "learning_rate": 0.00019920135867364534, | |
| "loss": 0.0895, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.22549019607843138, | |
| "grad_norm": 0.8210168480873108, | |
| "learning_rate": 0.00019911747021723216, | |
| "loss": 0.0807, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.23039215686274508, | |
| "grad_norm": 0.7217456102371216, | |
| "learning_rate": 0.0001990294126814496, | |
| "loss": 0.0814, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.23529411764705882, | |
| "grad_norm": 0.743126392364502, | |
| "learning_rate": 0.00019893718977017402, | |
| "loss": 0.0887, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.24019607843137256, | |
| "grad_norm": 0.7190248370170593, | |
| "learning_rate": 0.00019884080536248578, | |
| "loss": 0.0859, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.24509803921568626, | |
| "grad_norm": 0.5253967046737671, | |
| "learning_rate": 0.00019874026351250623, | |
| "loss": 0.0678, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 0.8271141052246094, | |
| "learning_rate": 0.00019863556844922696, | |
| "loss": 0.0762, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.2549019607843137, | |
| "grad_norm": 0.7656545639038086, | |
| "learning_rate": 0.0001985267245763321, | |
| "loss": 0.0724, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.25980392156862747, | |
| "grad_norm": 0.6673869490623474, | |
| "learning_rate": 0.00019841373647201297, | |
| "loss": 0.0817, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.2647058823529412, | |
| "grad_norm": 0.880395770072937, | |
| "learning_rate": 0.00019829660888877565, | |
| "loss": 0.0897, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.2696078431372549, | |
| "grad_norm": 0.7278539538383484, | |
| "learning_rate": 0.00019817534675324093, | |
| "loss": 0.0808, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.27450980392156865, | |
| "grad_norm": 0.5380986928939819, | |
| "learning_rate": 0.00019804995516593712, | |
| "loss": 0.077, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.27941176470588236, | |
| "grad_norm": 0.9306485652923584, | |
| "learning_rate": 0.00019792043940108564, | |
| "loss": 0.0883, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.28431372549019607, | |
| "grad_norm": 0.9304268956184387, | |
| "learning_rate": 0.00019778680490637902, | |
| "loss": 0.0899, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.28921568627450983, | |
| "grad_norm": 0.7331838607788086, | |
| "learning_rate": 0.00019764905730275184, | |
| "loss": 0.0709, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.29411764705882354, | |
| "grad_norm": 0.47580260038375854, | |
| "learning_rate": 0.00019750720238414425, | |
| "loss": 0.0857, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.29901960784313725, | |
| "grad_norm": 0.5752102732658386, | |
| "learning_rate": 0.0001973612461172583, | |
| "loss": 0.0838, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.30392156862745096, | |
| "grad_norm": 0.4644894599914551, | |
| "learning_rate": 0.00019721119464130707, | |
| "loss": 0.0851, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.3088235294117647, | |
| "grad_norm": 0.7036497592926025, | |
| "learning_rate": 0.00019705705426775616, | |
| "loss": 0.0741, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.3137254901960784, | |
| "grad_norm": 0.499897301197052, | |
| "learning_rate": 0.0001968988314800585, | |
| "loss": 0.0718, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.31862745098039214, | |
| "grad_norm": 0.7211794853210449, | |
| "learning_rate": 0.0001967365329333816, | |
| "loss": 0.0798, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.3235294117647059, | |
| "grad_norm": 0.6176502108573914, | |
| "learning_rate": 0.0001965701654543274, | |
| "loss": 0.0695, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.3284313725490196, | |
| "grad_norm": 0.7395395636558533, | |
| "learning_rate": 0.0001963997360406454, | |
| "loss": 0.0581, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 0.5304160714149475, | |
| "learning_rate": 0.00019622525186093818, | |
| "loss": 0.0826, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.3382352941176471, | |
| "grad_norm": 0.46235349774360657, | |
| "learning_rate": 0.0001960467202543599, | |
| "loss": 0.056, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.3431372549019608, | |
| "grad_norm": 0.5242049098014832, | |
| "learning_rate": 0.00019586414873030758, | |
| "loss": 0.0728, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.3480392156862745, | |
| "grad_norm": 0.552486777305603, | |
| "learning_rate": 0.00019567754496810534, | |
| "loss": 0.0806, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.35294117647058826, | |
| "grad_norm": 0.5002785325050354, | |
| "learning_rate": 0.0001954869168166812, | |
| "loss": 0.0643, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.35784313725490197, | |
| "grad_norm": 0.47353097796440125, | |
| "learning_rate": 0.00019529227229423717, | |
| "loss": 0.0838, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.3627450980392157, | |
| "grad_norm": 0.4200286865234375, | |
| "learning_rate": 0.00019509361958791174, | |
| "loss": 0.0776, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.36764705882352944, | |
| "grad_norm": 0.6603316068649292, | |
| "learning_rate": 0.00019489096705343578, | |
| "loss": 0.0705, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.37254901960784315, | |
| "grad_norm": 0.37562692165374756, | |
| "learning_rate": 0.0001946843232147809, | |
| "loss": 0.072, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.37745098039215685, | |
| "grad_norm": 0.6199838519096375, | |
| "learning_rate": 0.0001944736967638009, | |
| "loss": 0.0649, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.38235294117647056, | |
| "grad_norm": 0.7614375948905945, | |
| "learning_rate": 0.0001942590965598663, | |
| "loss": 0.0735, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.3872549019607843, | |
| "grad_norm": 0.671489953994751, | |
| "learning_rate": 0.00019404053162949155, | |
| "loss": 0.065, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.39215686274509803, | |
| "grad_norm": 0.5170246362686157, | |
| "learning_rate": 0.0001938180111659556, | |
| "loss": 0.078, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.39705882352941174, | |
| "grad_norm": 0.5392031073570251, | |
| "learning_rate": 0.00019359154452891483, | |
| "loss": 0.063, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.4019607843137255, | |
| "grad_norm": 0.6858069896697998, | |
| "learning_rate": 0.00019336114124400978, | |
| "loss": 0.0783, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.4068627450980392, | |
| "grad_norm": 0.7257099151611328, | |
| "learning_rate": 0.0001931268110024642, | |
| "loss": 0.0798, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.4117647058823529, | |
| "grad_norm": 0.7296270132064819, | |
| "learning_rate": 0.00019288856366067746, | |
| "loss": 0.0619, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.4166666666666667, | |
| "grad_norm": 0.6048017740249634, | |
| "learning_rate": 0.0001926464092398101, | |
| "loss": 0.0634, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.4215686274509804, | |
| "grad_norm": 0.3223126232624054, | |
| "learning_rate": 0.00019240035792536216, | |
| "loss": 0.0755, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.4264705882352941, | |
| "grad_norm": 0.45046138763427734, | |
| "learning_rate": 0.0001921504200667449, | |
| "loss": 0.0661, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.43137254901960786, | |
| "grad_norm": 0.609027624130249, | |
| "learning_rate": 0.00019189660617684537, | |
| "loss": 0.0711, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.4362745098039216, | |
| "grad_norm": 0.4166688323020935, | |
| "learning_rate": 0.00019163892693158425, | |
| "loss": 0.0644, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.4411764705882353, | |
| "grad_norm": 0.6250641345977783, | |
| "learning_rate": 0.00019137739316946685, | |
| "loss": 0.0674, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.44607843137254904, | |
| "grad_norm": 0.6781248450279236, | |
| "learning_rate": 0.00019111201589112718, | |
| "loss": 0.0657, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.45098039215686275, | |
| "grad_norm": 0.9098891615867615, | |
| "learning_rate": 0.00019084280625886516, | |
| "loss": 0.0765, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.45588235294117646, | |
| "grad_norm": 0.5926252603530884, | |
| "learning_rate": 0.00019056977559617731, | |
| "loss": 0.0896, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.46078431372549017, | |
| "grad_norm": 0.6467915773391724, | |
| "learning_rate": 0.0001902929353872803, | |
| "loss": 0.0595, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.46568627450980393, | |
| "grad_norm": 0.4950433671474457, | |
| "learning_rate": 0.0001900122972766279, | |
| "loss": 0.0651, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.47058823529411764, | |
| "grad_norm": 0.6317784190177917, | |
| "learning_rate": 0.0001897278730684213, | |
| "loss": 0.08, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.47549019607843135, | |
| "grad_norm": 0.47558578848838806, | |
| "learning_rate": 0.0001894396747261125, | |
| "loss": 0.0622, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.4803921568627451, | |
| "grad_norm": 0.5610472559928894, | |
| "learning_rate": 0.0001891477143719012, | |
| "loss": 0.0667, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.4852941176470588, | |
| "grad_norm": 0.7227151989936829, | |
| "learning_rate": 0.00018885200428622474, | |
| "loss": 0.0648, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.49019607843137253, | |
| "grad_norm": 0.49453797936439514, | |
| "learning_rate": 0.0001885525569072418, | |
| "loss": 0.0663, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.4950980392156863, | |
| "grad_norm": 0.4297734200954437, | |
| "learning_rate": 0.000188249384830309, | |
| "loss": 0.0779, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.39416739344596863, | |
| "learning_rate": 0.00018794250080745136, | |
| "loss": 0.0577, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.5049019607843137, | |
| "grad_norm": 0.6955050230026245, | |
| "learning_rate": 0.0001876319177468256, | |
| "loss": 0.0579, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.5098039215686274, | |
| "grad_norm": 0.5533928871154785, | |
| "learning_rate": 0.00018731764871217753, | |
| "loss": 0.0583, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.5147058823529411, | |
| "grad_norm": 0.4718644618988037, | |
| "learning_rate": 0.00018699970692229233, | |
| "loss": 0.0609, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.5196078431372549, | |
| "grad_norm": 0.39921796321868896, | |
| "learning_rate": 0.00018667810575043864, | |
| "loss": 0.0612, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.5245098039215687, | |
| "grad_norm": 0.34913963079452515, | |
| "learning_rate": 0.0001863528587238061, | |
| "loss": 0.0522, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.5294117647058824, | |
| "grad_norm": 0.5829554796218872, | |
| "learning_rate": 0.00018602397952293618, | |
| "loss": 0.0651, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.5343137254901961, | |
| "grad_norm": 0.7142338156700134, | |
| "learning_rate": 0.00018569148198114695, | |
| "loss": 0.0643, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.5392156862745098, | |
| "grad_norm": 0.24581728875637054, | |
| "learning_rate": 0.00018535538008395124, | |
| "loss": 0.0537, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.5441176470588235, | |
| "grad_norm": 0.41139382123947144, | |
| "learning_rate": 0.0001850156879684681, | |
| "loss": 0.0631, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.5490196078431373, | |
| "grad_norm": 0.4532317519187927, | |
| "learning_rate": 0.00018467241992282843, | |
| "loss": 0.0573, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.553921568627451, | |
| "grad_norm": 0.45865532755851746, | |
| "learning_rate": 0.00018432559038557397, | |
| "loss": 0.053, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.5588235294117647, | |
| "grad_norm": 0.3976840078830719, | |
| "learning_rate": 0.00018397521394504995, | |
| "loss": 0.0529, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.5637254901960784, | |
| "grad_norm": 0.47105035185813904, | |
| "learning_rate": 0.00018362130533879133, | |
| "loss": 0.0671, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.5686274509803921, | |
| "grad_norm": 0.5433268547058105, | |
| "learning_rate": 0.00018326387945290313, | |
| "loss": 0.0529, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.5735294117647058, | |
| "grad_norm": 0.6220820546150208, | |
| "learning_rate": 0.00018290295132143415, | |
| "loss": 0.0697, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.5784313725490197, | |
| "grad_norm": 0.38451075553894043, | |
| "learning_rate": 0.00018253853612574473, | |
| "loss": 0.0621, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.5833333333333334, | |
| "grad_norm": 0.49342110753059387, | |
| "learning_rate": 0.00018217064919386807, | |
| "loss": 0.0603, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.5882352941176471, | |
| "grad_norm": 0.5240128636360168, | |
| "learning_rate": 0.00018179930599986554, | |
| "loss": 0.0614, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.5931372549019608, | |
| "grad_norm": 0.5015797019004822, | |
| "learning_rate": 0.0001814245221631758, | |
| "loss": 0.0651, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.5980392156862745, | |
| "grad_norm": 0.7029892802238464, | |
| "learning_rate": 0.0001810463134479579, | |
| "loss": 0.0598, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.6029411764705882, | |
| "grad_norm": 0.3569225072860718, | |
| "learning_rate": 0.00018066469576242806, | |
| "loss": 0.0479, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.6078431372549019, | |
| "grad_norm": 0.4940333068370819, | |
| "learning_rate": 0.00018027968515819072, | |
| "loss": 0.055, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.6127450980392157, | |
| "grad_norm": 0.5233299732208252, | |
| "learning_rate": 0.00017989129782956323, | |
| "loss": 0.0555, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.6176470588235294, | |
| "grad_norm": 0.35107848048210144, | |
| "learning_rate": 0.00017949955011289465, | |
| "loss": 0.0472, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.6225490196078431, | |
| "grad_norm": 0.42003870010375977, | |
| "learning_rate": 0.00017910445848587885, | |
| "loss": 0.0454, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.6274509803921569, | |
| "grad_norm": 0.24233393371105194, | |
| "learning_rate": 0.00017870603956686117, | |
| "loss": 0.0631, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.6323529411764706, | |
| "grad_norm": 0.5557372570037842, | |
| "learning_rate": 0.0001783043101141395, | |
| "loss": 0.0628, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.6372549019607843, | |
| "grad_norm": 0.33980950713157654, | |
| "learning_rate": 0.00017789928702525952, | |
| "loss": 0.0591, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.6421568627450981, | |
| "grad_norm": 0.2716699242591858, | |
| "learning_rate": 0.00017749098733630368, | |
| "loss": 0.0584, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.6470588235294118, | |
| "grad_norm": 0.42181700468063354, | |
| "learning_rate": 0.00017707942822117495, | |
| "loss": 0.0572, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.6519607843137255, | |
| "grad_norm": 0.46250826120376587, | |
| "learning_rate": 0.00017666462699087422, | |
| "loss": 0.0614, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.6568627450980392, | |
| "grad_norm": 0.7147281169891357, | |
| "learning_rate": 0.00017624660109277223, | |
| "loss": 0.0666, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.6617647058823529, | |
| "grad_norm": 0.6105577945709229, | |
| "learning_rate": 0.00017582536810987576, | |
| "loss": 0.0508, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 0.5564696788787842, | |
| "learning_rate": 0.00017540094576008796, | |
| "loss": 0.0581, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.6715686274509803, | |
| "grad_norm": 0.4955359399318695, | |
| "learning_rate": 0.00017497335189546308, | |
| "loss": 0.0569, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.6764705882352942, | |
| "grad_norm": 0.40812528133392334, | |
| "learning_rate": 0.0001745426045014558, | |
| "loss": 0.065, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.6813725490196079, | |
| "grad_norm": 0.31670013070106506, | |
| "learning_rate": 0.00017410872169616447, | |
| "loss": 0.0632, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.6862745098039216, | |
| "grad_norm": 0.5076479911804199, | |
| "learning_rate": 0.00017367172172956906, | |
| "loss": 0.0558, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.6911764705882353, | |
| "grad_norm": 0.5511890053749084, | |
| "learning_rate": 0.0001732316229827637, | |
| "loss": 0.0669, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.696078431372549, | |
| "grad_norm": 0.32073989510536194, | |
| "learning_rate": 0.00017278844396718336, | |
| "loss": 0.0543, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.7009803921568627, | |
| "grad_norm": 0.5955519080162048, | |
| "learning_rate": 0.00017234220332382528, | |
| "loss": 0.0594, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.7058823529411765, | |
| "grad_norm": 0.5735410451889038, | |
| "learning_rate": 0.00017189291982246493, | |
| "loss": 0.0498, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.7107843137254902, | |
| "grad_norm": 0.4336249530315399, | |
| "learning_rate": 0.0001714406123608665, | |
| "loss": 0.0577, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.7156862745098039, | |
| "grad_norm": 0.2760525047779083, | |
| "learning_rate": 0.00017098529996398796, | |
| "loss": 0.05, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.7205882352941176, | |
| "grad_norm": 0.466795951128006, | |
| "learning_rate": 0.00017052700178318088, | |
| "loss": 0.0435, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.7254901960784313, | |
| "grad_norm": 0.2780659794807434, | |
| "learning_rate": 0.00017006573709538492, | |
| "loss": 0.0516, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.7303921568627451, | |
| "grad_norm": 0.31002551317214966, | |
| "learning_rate": 0.00016960152530231696, | |
| "loss": 0.0494, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.7352941176470589, | |
| "grad_norm": 0.4070112407207489, | |
| "learning_rate": 0.00016913438592965497, | |
| "loss": 0.0594, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.7401960784313726, | |
| "grad_norm": 0.5858240127563477, | |
| "learning_rate": 0.00016866433862621692, | |
| "loss": 0.0421, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.7450980392156863, | |
| "grad_norm": 0.2468300759792328, | |
| "learning_rate": 0.00016819140316313397, | |
| "loss": 0.0499, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 0.4881417155265808, | |
| "learning_rate": 0.00016771559943301926, | |
| "loss": 0.0557, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.7549019607843137, | |
| "grad_norm": 0.428586483001709, | |
| "learning_rate": 0.00016723694744913087, | |
| "loss": 0.0547, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.7598039215686274, | |
| "grad_norm": 0.4626677334308624, | |
| "learning_rate": 0.0001667554673445302, | |
| "loss": 0.0577, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.7647058823529411, | |
| "grad_norm": 0.44555196166038513, | |
| "learning_rate": 0.000166271179371235, | |
| "loss": 0.0498, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.7696078431372549, | |
| "grad_norm": 0.4426194131374359, | |
| "learning_rate": 0.0001657841038993677, | |
| "loss": 0.0491, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.7745098039215687, | |
| "grad_norm": 0.3198953866958618, | |
| "learning_rate": 0.00016529426141629843, | |
| "loss": 0.0472, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.7794117647058824, | |
| "grad_norm": 0.2759292423725128, | |
| "learning_rate": 0.0001648016725257834, | |
| "loss": 0.0508, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.7843137254901961, | |
| "grad_norm": 0.33216890692710876, | |
| "learning_rate": 0.00016430635794709817, | |
| "loss": 0.0516, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.7892156862745098, | |
| "grad_norm": 0.40735071897506714, | |
| "learning_rate": 0.0001638083385141662, | |
| "loss": 0.0549, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.7941176470588235, | |
| "grad_norm": 0.3739156424999237, | |
| "learning_rate": 0.0001633076351746827, | |
| "loss": 0.0543, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.7990196078431373, | |
| "grad_norm": 0.2865970730781555, | |
| "learning_rate": 0.0001628042689892331, | |
| "loss": 0.0557, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.803921568627451, | |
| "grad_norm": 0.6009504795074463, | |
| "learning_rate": 0.00016229826113040767, | |
| "loss": 0.0481, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.8088235294117647, | |
| "grad_norm": 0.2973253130912781, | |
| "learning_rate": 0.00016178963288191072, | |
| "loss": 0.0465, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.8137254901960784, | |
| "grad_norm": 0.36205539107322693, | |
| "learning_rate": 0.00016127840563766527, | |
| "loss": 0.0676, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.8186274509803921, | |
| "grad_norm": 0.41221827268600464, | |
| "learning_rate": 0.0001607646009009135, | |
| "loss": 0.0544, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.8235294117647058, | |
| "grad_norm": 0.7448700666427612, | |
| "learning_rate": 0.00016024824028331195, | |
| "loss": 0.0544, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.8284313725490197, | |
| "grad_norm": 0.5625671744346619, | |
| "learning_rate": 0.0001597293455040227, | |
| "loss": 0.0659, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "grad_norm": 0.5039217472076416, | |
| "learning_rate": 0.00015920793838879966, | |
| "loss": 0.0522, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.8382352941176471, | |
| "grad_norm": 0.6583822965621948, | |
| "learning_rate": 0.00015868404086907077, | |
| "loss": 0.0473, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.8431372549019608, | |
| "grad_norm": 0.4799802899360657, | |
| "learning_rate": 0.00015815767498101522, | |
| "loss": 0.0502, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.8480392156862745, | |
| "grad_norm": 0.38270649313926697, | |
| "learning_rate": 0.00015762886286463683, | |
| "loss": 0.0488, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.8529411764705882, | |
| "grad_norm": 0.5844431519508362, | |
| "learning_rate": 0.0001570976267628326, | |
| "loss": 0.0506, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.8578431372549019, | |
| "grad_norm": 0.3698306977748871, | |
| "learning_rate": 0.00015656398902045727, | |
| "loss": 0.0474, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.8627450980392157, | |
| "grad_norm": 0.4788702428340912, | |
| "learning_rate": 0.00015602797208338337, | |
| "loss": 0.0452, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.8676470588235294, | |
| "grad_norm": 0.47195127606391907, | |
| "learning_rate": 0.00015548959849755715, | |
| "loss": 0.0497, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.8725490196078431, | |
| "grad_norm": 0.3890342712402344, | |
| "learning_rate": 0.00015494889090805018, | |
| "loss": 0.0466, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.8774509803921569, | |
| "grad_norm": 0.24695785343647003, | |
| "learning_rate": 0.00015440587205810692, | |
| "loss": 0.0525, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.8823529411764706, | |
| "grad_norm": 0.41040509939193726, | |
| "learning_rate": 0.00015386056478818814, | |
| "loss": 0.0556, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.8872549019607843, | |
| "grad_norm": 0.5128910541534424, | |
| "learning_rate": 0.00015331299203501, | |
| "loss": 0.0485, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.8921568627450981, | |
| "grad_norm": 0.3270062804222107, | |
| "learning_rate": 0.0001527631768305796, | |
| "loss": 0.0423, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.8970588235294118, | |
| "grad_norm": 0.43017369508743286, | |
| "learning_rate": 0.00015221114230122584, | |
| "loss": 0.0461, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.9019607843137255, | |
| "grad_norm": 0.501888632774353, | |
| "learning_rate": 0.00015165691166662705, | |
| "loss": 0.0472, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.9068627450980392, | |
| "grad_norm": 0.286864310503006, | |
| "learning_rate": 0.00015110050823883406, | |
| "loss": 0.0418, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.9117647058823529, | |
| "grad_norm": 0.5109400153160095, | |
| "learning_rate": 0.00015054195542128968, | |
| "loss": 0.0426, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.9166666666666666, | |
| "grad_norm": 0.32924002408981323, | |
| "learning_rate": 0.00014998127670784448, | |
| "loss": 0.0389, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.9215686274509803, | |
| "grad_norm": 0.21681684255599976, | |
| "learning_rate": 0.0001494184956817684, | |
| "loss": 0.0487, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.9264705882352942, | |
| "grad_norm": 0.20006486773490906, | |
| "learning_rate": 0.00014885363601475888, | |
| "loss": 0.0521, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.9313725490196079, | |
| "grad_norm": 0.6207095980644226, | |
| "learning_rate": 0.00014828672146594511, | |
| "loss": 0.0542, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.9362745098039216, | |
| "grad_norm": 0.5559191703796387, | |
| "learning_rate": 0.00014771777588088884, | |
| "loss": 0.0446, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.9411764705882353, | |
| "grad_norm": 0.3139231503009796, | |
| "learning_rate": 0.00014714682319058112, | |
| "loss": 0.0403, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.946078431372549, | |
| "grad_norm": 0.3132689893245697, | |
| "learning_rate": 0.00014657388741043606, | |
| "loss": 0.0398, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.9509803921568627, | |
| "grad_norm": 0.2745281457901001, | |
| "learning_rate": 0.00014599899263928028, | |
| "loss": 0.0358, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.9558823529411765, | |
| "grad_norm": 0.2483411729335785, | |
| "learning_rate": 0.00014542216305833968, | |
| "loss": 0.0506, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.9607843137254902, | |
| "grad_norm": 0.3583033084869385, | |
| "learning_rate": 0.000144843422930222, | |
| "loss": 0.0442, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.9656862745098039, | |
| "grad_norm": 0.2526845932006836, | |
| "learning_rate": 0.00014426279659789651, | |
| "loss": 0.0458, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.9705882352941176, | |
| "grad_norm": 0.3186612129211426, | |
| "learning_rate": 0.00014368030848367, | |
| "loss": 0.052, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.9754901960784313, | |
| "grad_norm": 0.3088064193725586, | |
| "learning_rate": 0.00014309598308815945, | |
| "loss": 0.0453, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.9803921568627451, | |
| "grad_norm": 0.44008663296699524, | |
| "learning_rate": 0.00014250984498926167, | |
| "loss": 0.0449, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.9852941176470589, | |
| "grad_norm": 0.5169370174407959, | |
| "learning_rate": 0.0001419219188411194, | |
| "loss": 0.0411, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.9901960784313726, | |
| "grad_norm": 0.32191041111946106, | |
| "learning_rate": 0.0001413322293730842, | |
| "loss": 0.0406, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.9950980392156863, | |
| "grad_norm": 0.300047904253006, | |
| "learning_rate": 0.00014074080138867654, | |
| "loss": 0.0432, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.6571453809738159, | |
| "learning_rate": 0.00014014765976454231, | |
| "loss": 0.0421, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.0049019607843137, | |
| "grad_norm": 0.43792489171028137, | |
| "learning_rate": 0.00013955282944940652, | |
| "loss": 0.0389, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.0098039215686274, | |
| "grad_norm": 0.40551769733428955, | |
| "learning_rate": 0.0001389563354630239, | |
| "loss": 0.0418, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 1.0147058823529411, | |
| "grad_norm": 0.3299430012702942, | |
| "learning_rate": 0.0001383582028951265, | |
| "loss": 0.0465, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 1.0196078431372548, | |
| "grad_norm": 0.2800670266151428, | |
| "learning_rate": 0.00013775845690436848, | |
| "loss": 0.0443, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 1.0245098039215685, | |
| "grad_norm": 0.49185529351234436, | |
| "learning_rate": 0.00013715712271726772, | |
| "loss": 0.0415, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 1.0294117647058822, | |
| "grad_norm": 0.6726065278053284, | |
| "learning_rate": 0.0001365542256271448, | |
| "loss": 0.038, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.0343137254901962, | |
| "grad_norm": 0.5443005561828613, | |
| "learning_rate": 0.00013594979099305928, | |
| "loss": 0.0407, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 1.0392156862745099, | |
| "grad_norm": 0.3882359564304352, | |
| "learning_rate": 0.00013534384423874272, | |
| "loss": 0.0479, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 1.0441176470588236, | |
| "grad_norm": 0.3460078239440918, | |
| "learning_rate": 0.00013473641085152957, | |
| "loss": 0.0472, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 1.0490196078431373, | |
| "grad_norm": 0.3264780342578888, | |
| "learning_rate": 0.00013412751638128503, | |
| "loss": 0.0374, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 1.053921568627451, | |
| "grad_norm": 0.28663188219070435, | |
| "learning_rate": 0.0001335171864393304, | |
| "loss": 0.0386, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.0588235294117647, | |
| "grad_norm": 0.22994215786457062, | |
| "learning_rate": 0.00013290544669736576, | |
| "loss": 0.0492, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.0637254901960784, | |
| "grad_norm": 0.2018628716468811, | |
| "learning_rate": 0.0001322923228863902, | |
| "loss": 0.0367, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 1.0686274509803921, | |
| "grad_norm": 0.20426690578460693, | |
| "learning_rate": 0.0001316778407956196, | |
| "loss": 0.0322, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 1.0735294117647058, | |
| "grad_norm": 0.32902026176452637, | |
| "learning_rate": 0.00013106202627140163, | |
| "loss": 0.0321, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 1.0784313725490196, | |
| "grad_norm": 0.3210899233818054, | |
| "learning_rate": 0.00013044490521612904, | |
| "loss": 0.0405, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.0833333333333333, | |
| "grad_norm": 0.23476989567279816, | |
| "learning_rate": 0.00012982650358714967, | |
| "loss": 0.0416, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 1.088235294117647, | |
| "grad_norm": 0.3046343922615051, | |
| "learning_rate": 0.000129206847395675, | |
| "loss": 0.0394, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.093137254901961, | |
| "grad_norm": 0.27315208315849304, | |
| "learning_rate": 0.0001285859627056858, | |
| "loss": 0.0439, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 1.0980392156862746, | |
| "grad_norm": 0.34966346621513367, | |
| "learning_rate": 0.00012796387563283605, | |
| "loss": 0.0387, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.1029411764705883, | |
| "grad_norm": 0.24096551537513733, | |
| "learning_rate": 0.00012734061234335434, | |
| "loss": 0.0412, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.107843137254902, | |
| "grad_norm": 0.26461055874824524, | |
| "learning_rate": 0.00012671619905294326, | |
| "loss": 0.0494, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 1.1127450980392157, | |
| "grad_norm": 0.24669981002807617, | |
| "learning_rate": 0.0001260906620256767, | |
| "loss": 0.0396, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 1.1176470588235294, | |
| "grad_norm": 0.37667712569236755, | |
| "learning_rate": 0.00012546402757289532, | |
| "loss": 0.0426, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 1.1225490196078431, | |
| "grad_norm": 0.3720461130142212, | |
| "learning_rate": 0.00012483632205209953, | |
| "loss": 0.042, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 1.1274509803921569, | |
| "grad_norm": 0.3975540101528168, | |
| "learning_rate": 0.0001242075718658411, | |
| "loss": 0.0464, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.1323529411764706, | |
| "grad_norm": 0.34910279512405396, | |
| "learning_rate": 0.00012357780346061256, | |
| "loss": 0.0412, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 1.1372549019607843, | |
| "grad_norm": 0.34874585270881653, | |
| "learning_rate": 0.00012294704332573462, | |
| "loss": 0.0458, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 1.142156862745098, | |
| "grad_norm": 0.24936801195144653, | |
| "learning_rate": 0.0001223153179922423, | |
| "loss": 0.0437, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 1.1470588235294117, | |
| "grad_norm": 0.39299267530441284, | |
| "learning_rate": 0.00012168265403176864, | |
| "loss": 0.0419, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 1.1519607843137254, | |
| "grad_norm": 0.44494786858558655, | |
| "learning_rate": 0.0001210490780554274, | |
| "loss": 0.0391, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.156862745098039, | |
| "grad_norm": 0.26526331901550293, | |
| "learning_rate": 0.00012041461671269337, | |
| "loss": 0.0338, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 1.161764705882353, | |
| "grad_norm": 0.4189550280570984, | |
| "learning_rate": 0.00011977929669028174, | |
| "loss": 0.0441, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 1.1666666666666667, | |
| "grad_norm": 0.2872006595134735, | |
| "learning_rate": 0.00011914314471102545, | |
| "loss": 0.0427, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 1.1715686274509804, | |
| "grad_norm": 0.2823413908481598, | |
| "learning_rate": 0.0001185061875327512, | |
| "loss": 0.0443, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 1.1764705882352942, | |
| "grad_norm": 0.26944294571876526, | |
| "learning_rate": 0.00011786845194715403, | |
| "loss": 0.0387, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.1813725490196079, | |
| "grad_norm": 0.3672547936439514, | |
| "learning_rate": 0.00011722996477867026, | |
| "loss": 0.0397, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 1.1862745098039216, | |
| "grad_norm": 0.2237931340932846, | |
| "learning_rate": 0.00011659075288334938, | |
| "loss": 0.0444, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 1.1911764705882353, | |
| "grad_norm": 0.2790263295173645, | |
| "learning_rate": 0.00011595084314772429, | |
| "loss": 0.0358, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 1.196078431372549, | |
| "grad_norm": 0.2825845777988434, | |
| "learning_rate": 0.00011531026248768048, | |
| "loss": 0.0368, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 1.2009803921568627, | |
| "grad_norm": 0.27560582756996155, | |
| "learning_rate": 0.00011466903784732381, | |
| "loss": 0.0474, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.2058823529411764, | |
| "grad_norm": 0.4237360656261444, | |
| "learning_rate": 0.00011402719619784734, | |
| "loss": 0.0375, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 1.2107843137254901, | |
| "grad_norm": 0.3769036531448364, | |
| "learning_rate": 0.00011338476453639666, | |
| "loss": 0.0308, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 1.215686274509804, | |
| "grad_norm": 0.32824084162712097, | |
| "learning_rate": 0.00011274176988493454, | |
| "loss": 0.0386, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 1.2205882352941178, | |
| "grad_norm": 0.30048561096191406, | |
| "learning_rate": 0.0001120982392891042, | |
| "loss": 0.0391, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 1.2254901960784315, | |
| "grad_norm": 0.30690962076187134, | |
| "learning_rate": 0.00011145419981709169, | |
| "loss": 0.0443, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.2303921568627452, | |
| "grad_norm": 0.4496728479862213, | |
| "learning_rate": 0.00011080967855848755, | |
| "loss": 0.0447, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 1.2352941176470589, | |
| "grad_norm": 0.25722137093544006, | |
| "learning_rate": 0.00011016470262314707, | |
| "loss": 0.0333, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 1.2401960784313726, | |
| "grad_norm": 0.32415884733200073, | |
| "learning_rate": 0.00010951929914005033, | |
| "loss": 0.0375, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 1.2450980392156863, | |
| "grad_norm": 0.33738696575164795, | |
| "learning_rate": 0.00010887349525616075, | |
| "loss": 0.0408, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 0.3490372896194458, | |
| "learning_rate": 0.00010822731813528354, | |
| "loss": 0.0337, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.2549019607843137, | |
| "grad_norm": 0.42863723635673523, | |
| "learning_rate": 0.00010758079495692294, | |
| "loss": 0.0442, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 1.2598039215686274, | |
| "grad_norm": 0.29293495416641235, | |
| "learning_rate": 0.00010693395291513908, | |
| "loss": 0.0408, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 1.2647058823529411, | |
| "grad_norm": 0.27024197578430176, | |
| "learning_rate": 0.00010628681921740414, | |
| "loss": 0.0377, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 1.2696078431372548, | |
| "grad_norm": 0.35880324244499207, | |
| "learning_rate": 0.00010563942108345785, | |
| "loss": 0.0364, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 1.2745098039215685, | |
| "grad_norm": 0.3253026306629181, | |
| "learning_rate": 0.0001049917857441628, | |
| "loss": 0.0374, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.2794117647058822, | |
| "grad_norm": 0.33250367641448975, | |
| "learning_rate": 0.00010434394044035878, | |
| "loss": 0.0384, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 1.284313725490196, | |
| "grad_norm": 0.34700486063957214, | |
| "learning_rate": 0.00010369591242171719, | |
| "loss": 0.0369, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 1.2892156862745099, | |
| "grad_norm": 0.3287598788738251, | |
| "learning_rate": 0.00010304772894559475, | |
| "loss": 0.0419, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 1.2941176470588236, | |
| "grad_norm": 0.3363092243671417, | |
| "learning_rate": 0.00010239941727588707, | |
| "loss": 0.0419, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 1.2990196078431373, | |
| "grad_norm": 0.32619622349739075, | |
| "learning_rate": 0.0001017510046818817, | |
| "loss": 0.0353, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.303921568627451, | |
| "grad_norm": 0.28630563616752625, | |
| "learning_rate": 0.00010110251843711149, | |
| "loss": 0.0317, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 1.3088235294117647, | |
| "grad_norm": 0.470198392868042, | |
| "learning_rate": 0.00010045398581820702, | |
| "loss": 0.0397, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 1.3137254901960784, | |
| "grad_norm": 0.34590962529182434, | |
| "learning_rate": 9.98054341037495e-05, | |
| "loss": 0.0336, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 1.3186274509803921, | |
| "grad_norm": 0.18714579939842224, | |
| "learning_rate": 9.91568905731234e-05, | |
| "loss": 0.0268, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 1.3235294117647058, | |
| "grad_norm": 0.3055776357650757, | |
| "learning_rate": 9.850838250536885e-05, | |
| "loss": 0.0384, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.3284313725490196, | |
| "grad_norm": 0.33092889189720154, | |
| "learning_rate": 9.785993717803445e-05, | |
| "loss": 0.0323, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 0.26138371229171753, | |
| "learning_rate": 9.721158186602979e-05, | |
| "loss": 0.0391, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 1.3382352941176472, | |
| "grad_norm": 0.2703782320022583, | |
| "learning_rate": 9.656334384047812e-05, | |
| "loss": 0.0268, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 1.343137254901961, | |
| "grad_norm": 0.24206914007663727, | |
| "learning_rate": 9.591525036756952e-05, | |
| "loss": 0.032, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 1.3480392156862746, | |
| "grad_norm": 0.3793281018733978, | |
| "learning_rate": 9.526732870741386e-05, | |
| "loss": 0.0399, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.3529411764705883, | |
| "grad_norm": 0.3143618404865265, | |
| "learning_rate": 9.46196061128942e-05, | |
| "loss": 0.0365, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 1.357843137254902, | |
| "grad_norm": 0.23540951311588287, | |
| "learning_rate": 9.397210982852053e-05, | |
| "loss": 0.0328, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 1.3627450980392157, | |
| "grad_norm": 0.2023368775844574, | |
| "learning_rate": 9.332486708928373e-05, | |
| "loss": 0.0316, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 1.3676470588235294, | |
| "grad_norm": 0.26689231395721436, | |
| "learning_rate": 9.267790511951015e-05, | |
| "loss": 0.0326, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 1.3725490196078431, | |
| "grad_norm": 0.2792396545410156, | |
| "learning_rate": 9.203125113171631e-05, | |
| "loss": 0.0336, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.3774509803921569, | |
| "grad_norm": 0.21045692265033722, | |
| "learning_rate": 9.13849323254645e-05, | |
| "loss": 0.0296, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 1.3823529411764706, | |
| "grad_norm": 0.26224854588508606, | |
| "learning_rate": 9.073897588621853e-05, | |
| "loss": 0.0311, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.3872549019607843, | |
| "grad_norm": 0.30219170451164246, | |
| "learning_rate": 9.009340898420029e-05, | |
| "loss": 0.0379, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 1.392156862745098, | |
| "grad_norm": 0.19660678505897522, | |
| "learning_rate": 8.944825877324708e-05, | |
| "loss": 0.035, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 1.3970588235294117, | |
| "grad_norm": 0.2348472774028778, | |
| "learning_rate": 8.880355238966923e-05, | |
| "loss": 0.0366, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.4019607843137254, | |
| "grad_norm": 0.44347622990608215, | |
| "learning_rate": 8.815931695110885e-05, | |
| "loss": 0.0333, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 1.406862745098039, | |
| "grad_norm": 0.34309887886047363, | |
| "learning_rate": 8.751557955539915e-05, | |
| "loss": 0.0394, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 1.4117647058823528, | |
| "grad_norm": 0.3023855984210968, | |
| "learning_rate": 8.687236727942465e-05, | |
| "loss": 0.0308, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.4166666666666667, | |
| "grad_norm": 0.25127673149108887, | |
| "learning_rate": 8.622970717798227e-05, | |
| "loss": 0.0384, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 1.4215686274509804, | |
| "grad_norm": 0.17014305293560028, | |
| "learning_rate": 8.558762628264345e-05, | |
| "loss": 0.0331, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.4264705882352942, | |
| "grad_norm": 0.32725852727890015, | |
| "learning_rate": 8.494615160061694e-05, | |
| "loss": 0.0326, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 1.4313725490196079, | |
| "grad_norm": 0.2895604968070984, | |
| "learning_rate": 8.430531011361298e-05, | |
| "loss": 0.0319, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.4362745098039216, | |
| "grad_norm": 0.3882890045642853, | |
| "learning_rate": 8.366512877670842e-05, | |
| "loss": 0.0331, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 1.4411764705882353, | |
| "grad_norm": 0.27492624521255493, | |
| "learning_rate": 8.302563451721282e-05, | |
| "loss": 0.0384, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.446078431372549, | |
| "grad_norm": 0.23316094279289246, | |
| "learning_rate": 8.238685423353588e-05, | |
| "loss": 0.0426, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.4509803921568627, | |
| "grad_norm": 0.395353227853775, | |
| "learning_rate": 8.174881479405607e-05, | |
| "loss": 0.0347, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 1.4558823529411764, | |
| "grad_norm": 0.5179559588432312, | |
| "learning_rate": 8.111154303599049e-05, | |
| "loss": 0.0371, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 1.4607843137254901, | |
| "grad_norm": 0.22947251796722412, | |
| "learning_rate": 8.047506576426596e-05, | |
| "loss": 0.03, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 1.465686274509804, | |
| "grad_norm": 0.3831785023212433, | |
| "learning_rate": 7.983940975039166e-05, | |
| "loss": 0.0346, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 1.4705882352941178, | |
| "grad_norm": 0.34753093123435974, | |
| "learning_rate": 7.920460173133304e-05, | |
| "loss": 0.0464, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.4754901960784315, | |
| "grad_norm": 0.24026577174663544, | |
| "learning_rate": 7.85706684083871e-05, | |
| "loss": 0.0334, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 1.4803921568627452, | |
| "grad_norm": 0.32264992594718933, | |
| "learning_rate": 7.793763644605947e-05, | |
| "loss": 0.0329, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 1.4852941176470589, | |
| "grad_norm": 0.25292444229125977, | |
| "learning_rate": 7.730553247094266e-05, | |
| "loss": 0.0306, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 1.4901960784313726, | |
| "grad_norm": 0.21395047008991241, | |
| "learning_rate": 7.667438307059627e-05, | |
| "loss": 0.0346, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.4950980392156863, | |
| "grad_norm": 0.29967001080513, | |
| "learning_rate": 7.604421479242846e-05, | |
| "loss": 0.0403, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 0.2828430235385895, | |
| "learning_rate": 7.541505414257959e-05, | |
| "loss": 0.0327, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.5049019607843137, | |
| "grad_norm": 0.43027809262275696, | |
| "learning_rate": 7.478692758480698e-05, | |
| "loss": 0.0331, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 1.5098039215686274, | |
| "grad_norm": 0.34473538398742676, | |
| "learning_rate": 7.415986153937202e-05, | |
| "loss": 0.0444, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 1.5147058823529411, | |
| "grad_norm": 0.3717981278896332, | |
| "learning_rate": 7.353388238192892e-05, | |
| "loss": 0.042, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 1.5196078431372548, | |
| "grad_norm": 0.15127846598625183, | |
| "learning_rate": 7.29090164424151e-05, | |
| "loss": 0.0296, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.5245098039215685, | |
| "grad_norm": 0.33397403359413147, | |
| "learning_rate": 7.22852900039438e-05, | |
| "loss": 0.0314, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 1.5294117647058822, | |
| "grad_norm": 0.25943371653556824, | |
| "learning_rate": 7.166272930169861e-05, | |
| "loss": 0.0342, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.534313725490196, | |
| "grad_norm": 0.3120077848434448, | |
| "learning_rate": 7.104136052182992e-05, | |
| "loss": 0.0317, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 1.5392156862745097, | |
| "grad_norm": 0.24377594888210297, | |
| "learning_rate": 7.042120980035346e-05, | |
| "loss": 0.0284, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.5441176470588234, | |
| "grad_norm": 0.19070957601070404, | |
| "learning_rate": 6.980230322205099e-05, | |
| "loss": 0.0343, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.5490196078431373, | |
| "grad_norm": 0.22296807169914246, | |
| "learning_rate": 6.918466681937308e-05, | |
| "loss": 0.0299, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.553921568627451, | |
| "grad_norm": 0.2279416173696518, | |
| "learning_rate": 6.856832657134424e-05, | |
| "loss": 0.0333, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 1.5588235294117647, | |
| "grad_norm": 0.2242182493209839, | |
| "learning_rate": 6.795330840247006e-05, | |
| "loss": 0.0331, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.5637254901960784, | |
| "grad_norm": 0.2774062752723694, | |
| "learning_rate": 6.733963818164686e-05, | |
| "loss": 0.0266, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 1.5686274509803921, | |
| "grad_norm": 0.37312522530555725, | |
| "learning_rate": 6.672734172107354e-05, | |
| "loss": 0.0376, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.5735294117647058, | |
| "grad_norm": 0.23322941362857819, | |
| "learning_rate": 6.611644477516595e-05, | |
| "loss": 0.0282, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 1.5784313725490198, | |
| "grad_norm": 0.32735109329223633, | |
| "learning_rate": 6.550697303947345e-05, | |
| "loss": 0.0294, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.5833333333333335, | |
| "grad_norm": 0.21853038668632507, | |
| "learning_rate": 6.489895214959828e-05, | |
| "loss": 0.0259, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 1.5882352941176472, | |
| "grad_norm": 0.3016158640384674, | |
| "learning_rate": 6.429240768011719e-05, | |
| "loss": 0.028, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.593137254901961, | |
| "grad_norm": 0.20449745655059814, | |
| "learning_rate": 6.368736514350568e-05, | |
| "loss": 0.0303, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.5980392156862746, | |
| "grad_norm": 0.2439008206129074, | |
| "learning_rate": 6.308384998906506e-05, | |
| "loss": 0.027, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.6029411764705883, | |
| "grad_norm": 0.28377825021743774, | |
| "learning_rate": 6.248188760185173e-05, | |
| "loss": 0.0302, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 1.607843137254902, | |
| "grad_norm": 0.32138168811798096, | |
| "learning_rate": 6.188150330160971e-05, | |
| "loss": 0.0255, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.6127450980392157, | |
| "grad_norm": 0.23931661248207092, | |
| "learning_rate": 6.128272234170547e-05, | |
| "loss": 0.0284, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 1.6176470588235294, | |
| "grad_norm": 0.26550066471099854, | |
| "learning_rate": 6.068556990806579e-05, | |
| "loss": 0.039, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.6225490196078431, | |
| "grad_norm": 0.20831480622291565, | |
| "learning_rate": 6.0090071118118355e-05, | |
| "loss": 0.0248, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 1.6274509803921569, | |
| "grad_norm": 0.26879703998565674, | |
| "learning_rate": 5.949625101973527e-05, | |
| "loss": 0.0303, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.6323529411764706, | |
| "grad_norm": 0.34190261363983154, | |
| "learning_rate": 5.890413459017958e-05, | |
| "loss": 0.0296, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 1.6372549019607843, | |
| "grad_norm": 0.3451369106769562, | |
| "learning_rate": 5.8313746735054544e-05, | |
| "loss": 0.0274, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.642156862745098, | |
| "grad_norm": 0.2692447304725647, | |
| "learning_rate": 5.77251122872561e-05, | |
| "loss": 0.0303, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.6470588235294117, | |
| "grad_norm": 0.28057631850242615, | |
| "learning_rate": 5.713825600592841e-05, | |
| "loss": 0.0335, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.6519607843137254, | |
| "grad_norm": 0.20118731260299683, | |
| "learning_rate": 5.6553202575422385e-05, | |
| "loss": 0.0339, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 1.656862745098039, | |
| "grad_norm": 0.27384528517723083, | |
| "learning_rate": 5.596997660425746e-05, | |
| "loss": 0.0296, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.6617647058823528, | |
| "grad_norm": 0.27839264273643494, | |
| "learning_rate": 5.538860262408632e-05, | |
| "loss": 0.0306, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 0.26187360286712646, | |
| "learning_rate": 5.480910508866333e-05, | |
| "loss": 0.0327, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.6715686274509802, | |
| "grad_norm": 0.16635389626026154, | |
| "learning_rate": 5.423150837281585e-05, | |
| "loss": 0.0268, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.6764705882352942, | |
| "grad_norm": 0.3123128414154053, | |
| "learning_rate": 5.365583677141883e-05, | |
| "loss": 0.0345, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.6813725490196079, | |
| "grad_norm": 0.30305176973342896, | |
| "learning_rate": 5.308211449837315e-05, | |
| "loss": 0.0264, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.6862745098039216, | |
| "grad_norm": 0.19436487555503845, | |
| "learning_rate": 5.2510365685587026e-05, | |
| "loss": 0.0318, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.6911764705882353, | |
| "grad_norm": 0.2048874795436859, | |
| "learning_rate": 5.1940614381961004e-05, | |
| "loss": 0.0296, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.696078431372549, | |
| "grad_norm": 0.32843217253685, | |
| "learning_rate": 5.137288455237627e-05, | |
| "loss": 0.0288, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.7009803921568627, | |
| "grad_norm": 0.26734060049057007, | |
| "learning_rate": 5.080720007668689e-05, | |
| "loss": 0.0309, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.7058823529411766, | |
| "grad_norm": 0.2050999402999878, | |
| "learning_rate": 5.0243584748715235e-05, | |
| "loss": 0.0283, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.7107843137254903, | |
| "grad_norm": 0.32800912857055664, | |
| "learning_rate": 4.968206227525111e-05, | |
| "loss": 0.0356, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.715686274509804, | |
| "grad_norm": 0.2212320864200592, | |
| "learning_rate": 4.912265627505468e-05, | |
| "loss": 0.0278, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.7205882352941178, | |
| "grad_norm": 0.22088485956192017, | |
| "learning_rate": 4.856539027786305e-05, | |
| "loss": 0.0315, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.7254901960784315, | |
| "grad_norm": 0.2626785635948181, | |
| "learning_rate": 4.8010287723400494e-05, | |
| "loss": 0.0395, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.7303921568627452, | |
| "grad_norm": 0.343022882938385, | |
| "learning_rate": 4.745737196039259e-05, | |
| "loss": 0.0235, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.7352941176470589, | |
| "grad_norm": 0.4321844279766083, | |
| "learning_rate": 4.6906666245583965e-05, | |
| "loss": 0.0393, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.7401960784313726, | |
| "grad_norm": 0.34605053067207336, | |
| "learning_rate": 4.6358193742760305e-05, | |
| "loss": 0.0214, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.7450980392156863, | |
| "grad_norm": 0.2320283055305481, | |
| "learning_rate": 4.5811977521773906e-05, | |
| "loss": 0.0331, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 0.21285474300384521, | |
| "learning_rate": 4.526804055757328e-05, | |
| "loss": 0.0333, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.7549019607843137, | |
| "grad_norm": 0.185288667678833, | |
| "learning_rate": 4.472640572923687e-05, | |
| "loss": 0.0269, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.7598039215686274, | |
| "grad_norm": 0.2527756989002228, | |
| "learning_rate": 4.4187095819010674e-05, | |
| "loss": 0.0296, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.7647058823529411, | |
| "grad_norm": 0.1697828769683838, | |
| "learning_rate": 4.365013351135001e-05, | |
| "loss": 0.0272, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.7696078431372548, | |
| "grad_norm": 0.215946227312088, | |
| "learning_rate": 4.311554139196522e-05, | |
| "loss": 0.0262, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.7745098039215685, | |
| "grad_norm": 0.41971561312675476, | |
| "learning_rate": 4.258334194687188e-05, | |
| "loss": 0.0282, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.7794117647058822, | |
| "grad_norm": 0.18381306529045105, | |
| "learning_rate": 4.205355756144489e-05, | |
| "loss": 0.0293, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.784313725490196, | |
| "grad_norm": 0.24697428941726685, | |
| "learning_rate": 4.152621051947682e-05, | |
| "loss": 0.0205, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.7892156862745097, | |
| "grad_norm": 0.23640835285186768, | |
| "learning_rate": 4.1001323002240754e-05, | |
| "loss": 0.0304, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.7941176470588234, | |
| "grad_norm": 0.21955536305904388, | |
| "learning_rate": 4.047891708755724e-05, | |
| "loss": 0.0281, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.7990196078431373, | |
| "grad_norm": 0.34594038128852844, | |
| "learning_rate": 3.995901474886568e-05, | |
| "loss": 0.0344, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.803921568627451, | |
| "grad_norm": 0.4010615646839142, | |
| "learning_rate": 3.944163785429992e-05, | |
| "loss": 0.0241, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.8088235294117647, | |
| "grad_norm": 0.24570715427398682, | |
| "learning_rate": 3.8926808165768715e-05, | |
| "loss": 0.0269, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.8137254901960784, | |
| "grad_norm": 0.2714114785194397, | |
| "learning_rate": 3.841454733804016e-05, | |
| "loss": 0.0287, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.8186274509803921, | |
| "grad_norm": 0.29776889085769653, | |
| "learning_rate": 3.790487691783099e-05, | |
| "loss": 0.0325, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.8235294117647058, | |
| "grad_norm": 0.20955297350883484, | |
| "learning_rate": 3.739781834290006e-05, | |
| "loss": 0.0319, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.8284313725490198, | |
| "grad_norm": 0.2985910475254059, | |
| "learning_rate": 3.689339294114692e-05, | |
| "loss": 0.0244, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.8333333333333335, | |
| "grad_norm": 0.33746784925460815, | |
| "learning_rate": 3.639162192971457e-05, | |
| "loss": 0.0272, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.8382352941176472, | |
| "grad_norm": 0.2537771761417389, | |
| "learning_rate": 3.5892526414096925e-05, | |
| "loss": 0.0317, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.843137254901961, | |
| "grad_norm": 0.30214208364486694, | |
| "learning_rate": 3.53961273872513e-05, | |
| "loss": 0.0207, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.8480392156862746, | |
| "grad_norm": 0.3083361089229584, | |
| "learning_rate": 3.490244572871524e-05, | |
| "loss": 0.0216, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.8529411764705883, | |
| "grad_norm": 0.24969086050987244, | |
| "learning_rate": 3.44115022037284e-05, | |
| "loss": 0.0254, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.857843137254902, | |
| "grad_norm": 0.2084352970123291, | |
| "learning_rate": 3.3923317462358905e-05, | |
| "loss": 0.0305, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.8627450980392157, | |
| "grad_norm": 0.21671414375305176, | |
| "learning_rate": 3.3437912038635056e-05, | |
| "loss": 0.0303, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.8676470588235294, | |
| "grad_norm": 0.2956879436969757, | |
| "learning_rate": 3.295530634968147e-05, | |
| "loss": 0.0298, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.8725490196078431, | |
| "grad_norm": 0.29368528723716736, | |
| "learning_rate": 3.24755206948602e-05, | |
| "loss": 0.0261, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.8774509803921569, | |
| "grad_norm": 0.13201627135276794, | |
| "learning_rate": 3.199857525491714e-05, | |
| "loss": 0.0217, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.8823529411764706, | |
| "grad_norm": 0.21656860411167145, | |
| "learning_rate": 3.1524490091133e-05, | |
| "loss": 0.0288, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.8872549019607843, | |
| "grad_norm": 0.20571519434452057, | |
| "learning_rate": 3.105328514447957e-05, | |
| "loss": 0.0254, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.892156862745098, | |
| "grad_norm": 0.2538784444332123, | |
| "learning_rate": 3.0584980234780916e-05, | |
| "loss": 0.0325, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.8970588235294117, | |
| "grad_norm": 0.282520592212677, | |
| "learning_rate": 3.0119595059879678e-05, | |
| "loss": 0.0292, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.9019607843137254, | |
| "grad_norm": 0.2727642357349396, | |
| "learning_rate": 2.965714919480872e-05, | |
| "loss": 0.0264, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.906862745098039, | |
| "grad_norm": 0.1925729215145111, | |
| "learning_rate": 2.9197662090967625e-05, | |
| "loss": 0.0282, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.9117647058823528, | |
| "grad_norm": 0.23374304175376892, | |
| "learning_rate": 2.8741153075304438e-05, | |
| "loss": 0.0266, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.9166666666666665, | |
| "grad_norm": 0.24565072357654572, | |
| "learning_rate": 2.828764134950297e-05, | |
| "loss": 0.0198, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 1.9215686274509802, | |
| "grad_norm": 0.1271701604127884, | |
| "learning_rate": 2.7837145989174974e-05, | |
| "loss": 0.0206, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.9264705882352942, | |
| "grad_norm": 0.25117677450180054, | |
| "learning_rate": 2.7389685943057852e-05, | |
| "loss": 0.0249, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 1.9313725490196079, | |
| "grad_norm": 0.33383405208587646, | |
| "learning_rate": 2.6945280032217535e-05, | |
| "loss": 0.0298, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.9362745098039216, | |
| "grad_norm": 0.22882990539073944, | |
| "learning_rate": 2.6503946949256974e-05, | |
| "loss": 0.0273, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.9411764705882353, | |
| "grad_norm": 0.2781018316745758, | |
| "learning_rate": 2.6065705257529848e-05, | |
| "loss": 0.0345, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 1.946078431372549, | |
| "grad_norm": 0.23163281381130219, | |
| "learning_rate": 2.5630573390359624e-05, | |
| "loss": 0.024, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 1.9509803921568627, | |
| "grad_norm": 0.30135810375213623, | |
| "learning_rate": 2.5198569650264403e-05, | |
| "loss": 0.0245, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 1.9558823529411766, | |
| "grad_norm": 0.2169611006975174, | |
| "learning_rate": 2.4769712208186967e-05, | |
| "loss": 0.0217, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 1.9607843137254903, | |
| "grad_norm": 0.2509106397628784, | |
| "learning_rate": 2.4344019102730542e-05, | |
| "loss": 0.0225, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.965686274509804, | |
| "grad_norm": 0.3170604407787323, | |
| "learning_rate": 2.3921508239399913e-05, | |
| "loss": 0.027, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 1.9705882352941178, | |
| "grad_norm": 0.1460844874382019, | |
| "learning_rate": 2.350219738984849e-05, | |
| "loss": 0.021, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 1.9754901960784315, | |
| "grad_norm": 0.27143651247024536, | |
| "learning_rate": 2.3086104191130643e-05, | |
| "loss": 0.0262, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 1.9803921568627452, | |
| "grad_norm": 0.2494950294494629, | |
| "learning_rate": 2.2673246144959935e-05, | |
| "loss": 0.0249, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 1.9852941176470589, | |
| "grad_norm": 0.26637178659439087, | |
| "learning_rate": 2.226364061697287e-05, | |
| "loss": 0.0325, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.9901960784313726, | |
| "grad_norm": 0.2871919274330139, | |
| "learning_rate": 2.185730483599856e-05, | |
| "loss": 0.0286, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 1.9950980392156863, | |
| "grad_norm": 0.2098357230424881, | |
| "learning_rate": 2.1454255893334064e-05, | |
| "loss": 0.0332, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.2278250902891159, | |
| "learning_rate": 2.10545107420253e-05, | |
| "loss": 0.0234, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 2.0049019607843137, | |
| "grad_norm": 0.266368567943573, | |
| "learning_rate": 2.0658086196154236e-05, | |
| "loss": 0.0226, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 2.0098039215686274, | |
| "grad_norm": 0.10192721337080002, | |
| "learning_rate": 2.026499893013144e-05, | |
| "loss": 0.0217, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.014705882352941, | |
| "grad_norm": 0.2021161913871765, | |
| "learning_rate": 1.9875265477994875e-05, | |
| "loss": 0.0219, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 2.019607843137255, | |
| "grad_norm": 0.18436592817306519, | |
| "learning_rate": 1.9488902232714267e-05, | |
| "loss": 0.0216, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 2.0245098039215685, | |
| "grad_norm": 0.18808206915855408, | |
| "learning_rate": 1.9105925445501794e-05, | |
| "loss": 0.0184, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 2.0294117647058822, | |
| "grad_norm": 0.2091018408536911, | |
| "learning_rate": 1.87263512251284e-05, | |
| "loss": 0.0237, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 2.034313725490196, | |
| "grad_norm": 0.29868388175964355, | |
| "learning_rate": 1.8350195537246184e-05, | |
| "loss": 0.0251, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.0392156862745097, | |
| "grad_norm": 0.15926848351955414, | |
| "learning_rate": 1.797747420371699e-05, | |
| "loss": 0.0214, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 2.0441176470588234, | |
| "grad_norm": 0.24016976356506348, | |
| "learning_rate": 1.7608202901946826e-05, | |
| "loss": 0.0206, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 2.049019607843137, | |
| "grad_norm": 0.18072175979614258, | |
| "learning_rate": 1.7242397164226452e-05, | |
| "loss": 0.0192, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 2.053921568627451, | |
| "grad_norm": 0.21760503947734833, | |
| "learning_rate": 1.6880072377078026e-05, | |
| "loss": 0.0237, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 2.0588235294117645, | |
| "grad_norm": 0.28834259510040283, | |
| "learning_rate": 1.6521243780607974e-05, | |
| "loss": 0.0185, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.063725490196078, | |
| "grad_norm": 0.14096632599830627, | |
| "learning_rate": 1.616592646786599e-05, | |
| "loss": 0.0184, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 2.0686274509803924, | |
| "grad_norm": 0.15555402636528015, | |
| "learning_rate": 1.5814135384210026e-05, | |
| "loss": 0.02, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 2.073529411764706, | |
| "grad_norm": 0.09052202850580215, | |
| "learning_rate": 1.5465885326677897e-05, | |
| "loss": 0.019, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 2.0784313725490198, | |
| "grad_norm": 0.1283917874097824, | |
| "learning_rate": 1.512119094336466e-05, | |
| "loss": 0.0194, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 2.0833333333333335, | |
| "grad_norm": 0.268288791179657, | |
| "learning_rate": 1.4780066732806663e-05, | |
| "loss": 0.024, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.088235294117647, | |
| "grad_norm": 0.1498635858297348, | |
| "learning_rate": 1.4442527043371622e-05, | |
| "loss": 0.0226, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 2.093137254901961, | |
| "grad_norm": 0.31013911962509155, | |
| "learning_rate": 1.4108586072655062e-05, | |
| "loss": 0.0198, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 2.0980392156862746, | |
| "grad_norm": 0.19611553847789764, | |
| "learning_rate": 1.377825786688326e-05, | |
| "loss": 0.029, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 2.1029411764705883, | |
| "grad_norm": 0.15280510485172272, | |
| "learning_rate": 1.3451556320322344e-05, | |
| "loss": 0.0305, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 2.107843137254902, | |
| "grad_norm": 0.1233508512377739, | |
| "learning_rate": 1.3128495174693833e-05, | |
| "loss": 0.0214, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.1127450980392157, | |
| "grad_norm": 0.23667648434638977, | |
| "learning_rate": 1.280908801859676e-05, | |
| "loss": 0.017, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 2.1176470588235294, | |
| "grad_norm": 0.2285485714673996, | |
| "learning_rate": 1.2493348286936013e-05, | |
| "loss": 0.019, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 2.122549019607843, | |
| "grad_norm": 0.17249974608421326, | |
| "learning_rate": 1.2181289260357265e-05, | |
| "loss": 0.0233, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 2.127450980392157, | |
| "grad_norm": 0.18264269828796387, | |
| "learning_rate": 1.1872924064688328e-05, | |
| "loss": 0.0176, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 2.1323529411764706, | |
| "grad_norm": 0.23498280346393585, | |
| "learning_rate": 1.1568265670387125e-05, | |
| "loss": 0.0216, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 2.1372549019607843, | |
| "grad_norm": 0.20697841048240662, | |
| "learning_rate": 1.12673268919961e-05, | |
| "loss": 0.0221, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 2.142156862745098, | |
| "grad_norm": 0.20883601903915405, | |
| "learning_rate": 1.0970120387603122e-05, | |
| "loss": 0.0211, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 2.1470588235294117, | |
| "grad_norm": 0.2525753676891327, | |
| "learning_rate": 1.0676658658309225e-05, | |
| "loss": 0.0182, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 2.1519607843137254, | |
| "grad_norm": 0.20986422896385193, | |
| "learning_rate": 1.0386954047702646e-05, | |
| "loss": 0.0222, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 2.156862745098039, | |
| "grad_norm": 0.18922965228557587, | |
| "learning_rate": 1.010101874133973e-05, | |
| "loss": 0.0215, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.161764705882353, | |
| "grad_norm": 0.23508426547050476, | |
| "learning_rate": 9.81886476623226e-06, | |
| "loss": 0.0158, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 2.1666666666666665, | |
| "grad_norm": 0.17150916159152985, | |
| "learning_rate": 9.540503990341743e-06, | |
| "loss": 0.0204, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 2.1715686274509802, | |
| "grad_norm": 0.12821370363235474, | |
| "learning_rate": 9.265948122080048e-06, | |
| "loss": 0.0162, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 2.176470588235294, | |
| "grad_norm": 0.14045512676239014, | |
| "learning_rate": 8.995208709817071e-06, | |
| "loss": 0.0231, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 2.1813725490196076, | |
| "grad_norm": 0.24774880707263947, | |
| "learning_rate": 8.728297141394858e-06, | |
| "loss": 0.0205, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 2.186274509803922, | |
| "grad_norm": 0.20708513259887695, | |
| "learning_rate": 8.465224643648728e-06, | |
| "loss": 0.0217, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 2.1911764705882355, | |
| "grad_norm": 0.1988290697336197, | |
| "learning_rate": 8.206002281934977e-06, | |
| "loss": 0.0181, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 2.196078431372549, | |
| "grad_norm": 0.24248534440994263, | |
| "learning_rate": 7.950640959665457e-06, | |
| "loss": 0.014, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 2.200980392156863, | |
| "grad_norm": 0.199833944439888, | |
| "learning_rate": 7.69915141784896e-06, | |
| "loss": 0.0177, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 2.2058823529411766, | |
| "grad_norm": 0.18880146741867065, | |
| "learning_rate": 7.451544234639473e-06, | |
| "loss": 0.0309, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.2107843137254903, | |
| "grad_norm": 0.19817057251930237, | |
| "learning_rate": 7.207829824891199e-06, | |
| "loss": 0.0135, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 2.215686274509804, | |
| "grad_norm": 0.22702986001968384, | |
| "learning_rate": 6.968018439720414e-06, | |
| "loss": 0.0242, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 2.2205882352941178, | |
| "grad_norm": 0.23108519613742828, | |
| "learning_rate": 6.732120166074441e-06, | |
| "loss": 0.0266, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 2.2254901960784315, | |
| "grad_norm": 0.2683473229408264, | |
| "learning_rate": 6.500144926307295e-06, | |
| "loss": 0.0138, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 2.230392156862745, | |
| "grad_norm": 0.3906085193157196, | |
| "learning_rate": 6.272102477762254e-06, | |
| "loss": 0.0257, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 2.235294117647059, | |
| "grad_norm": 0.16685569286346436, | |
| "learning_rate": 6.048002412361598e-06, | |
| "loss": 0.0197, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 2.2401960784313726, | |
| "grad_norm": 0.19442564249038696, | |
| "learning_rate": 5.827854156203017e-06, | |
| "loss": 0.0183, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 2.2450980392156863, | |
| "grad_norm": 0.22825686633586884, | |
| "learning_rate": 5.611666969163243e-06, | |
| "loss": 0.0192, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "grad_norm": 0.13165044784545898, | |
| "learning_rate": 5.399449944508439e-06, | |
| "loss": 0.0181, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 2.2549019607843137, | |
| "grad_norm": 0.32770487666130066, | |
| "learning_rate": 5.1912120085118365e-06, | |
| "loss": 0.0194, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 2.2598039215686274, | |
| "grad_norm": 0.20789536833763123, | |
| "learning_rate": 4.986961920078204e-06, | |
| "loss": 0.0274, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 2.264705882352941, | |
| "grad_norm": 0.3107619881629944, | |
| "learning_rate": 4.786708270375462e-06, | |
| "loss": 0.0244, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 2.269607843137255, | |
| "grad_norm": 0.11234085261821747, | |
| "learning_rate": 4.590459482473286e-06, | |
| "loss": 0.0125, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 2.2745098039215685, | |
| "grad_norm": 0.2838551700115204, | |
| "learning_rate": 4.398223810988866e-06, | |
| "loss": 0.0209, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 2.2794117647058822, | |
| "grad_norm": 0.2697765529155731, | |
| "learning_rate": 4.2100093417396845e-06, | |
| "loss": 0.0245, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 2.284313725490196, | |
| "grad_norm": 0.1846655011177063, | |
| "learning_rate": 4.0258239914033765e-06, | |
| "loss": 0.0271, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 2.2892156862745097, | |
| "grad_norm": 0.20711906254291534, | |
| "learning_rate": 3.8456755071847765e-06, | |
| "loss": 0.0262, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 2.2941176470588234, | |
| "grad_norm": 0.23577173054218292, | |
| "learning_rate": 3.6695714664900293e-06, | |
| "loss": 0.0147, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 2.299019607843137, | |
| "grad_norm": 0.21056267619132996, | |
| "learning_rate": 3.49751927660793e-06, | |
| "loss": 0.0242, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 2.303921568627451, | |
| "grad_norm": 0.1903896927833557, | |
| "learning_rate": 3.329526174398223e-06, | |
| "loss": 0.0199, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 2.3088235294117645, | |
| "grad_norm": 0.1740642637014389, | |
| "learning_rate": 3.165599225987381e-06, | |
| "loss": 0.0241, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 2.313725490196078, | |
| "grad_norm": 0.23523923754692078, | |
| "learning_rate": 3.005745326471254e-06, | |
| "loss": 0.0196, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 2.318627450980392, | |
| "grad_norm": 0.17898762226104736, | |
| "learning_rate": 2.849971199625112e-06, | |
| "loss": 0.0178, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 2.323529411764706, | |
| "grad_norm": 0.18050484359264374, | |
| "learning_rate": 2.6982833976208043e-06, | |
| "loss": 0.0264, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 2.3284313725490198, | |
| "grad_norm": 0.14259324967861176, | |
| "learning_rate": 2.5506883007511695e-06, | |
| "loss": 0.0163, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 2.3333333333333335, | |
| "grad_norm": 0.25935426354408264, | |
| "learning_rate": 2.407192117161683e-06, | |
| "loss": 0.0215, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 2.338235294117647, | |
| "grad_norm": 0.24692723155021667, | |
| "learning_rate": 2.2678008825893106e-06, | |
| "loss": 0.0185, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 2.343137254901961, | |
| "grad_norm": 0.22854046523571014, | |
| "learning_rate": 2.1325204601086222e-06, | |
| "loss": 0.0235, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 2.3480392156862746, | |
| "grad_norm": 0.31760168075561523, | |
| "learning_rate": 2.001356539885213e-06, | |
| "loss": 0.0279, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 2.3529411764705883, | |
| "grad_norm": 0.19233286380767822, | |
| "learning_rate": 1.8743146389363474e-06, | |
| "loss": 0.0174, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 2.357843137254902, | |
| "grad_norm": 0.28652098774909973, | |
| "learning_rate": 1.7514001008988923e-06, | |
| "loss": 0.0193, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 2.3627450980392157, | |
| "grad_norm": 0.16845420002937317, | |
| "learning_rate": 1.6326180958045502e-06, | |
| "loss": 0.0177, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 2.3676470588235294, | |
| "grad_norm": 0.12303224951028824, | |
| "learning_rate": 1.517973619862445e-06, | |
| "loss": 0.0205, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 2.372549019607843, | |
| "grad_norm": 0.1442280411720276, | |
| "learning_rate": 1.4074714952489132e-06, | |
| "loss": 0.0215, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 2.377450980392157, | |
| "grad_norm": 0.18173760175704956, | |
| "learning_rate": 1.3011163699046758e-06, | |
| "loss": 0.0166, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 2.3823529411764706, | |
| "grad_norm": 0.2543584406375885, | |
| "learning_rate": 1.1989127173393955e-06, | |
| "loss": 0.0195, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 2.3872549019607843, | |
| "grad_norm": 0.14100182056427002, | |
| "learning_rate": 1.1008648364434493e-06, | |
| "loss": 0.0252, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 2.392156862745098, | |
| "grad_norm": 0.22381238639354706, | |
| "learning_rate": 1.0069768513071287e-06, | |
| "loss": 0.0184, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 2.3970588235294117, | |
| "grad_norm": 0.1522032916545868, | |
| "learning_rate": 9.172527110472007e-07, | |
| "loss": 0.0226, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 2.4019607843137254, | |
| "grad_norm": 0.151298388838768, | |
| "learning_rate": 8.316961896407293e-07, | |
| "loss": 0.0161, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 2.406862745098039, | |
| "grad_norm": 0.10205589234828949, | |
| "learning_rate": 7.503108857664476e-07, | |
| "loss": 0.0221, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 2.411764705882353, | |
| "grad_norm": 0.29145070910453796, | |
| "learning_rate": 6.731002226532557e-07, | |
| "loss": 0.0256, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 2.4166666666666665, | |
| "grad_norm": 0.11820173263549805, | |
| "learning_rate": 6.000674479363366e-07, | |
| "loss": 0.0192, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 2.4215686274509802, | |
| "grad_norm": 0.34024494886398315, | |
| "learning_rate": 5.312156335205098e-07, | |
| "loss": 0.0184, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 2.426470588235294, | |
| "grad_norm": 0.2437254935503006, | |
| "learning_rate": 4.665476754510234e-07, | |
| "loss": 0.0172, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 2.431372549019608, | |
| "grad_norm": 0.22836612164974213, | |
| "learning_rate": 4.0606629379175143e-07, | |
| "loss": 0.0236, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 2.436274509803922, | |
| "grad_norm": 0.30892133712768555, | |
| "learning_rate": 3.497740325107746e-07, | |
| "loss": 0.021, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 2.4411764705882355, | |
| "grad_norm": 0.1466980129480362, | |
| "learning_rate": 2.9767325937338775e-07, | |
| "loss": 0.0158, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 2.446078431372549, | |
| "grad_norm": 0.17119024693965912, | |
| "learning_rate": 2.497661658424688e-07, | |
| "loss": 0.0211, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 2.450980392156863, | |
| "grad_norm": 0.24735775589942932, | |
| "learning_rate": 2.0605476698636328e-07, | |
| "loss": 0.0198, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.4558823529411766, | |
| "grad_norm": 0.22227314114570618, | |
| "learning_rate": 1.6654090139408551e-07, | |
| "loss": 0.0222, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 2.4607843137254903, | |
| "grad_norm": 0.2925474941730499, | |
| "learning_rate": 1.3122623109795839e-07, | |
| "loss": 0.0212, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 2.465686274509804, | |
| "grad_norm": 0.11938751488924026, | |
| "learning_rate": 1.0011224150379139e-07, | |
| "loss": 0.0206, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 2.4705882352941178, | |
| "grad_norm": 0.18019632995128632, | |
| "learning_rate": 7.320024132829729e-08, | |
| "loss": 0.0175, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 2.4754901960784315, | |
| "grad_norm": 0.1969996839761734, | |
| "learning_rate": 5.049136254413611e-08, | |
| "loss": 0.0196, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 2.480392156862745, | |
| "grad_norm": 0.26969224214553833, | |
| "learning_rate": 3.1986560332242234e-08, | |
| "loss": 0.0159, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 2.485294117647059, | |
| "grad_norm": 0.24808961153030396, | |
| "learning_rate": 1.768661304166752e-08, | |
| "loss": 0.0235, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 2.4901960784313726, | |
| "grad_norm": 0.08070988208055496, | |
| "learning_rate": 7.592122156829806e-09, | |
| "loss": 0.0196, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 2.4950980392156863, | |
| "grad_norm": 0.3003765642642975, | |
| "learning_rate": 1.7035122722663943e-09, | |
| "loss": 0.0214, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 2.4995098039215686, | |
| "step": 5099, | |
| "total_flos": 1.0010895665363712e+17, | |
| "train_loss": 0.049077389520344766, | |
| "train_runtime": 1734.3659, | |
| "train_samples_per_second": 47.04, | |
| "train_steps_per_second": 2.94 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 5099, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.0010895665363712e+17, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |