| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 250.0, |
| "eval_steps": 500, |
| "global_step": 10000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.25, |
| "grad_norm": 5.177168846130371, |
| "learning_rate": 1.8e-07, |
| "loss": 0.6721, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 5.208155155181885, |
| "learning_rate": 3.8e-07, |
| "loss": 0.6798, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 4.579753875732422, |
| "learning_rate": 5.8e-07, |
| "loss": 0.6639, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 4.642096042633057, |
| "learning_rate": 7.8e-07, |
| "loss": 0.6353, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 3.5934982299804688, |
| "learning_rate": 9.8e-07, |
| "loss": 0.5288, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 2.046332597732544, |
| "learning_rate": 1.18e-06, |
| "loss": 0.4528, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 1.644978404045105, |
| "learning_rate": 1.3800000000000001e-06, |
| "loss": 0.3442, |
| "step": 70 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.8051798343658447, |
| "learning_rate": 1.5800000000000003e-06, |
| "loss": 0.2862, |
| "step": 80 |
| }, |
| { |
| "epoch": 2.25, |
| "grad_norm": 0.4936715066432953, |
| "learning_rate": 1.7800000000000001e-06, |
| "loss": 0.2241, |
| "step": 90 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.37280142307281494, |
| "learning_rate": 1.98e-06, |
| "loss": 0.1893, |
| "step": 100 |
| }, |
| { |
| "epoch": 2.75, |
| "grad_norm": 0.3533601760864258, |
| "learning_rate": 2.1800000000000003e-06, |
| "loss": 0.1706, |
| "step": 110 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.2663685083389282, |
| "learning_rate": 2.38e-06, |
| "loss": 0.1497, |
| "step": 120 |
| }, |
| { |
| "epoch": 3.25, |
| "grad_norm": 0.28943586349487305, |
| "learning_rate": 2.5800000000000003e-06, |
| "loss": 0.1377, |
| "step": 130 |
| }, |
| { |
| "epoch": 3.5, |
| "grad_norm": 0.20089849829673767, |
| "learning_rate": 2.78e-06, |
| "loss": 0.1256, |
| "step": 140 |
| }, |
| { |
| "epoch": 3.75, |
| "grad_norm": 0.17530840635299683, |
| "learning_rate": 2.9800000000000003e-06, |
| "loss": 0.1201, |
| "step": 150 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.19372163712978363, |
| "learning_rate": 3.1800000000000005e-06, |
| "loss": 0.1136, |
| "step": 160 |
| }, |
| { |
| "epoch": 4.25, |
| "grad_norm": 0.13657791912555695, |
| "learning_rate": 3.38e-06, |
| "loss": 0.1059, |
| "step": 170 |
| }, |
| { |
| "epoch": 4.5, |
| "grad_norm": 0.1357901245355606, |
| "learning_rate": 3.58e-06, |
| "loss": 0.099, |
| "step": 180 |
| }, |
| { |
| "epoch": 4.75, |
| "grad_norm": 0.1591852754354477, |
| "learning_rate": 3.7800000000000002e-06, |
| "loss": 0.0986, |
| "step": 190 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.12721344828605652, |
| "learning_rate": 3.98e-06, |
| "loss": 0.0935, |
| "step": 200 |
| }, |
| { |
| "epoch": 5.25, |
| "grad_norm": 0.14519330859184265, |
| "learning_rate": 4.18e-06, |
| "loss": 0.0862, |
| "step": 210 |
| }, |
| { |
| "epoch": 5.5, |
| "grad_norm": 0.17094707489013672, |
| "learning_rate": 4.38e-06, |
| "loss": 0.0864, |
| "step": 220 |
| }, |
| { |
| "epoch": 5.75, |
| "grad_norm": 0.11924029141664505, |
| "learning_rate": 4.58e-06, |
| "loss": 0.082, |
| "step": 230 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 0.13231466710567474, |
| "learning_rate": 4.780000000000001e-06, |
| "loss": 0.0787, |
| "step": 240 |
| }, |
| { |
| "epoch": 6.25, |
| "grad_norm": 0.17676876485347748, |
| "learning_rate": 4.98e-06, |
| "loss": 0.0751, |
| "step": 250 |
| }, |
| { |
| "epoch": 6.5, |
| "grad_norm": 0.12683098018169403, |
| "learning_rate": 5.18e-06, |
| "loss": 0.0704, |
| "step": 260 |
| }, |
| { |
| "epoch": 6.75, |
| "grad_norm": 0.14490821957588196, |
| "learning_rate": 5.38e-06, |
| "loss": 0.0658, |
| "step": 270 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 0.11153838038444519, |
| "learning_rate": 5.580000000000001e-06, |
| "loss": 0.0626, |
| "step": 280 |
| }, |
| { |
| "epoch": 7.25, |
| "grad_norm": 0.11997738480567932, |
| "learning_rate": 5.78e-06, |
| "loss": 0.0613, |
| "step": 290 |
| }, |
| { |
| "epoch": 7.5, |
| "grad_norm": 0.15803208947181702, |
| "learning_rate": 5.98e-06, |
| "loss": 0.0598, |
| "step": 300 |
| }, |
| { |
| "epoch": 7.75, |
| "grad_norm": 0.1401054412126541, |
| "learning_rate": 6.18e-06, |
| "loss": 0.0555, |
| "step": 310 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.17630530893802643, |
| "learning_rate": 6.38e-06, |
| "loss": 0.0532, |
| "step": 320 |
| }, |
| { |
| "epoch": 8.25, |
| "grad_norm": 0.18545646965503693, |
| "learning_rate": 6.58e-06, |
| "loss": 0.0492, |
| "step": 330 |
| }, |
| { |
| "epoch": 8.5, |
| "grad_norm": 0.1365729719400406, |
| "learning_rate": 6.78e-06, |
| "loss": 0.0509, |
| "step": 340 |
| }, |
| { |
| "epoch": 8.75, |
| "grad_norm": 0.14690732955932617, |
| "learning_rate": 6.98e-06, |
| "loss": 0.0455, |
| "step": 350 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 0.13823232054710388, |
| "learning_rate": 7.180000000000001e-06, |
| "loss": 0.0432, |
| "step": 360 |
| }, |
| { |
| "epoch": 9.25, |
| "grad_norm": 0.14099512994289398, |
| "learning_rate": 7.3800000000000005e-06, |
| "loss": 0.0434, |
| "step": 370 |
| }, |
| { |
| "epoch": 9.5, |
| "grad_norm": 0.1966245025396347, |
| "learning_rate": 7.580000000000001e-06, |
| "loss": 0.0404, |
| "step": 380 |
| }, |
| { |
| "epoch": 9.75, |
| "grad_norm": 0.2028568536043167, |
| "learning_rate": 7.78e-06, |
| "loss": 0.0411, |
| "step": 390 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.14070606231689453, |
| "learning_rate": 7.98e-06, |
| "loss": 0.039, |
| "step": 400 |
| }, |
| { |
| "epoch": 10.25, |
| "grad_norm": 0.15250281989574432, |
| "learning_rate": 8.18e-06, |
| "loss": 0.0353, |
| "step": 410 |
| }, |
| { |
| "epoch": 10.5, |
| "grad_norm": 0.13883410394191742, |
| "learning_rate": 8.380000000000001e-06, |
| "loss": 0.0344, |
| "step": 420 |
| }, |
| { |
| "epoch": 10.75, |
| "grad_norm": 0.16541792452335358, |
| "learning_rate": 8.580000000000001e-06, |
| "loss": 0.0359, |
| "step": 430 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 0.1294703632593155, |
| "learning_rate": 8.78e-06, |
| "loss": 0.0336, |
| "step": 440 |
| }, |
| { |
| "epoch": 11.25, |
| "grad_norm": 0.1688312292098999, |
| "learning_rate": 8.98e-06, |
| "loss": 0.0341, |
| "step": 450 |
| }, |
| { |
| "epoch": 11.5, |
| "grad_norm": 0.1562804877758026, |
| "learning_rate": 9.180000000000002e-06, |
| "loss": 0.0315, |
| "step": 460 |
| }, |
| { |
| "epoch": 11.75, |
| "grad_norm": 0.12476273626089096, |
| "learning_rate": 9.38e-06, |
| "loss": 0.0311, |
| "step": 470 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 0.1468283087015152, |
| "learning_rate": 9.58e-06, |
| "loss": 0.0305, |
| "step": 480 |
| }, |
| { |
| "epoch": 12.25, |
| "grad_norm": 0.1696883887052536, |
| "learning_rate": 9.78e-06, |
| "loss": 0.0295, |
| "step": 490 |
| }, |
| { |
| "epoch": 12.5, |
| "grad_norm": 0.16157524287700653, |
| "learning_rate": 9.980000000000001e-06, |
| "loss": 0.0296, |
| "step": 500 |
| }, |
| { |
| "epoch": 12.75, |
| "grad_norm": 0.2175641655921936, |
| "learning_rate": 1.018e-05, |
| "loss": 0.0284, |
| "step": 510 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 0.17267122864723206, |
| "learning_rate": 1.038e-05, |
| "loss": 0.0278, |
| "step": 520 |
| }, |
| { |
| "epoch": 13.25, |
| "grad_norm": 0.202761709690094, |
| "learning_rate": 1.058e-05, |
| "loss": 0.0272, |
| "step": 530 |
| }, |
| { |
| "epoch": 13.5, |
| "grad_norm": 0.1282481700181961, |
| "learning_rate": 1.0780000000000002e-05, |
| "loss": 0.0263, |
| "step": 540 |
| }, |
| { |
| "epoch": 13.75, |
| "grad_norm": 0.1510225236415863, |
| "learning_rate": 1.098e-05, |
| "loss": 0.0261, |
| "step": 550 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 0.15521079301834106, |
| "learning_rate": 1.118e-05, |
| "loss": 0.0262, |
| "step": 560 |
| }, |
| { |
| "epoch": 14.25, |
| "grad_norm": 0.16990794241428375, |
| "learning_rate": 1.1380000000000001e-05, |
| "loss": 0.0251, |
| "step": 570 |
| }, |
| { |
| "epoch": 14.5, |
| "grad_norm": 0.15591584146022797, |
| "learning_rate": 1.1580000000000001e-05, |
| "loss": 0.0253, |
| "step": 580 |
| }, |
| { |
| "epoch": 14.75, |
| "grad_norm": 0.14896942675113678, |
| "learning_rate": 1.178e-05, |
| "loss": 0.0258, |
| "step": 590 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 0.18454653024673462, |
| "learning_rate": 1.198e-05, |
| "loss": 0.0241, |
| "step": 600 |
| }, |
| { |
| "epoch": 15.25, |
| "grad_norm": 0.18142494559288025, |
| "learning_rate": 1.2180000000000002e-05, |
| "loss": 0.025, |
| "step": 610 |
| }, |
| { |
| "epoch": 15.5, |
| "grad_norm": 0.1759718656539917, |
| "learning_rate": 1.238e-05, |
| "loss": 0.023, |
| "step": 620 |
| }, |
| { |
| "epoch": 15.75, |
| "grad_norm": 0.12727558612823486, |
| "learning_rate": 1.258e-05, |
| "loss": 0.0242, |
| "step": 630 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 0.1535845398902893, |
| "learning_rate": 1.278e-05, |
| "loss": 0.0236, |
| "step": 640 |
| }, |
| { |
| "epoch": 16.25, |
| "grad_norm": 0.19321507215499878, |
| "learning_rate": 1.2980000000000001e-05, |
| "loss": 0.022, |
| "step": 650 |
| }, |
| { |
| "epoch": 16.5, |
| "grad_norm": 0.2249348759651184, |
| "learning_rate": 1.3180000000000001e-05, |
| "loss": 0.0229, |
| "step": 660 |
| }, |
| { |
| "epoch": 16.75, |
| "grad_norm": 0.19657425582408905, |
| "learning_rate": 1.338e-05, |
| "loss": 0.0223, |
| "step": 670 |
| }, |
| { |
| "epoch": 17.0, |
| "grad_norm": 0.17625083029270172, |
| "learning_rate": 1.358e-05, |
| "loss": 0.022, |
| "step": 680 |
| }, |
| { |
| "epoch": 17.25, |
| "grad_norm": 0.2059263288974762, |
| "learning_rate": 1.3780000000000002e-05, |
| "loss": 0.0204, |
| "step": 690 |
| }, |
| { |
| "epoch": 17.5, |
| "grad_norm": 0.2298121601343155, |
| "learning_rate": 1.3980000000000002e-05, |
| "loss": 0.0214, |
| "step": 700 |
| }, |
| { |
| "epoch": 17.75, |
| "grad_norm": 0.18725836277008057, |
| "learning_rate": 1.4180000000000001e-05, |
| "loss": 0.0213, |
| "step": 710 |
| }, |
| { |
| "epoch": 18.0, |
| "grad_norm": 0.21040500700473785, |
| "learning_rate": 1.4380000000000001e-05, |
| "loss": 0.0221, |
| "step": 720 |
| }, |
| { |
| "epoch": 18.25, |
| "grad_norm": 0.20119509100914001, |
| "learning_rate": 1.4580000000000003e-05, |
| "loss": 0.0208, |
| "step": 730 |
| }, |
| { |
| "epoch": 18.5, |
| "grad_norm": 0.1417151838541031, |
| "learning_rate": 1.4779999999999999e-05, |
| "loss": 0.0223, |
| "step": 740 |
| }, |
| { |
| "epoch": 18.75, |
| "grad_norm": 0.16525112092494965, |
| "learning_rate": 1.4979999999999999e-05, |
| "loss": 0.0191, |
| "step": 750 |
| }, |
| { |
| "epoch": 19.0, |
| "grad_norm": 0.1953587532043457, |
| "learning_rate": 1.518e-05, |
| "loss": 0.0192, |
| "step": 760 |
| }, |
| { |
| "epoch": 19.25, |
| "grad_norm": 0.19152410328388214, |
| "learning_rate": 1.538e-05, |
| "loss": 0.0187, |
| "step": 770 |
| }, |
| { |
| "epoch": 19.5, |
| "grad_norm": 0.1645529419183731, |
| "learning_rate": 1.558e-05, |
| "loss": 0.02, |
| "step": 780 |
| }, |
| { |
| "epoch": 19.75, |
| "grad_norm": 0.16479845345020294, |
| "learning_rate": 1.578e-05, |
| "loss": 0.0197, |
| "step": 790 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 0.20477408170700073, |
| "learning_rate": 1.598e-05, |
| "loss": 0.019, |
| "step": 800 |
| }, |
| { |
| "epoch": 20.25, |
| "grad_norm": 0.1593884378671646, |
| "learning_rate": 1.618e-05, |
| "loss": 0.0182, |
| "step": 810 |
| }, |
| { |
| "epoch": 20.5, |
| "grad_norm": 0.2143949717283249, |
| "learning_rate": 1.6380000000000002e-05, |
| "loss": 0.0192, |
| "step": 820 |
| }, |
| { |
| "epoch": 20.75, |
| "grad_norm": 0.21650566160678864, |
| "learning_rate": 1.658e-05, |
| "loss": 0.0201, |
| "step": 830 |
| }, |
| { |
| "epoch": 21.0, |
| "grad_norm": 0.1762174516916275, |
| "learning_rate": 1.6780000000000002e-05, |
| "loss": 0.018, |
| "step": 840 |
| }, |
| { |
| "epoch": 21.25, |
| "grad_norm": 0.252549409866333, |
| "learning_rate": 1.698e-05, |
| "loss": 0.0184, |
| "step": 850 |
| }, |
| { |
| "epoch": 21.5, |
| "grad_norm": 0.26648053526878357, |
| "learning_rate": 1.718e-05, |
| "loss": 0.0179, |
| "step": 860 |
| }, |
| { |
| "epoch": 21.75, |
| "grad_norm": 0.18754205107688904, |
| "learning_rate": 1.7380000000000003e-05, |
| "loss": 0.0187, |
| "step": 870 |
| }, |
| { |
| "epoch": 22.0, |
| "grad_norm": 0.18613967299461365, |
| "learning_rate": 1.758e-05, |
| "loss": 0.0177, |
| "step": 880 |
| }, |
| { |
| "epoch": 22.25, |
| "grad_norm": 0.20721137523651123, |
| "learning_rate": 1.7780000000000003e-05, |
| "loss": 0.018, |
| "step": 890 |
| }, |
| { |
| "epoch": 22.5, |
| "grad_norm": 0.21724781394004822, |
| "learning_rate": 1.798e-05, |
| "loss": 0.0185, |
| "step": 900 |
| }, |
| { |
| "epoch": 22.75, |
| "grad_norm": 0.18366505205631256, |
| "learning_rate": 1.818e-05, |
| "loss": 0.0164, |
| "step": 910 |
| }, |
| { |
| "epoch": 23.0, |
| "grad_norm": 0.1680997759103775, |
| "learning_rate": 1.838e-05, |
| "loss": 0.0171, |
| "step": 920 |
| }, |
| { |
| "epoch": 23.25, |
| "grad_norm": 0.19146476686000824, |
| "learning_rate": 1.858e-05, |
| "loss": 0.0174, |
| "step": 930 |
| }, |
| { |
| "epoch": 23.5, |
| "grad_norm": 0.21596679091453552, |
| "learning_rate": 1.878e-05, |
| "loss": 0.0173, |
| "step": 940 |
| }, |
| { |
| "epoch": 23.75, |
| "grad_norm": 0.1829003393650055, |
| "learning_rate": 1.898e-05, |
| "loss": 0.017, |
| "step": 950 |
| }, |
| { |
| "epoch": 24.0, |
| "grad_norm": 0.1917005032300949, |
| "learning_rate": 1.918e-05, |
| "loss": 0.0162, |
| "step": 960 |
| }, |
| { |
| "epoch": 24.25, |
| "grad_norm": 0.1833629459142685, |
| "learning_rate": 1.938e-05, |
| "loss": 0.0163, |
| "step": 970 |
| }, |
| { |
| "epoch": 24.5, |
| "grad_norm": 0.23003654181957245, |
| "learning_rate": 1.9580000000000002e-05, |
| "loss": 0.0171, |
| "step": 980 |
| }, |
| { |
| "epoch": 24.75, |
| "grad_norm": 0.16011449694633484, |
| "learning_rate": 1.978e-05, |
| "loss": 0.0161, |
| "step": 990 |
| }, |
| { |
| "epoch": 25.0, |
| "grad_norm": 0.19529913365840912, |
| "learning_rate": 1.9980000000000002e-05, |
| "loss": 0.0188, |
| "step": 1000 |
| }, |
| { |
| "epoch": 25.25, |
| "grad_norm": 0.1939581334590912, |
| "learning_rate": 2.0180000000000003e-05, |
| "loss": 0.016, |
| "step": 1010 |
| }, |
| { |
| "epoch": 25.5, |
| "grad_norm": 0.2373170107603073, |
| "learning_rate": 2.038e-05, |
| "loss": 0.0153, |
| "step": 1020 |
| }, |
| { |
| "epoch": 25.75, |
| "grad_norm": 0.29833200573921204, |
| "learning_rate": 2.0580000000000003e-05, |
| "loss": 0.0162, |
| "step": 1030 |
| }, |
| { |
| "epoch": 26.0, |
| "grad_norm": 0.19248932600021362, |
| "learning_rate": 2.078e-05, |
| "loss": 0.0165, |
| "step": 1040 |
| }, |
| { |
| "epoch": 26.25, |
| "grad_norm": 0.16877706348896027, |
| "learning_rate": 2.098e-05, |
| "loss": 0.015, |
| "step": 1050 |
| }, |
| { |
| "epoch": 26.5, |
| "grad_norm": 0.17076095938682556, |
| "learning_rate": 2.118e-05, |
| "loss": 0.015, |
| "step": 1060 |
| }, |
| { |
| "epoch": 26.75, |
| "grad_norm": 0.11618935316801071, |
| "learning_rate": 2.138e-05, |
| "loss": 0.0145, |
| "step": 1070 |
| }, |
| { |
| "epoch": 27.0, |
| "grad_norm": 0.16996583342552185, |
| "learning_rate": 2.158e-05, |
| "loss": 0.0154, |
| "step": 1080 |
| }, |
| { |
| "epoch": 27.25, |
| "grad_norm": 0.15623579919338226, |
| "learning_rate": 2.178e-05, |
| "loss": 0.0152, |
| "step": 1090 |
| }, |
| { |
| "epoch": 27.5, |
| "grad_norm": 0.22260257601737976, |
| "learning_rate": 2.198e-05, |
| "loss": 0.0159, |
| "step": 1100 |
| }, |
| { |
| "epoch": 27.75, |
| "grad_norm": 0.1553281843662262, |
| "learning_rate": 2.218e-05, |
| "loss": 0.0151, |
| "step": 1110 |
| }, |
| { |
| "epoch": 28.0, |
| "grad_norm": 0.23849648237228394, |
| "learning_rate": 2.2380000000000003e-05, |
| "loss": 0.0144, |
| "step": 1120 |
| }, |
| { |
| "epoch": 28.25, |
| "grad_norm": 0.3228089213371277, |
| "learning_rate": 2.258e-05, |
| "loss": 0.0143, |
| "step": 1130 |
| }, |
| { |
| "epoch": 28.5, |
| "grad_norm": 0.1590012162923813, |
| "learning_rate": 2.2780000000000002e-05, |
| "loss": 0.0148, |
| "step": 1140 |
| }, |
| { |
| "epoch": 28.75, |
| "grad_norm": 0.2641109824180603, |
| "learning_rate": 2.298e-05, |
| "loss": 0.0151, |
| "step": 1150 |
| }, |
| { |
| "epoch": 29.0, |
| "grad_norm": 0.20940740406513214, |
| "learning_rate": 2.318e-05, |
| "loss": 0.0149, |
| "step": 1160 |
| }, |
| { |
| "epoch": 29.25, |
| "grad_norm": 0.2249547690153122, |
| "learning_rate": 2.3380000000000003e-05, |
| "loss": 0.0149, |
| "step": 1170 |
| }, |
| { |
| "epoch": 29.5, |
| "grad_norm": 0.23424924910068512, |
| "learning_rate": 2.358e-05, |
| "loss": 0.0155, |
| "step": 1180 |
| }, |
| { |
| "epoch": 29.75, |
| "grad_norm": 0.24567143619060516, |
| "learning_rate": 2.3780000000000003e-05, |
| "loss": 0.0151, |
| "step": 1190 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 0.2167378067970276, |
| "learning_rate": 2.398e-05, |
| "loss": 0.0151, |
| "step": 1200 |
| }, |
| { |
| "epoch": 30.25, |
| "grad_norm": 0.24560105800628662, |
| "learning_rate": 2.418e-05, |
| "loss": 0.0156, |
| "step": 1210 |
| }, |
| { |
| "epoch": 30.5, |
| "grad_norm": 0.2878526747226715, |
| "learning_rate": 2.438e-05, |
| "loss": 0.0145, |
| "step": 1220 |
| }, |
| { |
| "epoch": 30.75, |
| "grad_norm": 0.16258525848388672, |
| "learning_rate": 2.4580000000000002e-05, |
| "loss": 0.0144, |
| "step": 1230 |
| }, |
| { |
| "epoch": 31.0, |
| "grad_norm": 0.18433189392089844, |
| "learning_rate": 2.478e-05, |
| "loss": 0.0156, |
| "step": 1240 |
| }, |
| { |
| "epoch": 31.25, |
| "grad_norm": 0.23272426426410675, |
| "learning_rate": 2.498e-05, |
| "loss": 0.0147, |
| "step": 1250 |
| }, |
| { |
| "epoch": 31.5, |
| "grad_norm": 0.27221301198005676, |
| "learning_rate": 2.5180000000000003e-05, |
| "loss": 0.0151, |
| "step": 1260 |
| }, |
| { |
| "epoch": 31.75, |
| "grad_norm": 0.19055114686489105, |
| "learning_rate": 2.5380000000000004e-05, |
| "loss": 0.0134, |
| "step": 1270 |
| }, |
| { |
| "epoch": 32.0, |
| "grad_norm": 0.24308407306671143, |
| "learning_rate": 2.5580000000000002e-05, |
| "loss": 0.0141, |
| "step": 1280 |
| }, |
| { |
| "epoch": 32.25, |
| "grad_norm": 0.19755953550338745, |
| "learning_rate": 2.5779999999999997e-05, |
| "loss": 0.0145, |
| "step": 1290 |
| }, |
| { |
| "epoch": 32.5, |
| "grad_norm": 0.22123226523399353, |
| "learning_rate": 2.598e-05, |
| "loss": 0.0128, |
| "step": 1300 |
| }, |
| { |
| "epoch": 32.75, |
| "grad_norm": 0.2962028980255127, |
| "learning_rate": 2.618e-05, |
| "loss": 0.0135, |
| "step": 1310 |
| }, |
| { |
| "epoch": 33.0, |
| "grad_norm": 0.2808900475502014, |
| "learning_rate": 2.6379999999999998e-05, |
| "loss": 0.0141, |
| "step": 1320 |
| }, |
| { |
| "epoch": 33.25, |
| "grad_norm": 0.2979101240634918, |
| "learning_rate": 2.658e-05, |
| "loss": 0.0131, |
| "step": 1330 |
| }, |
| { |
| "epoch": 33.5, |
| "grad_norm": 0.2694721817970276, |
| "learning_rate": 2.678e-05, |
| "loss": 0.0134, |
| "step": 1340 |
| }, |
| { |
| "epoch": 33.75, |
| "grad_norm": 0.2524833381175995, |
| "learning_rate": 2.698e-05, |
| "loss": 0.0135, |
| "step": 1350 |
| }, |
| { |
| "epoch": 34.0, |
| "grad_norm": 0.2554227113723755, |
| "learning_rate": 2.718e-05, |
| "loss": 0.0132, |
| "step": 1360 |
| }, |
| { |
| "epoch": 34.25, |
| "grad_norm": 0.2688157260417938, |
| "learning_rate": 2.738e-05, |
| "loss": 0.0125, |
| "step": 1370 |
| }, |
| { |
| "epoch": 34.5, |
| "grad_norm": 0.20932850241661072, |
| "learning_rate": 2.758e-05, |
| "loss": 0.0137, |
| "step": 1380 |
| }, |
| { |
| "epoch": 34.75, |
| "grad_norm": 0.19536396861076355, |
| "learning_rate": 2.778e-05, |
| "loss": 0.0134, |
| "step": 1390 |
| }, |
| { |
| "epoch": 35.0, |
| "grad_norm": 0.19312022626399994, |
| "learning_rate": 2.798e-05, |
| "loss": 0.015, |
| "step": 1400 |
| }, |
| { |
| "epoch": 35.25, |
| "grad_norm": 0.15348872542381287, |
| "learning_rate": 2.818e-05, |
| "loss": 0.0134, |
| "step": 1410 |
| }, |
| { |
| "epoch": 35.5, |
| "grad_norm": 0.19372671842575073, |
| "learning_rate": 2.8380000000000003e-05, |
| "loss": 0.0132, |
| "step": 1420 |
| }, |
| { |
| "epoch": 35.75, |
| "grad_norm": 0.16821452975273132, |
| "learning_rate": 2.858e-05, |
| "loss": 0.0124, |
| "step": 1430 |
| }, |
| { |
| "epoch": 36.0, |
| "grad_norm": 0.14121121168136597, |
| "learning_rate": 2.8780000000000002e-05, |
| "loss": 0.0131, |
| "step": 1440 |
| }, |
| { |
| "epoch": 36.25, |
| "grad_norm": 0.14743903279304504, |
| "learning_rate": 2.898e-05, |
| "loss": 0.0122, |
| "step": 1450 |
| }, |
| { |
| "epoch": 36.5, |
| "grad_norm": 0.1689988076686859, |
| "learning_rate": 2.9180000000000002e-05, |
| "loss": 0.0121, |
| "step": 1460 |
| }, |
| { |
| "epoch": 36.75, |
| "grad_norm": 0.1635463982820511, |
| "learning_rate": 2.9380000000000003e-05, |
| "loss": 0.0123, |
| "step": 1470 |
| }, |
| { |
| "epoch": 37.0, |
| "grad_norm": 0.2134884148836136, |
| "learning_rate": 2.958e-05, |
| "loss": 0.0118, |
| "step": 1480 |
| }, |
| { |
| "epoch": 37.25, |
| "grad_norm": 0.1917855590581894, |
| "learning_rate": 2.9780000000000003e-05, |
| "loss": 0.0135, |
| "step": 1490 |
| }, |
| { |
| "epoch": 37.5, |
| "grad_norm": 0.25814926624298096, |
| "learning_rate": 2.998e-05, |
| "loss": 0.0119, |
| "step": 1500 |
| }, |
| { |
| "epoch": 37.75, |
| "grad_norm": 0.19456756114959717, |
| "learning_rate": 3.0180000000000002e-05, |
| "loss": 0.0126, |
| "step": 1510 |
| }, |
| { |
| "epoch": 38.0, |
| "grad_norm": 0.16085229814052582, |
| "learning_rate": 3.0380000000000004e-05, |
| "loss": 0.0128, |
| "step": 1520 |
| }, |
| { |
| "epoch": 38.25, |
| "grad_norm": 0.1836978793144226, |
| "learning_rate": 3.058e-05, |
| "loss": 0.0127, |
| "step": 1530 |
| }, |
| { |
| "epoch": 38.5, |
| "grad_norm": 0.2007584571838379, |
| "learning_rate": 3.078e-05, |
| "loss": 0.012, |
| "step": 1540 |
| }, |
| { |
| "epoch": 38.75, |
| "grad_norm": 0.2957303524017334, |
| "learning_rate": 3.0980000000000005e-05, |
| "loss": 0.0116, |
| "step": 1550 |
| }, |
| { |
| "epoch": 39.0, |
| "grad_norm": 0.3204158544540405, |
| "learning_rate": 3.118e-05, |
| "loss": 0.0126, |
| "step": 1560 |
| }, |
| { |
| "epoch": 39.25, |
| "grad_norm": 0.1719009280204773, |
| "learning_rate": 3.138e-05, |
| "loss": 0.011, |
| "step": 1570 |
| }, |
| { |
| "epoch": 39.5, |
| "grad_norm": 0.16781948506832123, |
| "learning_rate": 3.1580000000000006e-05, |
| "loss": 0.0117, |
| "step": 1580 |
| }, |
| { |
| "epoch": 39.75, |
| "grad_norm": 0.21803607046604156, |
| "learning_rate": 3.1780000000000004e-05, |
| "loss": 0.0122, |
| "step": 1590 |
| }, |
| { |
| "epoch": 40.0, |
| "grad_norm": 0.2320670336484909, |
| "learning_rate": 3.198e-05, |
| "loss": 0.0126, |
| "step": 1600 |
| }, |
| { |
| "epoch": 40.25, |
| "grad_norm": 0.24788905680179596, |
| "learning_rate": 3.218e-05, |
| "loss": 0.0124, |
| "step": 1610 |
| }, |
| { |
| "epoch": 40.5, |
| "grad_norm": 0.1828049123287201, |
| "learning_rate": 3.238e-05, |
| "loss": 0.0112, |
| "step": 1620 |
| }, |
| { |
| "epoch": 40.75, |
| "grad_norm": 0.23975656926631927, |
| "learning_rate": 3.2579999999999996e-05, |
| "loss": 0.0124, |
| "step": 1630 |
| }, |
| { |
| "epoch": 41.0, |
| "grad_norm": 0.21705782413482666, |
| "learning_rate": 3.278e-05, |
| "loss": 0.0112, |
| "step": 1640 |
| }, |
| { |
| "epoch": 41.25, |
| "grad_norm": 0.22534944117069244, |
| "learning_rate": 3.298e-05, |
| "loss": 0.0116, |
| "step": 1650 |
| }, |
| { |
| "epoch": 41.5, |
| "grad_norm": 0.16665105521678925, |
| "learning_rate": 3.318e-05, |
| "loss": 0.0134, |
| "step": 1660 |
| }, |
| { |
| "epoch": 41.75, |
| "grad_norm": 0.2458990514278412, |
| "learning_rate": 3.338e-05, |
| "loss": 0.012, |
| "step": 1670 |
| }, |
| { |
| "epoch": 42.0, |
| "grad_norm": 0.192895770072937, |
| "learning_rate": 3.358e-05, |
| "loss": 0.0119, |
| "step": 1680 |
| }, |
| { |
| "epoch": 42.25, |
| "grad_norm": 0.21721677482128143, |
| "learning_rate": 3.378e-05, |
| "loss": 0.0126, |
| "step": 1690 |
| }, |
| { |
| "epoch": 42.5, |
| "grad_norm": 0.2175186723470688, |
| "learning_rate": 3.398e-05, |
| "loss": 0.0124, |
| "step": 1700 |
| }, |
| { |
| "epoch": 42.75, |
| "grad_norm": 0.1788182556629181, |
| "learning_rate": 3.418e-05, |
| "loss": 0.0119, |
| "step": 1710 |
| }, |
| { |
| "epoch": 43.0, |
| "grad_norm": 0.17151236534118652, |
| "learning_rate": 3.438e-05, |
| "loss": 0.0115, |
| "step": 1720 |
| }, |
| { |
| "epoch": 43.25, |
| "grad_norm": 0.22371003031730652, |
| "learning_rate": 3.4580000000000004e-05, |
| "loss": 0.0121, |
| "step": 1730 |
| }, |
| { |
| "epoch": 43.5, |
| "grad_norm": 0.26781198382377625, |
| "learning_rate": 3.478e-05, |
| "loss": 0.0116, |
| "step": 1740 |
| }, |
| { |
| "epoch": 43.75, |
| "grad_norm": 0.21033993363380432, |
| "learning_rate": 3.498e-05, |
| "loss": 0.0106, |
| "step": 1750 |
| }, |
| { |
| "epoch": 44.0, |
| "grad_norm": 0.22407804429531097, |
| "learning_rate": 3.518e-05, |
| "loss": 0.0109, |
| "step": 1760 |
| }, |
| { |
| "epoch": 44.25, |
| "grad_norm": 0.18357867002487183, |
| "learning_rate": 3.5380000000000003e-05, |
| "loss": 0.0107, |
| "step": 1770 |
| }, |
| { |
| "epoch": 44.5, |
| "grad_norm": 0.1971215456724167, |
| "learning_rate": 3.558e-05, |
| "loss": 0.0112, |
| "step": 1780 |
| }, |
| { |
| "epoch": 44.75, |
| "grad_norm": 0.18872156739234924, |
| "learning_rate": 3.578e-05, |
| "loss": 0.0105, |
| "step": 1790 |
| }, |
| { |
| "epoch": 45.0, |
| "grad_norm": 0.17367962002754211, |
| "learning_rate": 3.5980000000000004e-05, |
| "loss": 0.0115, |
| "step": 1800 |
| }, |
| { |
| "epoch": 45.25, |
| "grad_norm": 0.14945487678050995, |
| "learning_rate": 3.618e-05, |
| "loss": 0.0113, |
| "step": 1810 |
| }, |
| { |
| "epoch": 45.5, |
| "grad_norm": 0.1590161919593811, |
| "learning_rate": 3.638e-05, |
| "loss": 0.0103, |
| "step": 1820 |
| }, |
| { |
| "epoch": 45.75, |
| "grad_norm": 0.18627125024795532, |
| "learning_rate": 3.6580000000000006e-05, |
| "loss": 0.0111, |
| "step": 1830 |
| }, |
| { |
| "epoch": 46.0, |
| "grad_norm": 0.19844526052474976, |
| "learning_rate": 3.6780000000000004e-05, |
| "loss": 0.0111, |
| "step": 1840 |
| }, |
| { |
| "epoch": 46.25, |
| "grad_norm": 0.17142851650714874, |
| "learning_rate": 3.698e-05, |
| "loss": 0.0114, |
| "step": 1850 |
| }, |
| { |
| "epoch": 46.5, |
| "grad_norm": 0.1916825920343399, |
| "learning_rate": 3.7180000000000007e-05, |
| "loss": 0.0099, |
| "step": 1860 |
| }, |
| { |
| "epoch": 46.75, |
| "grad_norm": 0.1983088254928589, |
| "learning_rate": 3.7380000000000005e-05, |
| "loss": 0.0102, |
| "step": 1870 |
| }, |
| { |
| "epoch": 47.0, |
| "grad_norm": 0.11927555501461029, |
| "learning_rate": 3.758e-05, |
| "loss": 0.0099, |
| "step": 1880 |
| }, |
| { |
| "epoch": 47.25, |
| "grad_norm": 0.1939011514186859, |
| "learning_rate": 3.778000000000001e-05, |
| "loss": 0.0106, |
| "step": 1890 |
| }, |
| { |
| "epoch": 47.5, |
| "grad_norm": 0.20579646527767181, |
| "learning_rate": 3.7980000000000006e-05, |
| "loss": 0.01, |
| "step": 1900 |
| }, |
| { |
| "epoch": 47.75, |
| "grad_norm": 0.19019336998462677, |
| "learning_rate": 3.818e-05, |
| "loss": 0.0102, |
| "step": 1910 |
| }, |
| { |
| "epoch": 48.0, |
| "grad_norm": 0.17853817343711853, |
| "learning_rate": 3.838e-05, |
| "loss": 0.0111, |
| "step": 1920 |
| }, |
| { |
| "epoch": 48.25, |
| "grad_norm": 0.2036396712064743, |
| "learning_rate": 3.858e-05, |
| "loss": 0.0106, |
| "step": 1930 |
| }, |
| { |
| "epoch": 48.5, |
| "grad_norm": 0.1510358452796936, |
| "learning_rate": 3.878e-05, |
| "loss": 0.0104, |
| "step": 1940 |
| }, |
| { |
| "epoch": 48.75, |
| "grad_norm": 0.21868108212947845, |
| "learning_rate": 3.898e-05, |
| "loss": 0.0107, |
| "step": 1950 |
| }, |
| { |
| "epoch": 49.0, |
| "grad_norm": 0.12815332412719727, |
| "learning_rate": 3.918e-05, |
| "loss": 0.0099, |
| "step": 1960 |
| }, |
| { |
| "epoch": 49.25, |
| "grad_norm": 0.22570985555648804, |
| "learning_rate": 3.938e-05, |
| "loss": 0.0102, |
| "step": 1970 |
| }, |
| { |
| "epoch": 49.5, |
| "grad_norm": 0.18665990233421326, |
| "learning_rate": 3.958e-05, |
| "loss": 0.0109, |
| "step": 1980 |
| }, |
| { |
| "epoch": 49.75, |
| "grad_norm": 0.24190504848957062, |
| "learning_rate": 3.978e-05, |
| "loss": 0.011, |
| "step": 1990 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 0.2541164755821228, |
| "learning_rate": 3.998e-05, |
| "loss": 0.011, |
| "step": 2000 |
| }, |
| { |
| "epoch": 50.25, |
| "grad_norm": 0.25958317518234253, |
| "learning_rate": 4.018e-05, |
| "loss": 0.0096, |
| "step": 2010 |
| }, |
| { |
| "epoch": 50.5, |
| "grad_norm": 0.2289685308933258, |
| "learning_rate": 4.038e-05, |
| "loss": 0.0106, |
| "step": 2020 |
| }, |
| { |
| "epoch": 50.75, |
| "grad_norm": 0.19097794592380524, |
| "learning_rate": 4.058e-05, |
| "loss": 0.0105, |
| "step": 2030 |
| }, |
| { |
| "epoch": 51.0, |
| "grad_norm": 0.19423332810401917, |
| "learning_rate": 4.078e-05, |
| "loss": 0.0101, |
| "step": 2040 |
| }, |
| { |
| "epoch": 51.25, |
| "grad_norm": 0.20464728772640228, |
| "learning_rate": 4.0980000000000004e-05, |
| "loss": 0.0102, |
| "step": 2050 |
| }, |
| { |
| "epoch": 51.5, |
| "grad_norm": 0.22027207911014557, |
| "learning_rate": 4.118e-05, |
| "loss": 0.0094, |
| "step": 2060 |
| }, |
| { |
| "epoch": 51.75, |
| "grad_norm": 0.2023499310016632, |
| "learning_rate": 4.138e-05, |
| "loss": 0.0096, |
| "step": 2070 |
| }, |
| { |
| "epoch": 52.0, |
| "grad_norm": 0.2296619862318039, |
| "learning_rate": 4.1580000000000005e-05, |
| "loss": 0.0097, |
| "step": 2080 |
| }, |
| { |
| "epoch": 52.25, |
| "grad_norm": 0.25673484802246094, |
| "learning_rate": 4.178e-05, |
| "loss": 0.0099, |
| "step": 2090 |
| }, |
| { |
| "epoch": 52.5, |
| "grad_norm": 0.14860525727272034, |
| "learning_rate": 4.198e-05, |
| "loss": 0.0093, |
| "step": 2100 |
| }, |
| { |
| "epoch": 52.75, |
| "grad_norm": 0.20391502976417542, |
| "learning_rate": 4.2180000000000006e-05, |
| "loss": 0.0099, |
| "step": 2110 |
| }, |
| { |
| "epoch": 53.0, |
| "grad_norm": 0.1730753779411316, |
| "learning_rate": 4.2380000000000004e-05, |
| "loss": 0.0106, |
| "step": 2120 |
| }, |
| { |
| "epoch": 53.25, |
| "grad_norm": 0.24567775428295135, |
| "learning_rate": 4.258e-05, |
| "loss": 0.0101, |
| "step": 2130 |
| }, |
| { |
| "epoch": 53.5, |
| "grad_norm": 0.20263069868087769, |
| "learning_rate": 4.278e-05, |
| "loss": 0.01, |
| "step": 2140 |
| }, |
| { |
| "epoch": 53.75, |
| "grad_norm": 0.23891495168209076, |
| "learning_rate": 4.2980000000000005e-05, |
| "loss": 0.0102, |
| "step": 2150 |
| }, |
| { |
| "epoch": 54.0, |
| "grad_norm": 0.21011893451213837, |
| "learning_rate": 4.318e-05, |
| "loss": 0.0102, |
| "step": 2160 |
| }, |
| { |
| "epoch": 54.25, |
| "grad_norm": 0.19230227172374725, |
| "learning_rate": 4.338e-05, |
| "loss": 0.0103, |
| "step": 2170 |
| }, |
| { |
| "epoch": 54.5, |
| "grad_norm": 0.2491769939661026, |
| "learning_rate": 4.3580000000000006e-05, |
| "loss": 0.0093, |
| "step": 2180 |
| }, |
| { |
| "epoch": 54.75, |
| "grad_norm": 0.22918479144573212, |
| "learning_rate": 4.3780000000000004e-05, |
| "loss": 0.0088, |
| "step": 2190 |
| }, |
| { |
| "epoch": 55.0, |
| "grad_norm": 0.19413116574287415, |
| "learning_rate": 4.398e-05, |
| "loss": 0.01, |
| "step": 2200 |
| }, |
| { |
| "epoch": 55.25, |
| "grad_norm": 0.19914408028125763, |
| "learning_rate": 4.418000000000001e-05, |
| "loss": 0.0097, |
| "step": 2210 |
| }, |
| { |
| "epoch": 55.5, |
| "grad_norm": 0.2266189604997635, |
| "learning_rate": 4.438e-05, |
| "loss": 0.0101, |
| "step": 2220 |
| }, |
| { |
| "epoch": 55.75, |
| "grad_norm": 0.19024869799613953, |
| "learning_rate": 4.458e-05, |
| "loss": 0.0101, |
| "step": 2230 |
| }, |
| { |
| "epoch": 56.0, |
| "grad_norm": 0.2472565621137619, |
| "learning_rate": 4.478e-05, |
| "loss": 0.0105, |
| "step": 2240 |
| }, |
| { |
| "epoch": 56.25, |
| "grad_norm": 0.20708179473876953, |
| "learning_rate": 4.498e-05, |
| "loss": 0.0104, |
| "step": 2250 |
| }, |
| { |
| "epoch": 56.5, |
| "grad_norm": 0.22921694815158844, |
| "learning_rate": 4.518e-05, |
| "loss": 0.0109, |
| "step": 2260 |
| }, |
| { |
| "epoch": 56.75, |
| "grad_norm": 0.16786304116249084, |
| "learning_rate": 4.538e-05, |
| "loss": 0.0094, |
| "step": 2270 |
| }, |
| { |
| "epoch": 57.0, |
| "grad_norm": 0.20425142347812653, |
| "learning_rate": 4.558e-05, |
| "loss": 0.0093, |
| "step": 2280 |
| }, |
| { |
| "epoch": 57.25, |
| "grad_norm": 0.18586701154708862, |
| "learning_rate": 4.578e-05, |
| "loss": 0.0092, |
| "step": 2290 |
| }, |
| { |
| "epoch": 57.5, |
| "grad_norm": 0.22823484241962433, |
| "learning_rate": 4.5980000000000004e-05, |
| "loss": 0.0096, |
| "step": 2300 |
| }, |
| { |
| "epoch": 57.75, |
| "grad_norm": 0.21421211957931519, |
| "learning_rate": 4.618e-05, |
| "loss": 0.0095, |
| "step": 2310 |
| }, |
| { |
| "epoch": 58.0, |
| "grad_norm": 0.3175329267978668, |
| "learning_rate": 4.638e-05, |
| "loss": 0.0112, |
| "step": 2320 |
| }, |
| { |
| "epoch": 58.25, |
| "grad_norm": 0.206922248005867, |
| "learning_rate": 4.6580000000000005e-05, |
| "loss": 0.0093, |
| "step": 2330 |
| }, |
| { |
| "epoch": 58.5, |
| "grad_norm": 0.15221217274665833, |
| "learning_rate": 4.678e-05, |
| "loss": 0.0107, |
| "step": 2340 |
| }, |
| { |
| "epoch": 58.75, |
| "grad_norm": 0.15896166861057281, |
| "learning_rate": 4.698e-05, |
| "loss": 0.0086, |
| "step": 2350 |
| }, |
| { |
| "epoch": 59.0, |
| "grad_norm": 0.20901215076446533, |
| "learning_rate": 4.718e-05, |
| "loss": 0.0088, |
| "step": 2360 |
| }, |
| { |
| "epoch": 59.25, |
| "grad_norm": 0.263406902551651, |
| "learning_rate": 4.7380000000000004e-05, |
| "loss": 0.0092, |
| "step": 2370 |
| }, |
| { |
| "epoch": 59.5, |
| "grad_norm": 0.21237947046756744, |
| "learning_rate": 4.758e-05, |
| "loss": 0.0084, |
| "step": 2380 |
| }, |
| { |
| "epoch": 59.75, |
| "grad_norm": 0.17976641654968262, |
| "learning_rate": 4.778e-05, |
| "loss": 0.0082, |
| "step": 2390 |
| }, |
| { |
| "epoch": 60.0, |
| "grad_norm": 0.21562369167804718, |
| "learning_rate": 4.7980000000000005e-05, |
| "loss": 0.0095, |
| "step": 2400 |
| }, |
| { |
| "epoch": 60.25, |
| "grad_norm": 0.2006334811449051, |
| "learning_rate": 4.818e-05, |
| "loss": 0.0089, |
| "step": 2410 |
| }, |
| { |
| "epoch": 60.5, |
| "grad_norm": 0.1672360748052597, |
| "learning_rate": 4.838e-05, |
| "loss": 0.0087, |
| "step": 2420 |
| }, |
| { |
| "epoch": 60.75, |
| "grad_norm": 0.19504928588867188, |
| "learning_rate": 4.8580000000000006e-05, |
| "loss": 0.0088, |
| "step": 2430 |
| }, |
| { |
| "epoch": 61.0, |
| "grad_norm": 0.14196740090847015, |
| "learning_rate": 4.8780000000000004e-05, |
| "loss": 0.0094, |
| "step": 2440 |
| }, |
| { |
| "epoch": 61.25, |
| "grad_norm": 0.18832427263259888, |
| "learning_rate": 4.898e-05, |
| "loss": 0.0097, |
| "step": 2450 |
| }, |
| { |
| "epoch": 61.5, |
| "grad_norm": 0.1406528502702713, |
| "learning_rate": 4.918000000000001e-05, |
| "loss": 0.008, |
| "step": 2460 |
| }, |
| { |
| "epoch": 61.75, |
| "grad_norm": 0.17165546119213104, |
| "learning_rate": 4.9380000000000005e-05, |
| "loss": 0.0096, |
| "step": 2470 |
| }, |
| { |
| "epoch": 62.0, |
| "grad_norm": 0.1912836730480194, |
| "learning_rate": 4.958e-05, |
| "loss": 0.0099, |
| "step": 2480 |
| }, |
| { |
| "epoch": 62.25, |
| "grad_norm": 0.239595428109169, |
| "learning_rate": 4.978e-05, |
| "loss": 0.0093, |
| "step": 2490 |
| }, |
| { |
| "epoch": 62.5, |
| "grad_norm": 0.16011711955070496, |
| "learning_rate": 4.9980000000000006e-05, |
| "loss": 0.0083, |
| "step": 2500 |
| }, |
| { |
| "epoch": 62.75, |
| "grad_norm": 0.17068330943584442, |
| "learning_rate": 5.0180000000000004e-05, |
| "loss": 0.0084, |
| "step": 2510 |
| }, |
| { |
| "epoch": 63.0, |
| "grad_norm": 0.1504756510257721, |
| "learning_rate": 5.038e-05, |
| "loss": 0.0085, |
| "step": 2520 |
| }, |
| { |
| "epoch": 63.25, |
| "grad_norm": 0.20306769013404846, |
| "learning_rate": 5.058000000000001e-05, |
| "loss": 0.0093, |
| "step": 2530 |
| }, |
| { |
| "epoch": 63.5, |
| "grad_norm": 0.17090564966201782, |
| "learning_rate": 5.0780000000000005e-05, |
| "loss": 0.0075, |
| "step": 2540 |
| }, |
| { |
| "epoch": 63.75, |
| "grad_norm": 0.1755877137184143, |
| "learning_rate": 5.098e-05, |
| "loss": 0.0086, |
| "step": 2550 |
| }, |
| { |
| "epoch": 64.0, |
| "grad_norm": 0.11523690074682236, |
| "learning_rate": 5.118000000000001e-05, |
| "loss": 0.0084, |
| "step": 2560 |
| }, |
| { |
| "epoch": 64.25, |
| "grad_norm": 0.15952451527118683, |
| "learning_rate": 5.1380000000000006e-05, |
| "loss": 0.0085, |
| "step": 2570 |
| }, |
| { |
| "epoch": 64.5, |
| "grad_norm": 0.11506156623363495, |
| "learning_rate": 5.1580000000000004e-05, |
| "loss": 0.008, |
| "step": 2580 |
| }, |
| { |
| "epoch": 64.75, |
| "grad_norm": 0.21272993087768555, |
| "learning_rate": 5.178000000000001e-05, |
| "loss": 0.0083, |
| "step": 2590 |
| }, |
| { |
| "epoch": 65.0, |
| "grad_norm": 0.18400269746780396, |
| "learning_rate": 5.198000000000001e-05, |
| "loss": 0.0083, |
| "step": 2600 |
| }, |
| { |
| "epoch": 65.25, |
| "grad_norm": 0.1559101790189743, |
| "learning_rate": 5.2180000000000005e-05, |
| "loss": 0.0075, |
| "step": 2610 |
| }, |
| { |
| "epoch": 65.5, |
| "grad_norm": 0.1637546718120575, |
| "learning_rate": 5.238000000000001e-05, |
| "loss": 0.0083, |
| "step": 2620 |
| }, |
| { |
| "epoch": 65.75, |
| "grad_norm": 0.22429779171943665, |
| "learning_rate": 5.258000000000001e-05, |
| "loss": 0.0087, |
| "step": 2630 |
| }, |
| { |
| "epoch": 66.0, |
| "grad_norm": 0.20996035635471344, |
| "learning_rate": 5.2780000000000006e-05, |
| "loss": 0.0088, |
| "step": 2640 |
| }, |
| { |
| "epoch": 66.25, |
| "grad_norm": 0.15640440583229065, |
| "learning_rate": 5.2980000000000004e-05, |
| "loss": 0.0079, |
| "step": 2650 |
| }, |
| { |
| "epoch": 66.5, |
| "grad_norm": 0.22142156958580017, |
| "learning_rate": 5.318000000000001e-05, |
| "loss": 0.0091, |
| "step": 2660 |
| }, |
| { |
| "epoch": 66.75, |
| "grad_norm": 0.17605368793010712, |
| "learning_rate": 5.338000000000001e-05, |
| "loss": 0.0081, |
| "step": 2670 |
| }, |
| { |
| "epoch": 67.0, |
| "grad_norm": 0.1534302979707718, |
| "learning_rate": 5.3580000000000005e-05, |
| "loss": 0.0086, |
| "step": 2680 |
| }, |
| { |
| "epoch": 67.25, |
| "grad_norm": 0.18950927257537842, |
| "learning_rate": 5.378e-05, |
| "loss": 0.0086, |
| "step": 2690 |
| }, |
| { |
| "epoch": 67.5, |
| "grad_norm": 0.17753522098064423, |
| "learning_rate": 5.3979999999999995e-05, |
| "loss": 0.0075, |
| "step": 2700 |
| }, |
| { |
| "epoch": 67.75, |
| "grad_norm": 0.14533065259456635, |
| "learning_rate": 5.418e-05, |
| "loss": 0.0079, |
| "step": 2710 |
| }, |
| { |
| "epoch": 68.0, |
| "grad_norm": 0.17737789452075958, |
| "learning_rate": 5.438e-05, |
| "loss": 0.0081, |
| "step": 2720 |
| }, |
| { |
| "epoch": 68.25, |
| "grad_norm": 0.16454491019248962, |
| "learning_rate": 5.4579999999999996e-05, |
| "loss": 0.0079, |
| "step": 2730 |
| }, |
| { |
| "epoch": 68.5, |
| "grad_norm": 0.1953648179769516, |
| "learning_rate": 5.478e-05, |
| "loss": 0.0078, |
| "step": 2740 |
| }, |
| { |
| "epoch": 68.75, |
| "grad_norm": 0.20770540833473206, |
| "learning_rate": 5.498e-05, |
| "loss": 0.009, |
| "step": 2750 |
| }, |
| { |
| "epoch": 69.0, |
| "grad_norm": 0.18003995716571808, |
| "learning_rate": 5.518e-05, |
| "loss": 0.0079, |
| "step": 2760 |
| }, |
| { |
| "epoch": 69.25, |
| "grad_norm": 0.1802724152803421, |
| "learning_rate": 5.538e-05, |
| "loss": 0.008, |
| "step": 2770 |
| }, |
| { |
| "epoch": 69.5, |
| "grad_norm": 0.17148584127426147, |
| "learning_rate": 5.558e-05, |
| "loss": 0.0079, |
| "step": 2780 |
| }, |
| { |
| "epoch": 69.75, |
| "grad_norm": 0.21797288954257965, |
| "learning_rate": 5.578e-05, |
| "loss": 0.0075, |
| "step": 2790 |
| }, |
| { |
| "epoch": 70.0, |
| "grad_norm": 0.19681450724601746, |
| "learning_rate": 5.5979999999999996e-05, |
| "loss": 0.0085, |
| "step": 2800 |
| }, |
| { |
| "epoch": 70.25, |
| "grad_norm": 0.18321318924427032, |
| "learning_rate": 5.618e-05, |
| "loss": 0.0087, |
| "step": 2810 |
| }, |
| { |
| "epoch": 70.5, |
| "grad_norm": 0.18249335885047913, |
| "learning_rate": 5.638e-05, |
| "loss": 0.0073, |
| "step": 2820 |
| }, |
| { |
| "epoch": 70.75, |
| "grad_norm": 0.13530372083187103, |
| "learning_rate": 5.658e-05, |
| "loss": 0.007, |
| "step": 2830 |
| }, |
| { |
| "epoch": 71.0, |
| "grad_norm": 0.23054936528205872, |
| "learning_rate": 5.678e-05, |
| "loss": 0.0086, |
| "step": 2840 |
| }, |
| { |
| "epoch": 71.25, |
| "grad_norm": 0.2308930903673172, |
| "learning_rate": 5.698e-05, |
| "loss": 0.0086, |
| "step": 2850 |
| }, |
| { |
| "epoch": 71.5, |
| "grad_norm": 0.2094513475894928, |
| "learning_rate": 5.718e-05, |
| "loss": 0.0077, |
| "step": 2860 |
| }, |
| { |
| "epoch": 71.75, |
| "grad_norm": 0.17534701526165009, |
| "learning_rate": 5.738e-05, |
| "loss": 0.0076, |
| "step": 2870 |
| }, |
| { |
| "epoch": 72.0, |
| "grad_norm": 0.22989408671855927, |
| "learning_rate": 5.758e-05, |
| "loss": 0.0079, |
| "step": 2880 |
| }, |
| { |
| "epoch": 72.25, |
| "grad_norm": 0.2179604172706604, |
| "learning_rate": 5.778e-05, |
| "loss": 0.0093, |
| "step": 2890 |
| }, |
| { |
| "epoch": 72.5, |
| "grad_norm": 0.24805155396461487, |
| "learning_rate": 5.7980000000000004e-05, |
| "loss": 0.0084, |
| "step": 2900 |
| }, |
| { |
| "epoch": 72.75, |
| "grad_norm": 0.20227353274822235, |
| "learning_rate": 5.818e-05, |
| "loss": 0.01, |
| "step": 2910 |
| }, |
| { |
| "epoch": 73.0, |
| "grad_norm": 0.17762471735477448, |
| "learning_rate": 5.838e-05, |
| "loss": 0.0075, |
| "step": 2920 |
| }, |
| { |
| "epoch": 73.25, |
| "grad_norm": 0.25949570536613464, |
| "learning_rate": 5.858e-05, |
| "loss": 0.0079, |
| "step": 2930 |
| }, |
| { |
| "epoch": 73.5, |
| "grad_norm": 0.2676275670528412, |
| "learning_rate": 5.878e-05, |
| "loss": 0.0086, |
| "step": 2940 |
| }, |
| { |
| "epoch": 73.75, |
| "grad_norm": 0.1574457585811615, |
| "learning_rate": 5.898e-05, |
| "loss": 0.0079, |
| "step": 2950 |
| }, |
| { |
| "epoch": 74.0, |
| "grad_norm": 0.28759798407554626, |
| "learning_rate": 5.918e-05, |
| "loss": 0.0089, |
| "step": 2960 |
| }, |
| { |
| "epoch": 74.25, |
| "grad_norm": 0.18525859713554382, |
| "learning_rate": 5.9380000000000004e-05, |
| "loss": 0.0086, |
| "step": 2970 |
| }, |
| { |
| "epoch": 74.5, |
| "grad_norm": 0.1620161384344101, |
| "learning_rate": 5.958e-05, |
| "loss": 0.0084, |
| "step": 2980 |
| }, |
| { |
| "epoch": 74.75, |
| "grad_norm": 0.20352789759635925, |
| "learning_rate": 5.978e-05, |
| "loss": 0.0093, |
| "step": 2990 |
| }, |
| { |
| "epoch": 75.0, |
| "grad_norm": 0.2062734216451645, |
| "learning_rate": 5.9980000000000005e-05, |
| "loss": 0.0082, |
| "step": 3000 |
| }, |
| { |
| "epoch": 75.25, |
| "grad_norm": 0.18606604635715485, |
| "learning_rate": 6.018e-05, |
| "loss": 0.008, |
| "step": 3010 |
| }, |
| { |
| "epoch": 75.5, |
| "grad_norm": 0.15150688588619232, |
| "learning_rate": 6.038e-05, |
| "loss": 0.0075, |
| "step": 3020 |
| }, |
| { |
| "epoch": 75.75, |
| "grad_norm": 0.17146310210227966, |
| "learning_rate": 6.0580000000000006e-05, |
| "loss": 0.0077, |
| "step": 3030 |
| }, |
| { |
| "epoch": 76.0, |
| "grad_norm": 0.17717711627483368, |
| "learning_rate": 6.0780000000000004e-05, |
| "loss": 0.0078, |
| "step": 3040 |
| }, |
| { |
| "epoch": 76.25, |
| "grad_norm": 0.22910268604755402, |
| "learning_rate": 6.098e-05, |
| "loss": 0.0078, |
| "step": 3050 |
| }, |
| { |
| "epoch": 76.5, |
| "grad_norm": 0.1869814544916153, |
| "learning_rate": 6.118000000000001e-05, |
| "loss": 0.0081, |
| "step": 3060 |
| }, |
| { |
| "epoch": 76.75, |
| "grad_norm": 0.23087920248508453, |
| "learning_rate": 6.138e-05, |
| "loss": 0.0076, |
| "step": 3070 |
| }, |
| { |
| "epoch": 77.0, |
| "grad_norm": 0.17326343059539795, |
| "learning_rate": 6.158e-05, |
| "loss": 0.0078, |
| "step": 3080 |
| }, |
| { |
| "epoch": 77.25, |
| "grad_norm": 0.17539773881435394, |
| "learning_rate": 6.178000000000001e-05, |
| "loss": 0.0077, |
| "step": 3090 |
| }, |
| { |
| "epoch": 77.5, |
| "grad_norm": 0.16601373255252838, |
| "learning_rate": 6.198e-05, |
| "loss": 0.0071, |
| "step": 3100 |
| }, |
| { |
| "epoch": 77.75, |
| "grad_norm": 0.23548623919487, |
| "learning_rate": 6.218e-05, |
| "loss": 0.0072, |
| "step": 3110 |
| }, |
| { |
| "epoch": 78.0, |
| "grad_norm": 0.20448268949985504, |
| "learning_rate": 6.238000000000001e-05, |
| "loss": 0.0072, |
| "step": 3120 |
| }, |
| { |
| "epoch": 78.25, |
| "grad_norm": 0.18967969715595245, |
| "learning_rate": 6.258e-05, |
| "loss": 0.008, |
| "step": 3130 |
| }, |
| { |
| "epoch": 78.5, |
| "grad_norm": 0.15069612860679626, |
| "learning_rate": 6.278e-05, |
| "loss": 0.0078, |
| "step": 3140 |
| }, |
| { |
| "epoch": 78.75, |
| "grad_norm": 0.18829141557216644, |
| "learning_rate": 6.298000000000001e-05, |
| "loss": 0.0075, |
| "step": 3150 |
| }, |
| { |
| "epoch": 79.0, |
| "grad_norm": 0.25839942693710327, |
| "learning_rate": 6.318e-05, |
| "loss": 0.0071, |
| "step": 3160 |
| }, |
| { |
| "epoch": 79.25, |
| "grad_norm": 0.23017530143260956, |
| "learning_rate": 6.338e-05, |
| "loss": 0.0074, |
| "step": 3170 |
| }, |
| { |
| "epoch": 79.5, |
| "grad_norm": 0.21601979434490204, |
| "learning_rate": 6.358000000000001e-05, |
| "loss": 0.0085, |
| "step": 3180 |
| }, |
| { |
| "epoch": 79.75, |
| "grad_norm": 0.16384194791316986, |
| "learning_rate": 6.378e-05, |
| "loss": 0.0082, |
| "step": 3190 |
| }, |
| { |
| "epoch": 80.0, |
| "grad_norm": 0.18998737633228302, |
| "learning_rate": 6.398000000000001e-05, |
| "loss": 0.0083, |
| "step": 3200 |
| }, |
| { |
| "epoch": 80.25, |
| "grad_norm": 0.16958178579807281, |
| "learning_rate": 6.418000000000001e-05, |
| "loss": 0.0074, |
| "step": 3210 |
| }, |
| { |
| "epoch": 80.5, |
| "grad_norm": 0.17526504397392273, |
| "learning_rate": 6.438e-05, |
| "loss": 0.0078, |
| "step": 3220 |
| }, |
| { |
| "epoch": 80.75, |
| "grad_norm": 0.1629568487405777, |
| "learning_rate": 6.458000000000001e-05, |
| "loss": 0.007, |
| "step": 3230 |
| }, |
| { |
| "epoch": 81.0, |
| "grad_norm": 0.14959284663200378, |
| "learning_rate": 6.478000000000001e-05, |
| "loss": 0.0079, |
| "step": 3240 |
| }, |
| { |
| "epoch": 81.25, |
| "grad_norm": 0.12923310697078705, |
| "learning_rate": 6.498e-05, |
| "loss": 0.0074, |
| "step": 3250 |
| }, |
| { |
| "epoch": 81.5, |
| "grad_norm": 0.24613654613494873, |
| "learning_rate": 6.518000000000001e-05, |
| "loss": 0.0077, |
| "step": 3260 |
| }, |
| { |
| "epoch": 81.75, |
| "grad_norm": 0.248945415019989, |
| "learning_rate": 6.538000000000001e-05, |
| "loss": 0.0078, |
| "step": 3270 |
| }, |
| { |
| "epoch": 82.0, |
| "grad_norm": 0.2111428678035736, |
| "learning_rate": 6.558e-05, |
| "loss": 0.0078, |
| "step": 3280 |
| }, |
| { |
| "epoch": 82.25, |
| "grad_norm": 0.1845024675130844, |
| "learning_rate": 6.578000000000001e-05, |
| "loss": 0.007, |
| "step": 3290 |
| }, |
| { |
| "epoch": 82.5, |
| "grad_norm": 0.16099268198013306, |
| "learning_rate": 6.598e-05, |
| "loss": 0.0074, |
| "step": 3300 |
| }, |
| { |
| "epoch": 82.75, |
| "grad_norm": 0.16541269421577454, |
| "learning_rate": 6.618e-05, |
| "loss": 0.0071, |
| "step": 3310 |
| }, |
| { |
| "epoch": 83.0, |
| "grad_norm": 0.1501871943473816, |
| "learning_rate": 6.638e-05, |
| "loss": 0.0073, |
| "step": 3320 |
| }, |
| { |
| "epoch": 83.25, |
| "grad_norm": 0.17568759620189667, |
| "learning_rate": 6.658e-05, |
| "loss": 0.0083, |
| "step": 3330 |
| }, |
| { |
| "epoch": 83.5, |
| "grad_norm": 0.209712415933609, |
| "learning_rate": 6.678e-05, |
| "loss": 0.0077, |
| "step": 3340 |
| }, |
| { |
| "epoch": 83.75, |
| "grad_norm": 0.23022456467151642, |
| "learning_rate": 6.698e-05, |
| "loss": 0.0075, |
| "step": 3350 |
| }, |
| { |
| "epoch": 84.0, |
| "grad_norm": 0.19351983070373535, |
| "learning_rate": 6.718e-05, |
| "loss": 0.0084, |
| "step": 3360 |
| }, |
| { |
| "epoch": 84.25, |
| "grad_norm": 0.21300102770328522, |
| "learning_rate": 6.738e-05, |
| "loss": 0.0075, |
| "step": 3370 |
| }, |
| { |
| "epoch": 84.5, |
| "grad_norm": 0.11744840443134308, |
| "learning_rate": 6.758e-05, |
| "loss": 0.0071, |
| "step": 3380 |
| }, |
| { |
| "epoch": 84.75, |
| "grad_norm": 0.18190492689609528, |
| "learning_rate": 6.778e-05, |
| "loss": 0.0076, |
| "step": 3390 |
| }, |
| { |
| "epoch": 85.0, |
| "grad_norm": 0.17487181723117828, |
| "learning_rate": 6.798e-05, |
| "loss": 0.008, |
| "step": 3400 |
| }, |
| { |
| "epoch": 85.25, |
| "grad_norm": 0.09983476251363754, |
| "learning_rate": 6.818e-05, |
| "loss": 0.0068, |
| "step": 3410 |
| }, |
| { |
| "epoch": 85.5, |
| "grad_norm": 0.16663005948066711, |
| "learning_rate": 6.838e-05, |
| "loss": 0.0082, |
| "step": 3420 |
| }, |
| { |
| "epoch": 85.75, |
| "grad_norm": 0.20401281118392944, |
| "learning_rate": 6.858e-05, |
| "loss": 0.0076, |
| "step": 3430 |
| }, |
| { |
| "epoch": 86.0, |
| "grad_norm": 0.2125725895166397, |
| "learning_rate": 6.878e-05, |
| "loss": 0.0086, |
| "step": 3440 |
| }, |
| { |
| "epoch": 86.25, |
| "grad_norm": 0.14450369775295258, |
| "learning_rate": 6.898e-05, |
| "loss": 0.0065, |
| "step": 3450 |
| }, |
| { |
| "epoch": 86.5, |
| "grad_norm": 0.1645418107509613, |
| "learning_rate": 6.918e-05, |
| "loss": 0.0078, |
| "step": 3460 |
| }, |
| { |
| "epoch": 86.75, |
| "grad_norm": 0.15646201372146606, |
| "learning_rate": 6.938e-05, |
| "loss": 0.0073, |
| "step": 3470 |
| }, |
| { |
| "epoch": 87.0, |
| "grad_norm": 0.17534413933753967, |
| "learning_rate": 6.958e-05, |
| "loss": 0.0076, |
| "step": 3480 |
| }, |
| { |
| "epoch": 87.25, |
| "grad_norm": 0.13481944799423218, |
| "learning_rate": 6.978e-05, |
| "loss": 0.007, |
| "step": 3490 |
| }, |
| { |
| "epoch": 87.5, |
| "grad_norm": 0.10460406541824341, |
| "learning_rate": 6.998e-05, |
| "loss": 0.0074, |
| "step": 3500 |
| }, |
| { |
| "epoch": 87.75, |
| "grad_norm": 0.1374213844537735, |
| "learning_rate": 7.018e-05, |
| "loss": 0.0069, |
| "step": 3510 |
| }, |
| { |
| "epoch": 88.0, |
| "grad_norm": 0.12865151464939117, |
| "learning_rate": 7.038e-05, |
| "loss": 0.0063, |
| "step": 3520 |
| }, |
| { |
| "epoch": 88.25, |
| "grad_norm": 0.17576292157173157, |
| "learning_rate": 7.058e-05, |
| "loss": 0.007, |
| "step": 3530 |
| }, |
| { |
| "epoch": 88.5, |
| "grad_norm": 0.18292437493801117, |
| "learning_rate": 7.078e-05, |
| "loss": 0.0068, |
| "step": 3540 |
| }, |
| { |
| "epoch": 88.75, |
| "grad_norm": 0.18838587403297424, |
| "learning_rate": 7.098e-05, |
| "loss": 0.0075, |
| "step": 3550 |
| }, |
| { |
| "epoch": 89.0, |
| "grad_norm": 0.1894351989030838, |
| "learning_rate": 7.118e-05, |
| "loss": 0.0081, |
| "step": 3560 |
| }, |
| { |
| "epoch": 89.25, |
| "grad_norm": 0.14305658638477325, |
| "learning_rate": 7.138e-05, |
| "loss": 0.0072, |
| "step": 3570 |
| }, |
| { |
| "epoch": 89.5, |
| "grad_norm": 0.1975056380033493, |
| "learning_rate": 7.158e-05, |
| "loss": 0.0079, |
| "step": 3580 |
| }, |
| { |
| "epoch": 89.75, |
| "grad_norm": 0.11977282166481018, |
| "learning_rate": 7.178000000000001e-05, |
| "loss": 0.0067, |
| "step": 3590 |
| }, |
| { |
| "epoch": 90.0, |
| "grad_norm": 0.1803884357213974, |
| "learning_rate": 7.198e-05, |
| "loss": 0.0071, |
| "step": 3600 |
| }, |
| { |
| "epoch": 90.25, |
| "grad_norm": 0.17560359835624695, |
| "learning_rate": 7.218e-05, |
| "loss": 0.0067, |
| "step": 3610 |
| }, |
| { |
| "epoch": 90.5, |
| "grad_norm": 0.12173809111118317, |
| "learning_rate": 7.238000000000001e-05, |
| "loss": 0.0068, |
| "step": 3620 |
| }, |
| { |
| "epoch": 90.75, |
| "grad_norm": 0.1920517534017563, |
| "learning_rate": 7.258e-05, |
| "loss": 0.0073, |
| "step": 3630 |
| }, |
| { |
| "epoch": 91.0, |
| "grad_norm": 0.18857932090759277, |
| "learning_rate": 7.278e-05, |
| "loss": 0.0074, |
| "step": 3640 |
| }, |
| { |
| "epoch": 91.25, |
| "grad_norm": 0.196843221783638, |
| "learning_rate": 7.298000000000001e-05, |
| "loss": 0.0066, |
| "step": 3650 |
| }, |
| { |
| "epoch": 91.5, |
| "grad_norm": 0.16514630615711212, |
| "learning_rate": 7.318e-05, |
| "loss": 0.0069, |
| "step": 3660 |
| }, |
| { |
| "epoch": 91.75, |
| "grad_norm": 0.14540424942970276, |
| "learning_rate": 7.338e-05, |
| "loss": 0.0066, |
| "step": 3670 |
| }, |
| { |
| "epoch": 92.0, |
| "grad_norm": 0.1184212788939476, |
| "learning_rate": 7.358000000000001e-05, |
| "loss": 0.0066, |
| "step": 3680 |
| }, |
| { |
| "epoch": 92.25, |
| "grad_norm": 0.15860192477703094, |
| "learning_rate": 7.378e-05, |
| "loss": 0.0072, |
| "step": 3690 |
| }, |
| { |
| "epoch": 92.5, |
| "grad_norm": 0.12642768025398254, |
| "learning_rate": 7.398e-05, |
| "loss": 0.0067, |
| "step": 3700 |
| }, |
| { |
| "epoch": 92.75, |
| "grad_norm": 0.14021170139312744, |
| "learning_rate": 7.418000000000001e-05, |
| "loss": 0.0064, |
| "step": 3710 |
| }, |
| { |
| "epoch": 93.0, |
| "grad_norm": 0.16662588715553284, |
| "learning_rate": 7.438e-05, |
| "loss": 0.0065, |
| "step": 3720 |
| }, |
| { |
| "epoch": 93.25, |
| "grad_norm": 0.1377324014902115, |
| "learning_rate": 7.458000000000001e-05, |
| "loss": 0.0073, |
| "step": 3730 |
| }, |
| { |
| "epoch": 93.5, |
| "grad_norm": 0.14351031184196472, |
| "learning_rate": 7.478e-05, |
| "loss": 0.0068, |
| "step": 3740 |
| }, |
| { |
| "epoch": 93.75, |
| "grad_norm": 0.14035643637180328, |
| "learning_rate": 7.498e-05, |
| "loss": 0.0065, |
| "step": 3750 |
| }, |
| { |
| "epoch": 94.0, |
| "grad_norm": 0.17440786957740784, |
| "learning_rate": 7.518000000000001e-05, |
| "loss": 0.007, |
| "step": 3760 |
| }, |
| { |
| "epoch": 94.25, |
| "grad_norm": 0.17953602969646454, |
| "learning_rate": 7.538e-05, |
| "loss": 0.0081, |
| "step": 3770 |
| }, |
| { |
| "epoch": 94.5, |
| "grad_norm": 0.19098593294620514, |
| "learning_rate": 7.558e-05, |
| "loss": 0.0078, |
| "step": 3780 |
| }, |
| { |
| "epoch": 94.75, |
| "grad_norm": 0.1300899088382721, |
| "learning_rate": 7.578000000000001e-05, |
| "loss": 0.0063, |
| "step": 3790 |
| }, |
| { |
| "epoch": 95.0, |
| "grad_norm": 0.2068023532629013, |
| "learning_rate": 7.598e-05, |
| "loss": 0.0073, |
| "step": 3800 |
| }, |
| { |
| "epoch": 95.25, |
| "grad_norm": 0.17585402727127075, |
| "learning_rate": 7.618e-05, |
| "loss": 0.0069, |
| "step": 3810 |
| }, |
| { |
| "epoch": 95.5, |
| "grad_norm": 0.22300763428211212, |
| "learning_rate": 7.638000000000001e-05, |
| "loss": 0.0075, |
| "step": 3820 |
| }, |
| { |
| "epoch": 95.75, |
| "grad_norm": 0.185755655169487, |
| "learning_rate": 7.658e-05, |
| "loss": 0.0068, |
| "step": 3830 |
| }, |
| { |
| "epoch": 96.0, |
| "grad_norm": 0.1492988020181656, |
| "learning_rate": 7.678000000000001e-05, |
| "loss": 0.0071, |
| "step": 3840 |
| }, |
| { |
| "epoch": 96.25, |
| "grad_norm": 0.15368859469890594, |
| "learning_rate": 7.698000000000001e-05, |
| "loss": 0.0077, |
| "step": 3850 |
| }, |
| { |
| "epoch": 96.5, |
| "grad_norm": 0.19075649976730347, |
| "learning_rate": 7.718e-05, |
| "loss": 0.0075, |
| "step": 3860 |
| }, |
| { |
| "epoch": 96.75, |
| "grad_norm": 0.13382676243782043, |
| "learning_rate": 7.738000000000001e-05, |
| "loss": 0.0066, |
| "step": 3870 |
| }, |
| { |
| "epoch": 97.0, |
| "grad_norm": 0.16297952830791473, |
| "learning_rate": 7.758000000000001e-05, |
| "loss": 0.0077, |
| "step": 3880 |
| }, |
| { |
| "epoch": 97.25, |
| "grad_norm": 0.12056317180395126, |
| "learning_rate": 7.778e-05, |
| "loss": 0.0074, |
| "step": 3890 |
| }, |
| { |
| "epoch": 97.5, |
| "grad_norm": 0.17979000508785248, |
| "learning_rate": 7.798000000000001e-05, |
| "loss": 0.0073, |
| "step": 3900 |
| }, |
| { |
| "epoch": 97.75, |
| "grad_norm": 0.18691030144691467, |
| "learning_rate": 7.818000000000001e-05, |
| "loss": 0.0064, |
| "step": 3910 |
| }, |
| { |
| "epoch": 98.0, |
| "grad_norm": 0.1338334083557129, |
| "learning_rate": 7.838e-05, |
| "loss": 0.0067, |
| "step": 3920 |
| }, |
| { |
| "epoch": 98.25, |
| "grad_norm": 0.1993681937456131, |
| "learning_rate": 7.858000000000001e-05, |
| "loss": 0.0068, |
| "step": 3930 |
| }, |
| { |
| "epoch": 98.5, |
| "grad_norm": 0.14984053373336792, |
| "learning_rate": 7.878e-05, |
| "loss": 0.0072, |
| "step": 3940 |
| }, |
| { |
| "epoch": 98.75, |
| "grad_norm": 0.15467676520347595, |
| "learning_rate": 7.897999999999999e-05, |
| "loss": 0.0075, |
| "step": 3950 |
| }, |
| { |
| "epoch": 99.0, |
| "grad_norm": 0.13584424555301666, |
| "learning_rate": 7.918e-05, |
| "loss": 0.0067, |
| "step": 3960 |
| }, |
| { |
| "epoch": 99.25, |
| "grad_norm": 0.10644155740737915, |
| "learning_rate": 7.938e-05, |
| "loss": 0.0072, |
| "step": 3970 |
| }, |
| { |
| "epoch": 99.5, |
| "grad_norm": 0.1576027125120163, |
| "learning_rate": 7.958e-05, |
| "loss": 0.0068, |
| "step": 3980 |
| }, |
| { |
| "epoch": 99.75, |
| "grad_norm": 0.12459024786949158, |
| "learning_rate": 7.978e-05, |
| "loss": 0.0063, |
| "step": 3990 |
| }, |
| { |
| "epoch": 100.0, |
| "grad_norm": 0.1432129591703415, |
| "learning_rate": 7.998e-05, |
| "loss": 0.007, |
| "step": 4000 |
| }, |
| { |
| "epoch": 100.25, |
| "grad_norm": 0.13156336545944214, |
| "learning_rate": 8.018e-05, |
| "loss": 0.0063, |
| "step": 4010 |
| }, |
| { |
| "epoch": 100.5, |
| "grad_norm": 0.19238434731960297, |
| "learning_rate": 8.038e-05, |
| "loss": 0.0067, |
| "step": 4020 |
| }, |
| { |
| "epoch": 100.75, |
| "grad_norm": 0.14078587293624878, |
| "learning_rate": 8.058e-05, |
| "loss": 0.0065, |
| "step": 4030 |
| }, |
| { |
| "epoch": 101.0, |
| "grad_norm": 0.13242961466312408, |
| "learning_rate": 8.078e-05, |
| "loss": 0.0065, |
| "step": 4040 |
| }, |
| { |
| "epoch": 101.25, |
| "grad_norm": 0.14315347373485565, |
| "learning_rate": 8.098e-05, |
| "loss": 0.0071, |
| "step": 4050 |
| }, |
| { |
| "epoch": 101.5, |
| "grad_norm": 0.17269261181354523, |
| "learning_rate": 8.118e-05, |
| "loss": 0.0064, |
| "step": 4060 |
| }, |
| { |
| "epoch": 101.75, |
| "grad_norm": 0.12596949934959412, |
| "learning_rate": 8.138e-05, |
| "loss": 0.0075, |
| "step": 4070 |
| }, |
| { |
| "epoch": 102.0, |
| "grad_norm": 0.14768068492412567, |
| "learning_rate": 8.158e-05, |
| "loss": 0.0076, |
| "step": 4080 |
| }, |
| { |
| "epoch": 102.25, |
| "grad_norm": 0.16106919944286346, |
| "learning_rate": 8.178e-05, |
| "loss": 0.0068, |
| "step": 4090 |
| }, |
| { |
| "epoch": 102.5, |
| "grad_norm": 0.11802563816308975, |
| "learning_rate": 8.198e-05, |
| "loss": 0.0068, |
| "step": 4100 |
| }, |
| { |
| "epoch": 102.75, |
| "grad_norm": 0.14049091935157776, |
| "learning_rate": 8.218e-05, |
| "loss": 0.0064, |
| "step": 4110 |
| }, |
| { |
| "epoch": 103.0, |
| "grad_norm": 0.1250571757555008, |
| "learning_rate": 8.238000000000001e-05, |
| "loss": 0.0065, |
| "step": 4120 |
| }, |
| { |
| "epoch": 103.25, |
| "grad_norm": 0.1342255175113678, |
| "learning_rate": 8.258e-05, |
| "loss": 0.0063, |
| "step": 4130 |
| }, |
| { |
| "epoch": 103.5, |
| "grad_norm": 0.1357700079679489, |
| "learning_rate": 8.278e-05, |
| "loss": 0.006, |
| "step": 4140 |
| }, |
| { |
| "epoch": 103.75, |
| "grad_norm": 0.1375548541545868, |
| "learning_rate": 8.298000000000001e-05, |
| "loss": 0.0073, |
| "step": 4150 |
| }, |
| { |
| "epoch": 104.0, |
| "grad_norm": 0.12512585520744324, |
| "learning_rate": 8.318e-05, |
| "loss": 0.0065, |
| "step": 4160 |
| }, |
| { |
| "epoch": 104.25, |
| "grad_norm": 0.1292533129453659, |
| "learning_rate": 8.338e-05, |
| "loss": 0.0065, |
| "step": 4170 |
| }, |
| { |
| "epoch": 104.5, |
| "grad_norm": 0.12111157923936844, |
| "learning_rate": 8.358e-05, |
| "loss": 0.0071, |
| "step": 4180 |
| }, |
| { |
| "epoch": 104.75, |
| "grad_norm": 0.17220772802829742, |
| "learning_rate": 8.378e-05, |
| "loss": 0.0065, |
| "step": 4190 |
| }, |
| { |
| "epoch": 105.0, |
| "grad_norm": 0.14518342912197113, |
| "learning_rate": 8.398e-05, |
| "loss": 0.0067, |
| "step": 4200 |
| }, |
| { |
| "epoch": 105.25, |
| "grad_norm": 0.16509418189525604, |
| "learning_rate": 8.418e-05, |
| "loss": 0.0072, |
| "step": 4210 |
| }, |
| { |
| "epoch": 105.5, |
| "grad_norm": 0.13074707984924316, |
| "learning_rate": 8.438e-05, |
| "loss": 0.0063, |
| "step": 4220 |
| }, |
| { |
| "epoch": 105.75, |
| "grad_norm": 0.1503017544746399, |
| "learning_rate": 8.458e-05, |
| "loss": 0.0065, |
| "step": 4230 |
| }, |
| { |
| "epoch": 106.0, |
| "grad_norm": 0.17117547988891602, |
| "learning_rate": 8.478e-05, |
| "loss": 0.0058, |
| "step": 4240 |
| }, |
| { |
| "epoch": 106.25, |
| "grad_norm": 0.28769177198410034, |
| "learning_rate": 8.498e-05, |
| "loss": 0.0071, |
| "step": 4250 |
| }, |
| { |
| "epoch": 106.5, |
| "grad_norm": 0.18290570378303528, |
| "learning_rate": 8.518000000000001e-05, |
| "loss": 0.0069, |
| "step": 4260 |
| }, |
| { |
| "epoch": 106.75, |
| "grad_norm": 0.2097172886133194, |
| "learning_rate": 8.538e-05, |
| "loss": 0.0065, |
| "step": 4270 |
| }, |
| { |
| "epoch": 107.0, |
| "grad_norm": 0.23110030591487885, |
| "learning_rate": 8.558e-05, |
| "loss": 0.0066, |
| "step": 4280 |
| }, |
| { |
| "epoch": 107.25, |
| "grad_norm": 0.21445296704769135, |
| "learning_rate": 8.578000000000001e-05, |
| "loss": 0.0071, |
| "step": 4290 |
| }, |
| { |
| "epoch": 107.5, |
| "grad_norm": 0.15120886266231537, |
| "learning_rate": 8.598e-05, |
| "loss": 0.0067, |
| "step": 4300 |
| }, |
| { |
| "epoch": 107.75, |
| "grad_norm": 0.21303877234458923, |
| "learning_rate": 8.618e-05, |
| "loss": 0.0068, |
| "step": 4310 |
| }, |
| { |
| "epoch": 108.0, |
| "grad_norm": 0.1451047956943512, |
| "learning_rate": 8.638000000000001e-05, |
| "loss": 0.0061, |
| "step": 4320 |
| }, |
| { |
| "epoch": 108.25, |
| "grad_norm": 0.14858797192573547, |
| "learning_rate": 8.658e-05, |
| "loss": 0.0065, |
| "step": 4330 |
| }, |
| { |
| "epoch": 108.5, |
| "grad_norm": 0.12267820537090302, |
| "learning_rate": 8.678e-05, |
| "loss": 0.0058, |
| "step": 4340 |
| }, |
| { |
| "epoch": 108.75, |
| "grad_norm": 0.14928346872329712, |
| "learning_rate": 8.698000000000001e-05, |
| "loss": 0.0073, |
| "step": 4350 |
| }, |
| { |
| "epoch": 109.0, |
| "grad_norm": 0.1512640118598938, |
| "learning_rate": 8.718e-05, |
| "loss": 0.0055, |
| "step": 4360 |
| }, |
| { |
| "epoch": 109.25, |
| "grad_norm": 0.13059866428375244, |
| "learning_rate": 8.738000000000001e-05, |
| "loss": 0.0062, |
| "step": 4370 |
| }, |
| { |
| "epoch": 109.5, |
| "grad_norm": 0.12359509617090225, |
| "learning_rate": 8.758000000000001e-05, |
| "loss": 0.0063, |
| "step": 4380 |
| }, |
| { |
| "epoch": 109.75, |
| "grad_norm": 0.146541029214859, |
| "learning_rate": 8.778e-05, |
| "loss": 0.0069, |
| "step": 4390 |
| }, |
| { |
| "epoch": 110.0, |
| "grad_norm": 0.1751328557729721, |
| "learning_rate": 8.798000000000001e-05, |
| "loss": 0.0072, |
| "step": 4400 |
| }, |
| { |
| "epoch": 110.25, |
| "grad_norm": 0.2347930371761322, |
| "learning_rate": 8.818000000000001e-05, |
| "loss": 0.0065, |
| "step": 4410 |
| }, |
| { |
| "epoch": 110.5, |
| "grad_norm": 0.18729887902736664, |
| "learning_rate": 8.838e-05, |
| "loss": 0.0064, |
| "step": 4420 |
| }, |
| { |
| "epoch": 110.75, |
| "grad_norm": 0.11705614626407623, |
| "learning_rate": 8.858000000000001e-05, |
| "loss": 0.0063, |
| "step": 4430 |
| }, |
| { |
| "epoch": 111.0, |
| "grad_norm": 0.1382816731929779, |
| "learning_rate": 8.878000000000001e-05, |
| "loss": 0.0057, |
| "step": 4440 |
| }, |
| { |
| "epoch": 111.25, |
| "grad_norm": 0.15040716528892517, |
| "learning_rate": 8.898e-05, |
| "loss": 0.0059, |
| "step": 4450 |
| }, |
| { |
| "epoch": 111.5, |
| "grad_norm": 0.15252092480659485, |
| "learning_rate": 8.918000000000001e-05, |
| "loss": 0.0065, |
| "step": 4460 |
| }, |
| { |
| "epoch": 111.75, |
| "grad_norm": 0.15340879559516907, |
| "learning_rate": 8.938e-05, |
| "loss": 0.0063, |
| "step": 4470 |
| }, |
| { |
| "epoch": 112.0, |
| "grad_norm": 0.13012273609638214, |
| "learning_rate": 8.958e-05, |
| "loss": 0.0064, |
| "step": 4480 |
| }, |
| { |
| "epoch": 112.25, |
| "grad_norm": 0.14281576871871948, |
| "learning_rate": 8.978000000000001e-05, |
| "loss": 0.0058, |
| "step": 4490 |
| }, |
| { |
| "epoch": 112.5, |
| "grad_norm": 0.14264865219593048, |
| "learning_rate": 8.998e-05, |
| "loss": 0.0062, |
| "step": 4500 |
| }, |
| { |
| "epoch": 112.75, |
| "grad_norm": 0.1958669275045395, |
| "learning_rate": 9.018000000000001e-05, |
| "loss": 0.0066, |
| "step": 4510 |
| }, |
| { |
| "epoch": 113.0, |
| "grad_norm": 0.10879232734441757, |
| "learning_rate": 9.038000000000001e-05, |
| "loss": 0.0065, |
| "step": 4520 |
| }, |
| { |
| "epoch": 113.25, |
| "grad_norm": 0.2231931984424591, |
| "learning_rate": 9.058e-05, |
| "loss": 0.0062, |
| "step": 4530 |
| }, |
| { |
| "epoch": 113.5, |
| "grad_norm": 0.16552019119262695, |
| "learning_rate": 9.078000000000001e-05, |
| "loss": 0.0066, |
| "step": 4540 |
| }, |
| { |
| "epoch": 113.75, |
| "grad_norm": 0.15375559031963348, |
| "learning_rate": 9.098000000000001e-05, |
| "loss": 0.0072, |
| "step": 4550 |
| }, |
| { |
| "epoch": 114.0, |
| "grad_norm": 0.20676645636558533, |
| "learning_rate": 9.118e-05, |
| "loss": 0.0072, |
| "step": 4560 |
| }, |
| { |
| "epoch": 114.25, |
| "grad_norm": 0.15286394953727722, |
| "learning_rate": 9.138e-05, |
| "loss": 0.0069, |
| "step": 4570 |
| }, |
| { |
| "epoch": 114.5, |
| "grad_norm": 0.15226341784000397, |
| "learning_rate": 9.158e-05, |
| "loss": 0.0066, |
| "step": 4580 |
| }, |
| { |
| "epoch": 114.75, |
| "grad_norm": 0.12615948915481567, |
| "learning_rate": 9.178e-05, |
| "loss": 0.0066, |
| "step": 4590 |
| }, |
| { |
| "epoch": 115.0, |
| "grad_norm": 0.1715383529663086, |
| "learning_rate": 9.198e-05, |
| "loss": 0.0072, |
| "step": 4600 |
| }, |
| { |
| "epoch": 115.25, |
| "grad_norm": 0.13157600164413452, |
| "learning_rate": 9.218e-05, |
| "loss": 0.0071, |
| "step": 4610 |
| }, |
| { |
| "epoch": 115.5, |
| "grad_norm": 0.16462917625904083, |
| "learning_rate": 9.238e-05, |
| "loss": 0.0067, |
| "step": 4620 |
| }, |
| { |
| "epoch": 115.75, |
| "grad_norm": 0.2137746512889862, |
| "learning_rate": 9.258e-05, |
| "loss": 0.0072, |
| "step": 4630 |
| }, |
| { |
| "epoch": 116.0, |
| "grad_norm": 0.14793118834495544, |
| "learning_rate": 9.278e-05, |
| "loss": 0.0067, |
| "step": 4640 |
| }, |
| { |
| "epoch": 116.25, |
| "grad_norm": 0.1574595421552658, |
| "learning_rate": 9.298e-05, |
| "loss": 0.007, |
| "step": 4650 |
| }, |
| { |
| "epoch": 116.5, |
| "grad_norm": 0.12151467055082321, |
| "learning_rate": 9.318e-05, |
| "loss": 0.0065, |
| "step": 4660 |
| }, |
| { |
| "epoch": 116.75, |
| "grad_norm": 0.18424446880817413, |
| "learning_rate": 9.338e-05, |
| "loss": 0.0056, |
| "step": 4670 |
| }, |
| { |
| "epoch": 117.0, |
| "grad_norm": 0.181967630982399, |
| "learning_rate": 9.358e-05, |
| "loss": 0.0064, |
| "step": 4680 |
| }, |
| { |
| "epoch": 117.25, |
| "grad_norm": 0.15449753403663635, |
| "learning_rate": 9.378e-05, |
| "loss": 0.0062, |
| "step": 4690 |
| }, |
| { |
| "epoch": 117.5, |
| "grad_norm": 0.17288358509540558, |
| "learning_rate": 9.398e-05, |
| "loss": 0.0065, |
| "step": 4700 |
| }, |
| { |
| "epoch": 117.75, |
| "grad_norm": 0.22410866618156433, |
| "learning_rate": 9.418e-05, |
| "loss": 0.0065, |
| "step": 4710 |
| }, |
| { |
| "epoch": 118.0, |
| "grad_norm": 0.1840396225452423, |
| "learning_rate": 9.438e-05, |
| "loss": 0.0063, |
| "step": 4720 |
| }, |
| { |
| "epoch": 118.25, |
| "grad_norm": 0.1368318498134613, |
| "learning_rate": 9.458e-05, |
| "loss": 0.0062, |
| "step": 4730 |
| }, |
| { |
| "epoch": 118.5, |
| "grad_norm": 0.19932417571544647, |
| "learning_rate": 9.478e-05, |
| "loss": 0.006, |
| "step": 4740 |
| }, |
| { |
| "epoch": 118.75, |
| "grad_norm": 0.13300760090351105, |
| "learning_rate": 9.498e-05, |
| "loss": 0.0066, |
| "step": 4750 |
| }, |
| { |
| "epoch": 119.0, |
| "grad_norm": 0.17009101808071136, |
| "learning_rate": 9.518000000000001e-05, |
| "loss": 0.0063, |
| "step": 4760 |
| }, |
| { |
| "epoch": 119.25, |
| "grad_norm": 0.14035862684249878, |
| "learning_rate": 9.538e-05, |
| "loss": 0.0073, |
| "step": 4770 |
| }, |
| { |
| "epoch": 119.5, |
| "grad_norm": 0.15153725445270538, |
| "learning_rate": 9.558e-05, |
| "loss": 0.0076, |
| "step": 4780 |
| }, |
| { |
| "epoch": 119.75, |
| "grad_norm": 0.16562645137310028, |
| "learning_rate": 9.578000000000001e-05, |
| "loss": 0.007, |
| "step": 4790 |
| }, |
| { |
| "epoch": 120.0, |
| "grad_norm": 0.19395475089550018, |
| "learning_rate": 9.598e-05, |
| "loss": 0.0067, |
| "step": 4800 |
| }, |
| { |
| "epoch": 120.25, |
| "grad_norm": 0.19338366389274597, |
| "learning_rate": 9.618e-05, |
| "loss": 0.0063, |
| "step": 4810 |
| }, |
| { |
| "epoch": 120.5, |
| "grad_norm": 0.18101127445697784, |
| "learning_rate": 9.638000000000001e-05, |
| "loss": 0.0071, |
| "step": 4820 |
| }, |
| { |
| "epoch": 120.75, |
| "grad_norm": 0.15528017282485962, |
| "learning_rate": 9.658e-05, |
| "loss": 0.0068, |
| "step": 4830 |
| }, |
| { |
| "epoch": 121.0, |
| "grad_norm": 0.15108326077461243, |
| "learning_rate": 9.678e-05, |
| "loss": 0.0058, |
| "step": 4840 |
| }, |
| { |
| "epoch": 121.25, |
| "grad_norm": 0.14880536496639252, |
| "learning_rate": 9.698000000000001e-05, |
| "loss": 0.0076, |
| "step": 4850 |
| }, |
| { |
| "epoch": 121.5, |
| "grad_norm": 0.18922747671604156, |
| "learning_rate": 9.718e-05, |
| "loss": 0.0071, |
| "step": 4860 |
| }, |
| { |
| "epoch": 121.75, |
| "grad_norm": 0.15255926549434662, |
| "learning_rate": 9.738e-05, |
| "loss": 0.0055, |
| "step": 4870 |
| }, |
| { |
| "epoch": 122.0, |
| "grad_norm": 0.16512243449687958, |
| "learning_rate": 9.758000000000001e-05, |
| "loss": 0.006, |
| "step": 4880 |
| }, |
| { |
| "epoch": 122.25, |
| "grad_norm": 0.1157233789563179, |
| "learning_rate": 9.778e-05, |
| "loss": 0.006, |
| "step": 4890 |
| }, |
| { |
| "epoch": 122.5, |
| "grad_norm": 0.15160076320171356, |
| "learning_rate": 9.798000000000001e-05, |
| "loss": 0.0059, |
| "step": 4900 |
| }, |
| { |
| "epoch": 122.75, |
| "grad_norm": 0.13103358447551727, |
| "learning_rate": 9.818000000000001e-05, |
| "loss": 0.0058, |
| "step": 4910 |
| }, |
| { |
| "epoch": 123.0, |
| "grad_norm": 0.11951006203889847, |
| "learning_rate": 9.838e-05, |
| "loss": 0.0071, |
| "step": 4920 |
| }, |
| { |
| "epoch": 123.25, |
| "grad_norm": 0.15750989317893982, |
| "learning_rate": 9.858000000000001e-05, |
| "loss": 0.0053, |
| "step": 4930 |
| }, |
| { |
| "epoch": 123.5, |
| "grad_norm": 0.13397419452667236, |
| "learning_rate": 9.878e-05, |
| "loss": 0.0066, |
| "step": 4940 |
| }, |
| { |
| "epoch": 123.75, |
| "grad_norm": 0.15722912549972534, |
| "learning_rate": 9.898e-05, |
| "loss": 0.006, |
| "step": 4950 |
| }, |
| { |
| "epoch": 124.0, |
| "grad_norm": 0.1683134287595749, |
| "learning_rate": 9.918000000000001e-05, |
| "loss": 0.0061, |
| "step": 4960 |
| }, |
| { |
| "epoch": 124.25, |
| "grad_norm": 0.19436003267765045, |
| "learning_rate": 9.938e-05, |
| "loss": 0.0061, |
| "step": 4970 |
| }, |
| { |
| "epoch": 124.5, |
| "grad_norm": 0.17469929158687592, |
| "learning_rate": 9.958e-05, |
| "loss": 0.0065, |
| "step": 4980 |
| }, |
| { |
| "epoch": 124.75, |
| "grad_norm": 0.19975730776786804, |
| "learning_rate": 9.978000000000001e-05, |
| "loss": 0.0075, |
| "step": 4990 |
| }, |
| { |
| "epoch": 125.0, |
| "grad_norm": 0.12145998328924179, |
| "learning_rate": 9.998e-05, |
| "loss": 0.0057, |
| "step": 5000 |
| }, |
| { |
| "epoch": 125.25, |
| "grad_norm": 0.12310753762722015, |
| "learning_rate": 9.999999778549045e-05, |
| "loss": 0.0057, |
| "step": 5010 |
| }, |
| { |
| "epoch": 125.5, |
| "grad_norm": 0.17047083377838135, |
| "learning_rate": 9.999999013039593e-05, |
| "loss": 0.0058, |
| "step": 5020 |
| }, |
| { |
| "epoch": 125.75, |
| "grad_norm": 0.1985018253326416, |
| "learning_rate": 9.999997700737766e-05, |
| "loss": 0.0061, |
| "step": 5030 |
| }, |
| { |
| "epoch": 126.0, |
| "grad_norm": 0.12021245807409286, |
| "learning_rate": 9.999995841643709e-05, |
| "loss": 0.0064, |
| "step": 5040 |
| }, |
| { |
| "epoch": 126.25, |
| "grad_norm": 0.08459141105413437, |
| "learning_rate": 9.999993435757623e-05, |
| "loss": 0.0061, |
| "step": 5050 |
| }, |
| { |
| "epoch": 126.5, |
| "grad_norm": 0.12869714200496674, |
| "learning_rate": 9.999990483079773e-05, |
| "loss": 0.0065, |
| "step": 5060 |
| }, |
| { |
| "epoch": 126.75, |
| "grad_norm": 0.10929016023874283, |
| "learning_rate": 9.999986983610481e-05, |
| "loss": 0.0059, |
| "step": 5070 |
| }, |
| { |
| "epoch": 127.0, |
| "grad_norm": 0.14434878528118134, |
| "learning_rate": 9.99998293735013e-05, |
| "loss": 0.0061, |
| "step": 5080 |
| }, |
| { |
| "epoch": 127.25, |
| "grad_norm": 0.17825250327587128, |
| "learning_rate": 9.999978344299161e-05, |
| "loss": 0.0061, |
| "step": 5090 |
| }, |
| { |
| "epoch": 127.5, |
| "grad_norm": 0.1418701559305191, |
| "learning_rate": 9.99997320445808e-05, |
| "loss": 0.0068, |
| "step": 5100 |
| }, |
| { |
| "epoch": 127.75, |
| "grad_norm": 0.13784301280975342, |
| "learning_rate": 9.999967517827444e-05, |
| "loss": 0.0058, |
| "step": 5110 |
| }, |
| { |
| "epoch": 128.0, |
| "grad_norm": 0.1485631763935089, |
| "learning_rate": 9.999961284407879e-05, |
| "loss": 0.0066, |
| "step": 5120 |
| }, |
| { |
| "epoch": 128.25, |
| "grad_norm": 0.13838165998458862, |
| "learning_rate": 9.999954504200067e-05, |
| "loss": 0.0053, |
| "step": 5130 |
| }, |
| { |
| "epoch": 128.5, |
| "grad_norm": 0.098316490650177, |
| "learning_rate": 9.999947177204744e-05, |
| "loss": 0.0062, |
| "step": 5140 |
| }, |
| { |
| "epoch": 128.75, |
| "grad_norm": 0.15371425449848175, |
| "learning_rate": 9.999939303422718e-05, |
| "loss": 0.0061, |
| "step": 5150 |
| }, |
| { |
| "epoch": 129.0, |
| "grad_norm": 0.12473967671394348, |
| "learning_rate": 9.999930882854847e-05, |
| "loss": 0.0058, |
| "step": 5160 |
| }, |
| { |
| "epoch": 129.25, |
| "grad_norm": 0.12960635125637054, |
| "learning_rate": 9.999921915502051e-05, |
| "loss": 0.0057, |
| "step": 5170 |
| }, |
| { |
| "epoch": 129.5, |
| "grad_norm": 0.14831651747226715, |
| "learning_rate": 9.99991240136531e-05, |
| "loss": 0.0073, |
| "step": 5180 |
| }, |
| { |
| "epoch": 129.75, |
| "grad_norm": 0.11215781420469284, |
| "learning_rate": 9.999902340445668e-05, |
| "loss": 0.0057, |
| "step": 5190 |
| }, |
| { |
| "epoch": 130.0, |
| "grad_norm": 0.10951609164476395, |
| "learning_rate": 9.999891732744224e-05, |
| "loss": 0.0062, |
| "step": 5200 |
| }, |
| { |
| "epoch": 130.25, |
| "grad_norm": 0.12410330027341843, |
| "learning_rate": 9.999880578262135e-05, |
| "loss": 0.0063, |
| "step": 5210 |
| }, |
| { |
| "epoch": 130.5, |
| "grad_norm": 0.16753678023815155, |
| "learning_rate": 9.999868877000624e-05, |
| "loss": 0.0065, |
| "step": 5220 |
| }, |
| { |
| "epoch": 130.75, |
| "grad_norm": 0.1503169685602188, |
| "learning_rate": 9.99985662896097e-05, |
| "loss": 0.0068, |
| "step": 5230 |
| }, |
| { |
| "epoch": 131.0, |
| "grad_norm": 0.17394974827766418, |
| "learning_rate": 9.999843834144513e-05, |
| "loss": 0.0069, |
| "step": 5240 |
| }, |
| { |
| "epoch": 131.25, |
| "grad_norm": 0.1436404585838318, |
| "learning_rate": 9.99983049255265e-05, |
| "loss": 0.0066, |
| "step": 5250 |
| }, |
| { |
| "epoch": 131.5, |
| "grad_norm": 0.13780523836612701, |
| "learning_rate": 9.999816604186843e-05, |
| "loss": 0.0066, |
| "step": 5260 |
| }, |
| { |
| "epoch": 131.75, |
| "grad_norm": 0.11955001950263977, |
| "learning_rate": 9.999802169048609e-05, |
| "loss": 0.0058, |
| "step": 5270 |
| }, |
| { |
| "epoch": 132.0, |
| "grad_norm": 0.11023246496915817, |
| "learning_rate": 9.999787187139527e-05, |
| "loss": 0.0054, |
| "step": 5280 |
| }, |
| { |
| "epoch": 132.25, |
| "grad_norm": 0.13991482555866241, |
| "learning_rate": 9.999771658461234e-05, |
| "loss": 0.0059, |
| "step": 5290 |
| }, |
| { |
| "epoch": 132.5, |
| "grad_norm": 0.11659090220928192, |
| "learning_rate": 9.999755583015431e-05, |
| "loss": 0.0055, |
| "step": 5300 |
| }, |
| { |
| "epoch": 132.75, |
| "grad_norm": 0.1481342911720276, |
| "learning_rate": 9.999738960803874e-05, |
| "loss": 0.0059, |
| "step": 5310 |
| }, |
| { |
| "epoch": 133.0, |
| "grad_norm": 0.12136317044496536, |
| "learning_rate": 9.99972179182838e-05, |
| "loss": 0.0047, |
| "step": 5320 |
| }, |
| { |
| "epoch": 133.25, |
| "grad_norm": 0.12952932715415955, |
| "learning_rate": 9.99970407609083e-05, |
| "loss": 0.0059, |
| "step": 5330 |
| }, |
| { |
| "epoch": 133.5, |
| "grad_norm": 0.14714136719703674, |
| "learning_rate": 9.999685813593159e-05, |
| "loss": 0.006, |
| "step": 5340 |
| }, |
| { |
| "epoch": 133.75, |
| "grad_norm": 0.15123462677001953, |
| "learning_rate": 9.999667004337362e-05, |
| "loss": 0.0051, |
| "step": 5350 |
| }, |
| { |
| "epoch": 134.0, |
| "grad_norm": 0.17769788205623627, |
| "learning_rate": 9.9996476483255e-05, |
| "loss": 0.0059, |
| "step": 5360 |
| }, |
| { |
| "epoch": 134.25, |
| "grad_norm": 0.15275105834007263, |
| "learning_rate": 9.999627745559688e-05, |
| "loss": 0.0055, |
| "step": 5370 |
| }, |
| { |
| "epoch": 134.5, |
| "grad_norm": 0.16935890913009644, |
| "learning_rate": 9.999607296042101e-05, |
| "loss": 0.0059, |
| "step": 5380 |
| }, |
| { |
| "epoch": 134.75, |
| "grad_norm": 0.14174121618270874, |
| "learning_rate": 9.99958629977498e-05, |
| "loss": 0.0053, |
| "step": 5390 |
| }, |
| { |
| "epoch": 135.0, |
| "grad_norm": 0.1606847494840622, |
| "learning_rate": 9.999564756760615e-05, |
| "loss": 0.0055, |
| "step": 5400 |
| }, |
| { |
| "epoch": 135.25, |
| "grad_norm": 0.15848489105701447, |
| "learning_rate": 9.999542667001366e-05, |
| "loss": 0.0056, |
| "step": 5410 |
| }, |
| { |
| "epoch": 135.5, |
| "grad_norm": 0.1053905189037323, |
| "learning_rate": 9.999520030499647e-05, |
| "loss": 0.0052, |
| "step": 5420 |
| }, |
| { |
| "epoch": 135.75, |
| "grad_norm": 0.16355837881565094, |
| "learning_rate": 9.999496847257936e-05, |
| "loss": 0.0055, |
| "step": 5430 |
| }, |
| { |
| "epoch": 136.0, |
| "grad_norm": 0.18699996173381805, |
| "learning_rate": 9.999473117278764e-05, |
| "loss": 0.0059, |
| "step": 5440 |
| }, |
| { |
| "epoch": 136.25, |
| "grad_norm": 0.16889815032482147, |
| "learning_rate": 9.999448840564731e-05, |
| "loss": 0.0054, |
| "step": 5450 |
| }, |
| { |
| "epoch": 136.5, |
| "grad_norm": 0.15516693890094757, |
| "learning_rate": 9.999424017118488e-05, |
| "loss": 0.0057, |
| "step": 5460 |
| }, |
| { |
| "epoch": 136.75, |
| "grad_norm": 0.14914794266223907, |
| "learning_rate": 9.999398646942751e-05, |
| "loss": 0.0061, |
| "step": 5470 |
| }, |
| { |
| "epoch": 137.0, |
| "grad_norm": 0.17972686886787415, |
| "learning_rate": 9.999372730040296e-05, |
| "loss": 0.0054, |
| "step": 5480 |
| }, |
| { |
| "epoch": 137.25, |
| "grad_norm": 0.1406720131635666, |
| "learning_rate": 9.999346266413953e-05, |
| "loss": 0.0059, |
| "step": 5490 |
| }, |
| { |
| "epoch": 137.5, |
| "grad_norm": 0.1266246736049652, |
| "learning_rate": 9.99931925606662e-05, |
| "loss": 0.0061, |
| "step": 5500 |
| }, |
| { |
| "epoch": 137.75, |
| "grad_norm": 0.1418047994375229, |
| "learning_rate": 9.99929169900125e-05, |
| "loss": 0.0056, |
| "step": 5510 |
| }, |
| { |
| "epoch": 138.0, |
| "grad_norm": 0.1325012743473053, |
| "learning_rate": 9.999263595220855e-05, |
| "loss": 0.0051, |
| "step": 5520 |
| }, |
| { |
| "epoch": 138.25, |
| "grad_norm": 0.11973172426223755, |
| "learning_rate": 9.99923494472851e-05, |
| "loss": 0.0051, |
| "step": 5530 |
| }, |
| { |
| "epoch": 138.5, |
| "grad_norm": 0.1425570398569107, |
| "learning_rate": 9.999205747527348e-05, |
| "loss": 0.0063, |
| "step": 5540 |
| }, |
| { |
| "epoch": 138.75, |
| "grad_norm": 0.14493736624717712, |
| "learning_rate": 9.999176003620561e-05, |
| "loss": 0.0055, |
| "step": 5550 |
| }, |
| { |
| "epoch": 139.0, |
| "grad_norm": 0.10896685719490051, |
| "learning_rate": 9.999145713011405e-05, |
| "loss": 0.0056, |
| "step": 5560 |
| }, |
| { |
| "epoch": 139.25, |
| "grad_norm": 0.126507967710495, |
| "learning_rate": 9.999114875703186e-05, |
| "loss": 0.0052, |
| "step": 5570 |
| }, |
| { |
| "epoch": 139.5, |
| "grad_norm": 0.09839679300785065, |
| "learning_rate": 9.999083491699281e-05, |
| "loss": 0.0057, |
| "step": 5580 |
| }, |
| { |
| "epoch": 139.75, |
| "grad_norm": 0.10790673643350601, |
| "learning_rate": 9.999051561003123e-05, |
| "loss": 0.0057, |
| "step": 5590 |
| }, |
| { |
| "epoch": 140.0, |
| "grad_norm": 0.11031424254179001, |
| "learning_rate": 9.999019083618202e-05, |
| "loss": 0.0057, |
| "step": 5600 |
| }, |
| { |
| "epoch": 140.25, |
| "grad_norm": 0.12621337175369263, |
| "learning_rate": 9.99898605954807e-05, |
| "loss": 0.0062, |
| "step": 5610 |
| }, |
| { |
| "epoch": 140.5, |
| "grad_norm": 0.09760873019695282, |
| "learning_rate": 9.998952488796338e-05, |
| "loss": 0.0057, |
| "step": 5620 |
| }, |
| { |
| "epoch": 140.75, |
| "grad_norm": 0.10049641132354736, |
| "learning_rate": 9.998918371366676e-05, |
| "loss": 0.0055, |
| "step": 5630 |
| }, |
| { |
| "epoch": 141.0, |
| "grad_norm": 0.11897911131381989, |
| "learning_rate": 9.99888370726282e-05, |
| "loss": 0.0052, |
| "step": 5640 |
| }, |
| { |
| "epoch": 141.25, |
| "grad_norm": 0.12692591547966003, |
| "learning_rate": 9.998848496488556e-05, |
| "loss": 0.0055, |
| "step": 5650 |
| }, |
| { |
| "epoch": 141.5, |
| "grad_norm": 0.09676604717969894, |
| "learning_rate": 9.998812739047736e-05, |
| "loss": 0.0061, |
| "step": 5660 |
| }, |
| { |
| "epoch": 141.75, |
| "grad_norm": 0.10841045528650284, |
| "learning_rate": 9.99877643494427e-05, |
| "loss": 0.0062, |
| "step": 5670 |
| }, |
| { |
| "epoch": 142.0, |
| "grad_norm": 0.13137775659561157, |
| "learning_rate": 9.998739584182128e-05, |
| "loss": 0.0065, |
| "step": 5680 |
| }, |
| { |
| "epoch": 142.25, |
| "grad_norm": 0.11271249502897263, |
| "learning_rate": 9.998702186765342e-05, |
| "loss": 0.0055, |
| "step": 5690 |
| }, |
| { |
| "epoch": 142.5, |
| "grad_norm": 0.12125517427921295, |
| "learning_rate": 9.998664242698e-05, |
| "loss": 0.0055, |
| "step": 5700 |
| }, |
| { |
| "epoch": 142.75, |
| "grad_norm": 0.15787778794765472, |
| "learning_rate": 9.998625751984251e-05, |
| "loss": 0.0048, |
| "step": 5710 |
| }, |
| { |
| "epoch": 143.0, |
| "grad_norm": 0.1311769336462021, |
| "learning_rate": 9.998586714628307e-05, |
| "loss": 0.0056, |
| "step": 5720 |
| }, |
| { |
| "epoch": 143.25, |
| "grad_norm": 0.1340465098619461, |
| "learning_rate": 9.998547130634432e-05, |
| "loss": 0.006, |
| "step": 5730 |
| }, |
| { |
| "epoch": 143.5, |
| "grad_norm": 0.11795949935913086, |
| "learning_rate": 9.99850700000696e-05, |
| "loss": 0.0057, |
| "step": 5740 |
| }, |
| { |
| "epoch": 143.75, |
| "grad_norm": 0.16333207488059998, |
| "learning_rate": 9.998466322750278e-05, |
| "loss": 0.0056, |
| "step": 5750 |
| }, |
| { |
| "epoch": 144.0, |
| "grad_norm": 0.19970041513442993, |
| "learning_rate": 9.998425098868834e-05, |
| "loss": 0.0057, |
| "step": 5760 |
| }, |
| { |
| "epoch": 144.25, |
| "grad_norm": 0.22557619214057922, |
| "learning_rate": 9.998383328367136e-05, |
| "loss": 0.0059, |
| "step": 5770 |
| }, |
| { |
| "epoch": 144.5, |
| "grad_norm": 0.22150106728076935, |
| "learning_rate": 9.99834101124975e-05, |
| "loss": 0.0062, |
| "step": 5780 |
| }, |
| { |
| "epoch": 144.75, |
| "grad_norm": 0.20753586292266846, |
| "learning_rate": 9.998298147521309e-05, |
| "loss": 0.0065, |
| "step": 5790 |
| }, |
| { |
| "epoch": 145.0, |
| "grad_norm": 0.17864994704723358, |
| "learning_rate": 9.998254737186496e-05, |
| "loss": 0.0057, |
| "step": 5800 |
| }, |
| { |
| "epoch": 145.25, |
| "grad_norm": 0.17263974249362946, |
| "learning_rate": 9.99821078025006e-05, |
| "loss": 0.0058, |
| "step": 5810 |
| }, |
| { |
| "epoch": 145.5, |
| "grad_norm": 0.16186940670013428, |
| "learning_rate": 9.998166276716807e-05, |
| "loss": 0.0063, |
| "step": 5820 |
| }, |
| { |
| "epoch": 145.75, |
| "grad_norm": 0.1366291344165802, |
| "learning_rate": 9.998121226591606e-05, |
| "loss": 0.0057, |
| "step": 5830 |
| }, |
| { |
| "epoch": 146.0, |
| "grad_norm": 0.14409442245960236, |
| "learning_rate": 9.998075629879382e-05, |
| "loss": 0.0057, |
| "step": 5840 |
| }, |
| { |
| "epoch": 146.25, |
| "grad_norm": 0.1769361048936844, |
| "learning_rate": 9.99802948658512e-05, |
| "loss": 0.0056, |
| "step": 5850 |
| }, |
| { |
| "epoch": 146.5, |
| "grad_norm": 0.178902268409729, |
| "learning_rate": 9.99798279671387e-05, |
| "loss": 0.0065, |
| "step": 5860 |
| }, |
| { |
| "epoch": 146.75, |
| "grad_norm": 0.18864601850509644, |
| "learning_rate": 9.997935560270734e-05, |
| "loss": 0.0066, |
| "step": 5870 |
| }, |
| { |
| "epoch": 147.0, |
| "grad_norm": 0.10840289294719696, |
| "learning_rate": 9.997887777260879e-05, |
| "loss": 0.0055, |
| "step": 5880 |
| }, |
| { |
| "epoch": 147.25, |
| "grad_norm": 0.1299472600221634, |
| "learning_rate": 9.997839447689532e-05, |
| "loss": 0.0062, |
| "step": 5890 |
| }, |
| { |
| "epoch": 147.5, |
| "grad_norm": 0.09526827186346054, |
| "learning_rate": 9.997790571561978e-05, |
| "loss": 0.0057, |
| "step": 5900 |
| }, |
| { |
| "epoch": 147.75, |
| "grad_norm": 0.104829341173172, |
| "learning_rate": 9.99774114888356e-05, |
| "loss": 0.0052, |
| "step": 5910 |
| }, |
| { |
| "epoch": 148.0, |
| "grad_norm": 0.11738763749599457, |
| "learning_rate": 9.997691179659684e-05, |
| "loss": 0.0056, |
| "step": 5920 |
| }, |
| { |
| "epoch": 148.25, |
| "grad_norm": 0.09024183452129364, |
| "learning_rate": 9.997640663895815e-05, |
| "loss": 0.0052, |
| "step": 5930 |
| }, |
| { |
| "epoch": 148.5, |
| "grad_norm": 0.10453230142593384, |
| "learning_rate": 9.997589601597477e-05, |
| "loss": 0.0062, |
| "step": 5940 |
| }, |
| { |
| "epoch": 148.75, |
| "grad_norm": 0.11493031680583954, |
| "learning_rate": 9.997537992770252e-05, |
| "loss": 0.0048, |
| "step": 5950 |
| }, |
| { |
| "epoch": 149.0, |
| "grad_norm": 0.1389797329902649, |
| "learning_rate": 9.997485837419788e-05, |
| "loss": 0.0055, |
| "step": 5960 |
| }, |
| { |
| "epoch": 149.25, |
| "grad_norm": 0.12700024247169495, |
| "learning_rate": 9.997433135551786e-05, |
| "loss": 0.0054, |
| "step": 5970 |
| }, |
| { |
| "epoch": 149.5, |
| "grad_norm": 0.13564901053905487, |
| "learning_rate": 9.997379887172009e-05, |
| "loss": 0.0047, |
| "step": 5980 |
| }, |
| { |
| "epoch": 149.75, |
| "grad_norm": 0.12888741493225098, |
| "learning_rate": 9.997326092286281e-05, |
| "loss": 0.0055, |
| "step": 5990 |
| }, |
| { |
| "epoch": 150.0, |
| "grad_norm": 0.10119718313217163, |
| "learning_rate": 9.997271750900486e-05, |
| "loss": 0.0056, |
| "step": 6000 |
| }, |
| { |
| "epoch": 150.25, |
| "grad_norm": 0.12381160259246826, |
| "learning_rate": 9.997216863020565e-05, |
| "loss": 0.0051, |
| "step": 6010 |
| }, |
| { |
| "epoch": 150.5, |
| "grad_norm": 0.12457701563835144, |
| "learning_rate": 9.99716142865252e-05, |
| "loss": 0.0052, |
| "step": 6020 |
| }, |
| { |
| "epoch": 150.75, |
| "grad_norm": 0.1514553427696228, |
| "learning_rate": 9.997105447802415e-05, |
| "loss": 0.0054, |
| "step": 6030 |
| }, |
| { |
| "epoch": 151.0, |
| "grad_norm": 0.1348123848438263, |
| "learning_rate": 9.997048920476373e-05, |
| "loss": 0.0054, |
| "step": 6040 |
| }, |
| { |
| "epoch": 151.25, |
| "grad_norm": 0.14996957778930664, |
| "learning_rate": 9.996991846680572e-05, |
| "loss": 0.0057, |
| "step": 6050 |
| }, |
| { |
| "epoch": 151.5, |
| "grad_norm": 0.09939752519130707, |
| "learning_rate": 9.996934226421257e-05, |
| "loss": 0.0052, |
| "step": 6060 |
| }, |
| { |
| "epoch": 151.75, |
| "grad_norm": 0.10740762203931808, |
| "learning_rate": 9.996876059704726e-05, |
| "loss": 0.005, |
| "step": 6070 |
| }, |
| { |
| "epoch": 152.0, |
| "grad_norm": 0.13272447884082794, |
| "learning_rate": 9.996817346537343e-05, |
| "loss": 0.0063, |
| "step": 6080 |
| }, |
| { |
| "epoch": 152.25, |
| "grad_norm": 0.10130985081195831, |
| "learning_rate": 9.996758086925526e-05, |
| "loss": 0.0056, |
| "step": 6090 |
| }, |
| { |
| "epoch": 152.5, |
| "grad_norm": 0.13598772883415222, |
| "learning_rate": 9.996698280875759e-05, |
| "loss": 0.0053, |
| "step": 6100 |
| }, |
| { |
| "epoch": 152.75, |
| "grad_norm": 0.1303255409002304, |
| "learning_rate": 9.99663792839458e-05, |
| "loss": 0.0058, |
| "step": 6110 |
| }, |
| { |
| "epoch": 153.0, |
| "grad_norm": 0.15227794647216797, |
| "learning_rate": 9.99657702948859e-05, |
| "loss": 0.0056, |
| "step": 6120 |
| }, |
| { |
| "epoch": 153.25, |
| "grad_norm": 0.12363743036985397, |
| "learning_rate": 9.996515584164448e-05, |
| "loss": 0.0053, |
| "step": 6130 |
| }, |
| { |
| "epoch": 153.5, |
| "grad_norm": 0.16186046600341797, |
| "learning_rate": 9.996453592428873e-05, |
| "loss": 0.005, |
| "step": 6140 |
| }, |
| { |
| "epoch": 153.75, |
| "grad_norm": 0.11398918926715851, |
| "learning_rate": 9.996391054288646e-05, |
| "loss": 0.0051, |
| "step": 6150 |
| }, |
| { |
| "epoch": 154.0, |
| "grad_norm": 0.14045920968055725, |
| "learning_rate": 9.996327969750605e-05, |
| "loss": 0.0051, |
| "step": 6160 |
| }, |
| { |
| "epoch": 154.25, |
| "grad_norm": 0.14219066500663757, |
| "learning_rate": 9.996264338821649e-05, |
| "loss": 0.0045, |
| "step": 6170 |
| }, |
| { |
| "epoch": 154.5, |
| "grad_norm": 0.1310894936323166, |
| "learning_rate": 9.996200161508735e-05, |
| "loss": 0.0052, |
| "step": 6180 |
| }, |
| { |
| "epoch": 154.75, |
| "grad_norm": 0.10034388303756714, |
| "learning_rate": 9.996135437818885e-05, |
| "loss": 0.0048, |
| "step": 6190 |
| }, |
| { |
| "epoch": 155.0, |
| "grad_norm": 0.15445446968078613, |
| "learning_rate": 9.996070167759175e-05, |
| "loss": 0.0056, |
| "step": 6200 |
| }, |
| { |
| "epoch": 155.25, |
| "grad_norm": 0.11739563941955566, |
| "learning_rate": 9.996004351336743e-05, |
| "loss": 0.0055, |
| "step": 6210 |
| }, |
| { |
| "epoch": 155.5, |
| "grad_norm": 0.15179182589054108, |
| "learning_rate": 9.995937988558785e-05, |
| "loss": 0.0058, |
| "step": 6220 |
| }, |
| { |
| "epoch": 155.75, |
| "grad_norm": 0.14104419946670532, |
| "learning_rate": 9.995871079432561e-05, |
| "loss": 0.0054, |
| "step": 6230 |
| }, |
| { |
| "epoch": 156.0, |
| "grad_norm": 0.1537674367427826, |
| "learning_rate": 9.995803623965389e-05, |
| "loss": 0.0056, |
| "step": 6240 |
| }, |
| { |
| "epoch": 156.25, |
| "grad_norm": 0.1653551310300827, |
| "learning_rate": 9.995735622164641e-05, |
| "loss": 0.0057, |
| "step": 6250 |
| }, |
| { |
| "epoch": 156.5, |
| "grad_norm": 0.1293126344680786, |
| "learning_rate": 9.995667074037758e-05, |
| "loss": 0.0054, |
| "step": 6260 |
| }, |
| { |
| "epoch": 156.75, |
| "grad_norm": 0.1530045121908188, |
| "learning_rate": 9.995597979592232e-05, |
| "loss": 0.006, |
| "step": 6270 |
| }, |
| { |
| "epoch": 157.0, |
| "grad_norm": 0.14181704819202423, |
| "learning_rate": 9.995528338835625e-05, |
| "loss": 0.0052, |
| "step": 6280 |
| }, |
| { |
| "epoch": 157.25, |
| "grad_norm": 0.16457095742225647, |
| "learning_rate": 9.995458151775547e-05, |
| "loss": 0.0055, |
| "step": 6290 |
| }, |
| { |
| "epoch": 157.5, |
| "grad_norm": 0.16033579409122467, |
| "learning_rate": 9.995387418419677e-05, |
| "loss": 0.0051, |
| "step": 6300 |
| }, |
| { |
| "epoch": 157.75, |
| "grad_norm": 0.11192826926708221, |
| "learning_rate": 9.99531613877575e-05, |
| "loss": 0.0055, |
| "step": 6310 |
| }, |
| { |
| "epoch": 158.0, |
| "grad_norm": 0.15627609193325043, |
| "learning_rate": 9.995244312851559e-05, |
| "loss": 0.0055, |
| "step": 6320 |
| }, |
| { |
| "epoch": 158.25, |
| "grad_norm": 0.14025883376598358, |
| "learning_rate": 9.995171940654961e-05, |
| "loss": 0.0051, |
| "step": 6330 |
| }, |
| { |
| "epoch": 158.5, |
| "grad_norm": 0.09506193548440933, |
| "learning_rate": 9.995099022193871e-05, |
| "loss": 0.0054, |
| "step": 6340 |
| }, |
| { |
| "epoch": 158.75, |
| "grad_norm": 0.12658117711544037, |
| "learning_rate": 9.995025557476261e-05, |
| "loss": 0.0051, |
| "step": 6350 |
| }, |
| { |
| "epoch": 159.0, |
| "grad_norm": 0.12233058363199234, |
| "learning_rate": 9.994951546510165e-05, |
| "loss": 0.0055, |
| "step": 6360 |
| }, |
| { |
| "epoch": 159.25, |
| "grad_norm": 0.14143706858158112, |
| "learning_rate": 9.994876989303679e-05, |
| "loss": 0.0062, |
| "step": 6370 |
| }, |
| { |
| "epoch": 159.5, |
| "grad_norm": 0.15059718489646912, |
| "learning_rate": 9.994801885864955e-05, |
| "loss": 0.0058, |
| "step": 6380 |
| }, |
| { |
| "epoch": 159.75, |
| "grad_norm": 0.13234072923660278, |
| "learning_rate": 9.994726236202205e-05, |
| "loss": 0.0062, |
| "step": 6390 |
| }, |
| { |
| "epoch": 160.0, |
| "grad_norm": 0.17327646911144257, |
| "learning_rate": 9.994650040323704e-05, |
| "loss": 0.0059, |
| "step": 6400 |
| }, |
| { |
| "epoch": 160.25, |
| "grad_norm": 0.13921305537223816, |
| "learning_rate": 9.994573298237784e-05, |
| "loss": 0.0049, |
| "step": 6410 |
| }, |
| { |
| "epoch": 160.5, |
| "grad_norm": 0.16460886597633362, |
| "learning_rate": 9.994496009952837e-05, |
| "loss": 0.0049, |
| "step": 6420 |
| }, |
| { |
| "epoch": 160.75, |
| "grad_norm": 0.15839236974716187, |
| "learning_rate": 9.994418175477316e-05, |
| "loss": 0.0056, |
| "step": 6430 |
| }, |
| { |
| "epoch": 161.0, |
| "grad_norm": 0.1224624365568161, |
| "learning_rate": 9.994339794819733e-05, |
| "loss": 0.0048, |
| "step": 6440 |
| }, |
| { |
| "epoch": 161.25, |
| "grad_norm": 0.13407361507415771, |
| "learning_rate": 9.994260867988658e-05, |
| "loss": 0.0055, |
| "step": 6450 |
| }, |
| { |
| "epoch": 161.5, |
| "grad_norm": 0.14540457725524902, |
| "learning_rate": 9.994181394992723e-05, |
| "loss": 0.0046, |
| "step": 6460 |
| }, |
| { |
| "epoch": 161.75, |
| "grad_norm": 0.12441486120223999, |
| "learning_rate": 9.994101375840618e-05, |
| "loss": 0.0047, |
| "step": 6470 |
| }, |
| { |
| "epoch": 162.0, |
| "grad_norm": 0.12421895563602448, |
| "learning_rate": 9.994020810541098e-05, |
| "loss": 0.0051, |
| "step": 6480 |
| }, |
| { |
| "epoch": 162.25, |
| "grad_norm": 0.11112942546606064, |
| "learning_rate": 9.99393969910297e-05, |
| "loss": 0.0051, |
| "step": 6490 |
| }, |
| { |
| "epoch": 162.5, |
| "grad_norm": 0.12117798626422882, |
| "learning_rate": 9.993858041535104e-05, |
| "loss": 0.0057, |
| "step": 6500 |
| }, |
| { |
| "epoch": 162.75, |
| "grad_norm": 0.1292831152677536, |
| "learning_rate": 9.99377583784643e-05, |
| "loss": 0.0052, |
| "step": 6510 |
| }, |
| { |
| "epoch": 163.0, |
| "grad_norm": 0.13252988457679749, |
| "learning_rate": 9.993693088045939e-05, |
| "loss": 0.0051, |
| "step": 6520 |
| }, |
| { |
| "epoch": 163.25, |
| "grad_norm": 0.13480907678604126, |
| "learning_rate": 9.99360979214268e-05, |
| "loss": 0.0049, |
| "step": 6530 |
| }, |
| { |
| "epoch": 163.5, |
| "grad_norm": 0.08185603469610214, |
| "learning_rate": 9.99352595014576e-05, |
| "loss": 0.0047, |
| "step": 6540 |
| }, |
| { |
| "epoch": 163.75, |
| "grad_norm": 0.07064332067966461, |
| "learning_rate": 9.993441562064354e-05, |
| "loss": 0.0049, |
| "step": 6550 |
| }, |
| { |
| "epoch": 164.0, |
| "grad_norm": 0.11800257861614227, |
| "learning_rate": 9.993356627907685e-05, |
| "loss": 0.0049, |
| "step": 6560 |
| }, |
| { |
| "epoch": 164.25, |
| "grad_norm": 0.1276804357767105, |
| "learning_rate": 9.99327114768504e-05, |
| "loss": 0.0055, |
| "step": 6570 |
| }, |
| { |
| "epoch": 164.5, |
| "grad_norm": 0.17329150438308716, |
| "learning_rate": 9.99318512140577e-05, |
| "loss": 0.0053, |
| "step": 6580 |
| }, |
| { |
| "epoch": 164.75, |
| "grad_norm": 0.15143054723739624, |
| "learning_rate": 9.993098549079284e-05, |
| "loss": 0.0055, |
| "step": 6590 |
| }, |
| { |
| "epoch": 165.0, |
| "grad_norm": 0.1650630086660385, |
| "learning_rate": 9.993011430715047e-05, |
| "loss": 0.006, |
| "step": 6600 |
| }, |
| { |
| "epoch": 165.25, |
| "grad_norm": 0.13941645622253418, |
| "learning_rate": 9.992923766322586e-05, |
| "loss": 0.0049, |
| "step": 6610 |
| }, |
| { |
| "epoch": 165.5, |
| "grad_norm": 0.13879020512104034, |
| "learning_rate": 9.99283555591149e-05, |
| "loss": 0.005, |
| "step": 6620 |
| }, |
| { |
| "epoch": 165.75, |
| "grad_norm": 0.1108022928237915, |
| "learning_rate": 9.992746799491404e-05, |
| "loss": 0.0054, |
| "step": 6630 |
| }, |
| { |
| "epoch": 166.0, |
| "grad_norm": 0.16898183524608612, |
| "learning_rate": 9.992657497072033e-05, |
| "loss": 0.0055, |
| "step": 6640 |
| }, |
| { |
| "epoch": 166.25, |
| "grad_norm": 0.1466725915670395, |
| "learning_rate": 9.992567648663147e-05, |
| "loss": 0.0066, |
| "step": 6650 |
| }, |
| { |
| "epoch": 166.5, |
| "grad_norm": 0.10014578700065613, |
| "learning_rate": 9.992477254274568e-05, |
| "loss": 0.0057, |
| "step": 6660 |
| }, |
| { |
| "epoch": 166.75, |
| "grad_norm": 0.11596689373254776, |
| "learning_rate": 9.992386313916183e-05, |
| "loss": 0.0051, |
| "step": 6670 |
| }, |
| { |
| "epoch": 167.0, |
| "grad_norm": 0.1346360445022583, |
| "learning_rate": 9.992294827597934e-05, |
| "loss": 0.0054, |
| "step": 6680 |
| }, |
| { |
| "epoch": 167.25, |
| "grad_norm": 0.12456992268562317, |
| "learning_rate": 9.992202795329831e-05, |
| "loss": 0.0058, |
| "step": 6690 |
| }, |
| { |
| "epoch": 167.5, |
| "grad_norm": 0.15003210306167603, |
| "learning_rate": 9.992110217121936e-05, |
| "loss": 0.0064, |
| "step": 6700 |
| }, |
| { |
| "epoch": 167.75, |
| "grad_norm": 0.12493447959423065, |
| "learning_rate": 9.992017092984372e-05, |
| "loss": 0.0048, |
| "step": 6710 |
| }, |
| { |
| "epoch": 168.0, |
| "grad_norm": 0.13486067950725555, |
| "learning_rate": 9.991923422927326e-05, |
| "loss": 0.0052, |
| "step": 6720 |
| }, |
| { |
| "epoch": 168.25, |
| "grad_norm": 0.12454357743263245, |
| "learning_rate": 9.991829206961037e-05, |
| "loss": 0.0047, |
| "step": 6730 |
| }, |
| { |
| "epoch": 168.5, |
| "grad_norm": 0.12296856194734573, |
| "learning_rate": 9.991734445095813e-05, |
| "loss": 0.0048, |
| "step": 6740 |
| }, |
| { |
| "epoch": 168.75, |
| "grad_norm": 0.12852822244167328, |
| "learning_rate": 9.991639137342015e-05, |
| "loss": 0.0052, |
| "step": 6750 |
| }, |
| { |
| "epoch": 169.0, |
| "grad_norm": 0.10896472632884979, |
| "learning_rate": 9.991543283710064e-05, |
| "loss": 0.0056, |
| "step": 6760 |
| }, |
| { |
| "epoch": 169.25, |
| "grad_norm": 0.09551511704921722, |
| "learning_rate": 9.991446884210445e-05, |
| "loss": 0.0055, |
| "step": 6770 |
| }, |
| { |
| "epoch": 169.5, |
| "grad_norm": 0.11103704571723938, |
| "learning_rate": 9.9913499388537e-05, |
| "loss": 0.0045, |
| "step": 6780 |
| }, |
| { |
| "epoch": 169.75, |
| "grad_norm": 0.09321358799934387, |
| "learning_rate": 9.99125244765043e-05, |
| "loss": 0.0045, |
| "step": 6790 |
| }, |
| { |
| "epoch": 170.0, |
| "grad_norm": 0.1304449886083603, |
| "learning_rate": 9.991154410611296e-05, |
| "loss": 0.0051, |
| "step": 6800 |
| }, |
| { |
| "epoch": 170.25, |
| "grad_norm": 0.1056373193860054, |
| "learning_rate": 9.99105582774702e-05, |
| "loss": 0.0048, |
| "step": 6810 |
| }, |
| { |
| "epoch": 170.5, |
| "grad_norm": 0.15499410033226013, |
| "learning_rate": 9.990956699068384e-05, |
| "loss": 0.0054, |
| "step": 6820 |
| }, |
| { |
| "epoch": 170.75, |
| "grad_norm": 0.1866844743490219, |
| "learning_rate": 9.990857024586224e-05, |
| "loss": 0.0054, |
| "step": 6830 |
| }, |
| { |
| "epoch": 171.0, |
| "grad_norm": 0.16530998051166534, |
| "learning_rate": 9.990756804311446e-05, |
| "loss": 0.0051, |
| "step": 6840 |
| }, |
| { |
| "epoch": 171.25, |
| "grad_norm": 0.11174263060092926, |
| "learning_rate": 9.990656038255006e-05, |
| "loss": 0.0044, |
| "step": 6850 |
| }, |
| { |
| "epoch": 171.5, |
| "grad_norm": 0.13130134344100952, |
| "learning_rate": 9.990554726427926e-05, |
| "loss": 0.0051, |
| "step": 6860 |
| }, |
| { |
| "epoch": 171.75, |
| "grad_norm": 0.13618028163909912, |
| "learning_rate": 9.990452868841284e-05, |
| "loss": 0.0055, |
| "step": 6870 |
| }, |
| { |
| "epoch": 172.0, |
| "grad_norm": 0.12057960033416748, |
| "learning_rate": 9.99035046550622e-05, |
| "loss": 0.0051, |
| "step": 6880 |
| }, |
| { |
| "epoch": 172.25, |
| "grad_norm": 0.13933198153972626, |
| "learning_rate": 9.99024751643393e-05, |
| "loss": 0.0051, |
| "step": 6890 |
| }, |
| { |
| "epoch": 172.5, |
| "grad_norm": 0.11323478817939758, |
| "learning_rate": 9.990144021635677e-05, |
| "loss": 0.0048, |
| "step": 6900 |
| }, |
| { |
| "epoch": 172.75, |
| "grad_norm": 0.12394394725561142, |
| "learning_rate": 9.990039981122775e-05, |
| "loss": 0.0053, |
| "step": 6910 |
| }, |
| { |
| "epoch": 173.0, |
| "grad_norm": 0.12509888410568237, |
| "learning_rate": 9.989935394906602e-05, |
| "loss": 0.0049, |
| "step": 6920 |
| }, |
| { |
| "epoch": 173.25, |
| "grad_norm": 0.150846928358078, |
| "learning_rate": 9.989830262998598e-05, |
| "loss": 0.0063, |
| "step": 6930 |
| }, |
| { |
| "epoch": 173.5, |
| "grad_norm": 0.15578363835811615, |
| "learning_rate": 9.989724585410259e-05, |
| "loss": 0.0053, |
| "step": 6940 |
| }, |
| { |
| "epoch": 173.75, |
| "grad_norm": 0.1302061229944229, |
| "learning_rate": 9.989618362153139e-05, |
| "loss": 0.0057, |
| "step": 6950 |
| }, |
| { |
| "epoch": 174.0, |
| "grad_norm": 0.13165602087974548, |
| "learning_rate": 9.989511593238859e-05, |
| "loss": 0.0048, |
| "step": 6960 |
| }, |
| { |
| "epoch": 174.25, |
| "grad_norm": 0.1607247292995453, |
| "learning_rate": 9.98940427867909e-05, |
| "loss": 0.0051, |
| "step": 6970 |
| }, |
| { |
| "epoch": 174.5, |
| "grad_norm": 0.17222370207309723, |
| "learning_rate": 9.989296418485573e-05, |
| "loss": 0.0067, |
| "step": 6980 |
| }, |
| { |
| "epoch": 174.75, |
| "grad_norm": 0.14066697657108307, |
| "learning_rate": 9.989188012670101e-05, |
| "loss": 0.0052, |
| "step": 6990 |
| }, |
| { |
| "epoch": 175.0, |
| "grad_norm": 0.12622103095054626, |
| "learning_rate": 9.989079061244528e-05, |
| "loss": 0.0058, |
| "step": 7000 |
| }, |
| { |
| "epoch": 175.25, |
| "grad_norm": 0.16112670302391052, |
| "learning_rate": 9.988969564220769e-05, |
| "loss": 0.0054, |
| "step": 7010 |
| }, |
| { |
| "epoch": 175.5, |
| "grad_norm": 0.09230020642280579, |
| "learning_rate": 9.988859521610801e-05, |
| "loss": 0.0054, |
| "step": 7020 |
| }, |
| { |
| "epoch": 175.75, |
| "grad_norm": 0.14121113717556, |
| "learning_rate": 9.988748933426656e-05, |
| "loss": 0.0056, |
| "step": 7030 |
| }, |
| { |
| "epoch": 176.0, |
| "grad_norm": 0.09799350798130035, |
| "learning_rate": 9.988637799680428e-05, |
| "loss": 0.0057, |
| "step": 7040 |
| }, |
| { |
| "epoch": 176.25, |
| "grad_norm": 0.09402919560670853, |
| "learning_rate": 9.98852612038427e-05, |
| "loss": 0.0047, |
| "step": 7050 |
| }, |
| { |
| "epoch": 176.5, |
| "grad_norm": 0.1271122395992279, |
| "learning_rate": 9.988413895550397e-05, |
| "loss": 0.0046, |
| "step": 7060 |
| }, |
| { |
| "epoch": 176.75, |
| "grad_norm": 0.09895417094230652, |
| "learning_rate": 9.98830112519108e-05, |
| "loss": 0.0059, |
| "step": 7070 |
| }, |
| { |
| "epoch": 177.0, |
| "grad_norm": 0.12824778258800507, |
| "learning_rate": 9.98818780931865e-05, |
| "loss": 0.0049, |
| "step": 7080 |
| }, |
| { |
| "epoch": 177.25, |
| "grad_norm": 0.12342114001512527, |
| "learning_rate": 9.988073947945502e-05, |
| "loss": 0.0049, |
| "step": 7090 |
| }, |
| { |
| "epoch": 177.5, |
| "grad_norm": 0.143673375248909, |
| "learning_rate": 9.987959541084087e-05, |
| "loss": 0.0047, |
| "step": 7100 |
| }, |
| { |
| "epoch": 177.75, |
| "grad_norm": 0.14878948032855988, |
| "learning_rate": 9.987844588746915e-05, |
| "loss": 0.0041, |
| "step": 7110 |
| }, |
| { |
| "epoch": 178.0, |
| "grad_norm": 0.12074983865022659, |
| "learning_rate": 9.987729090946558e-05, |
| "loss": 0.0048, |
| "step": 7120 |
| }, |
| { |
| "epoch": 178.25, |
| "grad_norm": 0.12571795284748077, |
| "learning_rate": 9.987613047695647e-05, |
| "loss": 0.005, |
| "step": 7130 |
| }, |
| { |
| "epoch": 178.5, |
| "grad_norm": 0.1860690414905548, |
| "learning_rate": 9.987496459006871e-05, |
| "loss": 0.0047, |
| "step": 7140 |
| }, |
| { |
| "epoch": 178.75, |
| "grad_norm": 0.11447081714868546, |
| "learning_rate": 9.987379324892982e-05, |
| "loss": 0.0059, |
| "step": 7150 |
| }, |
| { |
| "epoch": 179.0, |
| "grad_norm": 0.2062373012304306, |
| "learning_rate": 9.987261645366788e-05, |
| "loss": 0.0049, |
| "step": 7160 |
| }, |
| { |
| "epoch": 179.25, |
| "grad_norm": 0.13676029443740845, |
| "learning_rate": 9.987143420441158e-05, |
| "loss": 0.005, |
| "step": 7170 |
| }, |
| { |
| "epoch": 179.5, |
| "grad_norm": 0.17207178473472595, |
| "learning_rate": 9.987024650129022e-05, |
| "loss": 0.0051, |
| "step": 7180 |
| }, |
| { |
| "epoch": 179.75, |
| "grad_norm": 0.17590519785881042, |
| "learning_rate": 9.986905334443368e-05, |
| "loss": 0.0059, |
| "step": 7190 |
| }, |
| { |
| "epoch": 180.0, |
| "grad_norm": 0.13459520041942596, |
| "learning_rate": 9.986785473397245e-05, |
| "loss": 0.005, |
| "step": 7200 |
| }, |
| { |
| "epoch": 180.25, |
| "grad_norm": 0.21301501989364624, |
| "learning_rate": 9.98666506700376e-05, |
| "loss": 0.0056, |
| "step": 7210 |
| }, |
| { |
| "epoch": 180.5, |
| "grad_norm": 0.13290734589099884, |
| "learning_rate": 9.986544115276081e-05, |
| "loss": 0.0066, |
| "step": 7220 |
| }, |
| { |
| "epoch": 180.75, |
| "grad_norm": 0.12449201196432114, |
| "learning_rate": 9.986422618227433e-05, |
| "loss": 0.0053, |
| "step": 7230 |
| }, |
| { |
| "epoch": 181.0, |
| "grad_norm": 0.119524285197258, |
| "learning_rate": 9.986300575871106e-05, |
| "loss": 0.0056, |
| "step": 7240 |
| }, |
| { |
| "epoch": 181.25, |
| "grad_norm": 0.10814197361469269, |
| "learning_rate": 9.986177988220444e-05, |
| "loss": 0.0047, |
| "step": 7250 |
| }, |
| { |
| "epoch": 181.5, |
| "grad_norm": 0.12408486753702164, |
| "learning_rate": 9.986054855288856e-05, |
| "loss": 0.005, |
| "step": 7260 |
| }, |
| { |
| "epoch": 181.75, |
| "grad_norm": 0.1282089203596115, |
| "learning_rate": 9.985931177089802e-05, |
| "loss": 0.0056, |
| "step": 7270 |
| }, |
| { |
| "epoch": 182.0, |
| "grad_norm": 0.17553548514842987, |
| "learning_rate": 9.985806953636814e-05, |
| "loss": 0.005, |
| "step": 7280 |
| }, |
| { |
| "epoch": 182.25, |
| "grad_norm": 0.10986651480197906, |
| "learning_rate": 9.985682184943471e-05, |
| "loss": 0.0056, |
| "step": 7290 |
| }, |
| { |
| "epoch": 182.5, |
| "grad_norm": 0.11029662936925888, |
| "learning_rate": 9.98555687102342e-05, |
| "loss": 0.0047, |
| "step": 7300 |
| }, |
| { |
| "epoch": 182.75, |
| "grad_norm": 0.1303234100341797, |
| "learning_rate": 9.985431011890367e-05, |
| "loss": 0.0052, |
| "step": 7310 |
| }, |
| { |
| "epoch": 183.0, |
| "grad_norm": 0.11000842601060867, |
| "learning_rate": 9.985304607558075e-05, |
| "loss": 0.0053, |
| "step": 7320 |
| }, |
| { |
| "epoch": 183.25, |
| "grad_norm": 0.11282704770565033, |
| "learning_rate": 9.985177658040364e-05, |
| "loss": 0.0049, |
| "step": 7330 |
| }, |
| { |
| "epoch": 183.5, |
| "grad_norm": 0.11639503389596939, |
| "learning_rate": 9.985050163351119e-05, |
| "loss": 0.005, |
| "step": 7340 |
| }, |
| { |
| "epoch": 183.75, |
| "grad_norm": 0.12452349811792374, |
| "learning_rate": 9.984922123504286e-05, |
| "loss": 0.0044, |
| "step": 7350 |
| }, |
| { |
| "epoch": 184.0, |
| "grad_norm": 0.09971412271261215, |
| "learning_rate": 9.984793538513862e-05, |
| "loss": 0.0052, |
| "step": 7360 |
| }, |
| { |
| "epoch": 184.25, |
| "grad_norm": 0.08956573903560638, |
| "learning_rate": 9.984664408393912e-05, |
| "loss": 0.0053, |
| "step": 7370 |
| }, |
| { |
| "epoch": 184.5, |
| "grad_norm": 0.0963820144534111, |
| "learning_rate": 9.984534733158556e-05, |
| "loss": 0.0053, |
| "step": 7380 |
| }, |
| { |
| "epoch": 184.75, |
| "grad_norm": 0.11035646498203278, |
| "learning_rate": 9.984404512821977e-05, |
| "loss": 0.0045, |
| "step": 7390 |
| }, |
| { |
| "epoch": 185.0, |
| "grad_norm": 0.10871769487857819, |
| "learning_rate": 9.984273747398411e-05, |
| "loss": 0.0052, |
| "step": 7400 |
| }, |
| { |
| "epoch": 185.25, |
| "grad_norm": 0.10968684405088425, |
| "learning_rate": 9.984142436902165e-05, |
| "loss": 0.0057, |
| "step": 7410 |
| }, |
| { |
| "epoch": 185.5, |
| "grad_norm": 0.135623961687088, |
| "learning_rate": 9.984010581347596e-05, |
| "loss": 0.0045, |
| "step": 7420 |
| }, |
| { |
| "epoch": 185.75, |
| "grad_norm": 0.10947667807340622, |
| "learning_rate": 9.983878180749121e-05, |
| "loss": 0.005, |
| "step": 7430 |
| }, |
| { |
| "epoch": 186.0, |
| "grad_norm": 0.10079862177371979, |
| "learning_rate": 9.983745235121222e-05, |
| "loss": 0.0046, |
| "step": 7440 |
| }, |
| { |
| "epoch": 186.25, |
| "grad_norm": 0.16193941235542297, |
| "learning_rate": 9.983611744478438e-05, |
| "loss": 0.0054, |
| "step": 7450 |
| }, |
| { |
| "epoch": 186.5, |
| "grad_norm": 0.11646270751953125, |
| "learning_rate": 9.983477708835365e-05, |
| "loss": 0.0054, |
| "step": 7460 |
| }, |
| { |
| "epoch": 186.75, |
| "grad_norm": 0.1582486480474472, |
| "learning_rate": 9.983343128206664e-05, |
| "loss": 0.0055, |
| "step": 7470 |
| }, |
| { |
| "epoch": 187.0, |
| "grad_norm": 0.15796583890914917, |
| "learning_rate": 9.983208002607049e-05, |
| "loss": 0.0054, |
| "step": 7480 |
| }, |
| { |
| "epoch": 187.25, |
| "grad_norm": 0.15638047456741333, |
| "learning_rate": 9.9830723320513e-05, |
| "loss": 0.0054, |
| "step": 7490 |
| }, |
| { |
| "epoch": 187.5, |
| "grad_norm": 0.12270047515630722, |
| "learning_rate": 9.982936116554254e-05, |
| "loss": 0.0045, |
| "step": 7500 |
| }, |
| { |
| "epoch": 187.75, |
| "grad_norm": 0.12344437092542648, |
| "learning_rate": 9.982799356130803e-05, |
| "loss": 0.0056, |
| "step": 7510 |
| }, |
| { |
| "epoch": 188.0, |
| "grad_norm": 0.13426025211811066, |
| "learning_rate": 9.982662050795908e-05, |
| "loss": 0.0055, |
| "step": 7520 |
| }, |
| { |
| "epoch": 188.25, |
| "grad_norm": 0.18254458904266357, |
| "learning_rate": 9.982524200564583e-05, |
| "loss": 0.0058, |
| "step": 7530 |
| }, |
| { |
| "epoch": 188.5, |
| "grad_norm": 0.1355525702238083, |
| "learning_rate": 9.982385805451901e-05, |
| "loss": 0.0052, |
| "step": 7540 |
| }, |
| { |
| "epoch": 188.75, |
| "grad_norm": 0.12746183574199677, |
| "learning_rate": 9.982246865472998e-05, |
| "loss": 0.0048, |
| "step": 7550 |
| }, |
| { |
| "epoch": 189.0, |
| "grad_norm": 0.07315339148044586, |
| "learning_rate": 9.982107380643069e-05, |
| "loss": 0.0051, |
| "step": 7560 |
| }, |
| { |
| "epoch": 189.25, |
| "grad_norm": 0.08937343209981918, |
| "learning_rate": 9.981967350977368e-05, |
| "loss": 0.0054, |
| "step": 7570 |
| }, |
| { |
| "epoch": 189.5, |
| "grad_norm": 0.1072855219244957, |
| "learning_rate": 9.981826776491208e-05, |
| "loss": 0.0046, |
| "step": 7580 |
| }, |
| { |
| "epoch": 189.75, |
| "grad_norm": 0.08609167486429214, |
| "learning_rate": 9.98168565719996e-05, |
| "loss": 0.0057, |
| "step": 7590 |
| }, |
| { |
| "epoch": 190.0, |
| "grad_norm": 0.1458943635225296, |
| "learning_rate": 9.98154399311906e-05, |
| "loss": 0.0052, |
| "step": 7600 |
| }, |
| { |
| "epoch": 190.25, |
| "grad_norm": 0.14894481003284454, |
| "learning_rate": 9.981401784263997e-05, |
| "loss": 0.0051, |
| "step": 7610 |
| }, |
| { |
| "epoch": 190.5, |
| "grad_norm": 0.12157489359378815, |
| "learning_rate": 9.981259030650326e-05, |
| "loss": 0.0043, |
| "step": 7620 |
| }, |
| { |
| "epoch": 190.75, |
| "grad_norm": 0.09578462690114975, |
| "learning_rate": 9.981115732293655e-05, |
| "loss": 0.0043, |
| "step": 7630 |
| }, |
| { |
| "epoch": 191.0, |
| "grad_norm": 0.1009032130241394, |
| "learning_rate": 9.980971889209659e-05, |
| "loss": 0.0047, |
| "step": 7640 |
| }, |
| { |
| "epoch": 191.25, |
| "grad_norm": 0.10971588641405106, |
| "learning_rate": 9.980827501414064e-05, |
| "loss": 0.0048, |
| "step": 7650 |
| }, |
| { |
| "epoch": 191.5, |
| "grad_norm": 0.12458957731723785, |
| "learning_rate": 9.980682568922663e-05, |
| "loss": 0.0042, |
| "step": 7660 |
| }, |
| { |
| "epoch": 191.75, |
| "grad_norm": 0.17035630345344543, |
| "learning_rate": 9.980537091751304e-05, |
| "loss": 0.0046, |
| "step": 7670 |
| }, |
| { |
| "epoch": 192.0, |
| "grad_norm": 0.12632252275943756, |
| "learning_rate": 9.980391069915897e-05, |
| "loss": 0.0047, |
| "step": 7680 |
| }, |
| { |
| "epoch": 192.25, |
| "grad_norm": 0.08305094391107559, |
| "learning_rate": 9.98024450343241e-05, |
| "loss": 0.0051, |
| "step": 7690 |
| }, |
| { |
| "epoch": 192.5, |
| "grad_norm": 0.10795965045690536, |
| "learning_rate": 9.980097392316872e-05, |
| "loss": 0.0041, |
| "step": 7700 |
| }, |
| { |
| "epoch": 192.75, |
| "grad_norm": 0.1059790700674057, |
| "learning_rate": 9.97994973658537e-05, |
| "loss": 0.0042, |
| "step": 7710 |
| }, |
| { |
| "epoch": 193.0, |
| "grad_norm": 0.09630829840898514, |
| "learning_rate": 9.979801536254054e-05, |
| "loss": 0.0038, |
| "step": 7720 |
| }, |
| { |
| "epoch": 193.25, |
| "grad_norm": 0.09148659557104111, |
| "learning_rate": 9.979652791339127e-05, |
| "loss": 0.0051, |
| "step": 7730 |
| }, |
| { |
| "epoch": 193.5, |
| "grad_norm": 0.11520194262266159, |
| "learning_rate": 9.97950350185686e-05, |
| "loss": 0.0053, |
| "step": 7740 |
| }, |
| { |
| "epoch": 193.75, |
| "grad_norm": 0.1046760156750679, |
| "learning_rate": 9.979353667823574e-05, |
| "loss": 0.0041, |
| "step": 7750 |
| }, |
| { |
| "epoch": 194.0, |
| "grad_norm": 0.13308505713939667, |
| "learning_rate": 9.979203289255658e-05, |
| "loss": 0.0046, |
| "step": 7760 |
| }, |
| { |
| "epoch": 194.25, |
| "grad_norm": 0.11976161599159241, |
| "learning_rate": 9.979052366169557e-05, |
| "loss": 0.005, |
| "step": 7770 |
| }, |
| { |
| "epoch": 194.5, |
| "grad_norm": 0.1054573506116867, |
| "learning_rate": 9.978900898581775e-05, |
| "loss": 0.0049, |
| "step": 7780 |
| }, |
| { |
| "epoch": 194.75, |
| "grad_norm": 0.14348535239696503, |
| "learning_rate": 9.978748886508875e-05, |
| "loss": 0.0043, |
| "step": 7790 |
| }, |
| { |
| "epoch": 195.0, |
| "grad_norm": 0.1736174374818802, |
| "learning_rate": 9.978596329967484e-05, |
| "loss": 0.0052, |
| "step": 7800 |
| }, |
| { |
| "epoch": 195.25, |
| "grad_norm": 0.15408484637737274, |
| "learning_rate": 9.978443228974284e-05, |
| "loss": 0.0043, |
| "step": 7810 |
| }, |
| { |
| "epoch": 195.5, |
| "grad_norm": 0.12470567971467972, |
| "learning_rate": 9.978289583546015e-05, |
| "loss": 0.0047, |
| "step": 7820 |
| }, |
| { |
| "epoch": 195.75, |
| "grad_norm": 0.1008351519703865, |
| "learning_rate": 9.978135393699484e-05, |
| "loss": 0.0045, |
| "step": 7830 |
| }, |
| { |
| "epoch": 196.0, |
| "grad_norm": 0.13738684356212616, |
| "learning_rate": 9.977980659451548e-05, |
| "loss": 0.005, |
| "step": 7840 |
| }, |
| { |
| "epoch": 196.25, |
| "grad_norm": 0.10673101246356964, |
| "learning_rate": 9.977825380819135e-05, |
| "loss": 0.005, |
| "step": 7850 |
| }, |
| { |
| "epoch": 196.5, |
| "grad_norm": 0.12118956446647644, |
| "learning_rate": 9.97766955781922e-05, |
| "loss": 0.005, |
| "step": 7860 |
| }, |
| { |
| "epoch": 196.75, |
| "grad_norm": 0.14345556497573853, |
| "learning_rate": 9.977513190468848e-05, |
| "loss": 0.0044, |
| "step": 7870 |
| }, |
| { |
| "epoch": 197.0, |
| "grad_norm": 0.13606630265712738, |
| "learning_rate": 9.977356278785116e-05, |
| "loss": 0.0044, |
| "step": 7880 |
| }, |
| { |
| "epoch": 197.25, |
| "grad_norm": 0.14485426247119904, |
| "learning_rate": 9.977198822785184e-05, |
| "loss": 0.0048, |
| "step": 7890 |
| }, |
| { |
| "epoch": 197.5, |
| "grad_norm": 0.11165472120046616, |
| "learning_rate": 9.977040822486273e-05, |
| "loss": 0.0045, |
| "step": 7900 |
| }, |
| { |
| "epoch": 197.75, |
| "grad_norm": 0.11101426929235458, |
| "learning_rate": 9.97688227790566e-05, |
| "loss": 0.0048, |
| "step": 7910 |
| }, |
| { |
| "epoch": 198.0, |
| "grad_norm": 0.1383507251739502, |
| "learning_rate": 9.976723189060684e-05, |
| "loss": 0.0048, |
| "step": 7920 |
| }, |
| { |
| "epoch": 198.25, |
| "grad_norm": 0.07337084412574768, |
| "learning_rate": 9.976563555968742e-05, |
| "loss": 0.0044, |
| "step": 7930 |
| }, |
| { |
| "epoch": 198.5, |
| "grad_norm": 0.10197046399116516, |
| "learning_rate": 9.976403378647292e-05, |
| "loss": 0.0052, |
| "step": 7940 |
| }, |
| { |
| "epoch": 198.75, |
| "grad_norm": 0.08910421282052994, |
| "learning_rate": 9.97624265711385e-05, |
| "loss": 0.0052, |
| "step": 7950 |
| }, |
| { |
| "epoch": 199.0, |
| "grad_norm": 0.12083287537097931, |
| "learning_rate": 9.976081391385993e-05, |
| "loss": 0.0052, |
| "step": 7960 |
| }, |
| { |
| "epoch": 199.25, |
| "grad_norm": 0.08918462693691254, |
| "learning_rate": 9.975919581481356e-05, |
| "loss": 0.0051, |
| "step": 7970 |
| }, |
| { |
| "epoch": 199.5, |
| "grad_norm": 0.10877599567174911, |
| "learning_rate": 9.975757227417634e-05, |
| "loss": 0.0047, |
| "step": 7980 |
| }, |
| { |
| "epoch": 199.75, |
| "grad_norm": 0.09586022794246674, |
| "learning_rate": 9.975594329212586e-05, |
| "loss": 0.005, |
| "step": 7990 |
| }, |
| { |
| "epoch": 200.0, |
| "grad_norm": 0.15994898974895477, |
| "learning_rate": 9.97543088688402e-05, |
| "loss": 0.0049, |
| "step": 8000 |
| }, |
| { |
| "epoch": 200.25, |
| "grad_norm": 0.14788265526294708, |
| "learning_rate": 9.975266900449814e-05, |
| "loss": 0.0056, |
| "step": 8010 |
| }, |
| { |
| "epoch": 200.5, |
| "grad_norm": 0.11368973553180695, |
| "learning_rate": 9.975102369927898e-05, |
| "loss": 0.0045, |
| "step": 8020 |
| }, |
| { |
| "epoch": 200.75, |
| "grad_norm": 0.14189907908439636, |
| "learning_rate": 9.974937295336269e-05, |
| "loss": 0.005, |
| "step": 8030 |
| }, |
| { |
| "epoch": 201.0, |
| "grad_norm": 0.12064416706562042, |
| "learning_rate": 9.974771676692975e-05, |
| "loss": 0.0049, |
| "step": 8040 |
| }, |
| { |
| "epoch": 201.25, |
| "grad_norm": 0.09696459025144577, |
| "learning_rate": 9.974605514016131e-05, |
| "loss": 0.0043, |
| "step": 8050 |
| }, |
| { |
| "epoch": 201.5, |
| "grad_norm": 0.10713174939155579, |
| "learning_rate": 9.974438807323907e-05, |
| "loss": 0.0046, |
| "step": 8060 |
| }, |
| { |
| "epoch": 201.75, |
| "grad_norm": 0.10264813154935837, |
| "learning_rate": 9.974271556634535e-05, |
| "loss": 0.0044, |
| "step": 8070 |
| }, |
| { |
| "epoch": 202.0, |
| "grad_norm": 0.08025900274515152, |
| "learning_rate": 9.974103761966302e-05, |
| "loss": 0.0055, |
| "step": 8080 |
| }, |
| { |
| "epoch": 202.25, |
| "grad_norm": 0.13613669574260712, |
| "learning_rate": 9.973935423337563e-05, |
| "loss": 0.005, |
| "step": 8090 |
| }, |
| { |
| "epoch": 202.5, |
| "grad_norm": 0.11066287010908127, |
| "learning_rate": 9.973766540766722e-05, |
| "loss": 0.005, |
| "step": 8100 |
| }, |
| { |
| "epoch": 202.75, |
| "grad_norm": 0.09373009204864502, |
| "learning_rate": 9.97359711427225e-05, |
| "loss": 0.0063, |
| "step": 8110 |
| }, |
| { |
| "epoch": 203.0, |
| "grad_norm": 0.10568234324455261, |
| "learning_rate": 9.973427143872677e-05, |
| "loss": 0.0049, |
| "step": 8120 |
| }, |
| { |
| "epoch": 203.25, |
| "grad_norm": 0.08303306251764297, |
| "learning_rate": 9.973256629586589e-05, |
| "loss": 0.0056, |
| "step": 8130 |
| }, |
| { |
| "epoch": 203.5, |
| "grad_norm": 0.08859831839799881, |
| "learning_rate": 9.973085571432632e-05, |
| "loss": 0.0055, |
| "step": 8140 |
| }, |
| { |
| "epoch": 203.75, |
| "grad_norm": 0.12644809484481812, |
| "learning_rate": 9.972913969429513e-05, |
| "loss": 0.0056, |
| "step": 8150 |
| }, |
| { |
| "epoch": 204.0, |
| "grad_norm": 0.12281641364097595, |
| "learning_rate": 9.972741823596e-05, |
| "loss": 0.0052, |
| "step": 8160 |
| }, |
| { |
| "epoch": 204.25, |
| "grad_norm": 0.1079707145690918, |
| "learning_rate": 9.972569133950917e-05, |
| "loss": 0.0048, |
| "step": 8170 |
| }, |
| { |
| "epoch": 204.5, |
| "grad_norm": 0.13612797856330872, |
| "learning_rate": 9.972395900513151e-05, |
| "loss": 0.0047, |
| "step": 8180 |
| }, |
| { |
| "epoch": 204.75, |
| "grad_norm": 0.1213703528046608, |
| "learning_rate": 9.972222123301645e-05, |
| "loss": 0.0052, |
| "step": 8190 |
| }, |
| { |
| "epoch": 205.0, |
| "grad_norm": 0.10602298378944397, |
| "learning_rate": 9.972047802335403e-05, |
| "loss": 0.0051, |
| "step": 8200 |
| }, |
| { |
| "epoch": 205.25, |
| "grad_norm": 0.09890609234571457, |
| "learning_rate": 9.971872937633488e-05, |
| "loss": 0.0045, |
| "step": 8210 |
| }, |
| { |
| "epoch": 205.5, |
| "grad_norm": 0.10948968678712845, |
| "learning_rate": 9.971697529215024e-05, |
| "loss": 0.005, |
| "step": 8220 |
| }, |
| { |
| "epoch": 205.75, |
| "grad_norm": 0.11318536102771759, |
| "learning_rate": 9.971521577099192e-05, |
| "loss": 0.005, |
| "step": 8230 |
| }, |
| { |
| "epoch": 206.0, |
| "grad_norm": 0.14533638954162598, |
| "learning_rate": 9.971345081305236e-05, |
| "loss": 0.0051, |
| "step": 8240 |
| }, |
| { |
| "epoch": 206.25, |
| "grad_norm": 0.16806240379810333, |
| "learning_rate": 9.971168041852456e-05, |
| "loss": 0.0047, |
| "step": 8250 |
| }, |
| { |
| "epoch": 206.5, |
| "grad_norm": 0.12269231677055359, |
| "learning_rate": 9.970990458760215e-05, |
| "loss": 0.0044, |
| "step": 8260 |
| }, |
| { |
| "epoch": 206.75, |
| "grad_norm": 0.19708134233951569, |
| "learning_rate": 9.970812332047929e-05, |
| "loss": 0.0045, |
| "step": 8270 |
| }, |
| { |
| "epoch": 207.0, |
| "grad_norm": 0.16831305623054504, |
| "learning_rate": 9.97063366173508e-05, |
| "loss": 0.0046, |
| "step": 8280 |
| }, |
| { |
| "epoch": 207.25, |
| "grad_norm": 0.09102386981248856, |
| "learning_rate": 9.970454447841207e-05, |
| "loss": 0.0046, |
| "step": 8290 |
| }, |
| { |
| "epoch": 207.5, |
| "grad_norm": 0.1855050027370453, |
| "learning_rate": 9.970274690385909e-05, |
| "loss": 0.005, |
| "step": 8300 |
| }, |
| { |
| "epoch": 207.75, |
| "grad_norm": 0.1450817734003067, |
| "learning_rate": 9.970094389388844e-05, |
| "loss": 0.0054, |
| "step": 8310 |
| }, |
| { |
| "epoch": 208.0, |
| "grad_norm": 0.1687973290681839, |
| "learning_rate": 9.969913544869728e-05, |
| "loss": 0.0049, |
| "step": 8320 |
| }, |
| { |
| "epoch": 208.25, |
| "grad_norm": 0.12717373669147491, |
| "learning_rate": 9.96973215684834e-05, |
| "loss": 0.005, |
| "step": 8330 |
| }, |
| { |
| "epoch": 208.5, |
| "grad_norm": 0.1274053007364273, |
| "learning_rate": 9.969550225344513e-05, |
| "loss": 0.0051, |
| "step": 8340 |
| }, |
| { |
| "epoch": 208.75, |
| "grad_norm": 0.15039260685443878, |
| "learning_rate": 9.969367750378147e-05, |
| "loss": 0.0036, |
| "step": 8350 |
| }, |
| { |
| "epoch": 209.0, |
| "grad_norm": 0.12453170120716095, |
| "learning_rate": 9.969184731969194e-05, |
| "loss": 0.0052, |
| "step": 8360 |
| }, |
| { |
| "epoch": 209.25, |
| "grad_norm": 0.10263194143772125, |
| "learning_rate": 9.96900117013767e-05, |
| "loss": 0.005, |
| "step": 8370 |
| }, |
| { |
| "epoch": 209.5, |
| "grad_norm": 0.10451891273260117, |
| "learning_rate": 9.96881706490365e-05, |
| "loss": 0.0048, |
| "step": 8380 |
| }, |
| { |
| "epoch": 209.75, |
| "grad_norm": 0.11797595024108887, |
| "learning_rate": 9.968632416287265e-05, |
| "loss": 0.0047, |
| "step": 8390 |
| }, |
| { |
| "epoch": 210.0, |
| "grad_norm": 0.1403282731771469, |
| "learning_rate": 9.96844722430871e-05, |
| "loss": 0.0042, |
| "step": 8400 |
| }, |
| { |
| "epoch": 210.25, |
| "grad_norm": 0.12103106081485748, |
| "learning_rate": 9.968261488988235e-05, |
| "loss": 0.0052, |
| "step": 8410 |
| }, |
| { |
| "epoch": 210.5, |
| "grad_norm": 0.09587433189153671, |
| "learning_rate": 9.968075210346155e-05, |
| "loss": 0.0043, |
| "step": 8420 |
| }, |
| { |
| "epoch": 210.75, |
| "grad_norm": 0.09224146604537964, |
| "learning_rate": 9.967888388402839e-05, |
| "loss": 0.0049, |
| "step": 8430 |
| }, |
| { |
| "epoch": 211.0, |
| "grad_norm": 0.09856747835874557, |
| "learning_rate": 9.967701023178717e-05, |
| "loss": 0.0045, |
| "step": 8440 |
| }, |
| { |
| "epoch": 211.25, |
| "grad_norm": 0.1124839186668396, |
| "learning_rate": 9.967513114694282e-05, |
| "loss": 0.0045, |
| "step": 8450 |
| }, |
| { |
| "epoch": 211.5, |
| "grad_norm": 0.10403812676668167, |
| "learning_rate": 9.967324662970079e-05, |
| "loss": 0.0043, |
| "step": 8460 |
| }, |
| { |
| "epoch": 211.75, |
| "grad_norm": 0.09139248728752136, |
| "learning_rate": 9.96713566802672e-05, |
| "loss": 0.0047, |
| "step": 8470 |
| }, |
| { |
| "epoch": 212.0, |
| "grad_norm": 0.08583129942417145, |
| "learning_rate": 9.966946129884873e-05, |
| "loss": 0.0044, |
| "step": 8480 |
| }, |
| { |
| "epoch": 212.25, |
| "grad_norm": 0.10125056654214859, |
| "learning_rate": 9.966756048565265e-05, |
| "loss": 0.0047, |
| "step": 8490 |
| }, |
| { |
| "epoch": 212.5, |
| "grad_norm": 0.1086161881685257, |
| "learning_rate": 9.966565424088681e-05, |
| "loss": 0.0042, |
| "step": 8500 |
| }, |
| { |
| "epoch": 212.75, |
| "grad_norm": 0.1101728305220604, |
| "learning_rate": 9.96637425647597e-05, |
| "loss": 0.0045, |
| "step": 8510 |
| }, |
| { |
| "epoch": 213.0, |
| "grad_norm": 0.11788172274827957, |
| "learning_rate": 9.966182545748038e-05, |
| "loss": 0.0045, |
| "step": 8520 |
| }, |
| { |
| "epoch": 213.25, |
| "grad_norm": 0.10477957874536514, |
| "learning_rate": 9.96599029192585e-05, |
| "loss": 0.0046, |
| "step": 8530 |
| }, |
| { |
| "epoch": 213.5, |
| "grad_norm": 0.10119979828596115, |
| "learning_rate": 9.965797495030428e-05, |
| "loss": 0.0044, |
| "step": 8540 |
| }, |
| { |
| "epoch": 213.75, |
| "grad_norm": 0.10286411643028259, |
| "learning_rate": 9.96560415508286e-05, |
| "loss": 0.0048, |
| "step": 8550 |
| }, |
| { |
| "epoch": 214.0, |
| "grad_norm": 0.11209660023450851, |
| "learning_rate": 9.965410272104286e-05, |
| "loss": 0.0041, |
| "step": 8560 |
| }, |
| { |
| "epoch": 214.25, |
| "grad_norm": 0.15167979896068573, |
| "learning_rate": 9.96521584611591e-05, |
| "loss": 0.0046, |
| "step": 8570 |
| }, |
| { |
| "epoch": 214.5, |
| "grad_norm": 0.1361161321401596, |
| "learning_rate": 9.965020877138994e-05, |
| "loss": 0.0055, |
| "step": 8580 |
| }, |
| { |
| "epoch": 214.75, |
| "grad_norm": 0.1173548623919487, |
| "learning_rate": 9.964825365194861e-05, |
| "loss": 0.0057, |
| "step": 8590 |
| }, |
| { |
| "epoch": 215.0, |
| "grad_norm": 0.10933533310890198, |
| "learning_rate": 9.96462931030489e-05, |
| "loss": 0.0047, |
| "step": 8600 |
| }, |
| { |
| "epoch": 215.25, |
| "grad_norm": 0.1078348457813263, |
| "learning_rate": 9.96443271249052e-05, |
| "loss": 0.0044, |
| "step": 8610 |
| }, |
| { |
| "epoch": 215.5, |
| "grad_norm": 0.12997107207775116, |
| "learning_rate": 9.964235571773255e-05, |
| "loss": 0.0042, |
| "step": 8620 |
| }, |
| { |
| "epoch": 215.75, |
| "grad_norm": 0.08457321673631668, |
| "learning_rate": 9.96403788817465e-05, |
| "loss": 0.0044, |
| "step": 8630 |
| }, |
| { |
| "epoch": 216.0, |
| "grad_norm": 0.15407255291938782, |
| "learning_rate": 9.963839661716325e-05, |
| "loss": 0.0055, |
| "step": 8640 |
| }, |
| { |
| "epoch": 216.25, |
| "grad_norm": 0.1310333013534546, |
| "learning_rate": 9.963640892419958e-05, |
| "loss": 0.0051, |
| "step": 8650 |
| }, |
| { |
| "epoch": 216.5, |
| "grad_norm": 0.08272965252399445, |
| "learning_rate": 9.963441580307286e-05, |
| "loss": 0.0043, |
| "step": 8660 |
| }, |
| { |
| "epoch": 216.75, |
| "grad_norm": 0.0824747085571289, |
| "learning_rate": 9.963241725400104e-05, |
| "loss": 0.0042, |
| "step": 8670 |
| }, |
| { |
| "epoch": 217.0, |
| "grad_norm": 0.08268961310386658, |
| "learning_rate": 9.963041327720271e-05, |
| "loss": 0.0037, |
| "step": 8680 |
| }, |
| { |
| "epoch": 217.25, |
| "grad_norm": 0.07103843986988068, |
| "learning_rate": 9.962840387289697e-05, |
| "loss": 0.0051, |
| "step": 8690 |
| }, |
| { |
| "epoch": 217.5, |
| "grad_norm": 0.09084175527095795, |
| "learning_rate": 9.962638904130363e-05, |
| "loss": 0.0044, |
| "step": 8700 |
| }, |
| { |
| "epoch": 217.75, |
| "grad_norm": 0.10540860146284103, |
| "learning_rate": 9.962436878264298e-05, |
| "loss": 0.0047, |
| "step": 8710 |
| }, |
| { |
| "epoch": 218.0, |
| "grad_norm": 0.10905332863330841, |
| "learning_rate": 9.962234309713598e-05, |
| "loss": 0.0046, |
| "step": 8720 |
| }, |
| { |
| "epoch": 218.25, |
| "grad_norm": 0.1034293845295906, |
| "learning_rate": 9.962031198500414e-05, |
| "loss": 0.0041, |
| "step": 8730 |
| }, |
| { |
| "epoch": 218.5, |
| "grad_norm": 0.09964323043823242, |
| "learning_rate": 9.961827544646958e-05, |
| "loss": 0.0045, |
| "step": 8740 |
| }, |
| { |
| "epoch": 218.75, |
| "grad_norm": 0.09900356829166412, |
| "learning_rate": 9.961623348175501e-05, |
| "loss": 0.0045, |
| "step": 8750 |
| }, |
| { |
| "epoch": 219.0, |
| "grad_norm": 0.10930373519659042, |
| "learning_rate": 9.961418609108377e-05, |
| "loss": 0.0046, |
| "step": 8760 |
| }, |
| { |
| "epoch": 219.25, |
| "grad_norm": 0.09773886203765869, |
| "learning_rate": 9.961213327467971e-05, |
| "loss": 0.0046, |
| "step": 8770 |
| }, |
| { |
| "epoch": 219.5, |
| "grad_norm": 0.12222948670387268, |
| "learning_rate": 9.961007503276736e-05, |
| "loss": 0.0045, |
| "step": 8780 |
| }, |
| { |
| "epoch": 219.75, |
| "grad_norm": 0.07556351274251938, |
| "learning_rate": 9.960801136557179e-05, |
| "loss": 0.004, |
| "step": 8790 |
| }, |
| { |
| "epoch": 220.0, |
| "grad_norm": 0.08961526304483414, |
| "learning_rate": 9.960594227331866e-05, |
| "loss": 0.0037, |
| "step": 8800 |
| }, |
| { |
| "epoch": 220.25, |
| "grad_norm": 0.10761409252882004, |
| "learning_rate": 9.960386775623429e-05, |
| "loss": 0.0048, |
| "step": 8810 |
| }, |
| { |
| "epoch": 220.5, |
| "grad_norm": 0.07478926330804825, |
| "learning_rate": 9.96017878145455e-05, |
| "loss": 0.0042, |
| "step": 8820 |
| }, |
| { |
| "epoch": 220.75, |
| "grad_norm": 0.09129362553358078, |
| "learning_rate": 9.959970244847977e-05, |
| "loss": 0.0042, |
| "step": 8830 |
| }, |
| { |
| "epoch": 221.0, |
| "grad_norm": 0.10972625017166138, |
| "learning_rate": 9.959761165826518e-05, |
| "loss": 0.0047, |
| "step": 8840 |
| }, |
| { |
| "epoch": 221.25, |
| "grad_norm": 0.10915403813123703, |
| "learning_rate": 9.959551544413033e-05, |
| "loss": 0.0049, |
| "step": 8850 |
| }, |
| { |
| "epoch": 221.5, |
| "grad_norm": 0.13377898931503296, |
| "learning_rate": 9.959341380630448e-05, |
| "loss": 0.005, |
| "step": 8860 |
| }, |
| { |
| "epoch": 221.75, |
| "grad_norm": 0.11009661853313446, |
| "learning_rate": 9.959130674501746e-05, |
| "loss": 0.0044, |
| "step": 8870 |
| }, |
| { |
| "epoch": 222.0, |
| "grad_norm": 0.11041966080665588, |
| "learning_rate": 9.958919426049968e-05, |
| "loss": 0.0045, |
| "step": 8880 |
| }, |
| { |
| "epoch": 222.25, |
| "grad_norm": 0.13888955116271973, |
| "learning_rate": 9.958707635298219e-05, |
| "loss": 0.0045, |
| "step": 8890 |
| }, |
| { |
| "epoch": 222.5, |
| "grad_norm": 0.13241493701934814, |
| "learning_rate": 9.958495302269657e-05, |
| "loss": 0.0045, |
| "step": 8900 |
| }, |
| { |
| "epoch": 222.75, |
| "grad_norm": 0.090728759765625, |
| "learning_rate": 9.958282426987503e-05, |
| "loss": 0.0043, |
| "step": 8910 |
| }, |
| { |
| "epoch": 223.0, |
| "grad_norm": 0.09145260602235794, |
| "learning_rate": 9.95806900947504e-05, |
| "loss": 0.0044, |
| "step": 8920 |
| }, |
| { |
| "epoch": 223.25, |
| "grad_norm": 0.09865462779998779, |
| "learning_rate": 9.957855049755604e-05, |
| "loss": 0.0046, |
| "step": 8930 |
| }, |
| { |
| "epoch": 223.5, |
| "grad_norm": 0.1020873486995697, |
| "learning_rate": 9.957640547852593e-05, |
| "loss": 0.0041, |
| "step": 8940 |
| }, |
| { |
| "epoch": 223.75, |
| "grad_norm": 0.1301255077123642, |
| "learning_rate": 9.957425503789466e-05, |
| "loss": 0.0041, |
| "step": 8950 |
| }, |
| { |
| "epoch": 224.0, |
| "grad_norm": 0.12889862060546875, |
| "learning_rate": 9.957209917589738e-05, |
| "loss": 0.0045, |
| "step": 8960 |
| }, |
| { |
| "epoch": 224.25, |
| "grad_norm": 0.11002857983112335, |
| "learning_rate": 9.956993789276987e-05, |
| "loss": 0.0044, |
| "step": 8970 |
| }, |
| { |
| "epoch": 224.5, |
| "grad_norm": 0.08376175165176392, |
| "learning_rate": 9.956777118874847e-05, |
| "loss": 0.0049, |
| "step": 8980 |
| }, |
| { |
| "epoch": 224.75, |
| "grad_norm": 0.1019083708524704, |
| "learning_rate": 9.956559906407016e-05, |
| "loss": 0.0042, |
| "step": 8990 |
| }, |
| { |
| "epoch": 225.0, |
| "grad_norm": 0.08716961741447449, |
| "learning_rate": 9.956342151897245e-05, |
| "loss": 0.0054, |
| "step": 9000 |
| }, |
| { |
| "epoch": 225.25, |
| "grad_norm": 0.08603795617818832, |
| "learning_rate": 9.956123855369346e-05, |
| "loss": 0.0039, |
| "step": 9010 |
| }, |
| { |
| "epoch": 225.5, |
| "grad_norm": 0.11349231004714966, |
| "learning_rate": 9.955905016847196e-05, |
| "loss": 0.0046, |
| "step": 9020 |
| }, |
| { |
| "epoch": 225.75, |
| "grad_norm": 0.10906950384378433, |
| "learning_rate": 9.955685636354723e-05, |
| "loss": 0.0043, |
| "step": 9030 |
| }, |
| { |
| "epoch": 226.0, |
| "grad_norm": 0.08074238151311874, |
| "learning_rate": 9.95546571391592e-05, |
| "loss": 0.0041, |
| "step": 9040 |
| }, |
| { |
| "epoch": 226.25, |
| "grad_norm": 0.1182880699634552, |
| "learning_rate": 9.955245249554837e-05, |
| "loss": 0.0045, |
| "step": 9050 |
| }, |
| { |
| "epoch": 226.5, |
| "grad_norm": 0.11833614856004715, |
| "learning_rate": 9.955024243295582e-05, |
| "loss": 0.0047, |
| "step": 9060 |
| }, |
| { |
| "epoch": 226.75, |
| "grad_norm": 0.12007834017276764, |
| "learning_rate": 9.954802695162328e-05, |
| "loss": 0.0048, |
| "step": 9070 |
| }, |
| { |
| "epoch": 227.0, |
| "grad_norm": 0.10048998892307281, |
| "learning_rate": 9.954580605179302e-05, |
| "loss": 0.0042, |
| "step": 9080 |
| }, |
| { |
| "epoch": 227.25, |
| "grad_norm": 0.10070258378982544, |
| "learning_rate": 9.954357973370788e-05, |
| "loss": 0.0042, |
| "step": 9090 |
| }, |
| { |
| "epoch": 227.5, |
| "grad_norm": 0.10562805086374283, |
| "learning_rate": 9.954134799761135e-05, |
| "loss": 0.006, |
| "step": 9100 |
| }, |
| { |
| "epoch": 227.75, |
| "grad_norm": 0.11274793744087219, |
| "learning_rate": 9.953911084374748e-05, |
| "loss": 0.0044, |
| "step": 9110 |
| }, |
| { |
| "epoch": 228.0, |
| "grad_norm": 0.15728285908699036, |
| "learning_rate": 9.953686827236093e-05, |
| "loss": 0.0045, |
| "step": 9120 |
| }, |
| { |
| "epoch": 228.25, |
| "grad_norm": 0.11849649250507355, |
| "learning_rate": 9.953462028369695e-05, |
| "loss": 0.0046, |
| "step": 9130 |
| }, |
| { |
| "epoch": 228.5, |
| "grad_norm": 0.10420042276382446, |
| "learning_rate": 9.953236687800136e-05, |
| "loss": 0.0046, |
| "step": 9140 |
| }, |
| { |
| "epoch": 228.75, |
| "grad_norm": 0.10627323389053345, |
| "learning_rate": 9.95301080555206e-05, |
| "loss": 0.0044, |
| "step": 9150 |
| }, |
| { |
| "epoch": 229.0, |
| "grad_norm": 0.11721424013376236, |
| "learning_rate": 9.952784381650171e-05, |
| "loss": 0.0051, |
| "step": 9160 |
| }, |
| { |
| "epoch": 229.25, |
| "grad_norm": 0.1566528081893921, |
| "learning_rate": 9.952557416119226e-05, |
| "loss": 0.0044, |
| "step": 9170 |
| }, |
| { |
| "epoch": 229.5, |
| "grad_norm": 0.12469837069511414, |
| "learning_rate": 9.95232990898405e-05, |
| "loss": 0.0046, |
| "step": 9180 |
| }, |
| { |
| "epoch": 229.75, |
| "grad_norm": 0.11129660159349442, |
| "learning_rate": 9.95210186026952e-05, |
| "loss": 0.0044, |
| "step": 9190 |
| }, |
| { |
| "epoch": 230.0, |
| "grad_norm": 0.1258237212896347, |
| "learning_rate": 9.951873270000576e-05, |
| "loss": 0.0044, |
| "step": 9200 |
| }, |
| { |
| "epoch": 230.25, |
| "grad_norm": 0.1056128740310669, |
| "learning_rate": 9.951644138202216e-05, |
| "loss": 0.0048, |
| "step": 9210 |
| }, |
| { |
| "epoch": 230.5, |
| "grad_norm": 0.12693685293197632, |
| "learning_rate": 9.951414464899498e-05, |
| "loss": 0.0045, |
| "step": 9220 |
| }, |
| { |
| "epoch": 230.75, |
| "grad_norm": 0.1353299915790558, |
| "learning_rate": 9.951184250117538e-05, |
| "loss": 0.0048, |
| "step": 9230 |
| }, |
| { |
| "epoch": 231.0, |
| "grad_norm": 0.14081411063671112, |
| "learning_rate": 9.950953493881513e-05, |
| "loss": 0.0046, |
| "step": 9240 |
| }, |
| { |
| "epoch": 231.25, |
| "grad_norm": 0.1451917141675949, |
| "learning_rate": 9.950722196216658e-05, |
| "loss": 0.0044, |
| "step": 9250 |
| }, |
| { |
| "epoch": 231.5, |
| "grad_norm": 0.11318142712116241, |
| "learning_rate": 9.950490357148265e-05, |
| "loss": 0.0048, |
| "step": 9260 |
| }, |
| { |
| "epoch": 231.75, |
| "grad_norm": 0.13487468659877777, |
| "learning_rate": 9.950257976701692e-05, |
| "loss": 0.0047, |
| "step": 9270 |
| }, |
| { |
| "epoch": 232.0, |
| "grad_norm": 0.1256389319896698, |
| "learning_rate": 9.950025054902348e-05, |
| "loss": 0.0042, |
| "step": 9280 |
| }, |
| { |
| "epoch": 232.25, |
| "grad_norm": 0.10590405017137527, |
| "learning_rate": 9.949791591775706e-05, |
| "loss": 0.004, |
| "step": 9290 |
| }, |
| { |
| "epoch": 232.5, |
| "grad_norm": 0.08192436397075653, |
| "learning_rate": 9.949557587347298e-05, |
| "loss": 0.0051, |
| "step": 9300 |
| }, |
| { |
| "epoch": 232.75, |
| "grad_norm": 0.12231657654047012, |
| "learning_rate": 9.949323041642713e-05, |
| "loss": 0.004, |
| "step": 9310 |
| }, |
| { |
| "epoch": 233.0, |
| "grad_norm": 0.08818791806697845, |
| "learning_rate": 9.949087954687602e-05, |
| "loss": 0.0047, |
| "step": 9320 |
| }, |
| { |
| "epoch": 233.25, |
| "grad_norm": 0.1179974153637886, |
| "learning_rate": 9.948852326507672e-05, |
| "loss": 0.0048, |
| "step": 9330 |
| }, |
| { |
| "epoch": 233.5, |
| "grad_norm": 0.09120003879070282, |
| "learning_rate": 9.948616157128694e-05, |
| "loss": 0.0041, |
| "step": 9340 |
| }, |
| { |
| "epoch": 233.75, |
| "grad_norm": 0.09142210334539413, |
| "learning_rate": 9.948379446576493e-05, |
| "loss": 0.0042, |
| "step": 9350 |
| }, |
| { |
| "epoch": 234.0, |
| "grad_norm": 0.15579193830490112, |
| "learning_rate": 9.948142194876952e-05, |
| "loss": 0.0044, |
| "step": 9360 |
| }, |
| { |
| "epoch": 234.25, |
| "grad_norm": 0.09148753434419632, |
| "learning_rate": 9.947904402056024e-05, |
| "loss": 0.0045, |
| "step": 9370 |
| }, |
| { |
| "epoch": 234.5, |
| "grad_norm": 0.10289934277534485, |
| "learning_rate": 9.947666068139708e-05, |
| "loss": 0.0055, |
| "step": 9380 |
| }, |
| { |
| "epoch": 234.75, |
| "grad_norm": 0.11406154930591583, |
| "learning_rate": 9.947427193154071e-05, |
| "loss": 0.0043, |
| "step": 9390 |
| }, |
| { |
| "epoch": 235.0, |
| "grad_norm": 0.13602414727210999, |
| "learning_rate": 9.947187777125233e-05, |
| "loss": 0.0055, |
| "step": 9400 |
| }, |
| { |
| "epoch": 235.25, |
| "grad_norm": 0.10251234471797943, |
| "learning_rate": 9.946947820079377e-05, |
| "loss": 0.0043, |
| "step": 9410 |
| }, |
| { |
| "epoch": 235.5, |
| "grad_norm": 0.11955104768276215, |
| "learning_rate": 9.946707322042747e-05, |
| "loss": 0.0038, |
| "step": 9420 |
| }, |
| { |
| "epoch": 235.75, |
| "grad_norm": 0.1253117173910141, |
| "learning_rate": 9.94646628304164e-05, |
| "loss": 0.0045, |
| "step": 9430 |
| }, |
| { |
| "epoch": 236.0, |
| "grad_norm": 0.13483910262584686, |
| "learning_rate": 9.946224703102418e-05, |
| "loss": 0.0046, |
| "step": 9440 |
| }, |
| { |
| "epoch": 236.25, |
| "grad_norm": 0.13329099118709564, |
| "learning_rate": 9.945982582251498e-05, |
| "loss": 0.0042, |
| "step": 9450 |
| }, |
| { |
| "epoch": 236.5, |
| "grad_norm": 0.11717459559440613, |
| "learning_rate": 9.94573992051536e-05, |
| "loss": 0.0046, |
| "step": 9460 |
| }, |
| { |
| "epoch": 236.75, |
| "grad_norm": 0.14019109308719635, |
| "learning_rate": 9.94549671792054e-05, |
| "loss": 0.0051, |
| "step": 9470 |
| }, |
| { |
| "epoch": 237.0, |
| "grad_norm": 0.13346461951732635, |
| "learning_rate": 9.945252974493635e-05, |
| "loss": 0.004, |
| "step": 9480 |
| }, |
| { |
| "epoch": 237.25, |
| "grad_norm": 0.14633530378341675, |
| "learning_rate": 9.9450086902613e-05, |
| "loss": 0.0045, |
| "step": 9490 |
| }, |
| { |
| "epoch": 237.5, |
| "grad_norm": 0.11529278010129929, |
| "learning_rate": 9.944763865250248e-05, |
| "loss": 0.0043, |
| "step": 9500 |
| }, |
| { |
| "epoch": 237.75, |
| "grad_norm": 0.11980587244033813, |
| "learning_rate": 9.944518499487254e-05, |
| "loss": 0.0039, |
| "step": 9510 |
| }, |
| { |
| "epoch": 238.0, |
| "grad_norm": 0.1261284202337265, |
| "learning_rate": 9.944272592999151e-05, |
| "loss": 0.0046, |
| "step": 9520 |
| }, |
| { |
| "epoch": 238.25, |
| "grad_norm": 0.09479007124900818, |
| "learning_rate": 9.94402614581283e-05, |
| "loss": 0.0037, |
| "step": 9530 |
| }, |
| { |
| "epoch": 238.5, |
| "grad_norm": 0.12620887160301208, |
| "learning_rate": 9.943779157955244e-05, |
| "loss": 0.0048, |
| "step": 9540 |
| }, |
| { |
| "epoch": 238.75, |
| "grad_norm": 0.1151387169957161, |
| "learning_rate": 9.943531629453403e-05, |
| "loss": 0.0043, |
| "step": 9550 |
| }, |
| { |
| "epoch": 239.0, |
| "grad_norm": 0.14887531101703644, |
| "learning_rate": 9.943283560334375e-05, |
| "loss": 0.0038, |
| "step": 9560 |
| }, |
| { |
| "epoch": 239.25, |
| "grad_norm": 0.13858802616596222, |
| "learning_rate": 9.943034950625288e-05, |
| "loss": 0.0038, |
| "step": 9570 |
| }, |
| { |
| "epoch": 239.5, |
| "grad_norm": 0.11963634192943573, |
| "learning_rate": 9.942785800353332e-05, |
| "loss": 0.0047, |
| "step": 9580 |
| }, |
| { |
| "epoch": 239.75, |
| "grad_norm": 0.13546070456504822, |
| "learning_rate": 9.942536109545751e-05, |
| "loss": 0.0041, |
| "step": 9590 |
| }, |
| { |
| "epoch": 240.0, |
| "grad_norm": 0.12385343760251999, |
| "learning_rate": 9.942285878229853e-05, |
| "loss": 0.0038, |
| "step": 9600 |
| }, |
| { |
| "epoch": 240.25, |
| "grad_norm": 0.10541026294231415, |
| "learning_rate": 9.942035106433001e-05, |
| "loss": 0.0042, |
| "step": 9610 |
| }, |
| { |
| "epoch": 240.5, |
| "grad_norm": 0.10453888773918152, |
| "learning_rate": 9.94178379418262e-05, |
| "loss": 0.0047, |
| "step": 9620 |
| }, |
| { |
| "epoch": 240.75, |
| "grad_norm": 0.10397256165742874, |
| "learning_rate": 9.941531941506194e-05, |
| "loss": 0.0041, |
| "step": 9630 |
| }, |
| { |
| "epoch": 241.0, |
| "grad_norm": 0.09260208904743195, |
| "learning_rate": 9.941279548431263e-05, |
| "loss": 0.0042, |
| "step": 9640 |
| }, |
| { |
| "epoch": 241.25, |
| "grad_norm": 0.11214695125818253, |
| "learning_rate": 9.941026614985431e-05, |
| "loss": 0.0044, |
| "step": 9650 |
| }, |
| { |
| "epoch": 241.5, |
| "grad_norm": 0.10193338245153427, |
| "learning_rate": 9.940773141196357e-05, |
| "loss": 0.0039, |
| "step": 9660 |
| }, |
| { |
| "epoch": 241.75, |
| "grad_norm": 0.09992244839668274, |
| "learning_rate": 9.94051912709176e-05, |
| "loss": 0.004, |
| "step": 9670 |
| }, |
| { |
| "epoch": 242.0, |
| "grad_norm": 0.1305544674396515, |
| "learning_rate": 9.940264572699421e-05, |
| "loss": 0.0037, |
| "step": 9680 |
| }, |
| { |
| "epoch": 242.25, |
| "grad_norm": 0.11810918897390366, |
| "learning_rate": 9.940009478047174e-05, |
| "loss": 0.0047, |
| "step": 9690 |
| }, |
| { |
| "epoch": 242.5, |
| "grad_norm": 0.08566198498010635, |
| "learning_rate": 9.939753843162918e-05, |
| "loss": 0.0036, |
| "step": 9700 |
| }, |
| { |
| "epoch": 242.75, |
| "grad_norm": 0.09429420530796051, |
| "learning_rate": 9.939497668074609e-05, |
| "loss": 0.0044, |
| "step": 9710 |
| }, |
| { |
| "epoch": 243.0, |
| "grad_norm": 0.09578400105237961, |
| "learning_rate": 9.93924095281026e-05, |
| "loss": 0.0046, |
| "step": 9720 |
| }, |
| { |
| "epoch": 243.25, |
| "grad_norm": 0.09713154286146164, |
| "learning_rate": 9.938983697397948e-05, |
| "loss": 0.004, |
| "step": 9730 |
| }, |
| { |
| "epoch": 243.5, |
| "grad_norm": 0.10905840992927551, |
| "learning_rate": 9.938725901865805e-05, |
| "loss": 0.004, |
| "step": 9740 |
| }, |
| { |
| "epoch": 243.75, |
| "grad_norm": 0.11417087912559509, |
| "learning_rate": 9.93846756624202e-05, |
| "loss": 0.0043, |
| "step": 9750 |
| }, |
| { |
| "epoch": 244.0, |
| "grad_norm": 0.1219557598233223, |
| "learning_rate": 9.938208690554849e-05, |
| "loss": 0.0039, |
| "step": 9760 |
| }, |
| { |
| "epoch": 244.25, |
| "grad_norm": 0.11892379820346832, |
| "learning_rate": 9.9379492748326e-05, |
| "loss": 0.0039, |
| "step": 9770 |
| }, |
| { |
| "epoch": 244.5, |
| "grad_norm": 0.13812822103500366, |
| "learning_rate": 9.937689319103641e-05, |
| "loss": 0.0037, |
| "step": 9780 |
| }, |
| { |
| "epoch": 244.75, |
| "grad_norm": 0.08162184059619904, |
| "learning_rate": 9.937428823396404e-05, |
| "loss": 0.0039, |
| "step": 9790 |
| }, |
| { |
| "epoch": 245.0, |
| "grad_norm": 0.11298015713691711, |
| "learning_rate": 9.937167787739372e-05, |
| "loss": 0.0041, |
| "step": 9800 |
| }, |
| { |
| "epoch": 245.25, |
| "grad_norm": 0.10408628731966019, |
| "learning_rate": 9.936906212161095e-05, |
| "loss": 0.0044, |
| "step": 9810 |
| }, |
| { |
| "epoch": 245.5, |
| "grad_norm": 0.1286010593175888, |
| "learning_rate": 9.936644096690176e-05, |
| "loss": 0.0044, |
| "step": 9820 |
| }, |
| { |
| "epoch": 245.75, |
| "grad_norm": 0.12994638085365295, |
| "learning_rate": 9.936381441355282e-05, |
| "loss": 0.0049, |
| "step": 9830 |
| }, |
| { |
| "epoch": 246.0, |
| "grad_norm": 0.13015589118003845, |
| "learning_rate": 9.936118246185136e-05, |
| "loss": 0.005, |
| "step": 9840 |
| }, |
| { |
| "epoch": 246.25, |
| "grad_norm": 0.1136460080742836, |
| "learning_rate": 9.935854511208518e-05, |
| "loss": 0.0047, |
| "step": 9850 |
| }, |
| { |
| "epoch": 246.5, |
| "grad_norm": 0.12648144364356995, |
| "learning_rate": 9.935590236454272e-05, |
| "loss": 0.0044, |
| "step": 9860 |
| }, |
| { |
| "epoch": 246.75, |
| "grad_norm": 0.11533254384994507, |
| "learning_rate": 9.935325421951298e-05, |
| "loss": 0.0042, |
| "step": 9870 |
| }, |
| { |
| "epoch": 247.0, |
| "grad_norm": 0.11005179584026337, |
| "learning_rate": 9.935060067728557e-05, |
| "loss": 0.0032, |
| "step": 9880 |
| }, |
| { |
| "epoch": 247.25, |
| "grad_norm": 0.1126861721277237, |
| "learning_rate": 9.934794173815067e-05, |
| "loss": 0.0043, |
| "step": 9890 |
| }, |
| { |
| "epoch": 247.5, |
| "grad_norm": 0.11560443043708801, |
| "learning_rate": 9.934527740239906e-05, |
| "loss": 0.0045, |
| "step": 9900 |
| }, |
| { |
| "epoch": 247.75, |
| "grad_norm": 0.11774428933858871, |
| "learning_rate": 9.934260767032209e-05, |
| "loss": 0.0043, |
| "step": 9910 |
| }, |
| { |
| "epoch": 248.0, |
| "grad_norm": 0.11532504856586456, |
| "learning_rate": 9.933993254221172e-05, |
| "loss": 0.0045, |
| "step": 9920 |
| }, |
| { |
| "epoch": 248.25, |
| "grad_norm": 0.08799305558204651, |
| "learning_rate": 9.933725201836053e-05, |
| "loss": 0.0049, |
| "step": 9930 |
| }, |
| { |
| "epoch": 248.5, |
| "grad_norm": 0.11146137118339539, |
| "learning_rate": 9.933456609906162e-05, |
| "loss": 0.0039, |
| "step": 9940 |
| }, |
| { |
| "epoch": 248.75, |
| "grad_norm": 0.08155910670757294, |
| "learning_rate": 9.933187478460875e-05, |
| "loss": 0.0047, |
| "step": 9950 |
| }, |
| { |
| "epoch": 249.0, |
| "grad_norm": 0.08949435502290726, |
| "learning_rate": 9.93291780752962e-05, |
| "loss": 0.0048, |
| "step": 9960 |
| }, |
| { |
| "epoch": 249.25, |
| "grad_norm": 0.10293368995189667, |
| "learning_rate": 9.932647597141893e-05, |
| "loss": 0.004, |
| "step": 9970 |
| }, |
| { |
| "epoch": 249.5, |
| "grad_norm": 0.09809095412492752, |
| "learning_rate": 9.932376847327239e-05, |
| "loss": 0.004, |
| "step": 9980 |
| }, |
| { |
| "epoch": 249.75, |
| "grad_norm": 0.08202514797449112, |
| "learning_rate": 9.932105558115268e-05, |
| "loss": 0.0036, |
| "step": 9990 |
| }, |
| { |
| "epoch": 250.0, |
| "grad_norm": 0.12790925800800323, |
| "learning_rate": 9.931833729535651e-05, |
| "loss": 0.0048, |
| "step": 10000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 100000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2500, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 256, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|