| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 5000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002, |
| "grad_norm": 0.09511563926935196, |
| "learning_rate": 2e-08, |
| "loss": 0.5518, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.004, |
| "grad_norm": 0.10312563925981522, |
| "learning_rate": 4e-08, |
| "loss": 0.3882, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.006, |
| "grad_norm": 0.38763827085494995, |
| "learning_rate": 6e-08, |
| "loss": 0.5202, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.008, |
| "grad_norm": 0.0, |
| "learning_rate": 8e-08, |
| "loss": 0.3657, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.27898716926574707, |
| "learning_rate": 1e-07, |
| "loss": 0.5781, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.012, |
| "grad_norm": 0.12367476522922516, |
| "learning_rate": 1.2e-07, |
| "loss": 0.3995, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.014, |
| "grad_norm": 0.34459570050239563, |
| "learning_rate": 1.4e-07, |
| "loss": 0.4881, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6e-07, |
| "loss": 0.4435, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.018, |
| "grad_norm": 0.1325724720954895, |
| "learning_rate": 1.8e-07, |
| "loss": 0.3328, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.14823980629444122, |
| "learning_rate": 2e-07, |
| "loss": 0.3558, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.022, |
| "grad_norm": 0.10182484984397888, |
| "learning_rate": 2.1999999999999998e-07, |
| "loss": 0.429, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.024, |
| "grad_norm": 0.2652245759963989, |
| "learning_rate": 2.4e-07, |
| "loss": 0.5915, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.026, |
| "grad_norm": 0.17615041136741638, |
| "learning_rate": 2.6e-07, |
| "loss": 0.5184, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.028, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8e-07, |
| "loss": 0.5047, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.17482487857341766, |
| "learning_rate": 3e-07, |
| "loss": 0.5339, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2e-07, |
| "loss": 0.376, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.034, |
| "grad_norm": 0.23231004178524017, |
| "learning_rate": 3.4000000000000003e-07, |
| "loss": 0.5982, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.036, |
| "grad_norm": 0.22457273304462433, |
| "learning_rate": 3.6e-07, |
| "loss": 0.354, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.038, |
| "grad_norm": 0.19676104187965393, |
| "learning_rate": 3.7999999999999996e-07, |
| "loss": 0.3774, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.11160765588283539, |
| "learning_rate": 4e-07, |
| "loss": 0.5287, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.042, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1999999999999995e-07, |
| "loss": 0.3339, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.044, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3999999999999997e-07, |
| "loss": 0.3206, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.046, |
| "grad_norm": 0.0, |
| "learning_rate": 4.6e-07, |
| "loss": 0.2747, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 0.14804145693778992, |
| "learning_rate": 4.8e-07, |
| "loss": 0.4989, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.2789492607116699, |
| "learning_rate": 5e-07, |
| "loss": 0.4731, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.052, |
| "grad_norm": 0.2051277458667755, |
| "learning_rate": 5.2e-07, |
| "loss": 0.4554, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.054, |
| "grad_norm": 0.3951795697212219, |
| "learning_rate": 5.4e-07, |
| "loss": 0.588, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.056, |
| "grad_norm": 0.25915592908859253, |
| "learning_rate": 5.6e-07, |
| "loss": 0.3667, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.058, |
| "grad_norm": 0.0, |
| "learning_rate": 5.8e-07, |
| "loss": 0.4586, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.322915256023407, |
| "learning_rate": 6e-07, |
| "loss": 0.5932, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.062, |
| "grad_norm": 0.12915822863578796, |
| "learning_rate": 6.2e-07, |
| "loss": 0.4911, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 0.0, |
| "learning_rate": 6.4e-07, |
| "loss": 0.2096, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.066, |
| "grad_norm": 0.16741128265857697, |
| "learning_rate": 6.6e-07, |
| "loss": 0.323, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.068, |
| "grad_norm": 0.15972258150577545, |
| "learning_rate": 6.800000000000001e-07, |
| "loss": 0.4309, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.11853062361478806, |
| "learning_rate": 7e-07, |
| "loss": 0.6585, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.072, |
| "grad_norm": 0.38085484504699707, |
| "learning_rate": 7.2e-07, |
| "loss": 0.4488, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.074, |
| "grad_norm": 0.18096469342708588, |
| "learning_rate": 7.4e-07, |
| "loss": 0.6929, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.076, |
| "grad_norm": 0.23759421706199646, |
| "learning_rate": 7.599999999999999e-07, |
| "loss": 0.5354, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.078, |
| "grad_norm": 0.1254105567932129, |
| "learning_rate": 7.799999999999999e-07, |
| "loss": 0.4076, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.2565242052078247, |
| "learning_rate": 8e-07, |
| "loss": 0.3771, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.082, |
| "grad_norm": 0.16939924657344818, |
| "learning_rate": 8.199999999999999e-07, |
| "loss": 0.544, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.084, |
| "grad_norm": 0.1262465864419937, |
| "learning_rate": 8.399999999999999e-07, |
| "loss": 0.482, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.086, |
| "grad_norm": 0.27009153366088867, |
| "learning_rate": 8.599999999999999e-07, |
| "loss": 0.5624, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.088, |
| "grad_norm": 0.21943224966526031, |
| "learning_rate": 8.799999999999999e-07, |
| "loss": 0.6051, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.13974343240261078, |
| "learning_rate": 9e-07, |
| "loss": 0.5072, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.092, |
| "grad_norm": 0.176634281873703, |
| "learning_rate": 9.2e-07, |
| "loss": 0.4562, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.094, |
| "grad_norm": 0.0, |
| "learning_rate": 9.399999999999999e-07, |
| "loss": 0.429, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 0.27144044637680054, |
| "learning_rate": 9.6e-07, |
| "loss": 0.52, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.098, |
| "grad_norm": 0.1168377548456192, |
| "learning_rate": 9.8e-07, |
| "loss": 0.5442, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.27513357996940613, |
| "learning_rate": 1e-06, |
| "loss": 0.455, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.102, |
| "grad_norm": 0.11138566583395004, |
| "learning_rate": 9.999878153526972e-07, |
| "loss": 0.4699, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.104, |
| "grad_norm": 0.24979764223098755, |
| "learning_rate": 9.99951262004652e-07, |
| "loss": 0.6373, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.106, |
| "grad_norm": 0.0, |
| "learning_rate": 9.998903417374226e-07, |
| "loss": 0.477, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.108, |
| "grad_norm": 0.3736177384853363, |
| "learning_rate": 9.99805057520177e-07, |
| "loss": 0.5404, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.29452452063560486, |
| "learning_rate": 9.996954135095478e-07, |
| "loss": 0.5448, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 0.16893912851810455, |
| "learning_rate": 9.99561415049429e-07, |
| "loss": 0.4575, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.114, |
| "grad_norm": 0.0, |
| "learning_rate": 9.99403068670717e-07, |
| "loss": 0.4689, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.116, |
| "grad_norm": 0.14565463364124298, |
| "learning_rate": 9.992203820909905e-07, |
| "loss": 0.3034, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.118, |
| "grad_norm": 0.4068674147129059, |
| "learning_rate": 9.990133642141357e-07, |
| "loss": 0.5936, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.292869508266449, |
| "learning_rate": 9.98782025129912e-07, |
| "loss": 0.3625, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.122, |
| "grad_norm": 0.16919468343257904, |
| "learning_rate": 9.9852637611346e-07, |
| "loss": 0.2607, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.124, |
| "grad_norm": 0.18435430526733398, |
| "learning_rate": 9.982464296247522e-07, |
| "loss": 0.4084, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.126, |
| "grad_norm": 0.0, |
| "learning_rate": 9.97942199307985e-07, |
| "loss": 0.3676, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 0.14995695650577545, |
| "learning_rate": 9.976136999909155e-07, |
| "loss": 0.2827, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.1467619389295578, |
| "learning_rate": 9.972609476841365e-07, |
| "loss": 0.5643, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.132, |
| "grad_norm": 0.16371601819992065, |
| "learning_rate": 9.968839595802981e-07, |
| "loss": 0.4394, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.134, |
| "grad_norm": 0.1859179437160492, |
| "learning_rate": 9.964827540532684e-07, |
| "loss": 0.55, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.136, |
| "grad_norm": 0.22546283900737762, |
| "learning_rate": 9.960573506572389e-07, |
| "loss": 0.5079, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.138, |
| "grad_norm": 0.0, |
| "learning_rate": 9.956077701257707e-07, |
| "loss": 0.4989, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.17672795057296753, |
| "learning_rate": 9.95134034370785e-07, |
| "loss": 0.3486, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.142, |
| "grad_norm": 0.18023300170898438, |
| "learning_rate": 9.946361664814943e-07, |
| "loss": 0.4449, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 0.20544709265232086, |
| "learning_rate": 9.941141907232763e-07, |
| "loss": 0.4664, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.146, |
| "grad_norm": 0.0, |
| "learning_rate": 9.93568132536494e-07, |
| "loss": 0.5143, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.148, |
| "grad_norm": 0.0, |
| "learning_rate": 9.929980185352525e-07, |
| "loss": 0.476, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.16490820050239563, |
| "learning_rate": 9.92403876506104e-07, |
| "loss": 0.5148, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.152, |
| "grad_norm": 0.27272915840148926, |
| "learning_rate": 9.91785735406693e-07, |
| "loss": 0.3911, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.154, |
| "grad_norm": 0.0, |
| "learning_rate": 9.911436253643443e-07, |
| "loss": 0.4182, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.156, |
| "grad_norm": 0.18133500218391418, |
| "learning_rate": 9.904775776745956e-07, |
| "loss": 0.3948, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.158, |
| "grad_norm": 0.0, |
| "learning_rate": 9.89787624799672e-07, |
| "loss": 0.3711, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.496552050113678, |
| "learning_rate": 9.890738003669027e-07, |
| "loss": 0.6309, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.162, |
| "grad_norm": 0.14190438389778137, |
| "learning_rate": 9.883361391670839e-07, |
| "loss": 0.3818, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.164, |
| "grad_norm": 0.0, |
| "learning_rate": 9.875746771527815e-07, |
| "loss": 0.4072, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.166, |
| "grad_norm": 0.6360119581222534, |
| "learning_rate": 9.8678945143658e-07, |
| "loss": 0.4555, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.168, |
| "grad_norm": 0.0, |
| "learning_rate": 9.859805002892731e-07, |
| "loss": 0.5091, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.0, |
| "learning_rate": 9.851478631379982e-07, |
| "loss": 0.4239, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.172, |
| "grad_norm": 0.2694742977619171, |
| "learning_rate": 9.842915805643156e-07, |
| "loss": 0.351, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.174, |
| "grad_norm": 0.5400378704071045, |
| "learning_rate": 9.834116943022297e-07, |
| "loss": 0.4987, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 0.3166458308696747, |
| "learning_rate": 9.825082472361556e-07, |
| "loss": 0.496, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.178, |
| "grad_norm": 0.0, |
| "learning_rate": 9.81581283398829e-07, |
| "loss": 0.4556, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.35629552602767944, |
| "learning_rate": 9.806308479691594e-07, |
| "loss": 0.4525, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.182, |
| "grad_norm": 0.0, |
| "learning_rate": 9.796569872700287e-07, |
| "loss": 0.4417, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.184, |
| "grad_norm": 0.0, |
| "learning_rate": 9.786597487660335e-07, |
| "loss": 0.5627, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.186, |
| "grad_norm": 0.0, |
| "learning_rate": 9.776391810611718e-07, |
| "loss": 0.4631, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.188, |
| "grad_norm": 0.17732886970043182, |
| "learning_rate": 9.765953338964734e-07, |
| "loss": 0.5821, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.38875874876976013, |
| "learning_rate": 9.755282581475767e-07, |
| "loss": 0.6502, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 0.26511499285697937, |
| "learning_rate": 9.744380058222482e-07, |
| "loss": 0.6197, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.194, |
| "grad_norm": 0.19585512578487396, |
| "learning_rate": 9.733246300578482e-07, |
| "loss": 0.4921, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.196, |
| "grad_norm": 0.15143270790576935, |
| "learning_rate": 9.721881851187405e-07, |
| "loss": 0.3413, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.198, |
| "grad_norm": 0.30573219060897827, |
| "learning_rate": 9.710287263936483e-07, |
| "loss": 0.6364, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.29644232988357544, |
| "learning_rate": 9.698463103929541e-07, |
| "loss": 0.5231, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.202, |
| "grad_norm": 0.22645096480846405, |
| "learning_rate": 9.686409947459457e-07, |
| "loss": 0.5178, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.204, |
| "grad_norm": 0.1591133326292038, |
| "learning_rate": 9.674128381980071e-07, |
| "loss": 0.4662, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.206, |
| "grad_norm": 0.0, |
| "learning_rate": 9.661619006077561e-07, |
| "loss": 0.3836, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 0.24482370913028717, |
| "learning_rate": 9.648882429441256e-07, |
| "loss": 0.4001, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.13188563287258148, |
| "learning_rate": 9.635919272833937e-07, |
| "loss": 0.4563, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.212, |
| "grad_norm": 0.2836967408657074, |
| "learning_rate": 9.622730168061567e-07, |
| "loss": 0.4824, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.214, |
| "grad_norm": 0.3066335916519165, |
| "learning_rate": 9.609315757942502e-07, |
| "loss": 0.3444, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.216, |
| "grad_norm": 0.23532205820083618, |
| "learning_rate": 9.595676696276171e-07, |
| "loss": 0.2908, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.218, |
| "grad_norm": 0.0, |
| "learning_rate": 9.581813647811197e-07, |
| "loss": 0.3996, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.1776718646287918, |
| "learning_rate": 9.567727288213004e-07, |
| "loss": 0.4825, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.222, |
| "grad_norm": 0.4047851860523224, |
| "learning_rate": 9.553418304030885e-07, |
| "loss": 0.3437, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 0.7905092835426331, |
| "learning_rate": 9.538887392664543e-07, |
| "loss": 0.5819, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.226, |
| "grad_norm": 0.12891711294651031, |
| "learning_rate": 9.524135262330098e-07, |
| "loss": 0.7856, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.228, |
| "grad_norm": 0.0, |
| "learning_rate": 9.509162632025569e-07, |
| "loss": 0.3738, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.0, |
| "learning_rate": 9.493970231495834e-07, |
| "loss": 0.4878, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.232, |
| "grad_norm": 0.0, |
| "learning_rate": 9.478558801197064e-07, |
| "loss": 0.2867, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.234, |
| "grad_norm": 0.2714207172393799, |
| "learning_rate": 9.462929092260628e-07, |
| "loss": 0.5123, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.236, |
| "grad_norm": 0.0, |
| "learning_rate": 9.447081866456487e-07, |
| "loss": 0.4011, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.238, |
| "grad_norm": 0.2039383202791214, |
| "learning_rate": 9.431017896156073e-07, |
| "loss": 0.443, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.2765626013278961, |
| "learning_rate": 9.414737964294634e-07, |
| "loss": 0.4217, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.242, |
| "grad_norm": 0.24081580340862274, |
| "learning_rate": 9.398242864333083e-07, |
| "loss": 0.4292, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.244, |
| "grad_norm": 0.24271254241466522, |
| "learning_rate": 9.381533400219317e-07, |
| "loss": 0.5004, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.246, |
| "grad_norm": 0.0, |
| "learning_rate": 9.364610386349047e-07, |
| "loss": 0.3022, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.248, |
| "grad_norm": 0.0, |
| "learning_rate": 9.347474647526095e-07, |
| "loss": 0.5438, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.0, |
| "learning_rate": 9.330127018922193e-07, |
| "loss": 0.2911, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.252, |
| "grad_norm": 0.2007410079240799, |
| "learning_rate": 9.312568346036287e-07, |
| "loss": 0.3899, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.254, |
| "grad_norm": 0.0, |
| "learning_rate": 9.294799484653322e-07, |
| "loss": 0.3493, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 0.20657280087471008, |
| "learning_rate": 9.276821300802533e-07, |
| "loss": 0.4201, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.258, |
| "grad_norm": 0.0, |
| "learning_rate": 9.258634670715237e-07, |
| "loss": 0.2989, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.25726035237312317, |
| "learning_rate": 9.240240480782129e-07, |
| "loss": 0.3452, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.262, |
| "grad_norm": 0.22192955017089844, |
| "learning_rate": 9.221639627510075e-07, |
| "loss": 0.397, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.264, |
| "grad_norm": 0.7377709150314331, |
| "learning_rate": 9.202833017478421e-07, |
| "loss": 0.436, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.266, |
| "grad_norm": 0.1414753943681717, |
| "learning_rate": 9.183821567294808e-07, |
| "loss": 0.3057, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.268, |
| "grad_norm": 0.20657813549041748, |
| "learning_rate": 9.164606203550497e-07, |
| "loss": 0.3969, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.31822967529296875, |
| "learning_rate": 9.145187862775208e-07, |
| "loss": 0.3546, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 0.0, |
| "learning_rate": 9.125567491391475e-07, |
| "loss": 0.3025, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.274, |
| "grad_norm": 0.0, |
| "learning_rate": 9.10574604566852e-07, |
| "loss": 0.3859, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.276, |
| "grad_norm": 0.0, |
| "learning_rate": 9.085724491675642e-07, |
| "loss": 0.1681, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.278, |
| "grad_norm": 0.15268255770206451, |
| "learning_rate": 9.065503805235137e-07, |
| "loss": 0.3683, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.17399345338344574, |
| "learning_rate": 9.045084971874737e-07, |
| "loss": 0.4302, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.282, |
| "grad_norm": 0.0, |
| "learning_rate": 9.02446898677957e-07, |
| "loss": 0.2385, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.284, |
| "grad_norm": 0.2537860870361328, |
| "learning_rate": 9.003656854743666e-07, |
| "loss": 0.4203, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.286, |
| "grad_norm": 0.0, |
| "learning_rate": 8.982649590120981e-07, |
| "loss": 0.4813, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 0.1680055409669876, |
| "learning_rate": 8.961448216775953e-07, |
| "loss": 0.3703, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.0, |
| "learning_rate": 8.940053768033608e-07, |
| "loss": 0.4513, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.292, |
| "grad_norm": 0.1762644648551941, |
| "learning_rate": 8.918467286629198e-07, |
| "loss": 0.5364, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.294, |
| "grad_norm": 0.11749023199081421, |
| "learning_rate": 8.896689824657371e-07, |
| "loss": 0.3007, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.296, |
| "grad_norm": 0.38002896308898926, |
| "learning_rate": 8.874722443520898e-07, |
| "loss": 0.3197, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.298, |
| "grad_norm": 0.0, |
| "learning_rate": 8.852566213878946e-07, |
| "loss": 0.3984, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.18843649327754974, |
| "learning_rate": 8.83022221559489e-07, |
| "loss": 0.3497, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.302, |
| "grad_norm": 0.17246094346046448, |
| "learning_rate": 8.807691537683684e-07, |
| "loss": 0.4713, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 0.3905675411224365, |
| "learning_rate": 8.784975278258782e-07, |
| "loss": 0.3388, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.306, |
| "grad_norm": 0.27666154503822327, |
| "learning_rate": 8.762074544478621e-07, |
| "loss": 0.4949, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.308, |
| "grad_norm": 0.17207394540309906, |
| "learning_rate": 8.73899045249266e-07, |
| "loss": 0.4398, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.2769884467124939, |
| "learning_rate": 8.71572412738697e-07, |
| "loss": 0.4274, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.312, |
| "grad_norm": 0.17716415226459503, |
| "learning_rate": 8.69227670312942e-07, |
| "loss": 0.5379, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.314, |
| "grad_norm": 0.0, |
| "learning_rate": 8.668649322514381e-07, |
| "loss": 0.2762, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.316, |
| "grad_norm": 0.25415194034576416, |
| "learning_rate": 8.644843137107057e-07, |
| "loss": 0.3321, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.318, |
| "grad_norm": 0.1295280158519745, |
| "learning_rate": 8.620859307187338e-07, |
| "loss": 0.4295, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.2956778407096863, |
| "learning_rate": 8.596699001693255e-07, |
| "loss": 0.34, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.322, |
| "grad_norm": 0.15349340438842773, |
| "learning_rate": 8.572363398164016e-07, |
| "loss": 0.4112, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.324, |
| "grad_norm": 0.16322429478168488, |
| "learning_rate": 8.547853682682604e-07, |
| "loss": 0.5416, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.326, |
| "grad_norm": 0.1974148154258728, |
| "learning_rate": 8.523171049817973e-07, |
| "loss": 0.304, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.328, |
| "grad_norm": 0.23957253992557526, |
| "learning_rate": 8.498316702566826e-07, |
| "loss": 0.3669, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.19382289052009583, |
| "learning_rate": 8.473291852294986e-07, |
| "loss": 0.4593, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.332, |
| "grad_norm": 0.25313159823417664, |
| "learning_rate": 8.448097718678348e-07, |
| "loss": 0.4814, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.334, |
| "grad_norm": 0.0, |
| "learning_rate": 8.422735529643443e-07, |
| "loss": 0.2778, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.336, |
| "grad_norm": 0.0, |
| "learning_rate": 8.397206521307583e-07, |
| "loss": 0.3254, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.338, |
| "grad_norm": 0.1322602927684784, |
| "learning_rate": 8.371511937918617e-07, |
| "loss": 0.4763, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.2401864230632782, |
| "learning_rate": 8.34565303179429e-07, |
| "loss": 0.7254, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.342, |
| "grad_norm": 0.18911859393119812, |
| "learning_rate": 8.319631063261207e-07, |
| "loss": 0.4746, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.344, |
| "grad_norm": 0.25711727142333984, |
| "learning_rate": 8.293447300593402e-07, |
| "loss": 0.5397, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.346, |
| "grad_norm": 0.34139931201934814, |
| "learning_rate": 8.267103019950528e-07, |
| "loss": 0.4286, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.348, |
| "grad_norm": 0.2745548188686371, |
| "learning_rate": 8.240599505315654e-07, |
| "loss": 0.5636, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.0, |
| "learning_rate": 8.213938048432696e-07, |
| "loss": 0.3068, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 0.1870785355567932, |
| "learning_rate": 8.187119948743449e-07, |
| "loss": 0.5474, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.354, |
| "grad_norm": 0.0, |
| "learning_rate": 8.160146513324254e-07, |
| "loss": 0.4187, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.356, |
| "grad_norm": 0.21126168966293335, |
| "learning_rate": 8.133019056822302e-07, |
| "loss": 0.4264, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.358, |
| "grad_norm": 0.15116102993488312, |
| "learning_rate": 8.105738901391551e-07, |
| "loss": 0.496, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.24551807343959808, |
| "learning_rate": 8.07830737662829e-07, |
| "loss": 0.3313, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.362, |
| "grad_norm": 0.13998627662658691, |
| "learning_rate": 8.050725819506339e-07, |
| "loss": 0.4672, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.364, |
| "grad_norm": 0.25737959146499634, |
| "learning_rate": 8.022995574311875e-07, |
| "loss": 0.5635, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.366, |
| "grad_norm": 0.2195865362882614, |
| "learning_rate": 7.995117992577928e-07, |
| "loss": 0.5226, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.368, |
| "grad_norm": 0.3139910101890564, |
| "learning_rate": 7.967094433018508e-07, |
| "loss": 0.4677, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.23573672771453857, |
| "learning_rate": 7.938926261462365e-07, |
| "loss": 0.4203, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.372, |
| "grad_norm": 0.0, |
| "learning_rate": 7.910614850786447e-07, |
| "loss": 0.3362, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.374, |
| "grad_norm": 0.22290241718292236, |
| "learning_rate": 7.882161580848966e-07, |
| "loss": 0.3619, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.376, |
| "grad_norm": 0.3617456555366516, |
| "learning_rate": 7.853567838422159e-07, |
| "loss": 0.5384, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.378, |
| "grad_norm": 0.2721762955188751, |
| "learning_rate": 7.82483501712469e-07, |
| "loss": 0.2952, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.41200798749923706, |
| "learning_rate": 7.795964517353733e-07, |
| "loss": 0.4751, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.382, |
| "grad_norm": 0.2644686996936798, |
| "learning_rate": 7.76695774621672e-07, |
| "loss": 0.3517, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 0.25469326972961426, |
| "learning_rate": 7.737816117462751e-07, |
| "loss": 0.4186, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.386, |
| "grad_norm": 0.19214561581611633, |
| "learning_rate": 7.7085410514137e-07, |
| "loss": 0.4724, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.388, |
| "grad_norm": 0.4125128984451294, |
| "learning_rate": 7.679133974894982e-07, |
| "loss": 0.3452, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.21998248994350433, |
| "learning_rate": 7.649596321166024e-07, |
| "loss": 0.3417, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.392, |
| "grad_norm": 0.2360021471977234, |
| "learning_rate": 7.619929529850396e-07, |
| "loss": 0.4394, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.394, |
| "grad_norm": 0.0, |
| "learning_rate": 7.590135046865651e-07, |
| "loss": 0.508, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.396, |
| "grad_norm": 0.193772554397583, |
| "learning_rate": 7.560214324352858e-07, |
| "loss": 0.4481, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.398, |
| "grad_norm": 0.14415864646434784, |
| "learning_rate": 7.530168820605818e-07, |
| "loss": 0.4475, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.7299479842185974, |
| "learning_rate": 7.5e-07, |
| "loss": 0.6053, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.402, |
| "grad_norm": 0.43856266140937805, |
| "learning_rate": 7.469709332921154e-07, |
| "loss": 0.4311, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.404, |
| "grad_norm": 0.14937852323055267, |
| "learning_rate": 7.439298295693663e-07, |
| "loss": 0.2692, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.406, |
| "grad_norm": 0.0, |
| "learning_rate": 7.408768370508576e-07, |
| "loss": 0.46, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.408, |
| "grad_norm": 0.1654849648475647, |
| "learning_rate": 7.378121045351377e-07, |
| "loss": 0.3169, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.17147372663021088, |
| "learning_rate": 7.347357813929454e-07, |
| "loss": 0.3335, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.412, |
| "grad_norm": 0.2433047890663147, |
| "learning_rate": 7.316480175599308e-07, |
| "loss": 0.3628, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.414, |
| "grad_norm": 0.0, |
| "learning_rate": 7.285489635293471e-07, |
| "loss": 0.3468, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 0.0, |
| "learning_rate": 7.254387703447153e-07, |
| "loss": 0.5754, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.418, |
| "grad_norm": 0.0, |
| "learning_rate": 7.223175895924637e-07, |
| "loss": 0.3073, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.3906990885734558, |
| "learning_rate": 7.191855733945386e-07, |
| "loss": 0.4299, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.422, |
| "grad_norm": 0.4774358868598938, |
| "learning_rate": 7.160428744009912e-07, |
| "loss": 0.3457, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.424, |
| "grad_norm": 0.18777182698249817, |
| "learning_rate": 7.128896457825363e-07, |
| "loss": 0.3811, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.426, |
| "grad_norm": 0.1838907152414322, |
| "learning_rate": 7.097260412230885e-07, |
| "loss": 0.3978, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.428, |
| "grad_norm": 0.2754370868206024, |
| "learning_rate": 7.065522149122709e-07, |
| "loss": 0.3178, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.0, |
| "learning_rate": 7.033683215379002e-07, |
| "loss": 0.3867, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.432, |
| "grad_norm": 0.16577281057834625, |
| "learning_rate": 7.001745162784475e-07, |
| "loss": 0.3249, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.434, |
| "grad_norm": 0.0, |
| "learning_rate": 6.969709547954755e-07, |
| "loss": 0.3599, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.436, |
| "grad_norm": 0.24327167868614197, |
| "learning_rate": 6.937577932260514e-07, |
| "loss": 0.3658, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.438, |
| "grad_norm": 0.24798893928527832, |
| "learning_rate": 6.905351881751371e-07, |
| "loss": 0.4314, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.44808080792427063, |
| "learning_rate": 6.87303296707956e-07, |
| "loss": 0.457, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.442, |
| "grad_norm": 0.21504735946655273, |
| "learning_rate": 6.840622763423391e-07, |
| "loss": 0.3794, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.444, |
| "grad_norm": 0.17601414024829865, |
| "learning_rate": 6.80812285041046e-07, |
| "loss": 0.3203, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.446, |
| "grad_norm": 0.0, |
| "learning_rate": 6.775534812040686e-07, |
| "loss": 0.4305, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 0.0, |
| "learning_rate": 6.742860236609076e-07, |
| "loss": 0.3924, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.236984521150589, |
| "learning_rate": 6.710100716628344e-07, |
| "loss": 0.5559, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.452, |
| "grad_norm": 0.30061930418014526, |
| "learning_rate": 6.677257848751276e-07, |
| "loss": 0.3558, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.454, |
| "grad_norm": 0.37310150265693665, |
| "learning_rate": 6.644333233692916e-07, |
| "loss": 0.3047, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.456, |
| "grad_norm": 0.16775313019752502, |
| "learning_rate": 6.611328476152556e-07, |
| "loss": 0.3683, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.458, |
| "grad_norm": 0.19349265098571777, |
| "learning_rate": 6.578245184735512e-07, |
| "loss": 0.2474, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.0, |
| "learning_rate": 6.545084971874736e-07, |
| "loss": 0.3745, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.462, |
| "grad_norm": 0.24532967805862427, |
| "learning_rate": 6.511849453752223e-07, |
| "loss": 0.4912, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.464, |
| "grad_norm": 0.3983213007450104, |
| "learning_rate": 6.478540250220233e-07, |
| "loss": 0.3237, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.466, |
| "grad_norm": 0.12001678347587585, |
| "learning_rate": 6.445158984722358e-07, |
| "loss": 0.3325, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.468, |
| "grad_norm": 0.2640908360481262, |
| "learning_rate": 6.411707284214383e-07, |
| "loss": 0.3922, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.156937375664711, |
| "learning_rate": 6.378186779084995e-07, |
| "loss": 0.3165, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.472, |
| "grad_norm": 0.42180562019348145, |
| "learning_rate": 6.344599103076328e-07, |
| "loss": 0.6193, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.474, |
| "grad_norm": 0.6098366975784302, |
| "learning_rate": 6.310945893204324e-07, |
| "loss": 0.4753, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.476, |
| "grad_norm": 0.25933900475502014, |
| "learning_rate": 6.277228789678953e-07, |
| "loss": 0.2317, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.478, |
| "grad_norm": 0.15789282321929932, |
| "learning_rate": 6.243449435824276e-07, |
| "loss": 0.4078, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.33222755789756775, |
| "learning_rate": 6.209609477998338e-07, |
| "loss": 0.5107, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.482, |
| "grad_norm": 0.291755348443985, |
| "learning_rate": 6.17571056551295e-07, |
| "loss": 0.4014, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.484, |
| "grad_norm": 0.21875688433647156, |
| "learning_rate": 6.141754350553279e-07, |
| "loss": 0.5982, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.486, |
| "grad_norm": 0.15691211819648743, |
| "learning_rate": 6.107742488097338e-07, |
| "loss": 0.3726, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.488, |
| "grad_norm": 0.21300239861011505, |
| "learning_rate": 6.073676635835316e-07, |
| "loss": 0.3049, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.0, |
| "learning_rate": 6.039558454088795e-07, |
| "loss": 0.4705, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.492, |
| "grad_norm": 0.0, |
| "learning_rate": 6.005389605729824e-07, |
| "loss": 0.2637, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.494, |
| "grad_norm": 0.2578297555446625, |
| "learning_rate": 5.97117175609986e-07, |
| "loss": 0.4123, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.496, |
| "grad_norm": 0.0, |
| "learning_rate": 5.936906572928624e-07, |
| "loss": 0.3991, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.498, |
| "grad_norm": 0.16451826691627502, |
| "learning_rate": 5.9025957262528e-07, |
| "loss": 0.3269, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.0, |
| "learning_rate": 5.868240888334652e-07, |
| "loss": 0.3393, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.502, |
| "grad_norm": 0.0, |
| "learning_rate": 5.833843733580512e-07, |
| "loss": 0.2422, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.504, |
| "grad_norm": 0.14584174752235413, |
| "learning_rate": 5.799405938459174e-07, |
| "loss": 0.3788, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.506, |
| "grad_norm": 0.222733274102211, |
| "learning_rate": 5.764929181420191e-07, |
| "loss": 0.4557, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.508, |
| "grad_norm": 0.16218475997447968, |
| "learning_rate": 5.730415142812058e-07, |
| "loss": 0.2811, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.20907045900821686, |
| "learning_rate": 5.695865504800327e-07, |
| "loss": 0.4745, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 0.3579579293727875, |
| "learning_rate": 5.661281951285612e-07, |
| "loss": 0.474, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.514, |
| "grad_norm": 0.46978068351745605, |
| "learning_rate": 5.626666167821521e-07, |
| "loss": 0.6606, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.516, |
| "grad_norm": 0.0, |
| "learning_rate": 5.592019841532506e-07, |
| "loss": 0.2798, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.518, |
| "grad_norm": 0.28995615243911743, |
| "learning_rate": 5.557344661031627e-07, |
| "loss": 0.5134, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.2651956081390381, |
| "learning_rate": 5.522642316338268e-07, |
| "loss": 0.3321, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.522, |
| "grad_norm": 0.0, |
| "learning_rate": 5.487914498795747e-07, |
| "loss": 0.303, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.524, |
| "grad_norm": 0.12660300731658936, |
| "learning_rate": 5.453162900988901e-07, |
| "loss": 0.2541, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.526, |
| "grad_norm": 0.0, |
| "learning_rate": 5.418389216661578e-07, |
| "loss": 0.2579, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.528, |
| "grad_norm": 0.2067832052707672, |
| "learning_rate": 5.383595140634093e-07, |
| "loss": 0.5188, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.0, |
| "learning_rate": 5.348782368720625e-07, |
| "loss": 0.4631, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.532, |
| "grad_norm": 0.0, |
| "learning_rate": 5.313952597646567e-07, |
| "loss": 0.2561, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.534, |
| "grad_norm": 0.4701634347438812, |
| "learning_rate": 5.27910752496582e-07, |
| "loss": 0.4505, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.536, |
| "grad_norm": 0.16703377664089203, |
| "learning_rate": 5.244248848978067e-07, |
| "loss": 0.3182, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.538, |
| "grad_norm": 0.432353675365448, |
| "learning_rate": 5.209378268645997e-07, |
| "loss": 0.4185, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.22460083663463593, |
| "learning_rate": 5.174497483512505e-07, |
| "loss": 0.3789, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.542, |
| "grad_norm": 0.11614007502794266, |
| "learning_rate": 5.139608193617844e-07, |
| "loss": 0.2556, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 0.0, |
| "learning_rate": 5.104712099416785e-07, |
| "loss": 0.4128, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.546, |
| "grad_norm": 0.0, |
| "learning_rate": 5.069810901695727e-07, |
| "loss": 0.373, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.548, |
| "grad_norm": 0.0, |
| "learning_rate": 5.034906301489807e-07, |
| "loss": 0.4337, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.19119147956371307, |
| "learning_rate": 5e-07, |
| "loss": 0.3645, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.552, |
| "grad_norm": 0.3457646071910858, |
| "learning_rate": 4.965093698510192e-07, |
| "loss": 0.412, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.554, |
| "grad_norm": 0.18558606505393982, |
| "learning_rate": 4.930189098304274e-07, |
| "loss": 0.3623, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.556, |
| "grad_norm": 0.2022610604763031, |
| "learning_rate": 4.895287900583216e-07, |
| "loss": 0.5347, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.558, |
| "grad_norm": 0.23773372173309326, |
| "learning_rate": 4.860391806382156e-07, |
| "loss": 0.6582, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.13128647208213806, |
| "learning_rate": 4.825502516487496e-07, |
| "loss": 0.2715, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.562, |
| "grad_norm": 0.2204434871673584, |
| "learning_rate": 4.790621731354002e-07, |
| "loss": 0.4218, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.564, |
| "grad_norm": 0.24123643338680267, |
| "learning_rate": 4.7557511510219335e-07, |
| "loss": 0.3945, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.566, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7208924750341805e-07, |
| "loss": 0.277, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.568, |
| "grad_norm": 0.2314901351928711, |
| "learning_rate": 4.686047402353433e-07, |
| "loss": 0.4767, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 0.2163623422384262, |
| "learning_rate": 4.6512176312793735e-07, |
| "loss": 0.4975, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.572, |
| "grad_norm": 0.38640084862709045, |
| "learning_rate": 4.6164048593659065e-07, |
| "loss": 0.3997, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.574, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5816107833384233e-07, |
| "loss": 0.204, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 0.32572096586227417, |
| "learning_rate": 4.5468370990110997e-07, |
| "loss": 0.4722, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.578, |
| "grad_norm": 0.18735390901565552, |
| "learning_rate": 4.512085501204253e-07, |
| "loss": 0.361, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 0.16881507635116577, |
| "learning_rate": 4.477357683661733e-07, |
| "loss": 0.3761, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.582, |
| "grad_norm": 0.0, |
| "learning_rate": 4.442655338968373e-07, |
| "loss": 0.3249, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.584, |
| "grad_norm": 0.0, |
| "learning_rate": 4.407980158467495e-07, |
| "loss": 0.4703, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.586, |
| "grad_norm": 0.3695274293422699, |
| "learning_rate": 4.3733338321784777e-07, |
| "loss": 0.5411, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.588, |
| "grad_norm": 0.15775476396083832, |
| "learning_rate": 4.338718048714387e-07, |
| "loss": 0.4458, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 0.3031776249408722, |
| "learning_rate": 4.304134495199674e-07, |
| "loss": 0.4205, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.592, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2695848571879424e-07, |
| "loss": 0.3505, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.594, |
| "grad_norm": 0.4674622416496277, |
| "learning_rate": 4.23507081857981e-07, |
| "loss": 0.444, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.596, |
| "grad_norm": 0.0, |
| "learning_rate": 4.200594061540826e-07, |
| "loss": 0.2865, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.598, |
| "grad_norm": 0.16793106496334076, |
| "learning_rate": 4.166156266419489e-07, |
| "loss": 0.2819, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.0, |
| "learning_rate": 4.131759111665348e-07, |
| "loss": 0.1888, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.602, |
| "grad_norm": 0.19853752851486206, |
| "learning_rate": 4.0974042737472005e-07, |
| "loss": 0.2501, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.604, |
| "grad_norm": 0.31696873903274536, |
| "learning_rate": 4.0630934270713755e-07, |
| "loss": 0.4024, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.606, |
| "grad_norm": 0.0, |
| "learning_rate": 4.028828243900141e-07, |
| "loss": 0.3474, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 0.1818513572216034, |
| "learning_rate": 3.9946103942701775e-07, |
| "loss": 0.452, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 0.16656121611595154, |
| "learning_rate": 3.960441545911204e-07, |
| "loss": 0.3922, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.612, |
| "grad_norm": 0.19681759178638458, |
| "learning_rate": 3.9263233641646836e-07, |
| "loss": 0.3217, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.614, |
| "grad_norm": 0.2946317791938782, |
| "learning_rate": 3.8922575119026635e-07, |
| "loss": 0.3435, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.616, |
| "grad_norm": 0.1443903148174286, |
| "learning_rate": 3.8582456494467206e-07, |
| "loss": 0.4127, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.618, |
| "grad_norm": 0.2468138188123703, |
| "learning_rate": 3.8242894344870495e-07, |
| "loss": 0.3118, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 0.351713627576828, |
| "learning_rate": 3.790390522001662e-07, |
| "loss": 0.3966, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.622, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7565505641757266e-07, |
| "loss": 0.1856, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.624, |
| "grad_norm": 0.0, |
| "learning_rate": 3.722771210321048e-07, |
| "loss": 0.3775, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.626, |
| "grad_norm": 0.14419417083263397, |
| "learning_rate": 3.689054106795677e-07, |
| "loss": 0.2968, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.628, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6554008969236715e-07, |
| "loss": 0.2965, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 0.334088534116745, |
| "learning_rate": 3.621813220915004e-07, |
| "loss": 0.4049, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.632, |
| "grad_norm": 0.13894756138324738, |
| "learning_rate": 3.5882927157856167e-07, |
| "loss": 0.4151, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.634, |
| "grad_norm": 0.2062196135520935, |
| "learning_rate": 3.554841015277641e-07, |
| "loss": 0.313, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.636, |
| "grad_norm": 0.0, |
| "learning_rate": 3.521459749779768e-07, |
| "loss": 0.1784, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.638, |
| "grad_norm": 0.204154834151268, |
| "learning_rate": 3.488150546247778e-07, |
| "loss": 0.3507, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.34156233072280884, |
| "learning_rate": 3.454915028125263e-07, |
| "loss": 0.2441, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.642, |
| "grad_norm": 0.22703151404857635, |
| "learning_rate": 3.421754815264488e-07, |
| "loss": 0.3752, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.644, |
| "grad_norm": 0.0, |
| "learning_rate": 3.388671523847445e-07, |
| "loss": 0.5073, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.646, |
| "grad_norm": 0.24089986085891724, |
| "learning_rate": 3.3556667663070835e-07, |
| "loss": 0.5404, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.648, |
| "grad_norm": 0.3656230866909027, |
| "learning_rate": 3.3227421512487255e-07, |
| "loss": 0.3229, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2898992833716563e-07, |
| "loss": 0.2866, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.652, |
| "grad_norm": 0.2754799723625183, |
| "learning_rate": 3.257139763390925e-07, |
| "loss": 0.4565, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.654, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2244651879593156e-07, |
| "loss": 0.4106, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.656, |
| "grad_norm": 0.18411864340305328, |
| "learning_rate": 3.191877149589539e-07, |
| "loss": 0.3363, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.658, |
| "grad_norm": 0.3307875990867615, |
| "learning_rate": 3.15937723657661e-07, |
| "loss": 0.5009, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 0.24335327744483948, |
| "learning_rate": 3.1269670329204393e-07, |
| "loss": 0.3791, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.662, |
| "grad_norm": 0.494253545999527, |
| "learning_rate": 3.0946481182486297e-07, |
| "loss": 0.3335, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.664, |
| "grad_norm": 0.2208227515220642, |
| "learning_rate": 3.0624220677394854e-07, |
| "loss": 0.3267, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.666, |
| "grad_norm": 0.21655437350273132, |
| "learning_rate": 3.0302904520452443e-07, |
| "loss": 0.3096, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.668, |
| "grad_norm": 0.6230963468551636, |
| "learning_rate": 2.9982548372155256e-07, |
| "loss": 0.5563, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 0.22712159156799316, |
| "learning_rate": 2.9663167846209996e-07, |
| "loss": 0.3549, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 0.31382498145103455, |
| "learning_rate": 2.9344778508772914e-07, |
| "loss": 0.3336, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.674, |
| "grad_norm": 0.18151722848415375, |
| "learning_rate": 2.902739587769114e-07, |
| "loss": 0.4086, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.676, |
| "grad_norm": 0.20704849064350128, |
| "learning_rate": 2.8711035421746363e-07, |
| "loss": 0.344, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.678, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8395712559900874e-07, |
| "loss": 0.3538, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.1955602765083313, |
| "learning_rate": 2.808144266054612e-07, |
| "loss": 0.2744, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.682, |
| "grad_norm": 0.2065371572971344, |
| "learning_rate": 2.776824104075364e-07, |
| "loss": 0.39, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.684, |
| "grad_norm": 0.3211356997489929, |
| "learning_rate": 2.745612296552847e-07, |
| "loss": 0.2373, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.686, |
| "grad_norm": 0.11869184672832489, |
| "learning_rate": 2.71451036470653e-07, |
| "loss": 0.311, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.688, |
| "grad_norm": 0.2647980749607086, |
| "learning_rate": 2.683519824400692e-07, |
| "loss": 0.5873, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 0.3186590373516083, |
| "learning_rate": 2.6526421860705473e-07, |
| "loss": 0.4824, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.692, |
| "grad_norm": 0.37641438841819763, |
| "learning_rate": 2.621878954648623e-07, |
| "loss": 0.4552, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.694, |
| "grad_norm": 0.13089558482170105, |
| "learning_rate": 2.591231629491423e-07, |
| "loss": 0.1949, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.696, |
| "grad_norm": 0.3158831000328064, |
| "learning_rate": 2.5607017043063353e-07, |
| "loss": 0.445, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.698, |
| "grad_norm": 0.0, |
| "learning_rate": 2.530290667078846e-07, |
| "loss": 0.4167, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.26551440358161926, |
| "learning_rate": 2.500000000000001e-07, |
| "loss": 0.6563, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.702, |
| "grad_norm": 0.3335043489933014, |
| "learning_rate": 2.469831179394182e-07, |
| "loss": 0.379, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 0.0, |
| "learning_rate": 2.439785675647143e-07, |
| "loss": 0.3703, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.706, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4098649531343494e-07, |
| "loss": 0.2379, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.708, |
| "grad_norm": 0.0, |
| "learning_rate": 2.380070470149605e-07, |
| "loss": 0.3422, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 0.17040787637233734, |
| "learning_rate": 2.350403678833976e-07, |
| "loss": 0.3154, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.712, |
| "grad_norm": 0.150774747133255, |
| "learning_rate": 2.3208660251050156e-07, |
| "loss": 0.3901, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.714, |
| "grad_norm": 0.15949007868766785, |
| "learning_rate": 2.2914589485863012e-07, |
| "loss": 0.4582, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.716, |
| "grad_norm": 0.4009764492511749, |
| "learning_rate": 2.262183882537249e-07, |
| "loss": 0.38, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.718, |
| "grad_norm": 0.0, |
| "learning_rate": 2.23304225378328e-07, |
| "loss": 0.2523, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2040354826462664e-07, |
| "loss": 0.2634, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.722, |
| "grad_norm": 0.1674695461988449, |
| "learning_rate": 2.1751649828753106e-07, |
| "loss": 0.4756, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.724, |
| "grad_norm": 0.0, |
| "learning_rate": 2.146432161577842e-07, |
| "loss": 0.3584, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.726, |
| "grad_norm": 0.3502958416938782, |
| "learning_rate": 2.117838419151034e-07, |
| "loss": 0.359, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.728, |
| "grad_norm": 0.2442190796136856, |
| "learning_rate": 2.0893851492135532e-07, |
| "loss": 0.2706, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 0.35089632868766785, |
| "learning_rate": 2.0610737385376348e-07, |
| "loss": 0.3068, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.732, |
| "grad_norm": 0.2787492871284485, |
| "learning_rate": 2.0329055669814933e-07, |
| "loss": 0.5172, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.734, |
| "grad_norm": 0.5598558187484741, |
| "learning_rate": 2.0048820074220711e-07, |
| "loss": 0.3267, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9770044256881258e-07, |
| "loss": 0.3907, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.738, |
| "grad_norm": 0.1440645307302475, |
| "learning_rate": 1.9492741804936618e-07, |
| "loss": 0.3602, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 0.15142397582530975, |
| "learning_rate": 1.9216926233717084e-07, |
| "loss": 0.3659, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.742, |
| "grad_norm": 0.1711028665304184, |
| "learning_rate": 1.8942610986084484e-07, |
| "loss": 0.3735, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.744, |
| "grad_norm": 0.5658474564552307, |
| "learning_rate": 1.8669809431776988e-07, |
| "loss": 0.4354, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.746, |
| "grad_norm": 0.34379860758781433, |
| "learning_rate": 1.8398534866757455e-07, |
| "loss": 0.4362, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.748, |
| "grad_norm": 0.3700734078884125, |
| "learning_rate": 1.812880051256551e-07, |
| "loss": 0.3513, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7860619515673032e-07, |
| "loss": 0.3845, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.752, |
| "grad_norm": 0.17461809515953064, |
| "learning_rate": 1.7594004946843454e-07, |
| "loss": 0.4918, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.754, |
| "grad_norm": 0.1721869856119156, |
| "learning_rate": 1.7328969800494726e-07, |
| "loss": 0.333, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.756, |
| "grad_norm": 0.2901223301887512, |
| "learning_rate": 1.7065526994065972e-07, |
| "loss": 0.3923, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.758, |
| "grad_norm": 0.2248222529888153, |
| "learning_rate": 1.6803689367387918e-07, |
| "loss": 0.4075, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6543469682057104e-07, |
| "loss": 0.3032, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.762, |
| "grad_norm": 0.23594574630260468, |
| "learning_rate": 1.6284880620813846e-07, |
| "loss": 0.5281, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.764, |
| "grad_norm": 0.39197710156440735, |
| "learning_rate": 1.6027934786924185e-07, |
| "loss": 0.285, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.766, |
| "grad_norm": 0.2870103120803833, |
| "learning_rate": 1.5772644703565564e-07, |
| "loss": 0.5428, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 0.1318693459033966, |
| "learning_rate": 1.551902281321651e-07, |
| "loss": 0.3816, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 0.3365454375743866, |
| "learning_rate": 1.5267081477050131e-07, |
| "loss": 0.3467, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.772, |
| "grad_norm": 0.15061412751674652, |
| "learning_rate": 1.5016832974331723e-07, |
| "loss": 0.5107, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.774, |
| "grad_norm": 0.1518489569425583, |
| "learning_rate": 1.4768289501820263e-07, |
| "loss": 0.3284, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.776, |
| "grad_norm": 0.2993331253528595, |
| "learning_rate": 1.4521463173173965e-07, |
| "loss": 0.4759, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.778, |
| "grad_norm": 0.24428100883960724, |
| "learning_rate": 1.4276366018359842e-07, |
| "loss": 0.2858, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 0.16279929876327515, |
| "learning_rate": 1.4033009983067452e-07, |
| "loss": 0.3974, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.782, |
| "grad_norm": 0.2223827987909317, |
| "learning_rate": 1.3791406928126635e-07, |
| "loss": 0.3701, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.784, |
| "grad_norm": 0.7606835961341858, |
| "learning_rate": 1.3551568628929432e-07, |
| "loss": 0.5949, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.786, |
| "grad_norm": 0.318511962890625, |
| "learning_rate": 1.3313506774856175e-07, |
| "loss": 0.3205, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.788, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3077232968705805e-07, |
| "loss": 0.382, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 0.18774640560150146, |
| "learning_rate": 1.284275872613028e-07, |
| "loss": 0.3313, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.792, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2610095475073413e-07, |
| "loss": 0.3053, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.794, |
| "grad_norm": 0.1625850647687912, |
| "learning_rate": 1.2379254555213786e-07, |
| "loss": 0.3819, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.796, |
| "grad_norm": 0.15839314460754395, |
| "learning_rate": 1.2150247217412185e-07, |
| "loss": 0.3443, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.798, |
| "grad_norm": 0.15834276378154755, |
| "learning_rate": 1.192308462316317e-07, |
| "loss": 0.2332, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.22634312510490417, |
| "learning_rate": 1.1697777844051104e-07, |
| "loss": 0.2864, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.802, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1474337861210543e-07, |
| "loss": 0.3618, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.804, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1252775564791023e-07, |
| "loss": 0.5444, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.806, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1033101753426282e-07, |
| "loss": 0.2134, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.808, |
| "grad_norm": 0.28831779956817627, |
| "learning_rate": 1.0815327133708013e-07, |
| "loss": 0.4404, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 0.29169991612434387, |
| "learning_rate": 1.0599462319663904e-07, |
| "loss": 0.3982, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.812, |
| "grad_norm": 0.1199837252497673, |
| "learning_rate": 1.038551783224047e-07, |
| "loss": 0.2208, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.814, |
| "grad_norm": 0.26785534620285034, |
| "learning_rate": 1.0173504098790186e-07, |
| "loss": 0.3469, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.816, |
| "grad_norm": 0.3739556670188904, |
| "learning_rate": 9.963431452563331e-08, |
| "loss": 0.3479, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.818, |
| "grad_norm": 0.0, |
| "learning_rate": 9.755310132204297e-08, |
| "loss": 0.4743, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 0.3042360544204712, |
| "learning_rate": 9.549150281252632e-08, |
| "loss": 0.436, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.822, |
| "grad_norm": 0.28365254402160645, |
| "learning_rate": 9.344961947648622e-08, |
| "loss": 0.5178, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.824, |
| "grad_norm": 0.2818581759929657, |
| "learning_rate": 9.142755083243575e-08, |
| "loss": 0.3609, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.826, |
| "grad_norm": 0.18327060341835022, |
| "learning_rate": 8.942539543314798e-08, |
| "loss": 0.4209, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.828, |
| "grad_norm": 0.33414414525032043, |
| "learning_rate": 8.744325086085247e-08, |
| "loss": 0.529, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 0.5132295489311218, |
| "learning_rate": 8.548121372247919e-08, |
| "loss": 0.3999, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 0.11341214925050735, |
| "learning_rate": 8.353937964495028e-08, |
| "loss": 0.5349, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.834, |
| "grad_norm": 0.0, |
| "learning_rate": 8.161784327051919e-08, |
| "loss": 0.3623, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.836, |
| "grad_norm": 0.0, |
| "learning_rate": 7.971669825215787e-08, |
| "loss": 0.2772, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.838, |
| "grad_norm": 0.0, |
| "learning_rate": 7.783603724899257e-08, |
| "loss": 0.5303, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.28068211674690247, |
| "learning_rate": 7.597595192178702e-08, |
| "loss": 0.401, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.842, |
| "grad_norm": 0.4041430354118347, |
| "learning_rate": 7.413653292847616e-08, |
| "loss": 0.4918, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.844, |
| "grad_norm": 0.22747954726219177, |
| "learning_rate": 7.23178699197467e-08, |
| "loss": 0.4324, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.846, |
| "grad_norm": 0.2969852387905121, |
| "learning_rate": 7.052005153466778e-08, |
| "loss": 0.4096, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.848, |
| "grad_norm": 0.20145879685878754, |
| "learning_rate": 6.874316539637126e-08, |
| "loss": 0.382, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.324589341878891, |
| "learning_rate": 6.698729810778064e-08, |
| "loss": 0.5643, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.852, |
| "grad_norm": 0.10846671462059021, |
| "learning_rate": 6.52525352473905e-08, |
| "loss": 0.2396, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.854, |
| "grad_norm": 0.4327607750892639, |
| "learning_rate": 6.353896136509524e-08, |
| "loss": 0.4376, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.856, |
| "grad_norm": 0.260789155960083, |
| "learning_rate": 6.184665997806831e-08, |
| "loss": 0.3816, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.858, |
| "grad_norm": 0.16783744096755981, |
| "learning_rate": 6.017571356669182e-08, |
| "loss": 0.4161, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 0.0, |
| "learning_rate": 5.8526203570536504e-08, |
| "loss": 0.2971, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.862, |
| "grad_norm": 0.0, |
| "learning_rate": 5.689821038439263e-08, |
| "loss": 0.3532, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 0.4413890540599823, |
| "learning_rate": 5.529181335435124e-08, |
| "loss": 0.4198, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.866, |
| "grad_norm": 0.0, |
| "learning_rate": 5.37070907739372e-08, |
| "loss": 0.2993, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.868, |
| "grad_norm": 0.3765997588634491, |
| "learning_rate": 5.2144119880293544e-08, |
| "loss": 0.4302, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 0.18383778631687164, |
| "learning_rate": 5.060297685041659e-08, |
| "loss": 0.3244, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.872, |
| "grad_norm": 0.0, |
| "learning_rate": 4.908373679744315e-08, |
| "loss": 0.2506, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.874, |
| "grad_norm": 0.1553139090538025, |
| "learning_rate": 4.758647376699032e-08, |
| "loss": 0.3142, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.876, |
| "grad_norm": 0.595188558101654, |
| "learning_rate": 4.611126073354571e-08, |
| "loss": 0.3983, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.878, |
| "grad_norm": 0.0, |
| "learning_rate": 4.465816959691149e-08, |
| "loss": 0.3539, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.1859598010778427, |
| "learning_rate": 4.322727117869951e-08, |
| "loss": 0.3467, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.882, |
| "grad_norm": 0.21359598636627197, |
| "learning_rate": 4.181863521888018e-08, |
| "loss": 0.405, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.884, |
| "grad_norm": 0.26576465368270874, |
| "learning_rate": 4.043233037238281e-08, |
| "loss": 0.3387, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.886, |
| "grad_norm": 0.15650220215320587, |
| "learning_rate": 3.9068424205749794e-08, |
| "loss": 0.3288, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.888, |
| "grad_norm": 0.1544315665960312, |
| "learning_rate": 3.7726983193843485e-08, |
| "loss": 0.3368, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6408072716606345e-08, |
| "loss": 0.2899, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.892, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5111757055874326e-08, |
| "loss": 0.5171, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.894, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3838099392243915e-08, |
| "loss": 0.2749, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 0.14561113715171814, |
| "learning_rate": 3.258716180199278e-08, |
| "loss": 0.4363, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.898, |
| "grad_norm": 0.1540963053703308, |
| "learning_rate": 3.135900525405427e-08, |
| "loss": 0.2762, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.23214228451251984, |
| "learning_rate": 3.015368960704584e-08, |
| "loss": 0.4436, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.902, |
| "grad_norm": 0.22368201613426208, |
| "learning_rate": 2.8971273606351655e-08, |
| "loss": 0.501, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.904, |
| "grad_norm": 0.20844587683677673, |
| "learning_rate": 2.78118148812595e-08, |
| "loss": 0.4099, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.906, |
| "grad_norm": 0.5211163759231567, |
| "learning_rate": 2.667536994215186e-08, |
| "loss": 0.5766, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.908, |
| "grad_norm": 0.3285559117794037, |
| "learning_rate": 2.5561994177751732e-08, |
| "loss": 0.3372, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 0.1555313766002655, |
| "learning_rate": 2.4471741852423233e-08, |
| "loss": 0.4767, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.912, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3404666103526537e-08, |
| "loss": 0.4606, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.914, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2360818938828187e-08, |
| "loss": 0.2631, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.916, |
| "grad_norm": 0.37747922539711, |
| "learning_rate": 2.1340251233966377e-08, |
| "loss": 0.3524, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.918, |
| "grad_norm": 0.7894517183303833, |
| "learning_rate": 2.0343012729971243e-08, |
| "loss": 0.5239, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.4695979654788971, |
| "learning_rate": 1.936915203084055e-08, |
| "loss": 0.4657, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.922, |
| "grad_norm": 0.17745596170425415, |
| "learning_rate": 1.8418716601170947e-08, |
| "loss": 0.481, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.924, |
| "grad_norm": 0.2473924607038498, |
| "learning_rate": 1.7491752763844292e-08, |
| "loss": 0.3308, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.926, |
| "grad_norm": 0.2266862392425537, |
| "learning_rate": 1.658830569777031e-08, |
| "loss": 0.2895, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.928, |
| "grad_norm": 0.20543864369392395, |
| "learning_rate": 1.570841943568446e-08, |
| "loss": 0.3418, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 0.2733469307422638, |
| "learning_rate": 1.4852136862001763e-08, |
| "loss": 0.5356, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.932, |
| "grad_norm": 0.18073861300945282, |
| "learning_rate": 1.4019499710726911e-08, |
| "loss": 0.4471, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.934, |
| "grad_norm": 0.30999889969825745, |
| "learning_rate": 1.3210548563419855e-08, |
| "loss": 0.4214, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.936, |
| "grad_norm": 0.2232372760772705, |
| "learning_rate": 1.2425322847218367e-08, |
| "loss": 0.3141, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.938, |
| "grad_norm": 0.5238833427429199, |
| "learning_rate": 1.166386083291604e-08, |
| "loss": 0.5076, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 0.42304739356040955, |
| "learning_rate": 1.0926199633097154e-08, |
| "loss": 0.4376, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.942, |
| "grad_norm": 0.17034804821014404, |
| "learning_rate": 1.0212375200327972e-08, |
| "loss": 0.3029, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.944, |
| "grad_norm": 0.16355615854263306, |
| "learning_rate": 9.522422325404233e-09, |
| "loss": 0.426, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.946, |
| "grad_norm": 0.348092257976532, |
| "learning_rate": 8.856374635655695e-09, |
| "loss": 0.6621, |
| "step": 4730 |
| }, |
| { |
| "epoch": 0.948, |
| "grad_norm": 0.16640865802764893, |
| "learning_rate": 8.214264593307096e-09, |
| "loss": 0.3636, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.8108724355697632, |
| "learning_rate": 7.59612349389599e-09, |
| "loss": 0.5295, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.952, |
| "grad_norm": 0.0, |
| "learning_rate": 7.0019814647475636e-09, |
| "loss": 0.4824, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.954, |
| "grad_norm": 0.2268078476190567, |
| "learning_rate": 6.431867463506046e-09, |
| "loss": 0.4548, |
| "step": 4770 |
| }, |
| { |
| "epoch": 0.956, |
| "grad_norm": 0.2407975047826767, |
| "learning_rate": 5.8858092767236076e-09, |
| "loss": 0.4036, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.958, |
| "grad_norm": 0.0, |
| "learning_rate": 5.3638335185058335e-09, |
| "loss": 0.3349, |
| "step": 4790 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.2542084753513336, |
| "learning_rate": 4.865965629214819e-09, |
| "loss": 0.364, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.962, |
| "grad_norm": 0.12055990099906921, |
| "learning_rate": 4.3922298742291585e-09, |
| "loss": 0.3559, |
| "step": 4810 |
| }, |
| { |
| "epoch": 0.964, |
| "grad_norm": 0.1341351717710495, |
| "learning_rate": 3.9426493427611175e-09, |
| "loss": 0.3196, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.966, |
| "grad_norm": 0.2929215729236603, |
| "learning_rate": 3.5172459467315286e-09, |
| "loss": 0.5427, |
| "step": 4830 |
| }, |
| { |
| "epoch": 0.968, |
| "grad_norm": 0.0, |
| "learning_rate": 3.116040419701815e-09, |
| "loss": 0.3466, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 0.0, |
| "learning_rate": 2.739052315863355e-09, |
| "loss": 0.3907, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.972, |
| "grad_norm": 0.2693001925945282, |
| "learning_rate": 2.3863000090844076e-09, |
| "loss": 0.2627, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.974, |
| "grad_norm": 0.2538900375366211, |
| "learning_rate": 2.057800692014833e-09, |
| "loss": 0.4413, |
| "step": 4870 |
| }, |
| { |
| "epoch": 0.976, |
| "grad_norm": 0.15544646978378296, |
| "learning_rate": 1.7535703752478147e-09, |
| "loss": 0.4709, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.978, |
| "grad_norm": 0.17917151749134064, |
| "learning_rate": 1.4736238865398765e-09, |
| "loss": 0.3168, |
| "step": 4890 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 0.2013753056526184, |
| "learning_rate": 1.217974870087901e-09, |
| "loss": 0.3619, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.982, |
| "grad_norm": 0.34069013595581055, |
| "learning_rate": 9.866357858642205e-10, |
| "loss": 0.5432, |
| "step": 4910 |
| }, |
| { |
| "epoch": 0.984, |
| "grad_norm": 0.28627774119377136, |
| "learning_rate": 7.79617909009489e-10, |
| "loss": 0.5604, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.986, |
| "grad_norm": 0.22166089713573456, |
| "learning_rate": 5.969313292830125e-10, |
| "loss": 0.3382, |
| "step": 4930 |
| }, |
| { |
| "epoch": 0.988, |
| "grad_norm": 0.23357252776622772, |
| "learning_rate": 4.3858495057080836e-10, |
| "loss": 0.281, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 0.11937293410301208, |
| "learning_rate": 3.0458649045211894e-10, |
| "loss": 0.558, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.992, |
| "grad_norm": 0.1517428755760193, |
| "learning_rate": 1.9494247982282387e-10, |
| "loss": 0.4587, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.994, |
| "grad_norm": 0.13431088626384735, |
| "learning_rate": 1.0965826257725019e-10, |
| "loss": 0.3734, |
| "step": 4970 |
| }, |
| { |
| "epoch": 0.996, |
| "grad_norm": 0.214543879032135, |
| "learning_rate": 4.873799534788059e-11, |
| "loss": 0.4557, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.998, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2184647302626582e-11, |
| "loss": 0.4266, |
| "step": 4990 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.19645723700523376, |
| "learning_rate": 0.0, |
| "loss": 0.5645, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 5000, |
| "total_flos": 1.7358340143169536e+16, |
| "train_loss": 0.4102936505317688, |
| "train_runtime": 978.063, |
| "train_samples_per_second": 5.112, |
| "train_steps_per_second": 5.112 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 5000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 4000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.7358340143169536e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|