| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.03889940613573299, |
| "eval_steps": 500, |
| "global_step": 21000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.8523526731301427e-05, |
| "grad_norm": 1.4365341663360596, |
| "learning_rate": 2e-09, |
| "loss": 0.0068, |
| "step": 10 |
| }, |
| { |
| "epoch": 3.7047053462602854e-05, |
| "grad_norm": 0.2875632345676422, |
| "learning_rate": 4e-09, |
| "loss": 0.0069, |
| "step": 20 |
| }, |
| { |
| "epoch": 5.557058019390428e-05, |
| "grad_norm": 0.754702627658844, |
| "learning_rate": 5.999999999999999e-09, |
| "loss": 0.0055, |
| "step": 30 |
| }, |
| { |
| "epoch": 7.409410692520571e-05, |
| "grad_norm": 0.6984386444091797, |
| "learning_rate": 8e-09, |
| "loss": 0.0052, |
| "step": 40 |
| }, |
| { |
| "epoch": 9.261763365650713e-05, |
| "grad_norm": 1.220741629600525, |
| "learning_rate": 1e-08, |
| "loss": 0.0056, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.00011114116038780856, |
| "grad_norm": 1.0338093042373657, |
| "learning_rate": 1.1999999999999998e-08, |
| "loss": 0.0066, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.00012966468711911, |
| "grad_norm": 0.5980871915817261, |
| "learning_rate": 1.4000000000000001e-08, |
| "loss": 0.0053, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.00014818821385041142, |
| "grad_norm": 4.401883125305176, |
| "learning_rate": 1.6e-08, |
| "loss": 0.0066, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.00016671174058171284, |
| "grad_norm": 0.7785063982009888, |
| "learning_rate": 1.8e-08, |
| "loss": 0.0062, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.00018523526731301426, |
| "grad_norm": 2.4886574745178223, |
| "learning_rate": 2e-08, |
| "loss": 0.0065, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0002037587940443157, |
| "grad_norm": 7.158140659332275, |
| "learning_rate": 2.2e-08, |
| "loss": 0.0061, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0002222823207756171, |
| "grad_norm": 1.853729486465454, |
| "learning_rate": 2.3999999999999997e-08, |
| "loss": 0.0054, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.00024080584750691854, |
| "grad_norm": 1.3051828145980835, |
| "learning_rate": 2.6e-08, |
| "loss": 0.0049, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.00025932937423822, |
| "grad_norm": 0.4401150941848755, |
| "learning_rate": 2.8000000000000003e-08, |
| "loss": 0.0065, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.0002778529009695214, |
| "grad_norm": 2.922142744064331, |
| "learning_rate": 3e-08, |
| "loss": 0.0058, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.00029637642770082283, |
| "grad_norm": 1.0148659944534302, |
| "learning_rate": 3.2e-08, |
| "loss": 0.0055, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.00031489995443212426, |
| "grad_norm": 0.9402350783348083, |
| "learning_rate": 3.4e-08, |
| "loss": 0.006, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.0003334234811634257, |
| "grad_norm": 0.8995290398597717, |
| "learning_rate": 3.6e-08, |
| "loss": 0.007, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.0003519470078947271, |
| "grad_norm": 0.8776085376739502, |
| "learning_rate": 3.7999999999999996e-08, |
| "loss": 0.0061, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.00037047053462602853, |
| "grad_norm": 1.4213812351226807, |
| "learning_rate": 4e-08, |
| "loss": 0.0053, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.00038899406135732995, |
| "grad_norm": 1.0605380535125732, |
| "learning_rate": 4.2e-08, |
| "loss": 0.0081, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.0004075175880886314, |
| "grad_norm": 1.9367486238479614, |
| "learning_rate": 4.4e-08, |
| "loss": 0.0059, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.0004260411148199328, |
| "grad_norm": 2.089946746826172, |
| "learning_rate": 4.6e-08, |
| "loss": 0.0047, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.0004445646415512342, |
| "grad_norm": 1.174837350845337, |
| "learning_rate": 4.799999999999999e-08, |
| "loss": 0.0066, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.00046308816828253565, |
| "grad_norm": 0.7284667491912842, |
| "learning_rate": 5e-08, |
| "loss": 0.0078, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.00048161169501383707, |
| "grad_norm": 0.5827767848968506, |
| "learning_rate": 5.2e-08, |
| "loss": 0.0061, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.0005001352217451385, |
| "grad_norm": 0.9152899980545044, |
| "learning_rate": 5.4e-08, |
| "loss": 0.0073, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.00051865874847644, |
| "grad_norm": 15.577178001403809, |
| "learning_rate": 5.6000000000000005e-08, |
| "loss": 0.0049, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.0005371822752077413, |
| "grad_norm": 0.4566841125488281, |
| "learning_rate": 5.7999999999999997e-08, |
| "loss": 0.0052, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.0005557058019390428, |
| "grad_norm": 2.1245856285095215, |
| "learning_rate": 6e-08, |
| "loss": 0.0063, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.0005742293286703442, |
| "grad_norm": 0.5508998036384583, |
| "learning_rate": 6.2e-08, |
| "loss": 0.005, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.0005927528554016457, |
| "grad_norm": 2.0696892738342285, |
| "learning_rate": 6.4e-08, |
| "loss": 0.0066, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.000611276382132947, |
| "grad_norm": 1.0439932346343994, |
| "learning_rate": 6.6e-08, |
| "loss": 0.0044, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.0006297999088642485, |
| "grad_norm": 2.2266595363616943, |
| "learning_rate": 6.8e-08, |
| "loss": 0.0063, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.0006483234355955499, |
| "grad_norm": 1.0740715265274048, |
| "learning_rate": 6.999999999999999e-08, |
| "loss": 0.0052, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.0006668469623268514, |
| "grad_norm": 2.1596767902374268, |
| "learning_rate": 7.2e-08, |
| "loss": 0.0061, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.0006853704890581527, |
| "grad_norm": 1.101522445678711, |
| "learning_rate": 7.4e-08, |
| "loss": 0.0049, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.0007038940157894542, |
| "grad_norm": 8.387984275817871, |
| "learning_rate": 7.599999999999999e-08, |
| "loss": 0.0059, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.0007224175425207556, |
| "grad_norm": 1.0280990600585938, |
| "learning_rate": 7.8e-08, |
| "loss": 0.0058, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.0007409410692520571, |
| "grad_norm": 1.0322803258895874, |
| "learning_rate": 8e-08, |
| "loss": 0.006, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.0007594645959833584, |
| "grad_norm": 1.083223819732666, |
| "learning_rate": 8.199999999999999e-08, |
| "loss": 0.0054, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.0007779881227146599, |
| "grad_norm": 1.4103988409042358, |
| "learning_rate": 8.4e-08, |
| "loss": 0.0058, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.0007965116494459613, |
| "grad_norm": 0.6534194350242615, |
| "learning_rate": 8.599999999999999e-08, |
| "loss": 0.0045, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.0008150351761772628, |
| "grad_norm": 1.0969117879867554, |
| "learning_rate": 8.8e-08, |
| "loss": 0.0068, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.0008335587029085641, |
| "grad_norm": 2.153444766998291, |
| "learning_rate": 9e-08, |
| "loss": 0.0059, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.0008520822296398656, |
| "grad_norm": 1.7205032110214233, |
| "learning_rate": 9.2e-08, |
| "loss": 0.0056, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.000870605756371167, |
| "grad_norm": 2.386373281478882, |
| "learning_rate": 9.4e-08, |
| "loss": 0.0056, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.0008891292831024684, |
| "grad_norm": 0.6668074727058411, |
| "learning_rate": 9.599999999999999e-08, |
| "loss": 0.0058, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.0009076528098337699, |
| "grad_norm": 1.0478103160858154, |
| "learning_rate": 9.799999999999999e-08, |
| "loss": 0.0052, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.0009261763365650713, |
| "grad_norm": 0.5006719827651978, |
| "learning_rate": 1e-07, |
| "loss": 0.0053, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.0009446998632963728, |
| "grad_norm": 0.9427525997161865, |
| "learning_rate": 1.02e-07, |
| "loss": 0.0062, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.0009632233900276741, |
| "grad_norm": 0.8038456439971924, |
| "learning_rate": 1.04e-07, |
| "loss": 0.0063, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.0009817469167589755, |
| "grad_norm": 1.0056331157684326, |
| "learning_rate": 1.06e-07, |
| "loss": 0.0061, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.001000270443490277, |
| "grad_norm": 2.944345712661743, |
| "learning_rate": 1.08e-07, |
| "loss": 0.0055, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.0010187939702215785, |
| "grad_norm": 0.4756002426147461, |
| "learning_rate": 1.1e-07, |
| "loss": 0.0058, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.00103731749695288, |
| "grad_norm": 0.7967053651809692, |
| "learning_rate": 1.1200000000000001e-07, |
| "loss": 0.0043, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.0010558410236841812, |
| "grad_norm": 0.5439043641090393, |
| "learning_rate": 1.1399999999999999e-07, |
| "loss": 0.0068, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.0010743645504154827, |
| "grad_norm": 1.1805559396743774, |
| "learning_rate": 1.1599999999999999e-07, |
| "loss": 0.0054, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.0010928880771467842, |
| "grad_norm": 1.3035606145858765, |
| "learning_rate": 1.1799999999999998e-07, |
| "loss": 0.0058, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.0011114116038780856, |
| "grad_norm": 1.3339598178863525, |
| "learning_rate": 1.2e-07, |
| "loss": 0.0057, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.001129935130609387, |
| "grad_norm": 1.3659064769744873, |
| "learning_rate": 1.2199999999999998e-07, |
| "loss": 0.0062, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.0011484586573406884, |
| "grad_norm": 1.2174561023712158, |
| "learning_rate": 1.24e-07, |
| "loss": 0.0055, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.0011669821840719899, |
| "grad_norm": 0.4670966863632202, |
| "learning_rate": 1.26e-07, |
| "loss": 0.005, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.0011855057108032913, |
| "grad_norm": 0.6576770544052124, |
| "learning_rate": 1.28e-07, |
| "loss": 0.0054, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.0012040292375345926, |
| "grad_norm": 1.3622369766235352, |
| "learning_rate": 1.3e-07, |
| "loss": 0.0061, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.001222552764265894, |
| "grad_norm": 0.4510115385055542, |
| "learning_rate": 1.32e-07, |
| "loss": 0.0061, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.0012410762909971956, |
| "grad_norm": 1.2369922399520874, |
| "learning_rate": 1.34e-07, |
| "loss": 0.0057, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.001259599817728497, |
| "grad_norm": 2.0124547481536865, |
| "learning_rate": 1.36e-07, |
| "loss": 0.0059, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.0012781233444597983, |
| "grad_norm": 1.497590184211731, |
| "learning_rate": 1.38e-07, |
| "loss": 0.0065, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.0012966468711910998, |
| "grad_norm": 0.5575208067893982, |
| "learning_rate": 1.3999999999999998e-07, |
| "loss": 0.0062, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.0013151703979224012, |
| "grad_norm": 0.4798245131969452, |
| "learning_rate": 1.4199999999999997e-07, |
| "loss": 0.0044, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.0013336939246537027, |
| "grad_norm": 0.8238214254379272, |
| "learning_rate": 1.44e-07, |
| "loss": 0.0051, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.001352217451385004, |
| "grad_norm": 0.9985460638999939, |
| "learning_rate": 1.4599999999999998e-07, |
| "loss": 0.0049, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.0013707409781163055, |
| "grad_norm": 0.8525176644325256, |
| "learning_rate": 1.48e-07, |
| "loss": 0.0056, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.001389264504847607, |
| "grad_norm": 1.585843801498413, |
| "learning_rate": 1.5e-07, |
| "loss": 0.0062, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.0014077880315789084, |
| "grad_norm": 2.2086989879608154, |
| "learning_rate": 1.5199999999999998e-07, |
| "loss": 0.0066, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.00142631155831021, |
| "grad_norm": 2.4752936363220215, |
| "learning_rate": 1.54e-07, |
| "loss": 0.0062, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.0014448350850415112, |
| "grad_norm": 0.5352007746696472, |
| "learning_rate": 1.56e-07, |
| "loss": 0.0054, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.0014633586117728126, |
| "grad_norm": 0.5121957659721375, |
| "learning_rate": 1.58e-07, |
| "loss": 0.0046, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.0014818821385041141, |
| "grad_norm": 0.7911613583564758, |
| "learning_rate": 1.6e-07, |
| "loss": 0.0045, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.0015004056652354156, |
| "grad_norm": 0.6104145050048828, |
| "learning_rate": 1.62e-07, |
| "loss": 0.0045, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.0015189291919667169, |
| "grad_norm": 1.2079161405563354, |
| "learning_rate": 1.6399999999999999e-07, |
| "loss": 0.0055, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.0015374527186980183, |
| "grad_norm": 1.1350284814834595, |
| "learning_rate": 1.6599999999999998e-07, |
| "loss": 0.0058, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.0015559762454293198, |
| "grad_norm": 1.2961735725402832, |
| "learning_rate": 1.68e-07, |
| "loss": 0.0059, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.0015744997721606213, |
| "grad_norm": 0.29242363572120667, |
| "learning_rate": 1.7e-07, |
| "loss": 0.0047, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.0015930232988919225, |
| "grad_norm": 0.5930100679397583, |
| "learning_rate": 1.7199999999999998e-07, |
| "loss": 0.0062, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.001611546825623224, |
| "grad_norm": 0.5777493119239807, |
| "learning_rate": 1.74e-07, |
| "loss": 0.005, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.0016300703523545255, |
| "grad_norm": 3.6954779624938965, |
| "learning_rate": 1.76e-07, |
| "loss": 0.0052, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.001648593879085827, |
| "grad_norm": 0.5278248190879822, |
| "learning_rate": 1.78e-07, |
| "loss": 0.0054, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.0016671174058171282, |
| "grad_norm": 0.6074942946434021, |
| "learning_rate": 1.8e-07, |
| "loss": 0.0068, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.0016856409325484297, |
| "grad_norm": 0.5475661754608154, |
| "learning_rate": 1.82e-07, |
| "loss": 0.0049, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.0017041644592797312, |
| "grad_norm": 0.6424407362937927, |
| "learning_rate": 1.84e-07, |
| "loss": 0.0047, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.0017226879860110327, |
| "grad_norm": 0.8039686679840088, |
| "learning_rate": 1.86e-07, |
| "loss": 0.0047, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.001741211512742334, |
| "grad_norm": 1.2419958114624023, |
| "learning_rate": 1.88e-07, |
| "loss": 0.0068, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.0017597350394736354, |
| "grad_norm": 0.8218024969100952, |
| "learning_rate": 1.8999999999999998e-07, |
| "loss": 0.0052, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.001778258566204937, |
| "grad_norm": 0.6466169357299805, |
| "learning_rate": 1.9199999999999997e-07, |
| "loss": 0.0063, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.0017967820929362384, |
| "grad_norm": 0.6493163108825684, |
| "learning_rate": 1.94e-07, |
| "loss": 0.0052, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.0018153056196675399, |
| "grad_norm": 1.0410829782485962, |
| "learning_rate": 1.9599999999999998e-07, |
| "loss": 0.0048, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.0018338291463988411, |
| "grad_norm": 1.0829999446868896, |
| "learning_rate": 1.98e-07, |
| "loss": 0.0063, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.0018523526731301426, |
| "grad_norm": 1.1090216636657715, |
| "learning_rate": 2e-07, |
| "loss": 0.0066, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.001870876199861444, |
| "grad_norm": 1.5902459621429443, |
| "learning_rate": 1.999999999575906e-07, |
| "loss": 0.0049, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.0018893997265927455, |
| "grad_norm": 0.25215762853622437, |
| "learning_rate": 1.9999999983036245e-07, |
| "loss": 0.0052, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.0019079232533240468, |
| "grad_norm": 0.7512747049331665, |
| "learning_rate": 1.9999999961831556e-07, |
| "loss": 0.0051, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.0019264467800553483, |
| "grad_norm": 0.4931435286998749, |
| "learning_rate": 1.9999999932144986e-07, |
| "loss": 0.0052, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.0019449703067866498, |
| "grad_norm": 1.2866597175598145, |
| "learning_rate": 1.9999999893976544e-07, |
| "loss": 0.007, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.001963493833517951, |
| "grad_norm": 1.9010076522827148, |
| "learning_rate": 1.9999999847326223e-07, |
| "loss": 0.0051, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.0019820173602492527, |
| "grad_norm": 0.2680765986442566, |
| "learning_rate": 1.9999999792194023e-07, |
| "loss": 0.0053, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.002000540886980554, |
| "grad_norm": 0.33872854709625244, |
| "learning_rate": 1.9999999728579954e-07, |
| "loss": 0.0061, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.0020190644137118552, |
| "grad_norm": 0.5961318612098694, |
| "learning_rate": 1.9999999656484e-07, |
| "loss": 0.0057, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.002037587940443157, |
| "grad_norm": 0.883726954460144, |
| "learning_rate": 1.9999999575906177e-07, |
| "loss": 0.0045, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.002056111467174458, |
| "grad_norm": 1.053317666053772, |
| "learning_rate": 1.9999999486846476e-07, |
| "loss": 0.0054, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.00207463499390576, |
| "grad_norm": 2.944972515106201, |
| "learning_rate": 1.9999999389304896e-07, |
| "loss": 0.0052, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.002093158520637061, |
| "grad_norm": 3.8879315853118896, |
| "learning_rate": 1.999999928328144e-07, |
| "loss": 0.0043, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.0021116820473683624, |
| "grad_norm": 0.7626655101776123, |
| "learning_rate": 1.999999916877611e-07, |
| "loss": 0.0051, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.002130205574099664, |
| "grad_norm": 1.2365458011627197, |
| "learning_rate": 1.9999999045788905e-07, |
| "loss": 0.0069, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.0021487291008309654, |
| "grad_norm": 2.149346113204956, |
| "learning_rate": 1.9999998914319823e-07, |
| "loss": 0.006, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.0021672526275622666, |
| "grad_norm": 2.384781837463379, |
| "learning_rate": 1.9999998774368865e-07, |
| "loss": 0.0055, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.0021857761542935683, |
| "grad_norm": 0.9366813898086548, |
| "learning_rate": 1.9999998625936034e-07, |
| "loss": 0.0045, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.0022042996810248696, |
| "grad_norm": 0.6636898517608643, |
| "learning_rate": 1.9999998469021325e-07, |
| "loss": 0.0053, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.0022228232077561713, |
| "grad_norm": 0.6570383906364441, |
| "learning_rate": 1.999999830362474e-07, |
| "loss": 0.005, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.0022413467344874725, |
| "grad_norm": 0.9230858087539673, |
| "learning_rate": 1.9999998129746283e-07, |
| "loss": 0.0045, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.002259870261218774, |
| "grad_norm": 0.6840155720710754, |
| "learning_rate": 1.999999794738595e-07, |
| "loss": 0.0057, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.0022783937879500755, |
| "grad_norm": 0.2627875506877899, |
| "learning_rate": 1.999999775654374e-07, |
| "loss": 0.0044, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.0022969173146813768, |
| "grad_norm": 0.8080741763114929, |
| "learning_rate": 1.9999997557219657e-07, |
| "loss": 0.0063, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.0023154408414126785, |
| "grad_norm": 0.6294757127761841, |
| "learning_rate": 1.9999997349413702e-07, |
| "loss": 0.0055, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.0023339643681439797, |
| "grad_norm": 0.8624229431152344, |
| "learning_rate": 1.999999713312587e-07, |
| "loss": 0.0056, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.002352487894875281, |
| "grad_norm": 1.3879464864730835, |
| "learning_rate": 1.9999996908356164e-07, |
| "loss": 0.0049, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.0023710114216065827, |
| "grad_norm": 0.8140110969543457, |
| "learning_rate": 1.9999996675104582e-07, |
| "loss": 0.005, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.002389534948337884, |
| "grad_norm": 2.21988582611084, |
| "learning_rate": 1.999999643337113e-07, |
| "loss": 0.0049, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.002408058475069185, |
| "grad_norm": 0.791469931602478, |
| "learning_rate": 1.9999996183155803e-07, |
| "loss": 0.0057, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.002426582001800487, |
| "grad_norm": 0.3285043239593506, |
| "learning_rate": 1.9999995924458603e-07, |
| "loss": 0.005, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.002445105528531788, |
| "grad_norm": 0.7329514026641846, |
| "learning_rate": 1.9999995657279533e-07, |
| "loss": 0.0057, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.00246362905526309, |
| "grad_norm": 0.5092055797576904, |
| "learning_rate": 1.9999995381618584e-07, |
| "loss": 0.006, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.002482152581994391, |
| "grad_norm": 0.7708818912506104, |
| "learning_rate": 1.9999995097475765e-07, |
| "loss": 0.0049, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.0025006761087256924, |
| "grad_norm": 0.9169188141822815, |
| "learning_rate": 1.9999994804851076e-07, |
| "loss": 0.0057, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.002519199635456994, |
| "grad_norm": 0.6490141153335571, |
| "learning_rate": 1.999999450374451e-07, |
| "loss": 0.0051, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.0025377231621882953, |
| "grad_norm": 2.1031227111816406, |
| "learning_rate": 1.9999994194156075e-07, |
| "loss": 0.0046, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.0025562466889195966, |
| "grad_norm": 1.4806420803070068, |
| "learning_rate": 1.999999387608577e-07, |
| "loss": 0.0044, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.0025747702156508983, |
| "grad_norm": 0.5930134057998657, |
| "learning_rate": 1.9999993549533591e-07, |
| "loss": 0.0051, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.0025932937423821995, |
| "grad_norm": 0.5469093322753906, |
| "learning_rate": 1.9999993214499543e-07, |
| "loss": 0.0063, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.0026118172691135012, |
| "grad_norm": 0.5781998634338379, |
| "learning_rate": 1.999999287098362e-07, |
| "loss": 0.0046, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.0026303407958448025, |
| "grad_norm": 2.402587652206421, |
| "learning_rate": 1.9999992518985832e-07, |
| "loss": 0.0055, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.0026488643225761038, |
| "grad_norm": 1.2780495882034302, |
| "learning_rate": 1.9999992158506172e-07, |
| "loss": 0.0053, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.0026673878493074055, |
| "grad_norm": 2.1578969955444336, |
| "learning_rate": 1.9999991789544642e-07, |
| "loss": 0.0052, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.0026859113760387067, |
| "grad_norm": 8.007939338684082, |
| "learning_rate": 1.9999991412101242e-07, |
| "loss": 0.0059, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.002704434902770008, |
| "grad_norm": 1.5032520294189453, |
| "learning_rate": 1.9999991026175974e-07, |
| "loss": 0.0052, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.0027229584295013097, |
| "grad_norm": 0.7657321095466614, |
| "learning_rate": 1.9999990631768836e-07, |
| "loss": 0.0041, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.002741481956232611, |
| "grad_norm": 2.3176472187042236, |
| "learning_rate": 1.9999990228879827e-07, |
| "loss": 0.0058, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.0027600054829639126, |
| "grad_norm": 1.3602319955825806, |
| "learning_rate": 1.9999989817508954e-07, |
| "loss": 0.0061, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.002778529009695214, |
| "grad_norm": 0.4337843656539917, |
| "learning_rate": 1.999998939765621e-07, |
| "loss": 0.0049, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.002797052536426515, |
| "grad_norm": 0.9164171814918518, |
| "learning_rate": 1.9999988969321598e-07, |
| "loss": 0.0051, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.002815576063157817, |
| "grad_norm": 0.5593477487564087, |
| "learning_rate": 1.9999988532505122e-07, |
| "loss": 0.0044, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.002834099589889118, |
| "grad_norm": 0.8717262148857117, |
| "learning_rate": 1.9999988087206775e-07, |
| "loss": 0.007, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.00285262311662042, |
| "grad_norm": 0.7482004165649414, |
| "learning_rate": 1.9999987633426566e-07, |
| "loss": 0.0049, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.002871146643351721, |
| "grad_norm": 1.261317491531372, |
| "learning_rate": 1.999998717116449e-07, |
| "loss": 0.0047, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.0028896701700830223, |
| "grad_norm": 0.588097095489502, |
| "learning_rate": 1.9999986700420548e-07, |
| "loss": 0.0051, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.002908193696814324, |
| "grad_norm": 0.9068071246147156, |
| "learning_rate": 1.999998622119474e-07, |
| "loss": 0.0055, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.0029267172235456253, |
| "grad_norm": 1.6236398220062256, |
| "learning_rate": 1.999998573348707e-07, |
| "loss": 0.0054, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.0029452407502769265, |
| "grad_norm": 0.26100394129753113, |
| "learning_rate": 1.999998523729753e-07, |
| "loss": 0.0046, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.0029637642770082282, |
| "grad_norm": 1.2977544069290161, |
| "learning_rate": 1.999998473262613e-07, |
| "loss": 0.0055, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.0029822878037395295, |
| "grad_norm": 1.8673232793807983, |
| "learning_rate": 1.9999984219472864e-07, |
| "loss": 0.0057, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.003000811330470831, |
| "grad_norm": 0.5209649205207825, |
| "learning_rate": 1.9999983697837737e-07, |
| "loss": 0.0055, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.0030193348572021324, |
| "grad_norm": 0.88433438539505, |
| "learning_rate": 1.9999983167720746e-07, |
| "loss": 0.0046, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.0030378583839334337, |
| "grad_norm": 0.6278052926063538, |
| "learning_rate": 1.9999982629121895e-07, |
| "loss": 0.0047, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.0030563819106647354, |
| "grad_norm": 0.9479427933692932, |
| "learning_rate": 1.999998208204118e-07, |
| "loss": 0.0057, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.0030749054373960367, |
| "grad_norm": 0.38358673453330994, |
| "learning_rate": 1.9999981526478605e-07, |
| "loss": 0.0043, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.003093428964127338, |
| "grad_norm": 0.943699836730957, |
| "learning_rate": 1.999998096243417e-07, |
| "loss": 0.0059, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.0031119524908586396, |
| "grad_norm": 0.695310115814209, |
| "learning_rate": 1.9999980389907872e-07, |
| "loss": 0.0061, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.003130476017589941, |
| "grad_norm": 0.3052780330181122, |
| "learning_rate": 1.9999979808899714e-07, |
| "loss": 0.0045, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.0031489995443212426, |
| "grad_norm": 1.0659457445144653, |
| "learning_rate": 1.9999979219409697e-07, |
| "loss": 0.0056, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.003167523071052544, |
| "grad_norm": 0.7883532643318176, |
| "learning_rate": 1.999997862143782e-07, |
| "loss": 0.0056, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.003186046597783845, |
| "grad_norm": 0.7115182876586914, |
| "learning_rate": 1.9999978014984088e-07, |
| "loss": 0.0063, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.003204570124515147, |
| "grad_norm": 1.8874396085739136, |
| "learning_rate": 1.9999977400048497e-07, |
| "loss": 0.0057, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.003223093651246448, |
| "grad_norm": 0.5432929396629333, |
| "learning_rate": 1.9999976776631046e-07, |
| "loss": 0.0054, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.0032416171779777497, |
| "grad_norm": 0.851771891117096, |
| "learning_rate": 1.999997614473174e-07, |
| "loss": 0.0084, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.003260140704709051, |
| "grad_norm": 0.8765040636062622, |
| "learning_rate": 1.9999975504350578e-07, |
| "loss": 0.0051, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.0032786642314403523, |
| "grad_norm": 2.9423177242279053, |
| "learning_rate": 1.9999974855487562e-07, |
| "loss": 0.0053, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.003297187758171654, |
| "grad_norm": 2.7032599449157715, |
| "learning_rate": 1.999997419814269e-07, |
| "loss": 0.0055, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.0033157112849029552, |
| "grad_norm": 0.7423555850982666, |
| "learning_rate": 1.9999973532315962e-07, |
| "loss": 0.0055, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.0033342348116342565, |
| "grad_norm": 0.6650148034095764, |
| "learning_rate": 1.9999972858007382e-07, |
| "loss": 0.0051, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.003352758338365558, |
| "grad_norm": 1.227732539176941, |
| "learning_rate": 1.9999972175216942e-07, |
| "loss": 0.0055, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.0033712818650968594, |
| "grad_norm": 0.4454581141471863, |
| "learning_rate": 1.9999971483944656e-07, |
| "loss": 0.0054, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.003389805391828161, |
| "grad_norm": 1.0490766763687134, |
| "learning_rate": 1.9999970784190516e-07, |
| "loss": 0.006, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.0034083289185594624, |
| "grad_norm": 0.16727957129478455, |
| "learning_rate": 1.9999970075954523e-07, |
| "loss": 0.0041, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.0034268524452907637, |
| "grad_norm": 0.9306310415267944, |
| "learning_rate": 1.9999969359236682e-07, |
| "loss": 0.0052, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.0034453759720220654, |
| "grad_norm": 7.755875110626221, |
| "learning_rate": 1.9999968634036986e-07, |
| "loss": 0.0045, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.0034638994987533666, |
| "grad_norm": 0.8569228053092957, |
| "learning_rate": 1.9999967900355443e-07, |
| "loss": 0.005, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.003482423025484668, |
| "grad_norm": 0.7918545603752136, |
| "learning_rate": 1.999996715819205e-07, |
| "loss": 0.005, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.0035009465522159696, |
| "grad_norm": 0.45743027329444885, |
| "learning_rate": 1.9999966407546806e-07, |
| "loss": 0.0057, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.003519470078947271, |
| "grad_norm": 0.6925662159919739, |
| "learning_rate": 1.9999965648419716e-07, |
| "loss": 0.0047, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.0035379936056785725, |
| "grad_norm": 0.6255524158477783, |
| "learning_rate": 1.999996488081078e-07, |
| "loss": 0.0049, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.003556517132409874, |
| "grad_norm": 1.9690749645233154, |
| "learning_rate": 1.9999964104719997e-07, |
| "loss": 0.0065, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.003575040659141175, |
| "grad_norm": 1.1689437627792358, |
| "learning_rate": 1.9999963320147368e-07, |
| "loss": 0.006, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.0035935641858724767, |
| "grad_norm": 0.7555713057518005, |
| "learning_rate": 1.9999962527092892e-07, |
| "loss": 0.0063, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.003612087712603778, |
| "grad_norm": 0.7352761626243591, |
| "learning_rate": 1.999996172555657e-07, |
| "loss": 0.0049, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.0036306112393350797, |
| "grad_norm": 1.2547731399536133, |
| "learning_rate": 1.9999960915538407e-07, |
| "loss": 0.0051, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.003649134766066381, |
| "grad_norm": 0.8179420828819275, |
| "learning_rate": 1.99999600970384e-07, |
| "loss": 0.0043, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.0036676582927976822, |
| "grad_norm": 1.4426568746566772, |
| "learning_rate": 1.999995927005655e-07, |
| "loss": 0.0055, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.003686181819528984, |
| "grad_norm": 0.6915298104286194, |
| "learning_rate": 1.9999958434592856e-07, |
| "loss": 0.0053, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.003704705346260285, |
| "grad_norm": 1.888800859451294, |
| "learning_rate": 1.9999957590647323e-07, |
| "loss": 0.0052, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.0037232288729915864, |
| "grad_norm": 0.723024308681488, |
| "learning_rate": 1.9999956738219949e-07, |
| "loss": 0.0042, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.003741752399722888, |
| "grad_norm": 0.8231233954429626, |
| "learning_rate": 1.9999955877310735e-07, |
| "loss": 0.0053, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.0037602759264541894, |
| "grad_norm": 2.150519609451294, |
| "learning_rate": 1.999995500791968e-07, |
| "loss": 0.004, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.003778799453185491, |
| "grad_norm": 0.7455304265022278, |
| "learning_rate": 1.999995413004679e-07, |
| "loss": 0.0043, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.0037973229799167924, |
| "grad_norm": 0.4912494421005249, |
| "learning_rate": 1.9999953243692063e-07, |
| "loss": 0.0051, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.0038158465066480936, |
| "grad_norm": 1.3348478078842163, |
| "learning_rate": 1.9999952348855495e-07, |
| "loss": 0.0049, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.0038343700333793953, |
| "grad_norm": 1.7985830307006836, |
| "learning_rate": 1.9999951445537092e-07, |
| "loss": 0.005, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.0038528935601106966, |
| "grad_norm": 0.8237053751945496, |
| "learning_rate": 1.9999950533736856e-07, |
| "loss": 0.0055, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.003871417086841998, |
| "grad_norm": 1.7806153297424316, |
| "learning_rate": 1.9999949613454784e-07, |
| "loss": 0.0056, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.0038899406135732995, |
| "grad_norm": 1.068915843963623, |
| "learning_rate": 1.9999948684690878e-07, |
| "loss": 0.0046, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.003908464140304601, |
| "grad_norm": 0.7020597457885742, |
| "learning_rate": 1.999994774744514e-07, |
| "loss": 0.0059, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.003926987667035902, |
| "grad_norm": 0.2925936281681061, |
| "learning_rate": 1.9999946801717568e-07, |
| "loss": 0.0049, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.003945511193767203, |
| "grad_norm": 1.531053066253662, |
| "learning_rate": 1.9999945847508165e-07, |
| "loss": 0.0062, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.0039640347204985054, |
| "grad_norm": 1.1193791627883911, |
| "learning_rate": 1.9999944884816932e-07, |
| "loss": 0.0052, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.003982558247229807, |
| "grad_norm": 1.5744069814682007, |
| "learning_rate": 1.999994391364387e-07, |
| "loss": 0.0059, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.004001081773961108, |
| "grad_norm": 0.5359967350959778, |
| "learning_rate": 1.9999942933988977e-07, |
| "loss": 0.0039, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.004019605300692409, |
| "grad_norm": 0.6087894439697266, |
| "learning_rate": 1.9999941945852257e-07, |
| "loss": 0.0068, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.0040381288274237105, |
| "grad_norm": 1.3726937770843506, |
| "learning_rate": 1.9999940949233712e-07, |
| "loss": 0.0056, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.004056652354155013, |
| "grad_norm": 0.3861100673675537, |
| "learning_rate": 1.9999939944133337e-07, |
| "loss": 0.0045, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.004075175880886314, |
| "grad_norm": 0.9140152335166931, |
| "learning_rate": 1.9999938930551136e-07, |
| "loss": 0.005, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.004093699407617615, |
| "grad_norm": 0.4741251468658447, |
| "learning_rate": 1.9999937908487115e-07, |
| "loss": 0.0054, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.004112222934348916, |
| "grad_norm": 1.070580244064331, |
| "learning_rate": 1.999993687794127e-07, |
| "loss": 0.0045, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.004130746461080218, |
| "grad_norm": 1.9602667093276978, |
| "learning_rate": 1.9999935838913595e-07, |
| "loss": 0.0061, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.00414926998781152, |
| "grad_norm": 0.716974139213562, |
| "learning_rate": 1.9999934791404104e-07, |
| "loss": 0.0065, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.004167793514542821, |
| "grad_norm": 0.4090704619884491, |
| "learning_rate": 1.9999933735412787e-07, |
| "loss": 0.0041, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.004186317041274122, |
| "grad_norm": 1.1619179248809814, |
| "learning_rate": 1.9999932670939653e-07, |
| "loss": 0.0061, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.0042048405680054236, |
| "grad_norm": 1.9769097566604614, |
| "learning_rate": 1.99999315979847e-07, |
| "loss": 0.006, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.004223364094736725, |
| "grad_norm": 0.9041718244552612, |
| "learning_rate": 1.9999930516547928e-07, |
| "loss": 0.0047, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.004241887621468027, |
| "grad_norm": 0.16252444684505463, |
| "learning_rate": 1.999992942662934e-07, |
| "loss": 0.004, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.004260411148199328, |
| "grad_norm": 9.678218841552734, |
| "learning_rate": 1.999992832822893e-07, |
| "loss": 0.0049, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.0042789346749306295, |
| "grad_norm": 1.4154443740844727, |
| "learning_rate": 1.999992722134671e-07, |
| "loss": 0.0056, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.004297458201661931, |
| "grad_norm": 0.8507960438728333, |
| "learning_rate": 1.9999926105982671e-07, |
| "loss": 0.0053, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.004315981728393232, |
| "grad_norm": 0.5233428478240967, |
| "learning_rate": 1.9999924982136819e-07, |
| "loss": 0.0049, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.004334505255124533, |
| "grad_norm": 1.7477030754089355, |
| "learning_rate": 1.9999923849809156e-07, |
| "loss": 0.0059, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.004353028781855835, |
| "grad_norm": 0.7653055787086487, |
| "learning_rate": 1.9999922708999682e-07, |
| "loss": 0.0046, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.004371552308587137, |
| "grad_norm": 0.8168227076530457, |
| "learning_rate": 1.9999921559708396e-07, |
| "loss": 0.0049, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.004390075835318438, |
| "grad_norm": 0.8274291157722473, |
| "learning_rate": 1.9999920401935297e-07, |
| "loss": 0.0043, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.004408599362049739, |
| "grad_norm": 0.38084548711776733, |
| "learning_rate": 1.9999919235680392e-07, |
| "loss": 0.0049, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.00442712288878104, |
| "grad_norm": 1.6642783880233765, |
| "learning_rate": 1.9999918060943677e-07, |
| "loss": 0.0045, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.0044456464155123426, |
| "grad_norm": 1.0011886358261108, |
| "learning_rate": 1.9999916877725158e-07, |
| "loss": 0.0047, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.004464169942243644, |
| "grad_norm": 1.3866627216339111, |
| "learning_rate": 1.9999915686024828e-07, |
| "loss": 0.0046, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.004482693468974945, |
| "grad_norm": 1.1994725465774536, |
| "learning_rate": 1.9999914485842698e-07, |
| "loss": 0.0056, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.004501216995706246, |
| "grad_norm": 0.9241150617599487, |
| "learning_rate": 1.9999913277178761e-07, |
| "loss": 0.0048, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.004519740522437548, |
| "grad_norm": 0.8636120557785034, |
| "learning_rate": 1.9999912060033024e-07, |
| "loss": 0.0051, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.00453826404916885, |
| "grad_norm": 1.1372368335723877, |
| "learning_rate": 1.9999910834405482e-07, |
| "loss": 0.0055, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.004556787575900151, |
| "grad_norm": 0.6265618801116943, |
| "learning_rate": 1.9999909600296138e-07, |
| "loss": 0.0057, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.004575311102631452, |
| "grad_norm": 0.8580017685890198, |
| "learning_rate": 1.9999908357704998e-07, |
| "loss": 0.0048, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.0045938346293627535, |
| "grad_norm": 1.852146863937378, |
| "learning_rate": 1.999990710663206e-07, |
| "loss": 0.0054, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.004612358156094055, |
| "grad_norm": 1.1779755353927612, |
| "learning_rate": 1.999990584707732e-07, |
| "loss": 0.0048, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.004630881682825357, |
| "grad_norm": 0.8981501460075378, |
| "learning_rate": 1.9999904579040786e-07, |
| "loss": 0.0052, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.004649405209556658, |
| "grad_norm": 1.129531979560852, |
| "learning_rate": 1.9999903302522454e-07, |
| "loss": 0.006, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.004667928736287959, |
| "grad_norm": 2.5348591804504395, |
| "learning_rate": 1.999990201752233e-07, |
| "loss": 0.0064, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.004686452263019261, |
| "grad_norm": 0.21628016233444214, |
| "learning_rate": 1.9999900724040414e-07, |
| "loss": 0.0051, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.004704975789750562, |
| "grad_norm": 1.3315670490264893, |
| "learning_rate": 1.99998994220767e-07, |
| "loss": 0.0042, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.004723499316481863, |
| "grad_norm": 0.9182688593864441, |
| "learning_rate": 1.99998981116312e-07, |
| "loss": 0.0055, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.004742022843213165, |
| "grad_norm": 1.2962735891342163, |
| "learning_rate": 1.9999896792703908e-07, |
| "loss": 0.0051, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.004760546369944467, |
| "grad_norm": 7.547693252563477, |
| "learning_rate": 1.9999895465294827e-07, |
| "loss": 0.0044, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.004779069896675768, |
| "grad_norm": 1.5398882627487183, |
| "learning_rate": 1.999989412940396e-07, |
| "loss": 0.0043, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.004797593423407069, |
| "grad_norm": 1.5096334218978882, |
| "learning_rate": 1.99998927850313e-07, |
| "loss": 0.0045, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.00481611695013837, |
| "grad_norm": 0.874131977558136, |
| "learning_rate": 1.999989143217686e-07, |
| "loss": 0.0039, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.0048346404768696725, |
| "grad_norm": 3.5819127559661865, |
| "learning_rate": 1.9999890070840634e-07, |
| "loss": 0.0058, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.004853164003600974, |
| "grad_norm": 0.8997588753700256, |
| "learning_rate": 1.9999888701022626e-07, |
| "loss": 0.005, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.004871687530332275, |
| "grad_norm": 1.1501762866973877, |
| "learning_rate": 1.9999887322722835e-07, |
| "loss": 0.0048, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.004890211057063576, |
| "grad_norm": 0.8608025908470154, |
| "learning_rate": 1.9999885935941263e-07, |
| "loss": 0.0046, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.0049087345837948776, |
| "grad_norm": 4.227169990539551, |
| "learning_rate": 1.9999884540677909e-07, |
| "loss": 0.004, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.00492725811052618, |
| "grad_norm": 0.6507948040962219, |
| "learning_rate": 1.999988313693278e-07, |
| "loss": 0.0047, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.004945781637257481, |
| "grad_norm": 0.269436240196228, |
| "learning_rate": 1.9999881724705872e-07, |
| "loss": 0.0059, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.004964305163988782, |
| "grad_norm": 0.5552330017089844, |
| "learning_rate": 1.9999880303997187e-07, |
| "loss": 0.0048, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.0049828286907200835, |
| "grad_norm": 0.48505863547325134, |
| "learning_rate": 1.9999878874806727e-07, |
| "loss": 0.0053, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.005001352217451385, |
| "grad_norm": 0.791957437992096, |
| "learning_rate": 1.9999877437134498e-07, |
| "loss": 0.0051, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.005019875744182687, |
| "grad_norm": 1.0681192874908447, |
| "learning_rate": 1.9999875990980493e-07, |
| "loss": 0.0064, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.005038399270913988, |
| "grad_norm": 0.896776556968689, |
| "learning_rate": 1.9999874536344714e-07, |
| "loss": 0.0056, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.005056922797645289, |
| "grad_norm": 1.3150254487991333, |
| "learning_rate": 1.9999873073227167e-07, |
| "loss": 0.0045, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.005075446324376591, |
| "grad_norm": 0.9047895073890686, |
| "learning_rate": 1.999987160162785e-07, |
| "loss": 0.0044, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.005093969851107892, |
| "grad_norm": 1.2773643732070923, |
| "learning_rate": 1.9999870121546768e-07, |
| "loss": 0.0043, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.005112493377839193, |
| "grad_norm": 0.935293436050415, |
| "learning_rate": 1.9999868632983917e-07, |
| "loss": 0.0048, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.005131016904570495, |
| "grad_norm": 2.0093040466308594, |
| "learning_rate": 1.9999867135939302e-07, |
| "loss": 0.0063, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.0051495404313017966, |
| "grad_norm": 0.46760520339012146, |
| "learning_rate": 1.9999865630412923e-07, |
| "loss": 0.0044, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.005168063958033098, |
| "grad_norm": 0.5718618631362915, |
| "learning_rate": 1.9999864116404782e-07, |
| "loss": 0.0045, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.005186587484764399, |
| "grad_norm": 0.9216085076332092, |
| "learning_rate": 1.999986259391488e-07, |
| "loss": 0.0053, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.0052051110114957, |
| "grad_norm": 0.9476675987243652, |
| "learning_rate": 1.999986106294322e-07, |
| "loss": 0.0039, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.0052236345382270025, |
| "grad_norm": 0.8792651891708374, |
| "learning_rate": 1.9999859523489796e-07, |
| "loss": 0.0045, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.005242158064958304, |
| "grad_norm": 0.669017493724823, |
| "learning_rate": 1.999985797555462e-07, |
| "loss": 0.0043, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.005260681591689605, |
| "grad_norm": 0.9229434728622437, |
| "learning_rate": 1.9999856419137685e-07, |
| "loss": 0.0042, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.005279205118420906, |
| "grad_norm": 0.9118908047676086, |
| "learning_rate": 1.9999854854238994e-07, |
| "loss": 0.0044, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.0052977286451522075, |
| "grad_norm": 1.455817699432373, |
| "learning_rate": 1.9999853280858555e-07, |
| "loss": 0.0051, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.00531625217188351, |
| "grad_norm": 0.6333860754966736, |
| "learning_rate": 1.9999851698996357e-07, |
| "loss": 0.0038, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.005334775698614811, |
| "grad_norm": 1.3585294485092163, |
| "learning_rate": 1.9999850108652413e-07, |
| "loss": 0.0045, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.005353299225346112, |
| "grad_norm": 1.1225873231887817, |
| "learning_rate": 1.9999848509826718e-07, |
| "loss": 0.0067, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.005371822752077413, |
| "grad_norm": 1.4071152210235596, |
| "learning_rate": 1.9999846902519274e-07, |
| "loss": 0.0062, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.005390346278808715, |
| "grad_norm": 2.3899426460266113, |
| "learning_rate": 1.9999845286730084e-07, |
| "loss": 0.0049, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.005408869805540016, |
| "grad_norm": 1.3004745244979858, |
| "learning_rate": 1.999984366245915e-07, |
| "loss": 0.0055, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.005427393332271318, |
| "grad_norm": 1.381594181060791, |
| "learning_rate": 1.999984202970647e-07, |
| "loss": 0.0051, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.005445916859002619, |
| "grad_norm": 1.4161776304244995, |
| "learning_rate": 1.9999840388472048e-07, |
| "loss": 0.0042, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.005464440385733921, |
| "grad_norm": 0.3958333432674408, |
| "learning_rate": 1.9999838738755886e-07, |
| "loss": 0.0045, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.005482963912465222, |
| "grad_norm": 0.7790775895118713, |
| "learning_rate": 1.9999837080557985e-07, |
| "loss": 0.0051, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.005501487439196523, |
| "grad_norm": 0.958569347858429, |
| "learning_rate": 1.9999835413878344e-07, |
| "loss": 0.0039, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.005520010965927825, |
| "grad_norm": 1.5460960865020752, |
| "learning_rate": 1.9999833738716965e-07, |
| "loss": 0.0056, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.0055385344926591265, |
| "grad_norm": 0.8738213777542114, |
| "learning_rate": 1.999983205507385e-07, |
| "loss": 0.0041, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.005557058019390428, |
| "grad_norm": 2.061203718185425, |
| "learning_rate": 1.9999830362949006e-07, |
| "loss": 0.0049, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.005575581546121729, |
| "grad_norm": 1.1606186628341675, |
| "learning_rate": 1.9999828662342426e-07, |
| "loss": 0.0048, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.00559410507285303, |
| "grad_norm": 1.3103594779968262, |
| "learning_rate": 1.9999826953254114e-07, |
| "loss": 0.0048, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.005612628599584332, |
| "grad_norm": 0.8851433396339417, |
| "learning_rate": 1.9999825235684074e-07, |
| "loss": 0.0046, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.005631152126315634, |
| "grad_norm": 0.7132815718650818, |
| "learning_rate": 1.9999823509632305e-07, |
| "loss": 0.0041, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.005649675653046935, |
| "grad_norm": 1.057056188583374, |
| "learning_rate": 1.9999821775098807e-07, |
| "loss": 0.005, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.005668199179778236, |
| "grad_norm": 1.0691920518875122, |
| "learning_rate": 1.9999820032083588e-07, |
| "loss": 0.0044, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.0056867227065095375, |
| "grad_norm": 0.327333927154541, |
| "learning_rate": 1.9999818280586642e-07, |
| "loss": 0.0042, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.00570524623324084, |
| "grad_norm": 0.7470158934593201, |
| "learning_rate": 1.9999816520607973e-07, |
| "loss": 0.0041, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.005723769759972141, |
| "grad_norm": 0.6722580194473267, |
| "learning_rate": 1.9999814752147585e-07, |
| "loss": 0.0041, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.005742293286703442, |
| "grad_norm": 2.096712350845337, |
| "learning_rate": 1.9999812975205478e-07, |
| "loss": 0.0057, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.005760816813434743, |
| "grad_norm": 1.4661240577697754, |
| "learning_rate": 1.999981118978165e-07, |
| "loss": 0.0054, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.005779340340166045, |
| "grad_norm": 0.30769485235214233, |
| "learning_rate": 1.999980939587611e-07, |
| "loss": 0.0051, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.005797863866897346, |
| "grad_norm": 0.7385175228118896, |
| "learning_rate": 1.9999807593488852e-07, |
| "loss": 0.0053, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.005816387393628648, |
| "grad_norm": 2.1081535816192627, |
| "learning_rate": 1.9999805782619883e-07, |
| "loss": 0.0061, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.005834910920359949, |
| "grad_norm": 0.7908421754837036, |
| "learning_rate": 1.99998039632692e-07, |
| "loss": 0.0054, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.0058534344470912505, |
| "grad_norm": 0.39774444699287415, |
| "learning_rate": 1.9999802135436808e-07, |
| "loss": 0.0052, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.005871957973822552, |
| "grad_norm": 1.0579779148101807, |
| "learning_rate": 1.9999800299122707e-07, |
| "loss": 0.0055, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.005890481500553853, |
| "grad_norm": 1.3338305950164795, |
| "learning_rate": 1.9999798454326897e-07, |
| "loss": 0.0072, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.005909005027285155, |
| "grad_norm": 0.5270975828170776, |
| "learning_rate": 1.9999796601049384e-07, |
| "loss": 0.0047, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.0059275285540164565, |
| "grad_norm": 1.0779296159744263, |
| "learning_rate": 1.9999794739290167e-07, |
| "loss": 0.0043, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.005946052080747758, |
| "grad_norm": 0.4525056779384613, |
| "learning_rate": 1.9999792869049246e-07, |
| "loss": 0.0043, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.005964575607479059, |
| "grad_norm": 6.339492321014404, |
| "learning_rate": 1.9999790990326625e-07, |
| "loss": 0.0047, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.00598309913421036, |
| "grad_norm": 0.6705578565597534, |
| "learning_rate": 1.9999789103122305e-07, |
| "loss": 0.0041, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.006001622660941662, |
| "grad_norm": 0.5262556076049805, |
| "learning_rate": 1.9999787207436288e-07, |
| "loss": 0.005, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.006020146187672964, |
| "grad_norm": 1.3247629404067993, |
| "learning_rate": 1.9999785303268572e-07, |
| "loss": 0.0051, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.006038669714404265, |
| "grad_norm": 1.1291422843933105, |
| "learning_rate": 1.9999783390619163e-07, |
| "loss": 0.0042, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.006057193241135566, |
| "grad_norm": 3.261279821395874, |
| "learning_rate": 1.9999781469488063e-07, |
| "loss": 0.0046, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.006075716767866867, |
| "grad_norm": 1.149993896484375, |
| "learning_rate": 1.999977953987527e-07, |
| "loss": 0.0049, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.0060942402945981695, |
| "grad_norm": 1.764302372932434, |
| "learning_rate": 1.9999777601780789e-07, |
| "loss": 0.0047, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.006112763821329471, |
| "grad_norm": 1.9914242029190063, |
| "learning_rate": 1.9999775655204618e-07, |
| "loss": 0.0056, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.006131287348060772, |
| "grad_norm": 0.5566918253898621, |
| "learning_rate": 1.999977370014676e-07, |
| "loss": 0.0053, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.006149810874792073, |
| "grad_norm": 0.6487569212913513, |
| "learning_rate": 1.999977173660722e-07, |
| "loss": 0.0056, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.006168334401523375, |
| "grad_norm": 0.6536451578140259, |
| "learning_rate": 1.9999769764585998e-07, |
| "loss": 0.005, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.006186857928254676, |
| "grad_norm": 0.5939210057258606, |
| "learning_rate": 1.9999767784083093e-07, |
| "loss": 0.0051, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.006205381454985978, |
| "grad_norm": 0.661088764667511, |
| "learning_rate": 1.9999765795098508e-07, |
| "loss": 0.0048, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.006223904981717279, |
| "grad_norm": 1.5042343139648438, |
| "learning_rate": 1.9999763797632246e-07, |
| "loss": 0.0049, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.0062424285084485805, |
| "grad_norm": 1.408437967300415, |
| "learning_rate": 1.9999761791684308e-07, |
| "loss": 0.0066, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.006260952035179882, |
| "grad_norm": 1.376222014427185, |
| "learning_rate": 1.9999759777254694e-07, |
| "loss": 0.0044, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.006279475561911183, |
| "grad_norm": 1.3451160192489624, |
| "learning_rate": 1.9999757754343407e-07, |
| "loss": 0.0046, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.006297999088642485, |
| "grad_norm": 0.9029920697212219, |
| "learning_rate": 1.999975572295045e-07, |
| "loss": 0.0051, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.006316522615373786, |
| "grad_norm": 0.5186226963996887, |
| "learning_rate": 1.9999753683075827e-07, |
| "loss": 0.0041, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.006335046142105088, |
| "grad_norm": 1.0144044160842896, |
| "learning_rate": 1.9999751634719532e-07, |
| "loss": 0.006, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.006353569668836389, |
| "grad_norm": 1.5741573572158813, |
| "learning_rate": 1.999974957788157e-07, |
| "loss": 0.0053, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.00637209319556769, |
| "grad_norm": 1.4413450956344604, |
| "learning_rate": 1.9999747512561948e-07, |
| "loss": 0.0061, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.006390616722298992, |
| "grad_norm": 1.8290027379989624, |
| "learning_rate": 1.999974543876066e-07, |
| "loss": 0.0055, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.006409140249030294, |
| "grad_norm": 1.3130360841751099, |
| "learning_rate": 1.9999743356477713e-07, |
| "loss": 0.0043, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.006427663775761595, |
| "grad_norm": 1.1752779483795166, |
| "learning_rate": 1.999974126571311e-07, |
| "loss": 0.0046, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.006446187302492896, |
| "grad_norm": 1.6620230674743652, |
| "learning_rate": 1.9999739166466845e-07, |
| "loss": 0.0056, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.006464710829224197, |
| "grad_norm": 1.2153129577636719, |
| "learning_rate": 1.9999737058738927e-07, |
| "loss": 0.0055, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.0064832343559554995, |
| "grad_norm": 0.49758902192115784, |
| "learning_rate": 1.9999734942529356e-07, |
| "loss": 0.0052, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.006501757882686801, |
| "grad_norm": 1.0197575092315674, |
| "learning_rate": 1.9999732817838134e-07, |
| "loss": 0.0056, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.006520281409418102, |
| "grad_norm": 0.8856931328773499, |
| "learning_rate": 1.999973068466526e-07, |
| "loss": 0.0041, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.006538804936149403, |
| "grad_norm": 0.7209140062332153, |
| "learning_rate": 1.9999728543010738e-07, |
| "loss": 0.0044, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.0065573284628807045, |
| "grad_norm": 0.9796051383018494, |
| "learning_rate": 1.9999726392874573e-07, |
| "loss": 0.0044, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.006575851989612006, |
| "grad_norm": 1.0534104108810425, |
| "learning_rate": 1.999972423425676e-07, |
| "loss": 0.0051, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.006594375516343308, |
| "grad_norm": 0.42800286412239075, |
| "learning_rate": 1.9999722067157303e-07, |
| "loss": 0.0053, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.006612899043074609, |
| "grad_norm": 0.625129222869873, |
| "learning_rate": 1.999971989157621e-07, |
| "loss": 0.0049, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.0066314225698059105, |
| "grad_norm": 1.3979207277297974, |
| "learning_rate": 1.9999717707513475e-07, |
| "loss": 0.0044, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.006649946096537212, |
| "grad_norm": 1.9017460346221924, |
| "learning_rate": 1.9999715514969102e-07, |
| "loss": 0.0063, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.006668469623268513, |
| "grad_norm": 0.6765379309654236, |
| "learning_rate": 1.9999713313943096e-07, |
| "loss": 0.0048, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.006686993149999815, |
| "grad_norm": 1.4709538221359253, |
| "learning_rate": 1.9999711104435458e-07, |
| "loss": 0.0045, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.006705516676731116, |
| "grad_norm": 2.09368896484375, |
| "learning_rate": 1.9999708886446186e-07, |
| "loss": 0.0047, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.006724040203462418, |
| "grad_norm": 0.8782196640968323, |
| "learning_rate": 1.9999706659975284e-07, |
| "loss": 0.0043, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.006742563730193719, |
| "grad_norm": 0.948312520980835, |
| "learning_rate": 1.9999704425022755e-07, |
| "loss": 0.0051, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.00676108725692502, |
| "grad_norm": 3.337427854537964, |
| "learning_rate": 1.99997021815886e-07, |
| "loss": 0.0056, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.006779610783656322, |
| "grad_norm": 0.8315445184707642, |
| "learning_rate": 1.9999699929672822e-07, |
| "loss": 0.0053, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.0067981343103876235, |
| "grad_norm": 0.620729923248291, |
| "learning_rate": 1.999969766927542e-07, |
| "loss": 0.0046, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.006816657837118925, |
| "grad_norm": 1.029213547706604, |
| "learning_rate": 1.9999695400396401e-07, |
| "loss": 0.0056, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.006835181363850226, |
| "grad_norm": 0.3915248513221741, |
| "learning_rate": 1.999969312303576e-07, |
| "loss": 0.0047, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.006853704890581527, |
| "grad_norm": 1.6428319215774536, |
| "learning_rate": 1.9999690837193505e-07, |
| "loss": 0.0045, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.0068722284173128294, |
| "grad_norm": 0.5545074343681335, |
| "learning_rate": 1.9999688542869637e-07, |
| "loss": 0.0046, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.006890751944044131, |
| "grad_norm": 0.47737884521484375, |
| "learning_rate": 1.9999686240064154e-07, |
| "loss": 0.0044, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.006909275470775432, |
| "grad_norm": 0.8470133543014526, |
| "learning_rate": 1.9999683928777062e-07, |
| "loss": 0.0072, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.006927798997506733, |
| "grad_norm": 1.68419349193573, |
| "learning_rate": 1.999968160900836e-07, |
| "loss": 0.0057, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.0069463225242380345, |
| "grad_norm": 0.7402858138084412, |
| "learning_rate": 1.9999679280758056e-07, |
| "loss": 0.0051, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.006964846050969336, |
| "grad_norm": 1.7464038133621216, |
| "learning_rate": 1.9999676944026144e-07, |
| "loss": 0.0041, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.006983369577700638, |
| "grad_norm": 1.3768118619918823, |
| "learning_rate": 1.999967459881263e-07, |
| "loss": 0.0045, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.007001893104431939, |
| "grad_norm": 0.40433743596076965, |
| "learning_rate": 1.9999672245117515e-07, |
| "loss": 0.0033, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.00702041663116324, |
| "grad_norm": 1.2718610763549805, |
| "learning_rate": 1.9999669882940802e-07, |
| "loss": 0.005, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.007038940157894542, |
| "grad_norm": 1.7019349336624146, |
| "learning_rate": 1.9999667512282489e-07, |
| "loss": 0.0052, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.007057463684625843, |
| "grad_norm": 1.3705981969833374, |
| "learning_rate": 1.9999665133142588e-07, |
| "loss": 0.0044, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.007075987211357145, |
| "grad_norm": 0.5234670042991638, |
| "learning_rate": 1.999966274552109e-07, |
| "loss": 0.0049, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.007094510738088446, |
| "grad_norm": 1.444151759147644, |
| "learning_rate": 1.9999660349418002e-07, |
| "loss": 0.0047, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.007113034264819748, |
| "grad_norm": 1.250465989112854, |
| "learning_rate": 1.999965794483333e-07, |
| "loss": 0.0049, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.007131557791551049, |
| "grad_norm": 1.5127027034759521, |
| "learning_rate": 1.9999655531767067e-07, |
| "loss": 0.0061, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.00715008131828235, |
| "grad_norm": 1.0191987752914429, |
| "learning_rate": 1.999965311021922e-07, |
| "loss": 0.0042, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.007168604845013652, |
| "grad_norm": 0.94724440574646, |
| "learning_rate": 1.999965068018979e-07, |
| "loss": 0.0077, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.0071871283717449535, |
| "grad_norm": 0.9621548056602478, |
| "learning_rate": 1.9999648241678782e-07, |
| "loss": 0.005, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.007205651898476255, |
| "grad_norm": 1.3939456939697266, |
| "learning_rate": 1.9999645794686195e-07, |
| "loss": 0.0053, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.007224175425207556, |
| "grad_norm": 1.8091320991516113, |
| "learning_rate": 1.9999643339212032e-07, |
| "loss": 0.0065, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.007242698951938857, |
| "grad_norm": 0.5781366229057312, |
| "learning_rate": 1.9999640875256295e-07, |
| "loss": 0.0054, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.007261222478670159, |
| "grad_norm": 0.626268208026886, |
| "learning_rate": 1.9999638402818984e-07, |
| "loss": 0.0054, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.007279746005401461, |
| "grad_norm": 0.8427907824516296, |
| "learning_rate": 1.9999635921900105e-07, |
| "loss": 0.0044, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.007298269532132762, |
| "grad_norm": 0.8691850304603577, |
| "learning_rate": 1.999963343249966e-07, |
| "loss": 0.0052, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.007316793058864063, |
| "grad_norm": 1.103049397468567, |
| "learning_rate": 1.9999630934617646e-07, |
| "loss": 0.0054, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.0073353165855953644, |
| "grad_norm": 1.3710514307022095, |
| "learning_rate": 1.9999628428254071e-07, |
| "loss": 0.0065, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.007353840112326666, |
| "grad_norm": 0.7242420315742493, |
| "learning_rate": 1.9999625913408934e-07, |
| "loss": 0.0057, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.007372363639057968, |
| "grad_norm": 1.1996089220046997, |
| "learning_rate": 1.9999623390082236e-07, |
| "loss": 0.0046, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.007390887165789269, |
| "grad_norm": 1.4444879293441772, |
| "learning_rate": 1.9999620858273985e-07, |
| "loss": 0.0049, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.00740941069252057, |
| "grad_norm": 1.1874390840530396, |
| "learning_rate": 1.9999618317984176e-07, |
| "loss": 0.004, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.007427934219251872, |
| "grad_norm": 0.9472229480743408, |
| "learning_rate": 1.9999615769212812e-07, |
| "loss": 0.0038, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.007446457745983173, |
| "grad_norm": 0.5600486993789673, |
| "learning_rate": 1.99996132119599e-07, |
| "loss": 0.0034, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.007464981272714475, |
| "grad_norm": 0.6269398331642151, |
| "learning_rate": 1.999961064622544e-07, |
| "loss": 0.005, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.007483504799445776, |
| "grad_norm": 1.4484384059906006, |
| "learning_rate": 1.9999608072009435e-07, |
| "loss": 0.0053, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.0075020283261770775, |
| "grad_norm": 0.8751400709152222, |
| "learning_rate": 1.9999605489311884e-07, |
| "loss": 0.0049, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.007520551852908379, |
| "grad_norm": 0.8875912427902222, |
| "learning_rate": 1.999960289813279e-07, |
| "loss": 0.0048, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.00753907537963968, |
| "grad_norm": 1.4428391456604004, |
| "learning_rate": 1.999960029847216e-07, |
| "loss": 0.0043, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.007557598906370982, |
| "grad_norm": 0.790433943271637, |
| "learning_rate": 1.999959769032999e-07, |
| "loss": 0.0042, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.0075761224331022834, |
| "grad_norm": 0.8253072500228882, |
| "learning_rate": 1.9999595073706284e-07, |
| "loss": 0.005, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.007594645959833585, |
| "grad_norm": 0.582712709903717, |
| "learning_rate": 1.9999592448601046e-07, |
| "loss": 0.0062, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.007613169486564886, |
| "grad_norm": 0.4836924970149994, |
| "learning_rate": 1.9999589815014274e-07, |
| "loss": 0.0054, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.007631693013296187, |
| "grad_norm": 0.7537421584129333, |
| "learning_rate": 1.9999587172945977e-07, |
| "loss": 0.0044, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.0076502165400274885, |
| "grad_norm": 0.68345707654953, |
| "learning_rate": 1.9999584522396153e-07, |
| "loss": 0.0061, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.007668740066758791, |
| "grad_norm": 1.3512098789215088, |
| "learning_rate": 1.9999581863364808e-07, |
| "loss": 0.0046, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.007687263593490092, |
| "grad_norm": 0.40522634983062744, |
| "learning_rate": 1.9999579195851937e-07, |
| "loss": 0.0051, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.007705787120221393, |
| "grad_norm": 1.8822197914123535, |
| "learning_rate": 1.9999576519857547e-07, |
| "loss": 0.0053, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.007724310646952694, |
| "grad_norm": 1.395050287246704, |
| "learning_rate": 1.999957383538164e-07, |
| "loss": 0.0057, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.007742834173683996, |
| "grad_norm": 0.6531908512115479, |
| "learning_rate": 1.999957114242422e-07, |
| "loss": 0.0044, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.007761357700415298, |
| "grad_norm": 1.163049340248108, |
| "learning_rate": 1.9999568440985283e-07, |
| "loss": 0.0038, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.007779881227146599, |
| "grad_norm": 0.6923274993896484, |
| "learning_rate": 1.9999565731064837e-07, |
| "loss": 0.004, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.0077984047538779, |
| "grad_norm": 1.1693150997161865, |
| "learning_rate": 1.9999563012662883e-07, |
| "loss": 0.0066, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.007816928280609202, |
| "grad_norm": 0.5887753367424011, |
| "learning_rate": 1.9999560285779423e-07, |
| "loss": 0.0061, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.007835451807340504, |
| "grad_norm": 1.0952030420303345, |
| "learning_rate": 1.9999557550414462e-07, |
| "loss": 0.0049, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.007853975334071804, |
| "grad_norm": 1.2115508317947388, |
| "learning_rate": 1.9999554806567995e-07, |
| "loss": 0.0052, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.007872498860803106, |
| "grad_norm": 0.5822485089302063, |
| "learning_rate": 1.9999552054240035e-07, |
| "loss": 0.0047, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.007891022387534407, |
| "grad_norm": 2.5040669441223145, |
| "learning_rate": 1.9999549293430574e-07, |
| "loss": 0.0052, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.007909545914265709, |
| "grad_norm": 1.0125981569290161, |
| "learning_rate": 1.9999546524139622e-07, |
| "loss": 0.0056, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.007928069440997011, |
| "grad_norm": 0.8981004953384399, |
| "learning_rate": 1.9999543746367175e-07, |
| "loss": 0.0037, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.007946592967728311, |
| "grad_norm": 0.6215224862098694, |
| "learning_rate": 1.999954096011324e-07, |
| "loss": 0.0052, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.007965116494459613, |
| "grad_norm": 1.0108771324157715, |
| "learning_rate": 1.9999538165377816e-07, |
| "loss": 0.0055, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.007983640021190914, |
| "grad_norm": 2.2663819789886475, |
| "learning_rate": 1.999953536216091e-07, |
| "loss": 0.0055, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.008002163547922216, |
| "grad_norm": 1.5759721994400024, |
| "learning_rate": 1.999953255046252e-07, |
| "loss": 0.0037, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.008020687074653518, |
| "grad_norm": 1.0464463233947754, |
| "learning_rate": 1.9999529730282649e-07, |
| "loss": 0.0059, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.008039210601384818, |
| "grad_norm": 0.29625359177589417, |
| "learning_rate": 1.9999526901621299e-07, |
| "loss": 0.0053, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.00805773412811612, |
| "grad_norm": 0.6446239352226257, |
| "learning_rate": 1.9999524064478476e-07, |
| "loss": 0.0051, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.008076257654847421, |
| "grad_norm": 0.7770497798919678, |
| "learning_rate": 1.9999521218854182e-07, |
| "loss": 0.0044, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.008094781181578723, |
| "grad_norm": 1.2534641027450562, |
| "learning_rate": 1.9999518364748415e-07, |
| "loss": 0.0056, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.008113304708310025, |
| "grad_norm": 1.418199896812439, |
| "learning_rate": 1.9999515502161183e-07, |
| "loss": 0.0035, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.008131828235041326, |
| "grad_norm": 0.65910404920578, |
| "learning_rate": 1.9999512631092482e-07, |
| "loss": 0.0043, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.008150351761772628, |
| "grad_norm": 0.7953601479530334, |
| "learning_rate": 1.999950975154232e-07, |
| "loss": 0.0056, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.008168875288503928, |
| "grad_norm": 0.41441935300827026, |
| "learning_rate": 1.9999506863510697e-07, |
| "loss": 0.0061, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.00818739881523523, |
| "grad_norm": 1.1818616390228271, |
| "learning_rate": 1.9999503966997616e-07, |
| "loss": 0.0054, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.008205922341966532, |
| "grad_norm": 0.8118964433670044, |
| "learning_rate": 1.9999501062003076e-07, |
| "loss": 0.0046, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.008224445868697833, |
| "grad_norm": 0.26739996671676636, |
| "learning_rate": 1.9999498148527086e-07, |
| "loss": 0.0058, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.008242969395429135, |
| "grad_norm": 0.9063378572463989, |
| "learning_rate": 1.9999495226569642e-07, |
| "loss": 0.0045, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.008261492922160435, |
| "grad_norm": 1.0673067569732666, |
| "learning_rate": 1.9999492296130753e-07, |
| "loss": 0.0043, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.008280016448891737, |
| "grad_norm": 0.9013051390647888, |
| "learning_rate": 1.9999489357210418e-07, |
| "loss": 0.0047, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.00829853997562304, |
| "grad_norm": 1.1533620357513428, |
| "learning_rate": 1.9999486409808636e-07, |
| "loss": 0.0041, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.00831706350235434, |
| "grad_norm": 2.932135820388794, |
| "learning_rate": 1.9999483453925417e-07, |
| "loss": 0.005, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.008335587029085642, |
| "grad_norm": 0.8070574402809143, |
| "learning_rate": 1.9999480489560758e-07, |
| "loss": 0.0046, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.008354110555816942, |
| "grad_norm": 1.250813364982605, |
| "learning_rate": 1.9999477516714664e-07, |
| "loss": 0.0056, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.008372634082548245, |
| "grad_norm": 1.0614657402038574, |
| "learning_rate": 1.9999474535387137e-07, |
| "loss": 0.0044, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.008391157609279547, |
| "grad_norm": 1.6173075437545776, |
| "learning_rate": 1.9999471545578177e-07, |
| "loss": 0.0052, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.008409681136010847, |
| "grad_norm": 1.833392858505249, |
| "learning_rate": 1.999946854728779e-07, |
| "loss": 0.0057, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.00842820466274215, |
| "grad_norm": 0.9398495554924011, |
| "learning_rate": 1.999946554051598e-07, |
| "loss": 0.006, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.00844672818947345, |
| "grad_norm": 1.2231231927871704, |
| "learning_rate": 1.999946252526274e-07, |
| "loss": 0.005, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.008465251716204752, |
| "grad_norm": 0.7262556552886963, |
| "learning_rate": 1.9999459501528084e-07, |
| "loss": 0.0052, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.008483775242936054, |
| "grad_norm": 0.685969889163971, |
| "learning_rate": 1.999945646931201e-07, |
| "loss": 0.0056, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.008502298769667354, |
| "grad_norm": 1.5113415718078613, |
| "learning_rate": 1.999945342861452e-07, |
| "loss": 0.0049, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.008520822296398656, |
| "grad_norm": 0.807433009147644, |
| "learning_rate": 1.9999450379435614e-07, |
| "loss": 0.0045, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.008539345823129957, |
| "grad_norm": 1.0939662456512451, |
| "learning_rate": 1.99994473217753e-07, |
| "loss": 0.0052, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.008557869349861259, |
| "grad_norm": 1.0202559232711792, |
| "learning_rate": 1.999944425563358e-07, |
| "loss": 0.0055, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.00857639287659256, |
| "grad_norm": 0.756401777267456, |
| "learning_rate": 1.9999441181010455e-07, |
| "loss": 0.005, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.008594916403323861, |
| "grad_norm": 0.5749719738960266, |
| "learning_rate": 1.9999438097905922e-07, |
| "loss": 0.004, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.008613439930055164, |
| "grad_norm": 0.9044076800346375, |
| "learning_rate": 1.9999435006319994e-07, |
| "loss": 0.0049, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.008631963456786464, |
| "grad_norm": 0.7828972339630127, |
| "learning_rate": 1.9999431906252668e-07, |
| "loss": 0.0044, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.008650486983517766, |
| "grad_norm": 1.7968603372573853, |
| "learning_rate": 1.9999428797703947e-07, |
| "loss": 0.0057, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.008669010510249067, |
| "grad_norm": 0.6785223484039307, |
| "learning_rate": 1.9999425680673836e-07, |
| "loss": 0.0045, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.008687534036980369, |
| "grad_norm": 0.853285014629364, |
| "learning_rate": 1.9999422555162333e-07, |
| "loss": 0.0038, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.00870605756371167, |
| "grad_norm": 1.1492109298706055, |
| "learning_rate": 1.9999419421169442e-07, |
| "loss": 0.0046, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.008724581090442971, |
| "grad_norm": 1.902663230895996, |
| "learning_rate": 1.999941627869517e-07, |
| "loss": 0.0068, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.008743104617174273, |
| "grad_norm": 0.21514450013637543, |
| "learning_rate": 1.9999413127739512e-07, |
| "loss": 0.0042, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.008761628143905574, |
| "grad_norm": 0.831731379032135, |
| "learning_rate": 1.9999409968302482e-07, |
| "loss": 0.005, |
| "step": 4730 |
| }, |
| { |
| "epoch": 0.008780151670636876, |
| "grad_norm": 0.4649916887283325, |
| "learning_rate": 1.999940680038407e-07, |
| "loss": 0.0049, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.008798675197368178, |
| "grad_norm": 0.7050091028213501, |
| "learning_rate": 1.9999403623984287e-07, |
| "loss": 0.0048, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.008817198724099478, |
| "grad_norm": 0.9163200259208679, |
| "learning_rate": 1.9999400439103136e-07, |
| "loss": 0.0062, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.00883572225083078, |
| "grad_norm": 0.5314086675643921, |
| "learning_rate": 1.9999397245740612e-07, |
| "loss": 0.0033, |
| "step": 4770 |
| }, |
| { |
| "epoch": 0.00885424577756208, |
| "grad_norm": 0.9505736231803894, |
| "learning_rate": 1.9999394043896726e-07, |
| "loss": 0.005, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.008872769304293383, |
| "grad_norm": 0.9602097272872925, |
| "learning_rate": 1.9999390833571478e-07, |
| "loss": 0.0057, |
| "step": 4790 |
| }, |
| { |
| "epoch": 0.008891292831024685, |
| "grad_norm": 0.5842890739440918, |
| "learning_rate": 1.9999387614764865e-07, |
| "loss": 0.0052, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.008909816357755986, |
| "grad_norm": 0.7851259708404541, |
| "learning_rate": 1.99993843874769e-07, |
| "loss": 0.0051, |
| "step": 4810 |
| }, |
| { |
| "epoch": 0.008928339884487288, |
| "grad_norm": 1.0511106252670288, |
| "learning_rate": 1.999938115170758e-07, |
| "loss": 0.0045, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.008946863411218588, |
| "grad_norm": 1.6090624332427979, |
| "learning_rate": 1.9999377907456908e-07, |
| "loss": 0.0049, |
| "step": 4830 |
| }, |
| { |
| "epoch": 0.00896538693794989, |
| "grad_norm": 2.510429620742798, |
| "learning_rate": 1.9999374654724887e-07, |
| "loss": 0.0057, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.008983910464681192, |
| "grad_norm": 0.715458333492279, |
| "learning_rate": 1.999937139351152e-07, |
| "loss": 0.0053, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.009002433991412493, |
| "grad_norm": 0.7535446882247925, |
| "learning_rate": 1.9999368123816808e-07, |
| "loss": 0.0051, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.009020957518143795, |
| "grad_norm": 0.5744192600250244, |
| "learning_rate": 1.9999364845640756e-07, |
| "loss": 0.0042, |
| "step": 4870 |
| }, |
| { |
| "epoch": 0.009039481044875095, |
| "grad_norm": 0.613284707069397, |
| "learning_rate": 1.9999361558983369e-07, |
| "loss": 0.0061, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.009058004571606397, |
| "grad_norm": 0.6608142256736755, |
| "learning_rate": 1.999935826384464e-07, |
| "loss": 0.0055, |
| "step": 4890 |
| }, |
| { |
| "epoch": 0.0090765280983377, |
| "grad_norm": 0.8393628597259521, |
| "learning_rate": 1.9999354960224587e-07, |
| "loss": 0.0045, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.009095051625069, |
| "grad_norm": 0.5852001905441284, |
| "learning_rate": 1.99993516481232e-07, |
| "loss": 0.0045, |
| "step": 4910 |
| }, |
| { |
| "epoch": 0.009113575151800302, |
| "grad_norm": 0.7544299960136414, |
| "learning_rate": 1.999934832754049e-07, |
| "loss": 0.005, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.009132098678531602, |
| "grad_norm": 0.6234810948371887, |
| "learning_rate": 1.999934499847645e-07, |
| "loss": 0.0068, |
| "step": 4930 |
| }, |
| { |
| "epoch": 0.009150622205262905, |
| "grad_norm": 0.280820369720459, |
| "learning_rate": 1.9999341660931094e-07, |
| "loss": 0.0044, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.009169145731994207, |
| "grad_norm": 0.7477278113365173, |
| "learning_rate": 1.999933831490442e-07, |
| "loss": 0.0049, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.009187669258725507, |
| "grad_norm": 0.6096538305282593, |
| "learning_rate": 1.9999334960396427e-07, |
| "loss": 0.0054, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.00920619278545681, |
| "grad_norm": 1.1913049221038818, |
| "learning_rate": 1.9999331597407125e-07, |
| "loss": 0.0047, |
| "step": 4970 |
| }, |
| { |
| "epoch": 0.00922471631218811, |
| "grad_norm": 1.6365412473678589, |
| "learning_rate": 1.9999328225936511e-07, |
| "loss": 0.0066, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.009243239838919412, |
| "grad_norm": 1.3636044263839722, |
| "learning_rate": 1.9999324845984594e-07, |
| "loss": 0.0052, |
| "step": 4990 |
| }, |
| { |
| "epoch": 0.009261763365650714, |
| "grad_norm": 0.6262246966362, |
| "learning_rate": 1.999932145755137e-07, |
| "loss": 0.0042, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.009280286892382014, |
| "grad_norm": 1.2262002229690552, |
| "learning_rate": 1.9999318060636844e-07, |
| "loss": 0.0053, |
| "step": 5010 |
| }, |
| { |
| "epoch": 0.009298810419113316, |
| "grad_norm": 1.1981359720230103, |
| "learning_rate": 1.9999314655241023e-07, |
| "loss": 0.0043, |
| "step": 5020 |
| }, |
| { |
| "epoch": 0.009317333945844617, |
| "grad_norm": 0.8489042520523071, |
| "learning_rate": 1.9999311241363906e-07, |
| "loss": 0.0053, |
| "step": 5030 |
| }, |
| { |
| "epoch": 0.009335857472575919, |
| "grad_norm": 0.4504554867744446, |
| "learning_rate": 1.9999307819005495e-07, |
| "loss": 0.0043, |
| "step": 5040 |
| }, |
| { |
| "epoch": 0.00935438099930722, |
| "grad_norm": 0.5051777362823486, |
| "learning_rate": 1.9999304388165794e-07, |
| "loss": 0.0044, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.009372904526038521, |
| "grad_norm": 1.2746784687042236, |
| "learning_rate": 1.999930094884481e-07, |
| "loss": 0.0053, |
| "step": 5060 |
| }, |
| { |
| "epoch": 0.009391428052769824, |
| "grad_norm": 0.7270585298538208, |
| "learning_rate": 1.999929750104254e-07, |
| "loss": 0.0044, |
| "step": 5070 |
| }, |
| { |
| "epoch": 0.009409951579501124, |
| "grad_norm": 1.9962904453277588, |
| "learning_rate": 1.999929404475899e-07, |
| "loss": 0.0055, |
| "step": 5080 |
| }, |
| { |
| "epoch": 0.009428475106232426, |
| "grad_norm": 0.7217946648597717, |
| "learning_rate": 1.999929057999416e-07, |
| "loss": 0.0036, |
| "step": 5090 |
| }, |
| { |
| "epoch": 0.009446998632963726, |
| "grad_norm": 1.5632860660552979, |
| "learning_rate": 1.999928710674806e-07, |
| "loss": 0.0061, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.009465522159695029, |
| "grad_norm": 1.8371762037277222, |
| "learning_rate": 1.9999283625020683e-07, |
| "loss": 0.0061, |
| "step": 5110 |
| }, |
| { |
| "epoch": 0.00948404568642633, |
| "grad_norm": 2.0273938179016113, |
| "learning_rate": 1.9999280134812043e-07, |
| "loss": 0.0054, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.009502569213157631, |
| "grad_norm": 0.6358574628829956, |
| "learning_rate": 1.999927663612213e-07, |
| "loss": 0.0053, |
| "step": 5130 |
| }, |
| { |
| "epoch": 0.009521092739888933, |
| "grad_norm": 0.8530735373497009, |
| "learning_rate": 1.999927312895096e-07, |
| "loss": 0.005, |
| "step": 5140 |
| }, |
| { |
| "epoch": 0.009539616266620234, |
| "grad_norm": 0.886954128742218, |
| "learning_rate": 1.9999269613298525e-07, |
| "loss": 0.0056, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.009558139793351536, |
| "grad_norm": 0.4890105128288269, |
| "learning_rate": 1.9999266089164836e-07, |
| "loss": 0.0046, |
| "step": 5160 |
| }, |
| { |
| "epoch": 0.009576663320082838, |
| "grad_norm": 0.565142035484314, |
| "learning_rate": 1.9999262556549894e-07, |
| "loss": 0.0045, |
| "step": 5170 |
| }, |
| { |
| "epoch": 0.009595186846814138, |
| "grad_norm": 0.6378746032714844, |
| "learning_rate": 1.99992590154537e-07, |
| "loss": 0.0072, |
| "step": 5180 |
| }, |
| { |
| "epoch": 0.00961371037354544, |
| "grad_norm": 0.684836745262146, |
| "learning_rate": 1.9999255465876254e-07, |
| "loss": 0.0052, |
| "step": 5190 |
| }, |
| { |
| "epoch": 0.00963223390027674, |
| "grad_norm": 1.4691460132598877, |
| "learning_rate": 1.9999251907817567e-07, |
| "loss": 0.0046, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.009650757427008043, |
| "grad_norm": 1.2790758609771729, |
| "learning_rate": 1.999924834127764e-07, |
| "loss": 0.006, |
| "step": 5210 |
| }, |
| { |
| "epoch": 0.009669280953739345, |
| "grad_norm": 1.1134737730026245, |
| "learning_rate": 1.999924476625647e-07, |
| "loss": 0.0047, |
| "step": 5220 |
| }, |
| { |
| "epoch": 0.009687804480470645, |
| "grad_norm": 0.6474093794822693, |
| "learning_rate": 1.9999241182754064e-07, |
| "loss": 0.0057, |
| "step": 5230 |
| }, |
| { |
| "epoch": 0.009706328007201948, |
| "grad_norm": 0.5406485199928284, |
| "learning_rate": 1.9999237590770427e-07, |
| "loss": 0.0061, |
| "step": 5240 |
| }, |
| { |
| "epoch": 0.009724851533933248, |
| "grad_norm": 0.6851491928100586, |
| "learning_rate": 1.999923399030556e-07, |
| "loss": 0.0047, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.00974337506066455, |
| "grad_norm": 1.137979507446289, |
| "learning_rate": 1.9999230381359468e-07, |
| "loss": 0.006, |
| "step": 5260 |
| }, |
| { |
| "epoch": 0.009761898587395852, |
| "grad_norm": 0.386147141456604, |
| "learning_rate": 1.999922676393215e-07, |
| "loss": 0.0046, |
| "step": 5270 |
| }, |
| { |
| "epoch": 0.009780422114127153, |
| "grad_norm": 1.505621075630188, |
| "learning_rate": 1.999922313802361e-07, |
| "loss": 0.0042, |
| "step": 5280 |
| }, |
| { |
| "epoch": 0.009798945640858455, |
| "grad_norm": 1.4938277006149292, |
| "learning_rate": 1.9999219503633854e-07, |
| "loss": 0.0046, |
| "step": 5290 |
| }, |
| { |
| "epoch": 0.009817469167589755, |
| "grad_norm": 0.9566072225570679, |
| "learning_rate": 1.9999215860762882e-07, |
| "loss": 0.0047, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.009835992694321057, |
| "grad_norm": 0.6391525268554688, |
| "learning_rate": 1.99992122094107e-07, |
| "loss": 0.0054, |
| "step": 5310 |
| }, |
| { |
| "epoch": 0.00985451622105236, |
| "grad_norm": 0.7227911949157715, |
| "learning_rate": 1.9999208549577312e-07, |
| "loss": 0.0039, |
| "step": 5320 |
| }, |
| { |
| "epoch": 0.00987303974778366, |
| "grad_norm": 1.283530831336975, |
| "learning_rate": 1.9999204881262715e-07, |
| "loss": 0.0055, |
| "step": 5330 |
| }, |
| { |
| "epoch": 0.009891563274514962, |
| "grad_norm": 0.8534697890281677, |
| "learning_rate": 1.9999201204466915e-07, |
| "loss": 0.0045, |
| "step": 5340 |
| }, |
| { |
| "epoch": 0.009910086801246262, |
| "grad_norm": 1.049355149269104, |
| "learning_rate": 1.999919751918992e-07, |
| "loss": 0.0052, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.009928610327977564, |
| "grad_norm": 1.9515596628189087, |
| "learning_rate": 1.9999193825431727e-07, |
| "loss": 0.0061, |
| "step": 5360 |
| }, |
| { |
| "epoch": 0.009947133854708867, |
| "grad_norm": 1.5255975723266602, |
| "learning_rate": 1.999919012319234e-07, |
| "loss": 0.0044, |
| "step": 5370 |
| }, |
| { |
| "epoch": 0.009965657381440167, |
| "grad_norm": 0.914089024066925, |
| "learning_rate": 1.9999186412471768e-07, |
| "loss": 0.0052, |
| "step": 5380 |
| }, |
| { |
| "epoch": 0.009984180908171469, |
| "grad_norm": 0.8056774735450745, |
| "learning_rate": 1.9999182693270005e-07, |
| "loss": 0.0047, |
| "step": 5390 |
| }, |
| { |
| "epoch": 0.01000270443490277, |
| "grad_norm": 1.076330304145813, |
| "learning_rate": 1.999917896558706e-07, |
| "loss": 0.0044, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.010021227961634072, |
| "grad_norm": 3.0182743072509766, |
| "learning_rate": 1.9999175229422934e-07, |
| "loss": 0.0052, |
| "step": 5410 |
| }, |
| { |
| "epoch": 0.010039751488365374, |
| "grad_norm": 0.8086827993392944, |
| "learning_rate": 1.9999171484777633e-07, |
| "loss": 0.0037, |
| "step": 5420 |
| }, |
| { |
| "epoch": 0.010058275015096674, |
| "grad_norm": 0.5428926944732666, |
| "learning_rate": 1.9999167731651157e-07, |
| "loss": 0.0043, |
| "step": 5430 |
| }, |
| { |
| "epoch": 0.010076798541827976, |
| "grad_norm": 1.1494678258895874, |
| "learning_rate": 1.999916397004351e-07, |
| "loss": 0.0047, |
| "step": 5440 |
| }, |
| { |
| "epoch": 0.010095322068559277, |
| "grad_norm": 0.8914420008659363, |
| "learning_rate": 1.9999160199954696e-07, |
| "loss": 0.0049, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.010113845595290579, |
| "grad_norm": 0.4892839789390564, |
| "learning_rate": 1.999915642138472e-07, |
| "loss": 0.0053, |
| "step": 5460 |
| }, |
| { |
| "epoch": 0.01013236912202188, |
| "grad_norm": 0.8774476647377014, |
| "learning_rate": 1.9999152634333581e-07, |
| "loss": 0.005, |
| "step": 5470 |
| }, |
| { |
| "epoch": 0.010150892648753181, |
| "grad_norm": 0.5296536684036255, |
| "learning_rate": 1.9999148838801283e-07, |
| "loss": 0.0042, |
| "step": 5480 |
| }, |
| { |
| "epoch": 0.010169416175484483, |
| "grad_norm": 0.4783259630203247, |
| "learning_rate": 1.999914503478783e-07, |
| "loss": 0.0039, |
| "step": 5490 |
| }, |
| { |
| "epoch": 0.010187939702215784, |
| "grad_norm": 0.8164564371109009, |
| "learning_rate": 1.999914122229323e-07, |
| "loss": 0.006, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.010206463228947086, |
| "grad_norm": 0.682399332523346, |
| "learning_rate": 1.999913740131748e-07, |
| "loss": 0.0051, |
| "step": 5510 |
| }, |
| { |
| "epoch": 0.010224986755678386, |
| "grad_norm": 0.5319806337356567, |
| "learning_rate": 1.9999133571860582e-07, |
| "loss": 0.0046, |
| "step": 5520 |
| }, |
| { |
| "epoch": 0.010243510282409688, |
| "grad_norm": 0.5874443650245667, |
| "learning_rate": 1.9999129733922545e-07, |
| "loss": 0.0055, |
| "step": 5530 |
| }, |
| { |
| "epoch": 0.01026203380914099, |
| "grad_norm": 0.3967069089412689, |
| "learning_rate": 1.999912588750337e-07, |
| "loss": 0.0037, |
| "step": 5540 |
| }, |
| { |
| "epoch": 0.010280557335872291, |
| "grad_norm": 0.9231893420219421, |
| "learning_rate": 1.999912203260306e-07, |
| "loss": 0.005, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.010299080862603593, |
| "grad_norm": 0.4438602328300476, |
| "learning_rate": 1.9999118169221616e-07, |
| "loss": 0.0047, |
| "step": 5560 |
| }, |
| { |
| "epoch": 0.010317604389334894, |
| "grad_norm": 0.5434121489524841, |
| "learning_rate": 1.9999114297359046e-07, |
| "loss": 0.0043, |
| "step": 5570 |
| }, |
| { |
| "epoch": 0.010336127916066196, |
| "grad_norm": 1.5575553178787231, |
| "learning_rate": 1.9999110417015347e-07, |
| "loss": 0.0054, |
| "step": 5580 |
| }, |
| { |
| "epoch": 0.010354651442797498, |
| "grad_norm": 1.4973243474960327, |
| "learning_rate": 1.9999106528190528e-07, |
| "loss": 0.0051, |
| "step": 5590 |
| }, |
| { |
| "epoch": 0.010373174969528798, |
| "grad_norm": 0.8369397521018982, |
| "learning_rate": 1.9999102630884592e-07, |
| "loss": 0.0045, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.0103916984962601, |
| "grad_norm": 1.8409373760223389, |
| "learning_rate": 1.9999098725097537e-07, |
| "loss": 0.0049, |
| "step": 5610 |
| }, |
| { |
| "epoch": 0.0104102220229914, |
| "grad_norm": 0.925690770149231, |
| "learning_rate": 1.9999094810829375e-07, |
| "loss": 0.0049, |
| "step": 5620 |
| }, |
| { |
| "epoch": 0.010428745549722703, |
| "grad_norm": 1.3561915159225464, |
| "learning_rate": 1.9999090888080102e-07, |
| "loss": 0.0041, |
| "step": 5630 |
| }, |
| { |
| "epoch": 0.010447269076454005, |
| "grad_norm": 0.5484433770179749, |
| "learning_rate": 1.9999086956849724e-07, |
| "loss": 0.0037, |
| "step": 5640 |
| }, |
| { |
| "epoch": 0.010465792603185305, |
| "grad_norm": 1.3982502222061157, |
| "learning_rate": 1.999908301713824e-07, |
| "loss": 0.0057, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.010484316129916607, |
| "grad_norm": 0.5583667755126953, |
| "learning_rate": 1.9999079068945662e-07, |
| "loss": 0.0048, |
| "step": 5660 |
| }, |
| { |
| "epoch": 0.010502839656647908, |
| "grad_norm": 1.0019716024398804, |
| "learning_rate": 1.9999075112271986e-07, |
| "loss": 0.004, |
| "step": 5670 |
| }, |
| { |
| "epoch": 0.01052136318337921, |
| "grad_norm": 2.020299196243286, |
| "learning_rate": 1.9999071147117218e-07, |
| "loss": 0.0052, |
| "step": 5680 |
| }, |
| { |
| "epoch": 0.010539886710110512, |
| "grad_norm": 1.1758064031600952, |
| "learning_rate": 1.999906717348136e-07, |
| "loss": 0.0049, |
| "step": 5690 |
| }, |
| { |
| "epoch": 0.010558410236841812, |
| "grad_norm": 2.2198078632354736, |
| "learning_rate": 1.9999063191364422e-07, |
| "loss": 0.0049, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.010576933763573115, |
| "grad_norm": 1.2298004627227783, |
| "learning_rate": 1.9999059200766396e-07, |
| "loss": 0.0061, |
| "step": 5710 |
| }, |
| { |
| "epoch": 0.010595457290304415, |
| "grad_norm": 0.4814535081386566, |
| "learning_rate": 1.9999055201687297e-07, |
| "loss": 0.0047, |
| "step": 5720 |
| }, |
| { |
| "epoch": 0.010613980817035717, |
| "grad_norm": 0.6831616163253784, |
| "learning_rate": 1.999905119412712e-07, |
| "loss": 0.0045, |
| "step": 5730 |
| }, |
| { |
| "epoch": 0.01063250434376702, |
| "grad_norm": 1.8222451210021973, |
| "learning_rate": 1.999904717808587e-07, |
| "loss": 0.0044, |
| "step": 5740 |
| }, |
| { |
| "epoch": 0.01065102787049832, |
| "grad_norm": 0.9469901323318481, |
| "learning_rate": 1.9999043153563553e-07, |
| "loss": 0.0054, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.010669551397229622, |
| "grad_norm": 0.32088392972946167, |
| "learning_rate": 1.999903912056017e-07, |
| "loss": 0.0048, |
| "step": 5760 |
| }, |
| { |
| "epoch": 0.010688074923960922, |
| "grad_norm": 1.863303303718567, |
| "learning_rate": 1.9999035079075727e-07, |
| "loss": 0.0047, |
| "step": 5770 |
| }, |
| { |
| "epoch": 0.010706598450692224, |
| "grad_norm": 0.4461580514907837, |
| "learning_rate": 1.9999031029110224e-07, |
| "loss": 0.0048, |
| "step": 5780 |
| }, |
| { |
| "epoch": 0.010725121977423526, |
| "grad_norm": 1.103312373161316, |
| "learning_rate": 1.9999026970663668e-07, |
| "loss": 0.0053, |
| "step": 5790 |
| }, |
| { |
| "epoch": 0.010743645504154827, |
| "grad_norm": 1.7623060941696167, |
| "learning_rate": 1.9999022903736063e-07, |
| "loss": 0.0051, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.010762169030886129, |
| "grad_norm": 0.44566792249679565, |
| "learning_rate": 1.9999018828327408e-07, |
| "loss": 0.0048, |
| "step": 5810 |
| }, |
| { |
| "epoch": 0.01078069255761743, |
| "grad_norm": 2.1573126316070557, |
| "learning_rate": 1.9999014744437708e-07, |
| "loss": 0.0051, |
| "step": 5820 |
| }, |
| { |
| "epoch": 0.010799216084348731, |
| "grad_norm": 2.563613176345825, |
| "learning_rate": 1.9999010652066966e-07, |
| "loss": 0.0052, |
| "step": 5830 |
| }, |
| { |
| "epoch": 0.010817739611080032, |
| "grad_norm": 0.7833878993988037, |
| "learning_rate": 1.9999006551215188e-07, |
| "loss": 0.0041, |
| "step": 5840 |
| }, |
| { |
| "epoch": 0.010836263137811334, |
| "grad_norm": 0.9682196378707886, |
| "learning_rate": 1.9999002441882377e-07, |
| "loss": 0.0057, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.010854786664542636, |
| "grad_norm": 1.1835592985153198, |
| "learning_rate": 1.9998998324068536e-07, |
| "loss": 0.0038, |
| "step": 5860 |
| }, |
| { |
| "epoch": 0.010873310191273937, |
| "grad_norm": 0.4966825246810913, |
| "learning_rate": 1.9998994197773667e-07, |
| "loss": 0.0048, |
| "step": 5870 |
| }, |
| { |
| "epoch": 0.010891833718005239, |
| "grad_norm": 0.38705042004585266, |
| "learning_rate": 1.9998990062997772e-07, |
| "loss": 0.0063, |
| "step": 5880 |
| }, |
| { |
| "epoch": 0.010910357244736539, |
| "grad_norm": 0.93874591588974, |
| "learning_rate": 1.999898591974086e-07, |
| "loss": 0.005, |
| "step": 5890 |
| }, |
| { |
| "epoch": 0.010928880771467841, |
| "grad_norm": 1.1283129453659058, |
| "learning_rate": 1.9998981768002934e-07, |
| "loss": 0.0042, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.010947404298199143, |
| "grad_norm": 1.720888376235962, |
| "learning_rate": 1.999897760778399e-07, |
| "loss": 0.0037, |
| "step": 5910 |
| }, |
| { |
| "epoch": 0.010965927824930444, |
| "grad_norm": 1.1553153991699219, |
| "learning_rate": 1.9998973439084042e-07, |
| "loss": 0.0053, |
| "step": 5920 |
| }, |
| { |
| "epoch": 0.010984451351661746, |
| "grad_norm": 1.2236387729644775, |
| "learning_rate": 1.9998969261903084e-07, |
| "loss": 0.0068, |
| "step": 5930 |
| }, |
| { |
| "epoch": 0.011002974878393046, |
| "grad_norm": 1.7974553108215332, |
| "learning_rate": 1.9998965076241127e-07, |
| "loss": 0.0042, |
| "step": 5940 |
| }, |
| { |
| "epoch": 0.011021498405124348, |
| "grad_norm": 0.7733255624771118, |
| "learning_rate": 1.9998960882098167e-07, |
| "loss": 0.0031, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.01104002193185565, |
| "grad_norm": 1.2585145235061646, |
| "learning_rate": 1.9998956679474213e-07, |
| "loss": 0.0061, |
| "step": 5960 |
| }, |
| { |
| "epoch": 0.011058545458586951, |
| "grad_norm": 0.4307413399219513, |
| "learning_rate": 1.9998952468369268e-07, |
| "loss": 0.0043, |
| "step": 5970 |
| }, |
| { |
| "epoch": 0.011077068985318253, |
| "grad_norm": 0.43582257628440857, |
| "learning_rate": 1.9998948248783336e-07, |
| "loss": 0.0051, |
| "step": 5980 |
| }, |
| { |
| "epoch": 0.011095592512049553, |
| "grad_norm": 1.0996239185333252, |
| "learning_rate": 1.999894402071642e-07, |
| "loss": 0.0048, |
| "step": 5990 |
| }, |
| { |
| "epoch": 0.011114116038780856, |
| "grad_norm": 1.5136151313781738, |
| "learning_rate": 1.999893978416852e-07, |
| "loss": 0.0055, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.011132639565512158, |
| "grad_norm": 0.46866336464881897, |
| "learning_rate": 1.9998935539139645e-07, |
| "loss": 0.0039, |
| "step": 6010 |
| }, |
| { |
| "epoch": 0.011151163092243458, |
| "grad_norm": 1.4977253675460815, |
| "learning_rate": 1.9998931285629798e-07, |
| "loss": 0.0051, |
| "step": 6020 |
| }, |
| { |
| "epoch": 0.01116968661897476, |
| "grad_norm": 1.497334599494934, |
| "learning_rate": 1.9998927023638977e-07, |
| "loss": 0.0045, |
| "step": 6030 |
| }, |
| { |
| "epoch": 0.01118821014570606, |
| "grad_norm": 1.2557651996612549, |
| "learning_rate": 1.9998922753167192e-07, |
| "loss": 0.005, |
| "step": 6040 |
| }, |
| { |
| "epoch": 0.011206733672437363, |
| "grad_norm": 1.549138069152832, |
| "learning_rate": 1.9998918474214444e-07, |
| "loss": 0.0042, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.011225257199168665, |
| "grad_norm": 2.3984110355377197, |
| "learning_rate": 1.9998914186780737e-07, |
| "loss": 0.0045, |
| "step": 6060 |
| }, |
| { |
| "epoch": 0.011243780725899965, |
| "grad_norm": 0.9594945907592773, |
| "learning_rate": 1.9998909890866073e-07, |
| "loss": 0.0043, |
| "step": 6070 |
| }, |
| { |
| "epoch": 0.011262304252631267, |
| "grad_norm": 1.0715326070785522, |
| "learning_rate": 1.9998905586470461e-07, |
| "loss": 0.0049, |
| "step": 6080 |
| }, |
| { |
| "epoch": 0.011280827779362568, |
| "grad_norm": 1.471585750579834, |
| "learning_rate": 1.9998901273593899e-07, |
| "loss": 0.0056, |
| "step": 6090 |
| }, |
| { |
| "epoch": 0.01129935130609387, |
| "grad_norm": 0.8725175261497498, |
| "learning_rate": 1.999889695223639e-07, |
| "loss": 0.0046, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.011317874832825172, |
| "grad_norm": 0.9626299142837524, |
| "learning_rate": 1.9998892622397941e-07, |
| "loss": 0.0046, |
| "step": 6110 |
| }, |
| { |
| "epoch": 0.011336398359556472, |
| "grad_norm": 0.6687320470809937, |
| "learning_rate": 1.9998888284078555e-07, |
| "loss": 0.0043, |
| "step": 6120 |
| }, |
| { |
| "epoch": 0.011354921886287775, |
| "grad_norm": 2.5093936920166016, |
| "learning_rate": 1.9998883937278235e-07, |
| "loss": 0.0056, |
| "step": 6130 |
| }, |
| { |
| "epoch": 0.011373445413019075, |
| "grad_norm": 0.8474906086921692, |
| "learning_rate": 1.9998879581996985e-07, |
| "loss": 0.0043, |
| "step": 6140 |
| }, |
| { |
| "epoch": 0.011391968939750377, |
| "grad_norm": 0.6211300492286682, |
| "learning_rate": 1.999887521823481e-07, |
| "loss": 0.0045, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.01141049246648168, |
| "grad_norm": 1.0607517957687378, |
| "learning_rate": 1.999887084599171e-07, |
| "loss": 0.0048, |
| "step": 6160 |
| }, |
| { |
| "epoch": 0.01142901599321298, |
| "grad_norm": 1.0385024547576904, |
| "learning_rate": 1.9998866465267695e-07, |
| "loss": 0.0043, |
| "step": 6170 |
| }, |
| { |
| "epoch": 0.011447539519944282, |
| "grad_norm": 0.7626750469207764, |
| "learning_rate": 1.9998862076062762e-07, |
| "loss": 0.0044, |
| "step": 6180 |
| }, |
| { |
| "epoch": 0.011466063046675582, |
| "grad_norm": 1.400589942932129, |
| "learning_rate": 1.999885767837692e-07, |
| "loss": 0.0046, |
| "step": 6190 |
| }, |
| { |
| "epoch": 0.011484586573406884, |
| "grad_norm": 0.6756898760795593, |
| "learning_rate": 1.9998853272210168e-07, |
| "loss": 0.006, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.011503110100138186, |
| "grad_norm": 0.3252939283847809, |
| "learning_rate": 1.9998848857562514e-07, |
| "loss": 0.0045, |
| "step": 6210 |
| }, |
| { |
| "epoch": 0.011521633626869487, |
| "grad_norm": 1.436022400856018, |
| "learning_rate": 1.999884443443396e-07, |
| "loss": 0.0046, |
| "step": 6220 |
| }, |
| { |
| "epoch": 0.011540157153600789, |
| "grad_norm": 0.43667012453079224, |
| "learning_rate": 1.9998840002824505e-07, |
| "loss": 0.0049, |
| "step": 6230 |
| }, |
| { |
| "epoch": 0.01155868068033209, |
| "grad_norm": 0.7786639332771301, |
| "learning_rate": 1.9998835562734163e-07, |
| "loss": 0.004, |
| "step": 6240 |
| }, |
| { |
| "epoch": 0.011577204207063391, |
| "grad_norm": 0.6937276721000671, |
| "learning_rate": 1.999883111416293e-07, |
| "loss": 0.0054, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.011595727733794692, |
| "grad_norm": 1.4458993673324585, |
| "learning_rate": 1.9998826657110812e-07, |
| "loss": 0.0065, |
| "step": 6260 |
| }, |
| { |
| "epoch": 0.011614251260525994, |
| "grad_norm": 0.6148513555526733, |
| "learning_rate": 1.9998822191577813e-07, |
| "loss": 0.0046, |
| "step": 6270 |
| }, |
| { |
| "epoch": 0.011632774787257296, |
| "grad_norm": 1.3800839185714722, |
| "learning_rate": 1.9998817717563936e-07, |
| "loss": 0.0055, |
| "step": 6280 |
| }, |
| { |
| "epoch": 0.011651298313988596, |
| "grad_norm": 0.8290160894393921, |
| "learning_rate": 1.9998813235069184e-07, |
| "loss": 0.005, |
| "step": 6290 |
| }, |
| { |
| "epoch": 0.011669821840719899, |
| "grad_norm": 0.5129774212837219, |
| "learning_rate": 1.9998808744093566e-07, |
| "loss": 0.0041, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.011688345367451199, |
| "grad_norm": 0.7607941031455994, |
| "learning_rate": 1.9998804244637077e-07, |
| "loss": 0.0048, |
| "step": 6310 |
| }, |
| { |
| "epoch": 0.011706868894182501, |
| "grad_norm": 1.2245440483093262, |
| "learning_rate": 1.999879973669973e-07, |
| "loss": 0.0047, |
| "step": 6320 |
| }, |
| { |
| "epoch": 0.011725392420913803, |
| "grad_norm": 0.27017250657081604, |
| "learning_rate": 1.9998795220281522e-07, |
| "loss": 0.0042, |
| "step": 6330 |
| }, |
| { |
| "epoch": 0.011743915947645104, |
| "grad_norm": 0.6682379841804504, |
| "learning_rate": 1.9998790695382462e-07, |
| "loss": 0.0042, |
| "step": 6340 |
| }, |
| { |
| "epoch": 0.011762439474376406, |
| "grad_norm": 1.150757908821106, |
| "learning_rate": 1.9998786162002547e-07, |
| "loss": 0.005, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.011780963001107706, |
| "grad_norm": 1.3020960092544556, |
| "learning_rate": 1.9998781620141787e-07, |
| "loss": 0.0054, |
| "step": 6360 |
| }, |
| { |
| "epoch": 0.011799486527839008, |
| "grad_norm": 0.409411758184433, |
| "learning_rate": 1.9998777069800186e-07, |
| "loss": 0.005, |
| "step": 6370 |
| }, |
| { |
| "epoch": 0.01181801005457031, |
| "grad_norm": 0.4993356466293335, |
| "learning_rate": 1.9998772510977741e-07, |
| "loss": 0.0048, |
| "step": 6380 |
| }, |
| { |
| "epoch": 0.01183653358130161, |
| "grad_norm": 0.6446143984794617, |
| "learning_rate": 1.9998767943674464e-07, |
| "loss": 0.0046, |
| "step": 6390 |
| }, |
| { |
| "epoch": 0.011855057108032913, |
| "grad_norm": 0.9871600270271301, |
| "learning_rate": 1.9998763367890357e-07, |
| "loss": 0.0058, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.011873580634764213, |
| "grad_norm": 1.4248993396759033, |
| "learning_rate": 1.999875878362542e-07, |
| "loss": 0.0043, |
| "step": 6410 |
| }, |
| { |
| "epoch": 0.011892104161495515, |
| "grad_norm": 1.0000044107437134, |
| "learning_rate": 1.9998754190879658e-07, |
| "loss": 0.0044, |
| "step": 6420 |
| }, |
| { |
| "epoch": 0.011910627688226818, |
| "grad_norm": 3.019697666168213, |
| "learning_rate": 1.9998749589653077e-07, |
| "loss": 0.0045, |
| "step": 6430 |
| }, |
| { |
| "epoch": 0.011929151214958118, |
| "grad_norm": 3.4525275230407715, |
| "learning_rate": 1.9998744979945684e-07, |
| "loss": 0.0037, |
| "step": 6440 |
| }, |
| { |
| "epoch": 0.01194767474168942, |
| "grad_norm": 2.3522465229034424, |
| "learning_rate": 1.9998740361757472e-07, |
| "loss": 0.004, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.01196619826842072, |
| "grad_norm": 0.5118739008903503, |
| "learning_rate": 1.9998735735088456e-07, |
| "loss": 0.0056, |
| "step": 6460 |
| }, |
| { |
| "epoch": 0.011984721795152023, |
| "grad_norm": 0.5207595229148865, |
| "learning_rate": 1.9998731099938637e-07, |
| "loss": 0.0036, |
| "step": 6470 |
| }, |
| { |
| "epoch": 0.012003245321883325, |
| "grad_norm": 1.0849483013153076, |
| "learning_rate": 1.9998726456308014e-07, |
| "loss": 0.0041, |
| "step": 6480 |
| }, |
| { |
| "epoch": 0.012021768848614625, |
| "grad_norm": 1.0602933168411255, |
| "learning_rate": 1.9998721804196598e-07, |
| "loss": 0.0048, |
| "step": 6490 |
| }, |
| { |
| "epoch": 0.012040292375345927, |
| "grad_norm": 0.9715251326560974, |
| "learning_rate": 1.999871714360439e-07, |
| "loss": 0.0065, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.012058815902077228, |
| "grad_norm": 1.5308769941329956, |
| "learning_rate": 1.999871247453139e-07, |
| "loss": 0.0059, |
| "step": 6510 |
| }, |
| { |
| "epoch": 0.01207733942880853, |
| "grad_norm": 1.5637868642807007, |
| "learning_rate": 1.9998707796977609e-07, |
| "loss": 0.0046, |
| "step": 6520 |
| }, |
| { |
| "epoch": 0.012095862955539832, |
| "grad_norm": 0.6605505347251892, |
| "learning_rate": 1.9998703110943045e-07, |
| "loss": 0.0044, |
| "step": 6530 |
| }, |
| { |
| "epoch": 0.012114386482271132, |
| "grad_norm": 0.5709793567657471, |
| "learning_rate": 1.9998698416427703e-07, |
| "loss": 0.0051, |
| "step": 6540 |
| }, |
| { |
| "epoch": 0.012132910009002434, |
| "grad_norm": 0.9911216497421265, |
| "learning_rate": 1.9998693713431593e-07, |
| "loss": 0.0043, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.012151433535733735, |
| "grad_norm": 0.5670028924942017, |
| "learning_rate": 1.999868900195471e-07, |
| "loss": 0.0057, |
| "step": 6560 |
| }, |
| { |
| "epoch": 0.012169957062465037, |
| "grad_norm": 1.038466215133667, |
| "learning_rate": 1.9998684281997068e-07, |
| "loss": 0.0058, |
| "step": 6570 |
| }, |
| { |
| "epoch": 0.012188480589196339, |
| "grad_norm": 0.8275384306907654, |
| "learning_rate": 1.999867955355866e-07, |
| "loss": 0.0047, |
| "step": 6580 |
| }, |
| { |
| "epoch": 0.01220700411592764, |
| "grad_norm": 0.9158803820610046, |
| "learning_rate": 1.99986748166395e-07, |
| "loss": 0.0041, |
| "step": 6590 |
| }, |
| { |
| "epoch": 0.012225527642658942, |
| "grad_norm": 1.9012762308120728, |
| "learning_rate": 1.9998670071239584e-07, |
| "loss": 0.0049, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.012244051169390242, |
| "grad_norm": 0.8034256100654602, |
| "learning_rate": 1.999866531735892e-07, |
| "loss": 0.0055, |
| "step": 6610 |
| }, |
| { |
| "epoch": 0.012262574696121544, |
| "grad_norm": 1.8934110403060913, |
| "learning_rate": 1.9998660554997513e-07, |
| "loss": 0.0052, |
| "step": 6620 |
| }, |
| { |
| "epoch": 0.012281098222852846, |
| "grad_norm": 0.6737769842147827, |
| "learning_rate": 1.9998655784155366e-07, |
| "loss": 0.0044, |
| "step": 6630 |
| }, |
| { |
| "epoch": 0.012299621749584147, |
| "grad_norm": 1.5266069173812866, |
| "learning_rate": 1.9998651004832482e-07, |
| "loss": 0.0047, |
| "step": 6640 |
| }, |
| { |
| "epoch": 0.012318145276315449, |
| "grad_norm": 0.6605862975120544, |
| "learning_rate": 1.9998646217028865e-07, |
| "loss": 0.0033, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.01233666880304675, |
| "grad_norm": 0.49088865518569946, |
| "learning_rate": 1.9998641420744517e-07, |
| "loss": 0.0044, |
| "step": 6660 |
| }, |
| { |
| "epoch": 0.012355192329778051, |
| "grad_norm": 1.2727864980697632, |
| "learning_rate": 1.999863661597945e-07, |
| "loss": 0.0053, |
| "step": 6670 |
| }, |
| { |
| "epoch": 0.012373715856509352, |
| "grad_norm": 1.2164759635925293, |
| "learning_rate": 1.9998631802733658e-07, |
| "loss": 0.0038, |
| "step": 6680 |
| }, |
| { |
| "epoch": 0.012392239383240654, |
| "grad_norm": 2.9112789630889893, |
| "learning_rate": 1.9998626981007155e-07, |
| "loss": 0.0053, |
| "step": 6690 |
| }, |
| { |
| "epoch": 0.012410762909971956, |
| "grad_norm": 1.8191032409667969, |
| "learning_rate": 1.9998622150799936e-07, |
| "loss": 0.0042, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.012429286436703256, |
| "grad_norm": 0.7922589182853699, |
| "learning_rate": 1.9998617312112012e-07, |
| "loss": 0.0042, |
| "step": 6710 |
| }, |
| { |
| "epoch": 0.012447809963434558, |
| "grad_norm": 0.7463862299919128, |
| "learning_rate": 1.9998612464943382e-07, |
| "loss": 0.0043, |
| "step": 6720 |
| }, |
| { |
| "epoch": 0.012466333490165859, |
| "grad_norm": 1.4704411029815674, |
| "learning_rate": 1.9998607609294054e-07, |
| "loss": 0.0041, |
| "step": 6730 |
| }, |
| { |
| "epoch": 0.012484857016897161, |
| "grad_norm": 1.06722092628479, |
| "learning_rate": 1.999860274516403e-07, |
| "loss": 0.0053, |
| "step": 6740 |
| }, |
| { |
| "epoch": 0.012503380543628463, |
| "grad_norm": 1.9677430391311646, |
| "learning_rate": 1.9998597872553314e-07, |
| "loss": 0.0056, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.012521904070359764, |
| "grad_norm": 0.9780071973800659, |
| "learning_rate": 1.9998592991461912e-07, |
| "loss": 0.0055, |
| "step": 6760 |
| }, |
| { |
| "epoch": 0.012540427597091066, |
| "grad_norm": 1.7688167095184326, |
| "learning_rate": 1.9998588101889825e-07, |
| "loss": 0.0041, |
| "step": 6770 |
| }, |
| { |
| "epoch": 0.012558951123822366, |
| "grad_norm": 1.176604986190796, |
| "learning_rate": 1.999858320383706e-07, |
| "loss": 0.0051, |
| "step": 6780 |
| }, |
| { |
| "epoch": 0.012577474650553668, |
| "grad_norm": 1.1377366781234741, |
| "learning_rate": 1.999857829730362e-07, |
| "loss": 0.0063, |
| "step": 6790 |
| }, |
| { |
| "epoch": 0.01259599817728497, |
| "grad_norm": 0.4529532492160797, |
| "learning_rate": 1.999857338228951e-07, |
| "loss": 0.0041, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.01261452170401627, |
| "grad_norm": 1.1294665336608887, |
| "learning_rate": 1.9998568458794735e-07, |
| "loss": 0.0048, |
| "step": 6810 |
| }, |
| { |
| "epoch": 0.012633045230747573, |
| "grad_norm": 1.1223347187042236, |
| "learning_rate": 1.9998563526819292e-07, |
| "loss": 0.0049, |
| "step": 6820 |
| }, |
| { |
| "epoch": 0.012651568757478873, |
| "grad_norm": 2.435007095336914, |
| "learning_rate": 1.9998558586363194e-07, |
| "loss": 0.0047, |
| "step": 6830 |
| }, |
| { |
| "epoch": 0.012670092284210175, |
| "grad_norm": 1.471243977546692, |
| "learning_rate": 1.9998553637426446e-07, |
| "loss": 0.0048, |
| "step": 6840 |
| }, |
| { |
| "epoch": 0.012688615810941477, |
| "grad_norm": 0.7498399019241333, |
| "learning_rate": 1.9998548680009045e-07, |
| "loss": 0.0042, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.012707139337672778, |
| "grad_norm": 0.5828412175178528, |
| "learning_rate": 1.9998543714110997e-07, |
| "loss": 0.0038, |
| "step": 6860 |
| }, |
| { |
| "epoch": 0.01272566286440408, |
| "grad_norm": 0.7062546014785767, |
| "learning_rate": 1.999853873973231e-07, |
| "loss": 0.0043, |
| "step": 6870 |
| }, |
| { |
| "epoch": 0.01274418639113538, |
| "grad_norm": 2.1820194721221924, |
| "learning_rate": 1.9998533756872985e-07, |
| "loss": 0.0048, |
| "step": 6880 |
| }, |
| { |
| "epoch": 0.012762709917866683, |
| "grad_norm": 1.6870174407958984, |
| "learning_rate": 1.9998528765533024e-07, |
| "loss": 0.0055, |
| "step": 6890 |
| }, |
| { |
| "epoch": 0.012781233444597985, |
| "grad_norm": 0.9094802141189575, |
| "learning_rate": 1.9998523765712441e-07, |
| "loss": 0.0052, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.012799756971329285, |
| "grad_norm": 0.5565671920776367, |
| "learning_rate": 1.9998518757411228e-07, |
| "loss": 0.0065, |
| "step": 6910 |
| }, |
| { |
| "epoch": 0.012818280498060587, |
| "grad_norm": 1.2048276662826538, |
| "learning_rate": 1.9998513740629396e-07, |
| "loss": 0.0047, |
| "step": 6920 |
| }, |
| { |
| "epoch": 0.012836804024791888, |
| "grad_norm": 0.9527319073677063, |
| "learning_rate": 1.999850871536695e-07, |
| "loss": 0.0035, |
| "step": 6930 |
| }, |
| { |
| "epoch": 0.01285532755152319, |
| "grad_norm": 1.1012948751449585, |
| "learning_rate": 1.9998503681623893e-07, |
| "loss": 0.0035, |
| "step": 6940 |
| }, |
| { |
| "epoch": 0.012873851078254492, |
| "grad_norm": 1.2475626468658447, |
| "learning_rate": 1.9998498639400225e-07, |
| "loss": 0.0048, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.012892374604985792, |
| "grad_norm": 0.6311481595039368, |
| "learning_rate": 1.9998493588695954e-07, |
| "loss": 0.004, |
| "step": 6960 |
| }, |
| { |
| "epoch": 0.012910898131717094, |
| "grad_norm": 1.0941135883331299, |
| "learning_rate": 1.999848852951109e-07, |
| "loss": 0.005, |
| "step": 6970 |
| }, |
| { |
| "epoch": 0.012929421658448395, |
| "grad_norm": 1.335740089416504, |
| "learning_rate": 1.9998483461845624e-07, |
| "loss": 0.0044, |
| "step": 6980 |
| }, |
| { |
| "epoch": 0.012947945185179697, |
| "grad_norm": 0.43091148138046265, |
| "learning_rate": 1.9998478385699573e-07, |
| "loss": 0.0041, |
| "step": 6990 |
| }, |
| { |
| "epoch": 0.012966468711910999, |
| "grad_norm": 1.6673928499221802, |
| "learning_rate": 1.9998473301072932e-07, |
| "loss": 0.0056, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.0129849922386423, |
| "grad_norm": 1.4265776872634888, |
| "learning_rate": 1.9998468207965713e-07, |
| "loss": 0.006, |
| "step": 7010 |
| }, |
| { |
| "epoch": 0.013003515765373602, |
| "grad_norm": 0.9223793745040894, |
| "learning_rate": 1.9998463106377916e-07, |
| "loss": 0.005, |
| "step": 7020 |
| }, |
| { |
| "epoch": 0.013022039292104902, |
| "grad_norm": 0.7204763889312744, |
| "learning_rate": 1.9998457996309545e-07, |
| "loss": 0.005, |
| "step": 7030 |
| }, |
| { |
| "epoch": 0.013040562818836204, |
| "grad_norm": 0.8767715692520142, |
| "learning_rate": 1.9998452877760609e-07, |
| "loss": 0.0046, |
| "step": 7040 |
| }, |
| { |
| "epoch": 0.013059086345567504, |
| "grad_norm": 0.671276330947876, |
| "learning_rate": 1.9998447750731104e-07, |
| "loss": 0.0046, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.013077609872298807, |
| "grad_norm": 0.4646291434764862, |
| "learning_rate": 1.9998442615221037e-07, |
| "loss": 0.0041, |
| "step": 7060 |
| }, |
| { |
| "epoch": 0.013096133399030109, |
| "grad_norm": 1.4228308200836182, |
| "learning_rate": 1.999843747123042e-07, |
| "loss": 0.0044, |
| "step": 7070 |
| }, |
| { |
| "epoch": 0.013114656925761409, |
| "grad_norm": 1.0358463525772095, |
| "learning_rate": 1.999843231875925e-07, |
| "loss": 0.0039, |
| "step": 7080 |
| }, |
| { |
| "epoch": 0.013133180452492711, |
| "grad_norm": 2.841841220855713, |
| "learning_rate": 1.9998427157807535e-07, |
| "loss": 0.0082, |
| "step": 7090 |
| }, |
| { |
| "epoch": 0.013151703979224012, |
| "grad_norm": 2.5183050632476807, |
| "learning_rate": 1.9998421988375273e-07, |
| "loss": 0.0038, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.013170227505955314, |
| "grad_norm": 1.9204206466674805, |
| "learning_rate": 1.9998416810462477e-07, |
| "loss": 0.0058, |
| "step": 7110 |
| }, |
| { |
| "epoch": 0.013188751032686616, |
| "grad_norm": 1.0739190578460693, |
| "learning_rate": 1.9998411624069145e-07, |
| "loss": 0.0044, |
| "step": 7120 |
| }, |
| { |
| "epoch": 0.013207274559417916, |
| "grad_norm": 0.5621417760848999, |
| "learning_rate": 1.9998406429195285e-07, |
| "loss": 0.0046, |
| "step": 7130 |
| }, |
| { |
| "epoch": 0.013225798086149218, |
| "grad_norm": 0.2962639629840851, |
| "learning_rate": 1.99984012258409e-07, |
| "loss": 0.0044, |
| "step": 7140 |
| }, |
| { |
| "epoch": 0.013244321612880519, |
| "grad_norm": 0.4295441210269928, |
| "learning_rate": 1.9998396014005993e-07, |
| "loss": 0.005, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.013262845139611821, |
| "grad_norm": 1.3871376514434814, |
| "learning_rate": 1.9998390793690572e-07, |
| "loss": 0.0036, |
| "step": 7160 |
| }, |
| { |
| "epoch": 0.013281368666343123, |
| "grad_norm": 0.5170560479164124, |
| "learning_rate": 1.9998385564894638e-07, |
| "loss": 0.0036, |
| "step": 7170 |
| }, |
| { |
| "epoch": 0.013299892193074423, |
| "grad_norm": 0.445928692817688, |
| "learning_rate": 1.9998380327618197e-07, |
| "loss": 0.0045, |
| "step": 7180 |
| }, |
| { |
| "epoch": 0.013318415719805726, |
| "grad_norm": 0.8867661952972412, |
| "learning_rate": 1.9998375081861255e-07, |
| "loss": 0.0047, |
| "step": 7190 |
| }, |
| { |
| "epoch": 0.013336939246537026, |
| "grad_norm": 0.5516932606697083, |
| "learning_rate": 1.9998369827623813e-07, |
| "loss": 0.0044, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.013355462773268328, |
| "grad_norm": 1.0565916299819946, |
| "learning_rate": 1.9998364564905875e-07, |
| "loss": 0.0043, |
| "step": 7210 |
| }, |
| { |
| "epoch": 0.01337398629999963, |
| "grad_norm": 0.5001686811447144, |
| "learning_rate": 1.999835929370745e-07, |
| "loss": 0.0052, |
| "step": 7220 |
| }, |
| { |
| "epoch": 0.01339250982673093, |
| "grad_norm": 1.397940993309021, |
| "learning_rate": 1.999835401402854e-07, |
| "loss": 0.0048, |
| "step": 7230 |
| }, |
| { |
| "epoch": 0.013411033353462233, |
| "grad_norm": 1.2145320177078247, |
| "learning_rate": 1.9998348725869153e-07, |
| "loss": 0.0042, |
| "step": 7240 |
| }, |
| { |
| "epoch": 0.013429556880193533, |
| "grad_norm": 0.8812707662582397, |
| "learning_rate": 1.9998343429229284e-07, |
| "loss": 0.0039, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.013448080406924835, |
| "grad_norm": 0.5108830332756042, |
| "learning_rate": 1.9998338124108948e-07, |
| "loss": 0.0049, |
| "step": 7260 |
| }, |
| { |
| "epoch": 0.013466603933656137, |
| "grad_norm": 1.0097687244415283, |
| "learning_rate": 1.9998332810508142e-07, |
| "loss": 0.004, |
| "step": 7270 |
| }, |
| { |
| "epoch": 0.013485127460387438, |
| "grad_norm": 1.1193820238113403, |
| "learning_rate": 1.999832748842688e-07, |
| "loss": 0.004, |
| "step": 7280 |
| }, |
| { |
| "epoch": 0.01350365098711874, |
| "grad_norm": 4.651251792907715, |
| "learning_rate": 1.9998322157865152e-07, |
| "loss": 0.005, |
| "step": 7290 |
| }, |
| { |
| "epoch": 0.01352217451385004, |
| "grad_norm": 0.6428113579750061, |
| "learning_rate": 1.9998316818822972e-07, |
| "loss": 0.0049, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.013540698040581342, |
| "grad_norm": 5.16061544418335, |
| "learning_rate": 1.9998311471300347e-07, |
| "loss": 0.0061, |
| "step": 7310 |
| }, |
| { |
| "epoch": 0.013559221567312645, |
| "grad_norm": 0.9377419352531433, |
| "learning_rate": 1.9998306115297276e-07, |
| "loss": 0.0038, |
| "step": 7320 |
| }, |
| { |
| "epoch": 0.013577745094043945, |
| "grad_norm": 1.3704923391342163, |
| "learning_rate": 1.9998300750813763e-07, |
| "loss": 0.0051, |
| "step": 7330 |
| }, |
| { |
| "epoch": 0.013596268620775247, |
| "grad_norm": 0.5168454051017761, |
| "learning_rate": 1.9998295377849817e-07, |
| "loss": 0.0039, |
| "step": 7340 |
| }, |
| { |
| "epoch": 0.013614792147506547, |
| "grad_norm": 1.3589528799057007, |
| "learning_rate": 1.999828999640544e-07, |
| "loss": 0.0047, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.01363331567423785, |
| "grad_norm": 0.9819934964179993, |
| "learning_rate": 1.9998284606480635e-07, |
| "loss": 0.0051, |
| "step": 7360 |
| }, |
| { |
| "epoch": 0.013651839200969152, |
| "grad_norm": 0.7832059860229492, |
| "learning_rate": 1.999827920807541e-07, |
| "loss": 0.0043, |
| "step": 7370 |
| }, |
| { |
| "epoch": 0.013670362727700452, |
| "grad_norm": 9.282112121582031, |
| "learning_rate": 1.999827380118977e-07, |
| "loss": 0.0045, |
| "step": 7380 |
| }, |
| { |
| "epoch": 0.013688886254431754, |
| "grad_norm": 3.068037509918213, |
| "learning_rate": 1.9998268385823717e-07, |
| "loss": 0.0057, |
| "step": 7390 |
| }, |
| { |
| "epoch": 0.013707409781163055, |
| "grad_norm": 0.5647586584091187, |
| "learning_rate": 1.9998262961977253e-07, |
| "loss": 0.0041, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.013725933307894357, |
| "grad_norm": 0.3233998119831085, |
| "learning_rate": 1.9998257529650387e-07, |
| "loss": 0.0054, |
| "step": 7410 |
| }, |
| { |
| "epoch": 0.013744456834625659, |
| "grad_norm": 0.3803546726703644, |
| "learning_rate": 1.9998252088843124e-07, |
| "loss": 0.0053, |
| "step": 7420 |
| }, |
| { |
| "epoch": 0.01376298036135696, |
| "grad_norm": 1.4831609725952148, |
| "learning_rate": 1.9998246639555464e-07, |
| "loss": 0.0043, |
| "step": 7430 |
| }, |
| { |
| "epoch": 0.013781503888088261, |
| "grad_norm": 2.2573049068450928, |
| "learning_rate": 1.9998241181787416e-07, |
| "loss": 0.0045, |
| "step": 7440 |
| }, |
| { |
| "epoch": 0.013800027414819562, |
| "grad_norm": 1.3548682928085327, |
| "learning_rate": 1.9998235715538986e-07, |
| "loss": 0.0054, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.013818550941550864, |
| "grad_norm": 0.5436132550239563, |
| "learning_rate": 1.9998230240810173e-07, |
| "loss": 0.0037, |
| "step": 7460 |
| }, |
| { |
| "epoch": 0.013837074468282164, |
| "grad_norm": 1.4047155380249023, |
| "learning_rate": 1.9998224757600987e-07, |
| "loss": 0.0051, |
| "step": 7470 |
| }, |
| { |
| "epoch": 0.013855597995013466, |
| "grad_norm": 0.8302357196807861, |
| "learning_rate": 1.9998219265911427e-07, |
| "loss": 0.0048, |
| "step": 7480 |
| }, |
| { |
| "epoch": 0.013874121521744769, |
| "grad_norm": 1.0981420278549194, |
| "learning_rate": 1.9998213765741503e-07, |
| "loss": 0.0042, |
| "step": 7490 |
| }, |
| { |
| "epoch": 0.013892645048476069, |
| "grad_norm": 1.1036394834518433, |
| "learning_rate": 1.9998208257091217e-07, |
| "loss": 0.0052, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.013911168575207371, |
| "grad_norm": 0.5272079706192017, |
| "learning_rate": 1.9998202739960575e-07, |
| "loss": 0.0043, |
| "step": 7510 |
| }, |
| { |
| "epoch": 0.013929692101938672, |
| "grad_norm": 0.6824163198471069, |
| "learning_rate": 1.999819721434958e-07, |
| "loss": 0.0034, |
| "step": 7520 |
| }, |
| { |
| "epoch": 0.013948215628669974, |
| "grad_norm": 0.717613160610199, |
| "learning_rate": 1.999819168025824e-07, |
| "loss": 0.0044, |
| "step": 7530 |
| }, |
| { |
| "epoch": 0.013966739155401276, |
| "grad_norm": 0.36964836716651917, |
| "learning_rate": 1.9998186137686552e-07, |
| "loss": 0.005, |
| "step": 7540 |
| }, |
| { |
| "epoch": 0.013985262682132576, |
| "grad_norm": 0.24934236705303192, |
| "learning_rate": 1.999818058663453e-07, |
| "loss": 0.0045, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.014003786208863878, |
| "grad_norm": 1.3952760696411133, |
| "learning_rate": 1.9998175027102173e-07, |
| "loss": 0.006, |
| "step": 7560 |
| }, |
| { |
| "epoch": 0.014022309735595179, |
| "grad_norm": 3.1247060298919678, |
| "learning_rate": 1.999816945908949e-07, |
| "loss": 0.0042, |
| "step": 7570 |
| }, |
| { |
| "epoch": 0.01404083326232648, |
| "grad_norm": 1.5241121053695679, |
| "learning_rate": 1.9998163882596478e-07, |
| "loss": 0.0053, |
| "step": 7580 |
| }, |
| { |
| "epoch": 0.014059356789057783, |
| "grad_norm": 0.4054291844367981, |
| "learning_rate": 1.999815829762315e-07, |
| "loss": 0.0039, |
| "step": 7590 |
| }, |
| { |
| "epoch": 0.014077880315789083, |
| "grad_norm": 1.1743965148925781, |
| "learning_rate": 1.999815270416951e-07, |
| "loss": 0.004, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.014096403842520385, |
| "grad_norm": 0.48605385422706604, |
| "learning_rate": 1.9998147102235557e-07, |
| "loss": 0.0046, |
| "step": 7610 |
| }, |
| { |
| "epoch": 0.014114927369251686, |
| "grad_norm": 0.7395641207695007, |
| "learning_rate": 1.9998141491821298e-07, |
| "loss": 0.0054, |
| "step": 7620 |
| }, |
| { |
| "epoch": 0.014133450895982988, |
| "grad_norm": 0.6947181224822998, |
| "learning_rate": 1.9998135872926744e-07, |
| "loss": 0.0055, |
| "step": 7630 |
| }, |
| { |
| "epoch": 0.01415197442271429, |
| "grad_norm": 0.5310218334197998, |
| "learning_rate": 1.999813024555189e-07, |
| "loss": 0.0041, |
| "step": 7640 |
| }, |
| { |
| "epoch": 0.01417049794944559, |
| "grad_norm": 0.7264940142631531, |
| "learning_rate": 1.9998124609696747e-07, |
| "loss": 0.0052, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.014189021476176893, |
| "grad_norm": 0.5867084860801697, |
| "learning_rate": 1.9998118965361318e-07, |
| "loss": 0.0037, |
| "step": 7660 |
| }, |
| { |
| "epoch": 0.014207545002908193, |
| "grad_norm": 1.239925742149353, |
| "learning_rate": 1.999811331254561e-07, |
| "loss": 0.0047, |
| "step": 7670 |
| }, |
| { |
| "epoch": 0.014226068529639495, |
| "grad_norm": 1.8906760215759277, |
| "learning_rate": 1.999810765124962e-07, |
| "loss": 0.0053, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.014244592056370797, |
| "grad_norm": 4.847606658935547, |
| "learning_rate": 1.9998101981473363e-07, |
| "loss": 0.0035, |
| "step": 7690 |
| }, |
| { |
| "epoch": 0.014263115583102098, |
| "grad_norm": 0.7075890898704529, |
| "learning_rate": 1.999809630321684e-07, |
| "loss": 0.0045, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.0142816391098334, |
| "grad_norm": 1.1188857555389404, |
| "learning_rate": 1.9998090616480053e-07, |
| "loss": 0.005, |
| "step": 7710 |
| }, |
| { |
| "epoch": 0.0143001626365647, |
| "grad_norm": 1.1795648336410522, |
| "learning_rate": 1.999808492126301e-07, |
| "loss": 0.0036, |
| "step": 7720 |
| }, |
| { |
| "epoch": 0.014318686163296002, |
| "grad_norm": 1.097029447555542, |
| "learning_rate": 1.9998079217565715e-07, |
| "loss": 0.0055, |
| "step": 7730 |
| }, |
| { |
| "epoch": 0.014337209690027304, |
| "grad_norm": 0.5832175016403198, |
| "learning_rate": 1.999807350538817e-07, |
| "loss": 0.0049, |
| "step": 7740 |
| }, |
| { |
| "epoch": 0.014355733216758605, |
| "grad_norm": 0.36027607321739197, |
| "learning_rate": 1.9998067784730385e-07, |
| "loss": 0.0042, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.014374256743489907, |
| "grad_norm": 1.275489091873169, |
| "learning_rate": 1.9998062055592363e-07, |
| "loss": 0.0036, |
| "step": 7760 |
| }, |
| { |
| "epoch": 0.014392780270221207, |
| "grad_norm": 0.9427604079246521, |
| "learning_rate": 1.9998056317974105e-07, |
| "loss": 0.0049, |
| "step": 7770 |
| }, |
| { |
| "epoch": 0.01441130379695251, |
| "grad_norm": 0.6243997812271118, |
| "learning_rate": 1.9998050571875624e-07, |
| "loss": 0.0048, |
| "step": 7780 |
| }, |
| { |
| "epoch": 0.014429827323683812, |
| "grad_norm": 1.4829784631729126, |
| "learning_rate": 1.9998044817296916e-07, |
| "loss": 0.0053, |
| "step": 7790 |
| }, |
| { |
| "epoch": 0.014448350850415112, |
| "grad_norm": 1.4203242063522339, |
| "learning_rate": 1.9998039054237993e-07, |
| "loss": 0.0046, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.014466874377146414, |
| "grad_norm": 0.7487713098526001, |
| "learning_rate": 1.9998033282698853e-07, |
| "loss": 0.0044, |
| "step": 7810 |
| }, |
| { |
| "epoch": 0.014485397903877715, |
| "grad_norm": 1.4941959381103516, |
| "learning_rate": 1.9998027502679505e-07, |
| "loss": 0.0036, |
| "step": 7820 |
| }, |
| { |
| "epoch": 0.014503921430609017, |
| "grad_norm": 0.527245283126831, |
| "learning_rate": 1.9998021714179955e-07, |
| "loss": 0.004, |
| "step": 7830 |
| }, |
| { |
| "epoch": 0.014522444957340319, |
| "grad_norm": 1.3346662521362305, |
| "learning_rate": 1.9998015917200207e-07, |
| "loss": 0.0038, |
| "step": 7840 |
| }, |
| { |
| "epoch": 0.01454096848407162, |
| "grad_norm": 4.4243974685668945, |
| "learning_rate": 1.9998010111740267e-07, |
| "loss": 0.0047, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.014559492010802921, |
| "grad_norm": 0.9892958998680115, |
| "learning_rate": 1.9998004297800133e-07, |
| "loss": 0.0059, |
| "step": 7860 |
| }, |
| { |
| "epoch": 0.014578015537534222, |
| "grad_norm": 1.0535051822662354, |
| "learning_rate": 1.999799847537982e-07, |
| "loss": 0.0042, |
| "step": 7870 |
| }, |
| { |
| "epoch": 0.014596539064265524, |
| "grad_norm": 2.46565842628479, |
| "learning_rate": 1.9997992644479327e-07, |
| "loss": 0.0046, |
| "step": 7880 |
| }, |
| { |
| "epoch": 0.014615062590996824, |
| "grad_norm": 0.6282051205635071, |
| "learning_rate": 1.9997986805098658e-07, |
| "loss": 0.0049, |
| "step": 7890 |
| }, |
| { |
| "epoch": 0.014633586117728126, |
| "grad_norm": 0.42676499485969543, |
| "learning_rate": 1.9997980957237822e-07, |
| "loss": 0.0051, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.014652109644459428, |
| "grad_norm": 1.3575069904327393, |
| "learning_rate": 1.999797510089682e-07, |
| "loss": 0.0046, |
| "step": 7910 |
| }, |
| { |
| "epoch": 0.014670633171190729, |
| "grad_norm": 1.0328059196472168, |
| "learning_rate": 1.9997969236075662e-07, |
| "loss": 0.0045, |
| "step": 7920 |
| }, |
| { |
| "epoch": 0.014689156697922031, |
| "grad_norm": 0.3862772285938263, |
| "learning_rate": 1.9997963362774346e-07, |
| "loss": 0.0044, |
| "step": 7930 |
| }, |
| { |
| "epoch": 0.014707680224653331, |
| "grad_norm": 1.1072419881820679, |
| "learning_rate": 1.9997957480992884e-07, |
| "loss": 0.0042, |
| "step": 7940 |
| }, |
| { |
| "epoch": 0.014726203751384634, |
| "grad_norm": 0.19309449195861816, |
| "learning_rate": 1.9997951590731277e-07, |
| "loss": 0.0039, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.014744727278115936, |
| "grad_norm": 0.7775810956954956, |
| "learning_rate": 1.9997945691989534e-07, |
| "loss": 0.0041, |
| "step": 7960 |
| }, |
| { |
| "epoch": 0.014763250804847236, |
| "grad_norm": 1.0817900896072388, |
| "learning_rate": 1.999793978476765e-07, |
| "loss": 0.0054, |
| "step": 7970 |
| }, |
| { |
| "epoch": 0.014781774331578538, |
| "grad_norm": 0.8423750400543213, |
| "learning_rate": 1.9997933869065645e-07, |
| "loss": 0.004, |
| "step": 7980 |
| }, |
| { |
| "epoch": 0.014800297858309839, |
| "grad_norm": 0.861052393913269, |
| "learning_rate": 1.9997927944883508e-07, |
| "loss": 0.0036, |
| "step": 7990 |
| }, |
| { |
| "epoch": 0.01481882138504114, |
| "grad_norm": 1.7140874862670898, |
| "learning_rate": 1.9997922012221258e-07, |
| "loss": 0.0046, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.014837344911772443, |
| "grad_norm": 0.6867257952690125, |
| "learning_rate": 1.999791607107889e-07, |
| "loss": 0.0039, |
| "step": 8010 |
| }, |
| { |
| "epoch": 0.014855868438503743, |
| "grad_norm": 0.3871649205684662, |
| "learning_rate": 1.9997910121456416e-07, |
| "loss": 0.0039, |
| "step": 8020 |
| }, |
| { |
| "epoch": 0.014874391965235045, |
| "grad_norm": 0.6352835893630981, |
| "learning_rate": 1.9997904163353838e-07, |
| "loss": 0.0036, |
| "step": 8030 |
| }, |
| { |
| "epoch": 0.014892915491966346, |
| "grad_norm": 0.8107224106788635, |
| "learning_rate": 1.999789819677116e-07, |
| "loss": 0.0041, |
| "step": 8040 |
| }, |
| { |
| "epoch": 0.014911439018697648, |
| "grad_norm": 1.2498986721038818, |
| "learning_rate": 1.9997892221708388e-07, |
| "loss": 0.0043, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.01492996254542895, |
| "grad_norm": 1.205080270767212, |
| "learning_rate": 1.9997886238165525e-07, |
| "loss": 0.005, |
| "step": 8060 |
| }, |
| { |
| "epoch": 0.01494848607216025, |
| "grad_norm": 0.9285450577735901, |
| "learning_rate": 1.9997880246142582e-07, |
| "loss": 0.004, |
| "step": 8070 |
| }, |
| { |
| "epoch": 0.014967009598891553, |
| "grad_norm": 0.8476603031158447, |
| "learning_rate": 1.9997874245639558e-07, |
| "loss": 0.0057, |
| "step": 8080 |
| }, |
| { |
| "epoch": 0.014985533125622853, |
| "grad_norm": 0.3520084619522095, |
| "learning_rate": 1.9997868236656463e-07, |
| "loss": 0.005, |
| "step": 8090 |
| }, |
| { |
| "epoch": 0.015004056652354155, |
| "grad_norm": 1.0680679082870483, |
| "learning_rate": 1.9997862219193298e-07, |
| "loss": 0.0043, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.015022580179085457, |
| "grad_norm": 0.9957355856895447, |
| "learning_rate": 1.9997856193250068e-07, |
| "loss": 0.0035, |
| "step": 8110 |
| }, |
| { |
| "epoch": 0.015041103705816758, |
| "grad_norm": 0.49109822511672974, |
| "learning_rate": 1.9997850158826783e-07, |
| "loss": 0.005, |
| "step": 8120 |
| }, |
| { |
| "epoch": 0.01505962723254806, |
| "grad_norm": 0.6732653379440308, |
| "learning_rate": 1.9997844115923447e-07, |
| "loss": 0.0044, |
| "step": 8130 |
| }, |
| { |
| "epoch": 0.01507815075927936, |
| "grad_norm": 1.2722110748291016, |
| "learning_rate": 1.999783806454006e-07, |
| "loss": 0.0044, |
| "step": 8140 |
| }, |
| { |
| "epoch": 0.015096674286010662, |
| "grad_norm": 1.6857893466949463, |
| "learning_rate": 1.9997832004676627e-07, |
| "loss": 0.0041, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.015115197812741964, |
| "grad_norm": 2.7750627994537354, |
| "learning_rate": 1.9997825936333159e-07, |
| "loss": 0.0048, |
| "step": 8160 |
| }, |
| { |
| "epoch": 0.015133721339473265, |
| "grad_norm": 0.6073914766311646, |
| "learning_rate": 1.9997819859509663e-07, |
| "loss": 0.004, |
| "step": 8170 |
| }, |
| { |
| "epoch": 0.015152244866204567, |
| "grad_norm": 0.7536759376525879, |
| "learning_rate": 1.9997813774206133e-07, |
| "loss": 0.0042, |
| "step": 8180 |
| }, |
| { |
| "epoch": 0.015170768392935867, |
| "grad_norm": 0.8029915690422058, |
| "learning_rate": 1.9997807680422584e-07, |
| "loss": 0.0046, |
| "step": 8190 |
| }, |
| { |
| "epoch": 0.01518929191966717, |
| "grad_norm": 0.5253338813781738, |
| "learning_rate": 1.9997801578159014e-07, |
| "loss": 0.0044, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.015207815446398472, |
| "grad_norm": 0.5572255849838257, |
| "learning_rate": 1.9997795467415438e-07, |
| "loss": 0.0041, |
| "step": 8210 |
| }, |
| { |
| "epoch": 0.015226338973129772, |
| "grad_norm": 1.572336196899414, |
| "learning_rate": 1.9997789348191852e-07, |
| "loss": 0.0058, |
| "step": 8220 |
| }, |
| { |
| "epoch": 0.015244862499861074, |
| "grad_norm": 1.1556674242019653, |
| "learning_rate": 1.9997783220488268e-07, |
| "loss": 0.0049, |
| "step": 8230 |
| }, |
| { |
| "epoch": 0.015263386026592374, |
| "grad_norm": 2.3045637607574463, |
| "learning_rate": 1.9997777084304684e-07, |
| "loss": 0.0041, |
| "step": 8240 |
| }, |
| { |
| "epoch": 0.015281909553323677, |
| "grad_norm": 0.3899919092655182, |
| "learning_rate": 1.999777093964111e-07, |
| "loss": 0.0058, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.015300433080054977, |
| "grad_norm": 1.0309175252914429, |
| "learning_rate": 1.999776478649755e-07, |
| "loss": 0.0045, |
| "step": 8260 |
| }, |
| { |
| "epoch": 0.015318956606786279, |
| "grad_norm": 0.5064734220504761, |
| "learning_rate": 1.999775862487401e-07, |
| "loss": 0.0041, |
| "step": 8270 |
| }, |
| { |
| "epoch": 0.015337480133517581, |
| "grad_norm": 0.7135197520256042, |
| "learning_rate": 1.9997752454770494e-07, |
| "loss": 0.0055, |
| "step": 8280 |
| }, |
| { |
| "epoch": 0.015356003660248882, |
| "grad_norm": 1.4438592195510864, |
| "learning_rate": 1.9997746276187003e-07, |
| "loss": 0.0046, |
| "step": 8290 |
| }, |
| { |
| "epoch": 0.015374527186980184, |
| "grad_norm": 1.7102742195129395, |
| "learning_rate": 1.9997740089123556e-07, |
| "loss": 0.0047, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.015393050713711484, |
| "grad_norm": 0.6631841659545898, |
| "learning_rate": 1.9997733893580144e-07, |
| "loss": 0.0058, |
| "step": 8310 |
| }, |
| { |
| "epoch": 0.015411574240442786, |
| "grad_norm": 0.8265522718429565, |
| "learning_rate": 1.999772768955678e-07, |
| "loss": 0.0038, |
| "step": 8320 |
| }, |
| { |
| "epoch": 0.015430097767174088, |
| "grad_norm": 0.6872648000717163, |
| "learning_rate": 1.9997721477053465e-07, |
| "loss": 0.0043, |
| "step": 8330 |
| }, |
| { |
| "epoch": 0.015448621293905389, |
| "grad_norm": 0.6156404614448547, |
| "learning_rate": 1.9997715256070205e-07, |
| "loss": 0.0042, |
| "step": 8340 |
| }, |
| { |
| "epoch": 0.015467144820636691, |
| "grad_norm": 0.4310632050037384, |
| "learning_rate": 1.9997709026607007e-07, |
| "loss": 0.0052, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.015485668347367991, |
| "grad_norm": 1.2005386352539062, |
| "learning_rate": 1.999770278866388e-07, |
| "loss": 0.0039, |
| "step": 8360 |
| }, |
| { |
| "epoch": 0.015504191874099293, |
| "grad_norm": 1.8429206609725952, |
| "learning_rate": 1.999769654224082e-07, |
| "loss": 0.0046, |
| "step": 8370 |
| }, |
| { |
| "epoch": 0.015522715400830596, |
| "grad_norm": 0.7069671154022217, |
| "learning_rate": 1.9997690287337838e-07, |
| "loss": 0.0028, |
| "step": 8380 |
| }, |
| { |
| "epoch": 0.015541238927561896, |
| "grad_norm": 0.5858443975448608, |
| "learning_rate": 1.9997684023954938e-07, |
| "loss": 0.0051, |
| "step": 8390 |
| }, |
| { |
| "epoch": 0.015559762454293198, |
| "grad_norm": 1.5247914791107178, |
| "learning_rate": 1.999767775209213e-07, |
| "loss": 0.0056, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.015578285981024498, |
| "grad_norm": 1.0919623374938965, |
| "learning_rate": 1.9997671471749412e-07, |
| "loss": 0.0042, |
| "step": 8410 |
| }, |
| { |
| "epoch": 0.0155968095077558, |
| "grad_norm": 0.2331302911043167, |
| "learning_rate": 1.999766518292679e-07, |
| "loss": 0.0041, |
| "step": 8420 |
| }, |
| { |
| "epoch": 0.015615333034487103, |
| "grad_norm": 0.4476732611656189, |
| "learning_rate": 1.9997658885624277e-07, |
| "loss": 0.0043, |
| "step": 8430 |
| }, |
| { |
| "epoch": 0.015633856561218403, |
| "grad_norm": 0.9618854522705078, |
| "learning_rate": 1.999765257984187e-07, |
| "loss": 0.004, |
| "step": 8440 |
| }, |
| { |
| "epoch": 0.015652380087949704, |
| "grad_norm": 0.6848201155662537, |
| "learning_rate": 1.9997646265579578e-07, |
| "loss": 0.004, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.015670903614681007, |
| "grad_norm": 1.0891481637954712, |
| "learning_rate": 1.9997639942837408e-07, |
| "loss": 0.0037, |
| "step": 8460 |
| }, |
| { |
| "epoch": 0.015689427141412308, |
| "grad_norm": 1.0522816181182861, |
| "learning_rate": 1.999763361161536e-07, |
| "loss": 0.0053, |
| "step": 8470 |
| }, |
| { |
| "epoch": 0.015707950668143608, |
| "grad_norm": 1.0642685890197754, |
| "learning_rate": 1.9997627271913444e-07, |
| "loss": 0.0034, |
| "step": 8480 |
| }, |
| { |
| "epoch": 0.015726474194874912, |
| "grad_norm": 1.705619215965271, |
| "learning_rate": 1.9997620923731664e-07, |
| "loss": 0.005, |
| "step": 8490 |
| }, |
| { |
| "epoch": 0.015744997721606212, |
| "grad_norm": 0.2627123296260834, |
| "learning_rate": 1.9997614567070026e-07, |
| "loss": 0.0062, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.015763521248337513, |
| "grad_norm": 0.48840856552124023, |
| "learning_rate": 1.9997608201928532e-07, |
| "loss": 0.0045, |
| "step": 8510 |
| }, |
| { |
| "epoch": 0.015782044775068813, |
| "grad_norm": 0.912911057472229, |
| "learning_rate": 1.9997601828307195e-07, |
| "loss": 0.0052, |
| "step": 8520 |
| }, |
| { |
| "epoch": 0.015800568301800117, |
| "grad_norm": 0.665995180606842, |
| "learning_rate": 1.9997595446206013e-07, |
| "loss": 0.0041, |
| "step": 8530 |
| }, |
| { |
| "epoch": 0.015819091828531417, |
| "grad_norm": 0.6801586747169495, |
| "learning_rate": 1.9997589055624994e-07, |
| "loss": 0.005, |
| "step": 8540 |
| }, |
| { |
| "epoch": 0.015837615355262718, |
| "grad_norm": 1.1667735576629639, |
| "learning_rate": 1.9997582656564142e-07, |
| "loss": 0.0053, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.015856138881994022, |
| "grad_norm": 1.0843561887741089, |
| "learning_rate": 1.9997576249023464e-07, |
| "loss": 0.0042, |
| "step": 8560 |
| }, |
| { |
| "epoch": 0.015874662408725322, |
| "grad_norm": 1.7238801717758179, |
| "learning_rate": 1.9997569833002967e-07, |
| "loss": 0.0049, |
| "step": 8570 |
| }, |
| { |
| "epoch": 0.015893185935456623, |
| "grad_norm": 0.34246015548706055, |
| "learning_rate": 1.9997563408502656e-07, |
| "loss": 0.0034, |
| "step": 8580 |
| }, |
| { |
| "epoch": 0.015911709462187926, |
| "grad_norm": 1.2983548641204834, |
| "learning_rate": 1.999755697552253e-07, |
| "loss": 0.0039, |
| "step": 8590 |
| }, |
| { |
| "epoch": 0.015930232988919227, |
| "grad_norm": 1.3458633422851562, |
| "learning_rate": 1.9997550534062606e-07, |
| "loss": 0.0049, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.015948756515650527, |
| "grad_norm": 2.532499074935913, |
| "learning_rate": 1.9997544084122878e-07, |
| "loss": 0.004, |
| "step": 8610 |
| }, |
| { |
| "epoch": 0.015967280042381828, |
| "grad_norm": 1.1108027696609497, |
| "learning_rate": 1.999753762570336e-07, |
| "loss": 0.0038, |
| "step": 8620 |
| }, |
| { |
| "epoch": 0.01598580356911313, |
| "grad_norm": 0.5047584176063538, |
| "learning_rate": 1.9997531158804053e-07, |
| "loss": 0.0055, |
| "step": 8630 |
| }, |
| { |
| "epoch": 0.016004327095844432, |
| "grad_norm": 1.08219313621521, |
| "learning_rate": 1.9997524683424961e-07, |
| "loss": 0.0046, |
| "step": 8640 |
| }, |
| { |
| "epoch": 0.016022850622575732, |
| "grad_norm": 3.6591594219207764, |
| "learning_rate": 1.9997518199566096e-07, |
| "loss": 0.0056, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.016041374149307036, |
| "grad_norm": 0.6368611454963684, |
| "learning_rate": 1.9997511707227456e-07, |
| "loss": 0.0044, |
| "step": 8660 |
| }, |
| { |
| "epoch": 0.016059897676038336, |
| "grad_norm": 0.35338371992111206, |
| "learning_rate": 1.9997505206409053e-07, |
| "loss": 0.0056, |
| "step": 8670 |
| }, |
| { |
| "epoch": 0.016078421202769637, |
| "grad_norm": 0.7746136784553528, |
| "learning_rate": 1.999749869711089e-07, |
| "loss": 0.0043, |
| "step": 8680 |
| }, |
| { |
| "epoch": 0.01609694472950094, |
| "grad_norm": 1.162908911705017, |
| "learning_rate": 1.9997492179332968e-07, |
| "loss": 0.0037, |
| "step": 8690 |
| }, |
| { |
| "epoch": 0.01611546825623224, |
| "grad_norm": 0.8728556036949158, |
| "learning_rate": 1.9997485653075298e-07, |
| "loss": 0.0042, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.01613399178296354, |
| "grad_norm": 2.9004342555999756, |
| "learning_rate": 1.9997479118337885e-07, |
| "loss": 0.0058, |
| "step": 8710 |
| }, |
| { |
| "epoch": 0.016152515309694842, |
| "grad_norm": 2.0210251808166504, |
| "learning_rate": 1.9997472575120734e-07, |
| "loss": 0.0049, |
| "step": 8720 |
| }, |
| { |
| "epoch": 0.016171038836426146, |
| "grad_norm": 0.6767845749855042, |
| "learning_rate": 1.999746602342385e-07, |
| "loss": 0.0041, |
| "step": 8730 |
| }, |
| { |
| "epoch": 0.016189562363157446, |
| "grad_norm": 1.5122381448745728, |
| "learning_rate": 1.9997459463247238e-07, |
| "loss": 0.0057, |
| "step": 8740 |
| }, |
| { |
| "epoch": 0.016208085889888747, |
| "grad_norm": 0.2984503209590912, |
| "learning_rate": 1.9997452894590906e-07, |
| "loss": 0.0039, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.01622660941662005, |
| "grad_norm": 1.4575154781341553, |
| "learning_rate": 1.9997446317454856e-07, |
| "loss": 0.0046, |
| "step": 8760 |
| }, |
| { |
| "epoch": 0.01624513294335135, |
| "grad_norm": 0.667724072933197, |
| "learning_rate": 1.9997439731839097e-07, |
| "loss": 0.0049, |
| "step": 8770 |
| }, |
| { |
| "epoch": 0.01626365647008265, |
| "grad_norm": 1.7611080408096313, |
| "learning_rate": 1.9997433137743632e-07, |
| "loss": 0.005, |
| "step": 8780 |
| }, |
| { |
| "epoch": 0.016282179996813955, |
| "grad_norm": 1.1792736053466797, |
| "learning_rate": 1.9997426535168466e-07, |
| "loss": 0.0046, |
| "step": 8790 |
| }, |
| { |
| "epoch": 0.016300703523545255, |
| "grad_norm": 0.7357038855552673, |
| "learning_rate": 1.999741992411361e-07, |
| "loss": 0.0053, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.016319227050276556, |
| "grad_norm": 0.6902112364768982, |
| "learning_rate": 1.9997413304579062e-07, |
| "loss": 0.0046, |
| "step": 8810 |
| }, |
| { |
| "epoch": 0.016337750577007856, |
| "grad_norm": 1.6841918230056763, |
| "learning_rate": 1.9997406676564834e-07, |
| "loss": 0.0036, |
| "step": 8820 |
| }, |
| { |
| "epoch": 0.01635627410373916, |
| "grad_norm": 1.3094260692596436, |
| "learning_rate": 1.9997400040070928e-07, |
| "loss": 0.0065, |
| "step": 8830 |
| }, |
| { |
| "epoch": 0.01637479763047046, |
| "grad_norm": 0.8650581240653992, |
| "learning_rate": 1.9997393395097353e-07, |
| "loss": 0.0044, |
| "step": 8840 |
| }, |
| { |
| "epoch": 0.01639332115720176, |
| "grad_norm": 1.6597647666931152, |
| "learning_rate": 1.999738674164411e-07, |
| "loss": 0.005, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.016411844683933065, |
| "grad_norm": 0.7247337102890015, |
| "learning_rate": 1.9997380079711208e-07, |
| "loss": 0.0054, |
| "step": 8860 |
| }, |
| { |
| "epoch": 0.016430368210664365, |
| "grad_norm": 0.6491051912307739, |
| "learning_rate": 1.999737340929865e-07, |
| "loss": 0.0047, |
| "step": 8870 |
| }, |
| { |
| "epoch": 0.016448891737395666, |
| "grad_norm": 0.5910527110099792, |
| "learning_rate": 1.9997366730406444e-07, |
| "loss": 0.0056, |
| "step": 8880 |
| }, |
| { |
| "epoch": 0.016467415264126966, |
| "grad_norm": 1.4455671310424805, |
| "learning_rate": 1.9997360043034596e-07, |
| "loss": 0.0053, |
| "step": 8890 |
| }, |
| { |
| "epoch": 0.01648593879085827, |
| "grad_norm": 0.44134023785591125, |
| "learning_rate": 1.999735334718311e-07, |
| "loss": 0.004, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.01650446231758957, |
| "grad_norm": 1.5593891143798828, |
| "learning_rate": 1.9997346642851993e-07, |
| "loss": 0.0059, |
| "step": 8910 |
| }, |
| { |
| "epoch": 0.01652298584432087, |
| "grad_norm": 1.3159610033035278, |
| "learning_rate": 1.999733993004125e-07, |
| "loss": 0.0044, |
| "step": 8920 |
| }, |
| { |
| "epoch": 0.016541509371052174, |
| "grad_norm": 0.15289658308029175, |
| "learning_rate": 1.9997333208750885e-07, |
| "loss": 0.0049, |
| "step": 8930 |
| }, |
| { |
| "epoch": 0.016560032897783475, |
| "grad_norm": 1.633427381515503, |
| "learning_rate": 1.999732647898091e-07, |
| "loss": 0.0052, |
| "step": 8940 |
| }, |
| { |
| "epoch": 0.016578556424514775, |
| "grad_norm": 0.5088497400283813, |
| "learning_rate": 1.999731974073132e-07, |
| "loss": 0.0044, |
| "step": 8950 |
| }, |
| { |
| "epoch": 0.01659707995124608, |
| "grad_norm": 2.5566632747650146, |
| "learning_rate": 1.9997312994002131e-07, |
| "loss": 0.004, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.01661560347797738, |
| "grad_norm": 1.031653642654419, |
| "learning_rate": 1.9997306238793344e-07, |
| "loss": 0.0049, |
| "step": 8970 |
| }, |
| { |
| "epoch": 0.01663412700470868, |
| "grad_norm": 1.1217010021209717, |
| "learning_rate": 1.9997299475104963e-07, |
| "loss": 0.0046, |
| "step": 8980 |
| }, |
| { |
| "epoch": 0.01665265053143998, |
| "grad_norm": 1.151426911354065, |
| "learning_rate": 1.9997292702936995e-07, |
| "loss": 0.0038, |
| "step": 8990 |
| }, |
| { |
| "epoch": 0.016671174058171284, |
| "grad_norm": 1.1687980890274048, |
| "learning_rate": 1.999728592228945e-07, |
| "loss": 0.0036, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.016689697584902585, |
| "grad_norm": 0.6960824131965637, |
| "learning_rate": 1.9997279133162332e-07, |
| "loss": 0.0044, |
| "step": 9010 |
| }, |
| { |
| "epoch": 0.016708221111633885, |
| "grad_norm": 3.1780805587768555, |
| "learning_rate": 1.9997272335555641e-07, |
| "loss": 0.0051, |
| "step": 9020 |
| }, |
| { |
| "epoch": 0.01672674463836519, |
| "grad_norm": 0.7304292917251587, |
| "learning_rate": 1.999726552946939e-07, |
| "loss": 0.0048, |
| "step": 9030 |
| }, |
| { |
| "epoch": 0.01674526816509649, |
| "grad_norm": 0.39188894629478455, |
| "learning_rate": 1.9997258714903582e-07, |
| "loss": 0.0046, |
| "step": 9040 |
| }, |
| { |
| "epoch": 0.01676379169182779, |
| "grad_norm": 0.5043576955795288, |
| "learning_rate": 1.9997251891858223e-07, |
| "loss": 0.0049, |
| "step": 9050 |
| }, |
| { |
| "epoch": 0.016782315218559093, |
| "grad_norm": 0.9844755530357361, |
| "learning_rate": 1.9997245060333315e-07, |
| "loss": 0.0041, |
| "step": 9060 |
| }, |
| { |
| "epoch": 0.016800838745290394, |
| "grad_norm": 1.0253583192825317, |
| "learning_rate": 1.999723822032887e-07, |
| "loss": 0.0046, |
| "step": 9070 |
| }, |
| { |
| "epoch": 0.016819362272021694, |
| "grad_norm": 0.3260776698589325, |
| "learning_rate": 1.9997231371844888e-07, |
| "loss": 0.0038, |
| "step": 9080 |
| }, |
| { |
| "epoch": 0.016837885798752995, |
| "grad_norm": 0.8749006986618042, |
| "learning_rate": 1.9997224514881382e-07, |
| "loss": 0.0038, |
| "step": 9090 |
| }, |
| { |
| "epoch": 0.0168564093254843, |
| "grad_norm": 1.3569176197052002, |
| "learning_rate": 1.999721764943835e-07, |
| "loss": 0.0059, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.0168749328522156, |
| "grad_norm": 0.9446332454681396, |
| "learning_rate": 1.99972107755158e-07, |
| "loss": 0.0056, |
| "step": 9110 |
| }, |
| { |
| "epoch": 0.0168934563789469, |
| "grad_norm": 0.41128236055374146, |
| "learning_rate": 1.9997203893113746e-07, |
| "loss": 0.0053, |
| "step": 9120 |
| }, |
| { |
| "epoch": 0.016911979905678203, |
| "grad_norm": 0.9697746634483337, |
| "learning_rate": 1.9997197002232182e-07, |
| "loss": 0.0043, |
| "step": 9130 |
| }, |
| { |
| "epoch": 0.016930503432409504, |
| "grad_norm": 0.9527771472930908, |
| "learning_rate": 1.999719010287112e-07, |
| "loss": 0.0057, |
| "step": 9140 |
| }, |
| { |
| "epoch": 0.016949026959140804, |
| "grad_norm": 0.6190195083618164, |
| "learning_rate": 1.9997183195030565e-07, |
| "loss": 0.0044, |
| "step": 9150 |
| }, |
| { |
| "epoch": 0.016967550485872108, |
| "grad_norm": 0.5652283430099487, |
| "learning_rate": 1.9997176278710523e-07, |
| "loss": 0.0044, |
| "step": 9160 |
| }, |
| { |
| "epoch": 0.016986074012603408, |
| "grad_norm": 0.25012028217315674, |
| "learning_rate": 1.9997169353910998e-07, |
| "loss": 0.0044, |
| "step": 9170 |
| }, |
| { |
| "epoch": 0.01700459753933471, |
| "grad_norm": 4.73937463760376, |
| "learning_rate": 1.9997162420632e-07, |
| "loss": 0.0041, |
| "step": 9180 |
| }, |
| { |
| "epoch": 0.01702312106606601, |
| "grad_norm": 0.6528874039649963, |
| "learning_rate": 1.9997155478873528e-07, |
| "loss": 0.0035, |
| "step": 9190 |
| }, |
| { |
| "epoch": 0.017041644592797313, |
| "grad_norm": 1.7770953178405762, |
| "learning_rate": 1.9997148528635598e-07, |
| "loss": 0.0044, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.017060168119528613, |
| "grad_norm": 1.0450669527053833, |
| "learning_rate": 1.9997141569918206e-07, |
| "loss": 0.0041, |
| "step": 9210 |
| }, |
| { |
| "epoch": 0.017078691646259914, |
| "grad_norm": 2.0028116703033447, |
| "learning_rate": 1.9997134602721363e-07, |
| "loss": 0.0054, |
| "step": 9220 |
| }, |
| { |
| "epoch": 0.017097215172991218, |
| "grad_norm": 1.6637686491012573, |
| "learning_rate": 1.9997127627045072e-07, |
| "loss": 0.0047, |
| "step": 9230 |
| }, |
| { |
| "epoch": 0.017115738699722518, |
| "grad_norm": 1.9286481142044067, |
| "learning_rate": 1.9997120642889343e-07, |
| "loss": 0.0052, |
| "step": 9240 |
| }, |
| { |
| "epoch": 0.01713426222645382, |
| "grad_norm": 0.8772292733192444, |
| "learning_rate": 1.9997113650254182e-07, |
| "loss": 0.0039, |
| "step": 9250 |
| }, |
| { |
| "epoch": 0.01715278575318512, |
| "grad_norm": 1.7083206176757812, |
| "learning_rate": 1.9997106649139588e-07, |
| "loss": 0.0042, |
| "step": 9260 |
| }, |
| { |
| "epoch": 0.017171309279916423, |
| "grad_norm": 0.44467809796333313, |
| "learning_rate": 1.9997099639545575e-07, |
| "loss": 0.0043, |
| "step": 9270 |
| }, |
| { |
| "epoch": 0.017189832806647723, |
| "grad_norm": 0.5728235244750977, |
| "learning_rate": 1.9997092621472143e-07, |
| "loss": 0.005, |
| "step": 9280 |
| }, |
| { |
| "epoch": 0.017208356333379023, |
| "grad_norm": 0.8556253910064697, |
| "learning_rate": 1.99970855949193e-07, |
| "loss": 0.0047, |
| "step": 9290 |
| }, |
| { |
| "epoch": 0.017226879860110327, |
| "grad_norm": 1.6084396839141846, |
| "learning_rate": 1.9997078559887056e-07, |
| "loss": 0.0041, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.017245403386841628, |
| "grad_norm": 0.3883759677410126, |
| "learning_rate": 1.999707151637541e-07, |
| "loss": 0.0039, |
| "step": 9310 |
| }, |
| { |
| "epoch": 0.017263926913572928, |
| "grad_norm": 2.8804821968078613, |
| "learning_rate": 1.999706446438437e-07, |
| "loss": 0.0045, |
| "step": 9320 |
| }, |
| { |
| "epoch": 0.017282450440304232, |
| "grad_norm": 1.2428147792816162, |
| "learning_rate": 1.999705740391395e-07, |
| "loss": 0.0046, |
| "step": 9330 |
| }, |
| { |
| "epoch": 0.017300973967035532, |
| "grad_norm": 0.795876145362854, |
| "learning_rate": 1.9997050334964144e-07, |
| "loss": 0.0043, |
| "step": 9340 |
| }, |
| { |
| "epoch": 0.017319497493766833, |
| "grad_norm": 0.7071340680122375, |
| "learning_rate": 1.9997043257534963e-07, |
| "loss": 0.0036, |
| "step": 9350 |
| }, |
| { |
| "epoch": 0.017338021020498133, |
| "grad_norm": 0.39569318294525146, |
| "learning_rate": 1.9997036171626416e-07, |
| "loss": 0.0042, |
| "step": 9360 |
| }, |
| { |
| "epoch": 0.017356544547229437, |
| "grad_norm": 0.6116693615913391, |
| "learning_rate": 1.9997029077238507e-07, |
| "loss": 0.0044, |
| "step": 9370 |
| }, |
| { |
| "epoch": 0.017375068073960737, |
| "grad_norm": 0.257621169090271, |
| "learning_rate": 1.999702197437124e-07, |
| "loss": 0.0038, |
| "step": 9380 |
| }, |
| { |
| "epoch": 0.017393591600692038, |
| "grad_norm": 0.29687631130218506, |
| "learning_rate": 1.999701486302462e-07, |
| "loss": 0.0044, |
| "step": 9390 |
| }, |
| { |
| "epoch": 0.01741211512742334, |
| "grad_norm": 0.8272486329078674, |
| "learning_rate": 1.9997007743198656e-07, |
| "loss": 0.0042, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.017430638654154642, |
| "grad_norm": 2.998185634613037, |
| "learning_rate": 1.9997000614893357e-07, |
| "loss": 0.0037, |
| "step": 9410 |
| }, |
| { |
| "epoch": 0.017449162180885942, |
| "grad_norm": 0.8274715542793274, |
| "learning_rate": 1.9996993478108726e-07, |
| "loss": 0.0044, |
| "step": 9420 |
| }, |
| { |
| "epoch": 0.017467685707617246, |
| "grad_norm": 0.7815435528755188, |
| "learning_rate": 1.9996986332844763e-07, |
| "loss": 0.0054, |
| "step": 9430 |
| }, |
| { |
| "epoch": 0.017486209234348547, |
| "grad_norm": 1.229856014251709, |
| "learning_rate": 1.9996979179101484e-07, |
| "loss": 0.0052, |
| "step": 9440 |
| }, |
| { |
| "epoch": 0.017504732761079847, |
| "grad_norm": 0.9731438755989075, |
| "learning_rate": 1.999697201687889e-07, |
| "loss": 0.0046, |
| "step": 9450 |
| }, |
| { |
| "epoch": 0.017523256287811147, |
| "grad_norm": 1.1173068284988403, |
| "learning_rate": 1.9996964846176986e-07, |
| "loss": 0.0045, |
| "step": 9460 |
| }, |
| { |
| "epoch": 0.01754177981454245, |
| "grad_norm": 0.5310545563697815, |
| "learning_rate": 1.999695766699578e-07, |
| "loss": 0.003, |
| "step": 9470 |
| }, |
| { |
| "epoch": 0.01756030334127375, |
| "grad_norm": 0.9242424368858337, |
| "learning_rate": 1.9996950479335283e-07, |
| "loss": 0.0035, |
| "step": 9480 |
| }, |
| { |
| "epoch": 0.017578826868005052, |
| "grad_norm": 0.8172231912612915, |
| "learning_rate": 1.999694328319549e-07, |
| "loss": 0.0041, |
| "step": 9490 |
| }, |
| { |
| "epoch": 0.017597350394736356, |
| "grad_norm": 1.4767719507217407, |
| "learning_rate": 1.9996936078576416e-07, |
| "loss": 0.0055, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.017615873921467656, |
| "grad_norm": 0.5275189280509949, |
| "learning_rate": 1.9996928865478063e-07, |
| "loss": 0.0049, |
| "step": 9510 |
| }, |
| { |
| "epoch": 0.017634397448198957, |
| "grad_norm": 1.080090045928955, |
| "learning_rate": 1.9996921643900436e-07, |
| "loss": 0.0041, |
| "step": 9520 |
| }, |
| { |
| "epoch": 0.01765292097493026, |
| "grad_norm": 1.2835578918457031, |
| "learning_rate": 1.999691441384355e-07, |
| "loss": 0.0048, |
| "step": 9530 |
| }, |
| { |
| "epoch": 0.01767144450166156, |
| "grad_norm": 0.9508166909217834, |
| "learning_rate": 1.99969071753074e-07, |
| "loss": 0.0041, |
| "step": 9540 |
| }, |
| { |
| "epoch": 0.01768996802839286, |
| "grad_norm": 1.4011200666427612, |
| "learning_rate": 1.9996899928291997e-07, |
| "loss": 0.0036, |
| "step": 9550 |
| }, |
| { |
| "epoch": 0.01770849155512416, |
| "grad_norm": 0.9394834637641907, |
| "learning_rate": 1.9996892672797347e-07, |
| "loss": 0.0044, |
| "step": 9560 |
| }, |
| { |
| "epoch": 0.017727015081855466, |
| "grad_norm": 1.002217173576355, |
| "learning_rate": 1.9996885408823458e-07, |
| "loss": 0.0046, |
| "step": 9570 |
| }, |
| { |
| "epoch": 0.017745538608586766, |
| "grad_norm": 0.40080058574676514, |
| "learning_rate": 1.9996878136370333e-07, |
| "loss": 0.0043, |
| "step": 9580 |
| }, |
| { |
| "epoch": 0.017764062135318066, |
| "grad_norm": 1.8101344108581543, |
| "learning_rate": 1.999687085543798e-07, |
| "loss": 0.0074, |
| "step": 9590 |
| }, |
| { |
| "epoch": 0.01778258566204937, |
| "grad_norm": 0.8633871078491211, |
| "learning_rate": 1.9996863566026402e-07, |
| "loss": 0.0047, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.01780110918878067, |
| "grad_norm": 0.8291581869125366, |
| "learning_rate": 1.999685626813561e-07, |
| "loss": 0.0051, |
| "step": 9610 |
| }, |
| { |
| "epoch": 0.01781963271551197, |
| "grad_norm": 1.9119772911071777, |
| "learning_rate": 1.9996848961765606e-07, |
| "loss": 0.0055, |
| "step": 9620 |
| }, |
| { |
| "epoch": 0.017838156242243275, |
| "grad_norm": 0.5285390019416809, |
| "learning_rate": 1.9996841646916401e-07, |
| "loss": 0.0044, |
| "step": 9630 |
| }, |
| { |
| "epoch": 0.017856679768974575, |
| "grad_norm": 0.8999338150024414, |
| "learning_rate": 1.9996834323588e-07, |
| "loss": 0.0044, |
| "step": 9640 |
| }, |
| { |
| "epoch": 0.017875203295705876, |
| "grad_norm": 1.9978399276733398, |
| "learning_rate": 1.99968269917804e-07, |
| "loss": 0.0052, |
| "step": 9650 |
| }, |
| { |
| "epoch": 0.017893726822437176, |
| "grad_norm": 0.9967847466468811, |
| "learning_rate": 1.9996819651493621e-07, |
| "loss": 0.0039, |
| "step": 9660 |
| }, |
| { |
| "epoch": 0.01791225034916848, |
| "grad_norm": 0.2726913094520569, |
| "learning_rate": 1.999681230272766e-07, |
| "loss": 0.0045, |
| "step": 9670 |
| }, |
| { |
| "epoch": 0.01793077387589978, |
| "grad_norm": 0.6133876442909241, |
| "learning_rate": 1.999680494548253e-07, |
| "loss": 0.0041, |
| "step": 9680 |
| }, |
| { |
| "epoch": 0.01794929740263108, |
| "grad_norm": 2.7411675453186035, |
| "learning_rate": 1.9996797579758229e-07, |
| "loss": 0.0046, |
| "step": 9690 |
| }, |
| { |
| "epoch": 0.017967820929362385, |
| "grad_norm": 1.0368309020996094, |
| "learning_rate": 1.9996790205554773e-07, |
| "loss": 0.0048, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.017986344456093685, |
| "grad_norm": 0.8047605752944946, |
| "learning_rate": 1.9996782822872157e-07, |
| "loss": 0.0053, |
| "step": 9710 |
| }, |
| { |
| "epoch": 0.018004867982824985, |
| "grad_norm": 0.7528997659683228, |
| "learning_rate": 1.9996775431710398e-07, |
| "loss": 0.0038, |
| "step": 9720 |
| }, |
| { |
| "epoch": 0.018023391509556286, |
| "grad_norm": 1.419985294342041, |
| "learning_rate": 1.9996768032069494e-07, |
| "loss": 0.0043, |
| "step": 9730 |
| }, |
| { |
| "epoch": 0.01804191503628759, |
| "grad_norm": 0.8917560577392578, |
| "learning_rate": 1.9996760623949455e-07, |
| "loss": 0.0038, |
| "step": 9740 |
| }, |
| { |
| "epoch": 0.01806043856301889, |
| "grad_norm": 0.5174658298492432, |
| "learning_rate": 1.999675320735029e-07, |
| "loss": 0.0055, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.01807896208975019, |
| "grad_norm": 0.8098558187484741, |
| "learning_rate": 1.9996745782272e-07, |
| "loss": 0.0043, |
| "step": 9760 |
| }, |
| { |
| "epoch": 0.018097485616481494, |
| "grad_norm": 0.36458224058151245, |
| "learning_rate": 1.9996738348714595e-07, |
| "loss": 0.0045, |
| "step": 9770 |
| }, |
| { |
| "epoch": 0.018116009143212795, |
| "grad_norm": 0.9201998114585876, |
| "learning_rate": 1.9996730906678078e-07, |
| "loss": 0.0043, |
| "step": 9780 |
| }, |
| { |
| "epoch": 0.018134532669944095, |
| "grad_norm": 0.8556378483772278, |
| "learning_rate": 1.9996723456162462e-07, |
| "loss": 0.0038, |
| "step": 9790 |
| }, |
| { |
| "epoch": 0.0181530561966754, |
| "grad_norm": 0.5827649831771851, |
| "learning_rate": 1.9996715997167745e-07, |
| "loss": 0.0043, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.0181715797234067, |
| "grad_norm": 0.8942850232124329, |
| "learning_rate": 1.999670852969394e-07, |
| "loss": 0.0038, |
| "step": 9810 |
| }, |
| { |
| "epoch": 0.018190103250138, |
| "grad_norm": 0.9683301448822021, |
| "learning_rate": 1.9996701053741042e-07, |
| "loss": 0.0056, |
| "step": 9820 |
| }, |
| { |
| "epoch": 0.0182086267768693, |
| "grad_norm": 0.7990354299545288, |
| "learning_rate": 1.9996693569309073e-07, |
| "loss": 0.0063, |
| "step": 9830 |
| }, |
| { |
| "epoch": 0.018227150303600604, |
| "grad_norm": 1.0179579257965088, |
| "learning_rate": 1.999668607639803e-07, |
| "loss": 0.0053, |
| "step": 9840 |
| }, |
| { |
| "epoch": 0.018245673830331904, |
| "grad_norm": 1.0524885654449463, |
| "learning_rate": 1.9996678575007922e-07, |
| "loss": 0.0038, |
| "step": 9850 |
| }, |
| { |
| "epoch": 0.018264197357063205, |
| "grad_norm": 0.520573079586029, |
| "learning_rate": 1.9996671065138751e-07, |
| "loss": 0.0046, |
| "step": 9860 |
| }, |
| { |
| "epoch": 0.01828272088379451, |
| "grad_norm": 1.1568214893341064, |
| "learning_rate": 1.9996663546790532e-07, |
| "loss": 0.0038, |
| "step": 9870 |
| }, |
| { |
| "epoch": 0.01830124441052581, |
| "grad_norm": 0.5618509650230408, |
| "learning_rate": 1.9996656019963264e-07, |
| "loss": 0.0046, |
| "step": 9880 |
| }, |
| { |
| "epoch": 0.01831976793725711, |
| "grad_norm": 1.3835537433624268, |
| "learning_rate": 1.9996648484656955e-07, |
| "loss": 0.0042, |
| "step": 9890 |
| }, |
| { |
| "epoch": 0.018338291463988413, |
| "grad_norm": 0.5863046646118164, |
| "learning_rate": 1.9996640940871614e-07, |
| "loss": 0.0047, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.018356814990719714, |
| "grad_norm": 0.3961147367954254, |
| "learning_rate": 1.9996633388607248e-07, |
| "loss": 0.0042, |
| "step": 9910 |
| }, |
| { |
| "epoch": 0.018375338517451014, |
| "grad_norm": 1.7058590650558472, |
| "learning_rate": 1.9996625827863854e-07, |
| "loss": 0.0038, |
| "step": 9920 |
| }, |
| { |
| "epoch": 0.018393862044182314, |
| "grad_norm": 2.0092124938964844, |
| "learning_rate": 1.9996618258641452e-07, |
| "loss": 0.0053, |
| "step": 9930 |
| }, |
| { |
| "epoch": 0.01841238557091362, |
| "grad_norm": 0.9541193246841431, |
| "learning_rate": 1.9996610680940038e-07, |
| "loss": 0.003, |
| "step": 9940 |
| }, |
| { |
| "epoch": 0.01843090909764492, |
| "grad_norm": 0.9015825390815735, |
| "learning_rate": 1.9996603094759623e-07, |
| "loss": 0.0038, |
| "step": 9950 |
| }, |
| { |
| "epoch": 0.01844943262437622, |
| "grad_norm": 0.30549857020378113, |
| "learning_rate": 1.9996595500100212e-07, |
| "loss": 0.0041, |
| "step": 9960 |
| }, |
| { |
| "epoch": 0.018467956151107523, |
| "grad_norm": 0.7488313317298889, |
| "learning_rate": 1.9996587896961814e-07, |
| "loss": 0.0046, |
| "step": 9970 |
| }, |
| { |
| "epoch": 0.018486479677838823, |
| "grad_norm": 1.1713547706604004, |
| "learning_rate": 1.9996580285344433e-07, |
| "loss": 0.0055, |
| "step": 9980 |
| }, |
| { |
| "epoch": 0.018505003204570124, |
| "grad_norm": 1.1204206943511963, |
| "learning_rate": 1.9996572665248075e-07, |
| "loss": 0.0054, |
| "step": 9990 |
| }, |
| { |
| "epoch": 0.018523526731301428, |
| "grad_norm": 1.6548596620559692, |
| "learning_rate": 1.9996565036672747e-07, |
| "loss": 0.0052, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.018542050258032728, |
| "grad_norm": 0.7798900008201599, |
| "learning_rate": 1.9996557399618461e-07, |
| "loss": 0.0038, |
| "step": 10010 |
| }, |
| { |
| "epoch": 0.01856057378476403, |
| "grad_norm": 1.0112378597259521, |
| "learning_rate": 1.9996549754085214e-07, |
| "loss": 0.0038, |
| "step": 10020 |
| }, |
| { |
| "epoch": 0.01857909731149533, |
| "grad_norm": 0.9646735191345215, |
| "learning_rate": 1.9996542100073016e-07, |
| "loss": 0.0047, |
| "step": 10030 |
| }, |
| { |
| "epoch": 0.018597620838226633, |
| "grad_norm": 0.8091621994972229, |
| "learning_rate": 1.9996534437581879e-07, |
| "loss": 0.0054, |
| "step": 10040 |
| }, |
| { |
| "epoch": 0.018616144364957933, |
| "grad_norm": 0.6395015716552734, |
| "learning_rate": 1.99965267666118e-07, |
| "loss": 0.0034, |
| "step": 10050 |
| }, |
| { |
| "epoch": 0.018634667891689233, |
| "grad_norm": 1.429945468902588, |
| "learning_rate": 1.999651908716279e-07, |
| "loss": 0.0042, |
| "step": 10060 |
| }, |
| { |
| "epoch": 0.018653191418420537, |
| "grad_norm": 2.344635248184204, |
| "learning_rate": 1.9996511399234861e-07, |
| "loss": 0.0047, |
| "step": 10070 |
| }, |
| { |
| "epoch": 0.018671714945151838, |
| "grad_norm": 1.4581433534622192, |
| "learning_rate": 1.999650370282801e-07, |
| "loss": 0.0044, |
| "step": 10080 |
| }, |
| { |
| "epoch": 0.018690238471883138, |
| "grad_norm": 1.218477725982666, |
| "learning_rate": 1.9996495997942252e-07, |
| "loss": 0.0046, |
| "step": 10090 |
| }, |
| { |
| "epoch": 0.01870876199861444, |
| "grad_norm": 1.8469760417938232, |
| "learning_rate": 1.9996488284577587e-07, |
| "loss": 0.0039, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.018727285525345742, |
| "grad_norm": 0.24046263098716736, |
| "learning_rate": 1.9996480562734025e-07, |
| "loss": 0.0042, |
| "step": 10110 |
| }, |
| { |
| "epoch": 0.018745809052077043, |
| "grad_norm": 0.7213006019592285, |
| "learning_rate": 1.999647283241157e-07, |
| "loss": 0.0049, |
| "step": 10120 |
| }, |
| { |
| "epoch": 0.018764332578808343, |
| "grad_norm": 0.644180417060852, |
| "learning_rate": 1.999646509361023e-07, |
| "loss": 0.0039, |
| "step": 10130 |
| }, |
| { |
| "epoch": 0.018782856105539647, |
| "grad_norm": 1.4993228912353516, |
| "learning_rate": 1.9996457346330015e-07, |
| "loss": 0.0045, |
| "step": 10140 |
| }, |
| { |
| "epoch": 0.018801379632270947, |
| "grad_norm": 0.6667758226394653, |
| "learning_rate": 1.9996449590570925e-07, |
| "loss": 0.005, |
| "step": 10150 |
| }, |
| { |
| "epoch": 0.018819903159002248, |
| "grad_norm": 0.7460207343101501, |
| "learning_rate": 1.9996441826332972e-07, |
| "loss": 0.0041, |
| "step": 10160 |
| }, |
| { |
| "epoch": 0.01883842668573355, |
| "grad_norm": 0.5453267097473145, |
| "learning_rate": 1.9996434053616158e-07, |
| "loss": 0.0055, |
| "step": 10170 |
| }, |
| { |
| "epoch": 0.018856950212464852, |
| "grad_norm": 0.64606773853302, |
| "learning_rate": 1.9996426272420494e-07, |
| "loss": 0.0039, |
| "step": 10180 |
| }, |
| { |
| "epoch": 0.018875473739196152, |
| "grad_norm": 0.6951911449432373, |
| "learning_rate": 1.9996418482745985e-07, |
| "loss": 0.0051, |
| "step": 10190 |
| }, |
| { |
| "epoch": 0.018893997265927453, |
| "grad_norm": 0.7704794406890869, |
| "learning_rate": 1.9996410684592634e-07, |
| "loss": 0.0039, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.018912520792658757, |
| "grad_norm": 0.5671060085296631, |
| "learning_rate": 1.9996402877960454e-07, |
| "loss": 0.0043, |
| "step": 10210 |
| }, |
| { |
| "epoch": 0.018931044319390057, |
| "grad_norm": 0.7393127679824829, |
| "learning_rate": 1.9996395062849448e-07, |
| "loss": 0.0048, |
| "step": 10220 |
| }, |
| { |
| "epoch": 0.018949567846121358, |
| "grad_norm": 0.5430881977081299, |
| "learning_rate": 1.9996387239259624e-07, |
| "loss": 0.0053, |
| "step": 10230 |
| }, |
| { |
| "epoch": 0.01896809137285266, |
| "grad_norm": 0.8876209855079651, |
| "learning_rate": 1.999637940719099e-07, |
| "loss": 0.0041, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.018986614899583962, |
| "grad_norm": 0.6596053242683411, |
| "learning_rate": 1.9996371566643544e-07, |
| "loss": 0.0047, |
| "step": 10250 |
| }, |
| { |
| "epoch": 0.019005138426315262, |
| "grad_norm": 0.4034847319126129, |
| "learning_rate": 1.9996363717617304e-07, |
| "loss": 0.0036, |
| "step": 10260 |
| }, |
| { |
| "epoch": 0.019023661953046566, |
| "grad_norm": 2.488400936126709, |
| "learning_rate": 1.9996355860112267e-07, |
| "loss": 0.0031, |
| "step": 10270 |
| }, |
| { |
| "epoch": 0.019042185479777866, |
| "grad_norm": 0.7505651712417603, |
| "learning_rate": 1.999634799412845e-07, |
| "loss": 0.0035, |
| "step": 10280 |
| }, |
| { |
| "epoch": 0.019060709006509167, |
| "grad_norm": 2.6209018230438232, |
| "learning_rate": 1.999634011966585e-07, |
| "loss": 0.0043, |
| "step": 10290 |
| }, |
| { |
| "epoch": 0.019079232533240467, |
| "grad_norm": 0.9472781419754028, |
| "learning_rate": 1.9996332236724477e-07, |
| "loss": 0.0048, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.01909775605997177, |
| "grad_norm": 1.0245192050933838, |
| "learning_rate": 1.9996324345304342e-07, |
| "loss": 0.004, |
| "step": 10310 |
| }, |
| { |
| "epoch": 0.01911627958670307, |
| "grad_norm": 1.7471510171890259, |
| "learning_rate": 1.999631644540545e-07, |
| "loss": 0.004, |
| "step": 10320 |
| }, |
| { |
| "epoch": 0.019134803113434372, |
| "grad_norm": 1.0485868453979492, |
| "learning_rate": 1.99963085370278e-07, |
| "loss": 0.0044, |
| "step": 10330 |
| }, |
| { |
| "epoch": 0.019153326640165676, |
| "grad_norm": 0.6735509037971497, |
| "learning_rate": 1.9996300620171406e-07, |
| "loss": 0.0032, |
| "step": 10340 |
| }, |
| { |
| "epoch": 0.019171850166896976, |
| "grad_norm": 0.8220440149307251, |
| "learning_rate": 1.9996292694836273e-07, |
| "loss": 0.0042, |
| "step": 10350 |
| }, |
| { |
| "epoch": 0.019190373693628276, |
| "grad_norm": 0.7454270124435425, |
| "learning_rate": 1.999628476102241e-07, |
| "loss": 0.0044, |
| "step": 10360 |
| }, |
| { |
| "epoch": 0.01920889722035958, |
| "grad_norm": 0.4643478989601135, |
| "learning_rate": 1.9996276818729824e-07, |
| "loss": 0.0056, |
| "step": 10370 |
| }, |
| { |
| "epoch": 0.01922742074709088, |
| "grad_norm": 0.6909576058387756, |
| "learning_rate": 1.9996268867958516e-07, |
| "loss": 0.0044, |
| "step": 10380 |
| }, |
| { |
| "epoch": 0.01924594427382218, |
| "grad_norm": 0.33222198486328125, |
| "learning_rate": 1.9996260908708495e-07, |
| "loss": 0.0041, |
| "step": 10390 |
| }, |
| { |
| "epoch": 0.01926446780055348, |
| "grad_norm": 0.556448221206665, |
| "learning_rate": 1.999625294097977e-07, |
| "loss": 0.0045, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.019282991327284785, |
| "grad_norm": 0.8849384784698486, |
| "learning_rate": 1.999624496477235e-07, |
| "loss": 0.0032, |
| "step": 10410 |
| }, |
| { |
| "epoch": 0.019301514854016086, |
| "grad_norm": 0.660408079624176, |
| "learning_rate": 1.9996236980086234e-07, |
| "loss": 0.0036, |
| "step": 10420 |
| }, |
| { |
| "epoch": 0.019320038380747386, |
| "grad_norm": 1.885615348815918, |
| "learning_rate": 1.9996228986921435e-07, |
| "loss": 0.0052, |
| "step": 10430 |
| }, |
| { |
| "epoch": 0.01933856190747869, |
| "grad_norm": 1.7404649257659912, |
| "learning_rate": 1.9996220985277955e-07, |
| "loss": 0.005, |
| "step": 10440 |
| }, |
| { |
| "epoch": 0.01935708543420999, |
| "grad_norm": 0.5331248641014099, |
| "learning_rate": 1.9996212975155809e-07, |
| "loss": 0.004, |
| "step": 10450 |
| }, |
| { |
| "epoch": 0.01937560896094129, |
| "grad_norm": 0.34787309169769287, |
| "learning_rate": 1.9996204956554997e-07, |
| "loss": 0.0037, |
| "step": 10460 |
| }, |
| { |
| "epoch": 0.01939413248767259, |
| "grad_norm": 0.5059776306152344, |
| "learning_rate": 1.9996196929475526e-07, |
| "loss": 0.0046, |
| "step": 10470 |
| }, |
| { |
| "epoch": 0.019412656014403895, |
| "grad_norm": 2.0636556148529053, |
| "learning_rate": 1.9996188893917406e-07, |
| "loss": 0.0039, |
| "step": 10480 |
| }, |
| { |
| "epoch": 0.019431179541135195, |
| "grad_norm": 0.863540530204773, |
| "learning_rate": 1.999618084988064e-07, |
| "loss": 0.0033, |
| "step": 10490 |
| }, |
| { |
| "epoch": 0.019449703067866496, |
| "grad_norm": 2.5235605239868164, |
| "learning_rate": 1.9996172797365237e-07, |
| "loss": 0.0043, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.0194682265945978, |
| "grad_norm": 1.1779553890228271, |
| "learning_rate": 1.9996164736371205e-07, |
| "loss": 0.0039, |
| "step": 10510 |
| }, |
| { |
| "epoch": 0.0194867501213291, |
| "grad_norm": 0.1930914968252182, |
| "learning_rate": 1.9996156666898547e-07, |
| "loss": 0.0045, |
| "step": 10520 |
| }, |
| { |
| "epoch": 0.0195052736480604, |
| "grad_norm": 1.0799890756607056, |
| "learning_rate": 1.9996148588947275e-07, |
| "loss": 0.0052, |
| "step": 10530 |
| }, |
| { |
| "epoch": 0.019523797174791704, |
| "grad_norm": 1.657225251197815, |
| "learning_rate": 1.9996140502517394e-07, |
| "loss": 0.0039, |
| "step": 10540 |
| }, |
| { |
| "epoch": 0.019542320701523005, |
| "grad_norm": 1.3575892448425293, |
| "learning_rate": 1.9996132407608909e-07, |
| "loss": 0.0044, |
| "step": 10550 |
| }, |
| { |
| "epoch": 0.019560844228254305, |
| "grad_norm": 2.525514841079712, |
| "learning_rate": 1.9996124304221825e-07, |
| "loss": 0.0044, |
| "step": 10560 |
| }, |
| { |
| "epoch": 0.019579367754985606, |
| "grad_norm": 2.423532724380493, |
| "learning_rate": 1.9996116192356153e-07, |
| "loss": 0.0035, |
| "step": 10570 |
| }, |
| { |
| "epoch": 0.01959789128171691, |
| "grad_norm": 1.4325683116912842, |
| "learning_rate": 1.9996108072011898e-07, |
| "loss": 0.005, |
| "step": 10580 |
| }, |
| { |
| "epoch": 0.01961641480844821, |
| "grad_norm": 0.41579318046569824, |
| "learning_rate": 1.9996099943189071e-07, |
| "loss": 0.0039, |
| "step": 10590 |
| }, |
| { |
| "epoch": 0.01963493833517951, |
| "grad_norm": 0.4001620411872864, |
| "learning_rate": 1.9996091805887675e-07, |
| "loss": 0.0042, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.019653461861910814, |
| "grad_norm": 0.40057483315467834, |
| "learning_rate": 1.9996083660107717e-07, |
| "loss": 0.0045, |
| "step": 10610 |
| }, |
| { |
| "epoch": 0.019671985388642114, |
| "grad_norm": 1.20992910861969, |
| "learning_rate": 1.99960755058492e-07, |
| "loss": 0.0046, |
| "step": 10620 |
| }, |
| { |
| "epoch": 0.019690508915373415, |
| "grad_norm": 3.830972194671631, |
| "learning_rate": 1.999606734311214e-07, |
| "loss": 0.0053, |
| "step": 10630 |
| }, |
| { |
| "epoch": 0.01970903244210472, |
| "grad_norm": 0.7156141400337219, |
| "learning_rate": 1.9996059171896538e-07, |
| "loss": 0.0041, |
| "step": 10640 |
| }, |
| { |
| "epoch": 0.01972755596883602, |
| "grad_norm": 1.0570077896118164, |
| "learning_rate": 1.9996050992202402e-07, |
| "loss": 0.0045, |
| "step": 10650 |
| }, |
| { |
| "epoch": 0.01974607949556732, |
| "grad_norm": 0.6062852144241333, |
| "learning_rate": 1.9996042804029737e-07, |
| "loss": 0.0037, |
| "step": 10660 |
| }, |
| { |
| "epoch": 0.01976460302229862, |
| "grad_norm": 1.4890351295471191, |
| "learning_rate": 1.9996034607378553e-07, |
| "loss": 0.0043, |
| "step": 10670 |
| }, |
| { |
| "epoch": 0.019783126549029924, |
| "grad_norm": 0.7631430625915527, |
| "learning_rate": 1.9996026402248857e-07, |
| "loss": 0.0034, |
| "step": 10680 |
| }, |
| { |
| "epoch": 0.019801650075761224, |
| "grad_norm": 0.982003390789032, |
| "learning_rate": 1.9996018188640655e-07, |
| "loss": 0.0045, |
| "step": 10690 |
| }, |
| { |
| "epoch": 0.019820173602492525, |
| "grad_norm": 1.317332148551941, |
| "learning_rate": 1.9996009966553953e-07, |
| "loss": 0.0044, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.01983869712922383, |
| "grad_norm": 1.2513245344161987, |
| "learning_rate": 1.9996001735988758e-07, |
| "loss": 0.0035, |
| "step": 10710 |
| }, |
| { |
| "epoch": 0.01985722065595513, |
| "grad_norm": 0.8831415176391602, |
| "learning_rate": 1.9995993496945078e-07, |
| "loss": 0.0037, |
| "step": 10720 |
| }, |
| { |
| "epoch": 0.01987574418268643, |
| "grad_norm": 0.8434158563613892, |
| "learning_rate": 1.999598524942292e-07, |
| "loss": 0.0047, |
| "step": 10730 |
| }, |
| { |
| "epoch": 0.019894267709417733, |
| "grad_norm": 0.7173445820808411, |
| "learning_rate": 1.9995976993422293e-07, |
| "loss": 0.0039, |
| "step": 10740 |
| }, |
| { |
| "epoch": 0.019912791236149033, |
| "grad_norm": 0.6487358808517456, |
| "learning_rate": 1.9995968728943198e-07, |
| "loss": 0.0037, |
| "step": 10750 |
| }, |
| { |
| "epoch": 0.019931314762880334, |
| "grad_norm": 0.4218233525753021, |
| "learning_rate": 1.9995960455985648e-07, |
| "loss": 0.004, |
| "step": 10760 |
| }, |
| { |
| "epoch": 0.019949838289611634, |
| "grad_norm": 0.9249664545059204, |
| "learning_rate": 1.999595217454965e-07, |
| "loss": 0.004, |
| "step": 10770 |
| }, |
| { |
| "epoch": 0.019968361816342938, |
| "grad_norm": 1.5009821653366089, |
| "learning_rate": 1.9995943884635204e-07, |
| "loss": 0.0047, |
| "step": 10780 |
| }, |
| { |
| "epoch": 0.01998688534307424, |
| "grad_norm": 0.2918950617313385, |
| "learning_rate": 1.9995935586242323e-07, |
| "loss": 0.0043, |
| "step": 10790 |
| }, |
| { |
| "epoch": 0.02000540886980554, |
| "grad_norm": 0.6740665435791016, |
| "learning_rate": 1.9995927279371014e-07, |
| "loss": 0.0035, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.020023932396536843, |
| "grad_norm": 0.47994542121887207, |
| "learning_rate": 1.999591896402128e-07, |
| "loss": 0.0039, |
| "step": 10810 |
| }, |
| { |
| "epoch": 0.020042455923268143, |
| "grad_norm": 1.5067847967147827, |
| "learning_rate": 1.9995910640193133e-07, |
| "loss": 0.0045, |
| "step": 10820 |
| }, |
| { |
| "epoch": 0.020060979449999444, |
| "grad_norm": 1.0457830429077148, |
| "learning_rate": 1.999590230788658e-07, |
| "loss": 0.0036, |
| "step": 10830 |
| }, |
| { |
| "epoch": 0.020079502976730747, |
| "grad_norm": 0.6851208209991455, |
| "learning_rate": 1.9995893967101626e-07, |
| "loss": 0.0054, |
| "step": 10840 |
| }, |
| { |
| "epoch": 0.020098026503462048, |
| "grad_norm": 1.1617788076400757, |
| "learning_rate": 1.9995885617838276e-07, |
| "loss": 0.0046, |
| "step": 10850 |
| }, |
| { |
| "epoch": 0.020116550030193348, |
| "grad_norm": 1.1798062324523926, |
| "learning_rate": 1.9995877260096542e-07, |
| "loss": 0.0046, |
| "step": 10860 |
| }, |
| { |
| "epoch": 0.02013507355692465, |
| "grad_norm": 0.1883193999528885, |
| "learning_rate": 1.9995868893876424e-07, |
| "loss": 0.0034, |
| "step": 10870 |
| }, |
| { |
| "epoch": 0.020153597083655952, |
| "grad_norm": 3.4635565280914307, |
| "learning_rate": 1.9995860519177937e-07, |
| "loss": 0.0047, |
| "step": 10880 |
| }, |
| { |
| "epoch": 0.020172120610387253, |
| "grad_norm": 1.7969893217086792, |
| "learning_rate": 1.9995852136001085e-07, |
| "loss": 0.0036, |
| "step": 10890 |
| }, |
| { |
| "epoch": 0.020190644137118553, |
| "grad_norm": 0.934319019317627, |
| "learning_rate": 1.999584374434587e-07, |
| "loss": 0.0041, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.020209167663849857, |
| "grad_norm": 1.155469298362732, |
| "learning_rate": 1.9995835344212307e-07, |
| "loss": 0.0036, |
| "step": 10910 |
| }, |
| { |
| "epoch": 0.020227691190581158, |
| "grad_norm": 0.42699894309043884, |
| "learning_rate": 1.99958269356004e-07, |
| "loss": 0.0041, |
| "step": 10920 |
| }, |
| { |
| "epoch": 0.020246214717312458, |
| "grad_norm": 1.128645896911621, |
| "learning_rate": 1.9995818518510156e-07, |
| "loss": 0.0049, |
| "step": 10930 |
| }, |
| { |
| "epoch": 0.02026473824404376, |
| "grad_norm": 0.4376215636730194, |
| "learning_rate": 1.999581009294158e-07, |
| "loss": 0.0039, |
| "step": 10940 |
| }, |
| { |
| "epoch": 0.020283261770775062, |
| "grad_norm": 0.518243670463562, |
| "learning_rate": 1.9995801658894685e-07, |
| "loss": 0.0051, |
| "step": 10950 |
| }, |
| { |
| "epoch": 0.020301785297506363, |
| "grad_norm": 0.47851717472076416, |
| "learning_rate": 1.999579321636947e-07, |
| "loss": 0.0033, |
| "step": 10960 |
| }, |
| { |
| "epoch": 0.020320308824237663, |
| "grad_norm": 0.989443838596344, |
| "learning_rate": 1.999578476536595e-07, |
| "loss": 0.0049, |
| "step": 10970 |
| }, |
| { |
| "epoch": 0.020338832350968967, |
| "grad_norm": 1.676496148109436, |
| "learning_rate": 1.999577630588413e-07, |
| "loss": 0.0046, |
| "step": 10980 |
| }, |
| { |
| "epoch": 0.020357355877700267, |
| "grad_norm": 0.8464547395706177, |
| "learning_rate": 1.9995767837924015e-07, |
| "loss": 0.0033, |
| "step": 10990 |
| }, |
| { |
| "epoch": 0.020375879404431568, |
| "grad_norm": 0.19645555317401886, |
| "learning_rate": 1.9995759361485608e-07, |
| "loss": 0.0047, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.02039440293116287, |
| "grad_norm": 1.6279752254486084, |
| "learning_rate": 1.9995750876568926e-07, |
| "loss": 0.0052, |
| "step": 11010 |
| }, |
| { |
| "epoch": 0.020412926457894172, |
| "grad_norm": 0.9186310172080994, |
| "learning_rate": 1.9995742383173974e-07, |
| "loss": 0.0043, |
| "step": 11020 |
| }, |
| { |
| "epoch": 0.020431449984625472, |
| "grad_norm": 0.6073471307754517, |
| "learning_rate": 1.999573388130075e-07, |
| "loss": 0.0046, |
| "step": 11030 |
| }, |
| { |
| "epoch": 0.020449973511356773, |
| "grad_norm": 2.026857852935791, |
| "learning_rate": 1.9995725370949273e-07, |
| "loss": 0.0042, |
| "step": 11040 |
| }, |
| { |
| "epoch": 0.020468497038088077, |
| "grad_norm": 1.1785808801651, |
| "learning_rate": 1.999571685211954e-07, |
| "loss": 0.0041, |
| "step": 11050 |
| }, |
| { |
| "epoch": 0.020487020564819377, |
| "grad_norm": 1.0623115301132202, |
| "learning_rate": 1.999570832481157e-07, |
| "loss": 0.0036, |
| "step": 11060 |
| }, |
| { |
| "epoch": 0.020505544091550677, |
| "grad_norm": 1.5273675918579102, |
| "learning_rate": 1.999569978902536e-07, |
| "loss": 0.0049, |
| "step": 11070 |
| }, |
| { |
| "epoch": 0.02052406761828198, |
| "grad_norm": 0.8437715172767639, |
| "learning_rate": 1.999569124476092e-07, |
| "loss": 0.0045, |
| "step": 11080 |
| }, |
| { |
| "epoch": 0.02054259114501328, |
| "grad_norm": 0.3356923460960388, |
| "learning_rate": 1.999568269201826e-07, |
| "loss": 0.004, |
| "step": 11090 |
| }, |
| { |
| "epoch": 0.020561114671744582, |
| "grad_norm": 2.1886203289031982, |
| "learning_rate": 1.9995674130797386e-07, |
| "loss": 0.0051, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.020579638198475886, |
| "grad_norm": 0.5572504997253418, |
| "learning_rate": 1.9995665561098304e-07, |
| "loss": 0.0041, |
| "step": 11110 |
| }, |
| { |
| "epoch": 0.020598161725207186, |
| "grad_norm": 0.7014231085777283, |
| "learning_rate": 1.999565698292102e-07, |
| "loss": 0.0032, |
| "step": 11120 |
| }, |
| { |
| "epoch": 0.020616685251938487, |
| "grad_norm": 1.1279445886611938, |
| "learning_rate": 1.9995648396265546e-07, |
| "loss": 0.004, |
| "step": 11130 |
| }, |
| { |
| "epoch": 0.020635208778669787, |
| "grad_norm": 1.4305812120437622, |
| "learning_rate": 1.9995639801131886e-07, |
| "loss": 0.0041, |
| "step": 11140 |
| }, |
| { |
| "epoch": 0.02065373230540109, |
| "grad_norm": 1.9915997982025146, |
| "learning_rate": 1.9995631197520045e-07, |
| "loss": 0.005, |
| "step": 11150 |
| }, |
| { |
| "epoch": 0.02067225583213239, |
| "grad_norm": 1.9734001159667969, |
| "learning_rate": 1.9995622585430035e-07, |
| "loss": 0.0032, |
| "step": 11160 |
| }, |
| { |
| "epoch": 0.02069077935886369, |
| "grad_norm": 1.1925320625305176, |
| "learning_rate": 1.9995613964861862e-07, |
| "loss": 0.0051, |
| "step": 11170 |
| }, |
| { |
| "epoch": 0.020709302885594996, |
| "grad_norm": 0.3007209599018097, |
| "learning_rate": 1.9995605335815534e-07, |
| "loss": 0.0036, |
| "step": 11180 |
| }, |
| { |
| "epoch": 0.020727826412326296, |
| "grad_norm": 2.914504051208496, |
| "learning_rate": 1.999559669829105e-07, |
| "loss": 0.0045, |
| "step": 11190 |
| }, |
| { |
| "epoch": 0.020746349939057596, |
| "grad_norm": 0.5375096797943115, |
| "learning_rate": 1.999558805228843e-07, |
| "loss": 0.0035, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.0207648734657889, |
| "grad_norm": 0.8085628151893616, |
| "learning_rate": 1.9995579397807676e-07, |
| "loss": 0.0035, |
| "step": 11210 |
| }, |
| { |
| "epoch": 0.0207833969925202, |
| "grad_norm": 1.687476634979248, |
| "learning_rate": 1.9995570734848793e-07, |
| "loss": 0.0039, |
| "step": 11220 |
| }, |
| { |
| "epoch": 0.0208019205192515, |
| "grad_norm": 1.7321419715881348, |
| "learning_rate": 1.9995562063411792e-07, |
| "loss": 0.0035, |
| "step": 11230 |
| }, |
| { |
| "epoch": 0.0208204440459828, |
| "grad_norm": 0.46695607900619507, |
| "learning_rate": 1.9995553383496677e-07, |
| "loss": 0.0041, |
| "step": 11240 |
| }, |
| { |
| "epoch": 0.020838967572714105, |
| "grad_norm": 0.5256772041320801, |
| "learning_rate": 1.9995544695103459e-07, |
| "loss": 0.003, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.020857491099445406, |
| "grad_norm": 0.8563908338546753, |
| "learning_rate": 1.9995535998232142e-07, |
| "loss": 0.0033, |
| "step": 11260 |
| }, |
| { |
| "epoch": 0.020876014626176706, |
| "grad_norm": 0.9469535946846008, |
| "learning_rate": 1.9995527292882735e-07, |
| "loss": 0.0048, |
| "step": 11270 |
| }, |
| { |
| "epoch": 0.02089453815290801, |
| "grad_norm": 0.7452173233032227, |
| "learning_rate": 1.9995518579055245e-07, |
| "loss": 0.0033, |
| "step": 11280 |
| }, |
| { |
| "epoch": 0.02091306167963931, |
| "grad_norm": 1.2956979274749756, |
| "learning_rate": 1.999550985674968e-07, |
| "loss": 0.0044, |
| "step": 11290 |
| }, |
| { |
| "epoch": 0.02093158520637061, |
| "grad_norm": 0.8553891777992249, |
| "learning_rate": 1.9995501125966044e-07, |
| "loss": 0.0039, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.02095010873310191, |
| "grad_norm": 0.4210663139820099, |
| "learning_rate": 1.9995492386704352e-07, |
| "loss": 0.0033, |
| "step": 11310 |
| }, |
| { |
| "epoch": 0.020968632259833215, |
| "grad_norm": 1.3937798738479614, |
| "learning_rate": 1.9995483638964604e-07, |
| "loss": 0.0063, |
| "step": 11320 |
| }, |
| { |
| "epoch": 0.020987155786564515, |
| "grad_norm": 0.3456120789051056, |
| "learning_rate": 1.9995474882746813e-07, |
| "loss": 0.0036, |
| "step": 11330 |
| }, |
| { |
| "epoch": 0.021005679313295816, |
| "grad_norm": 0.3505333364009857, |
| "learning_rate": 1.9995466118050982e-07, |
| "loss": 0.0024, |
| "step": 11340 |
| }, |
| { |
| "epoch": 0.02102420284002712, |
| "grad_norm": 1.0481879711151123, |
| "learning_rate": 1.999545734487712e-07, |
| "loss": 0.0041, |
| "step": 11350 |
| }, |
| { |
| "epoch": 0.02104272636675842, |
| "grad_norm": 0.49380356073379517, |
| "learning_rate": 1.9995448563225232e-07, |
| "loss": 0.0049, |
| "step": 11360 |
| }, |
| { |
| "epoch": 0.02106124989348972, |
| "grad_norm": 2.0820581912994385, |
| "learning_rate": 1.9995439773095328e-07, |
| "loss": 0.0043, |
| "step": 11370 |
| }, |
| { |
| "epoch": 0.021079773420221024, |
| "grad_norm": 1.0408623218536377, |
| "learning_rate": 1.9995430974487418e-07, |
| "loss": 0.004, |
| "step": 11380 |
| }, |
| { |
| "epoch": 0.021098296946952325, |
| "grad_norm": 1.0584180355072021, |
| "learning_rate": 1.9995422167401506e-07, |
| "loss": 0.0042, |
| "step": 11390 |
| }, |
| { |
| "epoch": 0.021116820473683625, |
| "grad_norm": 0.9139922261238098, |
| "learning_rate": 1.99954133518376e-07, |
| "loss": 0.0037, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.021135344000414925, |
| "grad_norm": 1.7950440645217896, |
| "learning_rate": 1.999540452779571e-07, |
| "loss": 0.0056, |
| "step": 11410 |
| }, |
| { |
| "epoch": 0.02115386752714623, |
| "grad_norm": 1.1674875020980835, |
| "learning_rate": 1.999539569527584e-07, |
| "loss": 0.004, |
| "step": 11420 |
| }, |
| { |
| "epoch": 0.02117239105387753, |
| "grad_norm": 1.2293630838394165, |
| "learning_rate": 1.9995386854277997e-07, |
| "loss": 0.0052, |
| "step": 11430 |
| }, |
| { |
| "epoch": 0.02119091458060883, |
| "grad_norm": 0.32438477873802185, |
| "learning_rate": 1.999537800480219e-07, |
| "loss": 0.0036, |
| "step": 11440 |
| }, |
| { |
| "epoch": 0.021209438107340134, |
| "grad_norm": 0.5731669664382935, |
| "learning_rate": 1.999536914684843e-07, |
| "loss": 0.0036, |
| "step": 11450 |
| }, |
| { |
| "epoch": 0.021227961634071434, |
| "grad_norm": 1.1910243034362793, |
| "learning_rate": 1.999536028041672e-07, |
| "loss": 0.003, |
| "step": 11460 |
| }, |
| { |
| "epoch": 0.021246485160802735, |
| "grad_norm": 0.6449489593505859, |
| "learning_rate": 1.9995351405507067e-07, |
| "loss": 0.0034, |
| "step": 11470 |
| }, |
| { |
| "epoch": 0.02126500868753404, |
| "grad_norm": 0.6353984475135803, |
| "learning_rate": 1.9995342522119484e-07, |
| "loss": 0.0042, |
| "step": 11480 |
| }, |
| { |
| "epoch": 0.02128353221426534, |
| "grad_norm": 1.6719144582748413, |
| "learning_rate": 1.9995333630253973e-07, |
| "loss": 0.0043, |
| "step": 11490 |
| }, |
| { |
| "epoch": 0.02130205574099664, |
| "grad_norm": 0.8018255829811096, |
| "learning_rate": 1.9995324729910543e-07, |
| "loss": 0.0039, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.02132057926772794, |
| "grad_norm": 0.657651424407959, |
| "learning_rate": 1.9995315821089202e-07, |
| "loss": 0.0054, |
| "step": 11510 |
| }, |
| { |
| "epoch": 0.021339102794459244, |
| "grad_norm": 1.2621873617172241, |
| "learning_rate": 1.999530690378996e-07, |
| "loss": 0.0038, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.021357626321190544, |
| "grad_norm": 0.12901923060417175, |
| "learning_rate": 1.9995297978012816e-07, |
| "loss": 0.0039, |
| "step": 11530 |
| }, |
| { |
| "epoch": 0.021376149847921844, |
| "grad_norm": 0.2438955456018448, |
| "learning_rate": 1.999528904375779e-07, |
| "loss": 0.0031, |
| "step": 11540 |
| }, |
| { |
| "epoch": 0.021394673374653148, |
| "grad_norm": 1.6099838018417358, |
| "learning_rate": 1.9995280101024882e-07, |
| "loss": 0.0043, |
| "step": 11550 |
| }, |
| { |
| "epoch": 0.02141319690138445, |
| "grad_norm": 0.3221456706523895, |
| "learning_rate": 1.99952711498141e-07, |
| "loss": 0.004, |
| "step": 11560 |
| }, |
| { |
| "epoch": 0.02143172042811575, |
| "grad_norm": 0.9431011080741882, |
| "learning_rate": 1.9995262190125454e-07, |
| "loss": 0.0037, |
| "step": 11570 |
| }, |
| { |
| "epoch": 0.021450243954847053, |
| "grad_norm": 0.3925634026527405, |
| "learning_rate": 1.9995253221958947e-07, |
| "loss": 0.0031, |
| "step": 11580 |
| }, |
| { |
| "epoch": 0.021468767481578353, |
| "grad_norm": 0.8866441249847412, |
| "learning_rate": 1.9995244245314588e-07, |
| "loss": 0.0039, |
| "step": 11590 |
| }, |
| { |
| "epoch": 0.021487291008309654, |
| "grad_norm": 0.8010444641113281, |
| "learning_rate": 1.9995235260192392e-07, |
| "loss": 0.0042, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.021505814535040954, |
| "grad_norm": 1.806249737739563, |
| "learning_rate": 1.9995226266592355e-07, |
| "loss": 0.0043, |
| "step": 11610 |
| }, |
| { |
| "epoch": 0.021524338061772258, |
| "grad_norm": 1.1026357412338257, |
| "learning_rate": 1.9995217264514495e-07, |
| "loss": 0.006, |
| "step": 11620 |
| }, |
| { |
| "epoch": 0.02154286158850356, |
| "grad_norm": 1.4329309463500977, |
| "learning_rate": 1.9995208253958812e-07, |
| "loss": 0.0035, |
| "step": 11630 |
| }, |
| { |
| "epoch": 0.02156138511523486, |
| "grad_norm": 1.2971662282943726, |
| "learning_rate": 1.999519923492532e-07, |
| "loss": 0.0042, |
| "step": 11640 |
| }, |
| { |
| "epoch": 0.021579908641966163, |
| "grad_norm": 0.968996524810791, |
| "learning_rate": 1.9995190207414022e-07, |
| "loss": 0.003, |
| "step": 11650 |
| }, |
| { |
| "epoch": 0.021598432168697463, |
| "grad_norm": 0.8942487835884094, |
| "learning_rate": 1.9995181171424928e-07, |
| "loss": 0.0056, |
| "step": 11660 |
| }, |
| { |
| "epoch": 0.021616955695428763, |
| "grad_norm": 1.7549582719802856, |
| "learning_rate": 1.999517212695804e-07, |
| "loss": 0.0024, |
| "step": 11670 |
| }, |
| { |
| "epoch": 0.021635479222160064, |
| "grad_norm": 5.932610511779785, |
| "learning_rate": 1.9995163074013376e-07, |
| "loss": 0.0046, |
| "step": 11680 |
| }, |
| { |
| "epoch": 0.021654002748891368, |
| "grad_norm": 1.0635918378829956, |
| "learning_rate": 1.9995154012590934e-07, |
| "loss": 0.0044, |
| "step": 11690 |
| }, |
| { |
| "epoch": 0.021672526275622668, |
| "grad_norm": 0.6824076175689697, |
| "learning_rate": 1.9995144942690728e-07, |
| "loss": 0.004, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.02169104980235397, |
| "grad_norm": 1.1098347902297974, |
| "learning_rate": 1.9995135864312762e-07, |
| "loss": 0.0045, |
| "step": 11710 |
| }, |
| { |
| "epoch": 0.021709573329085272, |
| "grad_norm": 1.632853388786316, |
| "learning_rate": 1.9995126777457047e-07, |
| "loss": 0.0048, |
| "step": 11720 |
| }, |
| { |
| "epoch": 0.021728096855816573, |
| "grad_norm": 0.6560743451118469, |
| "learning_rate": 1.999511768212359e-07, |
| "loss": 0.0033, |
| "step": 11730 |
| }, |
| { |
| "epoch": 0.021746620382547873, |
| "grad_norm": 0.44074228405952454, |
| "learning_rate": 1.9995108578312397e-07, |
| "loss": 0.0044, |
| "step": 11740 |
| }, |
| { |
| "epoch": 0.021765143909279177, |
| "grad_norm": 1.107337474822998, |
| "learning_rate": 1.9995099466023473e-07, |
| "loss": 0.006, |
| "step": 11750 |
| }, |
| { |
| "epoch": 0.021783667436010477, |
| "grad_norm": 0.2580069601535797, |
| "learning_rate": 1.9995090345256833e-07, |
| "loss": 0.0036, |
| "step": 11760 |
| }, |
| { |
| "epoch": 0.021802190962741778, |
| "grad_norm": 0.29794543981552124, |
| "learning_rate": 1.9995081216012477e-07, |
| "loss": 0.0038, |
| "step": 11770 |
| }, |
| { |
| "epoch": 0.021820714489473078, |
| "grad_norm": 1.8231271505355835, |
| "learning_rate": 1.999507207829042e-07, |
| "loss": 0.0052, |
| "step": 11780 |
| }, |
| { |
| "epoch": 0.021839238016204382, |
| "grad_norm": 1.1275067329406738, |
| "learning_rate": 1.9995062932090666e-07, |
| "loss": 0.0037, |
| "step": 11790 |
| }, |
| { |
| "epoch": 0.021857761542935682, |
| "grad_norm": 0.6289139986038208, |
| "learning_rate": 1.999505377741322e-07, |
| "loss": 0.0044, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.021876285069666983, |
| "grad_norm": 1.1204489469528198, |
| "learning_rate": 1.9995044614258094e-07, |
| "loss": 0.0039, |
| "step": 11810 |
| }, |
| { |
| "epoch": 0.021894808596398287, |
| "grad_norm": 0.9327753782272339, |
| "learning_rate": 1.9995035442625295e-07, |
| "loss": 0.0035, |
| "step": 11820 |
| }, |
| { |
| "epoch": 0.021913332123129587, |
| "grad_norm": 0.6412800550460815, |
| "learning_rate": 1.999502626251483e-07, |
| "loss": 0.004, |
| "step": 11830 |
| }, |
| { |
| "epoch": 0.021931855649860887, |
| "grad_norm": 1.2296700477600098, |
| "learning_rate": 1.999501707392671e-07, |
| "loss": 0.0042, |
| "step": 11840 |
| }, |
| { |
| "epoch": 0.02195037917659219, |
| "grad_norm": 0.3419044315814972, |
| "learning_rate": 1.9995007876860937e-07, |
| "loss": 0.0036, |
| "step": 11850 |
| }, |
| { |
| "epoch": 0.02196890270332349, |
| "grad_norm": 1.1582615375518799, |
| "learning_rate": 1.9994998671317523e-07, |
| "loss": 0.0043, |
| "step": 11860 |
| }, |
| { |
| "epoch": 0.021987426230054792, |
| "grad_norm": 0.8223651647567749, |
| "learning_rate": 1.9994989457296474e-07, |
| "loss": 0.0034, |
| "step": 11870 |
| }, |
| { |
| "epoch": 0.022005949756786092, |
| "grad_norm": 1.3145171403884888, |
| "learning_rate": 1.9994980234797798e-07, |
| "loss": 0.0037, |
| "step": 11880 |
| }, |
| { |
| "epoch": 0.022024473283517396, |
| "grad_norm": 0.437412828207016, |
| "learning_rate": 1.9994971003821502e-07, |
| "loss": 0.0056, |
| "step": 11890 |
| }, |
| { |
| "epoch": 0.022042996810248697, |
| "grad_norm": 0.2918112576007843, |
| "learning_rate": 1.9994961764367598e-07, |
| "loss": 0.0041, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.022061520336979997, |
| "grad_norm": 0.9091414213180542, |
| "learning_rate": 1.9994952516436088e-07, |
| "loss": 0.0049, |
| "step": 11910 |
| }, |
| { |
| "epoch": 0.0220800438637113, |
| "grad_norm": 0.36367067694664, |
| "learning_rate": 1.9994943260026985e-07, |
| "loss": 0.0045, |
| "step": 11920 |
| }, |
| { |
| "epoch": 0.0220985673904426, |
| "grad_norm": 1.018792986869812, |
| "learning_rate": 1.9994933995140292e-07, |
| "loss": 0.0039, |
| "step": 11930 |
| }, |
| { |
| "epoch": 0.022117090917173902, |
| "grad_norm": 1.392177939414978, |
| "learning_rate": 1.9994924721776021e-07, |
| "loss": 0.0042, |
| "step": 11940 |
| }, |
| { |
| "epoch": 0.022135614443905206, |
| "grad_norm": 14.086770057678223, |
| "learning_rate": 1.9994915439934177e-07, |
| "loss": 0.0041, |
| "step": 11950 |
| }, |
| { |
| "epoch": 0.022154137970636506, |
| "grad_norm": 0.6419123411178589, |
| "learning_rate": 1.9994906149614772e-07, |
| "loss": 0.005, |
| "step": 11960 |
| }, |
| { |
| "epoch": 0.022172661497367806, |
| "grad_norm": 1.7256454229354858, |
| "learning_rate": 1.9994896850817808e-07, |
| "loss": 0.004, |
| "step": 11970 |
| }, |
| { |
| "epoch": 0.022191185024099107, |
| "grad_norm": 0.2731253206729889, |
| "learning_rate": 1.99948875435433e-07, |
| "loss": 0.0042, |
| "step": 11980 |
| }, |
| { |
| "epoch": 0.02220970855083041, |
| "grad_norm": 1.2516132593154907, |
| "learning_rate": 1.9994878227791245e-07, |
| "loss": 0.005, |
| "step": 11990 |
| }, |
| { |
| "epoch": 0.02222823207756171, |
| "grad_norm": 0.5635455250740051, |
| "learning_rate": 1.9994868903561665e-07, |
| "loss": 0.0044, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.02224675560429301, |
| "grad_norm": 0.40112847089767456, |
| "learning_rate": 1.9994859570854557e-07, |
| "loss": 0.0041, |
| "step": 12010 |
| }, |
| { |
| "epoch": 0.022265279131024315, |
| "grad_norm": 0.7097703218460083, |
| "learning_rate": 1.9994850229669932e-07, |
| "loss": 0.0038, |
| "step": 12020 |
| }, |
| { |
| "epoch": 0.022283802657755616, |
| "grad_norm": 0.40352827310562134, |
| "learning_rate": 1.9994840880007798e-07, |
| "loss": 0.0044, |
| "step": 12030 |
| }, |
| { |
| "epoch": 0.022302326184486916, |
| "grad_norm": 0.8182700872421265, |
| "learning_rate": 1.9994831521868166e-07, |
| "loss": 0.0043, |
| "step": 12040 |
| }, |
| { |
| "epoch": 0.02232084971121822, |
| "grad_norm": 2.1451284885406494, |
| "learning_rate": 1.999482215525104e-07, |
| "loss": 0.0053, |
| "step": 12050 |
| }, |
| { |
| "epoch": 0.02233937323794952, |
| "grad_norm": 0.8694214820861816, |
| "learning_rate": 1.9994812780156427e-07, |
| "loss": 0.0036, |
| "step": 12060 |
| }, |
| { |
| "epoch": 0.02235789676468082, |
| "grad_norm": 0.893051266670227, |
| "learning_rate": 1.999480339658434e-07, |
| "loss": 0.0034, |
| "step": 12070 |
| }, |
| { |
| "epoch": 0.02237642029141212, |
| "grad_norm": 1.4941633939743042, |
| "learning_rate": 1.9994794004534782e-07, |
| "loss": 0.0036, |
| "step": 12080 |
| }, |
| { |
| "epoch": 0.022394943818143425, |
| "grad_norm": 1.3479055166244507, |
| "learning_rate": 1.999478460400777e-07, |
| "loss": 0.0045, |
| "step": 12090 |
| }, |
| { |
| "epoch": 0.022413467344874725, |
| "grad_norm": 0.921055793762207, |
| "learning_rate": 1.9994775195003296e-07, |
| "loss": 0.0041, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.022431990871606026, |
| "grad_norm": 0.5856291055679321, |
| "learning_rate": 1.999476577752138e-07, |
| "loss": 0.0047, |
| "step": 12110 |
| }, |
| { |
| "epoch": 0.02245051439833733, |
| "grad_norm": 0.7620528340339661, |
| "learning_rate": 1.999475635156203e-07, |
| "loss": 0.0044, |
| "step": 12120 |
| }, |
| { |
| "epoch": 0.02246903792506863, |
| "grad_norm": 1.5510215759277344, |
| "learning_rate": 1.9994746917125248e-07, |
| "loss": 0.0048, |
| "step": 12130 |
| }, |
| { |
| "epoch": 0.02248756145179993, |
| "grad_norm": 1.1489896774291992, |
| "learning_rate": 1.9994737474211046e-07, |
| "loss": 0.0041, |
| "step": 12140 |
| }, |
| { |
| "epoch": 0.02250608497853123, |
| "grad_norm": 0.6117718815803528, |
| "learning_rate": 1.9994728022819432e-07, |
| "loss": 0.0041, |
| "step": 12150 |
| }, |
| { |
| "epoch": 0.022524608505262535, |
| "grad_norm": 1.8428627252578735, |
| "learning_rate": 1.9994718562950413e-07, |
| "loss": 0.0041, |
| "step": 12160 |
| }, |
| { |
| "epoch": 0.022543132031993835, |
| "grad_norm": 0.9782434701919556, |
| "learning_rate": 1.9994709094603995e-07, |
| "loss": 0.0038, |
| "step": 12170 |
| }, |
| { |
| "epoch": 0.022561655558725136, |
| "grad_norm": 1.3487772941589355, |
| "learning_rate": 1.9994699617780187e-07, |
| "loss": 0.0047, |
| "step": 12180 |
| }, |
| { |
| "epoch": 0.02258017908545644, |
| "grad_norm": 0.7518923878669739, |
| "learning_rate": 1.9994690132479004e-07, |
| "loss": 0.0041, |
| "step": 12190 |
| }, |
| { |
| "epoch": 0.02259870261218774, |
| "grad_norm": 1.5131444931030273, |
| "learning_rate": 1.9994680638700445e-07, |
| "loss": 0.0039, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.02261722613891904, |
| "grad_norm": 0.9053683876991272, |
| "learning_rate": 1.999467113644452e-07, |
| "loss": 0.0045, |
| "step": 12210 |
| }, |
| { |
| "epoch": 0.022635749665650344, |
| "grad_norm": 1.0087581872940063, |
| "learning_rate": 1.999466162571124e-07, |
| "loss": 0.0037, |
| "step": 12220 |
| }, |
| { |
| "epoch": 0.022654273192381644, |
| "grad_norm": 0.3778531551361084, |
| "learning_rate": 1.9994652106500612e-07, |
| "loss": 0.0031, |
| "step": 12230 |
| }, |
| { |
| "epoch": 0.022672796719112945, |
| "grad_norm": 0.8948971629142761, |
| "learning_rate": 1.999464257881264e-07, |
| "loss": 0.0037, |
| "step": 12240 |
| }, |
| { |
| "epoch": 0.022691320245844245, |
| "grad_norm": 2.014846086502075, |
| "learning_rate": 1.9994633042647337e-07, |
| "loss": 0.0041, |
| "step": 12250 |
| }, |
| { |
| "epoch": 0.02270984377257555, |
| "grad_norm": 1.185621738433838, |
| "learning_rate": 1.9994623498004712e-07, |
| "loss": 0.0043, |
| "step": 12260 |
| }, |
| { |
| "epoch": 0.02272836729930685, |
| "grad_norm": 1.1489503383636475, |
| "learning_rate": 1.9994613944884772e-07, |
| "loss": 0.0041, |
| "step": 12270 |
| }, |
| { |
| "epoch": 0.02274689082603815, |
| "grad_norm": 0.6679458022117615, |
| "learning_rate": 1.999460438328752e-07, |
| "loss": 0.0044, |
| "step": 12280 |
| }, |
| { |
| "epoch": 0.022765414352769454, |
| "grad_norm": 4.611051082611084, |
| "learning_rate": 1.9994594813212968e-07, |
| "loss": 0.0045, |
| "step": 12290 |
| }, |
| { |
| "epoch": 0.022783937879500754, |
| "grad_norm": 0.8402919769287109, |
| "learning_rate": 1.9994585234661126e-07, |
| "loss": 0.0034, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.022802461406232055, |
| "grad_norm": 0.7501224875450134, |
| "learning_rate": 1.9994575647632e-07, |
| "loss": 0.0037, |
| "step": 12310 |
| }, |
| { |
| "epoch": 0.02282098493296336, |
| "grad_norm": 0.6108946204185486, |
| "learning_rate": 1.99945660521256e-07, |
| "loss": 0.004, |
| "step": 12320 |
| }, |
| { |
| "epoch": 0.02283950845969466, |
| "grad_norm": 0.3673897087574005, |
| "learning_rate": 1.999455644814193e-07, |
| "loss": 0.0043, |
| "step": 12330 |
| }, |
| { |
| "epoch": 0.02285803198642596, |
| "grad_norm": 0.6609338521957397, |
| "learning_rate": 1.9994546835681e-07, |
| "loss": 0.0042, |
| "step": 12340 |
| }, |
| { |
| "epoch": 0.02287655551315726, |
| "grad_norm": 0.47323575615882874, |
| "learning_rate": 1.9994537214742818e-07, |
| "loss": 0.0045, |
| "step": 12350 |
| }, |
| { |
| "epoch": 0.022895079039888563, |
| "grad_norm": 0.5024768710136414, |
| "learning_rate": 1.9994527585327394e-07, |
| "loss": 0.0055, |
| "step": 12360 |
| }, |
| { |
| "epoch": 0.022913602566619864, |
| "grad_norm": 1.6143661737442017, |
| "learning_rate": 1.9994517947434737e-07, |
| "loss": 0.0065, |
| "step": 12370 |
| }, |
| { |
| "epoch": 0.022932126093351164, |
| "grad_norm": 1.2490456104278564, |
| "learning_rate": 1.9994508301064852e-07, |
| "loss": 0.0043, |
| "step": 12380 |
| }, |
| { |
| "epoch": 0.022950649620082468, |
| "grad_norm": 0.7850220799446106, |
| "learning_rate": 1.9994498646217748e-07, |
| "loss": 0.0038, |
| "step": 12390 |
| }, |
| { |
| "epoch": 0.02296917314681377, |
| "grad_norm": 0.8535389304161072, |
| "learning_rate": 1.9994488982893434e-07, |
| "loss": 0.0043, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.02298769667354507, |
| "grad_norm": 1.0304555892944336, |
| "learning_rate": 1.9994479311091917e-07, |
| "loss": 0.0047, |
| "step": 12410 |
| }, |
| { |
| "epoch": 0.023006220200276373, |
| "grad_norm": 0.9606121182441711, |
| "learning_rate": 1.999446963081321e-07, |
| "loss": 0.0031, |
| "step": 12420 |
| }, |
| { |
| "epoch": 0.023024743727007673, |
| "grad_norm": 0.4527212679386139, |
| "learning_rate": 1.9994459942057312e-07, |
| "loss": 0.0051, |
| "step": 12430 |
| }, |
| { |
| "epoch": 0.023043267253738973, |
| "grad_norm": 1.3798104524612427, |
| "learning_rate": 1.9994450244824243e-07, |
| "loss": 0.0039, |
| "step": 12440 |
| }, |
| { |
| "epoch": 0.023061790780470274, |
| "grad_norm": 0.7217701077461243, |
| "learning_rate": 1.9994440539113998e-07, |
| "loss": 0.0033, |
| "step": 12450 |
| }, |
| { |
| "epoch": 0.023080314307201578, |
| "grad_norm": 0.9752712845802307, |
| "learning_rate": 1.9994430824926593e-07, |
| "loss": 0.0049, |
| "step": 12460 |
| }, |
| { |
| "epoch": 0.023098837833932878, |
| "grad_norm": 0.7819736003875732, |
| "learning_rate": 1.999442110226204e-07, |
| "loss": 0.0049, |
| "step": 12470 |
| }, |
| { |
| "epoch": 0.02311736136066418, |
| "grad_norm": 3.0538058280944824, |
| "learning_rate": 1.9994411371120337e-07, |
| "loss": 0.0038, |
| "step": 12480 |
| }, |
| { |
| "epoch": 0.023135884887395482, |
| "grad_norm": 1.0759543180465698, |
| "learning_rate": 1.99944016315015e-07, |
| "loss": 0.0039, |
| "step": 12490 |
| }, |
| { |
| "epoch": 0.023154408414126783, |
| "grad_norm": 0.9482446312904358, |
| "learning_rate": 1.9994391883405534e-07, |
| "loss": 0.0034, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.023172931940858083, |
| "grad_norm": 0.798263669013977, |
| "learning_rate": 1.9994382126832447e-07, |
| "loss": 0.006, |
| "step": 12510 |
| }, |
| { |
| "epoch": 0.023191455467589384, |
| "grad_norm": 0.7347808480262756, |
| "learning_rate": 1.9994372361782253e-07, |
| "loss": 0.0041, |
| "step": 12520 |
| }, |
| { |
| "epoch": 0.023209978994320687, |
| "grad_norm": 0.8049002289772034, |
| "learning_rate": 1.9994362588254954e-07, |
| "loss": 0.0042, |
| "step": 12530 |
| }, |
| { |
| "epoch": 0.023228502521051988, |
| "grad_norm": 1.1502327919006348, |
| "learning_rate": 1.9994352806250557e-07, |
| "loss": 0.0041, |
| "step": 12540 |
| }, |
| { |
| "epoch": 0.023247026047783288, |
| "grad_norm": 0.403735488653183, |
| "learning_rate": 1.9994343015769078e-07, |
| "loss": 0.0052, |
| "step": 12550 |
| }, |
| { |
| "epoch": 0.023265549574514592, |
| "grad_norm": 0.20620794594287872, |
| "learning_rate": 1.9994333216810517e-07, |
| "loss": 0.0036, |
| "step": 12560 |
| }, |
| { |
| "epoch": 0.023284073101245892, |
| "grad_norm": 8.42691421508789, |
| "learning_rate": 1.9994323409374885e-07, |
| "loss": 0.0059, |
| "step": 12570 |
| }, |
| { |
| "epoch": 0.023302596627977193, |
| "grad_norm": 0.974631130695343, |
| "learning_rate": 1.9994313593462194e-07, |
| "loss": 0.0034, |
| "step": 12580 |
| }, |
| { |
| "epoch": 0.023321120154708497, |
| "grad_norm": 0.4839624762535095, |
| "learning_rate": 1.9994303769072449e-07, |
| "loss": 0.0032, |
| "step": 12590 |
| }, |
| { |
| "epoch": 0.023339643681439797, |
| "grad_norm": 1.1262454986572266, |
| "learning_rate": 1.999429393620566e-07, |
| "loss": 0.004, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.023358167208171098, |
| "grad_norm": 1.2690633535385132, |
| "learning_rate": 1.9994284094861833e-07, |
| "loss": 0.0049, |
| "step": 12610 |
| }, |
| { |
| "epoch": 0.023376690734902398, |
| "grad_norm": 1.2983993291854858, |
| "learning_rate": 1.999427424504098e-07, |
| "loss": 0.0038, |
| "step": 12620 |
| }, |
| { |
| "epoch": 0.023395214261633702, |
| "grad_norm": 0.4273400902748108, |
| "learning_rate": 1.9994264386743102e-07, |
| "loss": 0.0043, |
| "step": 12630 |
| }, |
| { |
| "epoch": 0.023413737788365002, |
| "grad_norm": 1.6379945278167725, |
| "learning_rate": 1.9994254519968216e-07, |
| "loss": 0.0043, |
| "step": 12640 |
| }, |
| { |
| "epoch": 0.023432261315096303, |
| "grad_norm": 0.7200930118560791, |
| "learning_rate": 1.9994244644716326e-07, |
| "loss": 0.0055, |
| "step": 12650 |
| }, |
| { |
| "epoch": 0.023450784841827606, |
| "grad_norm": 0.7471675872802734, |
| "learning_rate": 1.999423476098744e-07, |
| "loss": 0.0048, |
| "step": 12660 |
| }, |
| { |
| "epoch": 0.023469308368558907, |
| "grad_norm": 1.360355257987976, |
| "learning_rate": 1.999422486878157e-07, |
| "loss": 0.005, |
| "step": 12670 |
| }, |
| { |
| "epoch": 0.023487831895290207, |
| "grad_norm": 2.2988743782043457, |
| "learning_rate": 1.999421496809872e-07, |
| "loss": 0.0043, |
| "step": 12680 |
| }, |
| { |
| "epoch": 0.02350635542202151, |
| "grad_norm": 0.7278249263763428, |
| "learning_rate": 1.99942050589389e-07, |
| "loss": 0.004, |
| "step": 12690 |
| }, |
| { |
| "epoch": 0.02352487894875281, |
| "grad_norm": 0.9349688291549683, |
| "learning_rate": 1.999419514130212e-07, |
| "loss": 0.0053, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.023543402475484112, |
| "grad_norm": 0.4226296842098236, |
| "learning_rate": 1.9994185215188386e-07, |
| "loss": 0.0031, |
| "step": 12710 |
| }, |
| { |
| "epoch": 0.023561926002215412, |
| "grad_norm": 3.6751651763916016, |
| "learning_rate": 1.9994175280597708e-07, |
| "loss": 0.0052, |
| "step": 12720 |
| }, |
| { |
| "epoch": 0.023580449528946716, |
| "grad_norm": 0.28604334592819214, |
| "learning_rate": 1.9994165337530094e-07, |
| "loss": 0.004, |
| "step": 12730 |
| }, |
| { |
| "epoch": 0.023598973055678017, |
| "grad_norm": 1.5660161972045898, |
| "learning_rate": 1.9994155385985552e-07, |
| "loss": 0.0038, |
| "step": 12740 |
| }, |
| { |
| "epoch": 0.023617496582409317, |
| "grad_norm": 0.797073483467102, |
| "learning_rate": 1.999414542596409e-07, |
| "loss": 0.0039, |
| "step": 12750 |
| }, |
| { |
| "epoch": 0.02363602010914062, |
| "grad_norm": 1.3645159006118774, |
| "learning_rate": 1.9994135457465719e-07, |
| "loss": 0.0039, |
| "step": 12760 |
| }, |
| { |
| "epoch": 0.02365454363587192, |
| "grad_norm": 3.588331937789917, |
| "learning_rate": 1.9994125480490444e-07, |
| "loss": 0.0035, |
| "step": 12770 |
| }, |
| { |
| "epoch": 0.02367306716260322, |
| "grad_norm": 0.4760388731956482, |
| "learning_rate": 1.9994115495038278e-07, |
| "loss": 0.0041, |
| "step": 12780 |
| }, |
| { |
| "epoch": 0.023691590689334525, |
| "grad_norm": 1.312637448310852, |
| "learning_rate": 1.9994105501109223e-07, |
| "loss": 0.0041, |
| "step": 12790 |
| }, |
| { |
| "epoch": 0.023710114216065826, |
| "grad_norm": 0.7631438374519348, |
| "learning_rate": 1.9994095498703293e-07, |
| "loss": 0.004, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.023728637742797126, |
| "grad_norm": 1.3392548561096191, |
| "learning_rate": 1.9994085487820495e-07, |
| "loss": 0.0045, |
| "step": 12810 |
| }, |
| { |
| "epoch": 0.023747161269528427, |
| "grad_norm": 0.7242027521133423, |
| "learning_rate": 1.9994075468460836e-07, |
| "loss": 0.0038, |
| "step": 12820 |
| }, |
| { |
| "epoch": 0.02376568479625973, |
| "grad_norm": 0.9271637201309204, |
| "learning_rate": 1.999406544062433e-07, |
| "loss": 0.005, |
| "step": 12830 |
| }, |
| { |
| "epoch": 0.02378420832299103, |
| "grad_norm": 0.7944082021713257, |
| "learning_rate": 1.9994055404310974e-07, |
| "loss": 0.0053, |
| "step": 12840 |
| }, |
| { |
| "epoch": 0.02380273184972233, |
| "grad_norm": 0.7931725978851318, |
| "learning_rate": 1.9994045359520789e-07, |
| "loss": 0.0032, |
| "step": 12850 |
| }, |
| { |
| "epoch": 0.023821255376453635, |
| "grad_norm": 1.214794635772705, |
| "learning_rate": 1.9994035306253773e-07, |
| "loss": 0.0038, |
| "step": 12860 |
| }, |
| { |
| "epoch": 0.023839778903184936, |
| "grad_norm": 0.6131728887557983, |
| "learning_rate": 1.9994025244509945e-07, |
| "loss": 0.0036, |
| "step": 12870 |
| }, |
| { |
| "epoch": 0.023858302429916236, |
| "grad_norm": 0.4505075514316559, |
| "learning_rate": 1.9994015174289305e-07, |
| "loss": 0.0043, |
| "step": 12880 |
| }, |
| { |
| "epoch": 0.023876825956647536, |
| "grad_norm": 0.7889305353164673, |
| "learning_rate": 1.9994005095591863e-07, |
| "loss": 0.0044, |
| "step": 12890 |
| }, |
| { |
| "epoch": 0.02389534948337884, |
| "grad_norm": 0.7913212180137634, |
| "learning_rate": 1.9993995008417634e-07, |
| "loss": 0.0045, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.02391387301011014, |
| "grad_norm": 1.411206603050232, |
| "learning_rate": 1.9993984912766617e-07, |
| "loss": 0.0044, |
| "step": 12910 |
| }, |
| { |
| "epoch": 0.02393239653684144, |
| "grad_norm": 3.236736297607422, |
| "learning_rate": 1.999397480863883e-07, |
| "loss": 0.0047, |
| "step": 12920 |
| }, |
| { |
| "epoch": 0.023950920063572745, |
| "grad_norm": 1.022062063217163, |
| "learning_rate": 1.9993964696034276e-07, |
| "loss": 0.0055, |
| "step": 12930 |
| }, |
| { |
| "epoch": 0.023969443590304045, |
| "grad_norm": 1.1789883375167847, |
| "learning_rate": 1.999395457495296e-07, |
| "loss": 0.0037, |
| "step": 12940 |
| }, |
| { |
| "epoch": 0.023987967117035346, |
| "grad_norm": 1.1766873598098755, |
| "learning_rate": 1.9993944445394901e-07, |
| "loss": 0.0042, |
| "step": 12950 |
| }, |
| { |
| "epoch": 0.02400649064376665, |
| "grad_norm": 2.5113847255706787, |
| "learning_rate": 1.99939343073601e-07, |
| "loss": 0.0035, |
| "step": 12960 |
| }, |
| { |
| "epoch": 0.02402501417049795, |
| "grad_norm": 1.2734301090240479, |
| "learning_rate": 1.9993924160848565e-07, |
| "loss": 0.0045, |
| "step": 12970 |
| }, |
| { |
| "epoch": 0.02404353769722925, |
| "grad_norm": 0.2985021471977234, |
| "learning_rate": 1.9993914005860312e-07, |
| "loss": 0.0036, |
| "step": 12980 |
| }, |
| { |
| "epoch": 0.02406206122396055, |
| "grad_norm": 0.7399972677230835, |
| "learning_rate": 1.999390384239534e-07, |
| "loss": 0.0035, |
| "step": 12990 |
| }, |
| { |
| "epoch": 0.024080584750691855, |
| "grad_norm": 0.5462217330932617, |
| "learning_rate": 1.999389367045366e-07, |
| "loss": 0.0028, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.024099108277423155, |
| "grad_norm": 1.5863651037216187, |
| "learning_rate": 1.9993883490035289e-07, |
| "loss": 0.005, |
| "step": 13010 |
| }, |
| { |
| "epoch": 0.024117631804154455, |
| "grad_norm": 0.902741551399231, |
| "learning_rate": 1.9993873301140224e-07, |
| "loss": 0.0047, |
| "step": 13020 |
| }, |
| { |
| "epoch": 0.02413615533088576, |
| "grad_norm": 0.3167039155960083, |
| "learning_rate": 1.9993863103768483e-07, |
| "loss": 0.0052, |
| "step": 13030 |
| }, |
| { |
| "epoch": 0.02415467885761706, |
| "grad_norm": 0.7409302592277527, |
| "learning_rate": 1.999385289792007e-07, |
| "loss": 0.0037, |
| "step": 13040 |
| }, |
| { |
| "epoch": 0.02417320238434836, |
| "grad_norm": 0.5789228081703186, |
| "learning_rate": 1.9993842683594993e-07, |
| "loss": 0.0036, |
| "step": 13050 |
| }, |
| { |
| "epoch": 0.024191725911079664, |
| "grad_norm": 0.9407364726066589, |
| "learning_rate": 1.999383246079326e-07, |
| "loss": 0.0032, |
| "step": 13060 |
| }, |
| { |
| "epoch": 0.024210249437810964, |
| "grad_norm": 0.930705189704895, |
| "learning_rate": 1.9993822229514885e-07, |
| "loss": 0.0033, |
| "step": 13070 |
| }, |
| { |
| "epoch": 0.024228772964542265, |
| "grad_norm": 0.973807692527771, |
| "learning_rate": 1.9993811989759873e-07, |
| "loss": 0.0035, |
| "step": 13080 |
| }, |
| { |
| "epoch": 0.024247296491273565, |
| "grad_norm": 2.007293701171875, |
| "learning_rate": 1.9993801741528234e-07, |
| "loss": 0.0048, |
| "step": 13090 |
| }, |
| { |
| "epoch": 0.02426582001800487, |
| "grad_norm": 0.8778340816497803, |
| "learning_rate": 1.9993791484819974e-07, |
| "loss": 0.0041, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.02428434354473617, |
| "grad_norm": 1.2206062078475952, |
| "learning_rate": 1.9993781219635103e-07, |
| "loss": 0.0029, |
| "step": 13110 |
| }, |
| { |
| "epoch": 0.02430286707146747, |
| "grad_norm": 1.1749815940856934, |
| "learning_rate": 1.9993770945973632e-07, |
| "loss": 0.0044, |
| "step": 13120 |
| }, |
| { |
| "epoch": 0.024321390598198774, |
| "grad_norm": 1.1433521509170532, |
| "learning_rate": 1.9993760663835566e-07, |
| "loss": 0.0033, |
| "step": 13130 |
| }, |
| { |
| "epoch": 0.024339914124930074, |
| "grad_norm": 1.854564905166626, |
| "learning_rate": 1.9993750373220916e-07, |
| "loss": 0.0035, |
| "step": 13140 |
| }, |
| { |
| "epoch": 0.024358437651661374, |
| "grad_norm": 2.1192049980163574, |
| "learning_rate": 1.9993740074129692e-07, |
| "loss": 0.0042, |
| "step": 13150 |
| }, |
| { |
| "epoch": 0.024376961178392678, |
| "grad_norm": 2.7676448822021484, |
| "learning_rate": 1.9993729766561902e-07, |
| "loss": 0.0058, |
| "step": 13160 |
| }, |
| { |
| "epoch": 0.02439548470512398, |
| "grad_norm": 4.022232532501221, |
| "learning_rate": 1.999371945051755e-07, |
| "loss": 0.0041, |
| "step": 13170 |
| }, |
| { |
| "epoch": 0.02441400823185528, |
| "grad_norm": 0.5549601316452026, |
| "learning_rate": 1.999370912599665e-07, |
| "loss": 0.0029, |
| "step": 13180 |
| }, |
| { |
| "epoch": 0.02443253175858658, |
| "grad_norm": 0.9859621524810791, |
| "learning_rate": 1.999369879299921e-07, |
| "loss": 0.0047, |
| "step": 13190 |
| }, |
| { |
| "epoch": 0.024451055285317883, |
| "grad_norm": 0.472397118806839, |
| "learning_rate": 1.999368845152524e-07, |
| "loss": 0.0037, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.024469578812049184, |
| "grad_norm": 0.3009524345397949, |
| "learning_rate": 1.9993678101574743e-07, |
| "loss": 0.0035, |
| "step": 13210 |
| }, |
| { |
| "epoch": 0.024488102338780484, |
| "grad_norm": 1.2662854194641113, |
| "learning_rate": 1.9993667743147733e-07, |
| "loss": 0.0054, |
| "step": 13220 |
| }, |
| { |
| "epoch": 0.024506625865511788, |
| "grad_norm": 0.7446502447128296, |
| "learning_rate": 1.9993657376244216e-07, |
| "loss": 0.0052, |
| "step": 13230 |
| }, |
| { |
| "epoch": 0.024525149392243088, |
| "grad_norm": 1.4077544212341309, |
| "learning_rate": 1.9993647000864207e-07, |
| "loss": 0.0065, |
| "step": 13240 |
| }, |
| { |
| "epoch": 0.02454367291897439, |
| "grad_norm": 0.30665475130081177, |
| "learning_rate": 1.9993636617007704e-07, |
| "loss": 0.0041, |
| "step": 13250 |
| }, |
| { |
| "epoch": 0.024562196445705693, |
| "grad_norm": 1.9413292407989502, |
| "learning_rate": 1.9993626224674726e-07, |
| "loss": 0.0039, |
| "step": 13260 |
| }, |
| { |
| "epoch": 0.024580719972436993, |
| "grad_norm": 0.8427108526229858, |
| "learning_rate": 1.9993615823865277e-07, |
| "loss": 0.0043, |
| "step": 13270 |
| }, |
| { |
| "epoch": 0.024599243499168293, |
| "grad_norm": 3.0078439712524414, |
| "learning_rate": 1.9993605414579365e-07, |
| "loss": 0.0046, |
| "step": 13280 |
| }, |
| { |
| "epoch": 0.024617767025899594, |
| "grad_norm": 1.311022400856018, |
| "learning_rate": 1.9993594996817e-07, |
| "loss": 0.0036, |
| "step": 13290 |
| }, |
| { |
| "epoch": 0.024636290552630898, |
| "grad_norm": 0.5277770757675171, |
| "learning_rate": 1.9993584570578194e-07, |
| "loss": 0.0034, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.024654814079362198, |
| "grad_norm": 2.953326463699341, |
| "learning_rate": 1.999357413586295e-07, |
| "loss": 0.0035, |
| "step": 13310 |
| }, |
| { |
| "epoch": 0.0246733376060935, |
| "grad_norm": 1.2214648723602295, |
| "learning_rate": 1.999356369267128e-07, |
| "loss": 0.0036, |
| "step": 13320 |
| }, |
| { |
| "epoch": 0.024691861132824802, |
| "grad_norm": 0.5046392679214478, |
| "learning_rate": 1.9993553241003194e-07, |
| "loss": 0.0049, |
| "step": 13330 |
| }, |
| { |
| "epoch": 0.024710384659556103, |
| "grad_norm": 0.5710066556930542, |
| "learning_rate": 1.99935427808587e-07, |
| "loss": 0.0039, |
| "step": 13340 |
| }, |
| { |
| "epoch": 0.024728908186287403, |
| "grad_norm": 0.4568794071674347, |
| "learning_rate": 1.9993532312237805e-07, |
| "loss": 0.0035, |
| "step": 13350 |
| }, |
| { |
| "epoch": 0.024747431713018703, |
| "grad_norm": 1.226789951324463, |
| "learning_rate": 1.999352183514052e-07, |
| "loss": 0.0055, |
| "step": 13360 |
| }, |
| { |
| "epoch": 0.024765955239750007, |
| "grad_norm": 0.3830243945121765, |
| "learning_rate": 1.9993511349566852e-07, |
| "loss": 0.0049, |
| "step": 13370 |
| }, |
| { |
| "epoch": 0.024784478766481308, |
| "grad_norm": 1.1660419702529907, |
| "learning_rate": 1.9993500855516813e-07, |
| "loss": 0.0036, |
| "step": 13380 |
| }, |
| { |
| "epoch": 0.024803002293212608, |
| "grad_norm": 0.5242053866386414, |
| "learning_rate": 1.999349035299041e-07, |
| "loss": 0.0043, |
| "step": 13390 |
| }, |
| { |
| "epoch": 0.024821525819943912, |
| "grad_norm": 1.0264207124710083, |
| "learning_rate": 1.999347984198765e-07, |
| "loss": 0.0037, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.024840049346675212, |
| "grad_norm": 0.546720564365387, |
| "learning_rate": 1.9993469322508542e-07, |
| "loss": 0.0032, |
| "step": 13410 |
| }, |
| { |
| "epoch": 0.024858572873406513, |
| "grad_norm": 1.5827056169509888, |
| "learning_rate": 1.9993458794553103e-07, |
| "loss": 0.0045, |
| "step": 13420 |
| }, |
| { |
| "epoch": 0.024877096400137817, |
| "grad_norm": 0.7910020351409912, |
| "learning_rate": 1.999344825812133e-07, |
| "loss": 0.003, |
| "step": 13430 |
| }, |
| { |
| "epoch": 0.024895619926869117, |
| "grad_norm": 2.7343554496765137, |
| "learning_rate": 1.9993437713213241e-07, |
| "loss": 0.0039, |
| "step": 13440 |
| }, |
| { |
| "epoch": 0.024914143453600417, |
| "grad_norm": 0.5539982318878174, |
| "learning_rate": 1.999342715982884e-07, |
| "loss": 0.0036, |
| "step": 13450 |
| }, |
| { |
| "epoch": 0.024932666980331718, |
| "grad_norm": 1.0445407629013062, |
| "learning_rate": 1.999341659796814e-07, |
| "loss": 0.0039, |
| "step": 13460 |
| }, |
| { |
| "epoch": 0.02495119050706302, |
| "grad_norm": 0.9071051478385925, |
| "learning_rate": 1.999340602763114e-07, |
| "loss": 0.0035, |
| "step": 13470 |
| }, |
| { |
| "epoch": 0.024969714033794322, |
| "grad_norm": 3.8790252208709717, |
| "learning_rate": 1.999339544881786e-07, |
| "loss": 0.0039, |
| "step": 13480 |
| }, |
| { |
| "epoch": 0.024988237560525622, |
| "grad_norm": 1.3649259805679321, |
| "learning_rate": 1.9993384861528312e-07, |
| "loss": 0.0043, |
| "step": 13490 |
| }, |
| { |
| "epoch": 0.025006761087256926, |
| "grad_norm": 1.1538264751434326, |
| "learning_rate": 1.999337426576249e-07, |
| "loss": 0.0046, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.025025284613988227, |
| "grad_norm": 0.8608886003494263, |
| "learning_rate": 1.9993363661520416e-07, |
| "loss": 0.0027, |
| "step": 13510 |
| }, |
| { |
| "epoch": 0.025043808140719527, |
| "grad_norm": 1.1931533813476562, |
| "learning_rate": 1.9993353048802093e-07, |
| "loss": 0.0047, |
| "step": 13520 |
| }, |
| { |
| "epoch": 0.02506233166745083, |
| "grad_norm": 0.46739956736564636, |
| "learning_rate": 1.999334242760753e-07, |
| "loss": 0.0039, |
| "step": 13530 |
| }, |
| { |
| "epoch": 0.02508085519418213, |
| "grad_norm": 0.8243370652198792, |
| "learning_rate": 1.999333179793674e-07, |
| "loss": 0.0039, |
| "step": 13540 |
| }, |
| { |
| "epoch": 0.02509937872091343, |
| "grad_norm": 0.9790375828742981, |
| "learning_rate": 1.9993321159789726e-07, |
| "loss": 0.0032, |
| "step": 13550 |
| }, |
| { |
| "epoch": 0.025117902247644732, |
| "grad_norm": 0.8523391485214233, |
| "learning_rate": 1.99933105131665e-07, |
| "loss": 0.0033, |
| "step": 13560 |
| }, |
| { |
| "epoch": 0.025136425774376036, |
| "grad_norm": 1.8698952198028564, |
| "learning_rate": 1.9993299858067077e-07, |
| "loss": 0.0039, |
| "step": 13570 |
| }, |
| { |
| "epoch": 0.025154949301107336, |
| "grad_norm": 1.440710186958313, |
| "learning_rate": 1.9993289194491456e-07, |
| "loss": 0.0037, |
| "step": 13580 |
| }, |
| { |
| "epoch": 0.025173472827838637, |
| "grad_norm": 1.831391453742981, |
| "learning_rate": 1.999327852243965e-07, |
| "loss": 0.0046, |
| "step": 13590 |
| }, |
| { |
| "epoch": 0.02519199635456994, |
| "grad_norm": 1.0586085319519043, |
| "learning_rate": 1.999326784191167e-07, |
| "loss": 0.004, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.02521051988130124, |
| "grad_norm": 0.6870210766792297, |
| "learning_rate": 1.9993257152907525e-07, |
| "loss": 0.0043, |
| "step": 13610 |
| }, |
| { |
| "epoch": 0.02522904340803254, |
| "grad_norm": 0.969866931438446, |
| "learning_rate": 1.9993246455427222e-07, |
| "loss": 0.0037, |
| "step": 13620 |
| }, |
| { |
| "epoch": 0.025247566934763845, |
| "grad_norm": 1.4233394861221313, |
| "learning_rate": 1.999323574947077e-07, |
| "loss": 0.0041, |
| "step": 13630 |
| }, |
| { |
| "epoch": 0.025266090461495146, |
| "grad_norm": 1.1810661554336548, |
| "learning_rate": 1.999322503503818e-07, |
| "loss": 0.0033, |
| "step": 13640 |
| }, |
| { |
| "epoch": 0.025284613988226446, |
| "grad_norm": 1.3166649341583252, |
| "learning_rate": 1.9993214312129457e-07, |
| "loss": 0.0042, |
| "step": 13650 |
| }, |
| { |
| "epoch": 0.025303137514957746, |
| "grad_norm": 1.1056807041168213, |
| "learning_rate": 1.9993203580744616e-07, |
| "loss": 0.0043, |
| "step": 13660 |
| }, |
| { |
| "epoch": 0.02532166104168905, |
| "grad_norm": 1.1100889444351196, |
| "learning_rate": 1.9993192840883662e-07, |
| "loss": 0.0038, |
| "step": 13670 |
| }, |
| { |
| "epoch": 0.02534018456842035, |
| "grad_norm": 0.5040842890739441, |
| "learning_rate": 1.9993182092546603e-07, |
| "loss": 0.0044, |
| "step": 13680 |
| }, |
| { |
| "epoch": 0.02535870809515165, |
| "grad_norm": 1.169029951095581, |
| "learning_rate": 1.9993171335733454e-07, |
| "loss": 0.0037, |
| "step": 13690 |
| }, |
| { |
| "epoch": 0.025377231621882955, |
| "grad_norm": 1.6770260334014893, |
| "learning_rate": 1.999316057044422e-07, |
| "loss": 0.0044, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.025395755148614255, |
| "grad_norm": 1.1162688732147217, |
| "learning_rate": 1.9993149796678908e-07, |
| "loss": 0.0034, |
| "step": 13710 |
| }, |
| { |
| "epoch": 0.025414278675345556, |
| "grad_norm": 1.3762277364730835, |
| "learning_rate": 1.9993139014437531e-07, |
| "loss": 0.0036, |
| "step": 13720 |
| }, |
| { |
| "epoch": 0.025432802202076856, |
| "grad_norm": 0.23831801116466522, |
| "learning_rate": 1.9993128223720097e-07, |
| "loss": 0.0037, |
| "step": 13730 |
| }, |
| { |
| "epoch": 0.02545132572880816, |
| "grad_norm": 2.6825010776519775, |
| "learning_rate": 1.9993117424526616e-07, |
| "loss": 0.0038, |
| "step": 13740 |
| }, |
| { |
| "epoch": 0.02546984925553946, |
| "grad_norm": 1.3211004734039307, |
| "learning_rate": 1.9993106616857096e-07, |
| "loss": 0.0043, |
| "step": 13750 |
| }, |
| { |
| "epoch": 0.02548837278227076, |
| "grad_norm": 1.1379201412200928, |
| "learning_rate": 1.9993095800711545e-07, |
| "loss": 0.0043, |
| "step": 13760 |
| }, |
| { |
| "epoch": 0.025506896309002065, |
| "grad_norm": 8.816250801086426, |
| "learning_rate": 1.9993084976089976e-07, |
| "loss": 0.0035, |
| "step": 13770 |
| }, |
| { |
| "epoch": 0.025525419835733365, |
| "grad_norm": 0.5511662364006042, |
| "learning_rate": 1.999307414299239e-07, |
| "loss": 0.0052, |
| "step": 13780 |
| }, |
| { |
| "epoch": 0.025543943362464665, |
| "grad_norm": 1.8915300369262695, |
| "learning_rate": 1.9993063301418808e-07, |
| "loss": 0.0046, |
| "step": 13790 |
| }, |
| { |
| "epoch": 0.02556246688919597, |
| "grad_norm": 2.0237274169921875, |
| "learning_rate": 1.9993052451369233e-07, |
| "loss": 0.0049, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.02558099041592727, |
| "grad_norm": 0.8218046426773071, |
| "learning_rate": 1.999304159284367e-07, |
| "loss": 0.0042, |
| "step": 13810 |
| }, |
| { |
| "epoch": 0.02559951394265857, |
| "grad_norm": 0.9157915711402893, |
| "learning_rate": 1.9993030725842135e-07, |
| "loss": 0.0041, |
| "step": 13820 |
| }, |
| { |
| "epoch": 0.02561803746938987, |
| "grad_norm": 0.9119143486022949, |
| "learning_rate": 1.9993019850364634e-07, |
| "loss": 0.0039, |
| "step": 13830 |
| }, |
| { |
| "epoch": 0.025636560996121174, |
| "grad_norm": 1.533337950706482, |
| "learning_rate": 1.9993008966411178e-07, |
| "loss": 0.0038, |
| "step": 13840 |
| }, |
| { |
| "epoch": 0.025655084522852475, |
| "grad_norm": 2.22788667678833, |
| "learning_rate": 1.9992998073981774e-07, |
| "loss": 0.0032, |
| "step": 13850 |
| }, |
| { |
| "epoch": 0.025673608049583775, |
| "grad_norm": 1.1273174285888672, |
| "learning_rate": 1.9992987173076433e-07, |
| "loss": 0.0041, |
| "step": 13860 |
| }, |
| { |
| "epoch": 0.02569213157631508, |
| "grad_norm": 0.6672047972679138, |
| "learning_rate": 1.9992976263695165e-07, |
| "loss": 0.0041, |
| "step": 13870 |
| }, |
| { |
| "epoch": 0.02571065510304638, |
| "grad_norm": 0.7757489085197449, |
| "learning_rate": 1.9992965345837974e-07, |
| "loss": 0.0042, |
| "step": 13880 |
| }, |
| { |
| "epoch": 0.02572917862977768, |
| "grad_norm": 1.2127727270126343, |
| "learning_rate": 1.9992954419504877e-07, |
| "loss": 0.0039, |
| "step": 13890 |
| }, |
| { |
| "epoch": 0.025747702156508984, |
| "grad_norm": 2.30127215385437, |
| "learning_rate": 1.9992943484695875e-07, |
| "loss": 0.0031, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.025766225683240284, |
| "grad_norm": 0.745219349861145, |
| "learning_rate": 1.9992932541410989e-07, |
| "loss": 0.0045, |
| "step": 13910 |
| }, |
| { |
| "epoch": 0.025784749209971584, |
| "grad_norm": 1.2701218128204346, |
| "learning_rate": 1.9992921589650216e-07, |
| "loss": 0.0035, |
| "step": 13920 |
| }, |
| { |
| "epoch": 0.025803272736702885, |
| "grad_norm": 0.30821022391319275, |
| "learning_rate": 1.9992910629413572e-07, |
| "loss": 0.0028, |
| "step": 13930 |
| }, |
| { |
| "epoch": 0.02582179626343419, |
| "grad_norm": 1.768576741218567, |
| "learning_rate": 1.9992899660701063e-07, |
| "loss": 0.0034, |
| "step": 13940 |
| }, |
| { |
| "epoch": 0.02584031979016549, |
| "grad_norm": 0.5029256343841553, |
| "learning_rate": 1.99928886835127e-07, |
| "loss": 0.0041, |
| "step": 13950 |
| }, |
| { |
| "epoch": 0.02585884331689679, |
| "grad_norm": 0.396045058965683, |
| "learning_rate": 1.9992877697848494e-07, |
| "loss": 0.0033, |
| "step": 13960 |
| }, |
| { |
| "epoch": 0.025877366843628093, |
| "grad_norm": 1.0669636726379395, |
| "learning_rate": 1.999286670370845e-07, |
| "loss": 0.0042, |
| "step": 13970 |
| }, |
| { |
| "epoch": 0.025895890370359394, |
| "grad_norm": 1.2855182886123657, |
| "learning_rate": 1.9992855701092582e-07, |
| "loss": 0.0035, |
| "step": 13980 |
| }, |
| { |
| "epoch": 0.025914413897090694, |
| "grad_norm": 2.3098907470703125, |
| "learning_rate": 1.9992844690000897e-07, |
| "loss": 0.0038, |
| "step": 13990 |
| }, |
| { |
| "epoch": 0.025932937423821998, |
| "grad_norm": 1.3860021829605103, |
| "learning_rate": 1.99928336704334e-07, |
| "loss": 0.0036, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.0259514609505533, |
| "grad_norm": 1.1566129922866821, |
| "learning_rate": 1.9992822642390112e-07, |
| "loss": 0.0036, |
| "step": 14010 |
| }, |
| { |
| "epoch": 0.0259699844772846, |
| "grad_norm": 0.5010298490524292, |
| "learning_rate": 1.9992811605871033e-07, |
| "loss": 0.0043, |
| "step": 14020 |
| }, |
| { |
| "epoch": 0.0259885080040159, |
| "grad_norm": 1.7062780857086182, |
| "learning_rate": 1.9992800560876174e-07, |
| "loss": 0.0039, |
| "step": 14030 |
| }, |
| { |
| "epoch": 0.026007031530747203, |
| "grad_norm": 0.7996389865875244, |
| "learning_rate": 1.9992789507405543e-07, |
| "loss": 0.0043, |
| "step": 14040 |
| }, |
| { |
| "epoch": 0.026025555057478503, |
| "grad_norm": 0.5072804093360901, |
| "learning_rate": 1.9992778445459152e-07, |
| "loss": 0.003, |
| "step": 14050 |
| }, |
| { |
| "epoch": 0.026044078584209804, |
| "grad_norm": 0.9613421559333801, |
| "learning_rate": 1.9992767375037012e-07, |
| "loss": 0.0045, |
| "step": 14060 |
| }, |
| { |
| "epoch": 0.026062602110941108, |
| "grad_norm": 1.3300940990447998, |
| "learning_rate": 1.9992756296139128e-07, |
| "loss": 0.0038, |
| "step": 14070 |
| }, |
| { |
| "epoch": 0.026081125637672408, |
| "grad_norm": 0.4797874689102173, |
| "learning_rate": 1.9992745208765514e-07, |
| "loss": 0.0038, |
| "step": 14080 |
| }, |
| { |
| "epoch": 0.02609964916440371, |
| "grad_norm": 8.949529647827148, |
| "learning_rate": 1.9992734112916173e-07, |
| "loss": 0.0042, |
| "step": 14090 |
| }, |
| { |
| "epoch": 0.02611817269113501, |
| "grad_norm": 0.5192855000495911, |
| "learning_rate": 1.9992723008591122e-07, |
| "loss": 0.003, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.026136696217866313, |
| "grad_norm": 1.2549939155578613, |
| "learning_rate": 1.9992711895790365e-07, |
| "loss": 0.0051, |
| "step": 14110 |
| }, |
| { |
| "epoch": 0.026155219744597613, |
| "grad_norm": 1.0937813520431519, |
| "learning_rate": 1.999270077451391e-07, |
| "loss": 0.0048, |
| "step": 14120 |
| }, |
| { |
| "epoch": 0.026173743271328914, |
| "grad_norm": 0.5928589105606079, |
| "learning_rate": 1.9992689644761774e-07, |
| "loss": 0.0024, |
| "step": 14130 |
| }, |
| { |
| "epoch": 0.026192266798060217, |
| "grad_norm": 0.32942864298820496, |
| "learning_rate": 1.9992678506533962e-07, |
| "loss": 0.0039, |
| "step": 14140 |
| }, |
| { |
| "epoch": 0.026210790324791518, |
| "grad_norm": 1.1413058042526245, |
| "learning_rate": 1.999266735983048e-07, |
| "loss": 0.0028, |
| "step": 14150 |
| }, |
| { |
| "epoch": 0.026229313851522818, |
| "grad_norm": 1.7829631567001343, |
| "learning_rate": 1.9992656204651345e-07, |
| "loss": 0.004, |
| "step": 14160 |
| }, |
| { |
| "epoch": 0.026247837378254122, |
| "grad_norm": 0.6462355852127075, |
| "learning_rate": 1.9992645040996562e-07, |
| "loss": 0.0031, |
| "step": 14170 |
| }, |
| { |
| "epoch": 0.026266360904985422, |
| "grad_norm": 0.7902731895446777, |
| "learning_rate": 1.9992633868866137e-07, |
| "loss": 0.0043, |
| "step": 14180 |
| }, |
| { |
| "epoch": 0.026284884431716723, |
| "grad_norm": 0.5349451303482056, |
| "learning_rate": 1.9992622688260088e-07, |
| "loss": 0.0036, |
| "step": 14190 |
| }, |
| { |
| "epoch": 0.026303407958448023, |
| "grad_norm": 0.8034486770629883, |
| "learning_rate": 1.9992611499178418e-07, |
| "loss": 0.0035, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.026321931485179327, |
| "grad_norm": 0.497665137052536, |
| "learning_rate": 1.9992600301621136e-07, |
| "loss": 0.0036, |
| "step": 14210 |
| }, |
| { |
| "epoch": 0.026340455011910627, |
| "grad_norm": 0.5894801020622253, |
| "learning_rate": 1.9992589095588257e-07, |
| "loss": 0.0033, |
| "step": 14220 |
| }, |
| { |
| "epoch": 0.026358978538641928, |
| "grad_norm": 0.32930904626846313, |
| "learning_rate": 1.9992577881079786e-07, |
| "loss": 0.0034, |
| "step": 14230 |
| }, |
| { |
| "epoch": 0.02637750206537323, |
| "grad_norm": 0.6587752103805542, |
| "learning_rate": 1.9992566658095734e-07, |
| "loss": 0.0041, |
| "step": 14240 |
| }, |
| { |
| "epoch": 0.026396025592104532, |
| "grad_norm": 1.508559226989746, |
| "learning_rate": 1.9992555426636111e-07, |
| "loss": 0.0033, |
| "step": 14250 |
| }, |
| { |
| "epoch": 0.026414549118835833, |
| "grad_norm": 0.551942765712738, |
| "learning_rate": 1.9992544186700924e-07, |
| "loss": 0.005, |
| "step": 14260 |
| }, |
| { |
| "epoch": 0.026433072645567136, |
| "grad_norm": 2.6497669219970703, |
| "learning_rate": 1.9992532938290184e-07, |
| "loss": 0.0046, |
| "step": 14270 |
| }, |
| { |
| "epoch": 0.026451596172298437, |
| "grad_norm": 1.497714877128601, |
| "learning_rate": 1.9992521681403903e-07, |
| "loss": 0.0034, |
| "step": 14280 |
| }, |
| { |
| "epoch": 0.026470119699029737, |
| "grad_norm": 3.9580254554748535, |
| "learning_rate": 1.999251041604209e-07, |
| "loss": 0.0034, |
| "step": 14290 |
| }, |
| { |
| "epoch": 0.026488643225761038, |
| "grad_norm": 2.1725597381591797, |
| "learning_rate": 1.999249914220475e-07, |
| "loss": 0.0041, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.02650716675249234, |
| "grad_norm": 1.4030534029006958, |
| "learning_rate": 1.9992487859891896e-07, |
| "loss": 0.0032, |
| "step": 14310 |
| }, |
| { |
| "epoch": 0.026525690279223642, |
| "grad_norm": 0.40618935227394104, |
| "learning_rate": 1.9992476569103537e-07, |
| "loss": 0.0036, |
| "step": 14320 |
| }, |
| { |
| "epoch": 0.026544213805954942, |
| "grad_norm": 0.869651734828949, |
| "learning_rate": 1.9992465269839684e-07, |
| "loss": 0.0027, |
| "step": 14330 |
| }, |
| { |
| "epoch": 0.026562737332686246, |
| "grad_norm": 0.9191752076148987, |
| "learning_rate": 1.9992453962100346e-07, |
| "loss": 0.0039, |
| "step": 14340 |
| }, |
| { |
| "epoch": 0.026581260859417546, |
| "grad_norm": 1.091217279434204, |
| "learning_rate": 1.999244264588553e-07, |
| "loss": 0.0036, |
| "step": 14350 |
| }, |
| { |
| "epoch": 0.026599784386148847, |
| "grad_norm": 1.7123265266418457, |
| "learning_rate": 1.9992431321195248e-07, |
| "loss": 0.0039, |
| "step": 14360 |
| }, |
| { |
| "epoch": 0.02661830791288015, |
| "grad_norm": 6.467123985290527, |
| "learning_rate": 1.999241998802951e-07, |
| "loss": 0.0049, |
| "step": 14370 |
| }, |
| { |
| "epoch": 0.02663683143961145, |
| "grad_norm": 1.721150279045105, |
| "learning_rate": 1.9992408646388324e-07, |
| "loss": 0.0052, |
| "step": 14380 |
| }, |
| { |
| "epoch": 0.02665535496634275, |
| "grad_norm": 1.336623191833496, |
| "learning_rate": 1.99923972962717e-07, |
| "loss": 0.0037, |
| "step": 14390 |
| }, |
| { |
| "epoch": 0.026673878493074052, |
| "grad_norm": 1.2325992584228516, |
| "learning_rate": 1.9992385937679647e-07, |
| "loss": 0.0036, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.026692402019805356, |
| "grad_norm": 3.1750712394714355, |
| "learning_rate": 1.9992374570612178e-07, |
| "loss": 0.0038, |
| "step": 14410 |
| }, |
| { |
| "epoch": 0.026710925546536656, |
| "grad_norm": 0.7979589104652405, |
| "learning_rate": 1.99923631950693e-07, |
| "loss": 0.0037, |
| "step": 14420 |
| }, |
| { |
| "epoch": 0.026729449073267957, |
| "grad_norm": 1.2638963460922241, |
| "learning_rate": 1.999235181105102e-07, |
| "loss": 0.0046, |
| "step": 14430 |
| }, |
| { |
| "epoch": 0.02674797259999926, |
| "grad_norm": 0.9827898740768433, |
| "learning_rate": 1.9992340418557356e-07, |
| "loss": 0.0037, |
| "step": 14440 |
| }, |
| { |
| "epoch": 0.02676649612673056, |
| "grad_norm": 0.388492614030838, |
| "learning_rate": 1.9992329017588309e-07, |
| "loss": 0.0047, |
| "step": 14450 |
| }, |
| { |
| "epoch": 0.02678501965346186, |
| "grad_norm": 2.1175193786621094, |
| "learning_rate": 1.9992317608143892e-07, |
| "loss": 0.0037, |
| "step": 14460 |
| }, |
| { |
| "epoch": 0.026803543180193165, |
| "grad_norm": 0.644545316696167, |
| "learning_rate": 1.9992306190224112e-07, |
| "loss": 0.0044, |
| "step": 14470 |
| }, |
| { |
| "epoch": 0.026822066706924465, |
| "grad_norm": 0.39012351632118225, |
| "learning_rate": 1.9992294763828986e-07, |
| "loss": 0.0044, |
| "step": 14480 |
| }, |
| { |
| "epoch": 0.026840590233655766, |
| "grad_norm": 4.8135857582092285, |
| "learning_rate": 1.9992283328958517e-07, |
| "loss": 0.0027, |
| "step": 14490 |
| }, |
| { |
| "epoch": 0.026859113760387066, |
| "grad_norm": 0.8605958223342896, |
| "learning_rate": 1.9992271885612716e-07, |
| "loss": 0.0041, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.02687763728711837, |
| "grad_norm": 0.7354183197021484, |
| "learning_rate": 1.9992260433791594e-07, |
| "loss": 0.0039, |
| "step": 14510 |
| }, |
| { |
| "epoch": 0.02689616081384967, |
| "grad_norm": 0.5786769986152649, |
| "learning_rate": 1.9992248973495157e-07, |
| "loss": 0.0031, |
| "step": 14520 |
| }, |
| { |
| "epoch": 0.02691468434058097, |
| "grad_norm": 1.000627040863037, |
| "learning_rate": 1.999223750472342e-07, |
| "loss": 0.0037, |
| "step": 14530 |
| }, |
| { |
| "epoch": 0.026933207867312275, |
| "grad_norm": 0.49018093943595886, |
| "learning_rate": 1.9992226027476393e-07, |
| "loss": 0.0029, |
| "step": 14540 |
| }, |
| { |
| "epoch": 0.026951731394043575, |
| "grad_norm": 1.3955392837524414, |
| "learning_rate": 1.9992214541754082e-07, |
| "loss": 0.0045, |
| "step": 14550 |
| }, |
| { |
| "epoch": 0.026970254920774876, |
| "grad_norm": 1.0570303201675415, |
| "learning_rate": 1.9992203047556497e-07, |
| "loss": 0.0042, |
| "step": 14560 |
| }, |
| { |
| "epoch": 0.026988778447506176, |
| "grad_norm": 0.4549688994884491, |
| "learning_rate": 1.999219154488365e-07, |
| "loss": 0.0047, |
| "step": 14570 |
| }, |
| { |
| "epoch": 0.02700730197423748, |
| "grad_norm": 1.182187557220459, |
| "learning_rate": 1.9992180033735549e-07, |
| "loss": 0.0038, |
| "step": 14580 |
| }, |
| { |
| "epoch": 0.02702582550096878, |
| "grad_norm": 0.8583022952079773, |
| "learning_rate": 1.9992168514112202e-07, |
| "loss": 0.0046, |
| "step": 14590 |
| }, |
| { |
| "epoch": 0.02704434902770008, |
| "grad_norm": 0.5665132999420166, |
| "learning_rate": 1.9992156986013624e-07, |
| "loss": 0.0035, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.027062872554431384, |
| "grad_norm": 1.042681336402893, |
| "learning_rate": 1.9992145449439822e-07, |
| "loss": 0.0048, |
| "step": 14610 |
| }, |
| { |
| "epoch": 0.027081396081162685, |
| "grad_norm": 0.3293008804321289, |
| "learning_rate": 1.9992133904390804e-07, |
| "loss": 0.0034, |
| "step": 14620 |
| }, |
| { |
| "epoch": 0.027099919607893985, |
| "grad_norm": 1.644984245300293, |
| "learning_rate": 1.999212235086658e-07, |
| "loss": 0.0038, |
| "step": 14630 |
| }, |
| { |
| "epoch": 0.02711844313462529, |
| "grad_norm": 1.421950340270996, |
| "learning_rate": 1.9992110788867166e-07, |
| "loss": 0.0055, |
| "step": 14640 |
| }, |
| { |
| "epoch": 0.02713696666135659, |
| "grad_norm": 1.3089810609817505, |
| "learning_rate": 1.9992099218392564e-07, |
| "loss": 0.0031, |
| "step": 14650 |
| }, |
| { |
| "epoch": 0.02715549018808789, |
| "grad_norm": 4.183242321014404, |
| "learning_rate": 1.9992087639442786e-07, |
| "loss": 0.0032, |
| "step": 14660 |
| }, |
| { |
| "epoch": 0.02717401371481919, |
| "grad_norm": 0.5830032825469971, |
| "learning_rate": 1.9992076052017843e-07, |
| "loss": 0.0038, |
| "step": 14670 |
| }, |
| { |
| "epoch": 0.027192537241550494, |
| "grad_norm": 1.4001753330230713, |
| "learning_rate": 1.9992064456117745e-07, |
| "loss": 0.0034, |
| "step": 14680 |
| }, |
| { |
| "epoch": 0.027211060768281795, |
| "grad_norm": 13.539731979370117, |
| "learning_rate": 1.9992052851742502e-07, |
| "loss": 0.005, |
| "step": 14690 |
| }, |
| { |
| "epoch": 0.027229584295013095, |
| "grad_norm": 0.8338188529014587, |
| "learning_rate": 1.999204123889212e-07, |
| "loss": 0.0043, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.0272481078217444, |
| "grad_norm": 1.5026789903640747, |
| "learning_rate": 1.9992029617566616e-07, |
| "loss": 0.0035, |
| "step": 14710 |
| }, |
| { |
| "epoch": 0.0272666313484757, |
| "grad_norm": 3.635765790939331, |
| "learning_rate": 1.9992017987765993e-07, |
| "loss": 0.0042, |
| "step": 14720 |
| }, |
| { |
| "epoch": 0.027285154875207, |
| "grad_norm": 1.1293585300445557, |
| "learning_rate": 1.9992006349490266e-07, |
| "loss": 0.0048, |
| "step": 14730 |
| }, |
| { |
| "epoch": 0.027303678401938303, |
| "grad_norm": 1.0480681657791138, |
| "learning_rate": 1.9991994702739442e-07, |
| "loss": 0.0037, |
| "step": 14740 |
| }, |
| { |
| "epoch": 0.027322201928669604, |
| "grad_norm": 0.37252336740493774, |
| "learning_rate": 1.9991983047513532e-07, |
| "loss": 0.0034, |
| "step": 14750 |
| }, |
| { |
| "epoch": 0.027340725455400904, |
| "grad_norm": 4.205869674682617, |
| "learning_rate": 1.9991971383812541e-07, |
| "loss": 0.004, |
| "step": 14760 |
| }, |
| { |
| "epoch": 0.027359248982132205, |
| "grad_norm": 2.336991310119629, |
| "learning_rate": 1.9991959711636488e-07, |
| "loss": 0.0045, |
| "step": 14770 |
| }, |
| { |
| "epoch": 0.02737777250886351, |
| "grad_norm": 0.5513859987258911, |
| "learning_rate": 1.9991948030985378e-07, |
| "loss": 0.0038, |
| "step": 14780 |
| }, |
| { |
| "epoch": 0.02739629603559481, |
| "grad_norm": 1.1170828342437744, |
| "learning_rate": 1.999193634185922e-07, |
| "loss": 0.0037, |
| "step": 14790 |
| }, |
| { |
| "epoch": 0.02741481956232611, |
| "grad_norm": 1.3165197372436523, |
| "learning_rate": 1.9991924644258024e-07, |
| "loss": 0.0029, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.027433343089057413, |
| "grad_norm": 0.6852640509605408, |
| "learning_rate": 1.9991912938181802e-07, |
| "loss": 0.0033, |
| "step": 14810 |
| }, |
| { |
| "epoch": 0.027451866615788714, |
| "grad_norm": 1.3344347476959229, |
| "learning_rate": 1.9991901223630562e-07, |
| "loss": 0.0026, |
| "step": 14820 |
| }, |
| { |
| "epoch": 0.027470390142520014, |
| "grad_norm": 1.9052156209945679, |
| "learning_rate": 1.9991889500604315e-07, |
| "loss": 0.0041, |
| "step": 14830 |
| }, |
| { |
| "epoch": 0.027488913669251318, |
| "grad_norm": 0.7156654596328735, |
| "learning_rate": 1.9991877769103072e-07, |
| "loss": 0.004, |
| "step": 14840 |
| }, |
| { |
| "epoch": 0.027507437195982618, |
| "grad_norm": 0.8646858930587769, |
| "learning_rate": 1.9991866029126841e-07, |
| "loss": 0.0033, |
| "step": 14850 |
| }, |
| { |
| "epoch": 0.02752596072271392, |
| "grad_norm": 1.7443900108337402, |
| "learning_rate": 1.999185428067563e-07, |
| "loss": 0.0029, |
| "step": 14860 |
| }, |
| { |
| "epoch": 0.02754448424944522, |
| "grad_norm": 5.108303070068359, |
| "learning_rate": 1.9991842523749455e-07, |
| "loss": 0.0035, |
| "step": 14870 |
| }, |
| { |
| "epoch": 0.027563007776176523, |
| "grad_norm": 0.6446295380592346, |
| "learning_rate": 1.999183075834832e-07, |
| "loss": 0.003, |
| "step": 14880 |
| }, |
| { |
| "epoch": 0.027581531302907823, |
| "grad_norm": 1.04851233959198, |
| "learning_rate": 1.999181898447224e-07, |
| "loss": 0.0048, |
| "step": 14890 |
| }, |
| { |
| "epoch": 0.027600054829639124, |
| "grad_norm": 0.6830344200134277, |
| "learning_rate": 1.999180720212122e-07, |
| "loss": 0.0046, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.027618578356370427, |
| "grad_norm": 1.8201650381088257, |
| "learning_rate": 1.9991795411295277e-07, |
| "loss": 0.0041, |
| "step": 14910 |
| }, |
| { |
| "epoch": 0.027637101883101728, |
| "grad_norm": 0.6919720768928528, |
| "learning_rate": 1.9991783611994412e-07, |
| "loss": 0.0036, |
| "step": 14920 |
| }, |
| { |
| "epoch": 0.02765562540983303, |
| "grad_norm": 1.1396560668945312, |
| "learning_rate": 1.999177180421864e-07, |
| "loss": 0.0055, |
| "step": 14930 |
| }, |
| { |
| "epoch": 0.02767414893656433, |
| "grad_norm": 1.5992690324783325, |
| "learning_rate": 1.9991759987967972e-07, |
| "loss": 0.0049, |
| "step": 14940 |
| }, |
| { |
| "epoch": 0.027692672463295633, |
| "grad_norm": 1.2165946960449219, |
| "learning_rate": 1.9991748163242415e-07, |
| "loss": 0.0043, |
| "step": 14950 |
| }, |
| { |
| "epoch": 0.027711195990026933, |
| "grad_norm": 0.7770680785179138, |
| "learning_rate": 1.9991736330041982e-07, |
| "loss": 0.0045, |
| "step": 14960 |
| }, |
| { |
| "epoch": 0.027729719516758233, |
| "grad_norm": 1.6203789710998535, |
| "learning_rate": 1.999172448836668e-07, |
| "loss": 0.0052, |
| "step": 14970 |
| }, |
| { |
| "epoch": 0.027748243043489537, |
| "grad_norm": 0.6099765300750732, |
| "learning_rate": 1.999171263821652e-07, |
| "loss": 0.0039, |
| "step": 14980 |
| }, |
| { |
| "epoch": 0.027766766570220838, |
| "grad_norm": 1.437012791633606, |
| "learning_rate": 1.9991700779591517e-07, |
| "loss": 0.0052, |
| "step": 14990 |
| }, |
| { |
| "epoch": 0.027785290096952138, |
| "grad_norm": 1.3011822700500488, |
| "learning_rate": 1.9991688912491674e-07, |
| "loss": 0.0035, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.027803813623683442, |
| "grad_norm": 0.31955835223197937, |
| "learning_rate": 1.9991677036917003e-07, |
| "loss": 0.0036, |
| "step": 15010 |
| }, |
| { |
| "epoch": 0.027822337150414742, |
| "grad_norm": 0.9672516584396362, |
| "learning_rate": 1.9991665152867517e-07, |
| "loss": 0.0044, |
| "step": 15020 |
| }, |
| { |
| "epoch": 0.027840860677146043, |
| "grad_norm": 1.3713430166244507, |
| "learning_rate": 1.9991653260343223e-07, |
| "loss": 0.0036, |
| "step": 15030 |
| }, |
| { |
| "epoch": 0.027859384203877343, |
| "grad_norm": 0.41362401843070984, |
| "learning_rate": 1.999164135934413e-07, |
| "loss": 0.004, |
| "step": 15040 |
| }, |
| { |
| "epoch": 0.027877907730608647, |
| "grad_norm": 0.7470771670341492, |
| "learning_rate": 1.9991629449870254e-07, |
| "loss": 0.0041, |
| "step": 15050 |
| }, |
| { |
| "epoch": 0.027896431257339947, |
| "grad_norm": 1.2483714818954468, |
| "learning_rate": 1.99916175319216e-07, |
| "loss": 0.0045, |
| "step": 15060 |
| }, |
| { |
| "epoch": 0.027914954784071248, |
| "grad_norm": 0.5899113416671753, |
| "learning_rate": 1.9991605605498178e-07, |
| "loss": 0.0047, |
| "step": 15070 |
| }, |
| { |
| "epoch": 0.02793347831080255, |
| "grad_norm": 9.110048294067383, |
| "learning_rate": 1.99915936706e-07, |
| "loss": 0.0034, |
| "step": 15080 |
| }, |
| { |
| "epoch": 0.027952001837533852, |
| "grad_norm": 0.582204282283783, |
| "learning_rate": 1.9991581727227075e-07, |
| "loss": 0.0034, |
| "step": 15090 |
| }, |
| { |
| "epoch": 0.027970525364265152, |
| "grad_norm": 1.242082953453064, |
| "learning_rate": 1.9991569775379414e-07, |
| "loss": 0.0039, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.027989048890996456, |
| "grad_norm": 1.0316402912139893, |
| "learning_rate": 1.9991557815057028e-07, |
| "loss": 0.0048, |
| "step": 15110 |
| }, |
| { |
| "epoch": 0.028007572417727757, |
| "grad_norm": 0.47821304202079773, |
| "learning_rate": 1.9991545846259928e-07, |
| "loss": 0.0047, |
| "step": 15120 |
| }, |
| { |
| "epoch": 0.028026095944459057, |
| "grad_norm": 6.3203816413879395, |
| "learning_rate": 1.9991533868988119e-07, |
| "loss": 0.0039, |
| "step": 15130 |
| }, |
| { |
| "epoch": 0.028044619471190357, |
| "grad_norm": 1.1486930847167969, |
| "learning_rate": 1.9991521883241615e-07, |
| "loss": 0.0043, |
| "step": 15140 |
| }, |
| { |
| "epoch": 0.02806314299792166, |
| "grad_norm": 0.36191168427467346, |
| "learning_rate": 1.9991509889020427e-07, |
| "loss": 0.0036, |
| "step": 15150 |
| }, |
| { |
| "epoch": 0.02808166652465296, |
| "grad_norm": 1.2858384847640991, |
| "learning_rate": 1.999149788632456e-07, |
| "loss": 0.0034, |
| "step": 15160 |
| }, |
| { |
| "epoch": 0.028100190051384262, |
| "grad_norm": 0.9385653734207153, |
| "learning_rate": 1.999148587515403e-07, |
| "loss": 0.0036, |
| "step": 15170 |
| }, |
| { |
| "epoch": 0.028118713578115566, |
| "grad_norm": 1.1493018865585327, |
| "learning_rate": 1.9991473855508846e-07, |
| "loss": 0.0044, |
| "step": 15180 |
| }, |
| { |
| "epoch": 0.028137237104846866, |
| "grad_norm": 1.142225980758667, |
| "learning_rate": 1.9991461827389016e-07, |
| "loss": 0.0048, |
| "step": 15190 |
| }, |
| { |
| "epoch": 0.028155760631578167, |
| "grad_norm": 0.32843345403671265, |
| "learning_rate": 1.999144979079455e-07, |
| "loss": 0.004, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.02817428415830947, |
| "grad_norm": 1.2703535556793213, |
| "learning_rate": 1.999143774572546e-07, |
| "loss": 0.0041, |
| "step": 15210 |
| }, |
| { |
| "epoch": 0.02819280768504077, |
| "grad_norm": 0.6766828894615173, |
| "learning_rate": 1.999142569218176e-07, |
| "loss": 0.0029, |
| "step": 15220 |
| }, |
| { |
| "epoch": 0.02821133121177207, |
| "grad_norm": 1.405356526374817, |
| "learning_rate": 1.9991413630163454e-07, |
| "loss": 0.0041, |
| "step": 15230 |
| }, |
| { |
| "epoch": 0.02822985473850337, |
| "grad_norm": 0.7553339004516602, |
| "learning_rate": 1.9991401559670554e-07, |
| "loss": 0.0035, |
| "step": 15240 |
| }, |
| { |
| "epoch": 0.028248378265234676, |
| "grad_norm": 0.9763771891593933, |
| "learning_rate": 1.999138948070307e-07, |
| "loss": 0.0044, |
| "step": 15250 |
| }, |
| { |
| "epoch": 0.028266901791965976, |
| "grad_norm": 0.9215732216835022, |
| "learning_rate": 1.9991377393261014e-07, |
| "loss": 0.0037, |
| "step": 15260 |
| }, |
| { |
| "epoch": 0.028285425318697276, |
| "grad_norm": 0.6952494978904724, |
| "learning_rate": 1.9991365297344394e-07, |
| "loss": 0.0041, |
| "step": 15270 |
| }, |
| { |
| "epoch": 0.02830394884542858, |
| "grad_norm": 2.7120444774627686, |
| "learning_rate": 1.999135319295322e-07, |
| "loss": 0.0044, |
| "step": 15280 |
| }, |
| { |
| "epoch": 0.02832247237215988, |
| "grad_norm": 1.354853630065918, |
| "learning_rate": 1.9991341080087505e-07, |
| "loss": 0.0034, |
| "step": 15290 |
| }, |
| { |
| "epoch": 0.02834099589889118, |
| "grad_norm": 0.5792673230171204, |
| "learning_rate": 1.9991328958747258e-07, |
| "loss": 0.0043, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.02835951942562248, |
| "grad_norm": 0.6537497043609619, |
| "learning_rate": 1.999131682893249e-07, |
| "loss": 0.0045, |
| "step": 15310 |
| }, |
| { |
| "epoch": 0.028378042952353785, |
| "grad_norm": 0.7030304670333862, |
| "learning_rate": 1.999130469064321e-07, |
| "loss": 0.005, |
| "step": 15320 |
| }, |
| { |
| "epoch": 0.028396566479085086, |
| "grad_norm": 0.741597056388855, |
| "learning_rate": 1.9991292543879427e-07, |
| "loss": 0.0032, |
| "step": 15330 |
| }, |
| { |
| "epoch": 0.028415090005816386, |
| "grad_norm": 1.2588895559310913, |
| "learning_rate": 1.9991280388641153e-07, |
| "loss": 0.0034, |
| "step": 15340 |
| }, |
| { |
| "epoch": 0.02843361353254769, |
| "grad_norm": 1.1994308233261108, |
| "learning_rate": 1.99912682249284e-07, |
| "loss": 0.0033, |
| "step": 15350 |
| }, |
| { |
| "epoch": 0.02845213705927899, |
| "grad_norm": 0.436038613319397, |
| "learning_rate": 1.9991256052741178e-07, |
| "loss": 0.0034, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.02847066058601029, |
| "grad_norm": 0.6602546572685242, |
| "learning_rate": 1.9991243872079494e-07, |
| "loss": 0.0041, |
| "step": 15370 |
| }, |
| { |
| "epoch": 0.028489184112741595, |
| "grad_norm": 1.5382957458496094, |
| "learning_rate": 1.9991231682943362e-07, |
| "loss": 0.0037, |
| "step": 15380 |
| }, |
| { |
| "epoch": 0.028507707639472895, |
| "grad_norm": 0.8141869306564331, |
| "learning_rate": 1.9991219485332787e-07, |
| "loss": 0.0039, |
| "step": 15390 |
| }, |
| { |
| "epoch": 0.028526231166204195, |
| "grad_norm": 1.6710875034332275, |
| "learning_rate": 1.9991207279247785e-07, |
| "loss": 0.0029, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.028544754692935496, |
| "grad_norm": 1.658119559288025, |
| "learning_rate": 1.9991195064688364e-07, |
| "loss": 0.0039, |
| "step": 15410 |
| }, |
| { |
| "epoch": 0.0285632782196668, |
| "grad_norm": 0.47136446833610535, |
| "learning_rate": 1.9991182841654537e-07, |
| "loss": 0.0033, |
| "step": 15420 |
| }, |
| { |
| "epoch": 0.0285818017463981, |
| "grad_norm": 1.649505615234375, |
| "learning_rate": 1.999117061014631e-07, |
| "loss": 0.004, |
| "step": 15430 |
| }, |
| { |
| "epoch": 0.0286003252731294, |
| "grad_norm": 0.6832846403121948, |
| "learning_rate": 1.9991158370163696e-07, |
| "loss": 0.004, |
| "step": 15440 |
| }, |
| { |
| "epoch": 0.028618848799860704, |
| "grad_norm": 0.29199764132499695, |
| "learning_rate": 1.9991146121706707e-07, |
| "loss": 0.0041, |
| "step": 15450 |
| }, |
| { |
| "epoch": 0.028637372326592005, |
| "grad_norm": 1.0341655015945435, |
| "learning_rate": 1.9991133864775347e-07, |
| "loss": 0.0043, |
| "step": 15460 |
| }, |
| { |
| "epoch": 0.028655895853323305, |
| "grad_norm": 1.6165870428085327, |
| "learning_rate": 1.999112159936963e-07, |
| "loss": 0.0053, |
| "step": 15470 |
| }, |
| { |
| "epoch": 0.02867441938005461, |
| "grad_norm": 0.906106173992157, |
| "learning_rate": 1.999110932548957e-07, |
| "loss": 0.0042, |
| "step": 15480 |
| }, |
| { |
| "epoch": 0.02869294290678591, |
| "grad_norm": 0.7213954925537109, |
| "learning_rate": 1.9991097043135173e-07, |
| "loss": 0.0035, |
| "step": 15490 |
| }, |
| { |
| "epoch": 0.02871146643351721, |
| "grad_norm": 2.238007068634033, |
| "learning_rate": 1.9991084752306452e-07, |
| "loss": 0.005, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.02872998996024851, |
| "grad_norm": 1.570681095123291, |
| "learning_rate": 1.9991072453003418e-07, |
| "loss": 0.0034, |
| "step": 15510 |
| }, |
| { |
| "epoch": 0.028748513486979814, |
| "grad_norm": 1.5118080377578735, |
| "learning_rate": 1.9991060145226078e-07, |
| "loss": 0.0037, |
| "step": 15520 |
| }, |
| { |
| "epoch": 0.028767037013711114, |
| "grad_norm": 2.763939619064331, |
| "learning_rate": 1.9991047828974444e-07, |
| "loss": 0.003, |
| "step": 15530 |
| }, |
| { |
| "epoch": 0.028785560540442415, |
| "grad_norm": 2.990626573562622, |
| "learning_rate": 1.9991035504248525e-07, |
| "loss": 0.0036, |
| "step": 15540 |
| }, |
| { |
| "epoch": 0.02880408406717372, |
| "grad_norm": 1.9799326658248901, |
| "learning_rate": 1.9991023171048336e-07, |
| "loss": 0.0028, |
| "step": 15550 |
| }, |
| { |
| "epoch": 0.02882260759390502, |
| "grad_norm": 2.3236095905303955, |
| "learning_rate": 1.999101082937388e-07, |
| "loss": 0.0053, |
| "step": 15560 |
| }, |
| { |
| "epoch": 0.02884113112063632, |
| "grad_norm": 0.7750484943389893, |
| "learning_rate": 1.9990998479225177e-07, |
| "loss": 0.004, |
| "step": 15570 |
| }, |
| { |
| "epoch": 0.028859654647367623, |
| "grad_norm": 0.8030531406402588, |
| "learning_rate": 1.9990986120602228e-07, |
| "loss": 0.0034, |
| "step": 15580 |
| }, |
| { |
| "epoch": 0.028878178174098924, |
| "grad_norm": 0.8942427635192871, |
| "learning_rate": 1.999097375350505e-07, |
| "loss": 0.003, |
| "step": 15590 |
| }, |
| { |
| "epoch": 0.028896701700830224, |
| "grad_norm": 1.9762060642242432, |
| "learning_rate": 1.9990961377933656e-07, |
| "loss": 0.0042, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.028915225227561524, |
| "grad_norm": 0.7471545338630676, |
| "learning_rate": 1.9990948993888046e-07, |
| "loss": 0.004, |
| "step": 15610 |
| }, |
| { |
| "epoch": 0.02893374875429283, |
| "grad_norm": 0.18691560626029968, |
| "learning_rate": 1.9990936601368239e-07, |
| "loss": 0.0023, |
| "step": 15620 |
| }, |
| { |
| "epoch": 0.02895227228102413, |
| "grad_norm": 1.501625418663025, |
| "learning_rate": 1.9990924200374243e-07, |
| "loss": 0.0039, |
| "step": 15630 |
| }, |
| { |
| "epoch": 0.02897079580775543, |
| "grad_norm": 0.8801237344741821, |
| "learning_rate": 1.9990911790906066e-07, |
| "loss": 0.003, |
| "step": 15640 |
| }, |
| { |
| "epoch": 0.028989319334486733, |
| "grad_norm": 0.9448741674423218, |
| "learning_rate": 1.9990899372963722e-07, |
| "loss": 0.0042, |
| "step": 15650 |
| }, |
| { |
| "epoch": 0.029007842861218033, |
| "grad_norm": 0.9058844447135925, |
| "learning_rate": 1.999088694654722e-07, |
| "loss": 0.0038, |
| "step": 15660 |
| }, |
| { |
| "epoch": 0.029026366387949334, |
| "grad_norm": 0.7671257257461548, |
| "learning_rate": 1.9990874511656576e-07, |
| "loss": 0.003, |
| "step": 15670 |
| }, |
| { |
| "epoch": 0.029044889914680638, |
| "grad_norm": 0.6622403264045715, |
| "learning_rate": 1.999086206829179e-07, |
| "loss": 0.0045, |
| "step": 15680 |
| }, |
| { |
| "epoch": 0.029063413441411938, |
| "grad_norm": 1.1803573369979858, |
| "learning_rate": 1.9990849616452878e-07, |
| "loss": 0.0044, |
| "step": 15690 |
| }, |
| { |
| "epoch": 0.02908193696814324, |
| "grad_norm": 2.5220417976379395, |
| "learning_rate": 1.9990837156139855e-07, |
| "loss": 0.0041, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.02910046049487454, |
| "grad_norm": 0.6443779468536377, |
| "learning_rate": 1.9990824687352722e-07, |
| "loss": 0.0039, |
| "step": 15710 |
| }, |
| { |
| "epoch": 0.029118984021605843, |
| "grad_norm": 1.6230930089950562, |
| "learning_rate": 1.99908122100915e-07, |
| "loss": 0.0038, |
| "step": 15720 |
| }, |
| { |
| "epoch": 0.029137507548337143, |
| "grad_norm": 0.6745863556861877, |
| "learning_rate": 1.999079972435619e-07, |
| "loss": 0.0039, |
| "step": 15730 |
| }, |
| { |
| "epoch": 0.029156031075068443, |
| "grad_norm": 0.601959228515625, |
| "learning_rate": 1.9990787230146808e-07, |
| "loss": 0.0031, |
| "step": 15740 |
| }, |
| { |
| "epoch": 0.029174554601799747, |
| "grad_norm": 1.4307167530059814, |
| "learning_rate": 1.9990774727463365e-07, |
| "loss": 0.004, |
| "step": 15750 |
| }, |
| { |
| "epoch": 0.029193078128531048, |
| "grad_norm": 0.5226728916168213, |
| "learning_rate": 1.999076221630587e-07, |
| "loss": 0.0046, |
| "step": 15760 |
| }, |
| { |
| "epoch": 0.029211601655262348, |
| "grad_norm": 2.857330083847046, |
| "learning_rate": 1.9990749696674336e-07, |
| "loss": 0.0042, |
| "step": 15770 |
| }, |
| { |
| "epoch": 0.02923012518199365, |
| "grad_norm": 0.6622576117515564, |
| "learning_rate": 1.999073716856877e-07, |
| "loss": 0.0042, |
| "step": 15780 |
| }, |
| { |
| "epoch": 0.029248648708724952, |
| "grad_norm": 0.6390544176101685, |
| "learning_rate": 1.9990724631989182e-07, |
| "loss": 0.0034, |
| "step": 15790 |
| }, |
| { |
| "epoch": 0.029267172235456253, |
| "grad_norm": 0.4996614456176758, |
| "learning_rate": 1.9990712086935587e-07, |
| "loss": 0.0033, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.029285695762187553, |
| "grad_norm": 2.8185348510742188, |
| "learning_rate": 1.999069953340799e-07, |
| "loss": 0.0033, |
| "step": 15810 |
| }, |
| { |
| "epoch": 0.029304219288918857, |
| "grad_norm": 1.1217732429504395, |
| "learning_rate": 1.999068697140641e-07, |
| "loss": 0.0054, |
| "step": 15820 |
| }, |
| { |
| "epoch": 0.029322742815650157, |
| "grad_norm": 0.6329953670501709, |
| "learning_rate": 1.9990674400930848e-07, |
| "loss": 0.0035, |
| "step": 15830 |
| }, |
| { |
| "epoch": 0.029341266342381458, |
| "grad_norm": 0.593044638633728, |
| "learning_rate": 1.9990661821981324e-07, |
| "loss": 0.0042, |
| "step": 15840 |
| }, |
| { |
| "epoch": 0.02935978986911276, |
| "grad_norm": 1.7039304971694946, |
| "learning_rate": 1.9990649234557838e-07, |
| "loss": 0.003, |
| "step": 15850 |
| }, |
| { |
| "epoch": 0.029378313395844062, |
| "grad_norm": 0.8086302280426025, |
| "learning_rate": 1.9990636638660412e-07, |
| "loss": 0.0031, |
| "step": 15860 |
| }, |
| { |
| "epoch": 0.029396836922575362, |
| "grad_norm": 0.8163928985595703, |
| "learning_rate": 1.999062403428905e-07, |
| "loss": 0.0031, |
| "step": 15870 |
| }, |
| { |
| "epoch": 0.029415360449306663, |
| "grad_norm": 1.130387306213379, |
| "learning_rate": 1.9990611421443765e-07, |
| "loss": 0.0038, |
| "step": 15880 |
| }, |
| { |
| "epoch": 0.029433883976037967, |
| "grad_norm": 1.3781731128692627, |
| "learning_rate": 1.9990598800124564e-07, |
| "loss": 0.0041, |
| "step": 15890 |
| }, |
| { |
| "epoch": 0.029452407502769267, |
| "grad_norm": 0.6974221467971802, |
| "learning_rate": 1.999058617033146e-07, |
| "loss": 0.004, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.029470931029500567, |
| "grad_norm": 0.6066935062408447, |
| "learning_rate": 1.9990573532064467e-07, |
| "loss": 0.0026, |
| "step": 15910 |
| }, |
| { |
| "epoch": 0.02948945455623187, |
| "grad_norm": 2.2456135749816895, |
| "learning_rate": 1.999056088532359e-07, |
| "loss": 0.0053, |
| "step": 15920 |
| }, |
| { |
| "epoch": 0.02950797808296317, |
| "grad_norm": 1.1532353162765503, |
| "learning_rate": 1.999054823010885e-07, |
| "loss": 0.0039, |
| "step": 15930 |
| }, |
| { |
| "epoch": 0.029526501609694472, |
| "grad_norm": 0.8219150900840759, |
| "learning_rate": 1.999053556642024e-07, |
| "loss": 0.005, |
| "step": 15940 |
| }, |
| { |
| "epoch": 0.029545025136425776, |
| "grad_norm": 1.6324681043624878, |
| "learning_rate": 1.9990522894257786e-07, |
| "loss": 0.0039, |
| "step": 15950 |
| }, |
| { |
| "epoch": 0.029563548663157076, |
| "grad_norm": 0.8843380808830261, |
| "learning_rate": 1.9990510213621493e-07, |
| "loss": 0.0033, |
| "step": 15960 |
| }, |
| { |
| "epoch": 0.029582072189888377, |
| "grad_norm": 0.9674801230430603, |
| "learning_rate": 1.9990497524511376e-07, |
| "loss": 0.0035, |
| "step": 15970 |
| }, |
| { |
| "epoch": 0.029600595716619677, |
| "grad_norm": 4.400674819946289, |
| "learning_rate": 1.999048482692744e-07, |
| "loss": 0.0043, |
| "step": 15980 |
| }, |
| { |
| "epoch": 0.02961911924335098, |
| "grad_norm": 0.9735763669013977, |
| "learning_rate": 1.9990472120869696e-07, |
| "loss": 0.0038, |
| "step": 15990 |
| }, |
| { |
| "epoch": 0.02963764277008228, |
| "grad_norm": 0.7468534708023071, |
| "learning_rate": 1.999045940633816e-07, |
| "loss": 0.0031, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.029656166296813582, |
| "grad_norm": 1.1733306646347046, |
| "learning_rate": 1.999044668333284e-07, |
| "loss": 0.0034, |
| "step": 16010 |
| }, |
| { |
| "epoch": 0.029674689823544886, |
| "grad_norm": 2.700390100479126, |
| "learning_rate": 1.9990433951853742e-07, |
| "loss": 0.004, |
| "step": 16020 |
| }, |
| { |
| "epoch": 0.029693213350276186, |
| "grad_norm": 2.520772695541382, |
| "learning_rate": 1.9990421211900883e-07, |
| "loss": 0.0032, |
| "step": 16030 |
| }, |
| { |
| "epoch": 0.029711736877007486, |
| "grad_norm": 0.8531783819198608, |
| "learning_rate": 1.9990408463474275e-07, |
| "loss": 0.0028, |
| "step": 16040 |
| }, |
| { |
| "epoch": 0.02973026040373879, |
| "grad_norm": 1.6771340370178223, |
| "learning_rate": 1.9990395706573922e-07, |
| "loss": 0.0043, |
| "step": 16050 |
| }, |
| { |
| "epoch": 0.02974878393047009, |
| "grad_norm": 1.1201356649398804, |
| "learning_rate": 1.9990382941199842e-07, |
| "loss": 0.0037, |
| "step": 16060 |
| }, |
| { |
| "epoch": 0.02976730745720139, |
| "grad_norm": 1.218896746635437, |
| "learning_rate": 1.999037016735204e-07, |
| "loss": 0.0044, |
| "step": 16070 |
| }, |
| { |
| "epoch": 0.02978583098393269, |
| "grad_norm": 2.8217110633850098, |
| "learning_rate": 1.9990357385030533e-07, |
| "loss": 0.0034, |
| "step": 16080 |
| }, |
| { |
| "epoch": 0.029804354510663995, |
| "grad_norm": 0.9139496684074402, |
| "learning_rate": 1.9990344594235326e-07, |
| "loss": 0.0044, |
| "step": 16090 |
| }, |
| { |
| "epoch": 0.029822878037395296, |
| "grad_norm": 1.0848513841629028, |
| "learning_rate": 1.999033179496643e-07, |
| "loss": 0.0039, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.029841401564126596, |
| "grad_norm": 1.2054266929626465, |
| "learning_rate": 1.9990318987223862e-07, |
| "loss": 0.0028, |
| "step": 16110 |
| }, |
| { |
| "epoch": 0.0298599250908579, |
| "grad_norm": 1.6671653985977173, |
| "learning_rate": 1.9990306171007624e-07, |
| "loss": 0.0044, |
| "step": 16120 |
| }, |
| { |
| "epoch": 0.0298784486175892, |
| "grad_norm": 2.4361774921417236, |
| "learning_rate": 1.9990293346317734e-07, |
| "loss": 0.0031, |
| "step": 16130 |
| }, |
| { |
| "epoch": 0.0298969721443205, |
| "grad_norm": 0.4015349745750427, |
| "learning_rate": 1.9990280513154204e-07, |
| "loss": 0.0036, |
| "step": 16140 |
| }, |
| { |
| "epoch": 0.0299154956710518, |
| "grad_norm": 1.036508560180664, |
| "learning_rate": 1.9990267671517035e-07, |
| "loss": 0.0033, |
| "step": 16150 |
| }, |
| { |
| "epoch": 0.029934019197783105, |
| "grad_norm": 2.1979353427886963, |
| "learning_rate": 1.999025482140625e-07, |
| "loss": 0.0042, |
| "step": 16160 |
| }, |
| { |
| "epoch": 0.029952542724514405, |
| "grad_norm": 3.6309401988983154, |
| "learning_rate": 1.999024196282185e-07, |
| "loss": 0.0029, |
| "step": 16170 |
| }, |
| { |
| "epoch": 0.029971066251245706, |
| "grad_norm": 1.1090561151504517, |
| "learning_rate": 1.9990229095763854e-07, |
| "loss": 0.0052, |
| "step": 16180 |
| }, |
| { |
| "epoch": 0.02998958977797701, |
| "grad_norm": 1.6074210405349731, |
| "learning_rate": 1.9990216220232265e-07, |
| "loss": 0.0048, |
| "step": 16190 |
| }, |
| { |
| "epoch": 0.03000811330470831, |
| "grad_norm": 0.9984138607978821, |
| "learning_rate": 1.9990203336227101e-07, |
| "loss": 0.0049, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.03002663683143961, |
| "grad_norm": 0.7897469997406006, |
| "learning_rate": 1.9990190443748366e-07, |
| "loss": 0.0031, |
| "step": 16210 |
| }, |
| { |
| "epoch": 0.030045160358170914, |
| "grad_norm": 1.1137150526046753, |
| "learning_rate": 1.999017754279608e-07, |
| "loss": 0.0036, |
| "step": 16220 |
| }, |
| { |
| "epoch": 0.030063683884902215, |
| "grad_norm": 1.1875672340393066, |
| "learning_rate": 1.9990164633370247e-07, |
| "loss": 0.0051, |
| "step": 16230 |
| }, |
| { |
| "epoch": 0.030082207411633515, |
| "grad_norm": 1.1474882364273071, |
| "learning_rate": 1.999015171547088e-07, |
| "loss": 0.0034, |
| "step": 16240 |
| }, |
| { |
| "epoch": 0.030100730938364816, |
| "grad_norm": 0.7690886855125427, |
| "learning_rate": 1.999013878909799e-07, |
| "loss": 0.0039, |
| "step": 16250 |
| }, |
| { |
| "epoch": 0.03011925446509612, |
| "grad_norm": 2.3962886333465576, |
| "learning_rate": 1.9990125854251586e-07, |
| "loss": 0.0044, |
| "step": 16260 |
| }, |
| { |
| "epoch": 0.03013777799182742, |
| "grad_norm": 0.533268928527832, |
| "learning_rate": 1.9990112910931678e-07, |
| "loss": 0.0035, |
| "step": 16270 |
| }, |
| { |
| "epoch": 0.03015630151855872, |
| "grad_norm": 0.5454217791557312, |
| "learning_rate": 1.9990099959138282e-07, |
| "loss": 0.0033, |
| "step": 16280 |
| }, |
| { |
| "epoch": 0.030174825045290024, |
| "grad_norm": 0.9992498755455017, |
| "learning_rate": 1.999008699887141e-07, |
| "loss": 0.003, |
| "step": 16290 |
| }, |
| { |
| "epoch": 0.030193348572021324, |
| "grad_norm": 1.3405163288116455, |
| "learning_rate": 1.9990074030131066e-07, |
| "loss": 0.0039, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.030211872098752625, |
| "grad_norm": 0.401813268661499, |
| "learning_rate": 1.9990061052917264e-07, |
| "loss": 0.0045, |
| "step": 16310 |
| }, |
| { |
| "epoch": 0.03023039562548393, |
| "grad_norm": 1.077160120010376, |
| "learning_rate": 1.9990048067230017e-07, |
| "loss": 0.0031, |
| "step": 16320 |
| }, |
| { |
| "epoch": 0.03024891915221523, |
| "grad_norm": 1.2192018032073975, |
| "learning_rate": 1.9990035073069333e-07, |
| "loss": 0.0047, |
| "step": 16330 |
| }, |
| { |
| "epoch": 0.03026744267894653, |
| "grad_norm": 0.524927020072937, |
| "learning_rate": 1.9990022070435227e-07, |
| "loss": 0.0035, |
| "step": 16340 |
| }, |
| { |
| "epoch": 0.03028596620567783, |
| "grad_norm": 0.6730382442474365, |
| "learning_rate": 1.9990009059327706e-07, |
| "loss": 0.0031, |
| "step": 16350 |
| }, |
| { |
| "epoch": 0.030304489732409134, |
| "grad_norm": 2.4915831089019775, |
| "learning_rate": 1.9989996039746783e-07, |
| "loss": 0.0044, |
| "step": 16360 |
| }, |
| { |
| "epoch": 0.030323013259140434, |
| "grad_norm": 1.1308013200759888, |
| "learning_rate": 1.998998301169247e-07, |
| "loss": 0.0022, |
| "step": 16370 |
| }, |
| { |
| "epoch": 0.030341536785871735, |
| "grad_norm": 1.9372681379318237, |
| "learning_rate": 1.9989969975164775e-07, |
| "loss": 0.0037, |
| "step": 16380 |
| }, |
| { |
| "epoch": 0.03036006031260304, |
| "grad_norm": 0.9105736017227173, |
| "learning_rate": 1.9989956930163712e-07, |
| "loss": 0.0031, |
| "step": 16390 |
| }, |
| { |
| "epoch": 0.03037858383933434, |
| "grad_norm": 3.553898811340332, |
| "learning_rate": 1.998994387668929e-07, |
| "loss": 0.0035, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.03039710736606564, |
| "grad_norm": 3.223512649536133, |
| "learning_rate": 1.9989930814741522e-07, |
| "loss": 0.0027, |
| "step": 16410 |
| }, |
| { |
| "epoch": 0.030415630892796943, |
| "grad_norm": 0.482815682888031, |
| "learning_rate": 1.9989917744320418e-07, |
| "loss": 0.004, |
| "step": 16420 |
| }, |
| { |
| "epoch": 0.030434154419528243, |
| "grad_norm": 0.7999683022499084, |
| "learning_rate": 1.9989904665425989e-07, |
| "loss": 0.003, |
| "step": 16430 |
| }, |
| { |
| "epoch": 0.030452677946259544, |
| "grad_norm": 0.9879207611083984, |
| "learning_rate": 1.998989157805824e-07, |
| "loss": 0.004, |
| "step": 16440 |
| }, |
| { |
| "epoch": 0.030471201472990844, |
| "grad_norm": 0.9049139618873596, |
| "learning_rate": 1.9989878482217197e-07, |
| "loss": 0.004, |
| "step": 16450 |
| }, |
| { |
| "epoch": 0.030489724999722148, |
| "grad_norm": 1.9240611791610718, |
| "learning_rate": 1.9989865377902858e-07, |
| "loss": 0.0031, |
| "step": 16460 |
| }, |
| { |
| "epoch": 0.03050824852645345, |
| "grad_norm": 1.0779393911361694, |
| "learning_rate": 1.9989852265115242e-07, |
| "loss": 0.0032, |
| "step": 16470 |
| }, |
| { |
| "epoch": 0.03052677205318475, |
| "grad_norm": 7.229299068450928, |
| "learning_rate": 1.9989839143854355e-07, |
| "loss": 0.0054, |
| "step": 16480 |
| }, |
| { |
| "epoch": 0.030545295579916053, |
| "grad_norm": 2.4070465564727783, |
| "learning_rate": 1.9989826014120208e-07, |
| "loss": 0.0049, |
| "step": 16490 |
| }, |
| { |
| "epoch": 0.030563819106647353, |
| "grad_norm": 0.1604072004556656, |
| "learning_rate": 1.9989812875912815e-07, |
| "loss": 0.0043, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.030582342633378654, |
| "grad_norm": 0.5973244309425354, |
| "learning_rate": 1.9989799729232187e-07, |
| "loss": 0.0042, |
| "step": 16510 |
| }, |
| { |
| "epoch": 0.030600866160109954, |
| "grad_norm": 0.7293545603752136, |
| "learning_rate": 1.9989786574078333e-07, |
| "loss": 0.0042, |
| "step": 16520 |
| }, |
| { |
| "epoch": 0.030619389686841258, |
| "grad_norm": 1.0312001705169678, |
| "learning_rate": 1.9989773410451266e-07, |
| "loss": 0.0026, |
| "step": 16530 |
| }, |
| { |
| "epoch": 0.030637913213572558, |
| "grad_norm": 0.4179084897041321, |
| "learning_rate": 1.9989760238351e-07, |
| "loss": 0.0037, |
| "step": 16540 |
| }, |
| { |
| "epoch": 0.03065643674030386, |
| "grad_norm": 0.7142603397369385, |
| "learning_rate": 1.9989747057777535e-07, |
| "loss": 0.0034, |
| "step": 16550 |
| }, |
| { |
| "epoch": 0.030674960267035162, |
| "grad_norm": 1.518131136894226, |
| "learning_rate": 1.9989733868730897e-07, |
| "loss": 0.0043, |
| "step": 16560 |
| }, |
| { |
| "epoch": 0.030693483793766463, |
| "grad_norm": 1.2144932746887207, |
| "learning_rate": 1.9989720671211086e-07, |
| "loss": 0.0032, |
| "step": 16570 |
| }, |
| { |
| "epoch": 0.030712007320497763, |
| "grad_norm": 2.125108242034912, |
| "learning_rate": 1.9989707465218118e-07, |
| "loss": 0.0039, |
| "step": 16580 |
| }, |
| { |
| "epoch": 0.030730530847229067, |
| "grad_norm": 1.7034671306610107, |
| "learning_rate": 1.9989694250752005e-07, |
| "loss": 0.0032, |
| "step": 16590 |
| }, |
| { |
| "epoch": 0.030749054373960368, |
| "grad_norm": 1.247122883796692, |
| "learning_rate": 1.9989681027812754e-07, |
| "loss": 0.0047, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.030767577900691668, |
| "grad_norm": 1.009826898574829, |
| "learning_rate": 1.998966779640038e-07, |
| "loss": 0.0039, |
| "step": 16610 |
| }, |
| { |
| "epoch": 0.03078610142742297, |
| "grad_norm": 1.9136264324188232, |
| "learning_rate": 1.9989654556514896e-07, |
| "loss": 0.0024, |
| "step": 16620 |
| }, |
| { |
| "epoch": 0.030804624954154272, |
| "grad_norm": 0.38534414768218994, |
| "learning_rate": 1.9989641308156307e-07, |
| "loss": 0.0039, |
| "step": 16630 |
| }, |
| { |
| "epoch": 0.030823148480885573, |
| "grad_norm": 0.7698262929916382, |
| "learning_rate": 1.9989628051324626e-07, |
| "loss": 0.0036, |
| "step": 16640 |
| }, |
| { |
| "epoch": 0.030841672007616873, |
| "grad_norm": 0.27269813418388367, |
| "learning_rate": 1.998961478601987e-07, |
| "loss": 0.0026, |
| "step": 16650 |
| }, |
| { |
| "epoch": 0.030860195534348177, |
| "grad_norm": 1.086376667022705, |
| "learning_rate": 1.9989601512242043e-07, |
| "loss": 0.0035, |
| "step": 16660 |
| }, |
| { |
| "epoch": 0.030878719061079477, |
| "grad_norm": 0.6532080769538879, |
| "learning_rate": 1.9989588229991163e-07, |
| "loss": 0.002, |
| "step": 16670 |
| }, |
| { |
| "epoch": 0.030897242587810778, |
| "grad_norm": 1.0692529678344727, |
| "learning_rate": 1.9989574939267235e-07, |
| "loss": 0.0046, |
| "step": 16680 |
| }, |
| { |
| "epoch": 0.03091576611454208, |
| "grad_norm": 1.38497793674469, |
| "learning_rate": 1.9989561640070272e-07, |
| "loss": 0.0035, |
| "step": 16690 |
| }, |
| { |
| "epoch": 0.030934289641273382, |
| "grad_norm": 0.83016437292099, |
| "learning_rate": 1.9989548332400287e-07, |
| "loss": 0.0036, |
| "step": 16700 |
| }, |
| { |
| "epoch": 0.030952813168004682, |
| "grad_norm": 1.6940925121307373, |
| "learning_rate": 1.9989535016257292e-07, |
| "loss": 0.004, |
| "step": 16710 |
| }, |
| { |
| "epoch": 0.030971336694735983, |
| "grad_norm": 0.19783420860767365, |
| "learning_rate": 1.9989521691641296e-07, |
| "loss": 0.0032, |
| "step": 16720 |
| }, |
| { |
| "epoch": 0.030989860221467286, |
| "grad_norm": 0.9069746732711792, |
| "learning_rate": 1.998950835855231e-07, |
| "loss": 0.003, |
| "step": 16730 |
| }, |
| { |
| "epoch": 0.031008383748198587, |
| "grad_norm": 0.8623332977294922, |
| "learning_rate": 1.998949501699035e-07, |
| "loss": 0.0052, |
| "step": 16740 |
| }, |
| { |
| "epoch": 0.031026907274929887, |
| "grad_norm": 0.7303258776664734, |
| "learning_rate": 1.9989481666955416e-07, |
| "loss": 0.0038, |
| "step": 16750 |
| }, |
| { |
| "epoch": 0.03104543080166119, |
| "grad_norm": 0.8383782505989075, |
| "learning_rate": 1.9989468308447536e-07, |
| "loss": 0.0033, |
| "step": 16760 |
| }, |
| { |
| "epoch": 0.03106395432839249, |
| "grad_norm": 0.5982236862182617, |
| "learning_rate": 1.9989454941466705e-07, |
| "loss": 0.0028, |
| "step": 16770 |
| }, |
| { |
| "epoch": 0.031082477855123792, |
| "grad_norm": 0.6020573377609253, |
| "learning_rate": 1.9989441566012946e-07, |
| "loss": 0.0033, |
| "step": 16780 |
| }, |
| { |
| "epoch": 0.031101001381855096, |
| "grad_norm": 1.1083521842956543, |
| "learning_rate": 1.9989428182086266e-07, |
| "loss": 0.0035, |
| "step": 16790 |
| }, |
| { |
| "epoch": 0.031119524908586396, |
| "grad_norm": 1.1133754253387451, |
| "learning_rate": 1.998941478968667e-07, |
| "loss": 0.0043, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.031138048435317697, |
| "grad_norm": 0.41166236996650696, |
| "learning_rate": 1.9989401388814184e-07, |
| "loss": 0.0034, |
| "step": 16810 |
| }, |
| { |
| "epoch": 0.031156571962048997, |
| "grad_norm": 1.206494688987732, |
| "learning_rate": 1.9989387979468807e-07, |
| "loss": 0.004, |
| "step": 16820 |
| }, |
| { |
| "epoch": 0.0311750954887803, |
| "grad_norm": 0.6977930665016174, |
| "learning_rate": 1.9989374561650555e-07, |
| "loss": 0.0038, |
| "step": 16830 |
| }, |
| { |
| "epoch": 0.0311936190155116, |
| "grad_norm": 0.5044334530830383, |
| "learning_rate": 1.998936113535944e-07, |
| "loss": 0.0034, |
| "step": 16840 |
| }, |
| { |
| "epoch": 0.0312121425422429, |
| "grad_norm": 0.6841486096382141, |
| "learning_rate": 1.9989347700595468e-07, |
| "loss": 0.0046, |
| "step": 16850 |
| }, |
| { |
| "epoch": 0.031230666068974205, |
| "grad_norm": 1.0014703273773193, |
| "learning_rate": 1.9989334257358662e-07, |
| "loss": 0.0036, |
| "step": 16860 |
| }, |
| { |
| "epoch": 0.031249189595705506, |
| "grad_norm": 0.5336496829986572, |
| "learning_rate": 1.998932080564902e-07, |
| "loss": 0.0042, |
| "step": 16870 |
| }, |
| { |
| "epoch": 0.031267713122436806, |
| "grad_norm": 0.29383689165115356, |
| "learning_rate": 1.998930734546656e-07, |
| "loss": 0.0026, |
| "step": 16880 |
| }, |
| { |
| "epoch": 0.03128623664916811, |
| "grad_norm": 0.7651355862617493, |
| "learning_rate": 1.9989293876811297e-07, |
| "loss": 0.0038, |
| "step": 16890 |
| }, |
| { |
| "epoch": 0.03130476017589941, |
| "grad_norm": 1.98328697681427, |
| "learning_rate": 1.9989280399683234e-07, |
| "loss": 0.0035, |
| "step": 16900 |
| }, |
| { |
| "epoch": 0.03132328370263071, |
| "grad_norm": 0.43235403299331665, |
| "learning_rate": 1.998926691408239e-07, |
| "loss": 0.0046, |
| "step": 16910 |
| }, |
| { |
| "epoch": 0.031341807229362015, |
| "grad_norm": 0.7309406995773315, |
| "learning_rate": 1.9989253420008772e-07, |
| "loss": 0.004, |
| "step": 16920 |
| }, |
| { |
| "epoch": 0.03136033075609331, |
| "grad_norm": 0.6414340734481812, |
| "learning_rate": 1.9989239917462388e-07, |
| "loss": 0.0033, |
| "step": 16930 |
| }, |
| { |
| "epoch": 0.031378854282824616, |
| "grad_norm": 0.5578116774559021, |
| "learning_rate": 1.998922640644326e-07, |
| "loss": 0.0029, |
| "step": 16940 |
| }, |
| { |
| "epoch": 0.03139737780955592, |
| "grad_norm": 2.857933521270752, |
| "learning_rate": 1.998921288695139e-07, |
| "loss": 0.0034, |
| "step": 16950 |
| }, |
| { |
| "epoch": 0.031415901336287216, |
| "grad_norm": 0.676051139831543, |
| "learning_rate": 1.9989199358986798e-07, |
| "loss": 0.002, |
| "step": 16960 |
| }, |
| { |
| "epoch": 0.03143442486301852, |
| "grad_norm": 0.783967137336731, |
| "learning_rate": 1.9989185822549482e-07, |
| "loss": 0.0033, |
| "step": 16970 |
| }, |
| { |
| "epoch": 0.031452948389749824, |
| "grad_norm": 1.2051674127578735, |
| "learning_rate": 1.9989172277639469e-07, |
| "loss": 0.0041, |
| "step": 16980 |
| }, |
| { |
| "epoch": 0.03147147191648112, |
| "grad_norm": 0.4563734531402588, |
| "learning_rate": 1.9989158724256762e-07, |
| "loss": 0.0034, |
| "step": 16990 |
| }, |
| { |
| "epoch": 0.031489995443212425, |
| "grad_norm": 0.91441410779953, |
| "learning_rate": 1.9989145162401372e-07, |
| "loss": 0.0038, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.03150851896994373, |
| "grad_norm": 0.30844759941101074, |
| "learning_rate": 1.9989131592073313e-07, |
| "loss": 0.0039, |
| "step": 17010 |
| }, |
| { |
| "epoch": 0.031527042496675026, |
| "grad_norm": 0.5497186183929443, |
| "learning_rate": 1.9989118013272598e-07, |
| "loss": 0.0047, |
| "step": 17020 |
| }, |
| { |
| "epoch": 0.03154556602340633, |
| "grad_norm": 0.7321539521217346, |
| "learning_rate": 1.9989104425999234e-07, |
| "loss": 0.0052, |
| "step": 17030 |
| }, |
| { |
| "epoch": 0.031564089550137626, |
| "grad_norm": 0.495615154504776, |
| "learning_rate": 1.9989090830253236e-07, |
| "loss": 0.0029, |
| "step": 17040 |
| }, |
| { |
| "epoch": 0.03158261307686893, |
| "grad_norm": 0.9451618790626526, |
| "learning_rate": 1.9989077226034613e-07, |
| "loss": 0.0037, |
| "step": 17050 |
| }, |
| { |
| "epoch": 0.031601136603600234, |
| "grad_norm": 3.3871376514434814, |
| "learning_rate": 1.9989063613343382e-07, |
| "loss": 0.0038, |
| "step": 17060 |
| }, |
| { |
| "epoch": 0.03161966013033153, |
| "grad_norm": 1.7632180452346802, |
| "learning_rate": 1.9989049992179545e-07, |
| "loss": 0.0042, |
| "step": 17070 |
| }, |
| { |
| "epoch": 0.031638183657062835, |
| "grad_norm": 0.9597700238227844, |
| "learning_rate": 1.9989036362543123e-07, |
| "loss": 0.0035, |
| "step": 17080 |
| }, |
| { |
| "epoch": 0.03165670718379414, |
| "grad_norm": 0.845029890537262, |
| "learning_rate": 1.9989022724434124e-07, |
| "loss": 0.0036, |
| "step": 17090 |
| }, |
| { |
| "epoch": 0.031675230710525436, |
| "grad_norm": 0.6060001850128174, |
| "learning_rate": 1.9989009077852557e-07, |
| "loss": 0.0041, |
| "step": 17100 |
| }, |
| { |
| "epoch": 0.03169375423725674, |
| "grad_norm": 1.372538685798645, |
| "learning_rate": 1.998899542279844e-07, |
| "loss": 0.0038, |
| "step": 17110 |
| }, |
| { |
| "epoch": 0.031712277763988043, |
| "grad_norm": 1.2644238471984863, |
| "learning_rate": 1.9988981759271773e-07, |
| "loss": 0.0042, |
| "step": 17120 |
| }, |
| { |
| "epoch": 0.03173080129071934, |
| "grad_norm": 1.0968735218048096, |
| "learning_rate": 1.9988968087272581e-07, |
| "loss": 0.0036, |
| "step": 17130 |
| }, |
| { |
| "epoch": 0.031749324817450644, |
| "grad_norm": 0.5491446256637573, |
| "learning_rate": 1.9988954406800866e-07, |
| "loss": 0.003, |
| "step": 17140 |
| }, |
| { |
| "epoch": 0.03176784834418195, |
| "grad_norm": 1.2909908294677734, |
| "learning_rate": 1.9988940717856645e-07, |
| "loss": 0.0029, |
| "step": 17150 |
| }, |
| { |
| "epoch": 0.031786371870913245, |
| "grad_norm": 0.8242806792259216, |
| "learning_rate": 1.998892702043993e-07, |
| "loss": 0.003, |
| "step": 17160 |
| }, |
| { |
| "epoch": 0.03180489539764455, |
| "grad_norm": 0.9386950135231018, |
| "learning_rate": 1.998891331455073e-07, |
| "loss": 0.0046, |
| "step": 17170 |
| }, |
| { |
| "epoch": 0.03182341892437585, |
| "grad_norm": 0.6727918982505798, |
| "learning_rate": 1.9988899600189053e-07, |
| "loss": 0.0038, |
| "step": 17180 |
| }, |
| { |
| "epoch": 0.03184194245110715, |
| "grad_norm": 4.096502780914307, |
| "learning_rate": 1.9988885877354917e-07, |
| "loss": 0.0042, |
| "step": 17190 |
| }, |
| { |
| "epoch": 0.031860465977838454, |
| "grad_norm": 1.2213850021362305, |
| "learning_rate": 1.998887214604833e-07, |
| "loss": 0.0045, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.03187898950456976, |
| "grad_norm": 0.615985095500946, |
| "learning_rate": 1.9988858406269306e-07, |
| "loss": 0.0045, |
| "step": 17210 |
| }, |
| { |
| "epoch": 0.031897513031301054, |
| "grad_norm": 3.431279182434082, |
| "learning_rate": 1.9988844658017858e-07, |
| "loss": 0.0033, |
| "step": 17220 |
| }, |
| { |
| "epoch": 0.03191603655803236, |
| "grad_norm": 1.5382513999938965, |
| "learning_rate": 1.9988830901293994e-07, |
| "loss": 0.0049, |
| "step": 17230 |
| }, |
| { |
| "epoch": 0.031934560084763655, |
| "grad_norm": 0.7988921403884888, |
| "learning_rate": 1.9988817136097723e-07, |
| "loss": 0.0034, |
| "step": 17240 |
| }, |
| { |
| "epoch": 0.03195308361149496, |
| "grad_norm": 0.2650558352470398, |
| "learning_rate": 1.9988803362429066e-07, |
| "loss": 0.0025, |
| "step": 17250 |
| }, |
| { |
| "epoch": 0.03197160713822626, |
| "grad_norm": 0.8157468438148499, |
| "learning_rate": 1.9988789580288028e-07, |
| "loss": 0.0043, |
| "step": 17260 |
| }, |
| { |
| "epoch": 0.03199013066495756, |
| "grad_norm": 0.7332100868225098, |
| "learning_rate": 1.998877578967462e-07, |
| "loss": 0.0028, |
| "step": 17270 |
| }, |
| { |
| "epoch": 0.032008654191688864, |
| "grad_norm": 1.3929975032806396, |
| "learning_rate": 1.9988761990588857e-07, |
| "loss": 0.0029, |
| "step": 17280 |
| }, |
| { |
| "epoch": 0.03202717771842017, |
| "grad_norm": 1.933868169784546, |
| "learning_rate": 1.998874818303075e-07, |
| "loss": 0.004, |
| "step": 17290 |
| }, |
| { |
| "epoch": 0.032045701245151464, |
| "grad_norm": 0.7339229583740234, |
| "learning_rate": 1.9988734367000308e-07, |
| "loss": 0.0039, |
| "step": 17300 |
| }, |
| { |
| "epoch": 0.03206422477188277, |
| "grad_norm": 2.134631633758545, |
| "learning_rate": 1.9988720542497549e-07, |
| "loss": 0.0037, |
| "step": 17310 |
| }, |
| { |
| "epoch": 0.03208274829861407, |
| "grad_norm": 2.0203869342803955, |
| "learning_rate": 1.9988706709522477e-07, |
| "loss": 0.0028, |
| "step": 17320 |
| }, |
| { |
| "epoch": 0.03210127182534537, |
| "grad_norm": 0.9169048070907593, |
| "learning_rate": 1.998869286807511e-07, |
| "loss": 0.0038, |
| "step": 17330 |
| }, |
| { |
| "epoch": 0.03211979535207667, |
| "grad_norm": 0.39312538504600525, |
| "learning_rate": 1.9988679018155455e-07, |
| "loss": 0.0042, |
| "step": 17340 |
| }, |
| { |
| "epoch": 0.03213831887880798, |
| "grad_norm": 2.06289005279541, |
| "learning_rate": 1.9988665159763524e-07, |
| "loss": 0.0031, |
| "step": 17350 |
| }, |
| { |
| "epoch": 0.032156842405539274, |
| "grad_norm": 0.28996264934539795, |
| "learning_rate": 1.9988651292899334e-07, |
| "loss": 0.0033, |
| "step": 17360 |
| }, |
| { |
| "epoch": 0.03217536593227058, |
| "grad_norm": 0.501732587814331, |
| "learning_rate": 1.998863741756289e-07, |
| "loss": 0.0022, |
| "step": 17370 |
| }, |
| { |
| "epoch": 0.03219388945900188, |
| "grad_norm": 0.4125761389732361, |
| "learning_rate": 1.998862353375421e-07, |
| "loss": 0.0035, |
| "step": 17380 |
| }, |
| { |
| "epoch": 0.03221241298573318, |
| "grad_norm": 0.36984291672706604, |
| "learning_rate": 1.99886096414733e-07, |
| "loss": 0.0033, |
| "step": 17390 |
| }, |
| { |
| "epoch": 0.03223093651246448, |
| "grad_norm": 3.536524534225464, |
| "learning_rate": 1.9988595740720177e-07, |
| "loss": 0.0039, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.03224946003919578, |
| "grad_norm": 0.5179738402366638, |
| "learning_rate": 1.998858183149485e-07, |
| "loss": 0.0041, |
| "step": 17410 |
| }, |
| { |
| "epoch": 0.03226798356592708, |
| "grad_norm": 0.7969852089881897, |
| "learning_rate": 1.9988567913797332e-07, |
| "loss": 0.0042, |
| "step": 17420 |
| }, |
| { |
| "epoch": 0.03228650709265839, |
| "grad_norm": 2.993321657180786, |
| "learning_rate": 1.9988553987627633e-07, |
| "loss": 0.0045, |
| "step": 17430 |
| }, |
| { |
| "epoch": 0.032305030619389684, |
| "grad_norm": 0.5006862282752991, |
| "learning_rate": 1.9988540052985766e-07, |
| "loss": 0.0029, |
| "step": 17440 |
| }, |
| { |
| "epoch": 0.03232355414612099, |
| "grad_norm": 0.8920158743858337, |
| "learning_rate": 1.9988526109871742e-07, |
| "loss": 0.0032, |
| "step": 17450 |
| }, |
| { |
| "epoch": 0.03234207767285229, |
| "grad_norm": 9.35921573638916, |
| "learning_rate": 1.9988512158285574e-07, |
| "loss": 0.0034, |
| "step": 17460 |
| }, |
| { |
| "epoch": 0.03236060119958359, |
| "grad_norm": 0.2036902755498886, |
| "learning_rate": 1.9988498198227272e-07, |
| "loss": 0.0028, |
| "step": 17470 |
| }, |
| { |
| "epoch": 0.03237912472631489, |
| "grad_norm": 0.5074575543403625, |
| "learning_rate": 1.998848422969685e-07, |
| "loss": 0.0054, |
| "step": 17480 |
| }, |
| { |
| "epoch": 0.032397648253046196, |
| "grad_norm": 0.9770308136940002, |
| "learning_rate": 1.9988470252694322e-07, |
| "loss": 0.0047, |
| "step": 17490 |
| }, |
| { |
| "epoch": 0.03241617177977749, |
| "grad_norm": 0.9221186637878418, |
| "learning_rate": 1.9988456267219695e-07, |
| "loss": 0.0042, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.0324346953065088, |
| "grad_norm": 0.6137563586235046, |
| "learning_rate": 1.998844227327298e-07, |
| "loss": 0.0025, |
| "step": 17510 |
| }, |
| { |
| "epoch": 0.0324532188332401, |
| "grad_norm": 0.7591598033905029, |
| "learning_rate": 1.9988428270854193e-07, |
| "loss": 0.0027, |
| "step": 17520 |
| }, |
| { |
| "epoch": 0.0324717423599714, |
| "grad_norm": 0.8489348888397217, |
| "learning_rate": 1.9988414259963347e-07, |
| "loss": 0.0029, |
| "step": 17530 |
| }, |
| { |
| "epoch": 0.0324902658867027, |
| "grad_norm": 1.7605209350585938, |
| "learning_rate": 1.998840024060045e-07, |
| "loss": 0.0033, |
| "step": 17540 |
| }, |
| { |
| "epoch": 0.032508789413434006, |
| "grad_norm": 0.45369818806648254, |
| "learning_rate": 1.9988386212765516e-07, |
| "loss": 0.0039, |
| "step": 17550 |
| }, |
| { |
| "epoch": 0.0325273129401653, |
| "grad_norm": 0.9554593563079834, |
| "learning_rate": 1.9988372176458555e-07, |
| "loss": 0.0034, |
| "step": 17560 |
| }, |
| { |
| "epoch": 0.032545836466896606, |
| "grad_norm": 1.0483547449111938, |
| "learning_rate": 1.9988358131679578e-07, |
| "loss": 0.0028, |
| "step": 17570 |
| }, |
| { |
| "epoch": 0.03256435999362791, |
| "grad_norm": 1.1310410499572754, |
| "learning_rate": 1.9988344078428602e-07, |
| "loss": 0.003, |
| "step": 17580 |
| }, |
| { |
| "epoch": 0.03258288352035921, |
| "grad_norm": 1.6612110137939453, |
| "learning_rate": 1.9988330016705636e-07, |
| "loss": 0.0044, |
| "step": 17590 |
| }, |
| { |
| "epoch": 0.03260140704709051, |
| "grad_norm": 1.247881531715393, |
| "learning_rate": 1.998831594651069e-07, |
| "loss": 0.0054, |
| "step": 17600 |
| }, |
| { |
| "epoch": 0.03261993057382181, |
| "grad_norm": 1.163558006286621, |
| "learning_rate": 1.9988301867843777e-07, |
| "loss": 0.0033, |
| "step": 17610 |
| }, |
| { |
| "epoch": 0.03263845410055311, |
| "grad_norm": 2.3126580715179443, |
| "learning_rate": 1.9988287780704912e-07, |
| "loss": 0.0036, |
| "step": 17620 |
| }, |
| { |
| "epoch": 0.032656977627284416, |
| "grad_norm": 1.012695550918579, |
| "learning_rate": 1.9988273685094104e-07, |
| "loss": 0.0031, |
| "step": 17630 |
| }, |
| { |
| "epoch": 0.03267550115401571, |
| "grad_norm": 0.30023452639579773, |
| "learning_rate": 1.9988259581011362e-07, |
| "loss": 0.0042, |
| "step": 17640 |
| }, |
| { |
| "epoch": 0.032694024680747016, |
| "grad_norm": 1.0222716331481934, |
| "learning_rate": 1.9988245468456705e-07, |
| "loss": 0.0042, |
| "step": 17650 |
| }, |
| { |
| "epoch": 0.03271254820747832, |
| "grad_norm": 1.635694146156311, |
| "learning_rate": 1.9988231347430143e-07, |
| "loss": 0.0031, |
| "step": 17660 |
| }, |
| { |
| "epoch": 0.03273107173420962, |
| "grad_norm": 2.207439661026001, |
| "learning_rate": 1.9988217217931685e-07, |
| "loss": 0.0035, |
| "step": 17670 |
| }, |
| { |
| "epoch": 0.03274959526094092, |
| "grad_norm": 1.2231603860855103, |
| "learning_rate": 1.9988203079961344e-07, |
| "loss": 0.0035, |
| "step": 17680 |
| }, |
| { |
| "epoch": 0.032768118787672225, |
| "grad_norm": 0.8158063888549805, |
| "learning_rate": 1.9988188933519133e-07, |
| "loss": 0.0035, |
| "step": 17690 |
| }, |
| { |
| "epoch": 0.03278664231440352, |
| "grad_norm": 0.5225628614425659, |
| "learning_rate": 1.9988174778605062e-07, |
| "loss": 0.0031, |
| "step": 17700 |
| }, |
| { |
| "epoch": 0.032805165841134826, |
| "grad_norm": 0.8148209452629089, |
| "learning_rate": 1.9988160615219148e-07, |
| "loss": 0.0032, |
| "step": 17710 |
| }, |
| { |
| "epoch": 0.03282368936786613, |
| "grad_norm": 8.615594863891602, |
| "learning_rate": 1.9988146443361396e-07, |
| "loss": 0.0035, |
| "step": 17720 |
| }, |
| { |
| "epoch": 0.032842212894597426, |
| "grad_norm": 0.5120860934257507, |
| "learning_rate": 1.998813226303182e-07, |
| "loss": 0.0035, |
| "step": 17730 |
| }, |
| { |
| "epoch": 0.03286073642132873, |
| "grad_norm": 0.4185231924057007, |
| "learning_rate": 1.9988118074230437e-07, |
| "loss": 0.0031, |
| "step": 17740 |
| }, |
| { |
| "epoch": 0.032879259948060034, |
| "grad_norm": 0.8797296285629272, |
| "learning_rate": 1.9988103876957257e-07, |
| "loss": 0.0029, |
| "step": 17750 |
| }, |
| { |
| "epoch": 0.03289778347479133, |
| "grad_norm": 0.7382543087005615, |
| "learning_rate": 1.9988089671212287e-07, |
| "loss": 0.0047, |
| "step": 17760 |
| }, |
| { |
| "epoch": 0.032916307001522635, |
| "grad_norm": 1.5534471273422241, |
| "learning_rate": 1.9988075456995547e-07, |
| "loss": 0.0059, |
| "step": 17770 |
| }, |
| { |
| "epoch": 0.03293483052825393, |
| "grad_norm": 1.6365872621536255, |
| "learning_rate": 1.9988061234307038e-07, |
| "loss": 0.0036, |
| "step": 17780 |
| }, |
| { |
| "epoch": 0.032953354054985236, |
| "grad_norm": 1.5077663660049438, |
| "learning_rate": 1.9988047003146783e-07, |
| "loss": 0.0041, |
| "step": 17790 |
| }, |
| { |
| "epoch": 0.03297187758171654, |
| "grad_norm": 0.6841549277305603, |
| "learning_rate": 1.998803276351479e-07, |
| "loss": 0.0025, |
| "step": 17800 |
| }, |
| { |
| "epoch": 0.03299040110844784, |
| "grad_norm": 1.711006760597229, |
| "learning_rate": 1.998801851541107e-07, |
| "loss": 0.0027, |
| "step": 17810 |
| }, |
| { |
| "epoch": 0.03300892463517914, |
| "grad_norm": 0.6896673440933228, |
| "learning_rate": 1.9988004258835635e-07, |
| "loss": 0.0036, |
| "step": 17820 |
| }, |
| { |
| "epoch": 0.033027448161910444, |
| "grad_norm": 1.8127459287643433, |
| "learning_rate": 1.99879899937885e-07, |
| "loss": 0.005, |
| "step": 17830 |
| }, |
| { |
| "epoch": 0.03304597168864174, |
| "grad_norm": 1.246256709098816, |
| "learning_rate": 1.9987975720269676e-07, |
| "loss": 0.0036, |
| "step": 17840 |
| }, |
| { |
| "epoch": 0.033064495215373045, |
| "grad_norm": 1.5150445699691772, |
| "learning_rate": 1.9987961438279173e-07, |
| "loss": 0.0035, |
| "step": 17850 |
| }, |
| { |
| "epoch": 0.03308301874210435, |
| "grad_norm": 1.601601004600525, |
| "learning_rate": 1.9987947147817006e-07, |
| "loss": 0.0034, |
| "step": 17860 |
| }, |
| { |
| "epoch": 0.033101542268835646, |
| "grad_norm": 0.5102198719978333, |
| "learning_rate": 1.9987932848883183e-07, |
| "loss": 0.0038, |
| "step": 17870 |
| }, |
| { |
| "epoch": 0.03312006579556695, |
| "grad_norm": 7.574174404144287, |
| "learning_rate": 1.998791854147772e-07, |
| "loss": 0.0032, |
| "step": 17880 |
| }, |
| { |
| "epoch": 0.033138589322298254, |
| "grad_norm": 1.457836627960205, |
| "learning_rate": 1.9987904225600626e-07, |
| "loss": 0.0032, |
| "step": 17890 |
| }, |
| { |
| "epoch": 0.03315711284902955, |
| "grad_norm": 0.8960339426994324, |
| "learning_rate": 1.9987889901251916e-07, |
| "loss": 0.0032, |
| "step": 17900 |
| }, |
| { |
| "epoch": 0.033175636375760854, |
| "grad_norm": 2.2523484230041504, |
| "learning_rate": 1.9987875568431604e-07, |
| "loss": 0.0034, |
| "step": 17910 |
| }, |
| { |
| "epoch": 0.03319415990249216, |
| "grad_norm": 2.3058037757873535, |
| "learning_rate": 1.9987861227139696e-07, |
| "loss": 0.0027, |
| "step": 17920 |
| }, |
| { |
| "epoch": 0.033212683429223455, |
| "grad_norm": 1.8199127912521362, |
| "learning_rate": 1.9987846877376207e-07, |
| "loss": 0.0031, |
| "step": 17930 |
| }, |
| { |
| "epoch": 0.03323120695595476, |
| "grad_norm": 0.7936412692070007, |
| "learning_rate": 1.9987832519141153e-07, |
| "loss": 0.0032, |
| "step": 17940 |
| }, |
| { |
| "epoch": 0.03324973048268606, |
| "grad_norm": 0.4965610206127167, |
| "learning_rate": 1.998781815243454e-07, |
| "loss": 0.0034, |
| "step": 17950 |
| }, |
| { |
| "epoch": 0.03326825400941736, |
| "grad_norm": 1.306909441947937, |
| "learning_rate": 1.9987803777256384e-07, |
| "loss": 0.0038, |
| "step": 17960 |
| }, |
| { |
| "epoch": 0.033286777536148664, |
| "grad_norm": 2.0445873737335205, |
| "learning_rate": 1.9987789393606693e-07, |
| "loss": 0.0036, |
| "step": 17970 |
| }, |
| { |
| "epoch": 0.03330530106287996, |
| "grad_norm": 1.9258192777633667, |
| "learning_rate": 1.9987775001485487e-07, |
| "loss": 0.0031, |
| "step": 17980 |
| }, |
| { |
| "epoch": 0.033323824589611264, |
| "grad_norm": 1.2828470468521118, |
| "learning_rate": 1.998776060089277e-07, |
| "loss": 0.0044, |
| "step": 17990 |
| }, |
| { |
| "epoch": 0.03334234811634257, |
| "grad_norm": 0.8697891235351562, |
| "learning_rate": 1.998774619182856e-07, |
| "loss": 0.0042, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.033360871643073865, |
| "grad_norm": 1.3002070188522339, |
| "learning_rate": 1.9987731774292868e-07, |
| "loss": 0.0053, |
| "step": 18010 |
| }, |
| { |
| "epoch": 0.03337939516980517, |
| "grad_norm": 0.7896958589553833, |
| "learning_rate": 1.9987717348285704e-07, |
| "loss": 0.0043, |
| "step": 18020 |
| }, |
| { |
| "epoch": 0.03339791869653647, |
| "grad_norm": 3.89027738571167, |
| "learning_rate": 1.998770291380708e-07, |
| "loss": 0.0032, |
| "step": 18030 |
| }, |
| { |
| "epoch": 0.03341644222326777, |
| "grad_norm": 2.3262972831726074, |
| "learning_rate": 1.9987688470857013e-07, |
| "loss": 0.0028, |
| "step": 18040 |
| }, |
| { |
| "epoch": 0.033434965749999074, |
| "grad_norm": 2.0204803943634033, |
| "learning_rate": 1.9987674019435506e-07, |
| "loss": 0.0035, |
| "step": 18050 |
| }, |
| { |
| "epoch": 0.03345348927673038, |
| "grad_norm": 0.7347742319107056, |
| "learning_rate": 1.9987659559542586e-07, |
| "loss": 0.0035, |
| "step": 18060 |
| }, |
| { |
| "epoch": 0.033472012803461675, |
| "grad_norm": 6.925575256347656, |
| "learning_rate": 1.9987645091178248e-07, |
| "loss": 0.0035, |
| "step": 18070 |
| }, |
| { |
| "epoch": 0.03349053633019298, |
| "grad_norm": 2.0295567512512207, |
| "learning_rate": 1.9987630614342516e-07, |
| "loss": 0.0047, |
| "step": 18080 |
| }, |
| { |
| "epoch": 0.03350905985692428, |
| "grad_norm": 2.0094292163848877, |
| "learning_rate": 1.9987616129035398e-07, |
| "loss": 0.0031, |
| "step": 18090 |
| }, |
| { |
| "epoch": 0.03352758338365558, |
| "grad_norm": 1.5079076290130615, |
| "learning_rate": 1.998760163525691e-07, |
| "loss": 0.0027, |
| "step": 18100 |
| }, |
| { |
| "epoch": 0.03354610691038688, |
| "grad_norm": 1.0791319608688354, |
| "learning_rate": 1.998758713300706e-07, |
| "loss": 0.0036, |
| "step": 18110 |
| }, |
| { |
| "epoch": 0.03356463043711819, |
| "grad_norm": 1.1840084791183472, |
| "learning_rate": 1.9987572622285862e-07, |
| "loss": 0.0054, |
| "step": 18120 |
| }, |
| { |
| "epoch": 0.033583153963849484, |
| "grad_norm": 0.9554263949394226, |
| "learning_rate": 1.998755810309333e-07, |
| "loss": 0.0036, |
| "step": 18130 |
| }, |
| { |
| "epoch": 0.03360167749058079, |
| "grad_norm": 0.34252992272377014, |
| "learning_rate": 1.998754357542947e-07, |
| "loss": 0.0029, |
| "step": 18140 |
| }, |
| { |
| "epoch": 0.033620201017312085, |
| "grad_norm": 1.3148545026779175, |
| "learning_rate": 1.9987529039294303e-07, |
| "loss": 0.0035, |
| "step": 18150 |
| }, |
| { |
| "epoch": 0.03363872454404339, |
| "grad_norm": 0.5127333402633667, |
| "learning_rate": 1.9987514494687839e-07, |
| "loss": 0.0029, |
| "step": 18160 |
| }, |
| { |
| "epoch": 0.03365724807077469, |
| "grad_norm": 1.376829981803894, |
| "learning_rate": 1.998749994161008e-07, |
| "loss": 0.0041, |
| "step": 18170 |
| }, |
| { |
| "epoch": 0.03367577159750599, |
| "grad_norm": 0.8420721292495728, |
| "learning_rate": 1.9987485380061054e-07, |
| "loss": 0.003, |
| "step": 18180 |
| }, |
| { |
| "epoch": 0.03369429512423729, |
| "grad_norm": 1.6433014869689941, |
| "learning_rate": 1.9987470810040766e-07, |
| "loss": 0.0038, |
| "step": 18190 |
| }, |
| { |
| "epoch": 0.0337128186509686, |
| "grad_norm": 0.942827582359314, |
| "learning_rate": 1.9987456231549228e-07, |
| "loss": 0.0035, |
| "step": 18200 |
| }, |
| { |
| "epoch": 0.033731342177699894, |
| "grad_norm": 2.944533348083496, |
| "learning_rate": 1.9987441644586452e-07, |
| "loss": 0.0043, |
| "step": 18210 |
| }, |
| { |
| "epoch": 0.0337498657044312, |
| "grad_norm": 0.4099912941455841, |
| "learning_rate": 1.998742704915245e-07, |
| "loss": 0.0025, |
| "step": 18220 |
| }, |
| { |
| "epoch": 0.0337683892311625, |
| "grad_norm": 6.218419551849365, |
| "learning_rate": 1.9987412445247238e-07, |
| "loss": 0.0037, |
| "step": 18230 |
| }, |
| { |
| "epoch": 0.0337869127578938, |
| "grad_norm": 0.5342549085617065, |
| "learning_rate": 1.9987397832870824e-07, |
| "loss": 0.0036, |
| "step": 18240 |
| }, |
| { |
| "epoch": 0.0338054362846251, |
| "grad_norm": 0.25968873500823975, |
| "learning_rate": 1.9987383212023223e-07, |
| "loss": 0.0033, |
| "step": 18250 |
| }, |
| { |
| "epoch": 0.033823959811356406, |
| "grad_norm": 0.6779420971870422, |
| "learning_rate": 1.9987368582704448e-07, |
| "loss": 0.0042, |
| "step": 18260 |
| }, |
| { |
| "epoch": 0.0338424833380877, |
| "grad_norm": 2.5992417335510254, |
| "learning_rate": 1.998735394491451e-07, |
| "loss": 0.0043, |
| "step": 18270 |
| }, |
| { |
| "epoch": 0.03386100686481901, |
| "grad_norm": 0.5151141881942749, |
| "learning_rate": 1.9987339298653422e-07, |
| "loss": 0.003, |
| "step": 18280 |
| }, |
| { |
| "epoch": 0.03387953039155031, |
| "grad_norm": 1.009832739830017, |
| "learning_rate": 1.9987324643921194e-07, |
| "loss": 0.0033, |
| "step": 18290 |
| }, |
| { |
| "epoch": 0.03389805391828161, |
| "grad_norm": 0.5050942301750183, |
| "learning_rate": 1.9987309980717843e-07, |
| "loss": 0.0044, |
| "step": 18300 |
| }, |
| { |
| "epoch": 0.03391657744501291, |
| "grad_norm": 4.007758140563965, |
| "learning_rate": 1.9987295309043378e-07, |
| "loss": 0.0056, |
| "step": 18310 |
| }, |
| { |
| "epoch": 0.033935100971744216, |
| "grad_norm": 0.8257130980491638, |
| "learning_rate": 1.9987280628897812e-07, |
| "loss": 0.0032, |
| "step": 18320 |
| }, |
| { |
| "epoch": 0.03395362449847551, |
| "grad_norm": 0.23258741199970245, |
| "learning_rate": 1.9987265940281159e-07, |
| "loss": 0.0026, |
| "step": 18330 |
| }, |
| { |
| "epoch": 0.033972148025206816, |
| "grad_norm": 1.6375796794891357, |
| "learning_rate": 1.998725124319343e-07, |
| "loss": 0.003, |
| "step": 18340 |
| }, |
| { |
| "epoch": 0.03399067155193811, |
| "grad_norm": 0.43538978695869446, |
| "learning_rate": 1.9987236537634638e-07, |
| "loss": 0.0044, |
| "step": 18350 |
| }, |
| { |
| "epoch": 0.03400919507866942, |
| "grad_norm": 0.7185086011886597, |
| "learning_rate": 1.9987221823604794e-07, |
| "loss": 0.004, |
| "step": 18360 |
| }, |
| { |
| "epoch": 0.03402771860540072, |
| "grad_norm": 1.4456875324249268, |
| "learning_rate": 1.9987207101103914e-07, |
| "loss": 0.003, |
| "step": 18370 |
| }, |
| { |
| "epoch": 0.03404624213213202, |
| "grad_norm": 1.9470597505569458, |
| "learning_rate": 1.9987192370132006e-07, |
| "loss": 0.0036, |
| "step": 18380 |
| }, |
| { |
| "epoch": 0.03406476565886332, |
| "grad_norm": 2.124014377593994, |
| "learning_rate": 1.9987177630689085e-07, |
| "loss": 0.0046, |
| "step": 18390 |
| }, |
| { |
| "epoch": 0.034083289185594626, |
| "grad_norm": 0.6246276497840881, |
| "learning_rate": 1.9987162882775165e-07, |
| "loss": 0.0032, |
| "step": 18400 |
| }, |
| { |
| "epoch": 0.03410181271232592, |
| "grad_norm": 0.5049999356269836, |
| "learning_rate": 1.9987148126390254e-07, |
| "loss": 0.0025, |
| "step": 18410 |
| }, |
| { |
| "epoch": 0.034120336239057227, |
| "grad_norm": 1.9510364532470703, |
| "learning_rate": 1.998713336153437e-07, |
| "loss": 0.003, |
| "step": 18420 |
| }, |
| { |
| "epoch": 0.03413885976578853, |
| "grad_norm": 1.8055649995803833, |
| "learning_rate": 1.9987118588207522e-07, |
| "loss": 0.0036, |
| "step": 18430 |
| }, |
| { |
| "epoch": 0.03415738329251983, |
| "grad_norm": 0.9042274355888367, |
| "learning_rate": 1.9987103806409722e-07, |
| "loss": 0.0035, |
| "step": 18440 |
| }, |
| { |
| "epoch": 0.03417590681925113, |
| "grad_norm": 0.6133391261100769, |
| "learning_rate": 1.9987089016140986e-07, |
| "loss": 0.0034, |
| "step": 18450 |
| }, |
| { |
| "epoch": 0.034194430345982435, |
| "grad_norm": 1.5863555669784546, |
| "learning_rate": 1.998707421740132e-07, |
| "loss": 0.0042, |
| "step": 18460 |
| }, |
| { |
| "epoch": 0.03421295387271373, |
| "grad_norm": 2.24369215965271, |
| "learning_rate": 1.9987059410190747e-07, |
| "loss": 0.0044, |
| "step": 18470 |
| }, |
| { |
| "epoch": 0.034231477399445036, |
| "grad_norm": 0.4150441288948059, |
| "learning_rate": 1.998704459450927e-07, |
| "loss": 0.0028, |
| "step": 18480 |
| }, |
| { |
| "epoch": 0.03425000092617634, |
| "grad_norm": 0.7335507273674011, |
| "learning_rate": 1.9987029770356907e-07, |
| "loss": 0.0036, |
| "step": 18490 |
| }, |
| { |
| "epoch": 0.03426852445290764, |
| "grad_norm": 0.8964026570320129, |
| "learning_rate": 1.9987014937733665e-07, |
| "loss": 0.003, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.03428704797963894, |
| "grad_norm": 0.7239894866943359, |
| "learning_rate": 1.9987000096639567e-07, |
| "loss": 0.0024, |
| "step": 18510 |
| }, |
| { |
| "epoch": 0.03430557150637024, |
| "grad_norm": 2.498103380203247, |
| "learning_rate": 1.998698524707461e-07, |
| "loss": 0.0057, |
| "step": 18520 |
| }, |
| { |
| "epoch": 0.03432409503310154, |
| "grad_norm": 0.496054470539093, |
| "learning_rate": 1.998697038903882e-07, |
| "loss": 0.0025, |
| "step": 18530 |
| }, |
| { |
| "epoch": 0.034342618559832845, |
| "grad_norm": 1.0351760387420654, |
| "learning_rate": 1.9986955522532204e-07, |
| "loss": 0.0035, |
| "step": 18540 |
| }, |
| { |
| "epoch": 0.03436114208656414, |
| "grad_norm": 0.667980432510376, |
| "learning_rate": 1.998694064755478e-07, |
| "loss": 0.0043, |
| "step": 18550 |
| }, |
| { |
| "epoch": 0.034379665613295446, |
| "grad_norm": 2.156524658203125, |
| "learning_rate": 1.9986925764106554e-07, |
| "loss": 0.0026, |
| "step": 18560 |
| }, |
| { |
| "epoch": 0.03439818914002675, |
| "grad_norm": 1.4279463291168213, |
| "learning_rate": 1.9986910872187538e-07, |
| "loss": 0.0045, |
| "step": 18570 |
| }, |
| { |
| "epoch": 0.03441671266675805, |
| "grad_norm": 1.781225562095642, |
| "learning_rate": 1.998689597179775e-07, |
| "loss": 0.0037, |
| "step": 18580 |
| }, |
| { |
| "epoch": 0.03443523619348935, |
| "grad_norm": 0.4843122065067291, |
| "learning_rate": 1.99868810629372e-07, |
| "loss": 0.0036, |
| "step": 18590 |
| }, |
| { |
| "epoch": 0.034453759720220654, |
| "grad_norm": 0.5495992302894592, |
| "learning_rate": 1.99868661456059e-07, |
| "loss": 0.0029, |
| "step": 18600 |
| }, |
| { |
| "epoch": 0.03447228324695195, |
| "grad_norm": 2.188624620437622, |
| "learning_rate": 1.998685121980386e-07, |
| "loss": 0.0037, |
| "step": 18610 |
| }, |
| { |
| "epoch": 0.034490806773683255, |
| "grad_norm": 0.9626719355583191, |
| "learning_rate": 1.9986836285531102e-07, |
| "loss": 0.0033, |
| "step": 18620 |
| }, |
| { |
| "epoch": 0.03450933030041456, |
| "grad_norm": 3.2979846000671387, |
| "learning_rate": 1.9986821342787632e-07, |
| "loss": 0.0039, |
| "step": 18630 |
| }, |
| { |
| "epoch": 0.034527853827145856, |
| "grad_norm": 0.15777960419654846, |
| "learning_rate": 1.9986806391573462e-07, |
| "loss": 0.0034, |
| "step": 18640 |
| }, |
| { |
| "epoch": 0.03454637735387716, |
| "grad_norm": 1.4066557884216309, |
| "learning_rate": 1.9986791431888602e-07, |
| "loss": 0.0051, |
| "step": 18650 |
| }, |
| { |
| "epoch": 0.034564900880608464, |
| "grad_norm": 0.4462161362171173, |
| "learning_rate": 1.9986776463733074e-07, |
| "loss": 0.0041, |
| "step": 18660 |
| }, |
| { |
| "epoch": 0.03458342440733976, |
| "grad_norm": 0.4397236406803131, |
| "learning_rate": 1.9986761487106886e-07, |
| "loss": 0.0033, |
| "step": 18670 |
| }, |
| { |
| "epoch": 0.034601947934071065, |
| "grad_norm": 2.176435708999634, |
| "learning_rate": 1.9986746502010048e-07, |
| "loss": 0.005, |
| "step": 18680 |
| }, |
| { |
| "epoch": 0.03462047146080237, |
| "grad_norm": 1.5458993911743164, |
| "learning_rate": 1.9986731508442576e-07, |
| "loss": 0.003, |
| "step": 18690 |
| }, |
| { |
| "epoch": 0.034638994987533665, |
| "grad_norm": 0.5323123931884766, |
| "learning_rate": 1.998671650640448e-07, |
| "loss": 0.0038, |
| "step": 18700 |
| }, |
| { |
| "epoch": 0.03465751851426497, |
| "grad_norm": 0.6469013690948486, |
| "learning_rate": 1.9986701495895776e-07, |
| "loss": 0.0026, |
| "step": 18710 |
| }, |
| { |
| "epoch": 0.034676042040996266, |
| "grad_norm": 1.8083308935165405, |
| "learning_rate": 1.9986686476916477e-07, |
| "loss": 0.0035, |
| "step": 18720 |
| }, |
| { |
| "epoch": 0.03469456556772757, |
| "grad_norm": 0.5271221995353699, |
| "learning_rate": 1.998667144946659e-07, |
| "loss": 0.0042, |
| "step": 18730 |
| }, |
| { |
| "epoch": 0.034713089094458874, |
| "grad_norm": 1.1640464067459106, |
| "learning_rate": 1.9986656413546133e-07, |
| "loss": 0.0032, |
| "step": 18740 |
| }, |
| { |
| "epoch": 0.03473161262119017, |
| "grad_norm": 1.0021498203277588, |
| "learning_rate": 1.9986641369155117e-07, |
| "loss": 0.0037, |
| "step": 18750 |
| }, |
| { |
| "epoch": 0.034750136147921475, |
| "grad_norm": 1.3866386413574219, |
| "learning_rate": 1.9986626316293555e-07, |
| "loss": 0.004, |
| "step": 18760 |
| }, |
| { |
| "epoch": 0.03476865967465278, |
| "grad_norm": 0.5864830017089844, |
| "learning_rate": 1.9986611254961462e-07, |
| "loss": 0.0035, |
| "step": 18770 |
| }, |
| { |
| "epoch": 0.034787183201384075, |
| "grad_norm": 0.6676185131072998, |
| "learning_rate": 1.9986596185158846e-07, |
| "loss": 0.0038, |
| "step": 18780 |
| }, |
| { |
| "epoch": 0.03480570672811538, |
| "grad_norm": 0.9182947874069214, |
| "learning_rate": 1.9986581106885721e-07, |
| "loss": 0.0034, |
| "step": 18790 |
| }, |
| { |
| "epoch": 0.03482423025484668, |
| "grad_norm": 1.0439637899398804, |
| "learning_rate": 1.9986566020142106e-07, |
| "loss": 0.0033, |
| "step": 18800 |
| }, |
| { |
| "epoch": 0.03484275378157798, |
| "grad_norm": 0.28350016474723816, |
| "learning_rate": 1.9986550924928007e-07, |
| "loss": 0.0034, |
| "step": 18810 |
| }, |
| { |
| "epoch": 0.034861277308309284, |
| "grad_norm": 1.1529831886291504, |
| "learning_rate": 1.9986535821243438e-07, |
| "loss": 0.0043, |
| "step": 18820 |
| }, |
| { |
| "epoch": 0.03487980083504059, |
| "grad_norm": 5.076997756958008, |
| "learning_rate": 1.9986520709088413e-07, |
| "loss": 0.004, |
| "step": 18830 |
| }, |
| { |
| "epoch": 0.034898324361771885, |
| "grad_norm": 1.0792638063430786, |
| "learning_rate": 1.9986505588462944e-07, |
| "loss": 0.0031, |
| "step": 18840 |
| }, |
| { |
| "epoch": 0.03491684788850319, |
| "grad_norm": 1.719867467880249, |
| "learning_rate": 1.9986490459367046e-07, |
| "loss": 0.003, |
| "step": 18850 |
| }, |
| { |
| "epoch": 0.03493537141523449, |
| "grad_norm": 0.3182157278060913, |
| "learning_rate": 1.9986475321800728e-07, |
| "loss": 0.002, |
| "step": 18860 |
| }, |
| { |
| "epoch": 0.03495389494196579, |
| "grad_norm": 0.6554461717605591, |
| "learning_rate": 1.9986460175764006e-07, |
| "loss": 0.003, |
| "step": 18870 |
| }, |
| { |
| "epoch": 0.03497241846869709, |
| "grad_norm": 2.095546007156372, |
| "learning_rate": 1.9986445021256891e-07, |
| "loss": 0.0035, |
| "step": 18880 |
| }, |
| { |
| "epoch": 0.0349909419954284, |
| "grad_norm": 0.8449950218200684, |
| "learning_rate": 1.99864298582794e-07, |
| "loss": 0.0034, |
| "step": 18890 |
| }, |
| { |
| "epoch": 0.035009465522159694, |
| "grad_norm": 0.5359604954719543, |
| "learning_rate": 1.9986414686831536e-07, |
| "loss": 0.0028, |
| "step": 18900 |
| }, |
| { |
| "epoch": 0.035027989048891, |
| "grad_norm": 0.9908369779586792, |
| "learning_rate": 1.9986399506913324e-07, |
| "loss": 0.003, |
| "step": 18910 |
| }, |
| { |
| "epoch": 0.035046512575622295, |
| "grad_norm": 1.4414681196212769, |
| "learning_rate": 1.998638431852477e-07, |
| "loss": 0.0034, |
| "step": 18920 |
| }, |
| { |
| "epoch": 0.0350650361023536, |
| "grad_norm": 0.6343901753425598, |
| "learning_rate": 1.9986369121665886e-07, |
| "loss": 0.0042, |
| "step": 18930 |
| }, |
| { |
| "epoch": 0.0350835596290849, |
| "grad_norm": 0.9236226677894592, |
| "learning_rate": 1.998635391633669e-07, |
| "loss": 0.003, |
| "step": 18940 |
| }, |
| { |
| "epoch": 0.0351020831558162, |
| "grad_norm": 0.8473572731018066, |
| "learning_rate": 1.9986338702537191e-07, |
| "loss": 0.0048, |
| "step": 18950 |
| }, |
| { |
| "epoch": 0.0351206066825475, |
| "grad_norm": 2.0656371116638184, |
| "learning_rate": 1.99863234802674e-07, |
| "loss": 0.0057, |
| "step": 18960 |
| }, |
| { |
| "epoch": 0.03513913020927881, |
| "grad_norm": 0.8192446827888489, |
| "learning_rate": 1.9986308249527335e-07, |
| "loss": 0.0037, |
| "step": 18970 |
| }, |
| { |
| "epoch": 0.035157653736010104, |
| "grad_norm": 0.6716576814651489, |
| "learning_rate": 1.9986293010317005e-07, |
| "loss": 0.0042, |
| "step": 18980 |
| }, |
| { |
| "epoch": 0.03517617726274141, |
| "grad_norm": 1.3140870332717896, |
| "learning_rate": 1.998627776263643e-07, |
| "loss": 0.0026, |
| "step": 18990 |
| }, |
| { |
| "epoch": 0.03519470078947271, |
| "grad_norm": 0.7249475717544556, |
| "learning_rate": 1.998626250648561e-07, |
| "loss": 0.003, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.03521322431620401, |
| "grad_norm": 1.5127142667770386, |
| "learning_rate": 1.998624724186457e-07, |
| "loss": 0.0031, |
| "step": 19010 |
| }, |
| { |
| "epoch": 0.03523174784293531, |
| "grad_norm": 1.2868050336837769, |
| "learning_rate": 1.998623196877332e-07, |
| "loss": 0.003, |
| "step": 19020 |
| }, |
| { |
| "epoch": 0.035250271369666616, |
| "grad_norm": 2.026670455932617, |
| "learning_rate": 1.998621668721187e-07, |
| "loss": 0.0037, |
| "step": 19030 |
| }, |
| { |
| "epoch": 0.03526879489639791, |
| "grad_norm": 1.6896562576293945, |
| "learning_rate": 1.9986201397180232e-07, |
| "loss": 0.0036, |
| "step": 19040 |
| }, |
| { |
| "epoch": 0.03528731842312922, |
| "grad_norm": 0.7348533272743225, |
| "learning_rate": 1.998618609867842e-07, |
| "loss": 0.0035, |
| "step": 19050 |
| }, |
| { |
| "epoch": 0.03530584194986052, |
| "grad_norm": 1.084052324295044, |
| "learning_rate": 1.998617079170645e-07, |
| "loss": 0.0039, |
| "step": 19060 |
| }, |
| { |
| "epoch": 0.03532436547659182, |
| "grad_norm": 0.7120713591575623, |
| "learning_rate": 1.9986155476264334e-07, |
| "loss": 0.0046, |
| "step": 19070 |
| }, |
| { |
| "epoch": 0.03534288900332312, |
| "grad_norm": 0.647713303565979, |
| "learning_rate": 1.9986140152352085e-07, |
| "loss": 0.0037, |
| "step": 19080 |
| }, |
| { |
| "epoch": 0.03536141253005442, |
| "grad_norm": 1.1439307928085327, |
| "learning_rate": 1.9986124819969714e-07, |
| "loss": 0.0034, |
| "step": 19090 |
| }, |
| { |
| "epoch": 0.03537993605678572, |
| "grad_norm": 1.7926459312438965, |
| "learning_rate": 1.9986109479117236e-07, |
| "loss": 0.0034, |
| "step": 19100 |
| }, |
| { |
| "epoch": 0.03539845958351703, |
| "grad_norm": 0.5525590181350708, |
| "learning_rate": 1.998609412979466e-07, |
| "loss": 0.003, |
| "step": 19110 |
| }, |
| { |
| "epoch": 0.03541698311024832, |
| "grad_norm": 1.4765915870666504, |
| "learning_rate": 1.9986078772002005e-07, |
| "loss": 0.0031, |
| "step": 19120 |
| }, |
| { |
| "epoch": 0.03543550663697963, |
| "grad_norm": 1.0233795642852783, |
| "learning_rate": 1.9986063405739285e-07, |
| "loss": 0.0032, |
| "step": 19130 |
| }, |
| { |
| "epoch": 0.03545403016371093, |
| "grad_norm": 1.4423023462295532, |
| "learning_rate": 1.9986048031006505e-07, |
| "loss": 0.0042, |
| "step": 19140 |
| }, |
| { |
| "epoch": 0.03547255369044223, |
| "grad_norm": 1.3508613109588623, |
| "learning_rate": 1.9986032647803684e-07, |
| "loss": 0.0022, |
| "step": 19150 |
| }, |
| { |
| "epoch": 0.03549107721717353, |
| "grad_norm": 0.26619842648506165, |
| "learning_rate": 1.998601725613083e-07, |
| "loss": 0.0031, |
| "step": 19160 |
| }, |
| { |
| "epoch": 0.035509600743904836, |
| "grad_norm": 0.9467312097549438, |
| "learning_rate": 1.9986001855987965e-07, |
| "loss": 0.0047, |
| "step": 19170 |
| }, |
| { |
| "epoch": 0.03552812427063613, |
| "grad_norm": 7.417558670043945, |
| "learning_rate": 1.9985986447375093e-07, |
| "loss": 0.004, |
| "step": 19180 |
| }, |
| { |
| "epoch": 0.03554664779736744, |
| "grad_norm": 0.8356530666351318, |
| "learning_rate": 1.998597103029223e-07, |
| "loss": 0.0027, |
| "step": 19190 |
| }, |
| { |
| "epoch": 0.03556517132409874, |
| "grad_norm": 0.7894399166107178, |
| "learning_rate": 1.998595560473939e-07, |
| "loss": 0.0042, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.03558369485083004, |
| "grad_norm": 1.066159963607788, |
| "learning_rate": 1.9985940170716585e-07, |
| "loss": 0.0032, |
| "step": 19210 |
| }, |
| { |
| "epoch": 0.03560221837756134, |
| "grad_norm": 1.0459017753601074, |
| "learning_rate": 1.9985924728223833e-07, |
| "loss": 0.0031, |
| "step": 19220 |
| }, |
| { |
| "epoch": 0.035620741904292645, |
| "grad_norm": 2.8311445713043213, |
| "learning_rate": 1.9985909277261137e-07, |
| "loss": 0.0022, |
| "step": 19230 |
| }, |
| { |
| "epoch": 0.03563926543102394, |
| "grad_norm": 1.1559298038482666, |
| "learning_rate": 1.9985893817828522e-07, |
| "loss": 0.0035, |
| "step": 19240 |
| }, |
| { |
| "epoch": 0.035657788957755246, |
| "grad_norm": 0.6410951614379883, |
| "learning_rate": 1.998587834992599e-07, |
| "loss": 0.0035, |
| "step": 19250 |
| }, |
| { |
| "epoch": 0.03567631248448655, |
| "grad_norm": 0.9691218137741089, |
| "learning_rate": 1.9985862873553564e-07, |
| "loss": 0.003, |
| "step": 19260 |
| }, |
| { |
| "epoch": 0.03569483601121785, |
| "grad_norm": 1.0513867139816284, |
| "learning_rate": 1.9985847388711247e-07, |
| "loss": 0.0034, |
| "step": 19270 |
| }, |
| { |
| "epoch": 0.03571335953794915, |
| "grad_norm": 0.45165732502937317, |
| "learning_rate": 1.9985831895399063e-07, |
| "loss": 0.0019, |
| "step": 19280 |
| }, |
| { |
| "epoch": 0.03573188306468045, |
| "grad_norm": 2.4560883045196533, |
| "learning_rate": 1.9985816393617017e-07, |
| "loss": 0.0031, |
| "step": 19290 |
| }, |
| { |
| "epoch": 0.03575040659141175, |
| "grad_norm": 0.6173827648162842, |
| "learning_rate": 1.9985800883365125e-07, |
| "loss": 0.0029, |
| "step": 19300 |
| }, |
| { |
| "epoch": 0.035768930118143055, |
| "grad_norm": 0.4740954339504242, |
| "learning_rate": 1.99857853646434e-07, |
| "loss": 0.0033, |
| "step": 19310 |
| }, |
| { |
| "epoch": 0.03578745364487435, |
| "grad_norm": 1.4231611490249634, |
| "learning_rate": 1.9985769837451856e-07, |
| "loss": 0.0037, |
| "step": 19320 |
| }, |
| { |
| "epoch": 0.035805977171605656, |
| "grad_norm": 0.5511701703071594, |
| "learning_rate": 1.9985754301790503e-07, |
| "loss": 0.0033, |
| "step": 19330 |
| }, |
| { |
| "epoch": 0.03582450069833696, |
| "grad_norm": 0.2996627986431122, |
| "learning_rate": 1.998573875765936e-07, |
| "loss": 0.0043, |
| "step": 19340 |
| }, |
| { |
| "epoch": 0.03584302422506826, |
| "grad_norm": 0.6647844910621643, |
| "learning_rate": 1.9985723205058434e-07, |
| "loss": 0.0028, |
| "step": 19350 |
| }, |
| { |
| "epoch": 0.03586154775179956, |
| "grad_norm": 1.228018879890442, |
| "learning_rate": 1.9985707643987742e-07, |
| "loss": 0.0034, |
| "step": 19360 |
| }, |
| { |
| "epoch": 0.035880071278530865, |
| "grad_norm": 0.654123067855835, |
| "learning_rate": 1.9985692074447297e-07, |
| "loss": 0.0026, |
| "step": 19370 |
| }, |
| { |
| "epoch": 0.03589859480526216, |
| "grad_norm": 1.6002602577209473, |
| "learning_rate": 1.9985676496437108e-07, |
| "loss": 0.0036, |
| "step": 19380 |
| }, |
| { |
| "epoch": 0.035917118331993465, |
| "grad_norm": 0.6049405336380005, |
| "learning_rate": 1.9985660909957195e-07, |
| "loss": 0.0029, |
| "step": 19390 |
| }, |
| { |
| "epoch": 0.03593564185872477, |
| "grad_norm": 2.578028917312622, |
| "learning_rate": 1.9985645315007565e-07, |
| "loss": 0.0036, |
| "step": 19400 |
| }, |
| { |
| "epoch": 0.035954165385456066, |
| "grad_norm": 1.1659135818481445, |
| "learning_rate": 1.9985629711588234e-07, |
| "loss": 0.0042, |
| "step": 19410 |
| }, |
| { |
| "epoch": 0.03597268891218737, |
| "grad_norm": 1.3945130109786987, |
| "learning_rate": 1.9985614099699218e-07, |
| "loss": 0.0038, |
| "step": 19420 |
| }, |
| { |
| "epoch": 0.035991212438918674, |
| "grad_norm": 2.4460220336914062, |
| "learning_rate": 1.9985598479340523e-07, |
| "loss": 0.0035, |
| "step": 19430 |
| }, |
| { |
| "epoch": 0.03600973596564997, |
| "grad_norm": 1.243489146232605, |
| "learning_rate": 1.9985582850512172e-07, |
| "loss": 0.0039, |
| "step": 19440 |
| }, |
| { |
| "epoch": 0.036028259492381275, |
| "grad_norm": 0.9915016293525696, |
| "learning_rate": 1.998556721321417e-07, |
| "loss": 0.0027, |
| "step": 19450 |
| }, |
| { |
| "epoch": 0.03604678301911257, |
| "grad_norm": 0.3849189579486847, |
| "learning_rate": 1.9985551567446534e-07, |
| "loss": 0.0024, |
| "step": 19460 |
| }, |
| { |
| "epoch": 0.036065306545843875, |
| "grad_norm": 1.2613993883132935, |
| "learning_rate": 1.9985535913209274e-07, |
| "loss": 0.0031, |
| "step": 19470 |
| }, |
| { |
| "epoch": 0.03608383007257518, |
| "grad_norm": 1.0675455331802368, |
| "learning_rate": 1.9985520250502408e-07, |
| "loss": 0.0054, |
| "step": 19480 |
| }, |
| { |
| "epoch": 0.036102353599306476, |
| "grad_norm": 0.9333555102348328, |
| "learning_rate": 1.9985504579325947e-07, |
| "loss": 0.0029, |
| "step": 19490 |
| }, |
| { |
| "epoch": 0.03612087712603778, |
| "grad_norm": 0.6854788661003113, |
| "learning_rate": 1.9985488899679904e-07, |
| "loss": 0.0053, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.036139400652769084, |
| "grad_norm": 2.302269697189331, |
| "learning_rate": 1.998547321156429e-07, |
| "loss": 0.0053, |
| "step": 19510 |
| }, |
| { |
| "epoch": 0.03615792417950038, |
| "grad_norm": 0.798496663570404, |
| "learning_rate": 1.9985457514979127e-07, |
| "loss": 0.0052, |
| "step": 19520 |
| }, |
| { |
| "epoch": 0.036176447706231685, |
| "grad_norm": 1.045938491821289, |
| "learning_rate": 1.9985441809924417e-07, |
| "loss": 0.0063, |
| "step": 19530 |
| }, |
| { |
| "epoch": 0.03619497123296299, |
| "grad_norm": 0.45389243960380554, |
| "learning_rate": 1.998542609640018e-07, |
| "loss": 0.0039, |
| "step": 19540 |
| }, |
| { |
| "epoch": 0.036213494759694285, |
| "grad_norm": 1.0441776514053345, |
| "learning_rate": 1.9985410374406427e-07, |
| "loss": 0.0051, |
| "step": 19550 |
| }, |
| { |
| "epoch": 0.03623201828642559, |
| "grad_norm": 0.310332328081131, |
| "learning_rate": 1.9985394643943177e-07, |
| "loss": 0.0054, |
| "step": 19560 |
| }, |
| { |
| "epoch": 0.03625054181315689, |
| "grad_norm": 0.42228832840919495, |
| "learning_rate": 1.9985378905010431e-07, |
| "loss": 0.0045, |
| "step": 19570 |
| }, |
| { |
| "epoch": 0.03626906533988819, |
| "grad_norm": 1.0036780834197998, |
| "learning_rate": 1.9985363157608214e-07, |
| "loss": 0.0038, |
| "step": 19580 |
| }, |
| { |
| "epoch": 0.036287588866619494, |
| "grad_norm": 0.7045961022377014, |
| "learning_rate": 1.9985347401736538e-07, |
| "loss": 0.0041, |
| "step": 19590 |
| }, |
| { |
| "epoch": 0.0363061123933508, |
| "grad_norm": 0.5960044264793396, |
| "learning_rate": 1.998533163739541e-07, |
| "loss": 0.0044, |
| "step": 19600 |
| }, |
| { |
| "epoch": 0.036324635920082095, |
| "grad_norm": 1.1904021501541138, |
| "learning_rate": 1.9985315864584846e-07, |
| "loss": 0.0045, |
| "step": 19610 |
| }, |
| { |
| "epoch": 0.0363431594468134, |
| "grad_norm": 0.6961872577667236, |
| "learning_rate": 1.9985300083304863e-07, |
| "loss": 0.0049, |
| "step": 19620 |
| }, |
| { |
| "epoch": 0.0363616829735447, |
| "grad_norm": 2.580206871032715, |
| "learning_rate": 1.998528429355547e-07, |
| "loss": 0.0055, |
| "step": 19630 |
| }, |
| { |
| "epoch": 0.036380206500276, |
| "grad_norm": 1.3117705583572388, |
| "learning_rate": 1.9985268495336684e-07, |
| "loss": 0.0034, |
| "step": 19640 |
| }, |
| { |
| "epoch": 0.0363987300270073, |
| "grad_norm": 0.8053256273269653, |
| "learning_rate": 1.9985252688648516e-07, |
| "loss": 0.0035, |
| "step": 19650 |
| }, |
| { |
| "epoch": 0.0364172535537386, |
| "grad_norm": 3.830737829208374, |
| "learning_rate": 1.998523687349098e-07, |
| "loss": 0.0049, |
| "step": 19660 |
| }, |
| { |
| "epoch": 0.036435777080469904, |
| "grad_norm": 0.5795256495475769, |
| "learning_rate": 1.9985221049864086e-07, |
| "loss": 0.0054, |
| "step": 19670 |
| }, |
| { |
| "epoch": 0.03645430060720121, |
| "grad_norm": 0.11074592173099518, |
| "learning_rate": 1.9985205217767857e-07, |
| "loss": 0.0038, |
| "step": 19680 |
| }, |
| { |
| "epoch": 0.036472824133932505, |
| "grad_norm": 0.5531294941902161, |
| "learning_rate": 1.9985189377202296e-07, |
| "loss": 0.0041, |
| "step": 19690 |
| }, |
| { |
| "epoch": 0.03649134766066381, |
| "grad_norm": 1.5527266263961792, |
| "learning_rate": 1.9985173528167422e-07, |
| "loss": 0.0046, |
| "step": 19700 |
| }, |
| { |
| "epoch": 0.03650987118739511, |
| "grad_norm": 0.826956033706665, |
| "learning_rate": 1.9985157670663245e-07, |
| "loss": 0.0058, |
| "step": 19710 |
| }, |
| { |
| "epoch": 0.03652839471412641, |
| "grad_norm": 3.1858956813812256, |
| "learning_rate": 1.9985141804689782e-07, |
| "loss": 0.0042, |
| "step": 19720 |
| }, |
| { |
| "epoch": 0.03654691824085771, |
| "grad_norm": 0.6962982416152954, |
| "learning_rate": 1.9985125930247046e-07, |
| "loss": 0.0049, |
| "step": 19730 |
| }, |
| { |
| "epoch": 0.03656544176758902, |
| "grad_norm": 0.7228627800941467, |
| "learning_rate": 1.9985110047335047e-07, |
| "loss": 0.005, |
| "step": 19740 |
| }, |
| { |
| "epoch": 0.036583965294320314, |
| "grad_norm": 1.1162699460983276, |
| "learning_rate": 1.9985094155953806e-07, |
| "loss": 0.0041, |
| "step": 19750 |
| }, |
| { |
| "epoch": 0.03660248882105162, |
| "grad_norm": 1.3536961078643799, |
| "learning_rate": 1.9985078256103324e-07, |
| "loss": 0.0041, |
| "step": 19760 |
| }, |
| { |
| "epoch": 0.03662101234778292, |
| "grad_norm": 0.4968113601207733, |
| "learning_rate": 1.998506234778363e-07, |
| "loss": 0.0034, |
| "step": 19770 |
| }, |
| { |
| "epoch": 0.03663953587451422, |
| "grad_norm": 1.8808673620224, |
| "learning_rate": 1.9985046430994722e-07, |
| "loss": 0.0053, |
| "step": 19780 |
| }, |
| { |
| "epoch": 0.03665805940124552, |
| "grad_norm": 1.342679500579834, |
| "learning_rate": 1.9985030505736623e-07, |
| "loss": 0.0038, |
| "step": 19790 |
| }, |
| { |
| "epoch": 0.03667658292797683, |
| "grad_norm": 0.6389815211296082, |
| "learning_rate": 1.998501457200935e-07, |
| "loss": 0.0041, |
| "step": 19800 |
| }, |
| { |
| "epoch": 0.036695106454708123, |
| "grad_norm": 0.4262131452560425, |
| "learning_rate": 1.9984998629812906e-07, |
| "loss": 0.004, |
| "step": 19810 |
| }, |
| { |
| "epoch": 0.03671362998143943, |
| "grad_norm": 1.0432332754135132, |
| "learning_rate": 1.9984982679147308e-07, |
| "loss": 0.0046, |
| "step": 19820 |
| }, |
| { |
| "epoch": 0.036732153508170724, |
| "grad_norm": 1.1393214464187622, |
| "learning_rate": 1.9984966720012574e-07, |
| "loss": 0.0046, |
| "step": 19830 |
| }, |
| { |
| "epoch": 0.03675067703490203, |
| "grad_norm": 0.9665826559066772, |
| "learning_rate": 1.9984950752408715e-07, |
| "loss": 0.0043, |
| "step": 19840 |
| }, |
| { |
| "epoch": 0.03676920056163333, |
| "grad_norm": 0.5058696269989014, |
| "learning_rate": 1.998493477633574e-07, |
| "loss": 0.0043, |
| "step": 19850 |
| }, |
| { |
| "epoch": 0.03678772408836463, |
| "grad_norm": 1.3922209739685059, |
| "learning_rate": 1.998491879179367e-07, |
| "loss": 0.0044, |
| "step": 19860 |
| }, |
| { |
| "epoch": 0.03680624761509593, |
| "grad_norm": 5.119363307952881, |
| "learning_rate": 1.9984902798782515e-07, |
| "loss": 0.0043, |
| "step": 19870 |
| }, |
| { |
| "epoch": 0.03682477114182724, |
| "grad_norm": 1.9968947172164917, |
| "learning_rate": 1.9984886797302288e-07, |
| "loss": 0.0054, |
| "step": 19880 |
| }, |
| { |
| "epoch": 0.036843294668558534, |
| "grad_norm": 1.4728156328201294, |
| "learning_rate": 1.9984870787353002e-07, |
| "loss": 0.0044, |
| "step": 19890 |
| }, |
| { |
| "epoch": 0.03686181819528984, |
| "grad_norm": 1.068397045135498, |
| "learning_rate": 1.9984854768934673e-07, |
| "loss": 0.0042, |
| "step": 19900 |
| }, |
| { |
| "epoch": 0.03688034172202114, |
| "grad_norm": 3.0315334796905518, |
| "learning_rate": 1.9984838742047314e-07, |
| "loss": 0.0079, |
| "step": 19910 |
| }, |
| { |
| "epoch": 0.03689886524875244, |
| "grad_norm": 2.092592716217041, |
| "learning_rate": 1.998482270669094e-07, |
| "loss": 0.0039, |
| "step": 19920 |
| }, |
| { |
| "epoch": 0.03691738877548374, |
| "grad_norm": 2.271408796310425, |
| "learning_rate": 1.9984806662865558e-07, |
| "loss": 0.0065, |
| "step": 19930 |
| }, |
| { |
| "epoch": 0.036935912302215046, |
| "grad_norm": 0.7889383435249329, |
| "learning_rate": 1.998479061057119e-07, |
| "loss": 0.0045, |
| "step": 19940 |
| }, |
| { |
| "epoch": 0.03695443582894634, |
| "grad_norm": 0.777569591999054, |
| "learning_rate": 1.9984774549807843e-07, |
| "loss": 0.0046, |
| "step": 19950 |
| }, |
| { |
| "epoch": 0.03697295935567765, |
| "grad_norm": 1.3818707466125488, |
| "learning_rate": 1.9984758480575534e-07, |
| "loss": 0.0041, |
| "step": 19960 |
| }, |
| { |
| "epoch": 0.03699148288240895, |
| "grad_norm": 2.1899654865264893, |
| "learning_rate": 1.998474240287428e-07, |
| "loss": 0.0036, |
| "step": 19970 |
| }, |
| { |
| "epoch": 0.03701000640914025, |
| "grad_norm": 0.54935222864151, |
| "learning_rate": 1.9984726316704088e-07, |
| "loss": 0.0027, |
| "step": 19980 |
| }, |
| { |
| "epoch": 0.03702852993587155, |
| "grad_norm": 1.1287750005722046, |
| "learning_rate": 1.9984710222064973e-07, |
| "loss": 0.0032, |
| "step": 19990 |
| }, |
| { |
| "epoch": 0.037047053462602855, |
| "grad_norm": 0.258894681930542, |
| "learning_rate": 1.9984694118956952e-07, |
| "loss": 0.0053, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.03706557698933415, |
| "grad_norm": 1.4985841512680054, |
| "learning_rate": 1.9984678007380036e-07, |
| "loss": 0.0045, |
| "step": 20010 |
| }, |
| { |
| "epoch": 0.037084100516065456, |
| "grad_norm": 1.0753264427185059, |
| "learning_rate": 1.998466188733424e-07, |
| "loss": 0.0033, |
| "step": 20020 |
| }, |
| { |
| "epoch": 0.03710262404279675, |
| "grad_norm": 1.5253010988235474, |
| "learning_rate": 1.9984645758819576e-07, |
| "loss": 0.0047, |
| "step": 20030 |
| }, |
| { |
| "epoch": 0.03712114756952806, |
| "grad_norm": 1.1419920921325684, |
| "learning_rate": 1.998462962183606e-07, |
| "loss": 0.0038, |
| "step": 20040 |
| }, |
| { |
| "epoch": 0.03713967109625936, |
| "grad_norm": 0.36432480812072754, |
| "learning_rate": 1.9984613476383704e-07, |
| "loss": 0.0031, |
| "step": 20050 |
| }, |
| { |
| "epoch": 0.03715819462299066, |
| "grad_norm": 0.9524299502372742, |
| "learning_rate": 1.998459732246252e-07, |
| "loss": 0.0042, |
| "step": 20060 |
| }, |
| { |
| "epoch": 0.03717671814972196, |
| "grad_norm": 1.0806434154510498, |
| "learning_rate": 1.998458116007253e-07, |
| "loss": 0.0051, |
| "step": 20070 |
| }, |
| { |
| "epoch": 0.037195241676453265, |
| "grad_norm": 2.4457690715789795, |
| "learning_rate": 1.9984564989213734e-07, |
| "loss": 0.006, |
| "step": 20080 |
| }, |
| { |
| "epoch": 0.03721376520318456, |
| "grad_norm": 1.1180081367492676, |
| "learning_rate": 1.9984548809886158e-07, |
| "loss": 0.0053, |
| "step": 20090 |
| }, |
| { |
| "epoch": 0.037232288729915866, |
| "grad_norm": 1.5082453489303589, |
| "learning_rate": 1.9984532622089808e-07, |
| "loss": 0.0056, |
| "step": 20100 |
| }, |
| { |
| "epoch": 0.03725081225664717, |
| "grad_norm": 0.8040734529495239, |
| "learning_rate": 1.9984516425824704e-07, |
| "loss": 0.0036, |
| "step": 20110 |
| }, |
| { |
| "epoch": 0.03726933578337847, |
| "grad_norm": 2.260471820831299, |
| "learning_rate": 1.9984500221090854e-07, |
| "loss": 0.0055, |
| "step": 20120 |
| }, |
| { |
| "epoch": 0.03728785931010977, |
| "grad_norm": 1.0465112924575806, |
| "learning_rate": 1.9984484007888275e-07, |
| "loss": 0.0046, |
| "step": 20130 |
| }, |
| { |
| "epoch": 0.037306382836841075, |
| "grad_norm": 0.5842317342758179, |
| "learning_rate": 1.9984467786216982e-07, |
| "loss": 0.0046, |
| "step": 20140 |
| }, |
| { |
| "epoch": 0.03732490636357237, |
| "grad_norm": 0.6854948401451111, |
| "learning_rate": 1.998445155607698e-07, |
| "loss": 0.004, |
| "step": 20150 |
| }, |
| { |
| "epoch": 0.037343429890303675, |
| "grad_norm": 0.937711238861084, |
| "learning_rate": 1.9984435317468295e-07, |
| "loss": 0.0039, |
| "step": 20160 |
| }, |
| { |
| "epoch": 0.03736195341703498, |
| "grad_norm": 4.139392852783203, |
| "learning_rate": 1.9984419070390937e-07, |
| "loss": 0.0053, |
| "step": 20170 |
| }, |
| { |
| "epoch": 0.037380476943766276, |
| "grad_norm": 4.063986301422119, |
| "learning_rate": 1.9984402814844914e-07, |
| "loss": 0.0051, |
| "step": 20180 |
| }, |
| { |
| "epoch": 0.03739900047049758, |
| "grad_norm": 4.531255722045898, |
| "learning_rate": 1.9984386550830245e-07, |
| "loss": 0.0047, |
| "step": 20190 |
| }, |
| { |
| "epoch": 0.03741752399722888, |
| "grad_norm": 0.7128534913063049, |
| "learning_rate": 1.9984370278346943e-07, |
| "loss": 0.0046, |
| "step": 20200 |
| }, |
| { |
| "epoch": 0.03743604752396018, |
| "grad_norm": 0.6727036833763123, |
| "learning_rate": 1.9984353997395021e-07, |
| "loss": 0.0043, |
| "step": 20210 |
| }, |
| { |
| "epoch": 0.037454571050691485, |
| "grad_norm": 2.167731523513794, |
| "learning_rate": 1.998433770797449e-07, |
| "loss": 0.0042, |
| "step": 20220 |
| }, |
| { |
| "epoch": 0.03747309457742278, |
| "grad_norm": 0.4157962203025818, |
| "learning_rate": 1.9984321410085373e-07, |
| "loss": 0.0047, |
| "step": 20230 |
| }, |
| { |
| "epoch": 0.037491618104154086, |
| "grad_norm": 0.8783450126647949, |
| "learning_rate": 1.9984305103727675e-07, |
| "loss": 0.0038, |
| "step": 20240 |
| }, |
| { |
| "epoch": 0.03751014163088539, |
| "grad_norm": 1.196747899055481, |
| "learning_rate": 1.9984288788901416e-07, |
| "loss": 0.0044, |
| "step": 20250 |
| }, |
| { |
| "epoch": 0.037528665157616686, |
| "grad_norm": 0.5749648213386536, |
| "learning_rate": 1.99842724656066e-07, |
| "loss": 0.0038, |
| "step": 20260 |
| }, |
| { |
| "epoch": 0.03754718868434799, |
| "grad_norm": 1.1771186590194702, |
| "learning_rate": 1.998425613384325e-07, |
| "loss": 0.0055, |
| "step": 20270 |
| }, |
| { |
| "epoch": 0.037565712211079294, |
| "grad_norm": 2.013296127319336, |
| "learning_rate": 1.9984239793611382e-07, |
| "loss": 0.0048, |
| "step": 20280 |
| }, |
| { |
| "epoch": 0.03758423573781059, |
| "grad_norm": 1.8180620670318604, |
| "learning_rate": 1.9984223444911e-07, |
| "loss": 0.0055, |
| "step": 20290 |
| }, |
| { |
| "epoch": 0.037602759264541895, |
| "grad_norm": 0.9603599309921265, |
| "learning_rate": 1.9984207087742125e-07, |
| "loss": 0.0046, |
| "step": 20300 |
| }, |
| { |
| "epoch": 0.0376212827912732, |
| "grad_norm": 2.043929100036621, |
| "learning_rate": 1.9984190722104768e-07, |
| "loss": 0.0051, |
| "step": 20310 |
| }, |
| { |
| "epoch": 0.037639806318004496, |
| "grad_norm": 0.5705754160881042, |
| "learning_rate": 1.9984174347998942e-07, |
| "loss": 0.0049, |
| "step": 20320 |
| }, |
| { |
| "epoch": 0.0376583298447358, |
| "grad_norm": 0.8956061601638794, |
| "learning_rate": 1.9984157965424664e-07, |
| "loss": 0.0036, |
| "step": 20330 |
| }, |
| { |
| "epoch": 0.0376768533714671, |
| "grad_norm": 2.103830337524414, |
| "learning_rate": 1.998414157438195e-07, |
| "loss": 0.0053, |
| "step": 20340 |
| }, |
| { |
| "epoch": 0.0376953768981984, |
| "grad_norm": 1.0262129306793213, |
| "learning_rate": 1.9984125174870802e-07, |
| "loss": 0.0034, |
| "step": 20350 |
| }, |
| { |
| "epoch": 0.037713900424929704, |
| "grad_norm": 1.408827543258667, |
| "learning_rate": 1.998410876689125e-07, |
| "loss": 0.0038, |
| "step": 20360 |
| }, |
| { |
| "epoch": 0.03773242395166101, |
| "grad_norm": 1.2948274612426758, |
| "learning_rate": 1.9984092350443298e-07, |
| "loss": 0.0064, |
| "step": 20370 |
| }, |
| { |
| "epoch": 0.037750947478392305, |
| "grad_norm": 1.192555546760559, |
| "learning_rate": 1.998407592552696e-07, |
| "loss": 0.0044, |
| "step": 20380 |
| }, |
| { |
| "epoch": 0.03776947100512361, |
| "grad_norm": 0.9793321490287781, |
| "learning_rate": 1.9984059492142254e-07, |
| "loss": 0.0047, |
| "step": 20390 |
| }, |
| { |
| "epoch": 0.037787994531854906, |
| "grad_norm": 0.7747433185577393, |
| "learning_rate": 1.9984043050289192e-07, |
| "loss": 0.0035, |
| "step": 20400 |
| }, |
| { |
| "epoch": 0.03780651805858621, |
| "grad_norm": 0.5744499564170837, |
| "learning_rate": 1.998402659996779e-07, |
| "loss": 0.0051, |
| "step": 20410 |
| }, |
| { |
| "epoch": 0.03782504158531751, |
| "grad_norm": 0.5887323617935181, |
| "learning_rate": 1.9984010141178056e-07, |
| "loss": 0.0031, |
| "step": 20420 |
| }, |
| { |
| "epoch": 0.03784356511204881, |
| "grad_norm": 0.9337971806526184, |
| "learning_rate": 1.998399367392001e-07, |
| "loss": 0.0042, |
| "step": 20430 |
| }, |
| { |
| "epoch": 0.037862088638780114, |
| "grad_norm": 0.9736045002937317, |
| "learning_rate": 1.9983977198193664e-07, |
| "loss": 0.0038, |
| "step": 20440 |
| }, |
| { |
| "epoch": 0.03788061216551142, |
| "grad_norm": 0.5290261507034302, |
| "learning_rate": 1.998396071399903e-07, |
| "loss": 0.0036, |
| "step": 20450 |
| }, |
| { |
| "epoch": 0.037899135692242715, |
| "grad_norm": 0.6117266416549683, |
| "learning_rate": 1.9983944221336126e-07, |
| "loss": 0.0041, |
| "step": 20460 |
| }, |
| { |
| "epoch": 0.03791765921897402, |
| "grad_norm": 1.1116174459457397, |
| "learning_rate": 1.9983927720204962e-07, |
| "loss": 0.0038, |
| "step": 20470 |
| }, |
| { |
| "epoch": 0.03793618274570532, |
| "grad_norm": 0.9392820000648499, |
| "learning_rate": 1.9983911210605554e-07, |
| "loss": 0.0045, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.03795470627243662, |
| "grad_norm": 0.9199703335762024, |
| "learning_rate": 1.9983894692537916e-07, |
| "loss": 0.003, |
| "step": 20490 |
| }, |
| { |
| "epoch": 0.037973229799167924, |
| "grad_norm": 0.16939327120780945, |
| "learning_rate": 1.998387816600206e-07, |
| "loss": 0.0037, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.03799175332589923, |
| "grad_norm": 1.0374970436096191, |
| "learning_rate": 1.9983861630998008e-07, |
| "loss": 0.004, |
| "step": 20510 |
| }, |
| { |
| "epoch": 0.038010276852630524, |
| "grad_norm": 0.5100601315498352, |
| "learning_rate": 1.9983845087525763e-07, |
| "loss": 0.0039, |
| "step": 20520 |
| }, |
| { |
| "epoch": 0.03802880037936183, |
| "grad_norm": 1.9221622943878174, |
| "learning_rate": 1.9983828535585346e-07, |
| "loss": 0.0038, |
| "step": 20530 |
| }, |
| { |
| "epoch": 0.03804732390609313, |
| "grad_norm": 1.8519715070724487, |
| "learning_rate": 1.9983811975176766e-07, |
| "loss": 0.0046, |
| "step": 20540 |
| }, |
| { |
| "epoch": 0.03806584743282443, |
| "grad_norm": 1.8802757263183594, |
| "learning_rate": 1.9983795406300042e-07, |
| "loss": 0.0039, |
| "step": 20550 |
| }, |
| { |
| "epoch": 0.03808437095955573, |
| "grad_norm": 0.6118941903114319, |
| "learning_rate": 1.9983778828955185e-07, |
| "loss": 0.0037, |
| "step": 20560 |
| }, |
| { |
| "epoch": 0.03810289448628703, |
| "grad_norm": 1.527833104133606, |
| "learning_rate": 1.9983762243142212e-07, |
| "loss": 0.0033, |
| "step": 20570 |
| }, |
| { |
| "epoch": 0.038121418013018334, |
| "grad_norm": 1.4447176456451416, |
| "learning_rate": 1.9983745648861133e-07, |
| "loss": 0.0051, |
| "step": 20580 |
| }, |
| { |
| "epoch": 0.03813994153974964, |
| "grad_norm": 1.3891228437423706, |
| "learning_rate": 1.9983729046111964e-07, |
| "loss": 0.004, |
| "step": 20590 |
| }, |
| { |
| "epoch": 0.038158465066480934, |
| "grad_norm": 0.9447519183158875, |
| "learning_rate": 1.998371243489472e-07, |
| "loss": 0.005, |
| "step": 20600 |
| }, |
| { |
| "epoch": 0.03817698859321224, |
| "grad_norm": 0.990287184715271, |
| "learning_rate": 1.9983695815209416e-07, |
| "loss": 0.0048, |
| "step": 20610 |
| }, |
| { |
| "epoch": 0.03819551211994354, |
| "grad_norm": 0.8946551084518433, |
| "learning_rate": 1.998367918705606e-07, |
| "loss": 0.0047, |
| "step": 20620 |
| }, |
| { |
| "epoch": 0.03821403564667484, |
| "grad_norm": 1.6752524375915527, |
| "learning_rate": 1.9983662550434677e-07, |
| "loss": 0.0049, |
| "step": 20630 |
| }, |
| { |
| "epoch": 0.03823255917340614, |
| "grad_norm": 0.8208008408546448, |
| "learning_rate": 1.998364590534527e-07, |
| "loss": 0.004, |
| "step": 20640 |
| }, |
| { |
| "epoch": 0.03825108270013745, |
| "grad_norm": 0.775272786617279, |
| "learning_rate": 1.998362925178786e-07, |
| "loss": 0.0039, |
| "step": 20650 |
| }, |
| { |
| "epoch": 0.038269606226868744, |
| "grad_norm": 0.8370658755302429, |
| "learning_rate": 1.9983612589762458e-07, |
| "loss": 0.0055, |
| "step": 20660 |
| }, |
| { |
| "epoch": 0.03828812975360005, |
| "grad_norm": 0.5341131687164307, |
| "learning_rate": 1.998359591926908e-07, |
| "loss": 0.0044, |
| "step": 20670 |
| }, |
| { |
| "epoch": 0.03830665328033135, |
| "grad_norm": 0.6617851257324219, |
| "learning_rate": 1.9983579240307739e-07, |
| "loss": 0.0026, |
| "step": 20680 |
| }, |
| { |
| "epoch": 0.03832517680706265, |
| "grad_norm": 1.443732738494873, |
| "learning_rate": 1.998356255287845e-07, |
| "loss": 0.0043, |
| "step": 20690 |
| }, |
| { |
| "epoch": 0.03834370033379395, |
| "grad_norm": 0.6683312654495239, |
| "learning_rate": 1.9983545856981223e-07, |
| "loss": 0.0049, |
| "step": 20700 |
| }, |
| { |
| "epoch": 0.038362223860525256, |
| "grad_norm": 0.48248207569122314, |
| "learning_rate": 1.9983529152616079e-07, |
| "loss": 0.0039, |
| "step": 20710 |
| }, |
| { |
| "epoch": 0.03838074738725655, |
| "grad_norm": 1.4212145805358887, |
| "learning_rate": 1.9983512439783027e-07, |
| "loss": 0.003, |
| "step": 20720 |
| }, |
| { |
| "epoch": 0.03839927091398786, |
| "grad_norm": 0.9524348974227905, |
| "learning_rate": 1.9983495718482083e-07, |
| "loss": 0.0042, |
| "step": 20730 |
| }, |
| { |
| "epoch": 0.03841779444071916, |
| "grad_norm": 1.2262171506881714, |
| "learning_rate": 1.9983478988713262e-07, |
| "loss": 0.0036, |
| "step": 20740 |
| }, |
| { |
| "epoch": 0.03843631796745046, |
| "grad_norm": 0.4224924147129059, |
| "learning_rate": 1.9983462250476577e-07, |
| "loss": 0.0041, |
| "step": 20750 |
| }, |
| { |
| "epoch": 0.03845484149418176, |
| "grad_norm": 1.181715965270996, |
| "learning_rate": 1.9983445503772044e-07, |
| "loss": 0.0038, |
| "step": 20760 |
| }, |
| { |
| "epoch": 0.03847336502091306, |
| "grad_norm": 1.3067787885665894, |
| "learning_rate": 1.9983428748599674e-07, |
| "loss": 0.0046, |
| "step": 20770 |
| }, |
| { |
| "epoch": 0.03849188854764436, |
| "grad_norm": 1.4510211944580078, |
| "learning_rate": 1.9983411984959485e-07, |
| "loss": 0.0043, |
| "step": 20780 |
| }, |
| { |
| "epoch": 0.038510412074375666, |
| "grad_norm": 0.7801799178123474, |
| "learning_rate": 1.9983395212851488e-07, |
| "loss": 0.0033, |
| "step": 20790 |
| }, |
| { |
| "epoch": 0.03852893560110696, |
| "grad_norm": 1.8517725467681885, |
| "learning_rate": 1.9983378432275698e-07, |
| "loss": 0.0044, |
| "step": 20800 |
| }, |
| { |
| "epoch": 0.03854745912783827, |
| "grad_norm": 1.6459349393844604, |
| "learning_rate": 1.9983361643232127e-07, |
| "loss": 0.0046, |
| "step": 20810 |
| }, |
| { |
| "epoch": 0.03856598265456957, |
| "grad_norm": 1.1100202798843384, |
| "learning_rate": 1.9983344845720797e-07, |
| "loss": 0.0042, |
| "step": 20820 |
| }, |
| { |
| "epoch": 0.03858450618130087, |
| "grad_norm": 0.7286704182624817, |
| "learning_rate": 1.9983328039741716e-07, |
| "loss": 0.0044, |
| "step": 20830 |
| }, |
| { |
| "epoch": 0.03860302970803217, |
| "grad_norm": 0.9118245840072632, |
| "learning_rate": 1.99833112252949e-07, |
| "loss": 0.003, |
| "step": 20840 |
| }, |
| { |
| "epoch": 0.038621553234763475, |
| "grad_norm": 1.8745135068893433, |
| "learning_rate": 1.998329440238036e-07, |
| "loss": 0.0035, |
| "step": 20850 |
| }, |
| { |
| "epoch": 0.03864007676149477, |
| "grad_norm": 0.7710930705070496, |
| "learning_rate": 1.9983277570998113e-07, |
| "loss": 0.0041, |
| "step": 20860 |
| }, |
| { |
| "epoch": 0.038658600288226076, |
| "grad_norm": 2.1815669536590576, |
| "learning_rate": 1.9983260731148175e-07, |
| "loss": 0.0047, |
| "step": 20870 |
| }, |
| { |
| "epoch": 0.03867712381495738, |
| "grad_norm": 1.7133078575134277, |
| "learning_rate": 1.998324388283056e-07, |
| "loss": 0.0046, |
| "step": 20880 |
| }, |
| { |
| "epoch": 0.03869564734168868, |
| "grad_norm": 0.6241070032119751, |
| "learning_rate": 1.9983227026045277e-07, |
| "loss": 0.0043, |
| "step": 20890 |
| }, |
| { |
| "epoch": 0.03871417086841998, |
| "grad_norm": 2.0762457847595215, |
| "learning_rate": 1.9983210160792344e-07, |
| "loss": 0.0038, |
| "step": 20900 |
| }, |
| { |
| "epoch": 0.038732694395151285, |
| "grad_norm": 1.5216177701950073, |
| "learning_rate": 1.9983193287071777e-07, |
| "loss": 0.0035, |
| "step": 20910 |
| }, |
| { |
| "epoch": 0.03875121792188258, |
| "grad_norm": 1.5395363569259644, |
| "learning_rate": 1.9983176404883593e-07, |
| "loss": 0.0041, |
| "step": 20920 |
| }, |
| { |
| "epoch": 0.038769741448613886, |
| "grad_norm": 0.9218603372573853, |
| "learning_rate": 1.9983159514227798e-07, |
| "loss": 0.0047, |
| "step": 20930 |
| }, |
| { |
| "epoch": 0.03878826497534518, |
| "grad_norm": 2.208829164505005, |
| "learning_rate": 1.998314261510441e-07, |
| "loss": 0.0037, |
| "step": 20940 |
| }, |
| { |
| "epoch": 0.038806788502076486, |
| "grad_norm": 1.3221584558486938, |
| "learning_rate": 1.998312570751344e-07, |
| "loss": 0.0037, |
| "step": 20950 |
| }, |
| { |
| "epoch": 0.03882531202880779, |
| "grad_norm": 0.5245024561882019, |
| "learning_rate": 1.9983108791454916e-07, |
| "loss": 0.0037, |
| "step": 20960 |
| }, |
| { |
| "epoch": 0.03884383555553909, |
| "grad_norm": 0.5969715118408203, |
| "learning_rate": 1.9983091866928833e-07, |
| "loss": 0.0045, |
| "step": 20970 |
| }, |
| { |
| "epoch": 0.03886235908227039, |
| "grad_norm": 1.0095936059951782, |
| "learning_rate": 1.998307493393522e-07, |
| "loss": 0.004, |
| "step": 20980 |
| }, |
| { |
| "epoch": 0.038880882609001695, |
| "grad_norm": 1.271608829498291, |
| "learning_rate": 1.9983057992474083e-07, |
| "loss": 0.0046, |
| "step": 20990 |
| }, |
| { |
| "epoch": 0.03889940613573299, |
| "grad_norm": 1.4095211029052734, |
| "learning_rate": 1.9983041042545442e-07, |
| "loss": 0.0046, |
| "step": 21000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1079708, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 3000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|