| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9274431057563588, |
| "eval_steps": 500, |
| "global_step": 4500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00428380187416332, |
| "grad_norm": 1.7204455338427447, |
| "learning_rate": 9.635974304068523e-08, |
| "loss": 0.310353684425354, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.00856760374832664, |
| "grad_norm": 1.5542088461890025, |
| "learning_rate": 2.0342612419700217e-07, |
| "loss": 0.3165663003921509, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01285140562248996, |
| "grad_norm": 1.2986368981078442, |
| "learning_rate": 3.1049250535331905e-07, |
| "loss": 0.2931360721588135, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01713520749665328, |
| "grad_norm": 1.3372226018458075, |
| "learning_rate": 4.1755888650963603e-07, |
| "loss": 0.29114551544189454, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0214190093708166, |
| "grad_norm": 1.065879980657948, |
| "learning_rate": 5.24625267665953e-07, |
| "loss": 0.2506369352340698, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.02570281124497992, |
| "grad_norm": 0.6781359530445974, |
| "learning_rate": 6.3169164882227e-07, |
| "loss": 0.22955031394958497, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.02998661311914324, |
| "grad_norm": 0.5249895834183264, |
| "learning_rate": 7.387580299785868e-07, |
| "loss": 0.22067618370056152, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.03427041499330656, |
| "grad_norm": 0.5375825140839746, |
| "learning_rate": 8.458244111349037e-07, |
| "loss": 0.21640052795410156, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.03855421686746988, |
| "grad_norm": 0.5549702260654302, |
| "learning_rate": 9.528907922912206e-07, |
| "loss": 0.22633485794067382, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0428380187416332, |
| "grad_norm": 0.43849850542690183, |
| "learning_rate": 1.0599571734475375e-06, |
| "loss": 0.20759968757629393, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04712182061579652, |
| "grad_norm": 0.46359825184269493, |
| "learning_rate": 1.1670235546038546e-06, |
| "loss": 0.1973546862602234, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.05140562248995984, |
| "grad_norm": 0.41875998061321557, |
| "learning_rate": 1.2740899357601712e-06, |
| "loss": 0.19023516178131103, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.055689424364123156, |
| "grad_norm": 0.5127942743282548, |
| "learning_rate": 1.3811563169164883e-06, |
| "loss": 0.2127223491668701, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.05997322623828648, |
| "grad_norm": 0.46830156678706125, |
| "learning_rate": 1.4882226980728054e-06, |
| "loss": 0.1934453845024109, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.0642570281124498, |
| "grad_norm": 0.508710707179685, |
| "learning_rate": 1.5952890792291223e-06, |
| "loss": 0.2092526912689209, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.06854082998661312, |
| "grad_norm": 0.5421780654693655, |
| "learning_rate": 1.7023554603854392e-06, |
| "loss": 0.2070756435394287, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.07282463186077644, |
| "grad_norm": 0.493152534196984, |
| "learning_rate": 1.809421841541756e-06, |
| "loss": 0.19875586032867432, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.07710843373493977, |
| "grad_norm": 0.4980754232181657, |
| "learning_rate": 1.916488222698073e-06, |
| "loss": 0.18669115304946898, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.08139223560910308, |
| "grad_norm": 0.5304243381141518, |
| "learning_rate": 2.02355460385439e-06, |
| "loss": 0.2146810531616211, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.0856760374832664, |
| "grad_norm": 0.5500032540817943, |
| "learning_rate": 2.1306209850107067e-06, |
| "loss": 0.19487186670303344, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.08995983935742972, |
| "grad_norm": 0.42930678514550324, |
| "learning_rate": 2.2376873661670238e-06, |
| "loss": 0.18126411437988282, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.09424364123159304, |
| "grad_norm": 0.5711883922291429, |
| "learning_rate": 2.3447537473233404e-06, |
| "loss": 0.2076016664505005, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.09852744310575635, |
| "grad_norm": 0.6678390082859929, |
| "learning_rate": 2.4518201284796575e-06, |
| "loss": 0.20209894180297852, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.10281124497991968, |
| "grad_norm": 0.4835994184206877, |
| "learning_rate": 2.558886509635974e-06, |
| "loss": 0.16395035982131959, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.107095046854083, |
| "grad_norm": 0.42091478620818606, |
| "learning_rate": 2.6659528907922917e-06, |
| "loss": 0.17233937978744507, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.11137884872824631, |
| "grad_norm": 0.5320893808200788, |
| "learning_rate": 2.7730192719486084e-06, |
| "loss": 0.18311020135879516, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.11566265060240964, |
| "grad_norm": 0.5257058584608517, |
| "learning_rate": 2.8800856531049255e-06, |
| "loss": 0.20057764053344726, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.11994645247657296, |
| "grad_norm": 0.5293308935757324, |
| "learning_rate": 2.987152034261242e-06, |
| "loss": 0.1837336540222168, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.12423025435073627, |
| "grad_norm": 0.46642029540934604, |
| "learning_rate": 3.0942184154175592e-06, |
| "loss": 0.19081385135650636, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.1285140562248996, |
| "grad_norm": 0.5628183880631954, |
| "learning_rate": 3.201284796573876e-06, |
| "loss": 0.17274467945098876, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.13279785809906292, |
| "grad_norm": 0.48916829855974603, |
| "learning_rate": 3.308351177730193e-06, |
| "loss": 0.18039458990097046, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.13708165997322624, |
| "grad_norm": 0.5298792138248726, |
| "learning_rate": 3.41541755888651e-06, |
| "loss": 0.1898115634918213, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.14136546184738955, |
| "grad_norm": 0.439555530924186, |
| "learning_rate": 3.5224839400428268e-06, |
| "loss": 0.17530070543289183, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.14564926372155287, |
| "grad_norm": 0.5117824224937999, |
| "learning_rate": 3.629550321199144e-06, |
| "loss": 0.17437742948532103, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.1499330655957162, |
| "grad_norm": 0.4753694102031763, |
| "learning_rate": 3.7366167023554605e-06, |
| "loss": 0.17850689888000487, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.15421686746987953, |
| "grad_norm": 0.6306563088828507, |
| "learning_rate": 3.843683083511778e-06, |
| "loss": 0.18741222620010375, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.15850066934404283, |
| "grad_norm": 0.4694179327929818, |
| "learning_rate": 3.950749464668095e-06, |
| "loss": 0.17026090621948242, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.16278447121820616, |
| "grad_norm": 0.5048197989896139, |
| "learning_rate": 4.057815845824411e-06, |
| "loss": 0.1726588487625122, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.1670682730923695, |
| "grad_norm": 0.524700358081214, |
| "learning_rate": 4.164882226980728e-06, |
| "loss": 0.18944069147109985, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.1713520749665328, |
| "grad_norm": 0.4571670229694066, |
| "learning_rate": 4.2719486081370455e-06, |
| "loss": 0.16420159339904786, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.17563587684069612, |
| "grad_norm": 0.46820018814554304, |
| "learning_rate": 4.379014989293362e-06, |
| "loss": 0.19183117151260376, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.17991967871485945, |
| "grad_norm": 0.46894253012471776, |
| "learning_rate": 4.486081370449679e-06, |
| "loss": 0.17184211015701295, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.18420348058902275, |
| "grad_norm": 0.46858595995063135, |
| "learning_rate": 4.593147751605996e-06, |
| "loss": 0.17618422508239745, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.18848728246318608, |
| "grad_norm": 0.5091947698167847, |
| "learning_rate": 4.700214132762313e-06, |
| "loss": 0.18246437311172486, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.1927710843373494, |
| "grad_norm": 0.5203679422298269, |
| "learning_rate": 4.807280513918631e-06, |
| "loss": 0.16799516677856446, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.1970548862115127, |
| "grad_norm": 0.4306784639956151, |
| "learning_rate": 4.914346895074946e-06, |
| "loss": 0.1661084771156311, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.20133868808567604, |
| "grad_norm": 0.4604689622728843, |
| "learning_rate": 5.021413276231264e-06, |
| "loss": 0.17491416931152343, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.20562248995983937, |
| "grad_norm": 0.47273083263588245, |
| "learning_rate": 5.128479657387581e-06, |
| "loss": 0.16252427101135253, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.20990629183400267, |
| "grad_norm": 0.4611929063195057, |
| "learning_rate": 5.235546038543897e-06, |
| "loss": 0.1942029356956482, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.214190093708166, |
| "grad_norm": 0.4640899103515948, |
| "learning_rate": 5.342612419700215e-06, |
| "loss": 0.1781969666481018, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.214190093708166, |
| "eval_loss": 0.17298774421215057, |
| "eval_runtime": 813.3235, |
| "eval_samples_per_second": 20.41, |
| "eval_steps_per_second": 5.103, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.21847389558232932, |
| "grad_norm": 0.4892486598590822, |
| "learning_rate": 5.4496788008565314e-06, |
| "loss": 0.17805953025817872, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.22275769745649263, |
| "grad_norm": 0.45908639825034264, |
| "learning_rate": 5.556745182012848e-06, |
| "loss": 0.1704517126083374, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.22704149933065595, |
| "grad_norm": 0.4606868972349124, |
| "learning_rate": 5.663811563169165e-06, |
| "loss": 0.17605620622634888, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.23132530120481928, |
| "grad_norm": 0.4883203630934758, |
| "learning_rate": 5.770877944325482e-06, |
| "loss": 0.185607647895813, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.23560910307898258, |
| "grad_norm": 0.480706769968442, |
| "learning_rate": 5.877944325481799e-06, |
| "loss": 0.1776334285736084, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.2398929049531459, |
| "grad_norm": 0.43013827677127364, |
| "learning_rate": 5.985010706638116e-06, |
| "loss": 0.17925962209701538, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.24417670682730924, |
| "grad_norm": 0.43681041122775155, |
| "learning_rate": 6.092077087794433e-06, |
| "loss": 0.15904269218444825, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.24846050870147254, |
| "grad_norm": 0.4057531376060292, |
| "learning_rate": 6.19914346895075e-06, |
| "loss": 0.17201122045516967, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.2527443105756359, |
| "grad_norm": 0.5693511659878766, |
| "learning_rate": 6.3062098501070665e-06, |
| "loss": 0.1783498764038086, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.2570281124497992, |
| "grad_norm": 0.5038894023292907, |
| "learning_rate": 6.413276231263383e-06, |
| "loss": 0.16208181381225586, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2613119143239625, |
| "grad_norm": 0.4255056407918071, |
| "learning_rate": 6.5203426124197015e-06, |
| "loss": 0.1778697967529297, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.26559571619812583, |
| "grad_norm": 0.42463834883952506, |
| "learning_rate": 6.627408993576018e-06, |
| "loss": 0.17847087383270263, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.26987951807228916, |
| "grad_norm": 0.4280904585197745, |
| "learning_rate": 6.734475374732334e-06, |
| "loss": 0.16192808151245117, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.2741633199464525, |
| "grad_norm": 0.4032310396751306, |
| "learning_rate": 6.841541755888651e-06, |
| "loss": 0.1500581383705139, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.2784471218206158, |
| "grad_norm": 0.36936808807497884, |
| "learning_rate": 6.948608137044969e-06, |
| "loss": 0.1805708885192871, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.2827309236947791, |
| "grad_norm": 0.41279770820447376, |
| "learning_rate": 7.055674518201286e-06, |
| "loss": 0.15682549476623536, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.2870147255689424, |
| "grad_norm": 0.48113068018089383, |
| "learning_rate": 7.162740899357602e-06, |
| "loss": 0.17637710571289061, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.29129852744310575, |
| "grad_norm": 0.45019312769869485, |
| "learning_rate": 7.26980728051392e-06, |
| "loss": 0.16801434755325317, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.2955823293172691, |
| "grad_norm": 0.4323771559896418, |
| "learning_rate": 7.3768736616702365e-06, |
| "loss": 0.1738981246948242, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.2998661311914324, |
| "grad_norm": 0.4445466528485117, |
| "learning_rate": 7.483940042826553e-06, |
| "loss": 0.17883800268173217, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.30414993306559573, |
| "grad_norm": 0.4169235110055358, |
| "learning_rate": 7.59100642398287e-06, |
| "loss": 0.1757150650024414, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.30843373493975906, |
| "grad_norm": 0.46124417838321063, |
| "learning_rate": 7.698072805139187e-06, |
| "loss": 0.16563992500305175, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.31271753681392234, |
| "grad_norm": 0.4455214464656937, |
| "learning_rate": 7.805139186295504e-06, |
| "loss": 0.15891735553741454, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.31700133868808567, |
| "grad_norm": 0.48435793526108334, |
| "learning_rate": 7.91220556745182e-06, |
| "loss": 0.16565344333648682, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.321285140562249, |
| "grad_norm": 0.425099998591317, |
| "learning_rate": 8.019271948608137e-06, |
| "loss": 0.16711184978485108, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.3255689424364123, |
| "grad_norm": 0.4137507644842352, |
| "learning_rate": 8.126338329764456e-06, |
| "loss": 0.17436256408691406, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.32985274431057565, |
| "grad_norm": 0.5261718559693129, |
| "learning_rate": 8.23340471092077e-06, |
| "loss": 0.17338960170745848, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.334136546184739, |
| "grad_norm": 0.44161850092055, |
| "learning_rate": 8.340471092077087e-06, |
| "loss": 0.15373395681381224, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.33842034805890225, |
| "grad_norm": 0.42667362111196244, |
| "learning_rate": 8.447537473233406e-06, |
| "loss": 0.170109760761261, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.3427041499330656, |
| "grad_norm": 0.4289414936466275, |
| "learning_rate": 8.554603854389722e-06, |
| "loss": 0.16255849599838257, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.3469879518072289, |
| "grad_norm": 0.4462302049947027, |
| "learning_rate": 8.661670235546039e-06, |
| "loss": 0.1558121919631958, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.35127175368139224, |
| "grad_norm": 0.39502484462695925, |
| "learning_rate": 8.768736616702356e-06, |
| "loss": 0.14783246517181398, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.35555555555555557, |
| "grad_norm": 0.4872082027579418, |
| "learning_rate": 8.875802997858674e-06, |
| "loss": 0.162847638130188, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.3598393574297189, |
| "grad_norm": 0.427450758730554, |
| "learning_rate": 8.98286937901499e-06, |
| "loss": 0.16352038383483886, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.36412315930388217, |
| "grad_norm": 0.4528788471261664, |
| "learning_rate": 9.089935760171307e-06, |
| "loss": 0.16523147821426393, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.3684069611780455, |
| "grad_norm": 0.37599550924220604, |
| "learning_rate": 9.197002141327624e-06, |
| "loss": 0.15126256942749022, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.37269076305220883, |
| "grad_norm": 0.40592589779270666, |
| "learning_rate": 9.30406852248394e-06, |
| "loss": 0.15496088266372682, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.37697456492637216, |
| "grad_norm": 0.39024589028386475, |
| "learning_rate": 9.411134903640257e-06, |
| "loss": 0.16612087488174437, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.3812583668005355, |
| "grad_norm": 0.46291145028584035, |
| "learning_rate": 9.518201284796574e-06, |
| "loss": 0.16229329109191895, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.3855421686746988, |
| "grad_norm": 0.4622950426469592, |
| "learning_rate": 9.625267665952892e-06, |
| "loss": 0.16289321184158326, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.3898259705488621, |
| "grad_norm": 0.4404561037311073, |
| "learning_rate": 9.732334047109209e-06, |
| "loss": 0.16939005851745606, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.3941097724230254, |
| "grad_norm": 0.4522735218377503, |
| "learning_rate": 9.839400428265526e-06, |
| "loss": 0.1664318323135376, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.39839357429718875, |
| "grad_norm": 0.39873486371619626, |
| "learning_rate": 9.946466809421842e-06, |
| "loss": 0.17302082777023314, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.4026773761713521, |
| "grad_norm": 0.4403816711799427, |
| "learning_rate": 9.99999127026893e-06, |
| "loss": 0.1635822534561157, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.4069611780455154, |
| "grad_norm": 0.42736129250630583, |
| "learning_rate": 9.999921432603256e-06, |
| "loss": 0.16364479064941406, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.41124497991967873, |
| "grad_norm": 0.48227838403112244, |
| "learning_rate": 9.999781758247374e-06, |
| "loss": 0.1692502498626709, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.41552878179384206, |
| "grad_norm": 0.4325606288398738, |
| "learning_rate": 9.999572249152187e-06, |
| "loss": 0.1753953218460083, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.41981258366800533, |
| "grad_norm": 0.4184812393572346, |
| "learning_rate": 9.999292908244031e-06, |
| "loss": 0.15361449718475342, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.42409638554216866, |
| "grad_norm": 0.3536766183699388, |
| "learning_rate": 9.998943739424614e-06, |
| "loss": 0.16968698501586915, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.428380187416332, |
| "grad_norm": 0.4421427075174403, |
| "learning_rate": 9.99852474757097e-06, |
| "loss": 0.17062946557998657, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.428380187416332, |
| "eval_loss": 0.1632310301065445, |
| "eval_runtime": 809.0798, |
| "eval_samples_per_second": 20.517, |
| "eval_steps_per_second": 5.129, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.4326639892904953, |
| "grad_norm": 0.4501749565827156, |
| "learning_rate": 9.998035938535395e-06, |
| "loss": 0.17221925258636475, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.43694779116465865, |
| "grad_norm": 0.35159158648894256, |
| "learning_rate": 9.997477319145354e-06, |
| "loss": 0.18630390167236327, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.441231593038822, |
| "grad_norm": 0.38517475012295227, |
| "learning_rate": 9.9968488972034e-06, |
| "loss": 0.17598154544830322, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.44551539491298525, |
| "grad_norm": 0.3612688847646603, |
| "learning_rate": 9.996150681487047e-06, |
| "loss": 0.1822005033493042, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.4497991967871486, |
| "grad_norm": 0.3577703505886406, |
| "learning_rate": 9.995382681748667e-06, |
| "loss": 0.16494649648666382, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.4540829986613119, |
| "grad_norm": 0.4006846696021192, |
| "learning_rate": 9.99454490871534e-06, |
| "loss": 0.1681265115737915, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.45836680053547524, |
| "grad_norm": 0.41680986168641504, |
| "learning_rate": 9.99363737408871e-06, |
| "loss": 0.15723063945770263, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.46265060240963857, |
| "grad_norm": 0.3955828911870276, |
| "learning_rate": 9.992660090544814e-06, |
| "loss": 0.17240710258483888, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.4669344042838019, |
| "grad_norm": 0.40819367835971887, |
| "learning_rate": 9.991613071733923e-06, |
| "loss": 0.1590951204299927, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.47121820615796517, |
| "grad_norm": 0.4961313776161533, |
| "learning_rate": 9.990496332280327e-06, |
| "loss": 0.16744821071624755, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.4755020080321285, |
| "grad_norm": 0.40118583702904315, |
| "learning_rate": 9.989309887782153e-06, |
| "loss": 0.16566444635391236, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.4797858099062918, |
| "grad_norm": 0.38801799234687073, |
| "learning_rate": 9.988053754811129e-06, |
| "loss": 0.16186387538909913, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.48406961178045516, |
| "grad_norm": 0.40747871131177194, |
| "learning_rate": 9.986727950912364e-06, |
| "loss": 0.162397563457489, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.4883534136546185, |
| "grad_norm": 0.3728066796444714, |
| "learning_rate": 9.985332494604107e-06, |
| "loss": 0.1676606059074402, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.4926372155287818, |
| "grad_norm": 0.40985672457156785, |
| "learning_rate": 9.983867405377467e-06, |
| "loss": 0.1700581431388855, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.4969210174029451, |
| "grad_norm": 0.4419642574041659, |
| "learning_rate": 9.982332703696165e-06, |
| "loss": 0.16604260206222535, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.5012048192771085, |
| "grad_norm": 0.4019173064441985, |
| "learning_rate": 9.980728410996235e-06, |
| "loss": 0.16702601909637452, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.5054886211512718, |
| "grad_norm": 0.37849315597886735, |
| "learning_rate": 9.979054549685726e-06, |
| "loss": 0.17048054933547974, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.5097724230254351, |
| "grad_norm": 0.41757098420175776, |
| "learning_rate": 9.977311143144392e-06, |
| "loss": 0.1623483419418335, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.5140562248995983, |
| "grad_norm": 0.39624453257545467, |
| "learning_rate": 9.97549821572337e-06, |
| "loss": 0.18060542345046998, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.5183400267737617, |
| "grad_norm": 0.3790478315082819, |
| "learning_rate": 9.97361579274482e-06, |
| "loss": 0.15714950561523439, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.522623828647925, |
| "grad_norm": 0.47455877319994494, |
| "learning_rate": 9.971663900501597e-06, |
| "loss": 0.1706780195236206, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.5269076305220883, |
| "grad_norm": 0.41051182237414957, |
| "learning_rate": 9.969642566256869e-06, |
| "loss": 0.17303004264831542, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.5311914323962517, |
| "grad_norm": 0.3664624770254722, |
| "learning_rate": 9.967551818243738e-06, |
| "loss": 0.16188311576843262, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.535475234270415, |
| "grad_norm": 0.37091548258017915, |
| "learning_rate": 9.965391685664844e-06, |
| "loss": 0.14944344758987427, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.5397590361445783, |
| "grad_norm": 0.36490771306848957, |
| "learning_rate": 9.963162198691967e-06, |
| "loss": 0.17565066814422609, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.5440428380187416, |
| "grad_norm": 0.36889359791667947, |
| "learning_rate": 9.960863388465592e-06, |
| "loss": 0.14779505729675294, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.548326639892905, |
| "grad_norm": 0.437009279584505, |
| "learning_rate": 9.958495287094485e-06, |
| "loss": 0.16427998542785643, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.5526104417670683, |
| "grad_norm": 0.39554167977080396, |
| "learning_rate": 9.956057927655236e-06, |
| "loss": 0.15541106462478638, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.5568942436412316, |
| "grad_norm": 0.45029869907045383, |
| "learning_rate": 9.953551344191806e-06, |
| "loss": 0.16692056655883789, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.561178045515395, |
| "grad_norm": 0.4199591876603144, |
| "learning_rate": 9.95097557171504e-06, |
| "loss": 0.14758901596069335, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.5654618473895582, |
| "grad_norm": 0.4075053894893693, |
| "learning_rate": 9.948330646202192e-06, |
| "loss": 0.14503839015960693, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.5697456492637215, |
| "grad_norm": 0.43881797258551375, |
| "learning_rate": 9.94561660459641e-06, |
| "loss": 0.16932222843170167, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.5740294511378848, |
| "grad_norm": 0.3663150123238361, |
| "learning_rate": 9.942833484806224e-06, |
| "loss": 0.1607386827468872, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.5783132530120482, |
| "grad_norm": 0.3407640340916305, |
| "learning_rate": 9.939981325705022e-06, |
| "loss": 0.1527782440185547, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.5825970548862115, |
| "grad_norm": 0.37405131401648734, |
| "learning_rate": 9.937060167130499e-06, |
| "loss": 0.171100389957428, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.5868808567603748, |
| "grad_norm": 0.37761828710703715, |
| "learning_rate": 9.934070049884108e-06, |
| "loss": 0.15846436023712157, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.5911646586345382, |
| "grad_norm": 0.3693798885089601, |
| "learning_rate": 9.931011015730481e-06, |
| "loss": 0.16067838668823242, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.5954484605087015, |
| "grad_norm": 0.3911912390175172, |
| "learning_rate": 9.927883107396855e-06, |
| "loss": 0.17477972507476808, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.5997322623828648, |
| "grad_norm": 0.3338272015147582, |
| "learning_rate": 9.924686368572467e-06, |
| "loss": 0.15092020034790038, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.6040160642570281, |
| "grad_norm": 0.4078089060822029, |
| "learning_rate": 9.921420843907954e-06, |
| "loss": 0.1569045066833496, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.6082998661311915, |
| "grad_norm": 0.4794604605869132, |
| "learning_rate": 9.918086579014719e-06, |
| "loss": 0.15916914939880372, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.6125836680053548, |
| "grad_norm": 0.3657509810107675, |
| "learning_rate": 9.914683620464296e-06, |
| "loss": 0.1613703727722168, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.6168674698795181, |
| "grad_norm": 0.3781037238260815, |
| "learning_rate": 9.911212015787705e-06, |
| "loss": 0.1711595058441162, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.6211512717536813, |
| "grad_norm": 0.4177181930728637, |
| "learning_rate": 9.907671813474787e-06, |
| "loss": 0.1607887864112854, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.6254350736278447, |
| "grad_norm": 0.43202846060475897, |
| "learning_rate": 9.904063062973518e-06, |
| "loss": 0.1596811056137085, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.629718875502008, |
| "grad_norm": 0.37967408761934135, |
| "learning_rate": 9.90038581468933e-06, |
| "loss": 0.15738776922225953, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.6340026773761713, |
| "grad_norm": 0.35431221490178816, |
| "learning_rate": 9.8966401199844e-06, |
| "loss": 0.16409718990325928, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.6382864792503347, |
| "grad_norm": 0.5424318304534815, |
| "learning_rate": 9.892826031176932e-06, |
| "loss": 0.1624216079711914, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.642570281124498, |
| "grad_norm": 0.4154998221845867, |
| "learning_rate": 9.888943601540435e-06, |
| "loss": 0.16612136363983154, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.642570281124498, |
| "eval_loss": 0.15596744418144226, |
| "eval_runtime": 5287.3746, |
| "eval_samples_per_second": 3.14, |
| "eval_steps_per_second": 0.785, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.6468540829986613, |
| "grad_norm": 0.31788237500758254, |
| "learning_rate": 9.884992885302964e-06, |
| "loss": 0.16352603435516358, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.6511378848728246, |
| "grad_norm": 0.3933875722388967, |
| "learning_rate": 9.880973937646376e-06, |
| "loss": 0.16239913702011108, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.655421686746988, |
| "grad_norm": 0.3911043138186677, |
| "learning_rate": 9.876886814705557e-06, |
| "loss": 0.15573612451553345, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.6597054886211513, |
| "grad_norm": 0.4165342919082731, |
| "learning_rate": 9.87273157356763e-06, |
| "loss": 0.15565356016159057, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.6639892904953146, |
| "grad_norm": 0.4041990333202639, |
| "learning_rate": 9.868508272271162e-06, |
| "loss": 0.15832991600036622, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.668273092369478, |
| "grad_norm": 0.42249142494241126, |
| "learning_rate": 9.86421696980536e-06, |
| "loss": 0.15069495439529418, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.6725568942436412, |
| "grad_norm": 0.3703727697545347, |
| "learning_rate": 9.859857726109237e-06, |
| "loss": 0.1529747486114502, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.6768406961178045, |
| "grad_norm": 0.340178722202618, |
| "learning_rate": 9.85543060207078e-06, |
| "loss": 0.15539826154708863, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.6811244979919678, |
| "grad_norm": 0.3731049216784043, |
| "learning_rate": 9.850935659526097e-06, |
| "loss": 0.15447347164154052, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.6854082998661312, |
| "grad_norm": 0.4042949825278044, |
| "learning_rate": 9.84637296125856e-06, |
| "loss": 0.17724437713623048, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.6896921017402945, |
| "grad_norm": 0.3660587911460726, |
| "learning_rate": 9.841742570997916e-06, |
| "loss": 0.16080789566040038, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.6939759036144578, |
| "grad_norm": 0.41736075936721456, |
| "learning_rate": 9.837044553419411e-06, |
| "loss": 0.16406190395355225, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.6982597054886212, |
| "grad_norm": 0.36046398358975057, |
| "learning_rate": 9.832278974142872e-06, |
| "loss": 0.15605542659759522, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.7025435073627845, |
| "grad_norm": 0.3954125125143182, |
| "learning_rate": 9.827445899731805e-06, |
| "loss": 0.16570944786071778, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.7068273092369478, |
| "grad_norm": 0.40637254190631067, |
| "learning_rate": 9.822545397692453e-06, |
| "loss": 0.16883254051208496, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.7111111111111111, |
| "grad_norm": 0.41139029483344075, |
| "learning_rate": 9.81757753647286e-06, |
| "loss": 0.16364901065826415, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.7153949129852745, |
| "grad_norm": 0.4536938064672351, |
| "learning_rate": 9.812542385461912e-06, |
| "loss": 0.1675459623336792, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.7196787148594378, |
| "grad_norm": 0.3585184083438791, |
| "learning_rate": 9.807440014988375e-06, |
| "loss": 0.16231054067611694, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.7239625167336011, |
| "grad_norm": 0.3167942544933684, |
| "learning_rate": 9.802270496319896e-06, |
| "loss": 0.14959096908569336, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.7282463186077643, |
| "grad_norm": 0.3699602110663905, |
| "learning_rate": 9.79703390166203e-06, |
| "loss": 0.14526536464691162, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.7325301204819277, |
| "grad_norm": 0.3613102627272191, |
| "learning_rate": 9.791730304157212e-06, |
| "loss": 0.15053074359893798, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.736813922356091, |
| "grad_norm": 0.36315339995103474, |
| "learning_rate": 9.786359777883743e-06, |
| "loss": 0.1579727292060852, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.7410977242302543, |
| "grad_norm": 0.35352051713516114, |
| "learning_rate": 9.78092239785476e-06, |
| "loss": 0.16381702423095704, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.7453815261044177, |
| "grad_norm": 0.41420218762506095, |
| "learning_rate": 9.775418240017183e-06, |
| "loss": 0.16737335920333862, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.749665327978581, |
| "grad_norm": 0.32647328326287134, |
| "learning_rate": 9.769847381250647e-06, |
| "loss": 0.16527401208877562, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.7539491298527443, |
| "grad_norm": 0.35594572768523836, |
| "learning_rate": 9.764209899366451e-06, |
| "loss": 0.17207796573638917, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.7582329317269076, |
| "grad_norm": 0.3529492671194906, |
| "learning_rate": 9.75850587310644e-06, |
| "loss": 0.15534259080886842, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.762516733601071, |
| "grad_norm": 0.41518414925000824, |
| "learning_rate": 9.752735382141931e-06, |
| "loss": 0.17126250267028809, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.7668005354752343, |
| "grad_norm": 0.3416212552791915, |
| "learning_rate": 9.74689850707259e-06, |
| "loss": 0.17300653457641602, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.7710843373493976, |
| "grad_norm": 0.4506868511706448, |
| "learning_rate": 9.740995329425304e-06, |
| "loss": 0.16119366884231567, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.775368139223561, |
| "grad_norm": 0.4127032617250803, |
| "learning_rate": 9.735025931653047e-06, |
| "loss": 0.1660417675971985, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.7796519410977242, |
| "grad_norm": 0.3711426866374276, |
| "learning_rate": 9.728990397133725e-06, |
| "loss": 0.15557256937026978, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.7839357429718875, |
| "grad_norm": 0.4403684798533838, |
| "learning_rate": 9.722888810169015e-06, |
| "loss": 0.14504989385604858, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.7882195448460508, |
| "grad_norm": 0.43523370881285106, |
| "learning_rate": 9.716721255983184e-06, |
| "loss": 0.18080484867095947, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.7925033467202142, |
| "grad_norm": 0.39796977768343905, |
| "learning_rate": 9.710487820721897e-06, |
| "loss": 0.16169551610946656, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.7967871485943775, |
| "grad_norm": 0.3696365244924919, |
| "learning_rate": 9.704188591451021e-06, |
| "loss": 0.1710440158843994, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.8010709504685408, |
| "grad_norm": 0.3501836475183059, |
| "learning_rate": 9.697823656155404e-06, |
| "loss": 0.14459784030914308, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.8053547523427041, |
| "grad_norm": 0.4008873451421875, |
| "learning_rate": 9.691393103737646e-06, |
| "loss": 0.15653254985809326, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.8096385542168675, |
| "grad_norm": 0.3726778794979056, |
| "learning_rate": 9.684897024016856e-06, |
| "loss": 0.15802738666534424, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.8139223560910308, |
| "grad_norm": 0.3602622222902254, |
| "learning_rate": 9.678335507727406e-06, |
| "loss": 0.15577685832977295, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.8182061579651941, |
| "grad_norm": 0.36180831789633733, |
| "learning_rate": 9.671708646517644e-06, |
| "loss": 0.1501927375793457, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.8224899598393575, |
| "grad_norm": 0.4291946610668789, |
| "learning_rate": 9.665016532948643e-06, |
| "loss": 0.1524329662322998, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.8267737617135208, |
| "grad_norm": 0.36439021529215626, |
| "learning_rate": 9.658259260492879e-06, |
| "loss": 0.1579957962036133, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.8310575635876841, |
| "grad_norm": 0.36185634405902617, |
| "learning_rate": 9.651436923532947e-06, |
| "loss": 0.1648595690727234, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.8353413654618473, |
| "grad_norm": 0.3896804732201538, |
| "learning_rate": 9.644549617360227e-06, |
| "loss": 0.14703061580657958, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.8396251673360107, |
| "grad_norm": 0.3270830246578632, |
| "learning_rate": 9.63759743817357e-06, |
| "loss": 0.14643968343734742, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.843908969210174, |
| "grad_norm": 0.36683351098847644, |
| "learning_rate": 9.630580483077934e-06, |
| "loss": 0.15101373195648193, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.8481927710843373, |
| "grad_norm": 0.2637127315901447, |
| "learning_rate": 9.623498850083043e-06, |
| "loss": 0.1591057300567627, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.8524765729585007, |
| "grad_norm": 0.3681053572408943, |
| "learning_rate": 9.616352638102017e-06, |
| "loss": 0.1697171926498413, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.856760374832664, |
| "grad_norm": 0.40805430553066435, |
| "learning_rate": 9.609141946949978e-06, |
| "loss": 0.1591539740562439, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.856760374832664, |
| "eval_loss": 0.15128476917743683, |
| "eval_runtime": 813.5807, |
| "eval_samples_per_second": 20.404, |
| "eval_steps_per_second": 5.101, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.8610441767068273, |
| "grad_norm": 0.3914592710894462, |
| "learning_rate": 9.601866877342673e-06, |
| "loss": 0.15913846492767333, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.8653279785809906, |
| "grad_norm": 0.34232621179600625, |
| "learning_rate": 9.594527530895055e-06, |
| "loss": 0.1589035987854004, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.869611780455154, |
| "grad_norm": 0.35138032967412824, |
| "learning_rate": 9.587124010119866e-06, |
| "loss": 0.15038516521453857, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.8738955823293173, |
| "grad_norm": 0.38790494555500904, |
| "learning_rate": 9.579656418426208e-06, |
| "loss": 0.14970223903656005, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.8781793842034806, |
| "grad_norm": 0.453347749337455, |
| "learning_rate": 9.572124860118099e-06, |
| "loss": 0.15592522621154786, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.882463186077644, |
| "grad_norm": 0.36254040692639466, |
| "learning_rate": 9.564529440393013e-06, |
| "loss": 0.14756847620010377, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.8867469879518072, |
| "grad_norm": 0.28500745218910845, |
| "learning_rate": 9.55687026534041e-06, |
| "loss": 0.15284668207168578, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.8910307898259705, |
| "grad_norm": 0.33059182026983963, |
| "learning_rate": 9.54914744194026e-06, |
| "loss": 0.13931398391723632, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.8953145917001338, |
| "grad_norm": 0.3308533363527482, |
| "learning_rate": 9.541361078061543e-06, |
| "loss": 0.152490496635437, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.8995983935742972, |
| "grad_norm": 0.39410920160803786, |
| "learning_rate": 9.533511282460744e-06, |
| "loss": 0.15455267429351807, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.9038821954484605, |
| "grad_norm": 0.3510760250872994, |
| "learning_rate": 9.525598164780335e-06, |
| "loss": 0.15271444320678712, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.9081659973226238, |
| "grad_norm": 0.34879574406946134, |
| "learning_rate": 9.51762183554724e-06, |
| "loss": 0.145074462890625, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.9124497991967871, |
| "grad_norm": 0.4102513842794922, |
| "learning_rate": 9.5095824061713e-06, |
| "loss": 0.1671789288520813, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.9167336010709505, |
| "grad_norm": 0.32100215647635666, |
| "learning_rate": 9.501479988943705e-06, |
| "loss": 0.14845454692840576, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.9210174029451138, |
| "grad_norm": 0.35303111230486783, |
| "learning_rate": 9.493314697035433e-06, |
| "loss": 0.14766921997070312, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.9253012048192771, |
| "grad_norm": 0.3595530843531657, |
| "learning_rate": 9.48508664449567e-06, |
| "loss": 0.1577920436859131, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.9295850066934405, |
| "grad_norm": 0.3500784633268657, |
| "learning_rate": 9.476795946250213e-06, |
| "loss": 0.15419769287109375, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.9338688085676038, |
| "grad_norm": 0.5035759293187142, |
| "learning_rate": 9.468442718099866e-06, |
| "loss": 0.15254662036895753, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.9381526104417671, |
| "grad_norm": 0.3597669443798906, |
| "learning_rate": 9.460027076718825e-06, |
| "loss": 0.15965031385421752, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.9424364123159303, |
| "grad_norm": 0.32302117680971176, |
| "learning_rate": 9.451549139653043e-06, |
| "loss": 0.15642788410186767, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.9467202141900937, |
| "grad_norm": 0.37709479129796397, |
| "learning_rate": 9.443009025318595e-06, |
| "loss": 0.16215311288833617, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.951004016064257, |
| "grad_norm": 0.41863991954422164, |
| "learning_rate": 9.434406853000017e-06, |
| "loss": 0.16595734357833863, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.9552878179384203, |
| "grad_norm": 0.3895832137317719, |
| "learning_rate": 9.425742742848652e-06, |
| "loss": 0.1542948842048645, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.9595716198125837, |
| "grad_norm": 0.3383760951721925, |
| "learning_rate": 9.417016815880948e-06, |
| "loss": 0.1523042917251587, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.963855421686747, |
| "grad_norm": 0.4388306567649398, |
| "learning_rate": 9.4082291939768e-06, |
| "loss": 0.13539564609527588, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.9681392235609103, |
| "grad_norm": 0.392487987824093, |
| "learning_rate": 9.399379999877816e-06, |
| "loss": 0.16397664546966553, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.9724230254350736, |
| "grad_norm": 0.37843944567360804, |
| "learning_rate": 9.390469357185626e-06, |
| "loss": 0.1599686861038208, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.976706827309237, |
| "grad_norm": 0.36495911845917256, |
| "learning_rate": 9.381497390360146e-06, |
| "loss": 0.16280412673950195, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.9809906291834003, |
| "grad_norm": 0.3098293192725145, |
| "learning_rate": 9.372464224717836e-06, |
| "loss": 0.16709411144256592, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.9852744310575636, |
| "grad_norm": 0.36503501082057177, |
| "learning_rate": 9.36336998642996e-06, |
| "loss": 0.14577250480651854, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.989558232931727, |
| "grad_norm": 0.369748777319339, |
| "learning_rate": 9.354214802520813e-06, |
| "loss": 0.15008455514907837, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.9938420348058902, |
| "grad_norm": 0.38954595915895235, |
| "learning_rate": 9.344998800865949e-06, |
| "loss": 0.16494543552398683, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.9981258366800535, |
| "grad_norm": 0.3263933545214738, |
| "learning_rate": 9.335722110190409e-06, |
| "loss": 0.1547703266143799, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.0021419009370816, |
| "grad_norm": 0.3240736359093112, |
| "learning_rate": 9.326384860066894e-06, |
| "loss": 0.1678773880004883, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.0064257028112449, |
| "grad_norm": 0.4121951074794008, |
| "learning_rate": 9.316987180913993e-06, |
| "loss": 0.13320955038070678, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.0107095046854082, |
| "grad_norm": 0.35703547426799104, |
| "learning_rate": 9.30752920399432e-06, |
| "loss": 0.12546956539154053, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.0149933065595715, |
| "grad_norm": 0.40623072991807463, |
| "learning_rate": 9.298011061412718e-06, |
| "loss": 0.13189778327941895, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.0192771084337349, |
| "grad_norm": 0.44222195266756315, |
| "learning_rate": 9.288432886114388e-06, |
| "loss": 0.12098613977432252, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.0235609103078982, |
| "grad_norm": 0.367684966832025, |
| "learning_rate": 9.278794811883047e-06, |
| "loss": 0.11746659278869628, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.0278447121820615, |
| "grad_norm": 0.33284392755056474, |
| "learning_rate": 9.26909697333905e-06, |
| "loss": 0.12567752599716187, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.0321285140562249, |
| "grad_norm": 0.3455297587313404, |
| "learning_rate": 9.259339505937514e-06, |
| "loss": 0.12083170413970948, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.0364123159303882, |
| "grad_norm": 0.3389705981902334, |
| "learning_rate": 9.249522545966427e-06, |
| "loss": 0.12095551490783692, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.0406961178045515, |
| "grad_norm": 0.40601150502203404, |
| "learning_rate": 9.239646230544741e-06, |
| "loss": 0.14402755498886108, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.0449799196787148, |
| "grad_norm": 0.3559777449007349, |
| "learning_rate": 9.229710697620462e-06, |
| "loss": 0.1495804786682129, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.0492637215528782, |
| "grad_norm": 0.36896684434500243, |
| "learning_rate": 9.219716085968716e-06, |
| "loss": 0.12875673770904542, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.0535475234270415, |
| "grad_norm": 0.39146972255890167, |
| "learning_rate": 9.209662535189814e-06, |
| "loss": 0.13340590000152588, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.0578313253012048, |
| "grad_norm": 0.40291541972442413, |
| "learning_rate": 9.199550185707309e-06, |
| "loss": 0.1337528109550476, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.0621151271753682, |
| "grad_norm": 0.37956437532491505, |
| "learning_rate": 9.189379178766022e-06, |
| "loss": 0.12576285600662232, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.0663989290495315, |
| "grad_norm": 0.41298961387679495, |
| "learning_rate": 9.179149656430077e-06, |
| "loss": 0.1333579182624817, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.0706827309236948, |
| "grad_norm": 0.36210102393181387, |
| "learning_rate": 9.168861761580916e-06, |
| "loss": 0.13212097883224488, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.0706827309236948, |
| "eval_loss": 0.15047596395015717, |
| "eval_runtime": 816.7373, |
| "eval_samples_per_second": 20.325, |
| "eval_steps_per_second": 5.081, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.0749665327978581, |
| "grad_norm": 0.3726254379576281, |
| "learning_rate": 9.158515637915303e-06, |
| "loss": 0.12463078498840333, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.0792503346720215, |
| "grad_norm": 0.37712160221949104, |
| "learning_rate": 9.148111429943316e-06, |
| "loss": 0.12076478004455567, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.0835341365461848, |
| "grad_norm": 0.34263415579260603, |
| "learning_rate": 9.137649282986326e-06, |
| "loss": 0.11901497840881348, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.0878179384203481, |
| "grad_norm": 0.43199587554265134, |
| "learning_rate": 9.127129343174974e-06, |
| "loss": 0.1473910093307495, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.0921017402945115, |
| "grad_norm": 0.3888436375726906, |
| "learning_rate": 9.116551757447124e-06, |
| "loss": 0.12526917457580566, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.0963855421686748, |
| "grad_norm": 0.3720699240255782, |
| "learning_rate": 9.105916673545811e-06, |
| "loss": 0.12781134843826295, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.1006693440428381, |
| "grad_norm": 0.3528738970780735, |
| "learning_rate": 9.095224240017187e-06, |
| "loss": 0.12412866353988647, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.1049531459170012, |
| "grad_norm": 0.4631292939251323, |
| "learning_rate": 9.084474606208426e-06, |
| "loss": 0.11998128890991211, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.1092369477911648, |
| "grad_norm": 0.3596397164936987, |
| "learning_rate": 9.073667922265659e-06, |
| "loss": 0.13821544647216796, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.1135207496653279, |
| "grad_norm": 0.37491061752134996, |
| "learning_rate": 9.062804339131865e-06, |
| "loss": 0.12905315160751343, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.1178045515394912, |
| "grad_norm": 0.33236296782840824, |
| "learning_rate": 9.051884008544769e-06, |
| "loss": 0.11152592897415162, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.1220883534136545, |
| "grad_norm": 0.41099647558351027, |
| "learning_rate": 9.040907083034714e-06, |
| "loss": 0.120727276802063, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.1263721552878179, |
| "grad_norm": 0.3859893211528485, |
| "learning_rate": 9.02987371592254e-06, |
| "loss": 0.14195597171783447, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.1306559571619812, |
| "grad_norm": 0.36839784644083184, |
| "learning_rate": 9.018784061317434e-06, |
| "loss": 0.12041090726852417, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.1349397590361445, |
| "grad_norm": 0.41679177088273905, |
| "learning_rate": 9.007638274114787e-06, |
| "loss": 0.13752386569976807, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.1392235609103079, |
| "grad_norm": 0.38785605712752647, |
| "learning_rate": 8.996436509994022e-06, |
| "loss": 0.12111247777938842, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.1435073627844712, |
| "grad_norm": 0.37367256419499406, |
| "learning_rate": 8.985178925416424e-06, |
| "loss": 0.13275750875473022, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.1477911646586345, |
| "grad_norm": 0.3674681161529881, |
| "learning_rate": 8.973865677622954e-06, |
| "loss": 0.13491373062133788, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.1520749665327978, |
| "grad_norm": 0.34447615774959234, |
| "learning_rate": 8.962496924632051e-06, |
| "loss": 0.13558318614959716, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.1563587684069612, |
| "grad_norm": 0.3564170987558211, |
| "learning_rate": 8.951072825237426e-06, |
| "loss": 0.1193579912185669, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.1606425702811245, |
| "grad_norm": 0.35305355840674923, |
| "learning_rate": 8.939593539005842e-06, |
| "loss": 0.13529754877090455, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.1649263721552878, |
| "grad_norm": 0.35352573268560833, |
| "learning_rate": 8.928059226274894e-06, |
| "loss": 0.12423286437988282, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.1692101740294512, |
| "grad_norm": 0.36689834147420736, |
| "learning_rate": 8.916470048150756e-06, |
| "loss": 0.13518364429473878, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.1734939759036145, |
| "grad_norm": 0.38162988673475534, |
| "learning_rate": 8.90482616650594e-06, |
| "loss": 0.13908401727676392, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.1777777777777778, |
| "grad_norm": 0.40336299975505086, |
| "learning_rate": 8.893127743977036e-06, |
| "loss": 0.13255722522735597, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.1820615796519411, |
| "grad_norm": 0.3324644606155607, |
| "learning_rate": 8.881374943962426e-06, |
| "loss": 0.1357291579246521, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.1863453815261045, |
| "grad_norm": 0.3994255780678427, |
| "learning_rate": 8.869567930620027e-06, |
| "loss": 0.12042539119720459, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.1906291834002678, |
| "grad_norm": 0.35581614764806313, |
| "learning_rate": 8.857706868864977e-06, |
| "loss": 0.13282716274261475, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.1949129852744311, |
| "grad_norm": 0.3106663113756059, |
| "learning_rate": 8.845791924367334e-06, |
| "loss": 0.12471635341644287, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.1991967871485945, |
| "grad_norm": 0.4256051698707425, |
| "learning_rate": 8.833823263549775e-06, |
| "loss": 0.11954612731933593, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.2034805890227578, |
| "grad_norm": 0.41689423223672023, |
| "learning_rate": 8.821801053585254e-06, |
| "loss": 0.12010161876678467, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.2077643908969211, |
| "grad_norm": 0.3817725350186758, |
| "learning_rate": 8.809725462394684e-06, |
| "loss": 0.11917848587036133, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.2120481927710842, |
| "grad_norm": 0.35927209564755835, |
| "learning_rate": 8.797596658644581e-06, |
| "loss": 0.12020325660705566, |
| "step": 2830 |
| }, |
| { |
| "epoch": 1.2163319946452478, |
| "grad_norm": 0.4238739504322855, |
| "learning_rate": 8.785414811744703e-06, |
| "loss": 0.13289868831634521, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.2206157965194109, |
| "grad_norm": 0.39167399451224444, |
| "learning_rate": 8.773180091845701e-06, |
| "loss": 0.12138681411743164, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.2248995983935742, |
| "grad_norm": 0.3947355797116567, |
| "learning_rate": 8.760892669836729e-06, |
| "loss": 0.14103634357452394, |
| "step": 2860 |
| }, |
| { |
| "epoch": 1.2291834002677375, |
| "grad_norm": 0.39740732729868383, |
| "learning_rate": 8.74855271734306e-06, |
| "loss": 0.13904783725738526, |
| "step": 2870 |
| }, |
| { |
| "epoch": 1.2334672021419009, |
| "grad_norm": 0.41730678044784, |
| "learning_rate": 8.736160406723688e-06, |
| "loss": 0.12443190813064575, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.2377510040160642, |
| "grad_norm": 0.3765448851707534, |
| "learning_rate": 8.723715911068931e-06, |
| "loss": 0.1321355938911438, |
| "step": 2890 |
| }, |
| { |
| "epoch": 1.2420348058902275, |
| "grad_norm": 0.3634462539369135, |
| "learning_rate": 8.71121940419799e-06, |
| "loss": 0.14078364372253419, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.2463186077643909, |
| "grad_norm": 0.4094668843551737, |
| "learning_rate": 8.698671060656549e-06, |
| "loss": 0.13006095886230468, |
| "step": 2910 |
| }, |
| { |
| "epoch": 1.2506024096385542, |
| "grad_norm": 0.37644871257316387, |
| "learning_rate": 8.686071055714318e-06, |
| "loss": 0.12324719429016114, |
| "step": 2920 |
| }, |
| { |
| "epoch": 1.2548862115127175, |
| "grad_norm": 0.4032403895979568, |
| "learning_rate": 8.673419565362587e-06, |
| "loss": 0.14000382423400878, |
| "step": 2930 |
| }, |
| { |
| "epoch": 1.2591700133868808, |
| "grad_norm": 0.4398800669174728, |
| "learning_rate": 8.660716766311778e-06, |
| "loss": 0.11818475723266601, |
| "step": 2940 |
| }, |
| { |
| "epoch": 1.2634538152610442, |
| "grad_norm": 0.3615959969952865, |
| "learning_rate": 8.647962835988968e-06, |
| "loss": 0.1338767886161804, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.2677376171352075, |
| "grad_norm": 0.31737564808536584, |
| "learning_rate": 8.635157952535411e-06, |
| "loss": 0.1270219087600708, |
| "step": 2960 |
| }, |
| { |
| "epoch": 1.2720214190093708, |
| "grad_norm": 0.3582723999510731, |
| "learning_rate": 8.622302294804052e-06, |
| "loss": 0.12293977737426758, |
| "step": 2970 |
| }, |
| { |
| "epoch": 1.2763052208835342, |
| "grad_norm": 0.35972611924117803, |
| "learning_rate": 8.609396042357033e-06, |
| "loss": 0.12699666023254394, |
| "step": 2980 |
| }, |
| { |
| "epoch": 1.2805890227576975, |
| "grad_norm": 0.4025263144287077, |
| "learning_rate": 8.596439375463174e-06, |
| "loss": 0.13745148181915284, |
| "step": 2990 |
| }, |
| { |
| "epoch": 1.2848728246318608, |
| "grad_norm": 0.3227213368538748, |
| "learning_rate": 8.583432475095468e-06, |
| "loss": 0.11785190105438233, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.2848728246318608, |
| "eval_loss": 0.1485673487186432, |
| "eval_runtime": 812.0391, |
| "eval_samples_per_second": 20.442, |
| "eval_steps_per_second": 5.111, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.2891566265060241, |
| "grad_norm": 0.35470826251565785, |
| "learning_rate": 8.570375522928543e-06, |
| "loss": 0.12998595237731933, |
| "step": 3010 |
| }, |
| { |
| "epoch": 1.2934404283801875, |
| "grad_norm": 0.3775363159731956, |
| "learning_rate": 8.55726870133613e-06, |
| "loss": 0.11246494054794312, |
| "step": 3020 |
| }, |
| { |
| "epoch": 1.2977242302543508, |
| "grad_norm": 0.36015262199345144, |
| "learning_rate": 8.544112193388513e-06, |
| "loss": 0.1255005955696106, |
| "step": 3030 |
| }, |
| { |
| "epoch": 1.3020080321285141, |
| "grad_norm": 0.32095363304273905, |
| "learning_rate": 8.530906182849971e-06, |
| "loss": 0.14123222827911378, |
| "step": 3040 |
| }, |
| { |
| "epoch": 1.3062918340026775, |
| "grad_norm": 0.35963777187492285, |
| "learning_rate": 8.51765085417622e-06, |
| "loss": 0.12764023542404174, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.3105756358768406, |
| "grad_norm": 0.3545121600646447, |
| "learning_rate": 8.504346392511824e-06, |
| "loss": 0.12473820447921753, |
| "step": 3060 |
| }, |
| { |
| "epoch": 1.3148594377510041, |
| "grad_norm": 0.3752599966671012, |
| "learning_rate": 8.490992983687617e-06, |
| "loss": 0.12995026111602784, |
| "step": 3070 |
| }, |
| { |
| "epoch": 1.3191432396251672, |
| "grad_norm": 0.3326424253698993, |
| "learning_rate": 8.477590814218104e-06, |
| "loss": 0.13189772367477418, |
| "step": 3080 |
| }, |
| { |
| "epoch": 1.3234270414993308, |
| "grad_norm": 0.33944521013309487, |
| "learning_rate": 8.464140071298858e-06, |
| "loss": 0.12935359477996827, |
| "step": 3090 |
| }, |
| { |
| "epoch": 1.3277108433734939, |
| "grad_norm": 0.37010591984301416, |
| "learning_rate": 8.450640942803904e-06, |
| "loss": 0.13249437808990477, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.3319946452476574, |
| "grad_norm": 0.3615034420980659, |
| "learning_rate": 8.437093617283099e-06, |
| "loss": 0.12562718391418456, |
| "step": 3110 |
| }, |
| { |
| "epoch": 1.3362784471218205, |
| "grad_norm": 0.3917767190914898, |
| "learning_rate": 8.423498283959487e-06, |
| "loss": 0.13038911819458007, |
| "step": 3120 |
| }, |
| { |
| "epoch": 1.3405622489959839, |
| "grad_norm": 0.38109850464604067, |
| "learning_rate": 8.40985513272667e-06, |
| "loss": 0.13978877067565917, |
| "step": 3130 |
| }, |
| { |
| "epoch": 1.3448460508701472, |
| "grad_norm": 0.4445890595042772, |
| "learning_rate": 8.39616435414615e-06, |
| "loss": 0.13834033012390137, |
| "step": 3140 |
| }, |
| { |
| "epoch": 1.3491298527443105, |
| "grad_norm": 0.31874973345433283, |
| "learning_rate": 8.38242613944466e-06, |
| "loss": 0.1258203625679016, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.3534136546184738, |
| "grad_norm": 0.33127205404029225, |
| "learning_rate": 8.368640680511507e-06, |
| "loss": 0.12356986999511718, |
| "step": 3160 |
| }, |
| { |
| "epoch": 1.3576974564926372, |
| "grad_norm": 0.416559211705474, |
| "learning_rate": 8.35480816989588e-06, |
| "loss": 0.11982156038284301, |
| "step": 3170 |
| }, |
| { |
| "epoch": 1.3619812583668005, |
| "grad_norm": 0.3660453384090912, |
| "learning_rate": 8.34092880080417e-06, |
| "loss": 0.11788184642791748, |
| "step": 3180 |
| }, |
| { |
| "epoch": 1.3662650602409638, |
| "grad_norm": 0.34339125686903177, |
| "learning_rate": 8.32700276709726e-06, |
| "loss": 0.13102638721466064, |
| "step": 3190 |
| }, |
| { |
| "epoch": 1.3705488621151272, |
| "grad_norm": 0.35489193494077403, |
| "learning_rate": 8.313030263287825e-06, |
| "loss": 0.1122696876525879, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.3748326639892905, |
| "grad_norm": 0.3746174683003833, |
| "learning_rate": 8.299011484537621e-06, |
| "loss": 0.1276139497756958, |
| "step": 3210 |
| }, |
| { |
| "epoch": 1.3791164658634538, |
| "grad_norm": 0.7123969948931433, |
| "learning_rate": 8.284946626654743e-06, |
| "loss": 0.1328984022140503, |
| "step": 3220 |
| }, |
| { |
| "epoch": 1.3834002677376172, |
| "grad_norm": 0.3822847406441411, |
| "learning_rate": 8.270835886090901e-06, |
| "loss": 0.11024882793426513, |
| "step": 3230 |
| }, |
| { |
| "epoch": 1.3876840696117805, |
| "grad_norm": 0.300720958006405, |
| "learning_rate": 8.256679459938681e-06, |
| "loss": 0.11192436218261718, |
| "step": 3240 |
| }, |
| { |
| "epoch": 1.3919678714859438, |
| "grad_norm": 0.36933913785412426, |
| "learning_rate": 8.242477545928775e-06, |
| "loss": 0.1279488682746887, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.3962516733601071, |
| "grad_norm": 0.3733629104677544, |
| "learning_rate": 8.228230342427237e-06, |
| "loss": 0.12411469221115112, |
| "step": 3260 |
| }, |
| { |
| "epoch": 1.4005354752342705, |
| "grad_norm": 0.37527660608807045, |
| "learning_rate": 8.213938048432697e-06, |
| "loss": 0.12071568965911865, |
| "step": 3270 |
| }, |
| { |
| "epoch": 1.4048192771084338, |
| "grad_norm": 0.32477552350056993, |
| "learning_rate": 8.199600863573599e-06, |
| "loss": 0.10580611228942871, |
| "step": 3280 |
| }, |
| { |
| "epoch": 1.4091030789825971, |
| "grad_norm": 0.4717886686044222, |
| "learning_rate": 8.185218988105392e-06, |
| "loss": 0.14088404178619385, |
| "step": 3290 |
| }, |
| { |
| "epoch": 1.4133868808567605, |
| "grad_norm": 0.3538053722734618, |
| "learning_rate": 8.170792622907751e-06, |
| "loss": 0.14626517295837402, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.4176706827309236, |
| "grad_norm": 0.3513040684652719, |
| "learning_rate": 8.156321969481762e-06, |
| "loss": 0.11440718173980713, |
| "step": 3310 |
| }, |
| { |
| "epoch": 1.421954484605087, |
| "grad_norm": 0.3591684736408224, |
| "learning_rate": 8.14180722994711e-06, |
| "loss": 0.13487778902053832, |
| "step": 3320 |
| }, |
| { |
| "epoch": 1.4262382864792502, |
| "grad_norm": 0.3658104845595591, |
| "learning_rate": 8.127248607039254e-06, |
| "loss": 0.11574537754058838, |
| "step": 3330 |
| }, |
| { |
| "epoch": 1.4305220883534138, |
| "grad_norm": 0.3197726257189657, |
| "learning_rate": 8.112646304106593e-06, |
| "loss": 0.12187765836715699, |
| "step": 3340 |
| }, |
| { |
| "epoch": 1.4348058902275769, |
| "grad_norm": 0.3846940368635854, |
| "learning_rate": 8.09800052510764e-06, |
| "loss": 0.11478321552276612, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.4390896921017404, |
| "grad_norm": 0.4176339612370988, |
| "learning_rate": 8.08331147460815e-06, |
| "loss": 0.1217038869857788, |
| "step": 3360 |
| }, |
| { |
| "epoch": 1.4433734939759035, |
| "grad_norm": 0.37885782340374674, |
| "learning_rate": 8.068579357778284e-06, |
| "loss": 0.12176965475082398, |
| "step": 3370 |
| }, |
| { |
| "epoch": 1.4476572958500669, |
| "grad_norm": 0.41287039590285307, |
| "learning_rate": 8.053804380389728e-06, |
| "loss": 0.12061818838119506, |
| "step": 3380 |
| }, |
| { |
| "epoch": 1.4519410977242302, |
| "grad_norm": 0.3494750223733423, |
| "learning_rate": 8.038986748812832e-06, |
| "loss": 0.131140398979187, |
| "step": 3390 |
| }, |
| { |
| "epoch": 1.4562248995983935, |
| "grad_norm": 0.3952195504175884, |
| "learning_rate": 8.024126670013716e-06, |
| "loss": 0.11915416717529297, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.4605087014725568, |
| "grad_norm": 0.3737316963804442, |
| "learning_rate": 8.009224351551386e-06, |
| "loss": 0.11794298887252808, |
| "step": 3410 |
| }, |
| { |
| "epoch": 1.4647925033467202, |
| "grad_norm": 0.3446468035702987, |
| "learning_rate": 7.99428000157483e-06, |
| "loss": 0.1277950167655945, |
| "step": 3420 |
| }, |
| { |
| "epoch": 1.4690763052208835, |
| "grad_norm": 0.37272918562452995, |
| "learning_rate": 7.979293828820119e-06, |
| "loss": 0.14721099138259888, |
| "step": 3430 |
| }, |
| { |
| "epoch": 1.4733601070950468, |
| "grad_norm": 0.33085072922732706, |
| "learning_rate": 7.96426604260748e-06, |
| "loss": 0.11756453514099122, |
| "step": 3440 |
| }, |
| { |
| "epoch": 1.4776439089692102, |
| "grad_norm": 0.320178401178284, |
| "learning_rate": 7.949196852838383e-06, |
| "loss": 0.1269507050514221, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.4819277108433735, |
| "grad_norm": 0.3580459421820677, |
| "learning_rate": 7.934086469992605e-06, |
| "loss": 0.1412634253501892, |
| "step": 3460 |
| }, |
| { |
| "epoch": 1.4862115127175368, |
| "grad_norm": 0.36913989344261383, |
| "learning_rate": 7.918935105125283e-06, |
| "loss": 0.15048539638519287, |
| "step": 3470 |
| }, |
| { |
| "epoch": 1.4904953145917001, |
| "grad_norm": 0.38425139309308326, |
| "learning_rate": 7.903742969863982e-06, |
| "loss": 0.13397784233093263, |
| "step": 3480 |
| }, |
| { |
| "epoch": 1.4947791164658635, |
| "grad_norm": 0.3627531984044689, |
| "learning_rate": 7.88851027640572e-06, |
| "loss": 0.11737120151519775, |
| "step": 3490 |
| }, |
| { |
| "epoch": 1.4990629183400268, |
| "grad_norm": 0.30678086877528343, |
| "learning_rate": 7.873237237514024e-06, |
| "loss": 0.1271947741508484, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.4990629183400268, |
| "eval_loss": 0.14634032547473907, |
| "eval_runtime": 11508.5453, |
| "eval_samples_per_second": 1.442, |
| "eval_steps_per_second": 0.361, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.5033467202141901, |
| "grad_norm": 0.3989430501599751, |
| "learning_rate": 7.857924066515941e-06, |
| "loss": 0.1253154993057251, |
| "step": 3510 |
| }, |
| { |
| "epoch": 1.5076305220883535, |
| "grad_norm": 0.3712393247049027, |
| "learning_rate": 7.842570977299067e-06, |
| "loss": 0.13159399032592772, |
| "step": 3520 |
| }, |
| { |
| "epoch": 1.5119143239625168, |
| "grad_norm": 0.39043985321189406, |
| "learning_rate": 7.827178184308559e-06, |
| "loss": 0.12818803787231445, |
| "step": 3530 |
| }, |
| { |
| "epoch": 1.51619812583668, |
| "grad_norm": 0.3783719248133356, |
| "learning_rate": 7.81174590254414e-06, |
| "loss": 0.12482264041900634, |
| "step": 3540 |
| }, |
| { |
| "epoch": 1.5204819277108435, |
| "grad_norm": 0.33627341086836304, |
| "learning_rate": 7.796274347557094e-06, |
| "loss": 0.1259792685508728, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.5247657295850066, |
| "grad_norm": 0.3307003231873695, |
| "learning_rate": 7.780763735447252e-06, |
| "loss": 0.11816374063491822, |
| "step": 3560 |
| }, |
| { |
| "epoch": 1.52904953145917, |
| "grad_norm": 0.41275730039950287, |
| "learning_rate": 7.765214282859981e-06, |
| "loss": 0.12664893865585328, |
| "step": 3570 |
| }, |
| { |
| "epoch": 1.5333333333333332, |
| "grad_norm": 0.3033638102712773, |
| "learning_rate": 7.749626206983157e-06, |
| "loss": 0.1236607551574707, |
| "step": 3580 |
| }, |
| { |
| "epoch": 1.5376171352074968, |
| "grad_norm": 0.3554700928985279, |
| "learning_rate": 7.733999725544126e-06, |
| "loss": 0.12761454582214354, |
| "step": 3590 |
| }, |
| { |
| "epoch": 1.5419009370816599, |
| "grad_norm": 0.35291611398156203, |
| "learning_rate": 7.718335056806665e-06, |
| "loss": 0.1287233352661133, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.5461847389558234, |
| "grad_norm": 0.3567309323303257, |
| "learning_rate": 7.702632419567937e-06, |
| "loss": 0.14273253679275513, |
| "step": 3610 |
| }, |
| { |
| "epoch": 1.5504685408299865, |
| "grad_norm": 0.3185688429925057, |
| "learning_rate": 7.68689203315543e-06, |
| "loss": 0.12159850597381591, |
| "step": 3620 |
| }, |
| { |
| "epoch": 1.55475234270415, |
| "grad_norm": 0.36346114221079345, |
| "learning_rate": 7.671114117423896e-06, |
| "loss": 0.12236592769622803, |
| "step": 3630 |
| }, |
| { |
| "epoch": 1.5590361445783132, |
| "grad_norm": 0.28944951523091206, |
| "learning_rate": 7.655298892752281e-06, |
| "loss": 0.1200286865234375, |
| "step": 3640 |
| }, |
| { |
| "epoch": 1.5633199464524767, |
| "grad_norm": 0.3320579302602756, |
| "learning_rate": 7.639446580040647e-06, |
| "loss": 0.13653804063796998, |
| "step": 3650 |
| }, |
| { |
| "epoch": 1.5676037483266398, |
| "grad_norm": 0.3367309725103469, |
| "learning_rate": 7.623557400707081e-06, |
| "loss": 0.12761712074279785, |
| "step": 3660 |
| }, |
| { |
| "epoch": 1.5718875502008032, |
| "grad_norm": 0.36883406849675304, |
| "learning_rate": 7.607631576684611e-06, |
| "loss": 0.12503886222839355, |
| "step": 3670 |
| }, |
| { |
| "epoch": 1.5761713520749665, |
| "grad_norm": 0.35021731907363346, |
| "learning_rate": 7.5916693304181e-06, |
| "loss": 0.11194202899932862, |
| "step": 3680 |
| }, |
| { |
| "epoch": 1.5804551539491298, |
| "grad_norm": 0.3466816731323651, |
| "learning_rate": 7.575670884861142e-06, |
| "loss": 0.11533315181732177, |
| "step": 3690 |
| }, |
| { |
| "epoch": 1.5847389558232932, |
| "grad_norm": 0.3695468619685566, |
| "learning_rate": 7.559636463472941e-06, |
| "loss": 0.12558252811431886, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.5890227576974565, |
| "grad_norm": 0.33317140225660996, |
| "learning_rate": 7.543566290215205e-06, |
| "loss": 0.11223011016845703, |
| "step": 3710 |
| }, |
| { |
| "epoch": 1.5933065595716198, |
| "grad_norm": 0.3802726049715593, |
| "learning_rate": 7.5274605895490014e-06, |
| "loss": 0.11428353786468506, |
| "step": 3720 |
| }, |
| { |
| "epoch": 1.5975903614457831, |
| "grad_norm": 0.3502543345535625, |
| "learning_rate": 7.511319586431631e-06, |
| "loss": 0.12747797966003419, |
| "step": 3730 |
| }, |
| { |
| "epoch": 1.6018741633199465, |
| "grad_norm": 0.48600388474175416, |
| "learning_rate": 7.495143506313484e-06, |
| "loss": 0.12503063678741455, |
| "step": 3740 |
| }, |
| { |
| "epoch": 1.6061579651941098, |
| "grad_norm": 0.402765639804346, |
| "learning_rate": 7.478932575134887e-06, |
| "loss": 0.1338959217071533, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.6104417670682731, |
| "grad_norm": 0.3921866593643898, |
| "learning_rate": 7.462687019322957e-06, |
| "loss": 0.11669353246688843, |
| "step": 3760 |
| }, |
| { |
| "epoch": 1.6147255689424365, |
| "grad_norm": 0.33652188082752615, |
| "learning_rate": 7.446407065788428e-06, |
| "loss": 0.12007842063903809, |
| "step": 3770 |
| }, |
| { |
| "epoch": 1.6190093708165998, |
| "grad_norm": 0.4118194067707435, |
| "learning_rate": 7.4300929419224866e-06, |
| "loss": 0.12169758081436158, |
| "step": 3780 |
| }, |
| { |
| "epoch": 1.623293172690763, |
| "grad_norm": 0.36857356877163894, |
| "learning_rate": 7.413744875593597e-06, |
| "loss": 0.12564884424209594, |
| "step": 3790 |
| }, |
| { |
| "epoch": 1.6275769745649264, |
| "grad_norm": 0.350693413841003, |
| "learning_rate": 7.397363095144318e-06, |
| "loss": 0.12418256998062134, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.6318607764390896, |
| "grad_norm": 0.3712677998496879, |
| "learning_rate": 7.380947829388108e-06, |
| "loss": 0.12151600122451782, |
| "step": 3810 |
| }, |
| { |
| "epoch": 1.636144578313253, |
| "grad_norm": 0.4183039288576934, |
| "learning_rate": 7.364499307606136e-06, |
| "loss": 0.11588020324707031, |
| "step": 3820 |
| }, |
| { |
| "epoch": 1.6404283801874162, |
| "grad_norm": 0.39624838378484395, |
| "learning_rate": 7.348017759544075e-06, |
| "loss": 0.12545753717422486, |
| "step": 3830 |
| }, |
| { |
| "epoch": 1.6447121820615798, |
| "grad_norm": 0.335543915765519, |
| "learning_rate": 7.331503415408899e-06, |
| "loss": 0.11865659952163696, |
| "step": 3840 |
| }, |
| { |
| "epoch": 1.6489959839357429, |
| "grad_norm": 0.30699590589486353, |
| "learning_rate": 7.3149565058656545e-06, |
| "loss": 0.11257133483886719, |
| "step": 3850 |
| }, |
| { |
| "epoch": 1.6532797858099064, |
| "grad_norm": 0.4211864176178027, |
| "learning_rate": 7.298377262034258e-06, |
| "loss": 0.12412948608398437, |
| "step": 3860 |
| }, |
| { |
| "epoch": 1.6575635876840695, |
| "grad_norm": 0.5480668142726313, |
| "learning_rate": 7.281765915486247e-06, |
| "loss": 0.11110868453979492, |
| "step": 3870 |
| }, |
| { |
| "epoch": 1.661847389558233, |
| "grad_norm": 0.38707346036306395, |
| "learning_rate": 7.265122698241562e-06, |
| "loss": 0.1353888154029846, |
| "step": 3880 |
| }, |
| { |
| "epoch": 1.6661311914323962, |
| "grad_norm": 0.3496172916459521, |
| "learning_rate": 7.248447842765298e-06, |
| "loss": 0.12294532060623169, |
| "step": 3890 |
| }, |
| { |
| "epoch": 1.6704149933065597, |
| "grad_norm": 0.3178905513170639, |
| "learning_rate": 7.231741581964455e-06, |
| "loss": 0.11635351181030273, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.6746987951807228, |
| "grad_norm": 0.37730744546548595, |
| "learning_rate": 7.2150041491846965e-06, |
| "loss": 0.13707247972488404, |
| "step": 3910 |
| }, |
| { |
| "epoch": 1.6789825970548862, |
| "grad_norm": 0.4258774014748926, |
| "learning_rate": 7.198235778207072e-06, |
| "loss": 0.11108559370040894, |
| "step": 3920 |
| }, |
| { |
| "epoch": 1.6832663989290495, |
| "grad_norm": 0.3269689561831232, |
| "learning_rate": 7.181436703244773e-06, |
| "loss": 0.13123619556427002, |
| "step": 3930 |
| }, |
| { |
| "epoch": 1.6875502008032128, |
| "grad_norm": 0.3328432989440898, |
| "learning_rate": 7.1646071589398406e-06, |
| "loss": 0.11167018413543701, |
| "step": 3940 |
| }, |
| { |
| "epoch": 1.6918340026773762, |
| "grad_norm": 0.3844316794696797, |
| "learning_rate": 7.147747380359905e-06, |
| "loss": 0.11800698041915894, |
| "step": 3950 |
| }, |
| { |
| "epoch": 1.6961178045515395, |
| "grad_norm": 0.33099179444642823, |
| "learning_rate": 7.130857602994894e-06, |
| "loss": 0.13457157611846923, |
| "step": 3960 |
| }, |
| { |
| "epoch": 1.7004016064257028, |
| "grad_norm": 0.3147285218500962, |
| "learning_rate": 7.113938062753742e-06, |
| "loss": 0.13172318935394287, |
| "step": 3970 |
| }, |
| { |
| "epoch": 1.7046854082998661, |
| "grad_norm": 0.3592833207498237, |
| "learning_rate": 7.0969889959611045e-06, |
| "loss": 0.1196314811706543, |
| "step": 3980 |
| }, |
| { |
| "epoch": 1.7089692101740295, |
| "grad_norm": 0.29811223409083043, |
| "learning_rate": 7.080010639354045e-06, |
| "loss": 0.11256670951843262, |
| "step": 3990 |
| }, |
| { |
| "epoch": 1.7132530120481928, |
| "grad_norm": 0.4270868815948092, |
| "learning_rate": 7.063003230078734e-06, |
| "loss": 0.12309803962707519, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.7132530120481928, |
| "eval_loss": 0.144321471452713, |
| "eval_runtime": 817.2721, |
| "eval_samples_per_second": 20.311, |
| "eval_steps_per_second": 5.078, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.7175368139223561, |
| "grad_norm": 0.3733167797076492, |
| "learning_rate": 7.045967005687141e-06, |
| "loss": 0.11690073013305664, |
| "step": 4010 |
| }, |
| { |
| "epoch": 1.7218206157965195, |
| "grad_norm": 0.32612931848843507, |
| "learning_rate": 7.028902204133711e-06, |
| "loss": 0.1235615611076355, |
| "step": 4020 |
| }, |
| { |
| "epoch": 1.7261044176706828, |
| "grad_norm": 0.3537546537362819, |
| "learning_rate": 7.011809063772038e-06, |
| "loss": 0.1282111883163452, |
| "step": 4030 |
| }, |
| { |
| "epoch": 1.730388219544846, |
| "grad_norm": 0.41855495134878623, |
| "learning_rate": 6.994687823351547e-06, |
| "loss": 0.13276000022888185, |
| "step": 4040 |
| }, |
| { |
| "epoch": 1.7346720214190094, |
| "grad_norm": 0.3640723677373699, |
| "learning_rate": 6.9775387220141465e-06, |
| "loss": 0.12338956594467163, |
| "step": 4050 |
| }, |
| { |
| "epoch": 1.7389558232931726, |
| "grad_norm": 0.348482478201222, |
| "learning_rate": 6.960361999290894e-06, |
| "loss": 0.1142328143119812, |
| "step": 4060 |
| }, |
| { |
| "epoch": 1.743239625167336, |
| "grad_norm": 0.41291989661610773, |
| "learning_rate": 6.943157895098656e-06, |
| "loss": 0.12496788501739502, |
| "step": 4070 |
| }, |
| { |
| "epoch": 1.7475234270414992, |
| "grad_norm": 0.31746340210362767, |
| "learning_rate": 6.925926649736745e-06, |
| "loss": 0.11045465469360352, |
| "step": 4080 |
| }, |
| { |
| "epoch": 1.7518072289156628, |
| "grad_norm": 0.32179304285895316, |
| "learning_rate": 6.9086685038835725e-06, |
| "loss": 0.13367241621017456, |
| "step": 4090 |
| }, |
| { |
| "epoch": 1.7560910307898259, |
| "grad_norm": 0.3467502021616522, |
| "learning_rate": 6.891383698593283e-06, |
| "loss": 0.11450705528259278, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.7603748326639894, |
| "grad_norm": 0.37824785627911034, |
| "learning_rate": 6.874072475292388e-06, |
| "loss": 0.11085845232009887, |
| "step": 4110 |
| }, |
| { |
| "epoch": 1.7646586345381525, |
| "grad_norm": 0.33242640245264393, |
| "learning_rate": 6.856735075776395e-06, |
| "loss": 0.12101356983184815, |
| "step": 4120 |
| }, |
| { |
| "epoch": 1.768942436412316, |
| "grad_norm": 0.3295693613929198, |
| "learning_rate": 6.839371742206432e-06, |
| "loss": 0.11143279075622559, |
| "step": 4130 |
| }, |
| { |
| "epoch": 1.7732262382864792, |
| "grad_norm": 0.41043258389255455, |
| "learning_rate": 6.821982717105855e-06, |
| "loss": 0.11657199859619141, |
| "step": 4140 |
| }, |
| { |
| "epoch": 1.7775100401606427, |
| "grad_norm": 0.3336241961556357, |
| "learning_rate": 6.804568243356876e-06, |
| "loss": 0.12107970714569091, |
| "step": 4150 |
| }, |
| { |
| "epoch": 1.7817938420348058, |
| "grad_norm": 0.404764797519025, |
| "learning_rate": 6.7871285641971576e-06, |
| "loss": 0.12142288684844971, |
| "step": 4160 |
| }, |
| { |
| "epoch": 1.7860776439089692, |
| "grad_norm": 0.35528280014790076, |
| "learning_rate": 6.769663923216419e-06, |
| "loss": 0.14445422887802123, |
| "step": 4170 |
| }, |
| { |
| "epoch": 1.7903614457831325, |
| "grad_norm": 0.36424811344112645, |
| "learning_rate": 6.75217456435304e-06, |
| "loss": 0.11748452186584472, |
| "step": 4180 |
| }, |
| { |
| "epoch": 1.7946452476572958, |
| "grad_norm": 0.3580564279402089, |
| "learning_rate": 6.734660731890645e-06, |
| "loss": 0.11877243518829346, |
| "step": 4190 |
| }, |
| { |
| "epoch": 1.7989290495314592, |
| "grad_norm": 0.3945693311810663, |
| "learning_rate": 6.717122670454701e-06, |
| "loss": 0.12274388074874878, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.8032128514056225, |
| "grad_norm": 0.3274495553953029, |
| "learning_rate": 6.699560625009085e-06, |
| "loss": 0.11418673992156983, |
| "step": 4210 |
| }, |
| { |
| "epoch": 1.8074966532797858, |
| "grad_norm": 0.43405948322435506, |
| "learning_rate": 6.6819748408526775e-06, |
| "loss": 0.11989142894744872, |
| "step": 4220 |
| }, |
| { |
| "epoch": 1.8117804551539491, |
| "grad_norm": 0.34302792735595455, |
| "learning_rate": 6.6643655636159325e-06, |
| "loss": 0.10752333402633667, |
| "step": 4230 |
| }, |
| { |
| "epoch": 1.8160642570281125, |
| "grad_norm": 0.38396837748701773, |
| "learning_rate": 6.646733039257442e-06, |
| "loss": 0.12758421897888184, |
| "step": 4240 |
| }, |
| { |
| "epoch": 1.8203480589022758, |
| "grad_norm": 0.35943197748111966, |
| "learning_rate": 6.629077514060501e-06, |
| "loss": 0.11687214374542236, |
| "step": 4250 |
| }, |
| { |
| "epoch": 1.8246318607764391, |
| "grad_norm": 0.33900665564961463, |
| "learning_rate": 6.611399234629679e-06, |
| "loss": 0.1235961675643921, |
| "step": 4260 |
| }, |
| { |
| "epoch": 1.8289156626506025, |
| "grad_norm": 0.36539098779168305, |
| "learning_rate": 6.593698447887357e-06, |
| "loss": 0.12241628170013427, |
| "step": 4270 |
| }, |
| { |
| "epoch": 1.8331994645247658, |
| "grad_norm": 0.38361329899883734, |
| "learning_rate": 6.575975401070291e-06, |
| "loss": 0.12448443174362182, |
| "step": 4280 |
| }, |
| { |
| "epoch": 1.837483266398929, |
| "grad_norm": 0.4014122394041882, |
| "learning_rate": 6.5582303417261605e-06, |
| "loss": 0.1193004846572876, |
| "step": 4290 |
| }, |
| { |
| "epoch": 1.8417670682730924, |
| "grad_norm": 0.3678903848404944, |
| "learning_rate": 6.540463517710099e-06, |
| "loss": 0.1212453842163086, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.8460508701472556, |
| "grad_norm": 0.3251163301086072, |
| "learning_rate": 6.5226751771812476e-06, |
| "loss": 0.12798908948898316, |
| "step": 4310 |
| }, |
| { |
| "epoch": 1.850334672021419, |
| "grad_norm": 0.3415099254328554, |
| "learning_rate": 6.5048655685992705e-06, |
| "loss": 0.13018690347671508, |
| "step": 4320 |
| }, |
| { |
| "epoch": 1.8546184738955822, |
| "grad_norm": 0.3905905047279772, |
| "learning_rate": 6.487034940720902e-06, |
| "loss": 0.12057719230651856, |
| "step": 4330 |
| }, |
| { |
| "epoch": 1.8589022757697458, |
| "grad_norm": 0.3646836032160996, |
| "learning_rate": 6.469183542596464e-06, |
| "loss": 0.13052282333374024, |
| "step": 4340 |
| }, |
| { |
| "epoch": 1.8631860776439089, |
| "grad_norm": 0.33435104754269, |
| "learning_rate": 6.451311623566386e-06, |
| "loss": 0.11543186902999877, |
| "step": 4350 |
| }, |
| { |
| "epoch": 1.8674698795180724, |
| "grad_norm": 0.3562601136655919, |
| "learning_rate": 6.433419433257726e-06, |
| "loss": 0.12250864505767822, |
| "step": 4360 |
| }, |
| { |
| "epoch": 1.8717536813922355, |
| "grad_norm": 0.3226539154934918, |
| "learning_rate": 6.415507221580678e-06, |
| "loss": 0.12082786560058593, |
| "step": 4370 |
| }, |
| { |
| "epoch": 1.876037483266399, |
| "grad_norm": 0.3635681601652211, |
| "learning_rate": 6.397575238725091e-06, |
| "loss": 0.12619302272796631, |
| "step": 4380 |
| }, |
| { |
| "epoch": 1.8803212851405622, |
| "grad_norm": 0.3607934399845053, |
| "learning_rate": 6.379623735156968e-06, |
| "loss": 0.12855522632598876, |
| "step": 4390 |
| }, |
| { |
| "epoch": 1.8846050870147257, |
| "grad_norm": 0.33220984445822355, |
| "learning_rate": 6.361652961614966e-06, |
| "loss": 0.11576036214828492, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.8888888888888888, |
| "grad_norm": 0.3957629269071009, |
| "learning_rate": 6.343663169106897e-06, |
| "loss": 0.12123892307281495, |
| "step": 4410 |
| }, |
| { |
| "epoch": 1.8931726907630522, |
| "grad_norm": 0.38648528140436955, |
| "learning_rate": 6.325654608906228e-06, |
| "loss": 0.13391902446746826, |
| "step": 4420 |
| }, |
| { |
| "epoch": 1.8974564926372155, |
| "grad_norm": 0.322831029116286, |
| "learning_rate": 6.307627532548554e-06, |
| "loss": 0.11682146787643433, |
| "step": 4430 |
| }, |
| { |
| "epoch": 1.9017402945113788, |
| "grad_norm": 0.34943896220332243, |
| "learning_rate": 6.289582191828102e-06, |
| "loss": 0.10885384082794189, |
| "step": 4440 |
| }, |
| { |
| "epoch": 1.9060240963855422, |
| "grad_norm": 0.3885943387224764, |
| "learning_rate": 6.2715188387942085e-06, |
| "loss": 0.11223304271697998, |
| "step": 4450 |
| }, |
| { |
| "epoch": 1.9103078982597055, |
| "grad_norm": 0.336737608622642, |
| "learning_rate": 6.253437725747795e-06, |
| "loss": 0.11982736587524415, |
| "step": 4460 |
| }, |
| { |
| "epoch": 1.9145917001338688, |
| "grad_norm": 0.3692087496930761, |
| "learning_rate": 6.235339105237849e-06, |
| "loss": 0.12470091581344604, |
| "step": 4470 |
| }, |
| { |
| "epoch": 1.9188755020080321, |
| "grad_norm": 0.39542747620483304, |
| "learning_rate": 6.217223230057891e-06, |
| "loss": 0.1260706901550293, |
| "step": 4480 |
| }, |
| { |
| "epoch": 1.9231593038821955, |
| "grad_norm": 0.36423974792020714, |
| "learning_rate": 6.199090353242452e-06, |
| "loss": 0.11962894201278687, |
| "step": 4490 |
| }, |
| { |
| "epoch": 1.9274431057563588, |
| "grad_norm": 0.36962654876484385, |
| "learning_rate": 6.18094072806353e-06, |
| "loss": 0.11819722652435302, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.9274431057563588, |
| "eval_loss": 0.14271628856658936, |
| "eval_runtime": 1019.4039, |
| "eval_samples_per_second": 16.284, |
| "eval_steps_per_second": 4.071, |
| "step": 4500 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 9340, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 481757134651392.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |