{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9997450352701209,
  "eval_steps": 500,
  "global_step": 3529,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002832941443100371,
      "grad_norm": 174.56146240234375,
      "learning_rate": 9.433962264150944e-07,
      "loss": 17.5632,
      "step": 10
    },
    {
      "epoch": 0.005665882886200742,
      "grad_norm": 99.76522064208984,
      "learning_rate": 1.8867924528301889e-06,
      "loss": 16.5212,
      "step": 20
    },
    {
      "epoch": 0.008498824329301113,
      "grad_norm": 61.843570709228516,
      "learning_rate": 2.830188679245283e-06,
      "loss": 14.66,
      "step": 30
    },
    {
      "epoch": 0.011331765772401484,
      "grad_norm": 58.26314926147461,
      "learning_rate": 3.7735849056603777e-06,
      "loss": 12.8877,
      "step": 40
    },
    {
      "epoch": 0.014164707215501856,
      "grad_norm": 64.67306518554688,
      "learning_rate": 4.716981132075472e-06,
      "loss": 12.6102,
      "step": 50
    },
    {
      "epoch": 0.016997648658602225,
      "grad_norm": 40.95338439941406,
      "learning_rate": 5.660377358490566e-06,
      "loss": 10.3169,
      "step": 60
    },
    {
      "epoch": 0.019830590101702596,
      "grad_norm": 59.99547576904297,
      "learning_rate": 6.60377358490566e-06,
      "loss": 10.4793,
      "step": 70
    },
    {
      "epoch": 0.022663531544802967,
      "grad_norm": 69.4089584350586,
      "learning_rate": 7.5471698113207555e-06,
      "loss": 10.2298,
      "step": 80
    },
    {
      "epoch": 0.02549647298790334,
      "grad_norm": 38.2027702331543,
      "learning_rate": 8.49056603773585e-06,
      "loss": 8.5679,
      "step": 90
    },
    {
      "epoch": 0.028329414431003713,
      "grad_norm": 41.05867004394531,
      "learning_rate": 9.433962264150944e-06,
      "loss": 8.6834,
      "step": 100
    },
    {
      "epoch": 0.031162355874104083,
      "grad_norm": 45.03547286987305,
      "learning_rate": 9.999966306552455e-06,
      "loss": 7.8071,
      "step": 110
    },
    {
      "epoch": 0.03399529731720445,
      "grad_norm": 43.03623580932617,
      "learning_rate": 9.999587260482597e-06,
      "loss": 8.2945,
      "step": 120
    },
    {
      "epoch": 0.036828238760304825,
      "grad_norm": 37.273048400878906,
      "learning_rate": 9.998787083568112e-06,
      "loss": 8.0273,
      "step": 130
    },
    {
      "epoch": 0.03966118020340519,
      "grad_norm": 55.94173049926758,
      "learning_rate": 9.997565843210401e-06,
      "loss": 7.1597,
      "step": 140
    },
    {
      "epoch": 0.04249412164650557,
      "grad_norm": 50.30799102783203,
      "learning_rate": 9.995923642278351e-06,
      "loss": 7.2908,
      "step": 150
    },
    {
      "epoch": 0.045327063089605935,
      "grad_norm": 53.113243103027344,
      "learning_rate": 9.993860619099673e-06,
      "loss": 5.9006,
      "step": 160
    },
    {
      "epoch": 0.04816000453270631,
      "grad_norm": 51.57769012451172,
      "learning_rate": 9.991376947449254e-06,
      "loss": 8.8304,
      "step": 170
    },
    {
      "epoch": 0.05099294597580668,
      "grad_norm": 38.17790985107422,
      "learning_rate": 9.988472836534509e-06,
      "loss": 8.6644,
      "step": 180
    },
    {
      "epoch": 0.05382588741890705,
      "grad_norm": 34.28203582763672,
      "learning_rate": 9.985148530977767e-06,
      "loss": 6.6272,
      "step": 190
    },
    {
      "epoch": 0.056658828862007425,
      "grad_norm": 34.18405532836914,
      "learning_rate": 9.981404310795667e-06,
      "loss": 8.2372,
      "step": 200
    },
    {
      "epoch": 0.05949177030510779,
      "grad_norm": 40.83757400512695,
      "learning_rate": 9.97724049137556e-06,
      "loss": 5.3928,
      "step": 210
    },
    {
      "epoch": 0.06232471174820817,
      "grad_norm": 36.30077362060547,
      "learning_rate": 9.972657423448961e-06,
      "loss": 6.5196,
      "step": 220
    },
    {
      "epoch": 0.06515765319130853,
      "grad_norm": 38.03015899658203,
      "learning_rate": 9.96765549306199e-06,
      "loss": 7.1923,
      "step": 230
    },
    {
      "epoch": 0.0679905946344089,
      "grad_norm": 42.84525680541992,
      "learning_rate": 9.962235121542858e-06,
      "loss": 6.9849,
      "step": 240
    },
    {
      "epoch": 0.07082353607750928,
      "grad_norm": 34.071800231933594,
      "learning_rate": 9.956396765466382e-06,
      "loss": 7.0978,
      "step": 250
    },
    {
      "epoch": 0.07365647752060965,
      "grad_norm": 37.807029724121094,
      "learning_rate": 9.950140916615526e-06,
      "loss": 7.5077,
      "step": 260
    },
    {
      "epoch": 0.07648941896371002,
      "grad_norm": 36.55296325683594,
      "learning_rate": 9.943468101939968e-06,
      "loss": 6.6867,
      "step": 270
    },
    {
      "epoch": 0.07932236040681039,
      "grad_norm": 31.735977172851562,
      "learning_rate": 9.936378883511722e-06,
      "loss": 8.5626,
      "step": 280
    },
    {
      "epoch": 0.08215530184991077,
      "grad_norm": 51.97509002685547,
      "learning_rate": 9.92887385847779e-06,
      "loss": 7.1104,
      "step": 290
    },
    {
      "epoch": 0.08498824329301113,
      "grad_norm": 56.030025482177734,
      "learning_rate": 9.920953659009863e-06,
      "loss": 6.6099,
      "step": 300
    },
    {
      "epoch": 0.0878211847361115,
      "grad_norm": 37.08720016479492,
      "learning_rate": 9.912618952251071e-06,
      "loss": 4.9933,
      "step": 310
    },
    {
      "epoch": 0.09065412617921187,
      "grad_norm": 34.61451721191406,
      "learning_rate": 9.903870440259787e-06,
      "loss": 5.7727,
      "step": 320
    },
    {
      "epoch": 0.09348706762231225,
      "grad_norm": 35.92675018310547,
      "learning_rate": 9.89470885995049e-06,
      "loss": 4.8536,
      "step": 330
    },
    {
      "epoch": 0.09632000906541262,
      "grad_norm": 31.899490356445312,
      "learning_rate": 9.885134983031694e-06,
      "loss": 5.6988,
      "step": 340
    },
    {
      "epoch": 0.09915295050851299,
      "grad_norm": 37.71702194213867,
      "learning_rate": 9.875149615940943e-06,
      "loss": 8.0547,
      "step": 350
    },
    {
      "epoch": 0.10198589195161337,
      "grad_norm": 32.81459426879883,
      "learning_rate": 9.864753599776883e-06,
      "loss": 5.7466,
      "step": 360
    },
    {
      "epoch": 0.10481883339471373,
      "grad_norm": 34.12638854980469,
      "learning_rate": 9.853947810228416e-06,
      "loss": 6.3535,
      "step": 370
    },
    {
      "epoch": 0.1076517748378141,
      "grad_norm": 34.04792022705078,
      "learning_rate": 9.842733157500932e-06,
      "loss": 5.7424,
      "step": 380
    },
    {
      "epoch": 0.11048471628091447,
      "grad_norm": 33.2330322265625,
      "learning_rate": 9.831110586239643e-06,
      "loss": 5.4935,
      "step": 390
    },
    {
      "epoch": 0.11331765772401485,
      "grad_norm": 33.370567321777344,
      "learning_rate": 9.819081075450014e-06,
      "loss": 5.5257,
      "step": 400
    },
    {
      "epoch": 0.11615059916711522,
      "grad_norm": 34.091304779052734,
      "learning_rate": 9.806645638415302e-06,
      "loss": 6.1631,
      "step": 410
    },
    {
      "epoch": 0.11898354061021559,
      "grad_norm": 28.293777465820312,
      "learning_rate": 9.79380532261119e-06,
      "loss": 6.2594,
      "step": 420
    },
    {
      "epoch": 0.12181648205331595,
      "grad_norm": 31.169191360473633,
      "learning_rate": 9.780561209617569e-06,
      "loss": 5.428,
      "step": 430
    },
    {
      "epoch": 0.12464942349641633,
      "grad_norm": 31.67852783203125,
      "learning_rate": 9.766914415027426e-06,
      "loss": 6.3704,
      "step": 440
    },
    {
      "epoch": 0.1274823649395167,
      "grad_norm": 24.92995262145996,
      "learning_rate": 9.752866088352882e-06,
      "loss": 6.3413,
      "step": 450
    },
    {
      "epoch": 0.13031530638261707,
      "grad_norm": 30.267122268676758,
      "learning_rate": 9.738417412928348e-06,
      "loss": 6.1918,
      "step": 460
    },
    {
      "epoch": 0.13314824782571744,
      "grad_norm": 34.71146011352539,
      "learning_rate": 9.72356960581087e-06,
      "loss": 5.2388,
      "step": 470
    },
    {
      "epoch": 0.1359811892688178,
      "grad_norm": 32.87137985229492,
      "learning_rate": 9.7083239176776e-06,
      "loss": 4.2622,
      "step": 480
    },
    {
      "epoch": 0.13881413071191817,
      "grad_norm": 37.83769226074219,
      "learning_rate": 9.692681632720448e-06,
      "loss": 4.1838,
      "step": 490
    },
    {
      "epoch": 0.14164707215501857,
      "grad_norm": 29.84713363647461,
      "learning_rate": 9.676644068537915e-06,
      "loss": 6.1015,
      "step": 500
    },
    {
      "epoch": 0.14448001359811893,
      "grad_norm": 37.30936050415039,
      "learning_rate": 9.660212576024102e-06,
      "loss": 6.1121,
      "step": 510
    },
    {
      "epoch": 0.1473129550412193,
      "grad_norm": 43.746490478515625,
      "learning_rate": 9.64338853925493e-06,
      "loss": 6.0385,
      "step": 520
    },
    {
      "epoch": 0.15014589648431967,
      "grad_norm": 32.99515151977539,
      "learning_rate": 9.62617337537154e-06,
      "loss": 6.1911,
      "step": 530
    },
    {
      "epoch": 0.15297883792742004,
      "grad_norm": 46.957340240478516,
      "learning_rate": 9.608568534460938e-06,
      "loss": 5.822,
      "step": 540
    },
    {
      "epoch": 0.1558117793705204,
      "grad_norm": 33.011844635009766,
      "learning_rate": 9.590575499433837e-06,
      "loss": 7.1735,
      "step": 550
    },
    {
      "epoch": 0.15864472081362077,
      "grad_norm": 31.042083740234375,
      "learning_rate": 9.572195785899756e-06,
      "loss": 6.8695,
      "step": 560
    },
    {
      "epoch": 0.16147766225672117,
      "grad_norm": 26.289737701416016,
      "learning_rate": 9.553430942039352e-06,
      "loss": 6.9135,
      "step": 570
    },
    {
      "epoch": 0.16431060369982153,
      "grad_norm": 29.071701049804688,
      "learning_rate": 9.534282548474008e-06,
      "loss": 5.075,
      "step": 580
    },
    {
      "epoch": 0.1671435451429219,
      "grad_norm": 46.65534973144531,
      "learning_rate": 9.514752218132703e-06,
      "loss": 7.0842,
      "step": 590
    },
    {
      "epoch": 0.16997648658602227,
      "grad_norm": 31.195234298706055,
      "learning_rate": 9.494841596116138e-06,
      "loss": 4.9555,
      "step": 600
    },
    {
      "epoch": 0.17280942802912264,
      "grad_norm": 26.25963020324707,
      "learning_rate": 9.474552359558167e-06,
      "loss": 8.0829,
      "step": 610
    },
    {
      "epoch": 0.175642369472223,
      "grad_norm": 25.132131576538086,
      "learning_rate": 9.453886217484536e-06,
      "loss": 4.0549,
      "step": 620
    },
    {
      "epoch": 0.17847531091532337,
      "grad_norm": 47.06711196899414,
      "learning_rate": 9.432844910668914e-06,
      "loss": 6.1125,
      "step": 630
    },
    {
      "epoch": 0.18130825235842374,
      "grad_norm": 41.20506286621094,
      "learning_rate": 9.41143021148627e-06,
      "loss": 6.7009,
      "step": 640
    },
    {
      "epoch": 0.18414119380152413,
      "grad_norm": 27.037729263305664,
      "learning_rate": 9.389643923763573e-06,
      "loss": 6.8328,
      "step": 650
    },
    {
      "epoch": 0.1869741352446245,
      "grad_norm": 44.91098403930664,
      "learning_rate": 9.367487882627866e-06,
      "loss": 5.0284,
      "step": 660
    },
    {
      "epoch": 0.18980707668772487,
      "grad_norm": 32.71237564086914,
      "learning_rate": 9.344963954351662e-06,
      "loss": 6.0377,
      "step": 670
    },
    {
      "epoch": 0.19264001813082524,
      "grad_norm": 24.608020782470703,
      "learning_rate": 9.32207403619577e-06,
      "loss": 3.9539,
      "step": 680
    },
    {
      "epoch": 0.1954729595739256,
      "grad_norm": 45.37845230102539,
      "learning_rate": 9.298820056249459e-06,
      "loss": 7.6906,
      "step": 690
    },
    {
      "epoch": 0.19830590101702597,
      "grad_norm": 26.40629768371582,
      "learning_rate": 9.275203973268064e-06,
      "loss": 5.7302,
      "step": 700
    },
    {
      "epoch": 0.20113884246012634,
      "grad_norm": 25.433490753173828,
      "learning_rate": 9.251227776507989e-06,
      "loss": 5.6252,
      "step": 710
    },
    {
      "epoch": 0.20397178390322673,
      "grad_norm": 25.276575088500977,
      "learning_rate": 9.226893485559146e-06,
      "loss": 5.8884,
      "step": 720
    },
    {
      "epoch": 0.2068047253463271,
      "grad_norm": 45.13107681274414,
      "learning_rate": 9.202203150174836e-06,
      "loss": 9.215,
      "step": 730
    },
    {
      "epoch": 0.20963766678942747,
      "grad_norm": 26.52821922302246,
      "learning_rate": 9.177158850099099e-06,
      "loss": 5.7232,
      "step": 740
    },
    {
      "epoch": 0.21247060823252784,
      "grad_norm": 43.29339599609375,
      "learning_rate": 9.151762694891522e-06,
      "loss": 6.7846,
      "step": 750
    },
    {
      "epoch": 0.2153035496756282,
      "grad_norm": 29.308732986450195,
      "learning_rate": 9.12601682374955e-06,
      "loss": 5.8371,
      "step": 760
    },
    {
      "epoch": 0.21813649111872857,
      "grad_norm": 25.373172760009766,
      "learning_rate": 9.099923405328293e-06,
      "loss": 3.9846,
      "step": 770
    },
    {
      "epoch": 0.22096943256182894,
      "grad_norm": 33.12062454223633,
      "learning_rate": 9.073484637557852e-06,
      "loss": 4.8174,
      "step": 780
    },
    {
      "epoch": 0.2238023740049293,
      "grad_norm": 24.834850311279297,
      "learning_rate": 9.046702747458186e-06,
      "loss": 5.8073,
      "step": 790
    },
    {
      "epoch": 0.2266353154480297,
      "grad_norm": 23.760942459106445,
      "learning_rate": 9.019579990951514e-06,
      "loss": 5.6668,
      "step": 800
    },
    {
      "epoch": 0.22946825689113007,
      "grad_norm": 25.230995178222656,
      "learning_rate": 8.992118652672302e-06,
      "loss": 5.6386,
      "step": 810
    },
    {
      "epoch": 0.23230119833423044,
      "grad_norm": 27.033655166625977,
      "learning_rate": 8.964321045774808e-06,
      "loss": 5.1316,
      "step": 820
    },
    {
      "epoch": 0.2351341397773308,
      "grad_norm": 36.77193832397461,
      "learning_rate": 8.936189511738254e-06,
      "loss": 5.0568,
      "step": 830
    },
    {
      "epoch": 0.23796708122043117,
      "grad_norm": 21.841785430908203,
      "learning_rate": 8.907726420169583e-06,
      "loss": 5.5521,
      "step": 840
    },
    {
      "epoch": 0.24080002266353154,
      "grad_norm": 46.69823455810547,
      "learning_rate": 8.878934168603865e-06,
      "loss": 6.7058,
      "step": 850
    },
    {
      "epoch": 0.2436329641066319,
      "grad_norm": 23.228717803955078,
      "learning_rate": 8.849815182302345e-06,
      "loss": 7.8944,
      "step": 860
    },
    {
      "epoch": 0.2464659055497323,
      "grad_norm": 33.86655807495117,
      "learning_rate": 8.820371914048153e-06,
      "loss": 4.8468,
      "step": 870
    },
    {
      "epoch": 0.24929884699283267,
      "grad_norm": 25.241182327270508,
      "learning_rate": 8.790606843939705e-06,
      "loss": 3.709,
      "step": 880
    },
    {
      "epoch": 0.25213178843593304,
      "grad_norm": 25.6811580657959,
      "learning_rate": 8.760522479181784e-06,
      "loss": 4.844,
      "step": 890
    },
    {
      "epoch": 0.2549647298790334,
      "grad_norm": 43.929115295410156,
      "learning_rate": 8.730121353874365e-06,
      "loss": 6.7687,
      "step": 900
    },
    {
      "epoch": 0.25779767132213377,
      "grad_norm": 25.351106643676758,
      "learning_rate": 8.69940602879915e-06,
      "loss": 3.7733,
      "step": 910
    },
    {
      "epoch": 0.26063061276523414,
      "grad_norm": 45.543373107910156,
      "learning_rate": 8.66837909120387e-06,
      "loss": 6.5226,
      "step": 920
    },
    {
      "epoch": 0.2634635542083345,
      "grad_norm": 35.3692626953125,
      "learning_rate": 8.637043154584351e-06,
      "loss": 7.782,
      "step": 930
    },
    {
      "epoch": 0.2662964956514349,
      "grad_norm": 23.175140380859375,
      "learning_rate": 8.60540085846437e-06,
      "loss": 3.7581,
      "step": 940
    },
    {
      "epoch": 0.26912943709453524,
      "grad_norm": 22.461284637451172,
      "learning_rate": 8.573454868173325e-06,
      "loss": 3.8114,
      "step": 950
    },
    {
      "epoch": 0.2719623785376356,
      "grad_norm": 30.49061393737793,
      "learning_rate": 8.541207874621718e-06,
      "loss": 5.6752,
      "step": 960
    },
    {
      "epoch": 0.274795319980736,
      "grad_norm": 43.03390121459961,
      "learning_rate": 8.508662594074496e-06,
      "loss": 5.8459,
      "step": 970
    },
    {
      "epoch": 0.27762826142383634,
      "grad_norm": 144.74916076660156,
      "learning_rate": 8.475821767922254e-06,
      "loss": 6.1604,
      "step": 980
    },
    {
      "epoch": 0.28046120286693677,
      "grad_norm": 26.303081512451172,
      "learning_rate": 8.442688162450315e-06,
      "loss": 4.7632,
      "step": 990
    },
    {
      "epoch": 0.28329414431003713,
      "grad_norm": 28.066007614135742,
      "learning_rate": 8.409264568605714e-06,
      "loss": 8.0062,
      "step": 1000
    },
    {
      "epoch": 0.2861270857531375,
      "grad_norm": 23.32230567932129,
      "learning_rate": 8.375553801762119e-06,
      "loss": 3.9505,
      "step": 1010
    },
    {
      "epoch": 0.28896002719623787,
      "grad_norm": 53.46368408203125,
      "learning_rate": 8.34155870148267e-06,
      "loss": 4.7727,
      "step": 1020
    },
    {
      "epoch": 0.29179296863933823,
      "grad_norm": 28.557096481323242,
      "learning_rate": 8.307282131280805e-06,
      "loss": 3.876,
      "step": 1030
    },
    {
      "epoch": 0.2946259100824386,
      "grad_norm": 26.72674560546875,
      "learning_rate": 8.272726978379049e-06,
      "loss": 3.6362,
      "step": 1040
    },
    {
      "epoch": 0.29745885152553897,
      "grad_norm": 33.64091110229492,
      "learning_rate": 8.23789615346582e-06,
      "loss": 4.8435,
      "step": 1050
    },
    {
      "epoch": 0.30029179296863934,
      "grad_norm": 25.513519287109375,
      "learning_rate": 8.202792590450246e-06,
      "loss": 4.8615,
      "step": 1060
    },
    {
      "epoch": 0.3031247344117397,
      "grad_norm": 26.183082580566406,
      "learning_rate": 8.167419246215042e-06,
      "loss": 3.7897,
      "step": 1070
    },
    {
      "epoch": 0.30595767585484007,
      "grad_norm": 37.91279983520508,
      "learning_rate": 8.131779100367438e-06,
      "loss": 3.8092,
      "step": 1080
    },
    {
      "epoch": 0.30879061729794044,
      "grad_norm": 20.643037796020508,
      "learning_rate": 8.09587515498819e-06,
      "loss": 5.8217,
      "step": 1090
    },
    {
      "epoch": 0.3116235587410408,
      "grad_norm": 24.009424209594727,
      "learning_rate": 8.059710434378717e-06,
      "loss": 4.6594,
      "step": 1100
    },
    {
      "epoch": 0.3144565001841412,
      "grad_norm": 26.472389221191406,
      "learning_rate": 8.02328798480635e-06,
      "loss": 6.5989,
      "step": 1110
    },
    {
      "epoch": 0.31728944162724154,
      "grad_norm": 42.69245529174805,
      "learning_rate": 7.986610874247736e-06,
      "loss": 6.7969,
      "step": 1120
    },
    {
      "epoch": 0.3201223830703419,
      "grad_norm": 20.50579833984375,
      "learning_rate": 7.949682192130407e-06,
      "loss": 7.532,
      "step": 1130
    },
    {
      "epoch": 0.32295532451344233,
      "grad_norm": 33.428890228271484,
      "learning_rate": 7.912505049072559e-06,
      "loss": 5.5098,
      "step": 1140
    },
    {
      "epoch": 0.3257882659565427,
      "grad_norm": 33.04521560668945,
      "learning_rate": 7.875082576621024e-06,
      "loss": 5.7852,
      "step": 1150
    },
    {
      "epoch": 0.32862120739964307,
      "grad_norm": 29.72992706298828,
      "learning_rate": 7.837417926987496e-06,
      "loss": 3.8586,
      "step": 1160
    },
    {
      "epoch": 0.33145414884274343,
      "grad_norm": 22.467132568359375,
      "learning_rate": 7.799514272783014e-06,
      "loss": 5.6287,
      "step": 1170
    },
    {
      "epoch": 0.3342870902858438,
      "grad_norm": 25.866819381713867,
      "learning_rate": 7.761374806750712e-06,
      "loss": 3.7462,
      "step": 1180
    },
    {
      "epoch": 0.33712003172894417,
      "grad_norm": 21.801698684692383,
      "learning_rate": 7.723002741496892e-06,
      "loss": 5.6068,
      "step": 1190
    },
    {
      "epoch": 0.33995297317204454,
      "grad_norm": 41.93526840209961,
      "learning_rate": 7.684401309220416e-06,
      "loss": 5.8573,
      "step": 1200
    },
    {
      "epoch": 0.3427859146151449,
      "grad_norm": 23.245235443115234,
      "learning_rate": 7.645573761440444e-06,
      "loss": 4.5851,
      "step": 1210
    },
    {
      "epoch": 0.34561885605824527,
      "grad_norm": 24.502330780029297,
      "learning_rate": 7.606523368722554e-06,
      "loss": 6.4644,
      "step": 1220
    },
    {
      "epoch": 0.34845179750134564,
      "grad_norm": 16.042354583740234,
      "learning_rate": 7.567253420403249e-06,
      "loss": 5.6877,
      "step": 1230
    },
    {
      "epoch": 0.351284738944446,
      "grad_norm": 26.405628204345703,
      "learning_rate": 7.527767224312883e-06,
      "loss": 4.764,
      "step": 1240
    },
    {
      "epoch": 0.3541176803875464,
      "grad_norm": 40.40938186645508,
      "learning_rate": 7.488068106497035e-06,
      "loss": 5.8002,
      "step": 1250
    },
    {
      "epoch": 0.35695062183064674,
      "grad_norm": 25.338321685791016,
      "learning_rate": 7.448159410936348e-06,
      "loss": 5.5113,
      "step": 1260
    },
    {
      "epoch": 0.3597835632737471,
      "grad_norm": 39.411128997802734,
      "learning_rate": 7.4080444992648534e-06,
      "loss": 5.5444,
      "step": 1270
    },
    {
      "epoch": 0.3626165047168475,
      "grad_norm": 22.218137741088867,
      "learning_rate": 7.3677267504868055e-06,
      "loss": 4.4882,
      "step": 1280
    },
    {
      "epoch": 0.3654494461599479,
      "grad_norm": 43.15862274169922,
      "learning_rate": 7.327209560692063e-06,
      "loss": 6.6107,
      "step": 1290
    },
    {
      "epoch": 0.36828238760304827,
      "grad_norm": 20.51604652404785,
      "learning_rate": 7.2864963427700284e-06,
      "loss": 5.6351,
      "step": 1300
    },
    {
      "epoch": 0.37111532904614863,
      "grad_norm": 21.392065048217773,
      "learning_rate": 7.2455905261221585e-06,
      "loss": 5.7755,
      "step": 1310
    },
    {
      "epoch": 0.373948270489249,
      "grad_norm": 28.160072326660156,
      "learning_rate": 7.204495556373106e-06,
      "loss": 6.5779,
      "step": 1320
    },
    {
      "epoch": 0.37678121193234937,
      "grad_norm": 41.15205764770508,
      "learning_rate": 7.163214895080479e-06,
      "loss": 6.4435,
      "step": 1330
    },
    {
      "epoch": 0.37961415337544974,
      "grad_norm": 26.100757598876953,
      "learning_rate": 7.121752019443266e-06,
      "loss": 6.5864,
      "step": 1340
    },
    {
      "epoch": 0.3824470948185501,
      "grad_norm": 41.462791442871094,
      "learning_rate": 7.080110422008937e-06,
      "loss": 5.6488,
      "step": 1350
    },
    {
      "epoch": 0.38528003626165047,
      "grad_norm": 22.369388580322266,
      "learning_rate": 7.038293610379255e-06,
      "loss": 4.4922,
      "step": 1360
    },
    {
      "epoch": 0.38811297770475084,
      "grad_norm": 19.927444458007812,
      "learning_rate": 6.996305106914824e-06,
      "loss": 4.5791,
      "step": 1370
    },
    {
      "epoch": 0.3909459191478512,
      "grad_norm": 25.871030807495117,
      "learning_rate": 6.954148448438389e-06,
      "loss": 4.5578,
      "step": 1380
    },
    {
      "epoch": 0.3937788605909516,
      "grad_norm": 18.420751571655273,
      "learning_rate": 6.911827185936914e-06,
      "loss": 4.6252,
      "step": 1390
    },
    {
      "epoch": 0.39661180203405194,
      "grad_norm": 27.263010025024414,
      "learning_rate": 6.869344884262473e-06,
      "loss": 5.5235,
      "step": 1400
    },
    {
      "epoch": 0.3994447434771523,
      "grad_norm": 24.479764938354492,
      "learning_rate": 6.8267051218319766e-06,
      "loss": 5.6514,
      "step": 1410
    },
    {
      "epoch": 0.4022776849202527,
      "grad_norm": 23.21695899963379,
      "learning_rate": 6.7839114903257404e-06,
      "loss": 7.5326,
      "step": 1420
    },
    {
      "epoch": 0.40511062636335304,
      "grad_norm": 21.287368774414062,
      "learning_rate": 6.74096759438496e-06,
      "loss": 3.895,
      "step": 1430
    },
    {
      "epoch": 0.40794356780645347,
      "grad_norm": 25.839454650878906,
      "learning_rate": 6.697877051308067e-06,
      "loss": 6.3928,
      "step": 1440
    },
    {
      "epoch": 0.41077650924955383,
      "grad_norm": 22.896682739257812,
      "learning_rate": 6.654643490746042e-06,
      "loss": 4.5232,
      "step": 1450
    },
    {
      "epoch": 0.4136094506926542,
      "grad_norm": 25.252422332763672,
      "learning_rate": 6.611270554396676e-06,
      "loss": 6.5998,
      "step": 1460
    },
    {
      "epoch": 0.41644239213575457,
      "grad_norm": 24.610836029052734,
      "learning_rate": 6.567761895697816e-06,
      "loss": 4.6121,
      "step": 1470
    },
    {
      "epoch": 0.41927533357885494,
      "grad_norm": 19.092580795288086,
      "learning_rate": 6.524121179519625e-06,
      "loss": 3.6029,
      "step": 1480
    },
    {
      "epoch": 0.4221082750219553,
      "grad_norm": 22.915136337280273,
      "learning_rate": 6.480352081855884e-06,
      "loss": 3.6352,
      "step": 1490
    },
    {
      "epoch": 0.42494121646505567,
      "grad_norm": 29.044233322143555,
      "learning_rate": 6.436458289514342e-06,
      "loss": 4.6979,
      "step": 1500
    },
    {
      "epoch": 0.42777415790815604,
      "grad_norm": 39.80937194824219,
      "learning_rate": 6.392443499806175e-06,
      "loss": 4.6673,
      "step": 1510
    },
    {
      "epoch": 0.4306070993512564,
      "grad_norm": 22.760765075683594,
      "learning_rate": 6.348311420234542e-06,
      "loss": 4.6801,
      "step": 1520
    },
    {
      "epoch": 0.4334400407943568,
      "grad_norm": 21.216337203979492,
      "learning_rate": 6.304065768182295e-06,
      "loss": 5.7451,
      "step": 1530
    },
    {
      "epoch": 0.43627298223745714,
      "grad_norm": 20.622943878173828,
      "learning_rate": 6.259710270598848e-06,
      "loss": 5.6216,
      "step": 1540
    },
    {
      "epoch": 0.4391059236805575,
      "grad_norm": 40.299949645996094,
      "learning_rate": 6.215248663686251e-06,
      "loss": 6.5508,
      "step": 1550
    },
    {
      "epoch": 0.4419388651236579,
      "grad_norm": 28.81671905517578,
      "learning_rate": 6.170684692584469e-06,
      "loss": 3.5039,
      "step": 1560
    },
    {
      "epoch": 0.44477180656675824,
      "grad_norm": 43.010169982910156,
      "learning_rate": 6.126022111055929e-06,
      "loss": 6.4925,
      "step": 1570
    },
    {
      "epoch": 0.4476047480098586,
      "grad_norm": 23.351240158081055,
      "learning_rate": 6.081264681169317e-06,
      "loss": 3.4456,
      "step": 1580
    },
    {
      "epoch": 0.45043768945295903,
      "grad_norm": 40.19292449951172,
      "learning_rate": 6.0364161729826905e-06,
      "loss": 4.4953,
      "step": 1590
    },
    {
      "epoch": 0.4532706308960594,
      "grad_norm": 25.595369338989258,
      "learning_rate": 5.991480364225924e-06,
      "loss": 6.2619,
      "step": 1600
    },
    {
      "epoch": 0.45610357233915977,
      "grad_norm": 32.5233268737793,
      "learning_rate": 5.946461039982485e-06,
      "loss": 5.5702,
      "step": 1610
    },
    {
      "epoch": 0.45893651378226014,
      "grad_norm": 25.565658569335938,
      "learning_rate": 5.901361992370614e-06,
      "loss": 3.5389,
      "step": 1620
    },
    {
      "epoch": 0.4617694552253605,
      "grad_norm": 21.443763732910156,
      "learning_rate": 5.856187020223901e-06,
      "loss": 4.6532,
      "step": 1630
    },
    {
      "epoch": 0.46460239666846087,
      "grad_norm": 26.775903701782227,
      "learning_rate": 5.8109399287712935e-06,
      "loss": 5.7745,
      "step": 1640
    },
    {
      "epoch": 0.46743533811156124,
      "grad_norm": 20.02845001220703,
      "learning_rate": 5.765624529316573e-06,
      "loss": 5.506,
      "step": 1650
    },
    {
      "epoch": 0.4702682795546616,
      "grad_norm": 22.177770614624023,
      "learning_rate": 5.7202446389173225e-06,
      "loss": 3.5255,
      "step": 1660
    },
    {
      "epoch": 0.473101220997762,
      "grad_norm": 27.885957717895508,
      "learning_rate": 5.674804080063392e-06,
      "loss": 3.5088,
      "step": 1670
    },
    {
      "epoch": 0.47593416244086234,
      "grad_norm": 33.34544372558594,
      "learning_rate": 5.62930668035493e-06,
      "loss": 4.4746,
      "step": 1680
    },
    {
      "epoch": 0.4787671038839627,
      "grad_norm": 24.865848541259766,
      "learning_rate": 5.5837562721799644e-06,
      "loss": 6.4182,
      "step": 1690
    },
    {
      "epoch": 0.4816000453270631,
      "grad_norm": 20.06027603149414,
      "learning_rate": 5.538156692391592e-06,
      "loss": 3.499,
      "step": 1700
    },
    {
      "epoch": 0.48443298677016344,
      "grad_norm": 28.240829467773438,
      "learning_rate": 5.4925117819847925e-06,
      "loss": 5.4651,
      "step": 1710
    },
    {
      "epoch": 0.4872659282132638,
      "grad_norm": 39.07200241088867,
      "learning_rate": 5.44682538577288e-06,
      "loss": 4.7134,
      "step": 1720
    },
    {
      "epoch": 0.4900988696563642,
      "grad_norm": 31.383825302124023,
      "learning_rate": 5.4011013520636466e-06,
      "loss": 4.4705,
      "step": 1730
    },
    {
      "epoch": 0.4929318110994646,
      "grad_norm": 40.832984924316406,
      "learning_rate": 5.355343532335215e-06,
      "loss": 7.2469,
      "step": 1740
    },
    {
      "epoch": 0.49576475254256497,
      "grad_norm": 20.33405303955078,
      "learning_rate": 5.309555780911604e-06,
      "loss": 5.4482,
      "step": 1750
    },
    {
      "epoch": 0.49859769398566534,
      "grad_norm": 22.8585262298584,
      "learning_rate": 5.263741954638072e-06,
      "loss": 4.4573,
      "step": 1760
    },
    {
      "epoch": 0.5014306354287656,
      "grad_norm": 42.46244430541992,
      "learning_rate": 5.217905912556248e-06,
      "loss": 5.5277,
      "step": 1770
    },
    {
      "epoch": 0.5042635768718661,
      "grad_norm": 21.30562973022461,
      "learning_rate": 5.172051515579065e-06,
      "loss": 5.4764,
      "step": 1780
    },
    {
      "epoch": 0.5070965183149664,
      "grad_norm": 18.9359130859375,
      "learning_rate": 5.126182626165547e-06,
      "loss": 6.4232,
      "step": 1790
    },
    {
      "epoch": 0.5099294597580668,
      "grad_norm": 33.8026123046875,
      "learning_rate": 5.080303107995461e-06,
      "loss": 6.6042,
      "step": 1800
    },
    {
      "epoch": 0.5127624012011671,
      "grad_norm": 40.52323913574219,
      "learning_rate": 5.034416825643868e-06,
      "loss": 5.5848,
      "step": 1810
    },
    {
      "epoch": 0.5155953426442675,
      "grad_norm": 27.342744827270508,
      "learning_rate": 4.988527644255591e-06,
      "loss": 5.2504,
      "step": 1820
    },
    {
      "epoch": 0.518428284087368,
      "grad_norm": 19.118297576904297,
      "learning_rate": 4.942639429219661e-06,
      "loss": 4.5668,
      "step": 1830
    },
    {
      "epoch": 0.5212612255304683,
      "grad_norm": 41.146236419677734,
      "learning_rate": 4.896756045843698e-06,
      "loss": 6.0831,
      "step": 1840
    },
    {
      "epoch": 0.5240941669735687,
      "grad_norm": 19.685937881469727,
      "learning_rate": 4.85088135902834e-06,
      "loss": 5.5025,
      "step": 1850
    },
    {
      "epoch": 0.526927108416669,
      "grad_norm": 22.97096061706543,
      "learning_rate": 4.805019232941689e-06,
      "loss": 4.4157,
      "step": 1860
    },
    {
      "epoch": 0.5297600498597694,
      "grad_norm": 27.627784729003906,
      "learning_rate": 4.7591735306938144e-06,
      "loss": 4.3861,
      "step": 1870
    },
    {
      "epoch": 0.5325929913028697,
      "grad_norm": 25.308032989501953,
      "learning_rate": 4.713348114011357e-06,
      "loss": 7.2963,
      "step": 1880
    },
    {
      "epoch": 0.5354259327459702,
      "grad_norm": 19.11351203918457,
      "learning_rate": 4.667546842912239e-06,
      "loss": 4.2907,
      "step": 1890
    },
    {
      "epoch": 0.5382588741890705,
      "grad_norm": 28.81739044189453,
      "learning_rate": 4.6217735753805235e-06,
      "loss": 4.5385,
      "step": 1900
    },
| { | |
| "epoch": 0.5410918156321709, | |
| "grad_norm": 20.510547637939453, | |
| "learning_rate": 4.576032167041452e-06, | |
| "loss": 7.2043, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.5439247570752712, | |
| "grad_norm": 26.19765281677246, | |
| "learning_rate": 4.530326470836659e-06, | |
| "loss": 4.3494, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.5467576985183716, | |
| "grad_norm": 25.779802322387695, | |
| "learning_rate": 4.484660336699638e-06, | |
| "loss": 5.3226, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.549590639961472, | |
| "grad_norm": 26.97022247314453, | |
| "learning_rate": 4.439037611231448e-06, | |
| "loss": 6.5069, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.5524235814045724, | |
| "grad_norm": 26.32407569885254, | |
| "learning_rate": 4.393462137376696e-06, | |
| "loss": 3.545, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.5552565228476727, | |
| "grad_norm": 30.962535858154297, | |
| "learning_rate": 4.347937754099841e-06, | |
| "loss": 4.4292, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.5580894642907731, | |
| "grad_norm": 38.1851921081543, | |
| "learning_rate": 4.302468296061823e-06, | |
| "loss": 4.3079, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.5609224057338735, | |
| "grad_norm": 21.038278579711914, | |
| "learning_rate": 4.257057593297055e-06, | |
| "loss": 4.5294, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.5637553471769738, | |
| "grad_norm": 20.618942260742188, | |
| "learning_rate": 4.211709470890815e-06, | |
| "loss": 7.2449, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.5665882886200743, | |
| "grad_norm": 21.230995178222656, | |
| "learning_rate": 4.166427748657034e-06, | |
| "loss": 4.3681, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.5694212300631746, | |
| "grad_norm": 20.577428817749023, | |
| "learning_rate": 4.121216240816559e-06, | |
| "loss": 5.3925, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.572254171506275, | |
| "grad_norm": 21.1496524810791, | |
| "learning_rate": 4.076078755675852e-06, | |
| "loss": 5.0495, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.5750871129493753, | |
| "grad_norm": 26.215744018554688, | |
| "learning_rate": 4.0310190953062155e-06, | |
| "loss": 5.5832, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.5779200543924757, | |
| "grad_norm": 33.668174743652344, | |
| "learning_rate": 3.986041055223526e-06, | |
| "loss": 5.1639, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.580752995835576, | |
| "grad_norm": 28.786453247070312, | |
| "learning_rate": 3.9411484240685315e-06, | |
| "loss": 3.3797, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.5835859372786765, | |
| "grad_norm": 24.81963348388672, | |
| "learning_rate": 3.8963449832877164e-06, | |
| "loss": 6.3189, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.5864188787217768, | |
| "grad_norm": 25.143753051757812, | |
| "learning_rate": 3.851634506814782e-06, | |
| "loss": 6.463, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.5892518201648772, | |
| "grad_norm": 39.29959487915039, | |
| "learning_rate": 3.8070207607527587e-06, | |
| "loss": 7.5255, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.5920847616079775, | |
| "grad_norm": 19.030284881591797, | |
| "learning_rate": 3.7625075030567683e-06, | |
| "loss": 4.2513, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.5949177030510779, | |
| "grad_norm": 24.105989456176758, | |
| "learning_rate": 3.718098483217484e-06, | |
| "loss": 3.3586, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.5977506444941783, | |
| "grad_norm": 38.95778274536133, | |
| "learning_rate": 3.673797441945304e-06, | |
| "loss": 4.2773, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.6005835859372787, | |
| "grad_norm": 42.26526641845703, | |
| "learning_rate": 3.629608110855248e-06, | |
| "loss": 5.2586, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.6034165273803791, | |
| "grad_norm": 21.60348892211914, | |
| "learning_rate": 3.585534212152643e-06, | |
| "loss": 4.4408, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.6062494688234794, | |
| "grad_norm": 39.41062545776367, | |
| "learning_rate": 3.5415794583195846e-06, | |
| "loss": 4.5132, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.6090824102665798, | |
| "grad_norm": 32.25893783569336, | |
| "learning_rate": 3.497747551802221e-06, | |
| "loss": 8.4284, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.6119153517096801, | |
| "grad_norm": 34.556373596191406, | |
| "learning_rate": 3.4540421846988916e-06, | |
| "loss": 6.3801, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.6147482931527806, | |
| "grad_norm": 27.99374771118164, | |
| "learning_rate": 3.4104670384491234e-06, | |
| "loss": 5.5573, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.6175812345958809, | |
| "grad_norm": 23.997901916503906, | |
| "learning_rate": 3.367025783523534e-06, | |
| "loss": 4.2779, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.6204141760389813, | |
| "grad_norm": 41.76970291137695, | |
| "learning_rate": 3.3237220791146597e-06, | |
| "loss": 5.241, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.6232471174820816, | |
| "grad_norm": 27.922670364379883, | |
| "learning_rate": 3.2805595728287255e-06, | |
| "loss": 4.2649, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.626080058925182, | |
| "grad_norm": 33.54890060424805, | |
| "learning_rate": 3.2375419003783957e-06, | |
| "loss": 6.0635, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.6289130003682823, | |
| "grad_norm": 21.987178802490234, | |
| "learning_rate": 3.1946726852765325e-06, | |
| "loss": 5.1542, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.6317459418113828, | |
| "grad_norm": 35.2348518371582, | |
| "learning_rate": 3.1519555385309685e-06, | |
| "loss": 4.2332, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.6345788832544831, | |
| "grad_norm": 39.060691833496094, | |
| "learning_rate": 3.1093940583403447e-06, | |
| "loss": 8.0693, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.6374118246975835, | |
| "grad_norm": 20.76451873779297, | |
| "learning_rate": 3.066991829791024e-06, | |
| "loss": 5.3108, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.6402447661406838, | |
| "grad_norm": 40.92884826660156, | |
| "learning_rate": 3.024752424555105e-06, | |
| "loss": 4.2548, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.6430777075837842, | |
| "grad_norm": 24.043121337890625, | |
| "learning_rate": 2.982679400589569e-06, | |
| "loss": 5.3648, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.6459106490268847, | |
| "grad_norm": 22.929412841796875, | |
| "learning_rate": 2.9407763018365854e-06, | |
| "loss": 4.2817, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.648743590469985, | |
| "grad_norm": 36.0571174621582, | |
| "learning_rate": 2.899046657924992e-06, | |
| "loss": 7.9167, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.6515765319130854, | |
| "grad_norm": 23.849647521972656, | |
| "learning_rate": 2.8574939838729844e-06, | |
| "loss": 4.44, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.6544094733561857, | |
| "grad_norm": 42.65750503540039, | |
| "learning_rate": 2.8161217797920304e-06, | |
| "loss": 5.6655, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.6572424147992861, | |
| "grad_norm": 23.45660400390625, | |
| "learning_rate": 2.774933530592054e-06, | |
| "loss": 5.4841, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.6600753562423864, | |
| "grad_norm": 21.22451400756836, | |
| "learning_rate": 2.733932705687883e-06, | |
| "loss": 3.3468, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.6629082976854869, | |
| "grad_norm": 37.178993225097656, | |
| "learning_rate": 2.693122758707013e-06, | |
| "loss": 5.1606, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.6657412391285872, | |
| "grad_norm": 24.34912109375, | |
| "learning_rate": 2.652507127198689e-06, | |
| "loss": 7.2961, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.6685741805716876, | |
| "grad_norm": 40.61592483520508, | |
| "learning_rate": 2.612089232344371e-06, | |
| "loss": 6.3695, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.6714071220147879, | |
| "grad_norm": 20.37811279296875, | |
| "learning_rate": 2.571872478669528e-06, | |
| "loss": 3.3039, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.6742400634578883, | |
| "grad_norm": 25.745912551879883, | |
| "learning_rate": 2.5318602537568904e-06, | |
| "loss": 4.2973, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.6770730049009887, | |
| "grad_norm": 22.395126342773438, | |
| "learning_rate": 2.4920559279610886e-06, | |
| "loss": 4.1162, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.6799059463440891, | |
| "grad_norm": 34.32621383666992, | |
| "learning_rate": 2.452462854124758e-06, | |
| "loss": 4.1658, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.6827388877871894, | |
| "grad_norm": 39.03499984741211, | |
| "learning_rate": 2.413084367296127e-06, | |
| "loss": 6.3083, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.6855718292302898, | |
| "grad_norm": 47.788394927978516, | |
| "learning_rate": 2.373923784448089e-06, | |
| "loss": 4.2861, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.6884047706733902, | |
| "grad_norm": 26.90192413330078, | |
| "learning_rate": 2.3349844041988044e-06, | |
| "loss": 4.3008, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.6912377121164905, | |
| "grad_norm": 22.178869247436523, | |
| "learning_rate": 2.296269506533846e-06, | |
| "loss": 5.2767, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.694070653559591, | |
| "grad_norm": 21.529335021972656, | |
| "learning_rate": 2.2577823525299205e-06, | |
| "loss": 7.1097, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.6969035950026913, | |
| "grad_norm": 20.215675354003906, | |
| "learning_rate": 2.2195261840801757e-06, | |
| "loss": 7.1815, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.6997365364457917, | |
| "grad_norm": 21.300861358642578, | |
| "learning_rate": 2.18150422362112e-06, | |
| "loss": 6.9142, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.702569477888892, | |
| "grad_norm": 30.098453521728516, | |
| "learning_rate": 2.1437196738611958e-06, | |
| "loss": 4.4774, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.7054024193319924, | |
| "grad_norm": 25.317970275878906, | |
| "learning_rate": 2.1061757175110024e-06, | |
| "loss": 4.4772, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.7082353607750927, | |
| "grad_norm": 30.881681442260742, | |
| "learning_rate": 2.0688755170152e-06, | |
| "loss": 4.2296, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.7110683022181932, | |
| "grad_norm": 23.95901107788086, | |
| "learning_rate": 2.031822214286134e-06, | |
| "loss": 5.0405, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.7139012436612935, | |
| "grad_norm": 41.624210357666016, | |
| "learning_rate": 1.9950189304391855e-06, | |
| "loss": 6.3358, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.7167341851043939, | |
| "grad_norm": 34.76797866821289, | |
| "learning_rate": 1.958468765529853e-06, | |
| "loss": 5.061, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.7195671265474942, | |
| "grad_norm": 20.406444549560547, | |
| "learning_rate": 1.9221747982926493e-06, | |
| "loss": 5.1701, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.7224000679905946, | |
| "grad_norm": 24.22311782836914, | |
| "learning_rate": 1.8861400858817508e-06, | |
| "loss": 4.2621, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.725233009433695, | |
| "grad_norm": 37.65345001220703, | |
| "learning_rate": 1.8503676636134882e-06, | |
| "loss": 6.1661, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.7280659508767954, | |
| "grad_norm": 20.813777923583984, | |
| "learning_rate": 1.81486054471068e-06, | |
| "loss": 5.3045, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.7308988923198958, | |
| "grad_norm": 39.82976150512695, | |
| "learning_rate": 1.7796217200488114e-06, | |
| "loss": 6.4348, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.7337318337629961, | |
| "grad_norm": 25.495925903320312, | |
| "learning_rate": 1.7446541579041048e-06, | |
| "loss": 4.2349, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.7365647752060965, | |
| "grad_norm": 38.05914306640625, | |
| "learning_rate": 1.7099608037034953e-06, | |
| "loss": 5.2485, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.7393977166491968, | |
| "grad_norm": 22.876413345336914, | |
| "learning_rate": 1.6755445797765286e-06, | |
| "loss": 4.263, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.7422306580922973, | |
| "grad_norm": 22.402753829956055, | |
| "learning_rate": 1.6414083851091973e-06, | |
| "loss": 4.3153, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.7450635995353976, | |
| "grad_norm": 20.86781883239746, | |
| "learning_rate": 1.6075550950997592e-06, | |
| "loss": 4.4095, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.747896540978498, | |
| "grad_norm": 39.51744842529297, | |
| "learning_rate": 1.5739875613165283e-06, | |
| "loss": 6.2356, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.7507294824215983, | |
| "grad_norm": 26.651187896728516, | |
| "learning_rate": 1.5407086112576813e-06, | |
| "loss": 4.1033, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.7535624238646987, | |
| "grad_norm": 46.947757720947266, | |
| "learning_rate": 1.5077210481130815e-06, | |
| "loss": 8.1815, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.756395365307799, | |
| "grad_norm": 41.29295349121094, | |
| "learning_rate": 1.475027650528168e-06, | |
| "loss": 6.1637, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.7592283067508995, | |
| "grad_norm": 39.40729522705078, | |
| "learning_rate": 1.442631172369896e-06, | |
| "loss": 7.1273, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.7620612481939998, | |
| "grad_norm": 39.58256912231445, | |
| "learning_rate": 1.4105343424947654e-06, | |
| "loss": 5.187, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.7648941896371002, | |
| "grad_norm": 39.836185455322266, | |
| "learning_rate": 1.378739864518971e-06, | |
| "loss": 3.8889, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.7677271310802005, | |
| "grad_norm": 40.20053482055664, | |
| "learning_rate": 1.3472504165906614e-06, | |
| "loss": 5.3001, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.7705600725233009, | |
| "grad_norm": 23.571002960205078, | |
| "learning_rate": 1.3160686511643505e-06, | |
| "loss": 4.0238, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.7733930139664014, | |
| "grad_norm": 23.623443603515625, | |
| "learning_rate": 1.2851971947774987e-06, | |
| "loss": 5.1091, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.7762259554095017, | |
| "grad_norm": 31.367658615112305, | |
| "learning_rate": 1.2546386478292604e-06, | |
| "loss": 4.1048, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.7790588968526021, | |
| "grad_norm": 39.296226501464844, | |
| "learning_rate": 1.2243955843614558e-06, | |
| "loss": 4.271, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.7818918382957024, | |
| "grad_norm": 28.69118881225586, | |
| "learning_rate": 1.1944705518417466e-06, | |
| "loss": 4.0739, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.7847247797388028, | |
| "grad_norm": 32.27414321899414, | |
| "learning_rate": 1.1648660709490538e-06, | |
| "loss": 5.1998, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.7875577211819031, | |
| "grad_norm": 24.473217010498047, | |
| "learning_rate": 1.135584635361232e-06, | |
| "loss": 4.9601, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.7903906626250036, | |
| "grad_norm": 27.856367111206055, | |
| "learning_rate": 1.1066287115450242e-06, | |
| "loss": 4.9381, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.7932236040681039, | |
| "grad_norm": 17.640838623046875, | |
| "learning_rate": 1.0780007385483005e-06, | |
| "loss": 4.2145, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.7960565455112043, | |
| "grad_norm": 34.375091552734375, | |
| "learning_rate": 1.0497031277946062e-06, | |
| "loss": 8.3028, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.7988894869543046, | |
| "grad_norm": 23.346403121948242, | |
| "learning_rate": 1.0217382628800465e-06, | |
| "loss": 6.9337, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.801722428397405, | |
| "grad_norm": 25.259016036987305, | |
| "learning_rate": 9.94108499372507e-07, | |
| "loss": 3.1855, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.8045553698405054, | |
| "grad_norm": 33.022727966308594, | |
| "learning_rate": 9.668161646132296e-07, | |
| "loss": 5.2408, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.8073883112836058, | |
| "grad_norm": 30.2951717376709, | |
| "learning_rate": 9.398635575207854e-07, | |
| "loss": 3.1828, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.8102212527267061, | |
| "grad_norm": 51.273616790771484, | |
| "learning_rate": 9.132529483974217e-07, | |
| "loss": 5.0485, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.8130541941698065, | |
| "grad_norm": 39.878597259521484, | |
| "learning_rate": 8.869865787378262e-07, | |
| "loss": 6.3068, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.8158871356129069, | |
| "grad_norm": 21.064966201782227, | |
| "learning_rate": 8.61066661040324e-07, | |
| "loss": 3.3587, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.8187200770560072, | |
| "grad_norm": 22.18380355834961, | |
| "learning_rate": 8.354953786205133e-07, | |
| "loss": 4.242, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.8215530184991077, | |
| "grad_norm": 41.24013137817383, | |
| "learning_rate": 8.102748854273468e-07, | |
| "loss": 4.1017, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.824385959942208, | |
| "grad_norm": 23.30076789855957, | |
| "learning_rate": 7.854073058617112e-07, | |
| "loss": 5.3308, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.8272189013853084, | |
| "grad_norm": 21.42025375366211, | |
| "learning_rate": 7.60894734597476e-07, | |
| "loss": 8.113, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.8300518428284087, | |
| "grad_norm": 18.741073608398438, | |
| "learning_rate": 7.367392364050485e-07, | |
| "loss": 5.1848, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.8328847842715091, | |
| "grad_norm": 23.857194900512695, | |
| "learning_rate": 7.129428459774618e-07, | |
| "loss": 7.1581, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.8357177257146094, | |
| "grad_norm": 28.530094146728516, | |
| "learning_rate": 6.895075677589791e-07, | |
| "loss": 6.2661, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.8385506671577099, | |
| "grad_norm": 40.354949951171875, | |
| "learning_rate": 6.664353757762515e-07, | |
| "loss": 4.2647, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.8413836086008102, | |
| "grad_norm": 21.305288314819336, | |
| "learning_rate": 6.437282134720479e-07, | |
| "loss": 4.9122, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.8442165500439106, | |
| "grad_norm": 40.32603454589844, | |
| "learning_rate": 6.21387993541544e-07, | |
| "loss": 6.2095, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.8470494914870109, | |
| "grad_norm": 39.293067932128906, | |
| "learning_rate": 5.994165977712175e-07, | |
| "loss": 4.1365, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.8498824329301113, | |
| "grad_norm": 25.006118774414062, | |
| "learning_rate": 5.778158768803294e-07, | |
| "loss": 3.4504, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.8527153743732117, | |
| "grad_norm": 38.37477111816406, | |
| "learning_rate": 5.565876503650442e-07, | |
| "loss": 4.2214, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.8555483158163121, | |
| "grad_norm": 40.921207427978516, | |
| "learning_rate": 5.357337063451601e-07, | |
| "loss": 5.1103, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.8583812572594125, | |
| "grad_norm": 26.225017547607422, | |
| "learning_rate": 5.152558014134906e-07, | |
| "loss": 5.9913, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.8612141987025128, | |
| "grad_norm": 22.678930282592773, | |
| "learning_rate": 4.951556604879049e-07, | |
| "loss": 4.3731, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.8640471401456132, | |
| "grad_norm": 29.937393188476562, | |
| "learning_rate": 4.754349766660299e-07, | |
| "loss": 4.2301, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.8668800815887135, | |
| "grad_norm": 26.465091705322266, | |
| "learning_rate": 4.5609541108263377e-07, | |
| "loss": 6.0091, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.869713023031814, | |
| "grad_norm": 25.58681297302246, | |
| "learning_rate": 4.3713859276971026e-07, | |
| "loss": 6.979, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.8725459644749143, | |
| "grad_norm": 22.564706802368164, | |
| "learning_rate": 4.1856611851925245e-07, | |
| "loss": 5.0316, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.8753789059180147, | |
| "grad_norm": 40.97758102416992, | |
| "learning_rate": 4.003795527487508e-07, | |
| "loss": 8.964, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.878211847361115, | |
| "grad_norm": 32.80113220214844, | |
| "learning_rate": 3.8258042736942446e-07, | |
| "loss": 3.1517, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.8810447888042154, | |
| "grad_norm": 30.950176239013672, | |
| "learning_rate": 3.651702416571762e-07, | |
| "loss": 4.345, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.8838777302473158, | |
| "grad_norm": 21.242090225219727, | |
| "learning_rate": 3.481504621263049e-07, | |
| "loss": 6.1642, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.8867106716904162, | |
| "grad_norm": 21.0790958404541, | |
| "learning_rate": 3.315225224059809e-07, | |
| "loss": 5.1734, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.8895436131335165, | |
| "grad_norm": 41.8050537109375, | |
| "learning_rate": 3.1528782311948226e-07, | |
| "loss": 5.0608, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.8923765545766169, | |
| "grad_norm": 23.527942657470703, | |
| "learning_rate": 2.9944773176621756e-07, | |
| "loss": 5.9961, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.8952094960197172, | |
| "grad_norm": 28.754201889038086, | |
| "learning_rate": 2.840035826065368e-07, | |
| "loss": 3.8781, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.8980424374628176, | |
| "grad_norm": 26.580829620361328, | |
| "learning_rate": 2.689566765493451e-07, | |
| "loss": 4.1426, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.9008753789059181, | |
| "grad_norm": 18.550945281982422, | |
| "learning_rate": 2.5430828104251684e-07, | |
| "loss": 4.9139, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.9037083203490184, | |
| "grad_norm": 20.301895141601562, | |
| "learning_rate": 2.4005962996614174e-07, | |
| "loss": 3.1654, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.9065412617921188, | |
| "grad_norm": 37.804969787597656, | |
| "learning_rate": 2.2621192352858702e-07, | |
| "loss": 5.0736, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.9093742032352191, | |
| "grad_norm": 29.193897247314453, | |
| "learning_rate": 2.1276632816540077e-07, | |
| "loss": 5.2175, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.9122071446783195, | |
| "grad_norm": 23.378320693969727, | |
| "learning_rate": 1.9972397644106023e-07, | |
| "loss": 5.2508, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.9150400861214198, | |
| "grad_norm": 18.92923355102539, | |
| "learning_rate": 1.870859669535724e-07, | |
| "loss": 5.2554, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.9178730275645203, | |
| "grad_norm": 18.03963279724121, | |
| "learning_rate": 1.7485336424193366e-07, | |
| "loss": 5.1253, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.9207059690076206, | |
| "grad_norm": 38.961456298828125, | |
| "learning_rate": 1.6302719869646432e-07, | |
| "loss": 5.0659, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.923538910450721, | |
| "grad_norm": 20.624431610107422, | |
| "learning_rate": 1.5160846647201132e-07, | |
| "loss": 4.1776, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.9263718518938213, | |
| "grad_norm": 21.755279541015625, | |
| "learning_rate": 1.4059812940404093e-07, | |
| "loss": 3.142, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.9292047933369217, | |
| "grad_norm": 20.076051712036133, | |
| "learning_rate": 1.2999711492762079e-07, | |
| "loss": 5.2161, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.932037734780022, | |
| "grad_norm": 25.841142654418945, | |
| "learning_rate": 1.198063159992996e-07, | |
| "loss": 5.3184, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.9348706762231225, | |
| "grad_norm": 35.23577117919922, | |
| "learning_rate": 1.1002659102188784e-07, | |
| "loss": 3.3098, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.9377036176662228, | |
| "grad_norm": 20.789785385131836, | |
| "learning_rate": 1.006587637721551e-07, | |
| "loss": 3.1742, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.9405365591093232, | |
| "grad_norm": 40.675296783447266, | |
| "learning_rate": 9.170362333143778e-08, | |
| "loss": 7.3385, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.9433695005524236, | |
| "grad_norm": 24.71589469909668, | |
| "learning_rate": 8.316192401917667e-08, | |
| "loss": 5.3478, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.946202441995524, | |
| "grad_norm": 38.48093032836914, | |
| "learning_rate": 7.503438532937169e-08, | |
| "loss": 6.069, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.9490353834386244, | |
| "grad_norm": 26.636127471923828, | |
| "learning_rate": 6.732169186998372e-08, | |
| "loss": 4.1179, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.9518683248817247, | |
| "grad_norm": 42.95631790161133, | |
| "learning_rate": 6.002449330526294e-08, | |
| "loss": 6.9268, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.9547012663248251, | |
| "grad_norm": 20.64594268798828, | |
| "learning_rate": 5.31434043010276e-08, | |
| "loss": 3.1192, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.9575342077679254, | |
| "grad_norm": 44.144744873046875, | |
| "learning_rate": 4.667900447288931e-08, | |
| "loss": 6.0163, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.9603671492110258, | |
| "grad_norm": 41.0361442565918, | |
| "learning_rate": 4.0631838337427675e-08, | |
| "loss": 5.265, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.9632000906541262, | |
| "grad_norm": 28.538305282592773, | |
| "learning_rate": 3.500241526632753e-08, | |
| "loss": 5.07, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.9660330320972266, | |
| "grad_norm": 24.23358154296875, | |
| "learning_rate": 2.979120944346936e-08, | |
| "loss": 4.3623, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.9688659735403269, | |
| "grad_norm": 26.74643898010254, | |
| "learning_rate": 2.499865982499128e-08, | |
| "loss": 3.3373, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.9716989149834273, | |
| "grad_norm": 22.635358810424805, | |
| "learning_rate": 2.0625170102309687e-08, | |
| "loss": 4.2529, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.9745318564265276, | |
| "grad_norm": 25.663415908813477, | |
| "learning_rate": 1.6671108668119828e-08, | |
| "loss": 4.1368, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.977364797869628, | |
| "grad_norm": 28.40155029296875, | |
| "learning_rate": 1.3136808585361149e-08, | |
| "loss": 5.9535, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.9801977393127284, | |
| "grad_norm": 21.457969665527344, | |
| "learning_rate": 1.0022567559164198e-08, | |
| "loss": 6.1661, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.9830306807558288, | |
| "grad_norm": 26.77224349975586, | |
| "learning_rate": 7.328647911774567e-09, | |
| "loss": 4.1479, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.9858636221989292, | |
| "grad_norm": 34.7308464050293, | |
| "learning_rate": 5.055276560454459e-09, | |
| "loss": 3.216, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.9886965636420295, | |
| "grad_norm": 27.362573623657227, | |
| "learning_rate": 3.202644998370752e-09, | |
| "loss": 5.132, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.9915295050851299, | |
| "grad_norm": 33.32588195800781, | |
| "learning_rate": 1.770909278464017e-09, | |
| "loss": 6.1225, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.9943624465282302, | |
| "grad_norm": 19.929155349731445, | |
| "learning_rate": 7.601900003051388e-10, | |
| "loss": 3.1458, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.9971953879713307, | |
| "grad_norm": 26.834556579589844, | |
| "learning_rate": 1.7057229993344693e-10, | |
| "loss": 7.2351, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.9997450352701209, | |
| "step": 3529, | |
| "total_flos": 1.1540349697243742e+19, | |
| "train_loss": 5.616373471816494, | |
| "train_runtime": 84659.9545, | |
| "train_samples_per_second": 2.502, | |
| "train_steps_per_second": 0.042 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 3529, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.1540349697243742e+19, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |